Files
racket-chatgpt-bootstrap/package.php
T
2026-05-25 13:47:46 +02:00

477 lines
12 KiB
PHP

<?php
/*
* package.php
*
* Requires:
*
* nexttoken.php
* gitfetcher.php
* b64parts.php
* base64config.php
* lib/catalog-http.php
* lib/racket-data.php
*
* Routes:
*
* /package?name=<package>&next=...
* HTML page with base64 part links for data/<package>.zip.
*
* /package-part?name=<package>&part=000001&next=...
* text/plain with the base64 content of a single part.
*
* Rules:
*
* - HTML for index/pages.
* - text/plain for payload.
* - Payload is always base64.
* - One next id per generated HTML page.
*/
/*
* Report every PHP error, both in the response body and in the error log.
* NOTE(review): display_errors is switched on unconditionally - confirm this
* is intended outside development.
*/
ini_set('display_errors', '1');
ini_set('display_startup_errors', '1');
ini_set('log_errors', '1');
error_reporting(E_ALL);
/*
* Token store backing the "next" ids, kept in an SQLite file under data/.
* NextTokenStore is presumably declared in nexttoken.php - TODO confirm.
*/
require_once __DIR__ . '/nexttoken.php';
$TOKENS = new NextTokenStore(__DIR__ . '/data/racket-sandbox.sqlite');
/*
* Allow up to 300 seconds of execution (failure to raise the limit is
* silenced with @) and let the script stop when the client disconnects.
*/
@set_time_limit(300);
ignore_user_abort(false);
require_once __DIR__ . '/gitfetcher.php';
require_once __DIR__ . '/b64parts.php';
require_once __DIR__ . '/base64config.php';
require_once __DIR__ . '/lib/catalog-http.php';
require_once __DIR__ . '/lib/racket-data.php';
/*
* Directory for cached catalog entries, package zips and base64 parts; base
* URL of the Racket package catalog; catalog cache lifetime passed to
* catalog_http_fetch_cached() (3600, presumably seconds - TODO confirm).
*/
define('DATA_DIR', __DIR__ . '/data');
define('CATALOG_PACKAGE_BASE', 'https://pkgs.racket-lang.org/pkg/');
define('CATALOG_CACHE_TTL', 3600);
/*
* Maximum size of one base64 part, read from the chunk configuration key
* "package_zip_max_base64_kb" (default 2048) and clamped to at least 1 KiB.
*/
$chunkConfig = load_base64_chunk_config();
$packageZipMaxBase64Kb = (int)($chunkConfig['package_zip_max_base64_kb'] ?? 2048);
if ($packageZipMaxBase64Kb < 1) {
$packageZipMaxBase64Kb = 1;
}
define('PACKAGE_ZIP_MAX_BASE64_KB', $packageZipMaxBase64Kb);
define('PACKAGE_ZIP_MAX_BASE64_BYTES', PACKAGE_ZIP_MAX_BASE64_KB * 1024);
/*
* One fresh next id per request; make_url() appends it to every link it
* builds. NOTE(review): this runs for every route, including /package-part
* and unknown routes that never emit links - confirm that is intended.
*/
$NEXT_ID = $TOKENS->create();
/**
 * Return the path of the current request, normalised to exactly one leading
 * slash and no trailing slash.
 *
 * Reads $_SERVER['REQUEST_URI'] (treated as "/" when absent) and ignores the
 * query string. An empty or unparsable path yields "/".
 *
 * @return string
 */
function path_only()
{
    $requestUri = $_SERVER['REQUEST_URI'] ?? '/';
    $rawPath = parse_url($requestUri, PHP_URL_PATH);
    if (!$rawPath) {
        // Covers null (no path component), false (parse failure) and ''.
        $rawPath = '/';
    }
    return '/' . trim($rawPath, '/');
}
/**
 * Escape a value for safe inclusion in HTML text or attribute context.
 *
 * The value is cast to string; both quote styles are escaped and invalid
 * UTF-8 sequences are substituted rather than producing an empty result.
 *
 * @param mixed $s Value to escape.
 * @return string
 */
function h($s)
{
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;
    $text = (string)$s;
    return htmlspecialchars($text, $flags, 'UTF-8');
}
/**
 * Determine the URL scheme ("http" or "https") of the current request.
 *
 * The first entry of X-Forwarded-Proto is honoured only when it is exactly
 * "http" or "https" (case-insensitive). That header is client-controllable,
 * and the result becomes the scheme of every generated link, so any other
 * value is ignored and the decision falls back to $_SERVER['HTTPS'].
 *
 * @return string "http" or "https".
 */
function current_scheme()
{
    if (!empty($_SERVER['HTTP_X_FORWARDED_PROTO'])) {
        $forwarded = explode(',', (string)$_SERVER['HTTP_X_FORWARDED_PROTO']);
        $candidate = strtolower(trim($forwarded[0]));
        if ($candidate === 'http' || $candidate === 'https') {
            return $candidate;
        }
    }
    return (!empty($_SERVER['HTTPS']) && $_SERVER['HTTPS'] !== 'off') ? 'https' : 'http';
}
/**
 * Return the host (and port, if any) the client addressed, as sent in the
 * Host header, or "localhost" when the header is absent.
 *
 * @return string
 */
function current_host()
{
    if (isset($_SERVER['HTTP_HOST'])) {
        return $_SERVER['HTTP_HOST'];
    }
    return 'localhost';
}
/**
 * Build an absolute URL on the current scheme and host.
 *
 * The global next id is always written into the query under the key "next",
 * replacing any "next" entry supplied by the caller.
 *
 * @param string $path  Absolute path, starting with "/".
 * @param array  $query Query parameters to encode.
 * @return string
 */
function make_url($path, $query = array())
{
    global $NEXT_ID;
    $params = $query;
    $params['next'] = $NEXT_ID;
    $origin = current_scheme() . '://' . current_host();
    return $origin . $path . '?' . http_build_query($params);
}
/**
 * Send an uncacheable HTML response and terminate the script.
 *
 * @param string $html   Complete response body.
 * @param int    $status HTTP status code.
 * @return void Never returns; the script exits after output.
 */
function html_response($html, $status = 200)
{
    http_response_code($status);
    $headerLines = array(
        'Content-Type: text/html; charset=utf-8',
        'Cache-Control: no-store, no-cache, must-revalidate, max-age=0',
        'Pragma: no-cache',
    );
    foreach ($headerLines as $line) {
        header($line);
    }
    echo $html;
    exit;
}
/**
 * Send an uncacheable text/plain response and terminate the script.
 *
 * The body is always terminated by a newline: one is appended when the text
 * is empty or does not already end in "\n".
 *
 * @param string $text   Response body.
 * @param int    $status HTTP status code.
 * @return void Never returns; the script exits after output.
 */
function text_response($text, $status = 200)
{
    http_response_code($status);
    $headerLines = array(
        'Content-Type: text/plain; charset=us-ascii',
        'Content-Disposition: inline',
        'X-Content-Type-Options: nosniff',
        'Cache-Control: no-store, no-cache, must-revalidate, max-age=0',
        'Pragma: no-cache',
    );
    foreach ($headerLines as $line) {
        header($line);
    }
    echo $text;
    $endsWithNewline = $text !== '' && substr($text, -1) === "\n";
    if (!$endsWithNewline) {
        echo "\n";
    }
    exit;
}
/**
 * Emit a minimal HTML error page and terminate the script.
 *
 * The message is HTML-escaped and shown inside a <pre> element.
 *
 * @param string $message Error text to display.
 * @param int    $status  HTTP status code.
 * @return void Never returns; html_response() exits.
 */
function fail_html($message, $status = 500)
{
    $head = '<!doctype html><html><head><meta charset="utf-8">'
        . '<title>Package error</title></head>';
    $body = '<body><h1>Package error</h1><pre>' . h($message) . '</pre></body></html>';
    html_response($head . $body, $status);
}
/**
 * Emit a plain-text error line ("error: <message>") and terminate the script.
 *
 * @param string $message Error text.
 * @param int    $status  HTTP status code.
 * @return void Never returns; text_response() exits.
 */
function fail_text($message, $status = 500)
{
    $line = sprintf("error: %s\n", $message);
    text_response($line, $status);
}
/**
 * Tell whether $name is an acceptable package name.
 *
 * Delegates the decision entirely to rktd_package_name_ok().
 *
 * @param mixed $name Candidate package name.
 * @return mixed Whatever rktd_package_name_ok() returns for $name.
 */
function package_name_ok($name)
{
    $verdict = rktd_package_name_ok($name);
    return $verdict;
}
/**
 * Make sure DATA_DIR is usable via catalog_http_ensure_dir().
 *
 * Anything thrown by that helper is reported as an HTML error page, which
 * ends the request.
 *
 * @return void
 */
function ensure_data_dir()
{
    try {
        catalog_http_ensure_dir(DATA_DIR);
        return;
    } catch (Throwable $problem) {
        $reason = $problem->getMessage();
    }
    fail_html($reason);
}
/**
 * Path of the cached catalog entry for a package.
 *
 * The file name is derived from the SHA-1 of the package name, so the name
 * itself never appears in the path. Ensures DATA_DIR first.
 *
 * @param string $package Package name.
 * @return string Path ending in ".rktd" inside DATA_DIR.
 */
function catalog_cache_file($package)
{
    ensure_data_dir();
    $digest = sha1($package);
    return sprintf('%s/catalog-%s.rktd', DATA_DIR, $digest);
}
/**
 * Path of the metadata file that accompanies a cached catalog entry.
 *
 * Uses the same SHA-1 based naming as the cache file, with a ".meta.json"
 * suffix. Ensures DATA_DIR first.
 *
 * @param string $package Package name.
 * @return string Path ending in ".meta.json" inside DATA_DIR.
 */
function catalog_meta_file($package)
{
    ensure_data_dir();
    $digest = sha1($package);
    return sprintf('%s/catalog-%s.meta.json', DATA_DIR, $digest);
}
/**
 * Fetch the catalog entry text for a package through the on-disk cache.
 *
 * The entry URL is CATALOG_PACKAGE_BASE followed by the URL-encoded package
 * name. Any failure raised while fetching is turned into an HTML error page,
 * which ends the request.
 *
 * @param string $package Package name.
 * @return mixed Whatever catalog_http_fetch_cached() returns for the entry.
 */
function get_catalog_package_text($package)
{
    $cachePath = catalog_cache_file($package);
    $entryUrl = CATALOG_PACKAGE_BASE . rawurlencode($package);
    try {
        $metaPath = catalog_meta_file($package);
        $entryText = catalog_http_fetch_cached(
            $entryUrl,
            $cachePath,
            $metaPath,
            CATALOG_CACHE_TTL,
            'rktsndbx-package-entry/1.0',
            180
        );
        return $entryText;
    } catch (Throwable $problem) {
        fail_html($problem->getMessage());
    }
}
/**
 * Pull the package source out of a catalog entry.
 *
 * Delegates entirely to rktd_extract_catalog_source().
 *
 * @param mixed $catalogText Catalog entry as returned by the fetch helper.
 * @return mixed Whatever rktd_extract_catalog_source() returns; callers treat
 *               null and '' as "no source found".
 */
function extract_catalog_source($catalogText)
{
    $found = rktd_extract_catalog_source($catalogText);
    return $found;
}
/**
 * Turn a Racket catalog package source into a plain repository URL.
 *
 * Handled forms:
 *  - github://github.com/owner/repo[/branch[/subdir]] becomes
 *    https://github.com/owner/repo (branch and subdir are dropped for now).
 *  - git+https://... and git+http://... lose the "git+" prefix.
 *  - Every other source is passed through.
 *
 * For all non-github forms the query string and fragment (?path=..., #branch)
 * are removed, because only the repository itself is fetched here.
 *
 * Invalid input (empty, unparsable, or a github source without both owner
 * and repo) is reported through fail_html(), which ends the request.
 *
 * @param mixed $source Catalog source; cast to string and trimmed.
 * @return string Repository URL.
 */
function normalize_source_to_repo_url($source)
{
    $source = trim((string)$source);
    if ($source === '') {
        fail_html('Lege package source.');
    }
    $p = parse_url($source);
    if ($p === false || empty($p['scheme'])) {
        fail_html('Kan package source niet parsen: ' . $source);
    }
    $scheme = strtolower($p['scheme']);
    if ($scheme === 'github') {
        $host = !empty($p['host']) ? strtolower($p['host']) : 'github.com';
        $bits = explode('/', trim($p['path'] ?? '', '/'));
        // Owner and repo must both be non-empty; "owner//x" would otherwise
        // produce a URL without a repository name.
        if ($host !== 'github.com' || count($bits) < 2 || $bits[0] === '' || $bits[1] === '') {
            fail_html('Ongeldige github package source: ' . $source);
        }
        return 'https://github.com/' . $bits[0] . '/' . $bits[1];
    }
    /*
     * Strip the "git+" prefix. The test uses the lowercased scheme so that
     * e.g. "GIT+HTTPS://" is handled too; the remainder is kept verbatim.
     */
    if ($scheme === 'git+https' || $scheme === 'git+http') {
        $source = substr($scheme, strlen('git+')) . substr($source, strlen($scheme));
    }
    /*
     * Drop query and fragment for the repository fetch. This now also applies
     * to git+http(s) sources, which previously returned before this step.
     */
    return substr($source, 0, strcspn($source, '?#'));
}
/**
 * Make sure the package zip and its base64 parts exist on disk.
 *
 * Steps: look up the package source in the catalog, normalise it to a
 * repository URL, let GitFetcher produce data/<package>.zip, then create or
 * reuse the base64 parts for that zip. Each failure is reported through
 * fail_html(), which ends the request.
 *
 * @param string $package Package name (already validated by the caller).
 * @return array Keys: package, source, repo_url, zip_info, manifest.
 */
function ensure_package_zip_and_parts($package)
{
    $source = extract_catalog_source(get_catalog_package_text($package));
    if ($source === null || $source === '') {
        fail_html('Geen source gevonden in Racket package catalogus voor package: ' . $package);
    }

    $repoUrl = normalize_source_to_repo_url($source);
    $gitFetcher = new GitFetcher(array('data_dir' => DATA_DIR));
    try {
        $zipInfo = $gitFetcher->ensurePackageZip($package, $repoUrl);
    } catch (Throwable $fetchError) {
        $report = sprintf(
            "Kon package zip niet ophalen.\n\nPackage: %s\nCatalog source: %s\nRepo URL: %s\n\n%s",
            $package,
            $source,
            $repoUrl,
            $fetchError->getMessage()
        );
        fail_html($report);
    }

    // The zip is looked up at the fixed location data/<package>.zip.
    $zipPath = DATA_DIR . '/' . $package . '.zip';
    $zipUsable = is_file($zipPath) && is_readable($zipPath);
    if (!$zipUsable) {
        fail_html('Zipbestand ontbreekt na fetch: ' . $zipPath);
    }

    $splitter = new Base64Parts(DATA_DIR, PACKAGE_ZIP_MAX_BASE64_BYTES);
    try {
        $manifest = ensure_parts_for_zip($splitter, $package, $zipPath);
    } catch (Throwable $splitError) {
        fail_html('Kon base64-parts niet maken: ' . $splitError->getMessage());
    }

    $result = array();
    $result['package'] = $package;
    $result['source'] = $source;
    $result['repo_url'] = $repoUrl;
    $result['zip_info'] = $zipInfo;
    $result['manifest'] = $manifest;
    return $result;
}
/**
 * Return a parts manifest for the zip, reusing existing parts when possible.
 *
 * An existing manifest is reused only when its recorded SHA-256, byte size
 * and maximum part size all match the current zip and configuration, it
 * lists at least one part, and every listed part file is present and
 * readable. Otherwise the zip is split again.
 *
 * The returned manifest carries "parts_status": "cached" or "created".
 *
 * @param object $parts   Object providing loadManifest() and splitFile().
 * @param string $package Package name.
 * @param string $zipFile Path of the zip to split.
 * @return array Manifest including the added "parts_status" key.
 */
function ensure_parts_for_zip($parts, $package, $zipFile)
{
    $currentSha = hash_file('sha256', $zipFile);
    $currentSize = filesize($zipFile);

    try {
        $cached = $parts->loadManifest($package);
        $describesThisZip =
            isset($cached['source_sha256'], $cached['source_bytes'], $cached['max_base64_bytes'], $cached['parts'])
            && $cached['source_sha256'] === $currentSha
            && (int)$cached['source_bytes'] === (int)$currentSize
            && (int)$cached['max_base64_bytes'] === PACKAGE_ZIP_MAX_BASE64_BYTES
            && is_array($cached['parts'])
            && count($cached['parts']) > 0;

        if ($describesThisZip) {
            $everyPartPresent = true;
            foreach ($cached['parts'] as $entry) {
                $everyPartPresent = !empty($entry['file'])
                    && is_file($entry['file'])
                    && is_readable($entry['file']);
                if (!$everyPartPresent) {
                    break;
                }
            }
            if ($everyPartPresent) {
                $cached['parts_status'] = 'cached';
                return $cached;
            }
        }
    } catch (Throwable $ignored) {
        // Missing or invalid manifest: deliberately fall through and rebuild.
    }

    $fresh = $parts->splitFile($zipFile, $package, true);
    $fresh['parts_status'] = 'created';
    return $fresh;
}
/**
 * Handle /package: render the HTML index page for one package.
 *
 * Validates the "name" query parameter (400 on failure), makes sure the zip
 * and its base64 parts exist, then prints a page with fetch metadata, one
 * /package-part link per part, and shell instructions for reassembling the
 * zip. All links are built with make_url() and therefore share the same next
 * id. Never returns: html_response() exits.
 */
function serve_package_page()
{
global $NEXT_ID;
$package = $_GET['name'] ?? '';
if (!package_name_ok($package)) {
fail_html('Ongeldige of ontbrekende package naam.', 400);
}
$info = ensure_package_zip_and_parts($package);
$manifest = $info['manifest'];
$zipInfo = $info['zip_info'];
// One table row per part: part number, base64 size and the text/plain URL.
$rows = '';
foreach ($manifest['parts'] as $part) {
$n = $part['number'];
$url = make_url('/package-part', array(
'name' => $package,
'part' => $n,
));
$rows .=
'<tr>' .
'<td>' . h($n) . '</td>' .
'<td>' . h((string)$part['base64_bytes']) . '</td>' .
'<td><a href="' . h($url) . '">' . h($url) . '</a></td>' .
'</tr>' . "\n";
}
// Every dynamic value interpolated below goes through h(); $rows was escaped
// while it was built above.
html_response('<!doctype html>
<html lang="nl">
<head>
<meta charset="utf-8">
<title>Package ' . h($package) . '</title>
<link rel="stylesheet" href="/styles.css">
</head>
<body class="simple-doc">
<h1>Package ' . h($package) . '</h1>
<p>
Deze pagina is HTML. Alle part-links hieronder geven <code>text/plain</code>
met base64-inhoud terug. Dezelfde <code>next</code> wordt gebruikt voor alle
part-links op deze pagina.
</p>
<h2>Bron</h2>
<table>
<tr><th>catalog source</th><td><code>' . h($info['source']) . '</code></td></tr>
<tr><th>repo url</th><td><code>' . h($info['repo_url']) . '</code></td></tr>
<tr><th>fetch status</th><td><code>' . h($zipInfo['status'] ?? '') . '</code></td></tr>
<tr><th>default branch</th><td><code>' . h($zipInfo['default_branch'] ?? '') . '</code></td></tr>
<tr><th>head sha</th><td><code>' . h($zipInfo['head_sha'] ?? '') . '</code></td></tr>
<tr><th>zip file</th><td><code>' . h($zipInfo['zip_file'] ?? '') . '</code></td></tr>
<tr><th>zip bytes</th><td><code>' . h((string)($zipInfo['zip_bytes'] ?? '')) . '</code></td></tr>
<tr><th>zip sha256</th><td><code>' . h($zipInfo['zip_sha256'] ?? '') . '</code></td></tr>
<tr><th>parts status</th><td><code>' . h($manifest['parts_status'] ?? '') . '</code></td></tr>
<tr><th>max base64 part size</th><td><code>' . h((string)PACKAGE_ZIP_MAX_BASE64_KB) . '</code> KiB (<code>' . h((string)PACKAGE_ZIP_MAX_BASE64_BYTES) . '</code> bytes)</td></tr>
<tr><th>binary chunk size</th><td><code>' . h((string)($manifest['binary_chunk_bytes'] ?? '')) . '</code> bytes</td></tr>
<tr><th>part count</th><td><code>' . h((string)$manifest['part_count']) . '</code></td></tr>
<tr><th>next id</th><td><code>' . h($NEXT_ID) . '</code></td></tr>
</table>
<h2>Base64 parts</h2>
<table>
<thead>
<tr>
<th>part</th>
<th>base64 bytes</th>
<th>text/plain URL</th>
</tr>
</thead>
<tbody>
' . $rows . '
</tbody>
</table>
<h2>Reconstructie in de sandbox</h2>
<pre>
# download alle links als:
# ' . h($package) . '.part.000001.b64
# ' . h($package) . '.part.000002.b64
# enz.
cat ' . h($package) . '.part.*.b64 &gt; ' . h($package) . '.zip.b64
base64 -d ' . h($package) . '.zip.b64 &gt; ' . h($package) . '.zip
raco pkg install --auto ./' . h($package) . '.zip
</pre>
</body>
</html>');
}
/**
 * Handle /package-part: return one base64 part as text/plain.
 *
 * Validates the "name" and "part" query parameters (400 on failure), reads
 * the part through Base64Parts::readPart() and sends it. A read failure is
 * reported as a 404 text error. Never returns: every path ends in
 * text_response(), which exits.
 *
 * @return void
 */
function serve_package_part()
{
    $package = $_GET['name'] ?? '';
    $part = $_GET['part'] ?? '';
    if (!package_name_ok($package)) {
        fail_text('Ongeldige of ontbrekende package naam.', 400);
    }
    /*
     * Exactly six ASCII digits. \z anchors at the very end of the string;
     * a plain $ would also accept a trailing newline ("000001\n").
     */
    if (!is_string($part) || preg_match('/^[0-9]{6}\z/', $part) !== 1) {
        fail_text('Ongeldig of ontbrekend partnummer.', 400);
    }
    $parts = new Base64Parts(DATA_DIR, PACKAGE_ZIP_MAX_BASE64_BYTES);
    try {
        $txt = $parts->readPart($package, $part);
    } catch (Throwable $e) {
        fail_text($e->getMessage(), 404);
    }
    text_response($txt, 200);
}
/*
 * Route dispatch. Each known path maps to the kind of next token it must
 * present and the handler that serves it. Anything that falls through ends
 * in a 404 HTML error page.
 */
$path = path_only();
$routes = array(
    '/package' => array('html', 'serve_package_page'),
    '/package-part' => array('text', 'serve_package_part'),
);
if (isset($routes[$path])) {
    list($nextKind, $handler) = $routes[$path];
    $TOKENS->check_valid_next($nextKind);
    $handler();
}
fail_html('Onbekende route: ' . $path, 404);