Files
www-data 2f2e8869d6 Reorganize PHP internals and static assets
Move shared PHP code into private/, move JavaScript files into js/, and block direct access to private/. Remove unused API key and cache artifacts from the working tree.
2026-05-26 11:32:36 +02:00

579 lines
18 KiB
PHP

<?php
/*
* gitfetcher.php
*
* Doel:
* Voor een gegeven package-naam en git repository:
*
* 1. bepaal de default branch;
* 2. bepaal de huidige HEAD commit SHA;
* 3. controleer data/<package>.json;
* 4. als de SHA gelijk is en data/<package>.zip bestaat: niets downloaden;
* 5. anders: haal de repository-zip op en sla op als data/<package>.zip.
*
* Ondersteund:
* - github.com
* - git.dijkewijk.nl Gitea
* - codeberg.org Gitea
*
* GitHub:
* - branch-SHA via /repos/{owner}/{repo}/branches/{branch}
* - zip via /repos/{owner}/{repo}/zipball/{ref}
*
* Gitea/Codeberg:
* - repository-info via /api/v1/repos/{owner}/{repo}
* - branch-SHA via /api/v1/repos/{owner}/{repo}/branches/{branch}
* - zip via /owner/repo/archive/{ref}.zip
*/
/**
 * Exception type thrown by GitFetcher for the failures it reports itself
 * (invalid package names or URLs, unsupported hosts, HTTP and filesystem
 * errors).
 *
 * It adds nothing to Exception; it only gives callers a dedicated type to
 * catch.
 */
class GitFetcherException extends Exception {}
class GitFetcher
{
/** Directory that holds <package>.zip and <package>.json (no trailing slash). */
private $dataDir;
/** Overall request timeout in seconds (CURLOPT_TIMEOUT / stream context 'timeout'). */
private $timeout;
/** Connect timeout in seconds (CURLOPT_CONNECTTIMEOUT; used by the cURL path only). */
private $connectTimeout;
/** Value sent as the User-Agent of every request. */
private $userAgent;
/** Map of repository host => API token; a non-empty entry adds an Authorization header. */
private $tokensByHost;
/**
 * Configure the fetcher.
 *
 * Recognised keys in $options (all optional):
 *  - 'data_dir'        directory for the zip/json files; trailing slashes
 *                      are removed. Default: "data" next to the parent of
 *                      this file's directory.
 *  - 'timeout'         overall request timeout, cast to int. Default: 180.
 *  - 'connect_timeout' connect timeout, cast to int. Default: 20.
 *  - 'user_agent'      User-Agent string. Default: 'rktsndbx-gitfetcher/1.0'.
 *  - 'tokens'          array of host => token; ignored unless it is an array.
 *
 * @param array $options
 */
public function __construct($options = array())
{
    // Start from the defaults and override only what the caller supplied.
    $dataDir = dirname(__DIR__) . '/data';
    if (isset($options['data_dir'])) {
        $dataDir = rtrim((string)$options['data_dir'], '/');
    }

    $timeout = 180;
    if (isset($options['timeout'])) {
        $timeout = (int)$options['timeout'];
    }

    $connectTimeout = 20;
    if (isset($options['connect_timeout'])) {
        $connectTimeout = (int)$options['connect_timeout'];
    }

    $userAgent = 'rktsndbx-gitfetcher/1.0';
    if (isset($options['user_agent'])) {
        $userAgent = (string)$options['user_agent'];
    }

    $tokens = array();
    if (isset($options['tokens']) && is_array($options['tokens'])) {
        $tokens = $options['tokens'];
    }

    $this->dataDir = $dataDir;
    $this->timeout = $timeout;
    $this->connectTimeout = $connectTimeout;
    $this->userAgent = $userAgent;
    $this->tokensByHost = $tokens;
}
/**
 * Main entry point for the package route: make sure <data_dir>/<package>.zip
 * holds the archive of the repository's current default-branch HEAD.
 *
 * Example:
 *
 *   $gf = new GitFetcher();
 *   $info = $gf->ensurePackageZip(
 *       'html-parsing',
 *       'https://github.com/soegaard/html-parsing'
 *   );
 *
 * Files involved:
 *
 *   data/html-parsing.zip
 *   data/html-parsing.json
 *
 * The stored copy is reused when the zip is readable and the metadata file
 * records the same repo_url and head_sha as reported by the host API.
 *
 * @param string $packageName Package name; validated by safePackageName().
 * @param string $repoUrl     Repository URL on one of the supported hosts.
 * @return array Metadata about the zip. 'status' is 'cached' when the stored
 *               copy was reused and 'downloaded' when a new archive was stored.
 * @throws GitFetcherException On invalid input, HTTP failures, a response
 *                             that is not a zip, or filesystem errors.
 */
public function ensurePackageZip($packageName, $repoUrl)
{
    $packageName = $this->safePackageName($packageName);
    $repo = $this->parseRepositoryUrl($repoUrl);
    $head = $this->currentHead($repo);
    $zipFile = $this->packageZipFile($packageName);
    $metaFile = $this->packageMetaFile($packageName);
    $old = $this->readJsonFile($metaFile);
    if (is_file($zipFile) &&
        is_readable($zipFile) &&
        is_array($old) &&
        isset($old['head_sha']) &&
        $old['head_sha'] === $head['head_sha'] &&
        isset($old['repo_url']) &&
        $old['repo_url'] === $repoUrl) {
        return array(
            'status' => 'cached',
            'package' => $packageName,
            'repo_url' => $repoUrl,
            'host' => $repo['host'],
            'owner' => $repo['owner'],
            'repo' => $repo['repo'],
            'default_branch' => $head['default_branch'],
            'head_sha' => $head['head_sha'],
            'zip_file' => $zipFile,
            'meta_file' => $metaFile,
            'zip_bytes' => filesize($zipFile),
            'zip_sha256' => hash_file('sha256', $zipFile),
        );
    }

    // Check the target directory BEFORE downloading, so an unwritable
    // directory fails fast instead of after a potentially large transfer.
    $this->ensureDataDir();
    $archive = $this->downloadArchiveForHead($repo, $head);

    // Zip archives begin with the "PK" signature. Refuse anything else
    // (e.g. an HTML or JSON error page) so it is never stored as the zip and
    // then reported as "cached" on later calls.
    if (!is_string($archive['bytes']) || strncmp($archive['bytes'], 'PK', 2) !== 0) {
        throw new GitFetcherException('Download is geen geldig zip-bestand: ' . $archive['archive_url']);
    }

    // Write to per-process temporary names first and rename afterwards, so
    // readers never observe a partially written zip or metadata file.
    $tmpZip = $zipFile . '.tmp.' . getmypid();
    $tmpMeta = $metaFile . '.tmp.' . getmypid();
    if (file_put_contents($tmpZip, $archive['bytes'], LOCK_EX) === false) {
        @unlink($tmpZip);
        throw new GitFetcherException('Kan tijdelijke zip niet schrijven: ' . $tmpZip);
    }
    $meta = array(
        'package' => $packageName,
        'repo_url' => $repoUrl,
        'host' => $repo['host'],
        'owner' => $repo['owner'],
        'repo' => $repo['repo'],
        'default_branch' => $head['default_branch'],
        'head_sha' => $head['head_sha'],
        'archive_url' => $archive['archive_url'],
        'zip_file' => $zipFile,
        'zip_bytes' => strlen($archive['bytes']),
        'zip_sha256' => hash('sha256', $archive['bytes']),
        'updated_at' => gmdate('c'),
    );
    $json = json_encode($meta, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
    if ($json === false || file_put_contents($tmpMeta, $json . "\n", LOCK_EX) === false) {
        @unlink($tmpZip);
        @unlink($tmpMeta);
        throw new GitFetcherException('Kan tijdelijke metadata niet schrijven: ' . $tmpMeta);
    }
    // The zip goes first: if the metadata rename fails afterwards, the old
    // metadata no longer matches and the next call downloads again.
    if (!rename($tmpZip, $zipFile)) {
        @unlink($tmpZip);
        @unlink($tmpMeta);
        throw new GitFetcherException('Kan zip niet plaatsen: ' . $zipFile);
    }
    if (!rename($tmpMeta, $metaFile)) {
        @unlink($tmpMeta);
        throw new GitFetcherException('Kan metadata niet plaatsen: ' . $metaFile);
    }
    $meta['status'] = 'downloaded';
    return $meta;
}
/**
 * Check only, without downloading a zip: report whether the stored zip for
 * the package still matches the repository's current HEAD.
 *
 * The host API is still queried for the current head SHA.
 *
 * @param string $packageName Package name; validated by safePackageName().
 * @param string $repoUrl     Repository URL on one of the supported hosts.
 * @return bool True when the zip is readable and the metadata file records
 *              the same repo_url and head_sha.
 * @throws GitFetcherException On invalid input or HTTP failures.
 */
public function packageZipIsCurrent($packageName, $repoUrl)
{
    $name = $this->safePackageName($packageName);
    $head = $this->currentHead($this->parseRepositoryUrl($repoUrl));
    $zipPath = $this->packageZipFile($name);
    $recorded = $this->readJsonFile($this->packageMetaFile($name));

    if (!is_file($zipPath) || !is_readable($zipPath)) {
        return false;
    }
    if (!is_array($recorded)) {
        return false;
    }
    if (!isset($recorded['repo_url']) || $recorded['repo_url'] !== $repoUrl) {
        return false;
    }
    return isset($recorded['head_sha']) && $recorded['head_sha'] === $head['head_sha'];
}
/**
 * Determine the default branch and its current commit SHA for a repository
 * URL.
 *
 * @param string $repoUrl Repository URL on one of the supported hosts.
 * @return array Array with the keys 'default_branch' and 'head_sha'.
 * @throws GitFetcherException On an invalid URL or HTTP failures.
 */
public function currentHeadForRepositoryUrl($repoUrl)
{
    return $this->currentHead($this->parseRepositoryUrl($repoUrl));
}
/**
 * Validate a package name before it is used to build file names.
 *
 * Only ASCII letters, digits and the characters "_", ".", "+" and "-" are
 * accepted, which rules out directory separators.
 *
 * @param mixed $packageName Value to validate; it is cast to string first.
 * @return string The validated package name.
 * @throws GitFetcherException When the name is empty or has other characters.
 */
private function safePackageName($packageName)
{
    $packageName = (string)$packageName;
    // \A and \z anchor at the absolute start and end of the string. A plain
    // "$" would also match just before a trailing newline and let "name\n"
    // through into the file names.
    if (!preg_match('/\A[A-Za-z0-9_.+-]+\z/', $packageName)) {
        throw new GitFetcherException('Ongeldige package naam: ' . $packageName);
    }
    return $packageName;
}
/**
 * Build the path of the zip file stored for a package.
 *
 * @param string $packageName Package name.
 * @return string "<data_dir>/<package>.zip".
 */
private function packageZipFile($packageName)
{
    return sprintf('%s/%s.zip', $this->dataDir, $packageName);
}
/**
 * Build the path of the JSON metadata file stored for a package.
 *
 * @param string $packageName Package name.
 * @return string "<data_dir>/<package>.json".
 */
private function packageMetaFile($packageName)
{
    return sprintf('%s/%s.json', $this->dataDir, $packageName);
}
/**
 * Make sure the data directory exists and is writable, creating it
 * (recursively, mode 0755) when it is missing.
 *
 * @return void
 * @throws GitFetcherException When the directory cannot be created or is
 *                             not writable.
 */
private function ensureDataDir()
{
    $dir = $this->dataDir;
    if (!is_dir($dir) && !mkdir($dir, 0755, true)) {
        // mkdir() also fails when another request created the directory
        // between the is_dir() check and this call. That is not an error,
        // so look again before giving up.
        clearstatcache(true, $dir);
        if (!is_dir($dir)) {
            throw new GitFetcherException('Kan data directory niet maken: ' . $dir);
        }
    }
    if (!is_writable($dir)) {
        throw new GitFetcherException('Data directory is niet schrijfbaar: ' . $dir);
    }
}
/**
 * Read a file and decode it as a JSON object or array.
 *
 * @param string $file Path of the file to read.
 * @return array|null The decoded array, or null when the file is missing,
 *                    unreadable, empty, or does not decode to an array.
 */
private function readJsonFile($file)
{
    if (is_file($file) && is_readable($file)) {
        $contents = file_get_contents($file);
        if ($contents !== false && $contents !== '') {
            $decoded = json_decode($contents, true);
            if (is_array($decoded)) {
                return $decoded;
            }
        }
    }
    return null;
}
/**
 * Split a repository URL into host kind, host, owner and repository name.
 *
 * Accepted forms:
 *  - scp-style SSH:  git@github.com:owner/repo.git
 *  - URLs that parse_url() understands (https://, http://, ...), optionally
 *    prefixed with "git+" for the https and http schemes.
 *
 * A "?query" or "#fragment" suffix is ignored in both forms.
 *
 * @param string $repoUrl Repository URL.
 * @return array Array with the keys 'kind', 'host', 'owner' and 'repo'.
 * @throws GitFetcherException When the URL is malformed, the host is not
 *                             supported, or owner/repo cannot be determined.
 */
public function parseRepositoryUrl($repoUrl)
{
    $repoUrl = trim((string)$repoUrl);
    /*
     * SSH form:
     *   git@github.com:owner/repo.git
     */
    if (preg_match('/^git@([^:]+):(.+)$/', $repoUrl, $m)) {
        // parse_url() drops "?query" and "#fragment" in the URL branch
        // below. Do the same here, otherwise "repo.git#main" keeps its
        // suffix and ends up as the repository name.
        $path = preg_replace('/[?#].*$/s', '', $m[2]);
        return $this->parseHostAndPath(strtolower($m[1]), trim($path, '/'), $repoUrl);
    }
    // Reduce "git+https://" / "git+http://" to the plain scheme.
    if (strpos($repoUrl, 'git+https://') === 0) {
        $repoUrl = 'https://' . substr($repoUrl, strlen('git+https://'));
    } elseif (strpos($repoUrl, 'git+http://') === 0) {
        $repoUrl = 'http://' . substr($repoUrl, strlen('git+http://'));
    }
    $p = parse_url($repoUrl);
    if ($p === false || empty($p['host']) || empty($p['path'])) {
        throw new GitFetcherException('Ongeldige repository URL: ' . $repoUrl);
    }
    return $this->parseHostAndPath(
        strtolower($p['host']),
        trim($p['path'], '/'),
        $repoUrl
    );
}
/**
 * Turn a lower-cased host and a slash-trimmed path into a repository
 * description.
 *
 * A trailing ".git" on the path is removed; the first two path segments are
 * taken as owner and repository name and any further segments are ignored.
 *
 * @param string $host        Host name; must be one of the supported hosts.
 * @param string $path        Path without leading or trailing slashes.
 * @param string $originalUrl URL as supplied; used in error messages only.
 * @return array Array with the keys 'kind', 'host', 'owner' and 'repo'.
 * @throws GitFetcherException For an unsupported host or a path without
 *                             owner and repository.
 */
private function parseHostAndPath($host, $path, $originalUrl)
{
    // Supported hosts and the API flavour each one speaks.
    $hostKinds = array(
        'github.com' => 'github',
        'git.dijkewijk.nl' => 'gitea',
        'codeberg.org' => 'gitea',
    );
    if (!isset($hostKinds[$host])) {
        throw new GitFetcherException('Niet-ondersteunde git host: ' . $host);
    }

    $suffix = '.git';
    if (substr($path, -strlen($suffix)) === $suffix) {
        $path = substr($path, 0, -strlen($suffix));
    }

    $segments = explode('/', $path);
    $owner = isset($segments[0]) ? $segments[0] : '';
    $name = isset($segments[1]) ? $segments[1] : '';
    if ($owner === '' || $name === '') {
        throw new GitFetcherException('Kan owner/repo niet bepalen uit URL: ' . $originalUrl);
    }

    return array(
        'kind' => $hostKinds[$host],
        'host' => $host,
        'owner' => $owner,
        'repo' => $name,
    );
}
/**
 * Look up the default branch and its head commit using the API flavour
 * that matches the repository's host kind.
 *
 * @param array $repo Repository description from parseRepositoryUrl().
 * @return array Array with the keys 'default_branch' and 'head_sha'.
 * @throws GitFetcherException On HTTP failures or unexpected API responses.
 */
private function currentHead($repo)
{
    return $repo['kind'] === 'github'
        ? $this->githubCurrentHead($repo)
        : $this->giteaCurrentHead($repo);
}
/**
 * Ask the GitHub REST API for the default branch and its head commit SHA.
 *
 * Two requests are made: /repos/{owner}/{repo} for the default branch and
 * /repos/{owner}/{repo}/branches/{branch} for the commit.
 *
 * @param array $repo Repository description from parseRepositoryUrl().
 * @return array Array with the keys 'default_branch' and 'head_sha'.
 * @throws GitFetcherException On HTTP failures or when a field is missing.
 */
private function githubCurrentHead($repo)
{
    $repoEndpoint = sprintf(
        'https://api.github.com/repos/%s/%s',
        rawurlencode($repo['owner']),
        rawurlencode($repo['repo'])
    );

    $repoInfo = $this->httpGetJson($repoEndpoint, $repo['host']);
    if (empty($repoInfo['default_branch']) || !is_string($repoInfo['default_branch'])) {
        throw new GitFetcherException('GitHub API gaf geen default_branch.');
    }
    $defaultBranch = $repoInfo['default_branch'];

    $branchInfo = $this->httpGetJson(
        $repoEndpoint . '/branches/' . rawurlencode($defaultBranch),
        $repo['host']
    );
    if (empty($branchInfo['commit']['sha']) || !is_string($branchInfo['commit']['sha'])) {
        throw new GitFetcherException('GitHub API gaf geen branch commit SHA.');
    }

    return array(
        'default_branch' => $defaultBranch,
        'head_sha' => $branchInfo['commit']['sha'],
    );
}
/**
 * Ask a Gitea-style API (/api/v1) for the default branch and its head
 * commit SHA.
 *
 * The branch name is read from 'default_branch', then from
 * 'default_branch_name'; when neither holds a non-empty string, 'main' is
 * assumed.
 *
 * @param array $repo Repository description from parseRepositoryUrl().
 * @return array Array with the keys 'default_branch' and 'head_sha'.
 * @throws GitFetcherException On HTTP failures or when no SHA is found.
 */
private function giteaCurrentHead($repo)
{
    $repoEndpoint = sprintf(
        'https://%s/api/v1/repos/%s/%s',
        $repo['host'],
        rawurlencode($repo['owner']),
        rawurlencode($repo['repo'])
    );
    $repoInfo = $this->httpGetJson($repoEndpoint, $repo['host']);

    // The first key holding a non-empty string wins; otherwise keep 'main'.
    $defaultBranch = 'main';
    foreach (array('default_branch', 'default_branch_name') as $key) {
        if (!empty($repoInfo[$key]) && is_string($repoInfo[$key])) {
            $defaultBranch = $repoInfo[$key];
            break;
        }
    }

    $branchInfo = $this->httpGetJson(
        $repoEndpoint . '/branches/' . rawurlencode($defaultBranch),
        $repo['host']
    );
    $headSha = $this->extractGiteaBranchSha($branchInfo);
    if ($headSha === null || $headSha === '') {
        throw new GitFetcherException('Gitea API gaf geen branch commit SHA.');
    }

    return array(
        'default_branch' => $defaultBranch,
        'head_sha' => $headSha,
    );
}
/**
 * Find the commit id in a Gitea/Forgejo branch API response.
 *
 * @param mixed $branchJson Decoded JSON of the branch endpoint.
 * @return string|null The first value that looks like a commit id, or null
 *                     when none of the known locations holds one.
 */
private function extractGiteaBranchSha($branchJson)
{
    /*
     * Gitea/Forgejo variants seen in practice:
     *   commit.id
     *   commit.sha
     *   commit.commit.id
     */
    $paths = array(
        array('commit', 'id'),
        array('commit', 'sha'),
        array('commit', 'commit', 'id'),
    );
    foreach ($paths as $path) {
        // Walk down the nested arrays; give up on this path as soon as a
        // level is missing or is not an array.
        $v = $branchJson;
        foreach ($path as $k) {
            if (!is_array($v) || !array_key_exists($k, $v)) {
                $v = null;
                break;
            }
            $v = $v[$k];
        }
        // 7..64 hex digits: abbreviated ids, full SHA-1 (40) and full
        // SHA-256 (64) object ids. \A and \z anchor the whole string; a
        // plain "$" would also accept a trailing newline.
        if (is_string($v) && preg_match('/\A[0-9a-f]{7,64}\z/i', $v)) {
            return $v;
        }
    }
    return null;
}
/**
 * Download the repository archive that belongs to the given head.
 *
 * The exact commit SHA is tried first, so the zip corresponds to the SHA
 * stored in the metadata. If that request fails, the default branch name is
 * used as the archive ref instead.
 *
 * @param array $repo Repository description from parseRepositoryUrl().
 * @param array $head Array with the keys 'default_branch' and 'head_sha'.
 * @return array Array with 'archive_url' (the URL that succeeded) and
 *               'bytes' (the response body).
 * @throws GitFetcherException When both attempts fail; the message contains
 *                             both errors and the branch failure is
 *                             attached as the previous exception.
 */
private function downloadArchiveForHead($repo, $head)
{
    $shaUrl = $this->archiveUrl($repo, $head['head_sha']);
    $branchUrl = $this->archiveUrl($repo, $head['default_branch']);

    try {
        return array(
            'archive_url' => $shaUrl,
            'bytes' => $this->httpGet($shaUrl, $repo['host'], true),
        );
    } catch (GitFetcherException $shaError) {
        /*
         * Some Gitea/Forgejo instances accept a branch/tag ref but not a
         * commit SHA in archive/<ref>.zip, so fall back to the branch. The
         * SHA comparison itself stays based on the API result.
         */
    }

    try {
        return array(
            'archive_url' => $branchUrl,
            'bytes' => $this->httpGet($branchUrl, $repo['host'], true),
        );
    } catch (GitFetcherException $branchError) {
        // Report both failures: the first one is usually the more telling
        // and would otherwise be lost.
        throw new GitFetcherException(
            'Archive download mislukt voor SHA en branch.' . "\n" .
            'SHA: ' . $shaError->getMessage() . "\n" .
            'Branch: ' . $branchError->getMessage(),
            0,
            $branchError
        );
    }
}
/**
 * Build the zip download URL for a ref (commit SHA or branch name).
 *
 * GitHub:  https://api.github.com/repos/{owner}/{repo}/zipball/{ref}
 * Others:  https://{host}/{owner}/{repo}/archive/{ref}.zip
 *
 * @param array  $repo Repository description from parseRepositoryUrl().
 * @param string $ref  Commit SHA or branch name; must not be ''.
 * @return string The archive URL with owner, repo and ref URL-encoded.
 * @throws GitFetcherException When $ref is the empty string.
 */
private function archiveUrl($repo, $ref)
{
    if ($ref === '') {
        throw new GitFetcherException('Lege archive ref.');
    }

    $owner = rawurlencode($repo['owner']);
    $name = rawurlencode($repo['repo']);
    $encodedRef = rawurlencode($ref);

    if ($repo['kind'] === 'github') {
        return "https://api.github.com/repos/{$owner}/{$name}/zipball/{$encodedRef}";
    }
    return 'https://' . $repo['host'] . "/{$owner}/{$name}/archive/{$encodedRef}.zip";
}
/**
 * GET a URL (following redirects) and decode the body as a JSON object or
 * array.
 *
 * @param string $url  URL to request.
 * @param string $host Repository host; passed on to httpGet().
 * @return array The decoded response.
 * @throws GitFetcherException When the request fails or the body does not
 *                             decode to an array.
 */
private function httpGetJson($url, $host)
{
    $decoded = json_decode($this->httpGet($url, $host, true), true);
    if (is_array($decoded)) {
        return $decoded;
    }
    throw new GitFetcherException('Response is geen JSON: ' . $url);
}
/**
 * Perform an HTTP GET and return the response body.
 *
 * Uses cURL when the extension is loaded and httpGetWithoutCurl() otherwise.
 * For host 'github.com' the GitHub Accept and API-version headers are added.
 * A non-empty token configured for $host is sent as "Authorization: Bearer"
 * (github.com) or "Authorization: token" (other hosts).
 *
 * @param string $url             URL to request.
 * @param string $host            Repository host; selects headers and token.
 * @param bool   $followRedirects Whether cURL should follow redirects.
 * @return string The response body.
 * @throws GitFetcherException When the transfer fails or the final HTTP
 *                             status is outside 200-299.
 */
private function httpGet($url, $host, $followRedirects)
{
    if (!function_exists('curl_init')) {
        return $this->httpGetWithoutCurl($url, $host);
    }
    // The User-Agent is set once through CURLOPT_USERAGENT below, so it is
    // not repeated in the explicit header list.
    $headers = array();
    if ($host === 'github.com') {
        $headers[] = 'Accept: application/vnd.github+json';
        $headers[] = 'X-GitHub-Api-Version: 2022-11-28';
    }
    if (!empty($this->tokensByHost[$host])) {
        if ($host === 'github.com') {
            $headers[] = 'Authorization: Bearer ' . $this->tokensByHost[$host];
        } else {
            $headers[] = 'Authorization: token ' . $this->tokensByHost[$host];
        }
    }
    $ch = curl_init($url);
    if ($ch === false) {
        // curl_init() can fail; passing false on would raise a TypeError
        // (PHP 8) instead of the exception type callers expect.
        throw new GitFetcherException('HTTP request mislukt: curl_init url=' . $url);
    }
    $httpOnly = CURLPROTO_HTTP | CURLPROTO_HTTPS;
    curl_setopt_array($ch, array(
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => $followRedirects ? true : false,
        // Bound redirect chains and keep them on HTTP(S), so a redirect
        // cannot loop forever or switch to another protocol.
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_PROTOCOLS => $httpOnly,
        CURLOPT_REDIR_PROTOCOLS => $httpOnly,
        CURLOPT_CONNECTTIMEOUT => $this->connectTimeout,
        CURLOPT_TIMEOUT => $this->timeout,
        CURLOPT_USERAGENT => $this->userAgent,
        CURLOPT_HTTPHEADER => $headers,
        // HTTP error statuses are handled below so the body can be quoted.
        CURLOPT_FAILONERROR => false,
    ));
    $body = curl_exec($ch);
    if ($body === false) {
        $err = curl_error($ch);
        curl_close($ch);
        throw new GitFetcherException('HTTP request mislukt: ' . $err . ' url=' . $url);
    }
    $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    if ($status < 200 || $status >= 300) {
        // Include the start of the body; API errors usually explain
        // themselves there.
        throw new GitFetcherException(
            'HTTP status ' . $status . ' voor ' . $url . "\n" .
            substr((string)$body, 0, 500)
        );
    }
    return $body;
}
/**
 * Fallback HTTP GET through PHP's http(s) stream wrapper, used when the
 * cURL extension is not available.
 *
 * Sends the same headers as httpGet() and, like it, rejects every response
 * whose final status is outside 200-299.
 *
 * @param string $url  URL to request.
 * @param string $host Repository host; selects headers and token.
 * @return string The response body.
 * @throws GitFetcherException When the request fails or the final HTTP
 *                             status is outside 200-299.
 */
private function httpGetWithoutCurl($url, $host)
{
    $headers = array(
        'User-Agent: ' . $this->userAgent,
    );
    if ($host === 'github.com') {
        $headers[] = 'Accept: application/vnd.github+json';
        $headers[] = 'X-GitHub-Api-Version: 2022-11-28';
    }
    if (!empty($this->tokensByHost[$host])) {
        if ($host === 'github.com') {
            $headers[] = 'Authorization: Bearer ' . $this->tokensByHost[$host];
        } else {
            $headers[] = 'Authorization: token ' . $this->tokensByHost[$host];
        }
    }
    $ctx = stream_context_create(array(
        'http' => array(
            'method' => 'GET',
            'timeout' => $this->timeout,
            // Keep the body of 4xx/5xx responses so it can be quoted in the
            // exception; the status itself is checked below.
            'ignore_errors' => true,
            'header' => implode("\r\n", $headers) . "\r\n",
        ),
    ));
    $body = @file_get_contents($url, false, $ctx);
    if ($body === false) {
        throw new GitFetcherException('HTTP request mislukt: ' . $url);
    }

    // With 'ignore_errors' the wrapper returns error pages as ordinary
    // bodies, so the status must be read from the response headers.
    if (function_exists('http_get_last_response_headers')) {
        $responseHeaders = http_get_last_response_headers();
    } else {
        $responseHeaders = isset($http_response_header) ? $http_response_header : null;
    }
    $status = 0;
    if (is_array($responseHeaders)) {
        // After redirects the list holds one status line per response; the
        // last one belongs to the body that was returned.
        foreach ($responseHeaders as $line) {
            if (preg_match('#^HTTP/\S+\s+(\d{3})#', $line, $m)) {
                $status = (int)$m[1];
            }
        }
    }
    if ($status < 200 || $status >= 300) {
        throw new GitFetcherException(
            'HTTP status ' . $status . ' voor ' . $url . "\n" .
            substr((string)$body, 0, 500)
        );
    }
    return $body;
}
}