2f2e8869d6
Move shared PHP code into private/, move JavaScript files into js/, and block direct access to private/. Remove unused API key and cache artifacts from the working tree.
579 lines
18 KiB
PHP
579 lines
18 KiB
PHP
<?php
|
|
/*
|
|
* gitfetcher.php
|
|
*
|
|
* Doel:
|
|
* Voor een gegeven package-naam en git repository:
|
|
*
|
|
* 1. bepaal de default branch;
|
|
* 2. bepaal de huidige HEAD commit SHA;
|
|
* 3. controleer data/<package>.json;
|
|
* 4. als de SHA gelijk is en data/<package>.zip bestaat: niets downloaden;
|
|
* 5. anders: haal de repository-zip op en sla op als data/<package>.zip.
|
|
*
|
|
* Ondersteund:
|
|
* - github.com
|
|
* - git.dijkewijk.nl Gitea
|
|
* - codeberg.org Gitea
|
|
*
|
|
* GitHub:
|
|
* - branch-SHA via /repos/{owner}/{repo}/branches/{branch}
|
|
* - zip via /repos/{owner}/{repo}/zipball/{ref}
|
|
*
|
|
* Gitea/Codeberg:
|
|
* - repository-info via /api/v1/repos/{owner}/{repo}
|
|
* - branch-SHA via /api/v1/repos/{owner}/{repo}/branches/{branch}
|
|
* - zip via /owner/repo/archive/{ref}.zip
|
|
*/
|
|
|
|
/**
 * Exception type thrown by GitFetcher for every failure it reports itself:
 * invalid package names or repository URLs, unsupported hosts, failed HTTP
 * requests, unexpected API responses and file-system write errors.
 *
 * It adds nothing to Exception; it only gives callers a dedicated type to catch.
 */
class GitFetcherException extends Exception {}
|
|
|
|
/**
 * Keeps a local zip snapshot of a git repository's default branch up to date.
 *
 * For a package name and a repository URL the fetcher:
 *   1. asks the hosting API for the default branch and its head commit SHA;
 *   2. compares that with <data_dir>/<package>.json;
 *   3. downloads <data_dir>/<package>.zip only when the zip is missing or the
 *      stored SHA / repository URL differs.
 *
 * Supported hosts: github.com (GitHub REST API) and git.dijkewijk.nl /
 * codeberg.org (Gitea-style /api/v1 API).
 */
class GitFetcher
{
    /** Directory holding <package>.zip and <package>.json (no trailing slash). */
    private $dataDir;

    /** Total request timeout in seconds. */
    private $timeout;

    /** Connection timeout in seconds (curl transport only). */
    private $connectTimeout;

    /** Value sent as the User-Agent header. */
    private $userAgent;

    /** Map of host name => API token, e.g. array('github.com' => '...'). */
    private $tokensByHost;

    /**
     * @param array $options Optional settings:
     *   - data_dir:        storage directory (default: ../data relative to this file's directory)
     *   - timeout:         request timeout in seconds (default 180)
     *   - connect_timeout: connect timeout in seconds (default 20)
     *   - user_agent:      User-Agent string (default 'rktsndbx-gitfetcher/1.0')
     *   - tokens:          array of host => token used for the Authorization header
     */
    public function __construct($options = array())
    {
        $this->dataDir = isset($options['data_dir'])
            ? rtrim((string)$options['data_dir'], '/')
            : dirname(__DIR__) . '/data';

        $this->timeout = isset($options['timeout']) ? (int)$options['timeout'] : 180;
        $this->connectTimeout = isset($options['connect_timeout']) ? (int)$options['connect_timeout'] : 20;
        $this->userAgent = isset($options['user_agent'])
            ? (string)$options['user_agent']
            : 'rktsndbx-gitfetcher/1.0';

        $this->tokensByHost = isset($options['tokens']) && is_array($options['tokens'])
            ? $options['tokens']
            : array();
    }

    /**
     * Main entry point: make sure <data_dir>/<package>.zip matches the current
     * head of the repository's default branch.
     *
     * Example:
     *
     *   $gf = new GitFetcher();
     *   $info = $gf->ensurePackageZip(
     *       'html-parsing',
     *       'https://github.com/soegaard/html-parsing'
     *   );
     *
     * Result files:
     *
     *   data/html-parsing.zip
     *   data/html-parsing.json
     *
     * @param string $packageName Package name; only [A-Za-z0-9_.+-] is accepted.
     * @param string $repoUrl     Repository URL on a supported host.
     * @return array Metadata with 'status' => 'cached' or 'downloaded', plus
     *               package, repo_url, host, owner, repo, default_branch,
     *               head_sha, zip_file, zip_bytes, zip_sha256 and either
     *               meta_file (cached) or archive_url/updated_at (downloaded).
     * @throws GitFetcherException On invalid input, HTTP/API failure, a download
     *                             that is not a zip archive, or a write error.
     */
    public function ensurePackageZip($packageName, $repoUrl)
    {
        $packageName = $this->safePackageName($packageName);
        $repo = $this->parseRepositoryUrl($repoUrl);
        $head = $this->currentHead($repo);

        $zipFile = $this->packageZipFile($packageName);
        $metaFile = $this->packageMetaFile($packageName);

        if ($this->cachedZipMatches($zipFile, $metaFile, $repoUrl, $head['head_sha'])) {
            return array(
                'status' => 'cached',
                'package' => $packageName,
                'repo_url' => $repoUrl,
                'host' => $repo['host'],
                'owner' => $repo['owner'],
                'repo' => $repo['repo'],
                'default_branch' => $head['default_branch'],
                'head_sha' => $head['head_sha'],
                'zip_file' => $zipFile,
                'meta_file' => $metaFile,
                'zip_bytes' => filesize($zipFile),
                'zip_sha256' => hash_file('sha256', $zipFile),
            );
        }

        $archive = $this->downloadArchiveForHead($repo, $head);
        $this->ensureDataDir();

        // Write to temporary names first and rename afterwards, so readers
        // never see a half-written zip or metadata file.
        $tmpZip = $zipFile . '.tmp.' . getmypid();
        $tmpMeta = $metaFile . '.tmp.' . getmypid();

        // file_put_contents() returns the number of bytes written; a short
        // write (e.g. disk full) must not be accepted as a complete zip.
        $written = file_put_contents($tmpZip, $archive['bytes'], LOCK_EX);

        if ($written === false || $written !== strlen($archive['bytes'])) {
            @unlink($tmpZip);
            throw new GitFetcherException('Kan tijdelijke zip niet schrijven: ' . $tmpZip);
        }

        $meta = array(
            'package' => $packageName,
            'repo_url' => $repoUrl,
            'host' => $repo['host'],
            'owner' => $repo['owner'],
            'repo' => $repo['repo'],
            'default_branch' => $head['default_branch'],
            'head_sha' => $head['head_sha'],
            'archive_url' => $archive['archive_url'],
            'zip_file' => $zipFile,
            'zip_bytes' => strlen($archive['bytes']),
            'zip_sha256' => hash('sha256', $archive['bytes']),
            'updated_at' => gmdate('c'),
        );

        $json = json_encode($meta, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

        if ($json === false || file_put_contents($tmpMeta, $json . "\n", LOCK_EX) === false) {
            @unlink($tmpZip);
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan tijdelijke metadata niet schrijven: ' . $tmpMeta);
        }

        if (!rename($tmpZip, $zipFile)) {
            @unlink($tmpZip);
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan zip niet plaatsen: ' . $zipFile);
        }

        if (!rename($tmpMeta, $metaFile)) {
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan metadata niet plaatsen: ' . $metaFile);
        }

        $meta['status'] = 'downloaded';

        return $meta;
    }

    /**
     * Check only, without downloading a zip.
     *
     * @param string $packageName Package name (validated like ensurePackageZip).
     * @param string $repoUrl     Repository URL on a supported host.
     * @return bool True when the zip exists and the stored metadata matches
     *              both $repoUrl and the current head SHA.
     * @throws GitFetcherException On invalid input or HTTP/API failure.
     */
    public function packageZipIsCurrent($packageName, $repoUrl)
    {
        $packageName = $this->safePackageName($packageName);
        $repo = $this->parseRepositoryUrl($repoUrl);
        $head = $this->currentHead($repo);

        return $this->cachedZipMatches(
            $this->packageZipFile($packageName),
            $this->packageMetaFile($packageName),
            $repoUrl,
            $head['head_sha']
        );
    }

    /**
     * Determine the default branch and its current commit SHA.
     *
     * @param string $repoUrl Repository URL on a supported host.
     * @return array array('default_branch' => string, 'head_sha' => string)
     * @throws GitFetcherException On invalid URL or HTTP/API failure.
     */
    public function currentHeadForRepositoryUrl($repoUrl)
    {
        return $this->currentHead($this->parseRepositoryUrl($repoUrl));
    }

    /**
     * Shared cache test used by ensurePackageZip() and packageZipIsCurrent():
     * the zip must be a readable file and the metadata must record the same
     * repository URL and head SHA.
     */
    private function cachedZipMatches($zipFile, $metaFile, $repoUrl, $headSha)
    {
        if (!is_file($zipFile) || !is_readable($zipFile)) {
            return false;
        }

        $old = $this->readJsonFile($metaFile);

        return is_array($old)
            && isset($old['head_sha'])
            && $old['head_sha'] === $headSha
            && isset($old['repo_url'])
            && $old['repo_url'] === $repoUrl;
    }

    /**
     * Validate a package name before it is used to build file names.
     *
     * The pattern is anchored with \z rather than $, because $ also matches
     * just before a trailing newline and would let "name\n" through.
     *
     * @throws GitFetcherException When the name contains other characters.
     */
    private function safePackageName($packageName)
    {
        $packageName = (string)$packageName;

        if (!preg_match('/^[A-Za-z0-9_.+-]+\z/', $packageName)) {
            throw new GitFetcherException('Ongeldige package naam: ' . $packageName);
        }

        return $packageName;
    }

    /** Path of the zip file for a (validated) package name. */
    private function packageZipFile($packageName)
    {
        return $this->dataDir . '/' . $packageName . '.zip';
    }

    /** Path of the metadata JSON file for a (validated) package name. */
    private function packageMetaFile($packageName)
    {
        return $this->dataDir . '/' . $packageName . '.json';
    }

    /**
     * Create the data directory when needed and verify it is writable.
     *
     * @throws GitFetcherException When it cannot be created or is not writable.
     */
    private function ensureDataDir()
    {
        // The trailing is_dir() re-check tolerates another process creating
        // the directory between our first check and mkdir().
        if (!is_dir($this->dataDir)
            && !mkdir($this->dataDir, 0755, true)
            && !is_dir($this->dataDir)) {
            throw new GitFetcherException('Kan data directory niet maken: ' . $this->dataDir);
        }

        if (!is_writable($this->dataDir)) {
            throw new GitFetcherException('Data directory is niet schrijfbaar: ' . $this->dataDir);
        }
    }

    /**
     * Read a JSON file into an array.
     *
     * @return array|null The decoded array, or null when the file is missing,
     *                    unreadable, empty, or does not decode to an array.
     */
    private function readJsonFile($file)
    {
        if (!is_file($file) || !is_readable($file)) {
            return null;
        }

        $raw = file_get_contents($file);

        if ($raw === false || $raw === '') {
            return null;
        }

        $json = json_decode($raw, true);

        return is_array($json) ? $json : null;
    }

    /**
     * Split a repository URL into kind, host, owner and repo.
     *
     * Accepts the SSH form "git@host:owner/repo.git", "git+https://" and
     * "git+http://" prefixes, and anything parse_url() can split into a host
     * and a path.
     *
     * @param string $repoUrl
     * @return array array('kind' => 'github'|'gitea', 'host', 'owner', 'repo')
     * @throws GitFetcherException On an unparsable URL or unsupported host.
     */
    public function parseRepositoryUrl($repoUrl)
    {
        $repoUrl = trim((string)$repoUrl);

        // SSH form: git@github.com:owner/repo.git
        if (preg_match('/^git@([^:]+):(.+)$/', $repoUrl, $m)) {
            return $this->parseHostAndPath(strtolower($m[1]), trim($m[2], '/'), $repoUrl);
        }

        if (strpos($repoUrl, 'git+https://') === 0) {
            $repoUrl = 'https://' . substr($repoUrl, strlen('git+https://'));
        } elseif (strpos($repoUrl, 'git+http://') === 0) {
            $repoUrl = 'http://' . substr($repoUrl, strlen('git+http://'));
        }

        $p = parse_url($repoUrl);

        if ($p === false || empty($p['host']) || empty($p['path'])) {
            throw new GitFetcherException('Ongeldige repository URL: ' . $repoUrl);
        }

        return $this->parseHostAndPath(
            strtolower($p['host']),
            trim($p['path'], '/'),
            $repoUrl
        );
    }

    /**
     * Map a host and an "owner/repo[/...]" path to a repository descriptor.
     *
     * Only the first two path segments are used. A ".git" suffix is removed
     * from the repo segment itself, so "owner/repo.git/tree/main" resolves to
     * "repo" just like "owner/repo.git" does.
     *
     * @throws GitFetcherException On unsupported host or missing owner/repo.
     */
    private function parseHostAndPath($host, $path, $originalUrl)
    {
        $kinds = array(
            'github.com' => 'github',
            'git.dijkewijk.nl' => 'gitea',
            'codeberg.org' => 'gitea',
        );

        if (!isset($kinds[$host])) {
            throw new GitFetcherException('Niet-ondersteunde git host: ' . $host);
        }

        $bits = explode('/', $path);
        $owner = isset($bits[0]) ? $bits[0] : '';
        $name = isset($bits[1]) ? $bits[1] : '';

        if (substr($name, -4) === '.git') {
            $name = substr($name, 0, -4);
        }

        if ($owner === '' || $name === '') {
            throw new GitFetcherException('Kan owner/repo niet bepalen uit URL: ' . $originalUrl);
        }

        return array(
            'kind' => $kinds[$host],
            'host' => $host,
            'owner' => $owner,
            'repo' => $name,
        );
    }

    /** Dispatch to the GitHub or Gitea implementation based on $repo['kind']. */
    private function currentHead($repo)
    {
        if ($repo['kind'] === 'github') {
            return $this->githubCurrentHead($repo);
        }

        return $this->giteaCurrentHead($repo);
    }

    /**
     * Base API URL of a repository:
     * https://api.github.com/repos/{owner}/{repo} for GitHub,
     * https://{host}/api/v1/repos/{owner}/{repo} otherwise.
     */
    private function apiRepoUrl($repo)
    {
        $prefix = $repo['kind'] === 'github'
            ? 'https://api.github.com/repos/'
            : 'https://' . $repo['host'] . '/api/v1/repos/';

        return $prefix . rawurlencode($repo['owner']) . '/' . rawurlencode($repo['repo']);
    }

    /**
     * GitHub: read default_branch from the repository resource, then the
     * branch resource's commit.sha.
     *
     * @throws GitFetcherException When either field is missing.
     */
    private function githubCurrentHead($repo)
    {
        $repoApi = $this->apiRepoUrl($repo);
        $repoJson = $this->httpGetJson($repoApi, $repo['host']);

        if (empty($repoJson['default_branch']) || !is_string($repoJson['default_branch'])) {
            throw new GitFetcherException('GitHub API gaf geen default_branch.');
        }

        $branch = $repoJson['default_branch'];
        $branchJson = $this->httpGetJson(
            $repoApi . '/branches/' . rawurlencode($branch),
            $repo['host']
        );

        if (empty($branchJson['commit']['sha']) || !is_string($branchJson['commit']['sha'])) {
            throw new GitFetcherException('GitHub API gaf geen branch commit SHA.');
        }

        return array(
            'default_branch' => $branch,
            'head_sha' => $branchJson['commit']['sha'],
        );
    }

    /**
     * Gitea: read default_branch (or default_branch_name) from the repository
     * resource, falling back to 'main', then extract the branch's commit id.
     *
     * @throws GitFetcherException When no commit SHA can be found.
     */
    private function giteaCurrentHead($repo)
    {
        $repoApi = $this->apiRepoUrl($repo);
        $repoJson = $this->httpGetJson($repoApi, $repo['host']);

        $branch = null;

        if (!empty($repoJson['default_branch']) && is_string($repoJson['default_branch'])) {
            $branch = $repoJson['default_branch'];
        } elseif (!empty($repoJson['default_branch_name']) && is_string($repoJson['default_branch_name'])) {
            $branch = $repoJson['default_branch_name'];
        }

        if ($branch === null || $branch === '') {
            $branch = 'main';
        }

        $branchJson = $this->httpGetJson(
            $repoApi . '/branches/' . rawurlencode($branch),
            $repo['host']
        );
        $sha = $this->extractGiteaBranchSha($branchJson);

        if ($sha === null || $sha === '') {
            throw new GitFetcherException('Gitea API gaf geen branch commit SHA.');
        }

        return array(
            'default_branch' => $branch,
            'head_sha' => $sha,
        );
    }

    /**
     * Find the commit hash in a Gitea/Forgejo branch response.
     *
     * Variants seen in practice:
     *   commit.id
     *   commit.sha
     *   commit.commit.id
     *
     * A value counts as a hash when it is 7 to 64 hex characters, which covers
     * 40-character SHA-1 and 64-character SHA-256 object ids.
     *
     * @return string|null The first matching value, or null.
     */
    private function extractGiteaBranchSha($branchJson)
    {
        $paths = array(
            array('commit', 'id'),
            array('commit', 'sha'),
            array('commit', 'commit', 'id'),
        );

        foreach ($paths as $path) {
            $v = $branchJson;

            foreach ($path as $k) {
                if (!is_array($v) || !array_key_exists($k, $v)) {
                    $v = null;
                    break;
                }

                $v = $v[$k];
            }

            // \z instead of $ so a value with a trailing newline is rejected.
            if (is_string($v) && preg_match('/^[0-9a-f]{7,64}\z/i', $v)) {
                return $v;
            }
        }

        return null;
    }

    /**
     * Download the repository zip for the given head.
     *
     * The exact SHA is preferred over the branch name, so the zip belongs to
     * the SHA stored in the metadata. Some Gitea/Forgejo instances accept a
     * branch/tag ref but not a commit SHA in archive/<ref>.zip; in that case
     * the branch URL is tried. The SHA check itself stays based on the API.
     *
     * @return array array('archive_url' => string, 'bytes' => string)
     * @throws GitFetcherException When both attempts fail; carries the branch
     *                             failure's message and chains the SHA failure
     *                             as the previous exception.
     */
    private function downloadArchiveForHead($repo, $head)
    {
        $shaUrl = $this->archiveUrl($repo, $head['head_sha']);
        $branchUrl = $this->archiveUrl($repo, $head['default_branch']);

        try {
            return $this->fetchZip($shaUrl, $repo['host']);
        } catch (GitFetcherException $shaError) {
            try {
                return $this->fetchZip($branchUrl, $repo['host']);
            } catch (GitFetcherException $branchError) {
                // Keep the first failure reachable via getPrevious() instead
                // of silently dropping it.
                throw new GitFetcherException(
                    $branchError->getMessage(),
                    (int)$branchError->getCode(),
                    $shaError
                );
            }
        }
    }

    /**
     * Fetch one archive URL and verify the payload looks like a zip.
     *
     * Zip records start with the bytes "PK"; anything else (for example an
     * HTML page served with a 2xx status) is rejected so it is never cached
     * under the current SHA.
     *
     * @return array array('archive_url' => string, 'bytes' => string)
     * @throws GitFetcherException On HTTP failure or a non-zip payload.
     */
    private function fetchZip($url, $host)
    {
        $bytes = $this->httpGet($url, $host, true);

        if (strncmp((string)$bytes, 'PK', 2) !== 0) {
            throw new GitFetcherException('Download is geen zip-archief: ' . $url);
        }

        return array(
            'archive_url' => $url,
            'bytes' => $bytes,
        );
    }

    /**
     * Build the archive download URL for a ref (SHA or branch name):
     * {api}/zipball/{ref} on GitHub, https://{host}/{owner}/{repo}/archive/{ref}.zip otherwise.
     *
     * @throws GitFetcherException When $ref is the empty string.
     */
    private function archiveUrl($repo, $ref)
    {
        if ($ref === '') {
            throw new GitFetcherException('Lege archive ref.');
        }

        if ($repo['kind'] === 'github') {
            return $this->apiRepoUrl($repo) . '/zipball/' . rawurlencode($ref);
        }

        return 'https://' . $repo['host'] . '/'
            . rawurlencode($repo['owner']) . '/'
            . rawurlencode($repo['repo'])
            . '/archive/' . rawurlencode($ref) . '.zip';
    }

    /**
     * GET a URL and decode the body as a JSON array/object.
     *
     * @throws GitFetcherException On HTTP failure or a body that does not
     *                             decode to an array.
     */
    private function httpGetJson($url, $host)
    {
        $body = $this->httpGet($url, $host, true);
        $json = json_decode($body, true);

        if (!is_array($json)) {
            throw new GitFetcherException('Response is geen JSON: ' . $url);
        }

        return $json;
    }

    /**
     * Request headers shared by both HTTP transports: User-Agent, the GitHub
     * API Accept/version headers for github.com, and an Authorization header
     * when a token is configured for $host ("Bearer" for GitHub, "token"
     * for the other hosts).
     */
    private function requestHeaders($host)
    {
        $headers = array(
            'User-Agent: ' . $this->userAgent,
        );

        if ($host === 'github.com') {
            $headers[] = 'Accept: application/vnd.github+json';
            $headers[] = 'X-GitHub-Api-Version: 2022-11-28';
        }

        if (!empty($this->tokensByHost[$host])) {
            $scheme = $host === 'github.com' ? 'Bearer' : 'token';
            $headers[] = 'Authorization: ' . $scheme . ' ' . $this->tokensByHost[$host];
        }

        return $headers;
    }

    /**
     * Throw unless $status is a 2xx code. The message includes at most the
     * first 500 bytes of the response body.
     *
     * @throws GitFetcherException
     */
    private function requireSuccessStatus($status, $url, $body)
    {
        if ($status < 200 || $status >= 300) {
            throw new GitFetcherException(
                'HTTP status ' . $status . ' voor ' . $url . "\n" .
                substr((string)$body, 0, 500)
            );
        }
    }

    /**
     * Perform an HTTP GET and return the response body.
     *
     * Uses curl when the extension is loaded, otherwise the stream-based
     * fallback. Both transports reject non-2xx responses.
     *
     * @param string $url
     * @param string $host            Logical host used for header/token selection.
     * @param bool   $followRedirects Whether redirects are followed.
     * @return string Response body.
     * @throws GitFetcherException On transport failure or non-2xx status.
     */
    private function httpGet($url, $host, $followRedirects)
    {
        if (!function_exists('curl_init')) {
            return $this->httpGetWithoutCurl($url, $host, $followRedirects);
        }

        $ch = curl_init($url);

        curl_setopt_array($ch, array(
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => $followRedirects ? true : false,
            CURLOPT_CONNECTTIMEOUT => $this->connectTimeout,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_USERAGENT => $this->userAgent,
            CURLOPT_HTTPHEADER => $this->requestHeaders($host),
            // Error bodies are wanted for the exception message, so the
            // status is checked by hand below instead of by curl.
            CURLOPT_FAILONERROR => false,
        ));

        $body = curl_exec($ch);

        if ($body === false) {
            $err = curl_error($ch);
            curl_close($ch);
            throw new GitFetcherException('HTTP request mislukt: ' . $err . ' url=' . $url);
        }

        $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        $this->requireSuccessStatus($status, $url, $body);

        return $body;
    }

    /**
     * Stream-wrapper fallback for httpGet() when curl is unavailable.
     *
     * 'ignore_errors' keeps the body of error responses available, so the
     * HTTP status has to be checked explicitly; otherwise an error page
     * would be returned as if it were the requested content.
     *
     * @param string $url
     * @param string $host
     * @param bool   $followRedirects Defaults to true.
     * @return string Response body.
     * @throws GitFetcherException On transport failure or non-2xx status.
     */
    private function httpGetWithoutCurl($url, $host, $followRedirects = true)
    {
        $ctx = stream_context_create(array(
            'http' => array(
                'method' => 'GET',
                'timeout' => $this->timeout,
                'ignore_errors' => true,
                'follow_location' => $followRedirects ? 1 : 0,
                'header' => implode("\r\n", $this->requestHeaders($host)) . "\r\n",
            ),
        ));

        $body = @file_get_contents($url, false, $ctx);

        if ($body === false) {
            throw new GitFetcherException('HTTP request mislukt: ' . $url);
        }

        // Prefer the function form where it exists; older PHP versions only
        // expose the headers through the local $http_response_header variable.
        $responseHeaders = array();

        if (function_exists('http_get_last_response_headers')) {
            $last = http_get_last_response_headers();

            if (is_array($last)) {
                $responseHeaders = $last;
            }
        } elseif (isset($http_response_header) && is_array($http_response_header)) {
            $responseHeaders = $http_response_header;
        }

        // When redirects were followed the list holds one status line per
        // hop; the last one belongs to the response whose body we got.
        $status = 0;

        foreach ($responseHeaders as $line) {
            if (preg_match('/^HTTP\/\S+\s+(\d{3})/', $line, $m)) {
                $status = (int)$m[1];
            }
        }

        $this->requireSuccessStatus($status, $url, $body);

        return $body;
    }
}
|