.json;
 * 4. if the SHA is unchanged and data/{package}.zip exists: download nothing;
 * 5. otherwise: fetch the repository zip and store it as data/{package}.zip.
 *
 * Supported:
 *   - github.com
 *   - git.dijkewijk.nl   Gitea
 *   - codeberg.org       Gitea
 *
 * GitHub:
 *   - branch SHA via /repos/{owner}/{repo}/branches/{branch}
 *   - zip via /repos/{owner}/{repo}/zipball/{ref}
 *
 * Gitea/Codeberg:
 *   - repository info via /api/v1/repos/{owner}/{repo}
 *   - branch SHA via /api/v1/repos/{owner}/{repo}/branches/{branch}
 *   - zip via /owner/repo/archive/{ref}.zip
 */

/**
 * Exception type thrown by GitFetcher for every failure it detects itself
 * (invalid input, unsupported host, HTTP errors, filesystem errors).
 */
class GitFetcherException extends Exception
{
}

/**
 * Keeps a local zip archive plus a JSON metadata file per package in sync
 * with the head commit of the default branch of a remote git repository.
 */
class GitFetcher
{
    /* Directory that holds the {package}.zip and {package}.json files. */
    private $dataDir;

    /* Total request timeout in seconds. */
    private $timeout;

    /* Connect timeout in seconds (only used by the cURL transport). */
    private $connectTimeout;

    /* Value of the User-Agent request header. */
    private $userAgent;

    /* Map of host name => API token, used for the Authorization header. */
    private $tokensByHost;

    /**
     * @param array $options Optional settings:
     *   - data_dir        (string) storage directory, default dirname(__DIR__) . '/data'
     *   - timeout         (int)    total request timeout in seconds, default 180
     *   - connect_timeout (int)    connect timeout in seconds, default 20
     *   - user_agent      (string) User-Agent header value
     *   - tokens          (array)  host => token, e.g. array('github.com' => '...')
     */
    public function __construct($options = array())
    {
        $this->dataDir = isset($options['data_dir'])
            ? rtrim((string)$options['data_dir'], '/')
            : dirname(__DIR__) . '/data';

        $this->timeout = isset($options['timeout'])
            ? (int)$options['timeout']
            : 180;

        $this->connectTimeout = isset($options['connect_timeout'])
            ? (int)$options['connect_timeout']
            : 20;

        $this->userAgent = isset($options['user_agent'])
            ? (string)$options['user_agent']
            : 'rktsndbx-gitfetcher/1.0';

        $this->tokensByHost = isset($options['tokens']) && is_array($options['tokens'])
            ? $options['tokens']
            : array();
    }

    /**
     * Main entry point for the package route.
     *
     * Example:
     *
     *   $gf = new GitFetcher();
     *   $info = $gf->ensurePackageZip(
     *       'html-parsing',
     *       'https://github.com/soegaard/html-parsing'
     *   );
     *
     * Result:
     *
     *   data/html-parsing.zip
     *   data/html-parsing.json
     *
     * @param string $packageName Package name; only [A-Za-z0-9_.+-] is accepted.
     * @param string $repoUrl     Repository URL on one of the supported hosts.
     * @return array Description of the stored archive; 'status' is 'cached'
     *               when nothing was downloaded and 'downloaded' otherwise.
     * @throws GitFetcherException On invalid input, HTTP failure or
     *                             filesystem failure.
     */
    public function ensurePackageZip($packageName, $repoUrl)
    {
        $packageName = $this->safePackageName($packageName);
        $repo = $this->parseRepositoryUrl($repoUrl);
        $head = $this->currentHead($repo);

        $zipFile = $this->packageZipFile($packageName);
        $metaFile = $this->packageMetaFile($packageName);

        if ($this->cachedCopyMatches($zipFile, $metaFile, $repoUrl, $head['head_sha'])) {
            return array(
                'status' => 'cached',
                'package' => $packageName,
                'repo_url' => $repoUrl,
                'host' => $repo['host'],
                'owner' => $repo['owner'],
                'repo' => $repo['repo'],
                'default_branch' => $head['default_branch'],
                'head_sha' => $head['head_sha'],
                'zip_file' => $zipFile,
                'meta_file' => $metaFile,
                'zip_bytes' => filesize($zipFile),
                'zip_sha256' => hash_file('sha256', $zipFile),
            );
        }

        /* Validate the target directory first so an unwritable directory
         * fails before any archive bytes are downloaded. */
        $this->ensureDataDir();

        $archive = $this->downloadArchiveForHead($repo, $head);

        /* Write to process-specific temporary names and rename afterwards,
         * so readers never see a half-written zip or metadata file. */
        $tmpZip = $zipFile . '.tmp.' . getmypid();
        $tmpMeta = $metaFile . '.tmp.' . getmypid();

        if (file_put_contents($tmpZip, $archive['bytes'], LOCK_EX) === false) {
            @unlink($tmpZip);
            throw new GitFetcherException('Kan tijdelijke zip niet schrijven: ' . $tmpZip);
        }

        $meta = array(
            'package' => $packageName,
            'repo_url' => $repoUrl,
            'host' => $repo['host'],
            'owner' => $repo['owner'],
            'repo' => $repo['repo'],
            'default_branch' => $head['default_branch'],
            'head_sha' => $head['head_sha'],
            'archive_url' => $archive['archive_url'],
            'zip_file' => $zipFile,
            'zip_bytes' => strlen($archive['bytes']),
            'zip_sha256' => hash('sha256', $archive['bytes']),
            'updated_at' => gmdate('c'),
        );

        $json = json_encode($meta, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

        if ($json === false || file_put_contents($tmpMeta, $json . "\n", LOCK_EX) === false) {
            @unlink($tmpZip);
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan tijdelijke metadata niet schrijven: ' . $tmpMeta);
        }

        if (!rename($tmpZip, $zipFile)) {
            @unlink($tmpZip);
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan zip niet plaatsen: ' . $zipFile);
        }

        if (!rename($tmpMeta, $metaFile)) {
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan metadata niet plaatsen: ' . $metaFile);
        }

        $meta['status'] = 'downloaded';

        return $meta;
    }

    /**
     * Check only, without downloading the zip.
     *
     * @param string $packageName Package name; only [A-Za-z0-9_.+-] is accepted.
     * @param string $repoUrl     Repository URL on one of the supported hosts.
     * @return bool True when the stored zip exists and its metadata records
     *              the same repository URL and the current head SHA.
     * @throws GitFetcherException On invalid input or HTTP failure.
     */
    public function packageZipIsCurrent($packageName, $repoUrl)
    {
        $packageName = $this->safePackageName($packageName);
        $repo = $this->parseRepositoryUrl($repoUrl);
        $head = $this->currentHead($repo);

        return $this->cachedCopyMatches(
            $this->packageZipFile($packageName),
            $this->packageMetaFile($packageName),
            $repoUrl,
            $head['head_sha']
        );
    }

    /**
     * Determine the default branch and the current commit SHA.
     *
     * @param string $repoUrl Repository URL on one of the supported hosts.
     * @return array array('default_branch' => string, 'head_sha' => string)
     * @throws GitFetcherException On invalid input or HTTP failure.
     */
    public function currentHeadForRepositoryUrl($repoUrl)
    {
        $repo = $this->parseRepositoryUrl($repoUrl);

        return $this->currentHead($repo);
    }

    /**
     * Tell whether the stored zip is readable and its metadata file records
     * exactly the given repository URL and head SHA.
     */
    private function cachedCopyMatches($zipFile, $metaFile, $repoUrl, $headSha)
    {
        if (!is_file($zipFile) || !is_readable($zipFile)) {
            return false;
        }

        $old = $this->readJsonFile($metaFile);

        return is_array($old)
            && isset($old['head_sha'])
            && $old['head_sha'] === $headSha
            && isset($old['repo_url'])
            && $old['repo_url'] === $repoUrl;
    }

    /**
     * Validate a package name before it is used as part of a file name.
     *
     * @throws GitFetcherException When the name contains other characters
     *                             than [A-Za-z0-9_.+-] or is empty.
     */
    private function safePackageName($packageName)
    {
        $packageName = (string)$packageName;

        /* \z instead of $: a plain $ also matches just before a trailing
         * newline, which would let "name\n" through into a file path. */
        if (!preg_match('/^[A-Za-z0-9_.+-]+\z/', $packageName)) {
            throw new GitFetcherException('Ongeldige package naam: ' . $packageName);
        }

        return $packageName;
    }

    /* Path of the zip archive stored for a package. */
    private function packageZipFile($packageName)
    {
        return $this->dataDir . '/' . $packageName . '.zip';
    }

    /* Path of the JSON metadata file stored for a package. */
    private function packageMetaFile($packageName)
    {
        return $this->dataDir . '/' . $packageName . '.json';
    }

    /**
     * Create the data directory when needed and verify it is writable.
     *
     * @throws GitFetcherException When the directory cannot be created or
     *                             is not writable.
     */
    private function ensureDataDir()
    {
        /* The second is_dir() covers the case where another request created
         * the directory between our first check and our mkdir() call. */
        if (!is_dir($this->dataDir)
            && !@mkdir($this->dataDir, 0755, true)
            && !is_dir($this->dataDir)
        ) {
            throw new GitFetcherException('Kan data directory niet maken: ' . $this->dataDir);
        }

        if (!is_writable($this->dataDir)) {
            throw new GitFetcherException('Data directory is niet schrijfbaar: ' . $this->dataDir);
        }
    }

    /**
     * Read a JSON file and return it as an array.
     *
     * @return array|null Null when the file is missing, unreadable, empty
     *                    or does not decode to an array.
     */
    private function readJsonFile($file)
    {
        if (!is_file($file) || !is_readable($file)) {
            return null;
        }

        $raw = file_get_contents($file);

        if ($raw === false || $raw === '') {
            return null;
        }

        $json = json_decode($raw, true);

        return is_array($json) ? $json : null;
    }

    /**
     * Split a repository URL into kind, host, owner and repository name.
     *
     * Accepts the scp-like SSH form (git@host:owner/repo.git), URLs with a
     * "git+https://" or "git+http://" prefix, and anything parse_url() can
     * split into a host and a path.
     *
     * @param string $repoUrl Repository URL.
     * @return array array('kind' => 'github'|'gitea', 'host' => string,
     *                     'owner' => string, 'repo' => string)
     * @throws GitFetcherException When the URL is invalid, the host is not
     *                             supported, or owner/repo cannot be found.
     */
    public function parseRepositoryUrl($repoUrl)
    {
        $repoUrl = trim((string)$repoUrl);

        /*
         * SSH form:
         *   git@github.com:owner/repo.git
         */
        if (preg_match('/^git@([^:]+):(.+)$/', $repoUrl, $m)) {
            return $this->parseHostAndPath(strtolower($m[1]), trim($m[2], '/'), $repoUrl);
        }

        if (strpos($repoUrl, 'git+https://') === 0) {
            $repoUrl = 'https://' . substr($repoUrl, strlen('git+https://'));
        } elseif (strpos($repoUrl, 'git+http://') === 0) {
            $repoUrl = 'http://' . substr($repoUrl, strlen('git+http://'));
        }

        $p = parse_url($repoUrl);

        if ($p === false || empty($p['host']) || empty($p['path'])) {
            throw new GitFetcherException('Ongeldige repository URL: ' . $repoUrl);
        }

        return $this->parseHostAndPath(
            strtolower($p['host']),
            trim($p['path'], '/'),
            $repoUrl
        );
    }

    /**
     * Map a host and URL path onto the repository description array.
     *
     * Only the first two path segments are used (owner and repository); a
     * ".git" suffix is removed from the repository segment.
     *
     * @throws GitFetcherException When the host is not supported or the
     *                             path has no owner/repo pair.
     */
    private function parseHostAndPath($host, $path, $originalUrl)
    {
        $kinds = array(
            'github.com' => 'github',
            'git.dijkewijk.nl' => 'gitea',
            'codeberg.org' => 'gitea',
        );

        if (!isset($kinds[$host])) {
            throw new GitFetcherException('Niet-ondersteunde git host: ' . $host);
        }

        $bits = explode('/', $path);
        $owner = $bits[0];
        $name = isset($bits[1]) ? $bits[1] : '';

        /* Strip ".git" from the repository segment itself, so that paths
         * such as owner/repo.git/tree/main resolve to "repo" as well.
         * The cast keeps the result a string on PHP versions where
         * substr() returns false for an empty result. */
        if (substr($name, -4) === '.git') {
            $name = (string)substr($name, 0, -4);
        }

        if ($owner === '' || $name === '') {
            throw new GitFetcherException('Kan owner/repo niet bepalen uit URL: ' . $originalUrl);
        }

        return array(
            'kind' => $kinds[$host],
            'host' => $host,
            'owner' => $owner,
            'repo' => $name,
        );
    }

    /* Dispatch to the host-specific lookup of default branch and head SHA. */
    private function currentHead($repo)
    {
        if ($repo['kind'] === 'github') {
            return $this->githubCurrentHead($repo);
        }

        return $this->giteaCurrentHead($repo);
    }

    /**
     * Look up default branch and head SHA through the GitHub REST API.
     *
     * @return array array('default_branch' => string, 'head_sha' => string)
     * @throws GitFetcherException On HTTP failure or an unexpected response.
     */
    private function githubCurrentHead($repo)
    {
        $repoApi = 'https://api.github.com/repos/'
            . rawurlencode($repo['owner'])
            . '/'
            . rawurlencode($repo['repo']);

        $repoJson = $this->httpGetJson($repoApi, $repo['host']);

        /* Explicit checks instead of empty(): empty('0') is true, which
         * would reject a branch that is literally named "0". */
        $branch = isset($repoJson['default_branch']) ? $repoJson['default_branch'] : null;

        if (!is_string($branch) || $branch === '') {
            throw new GitFetcherException('GitHub API gaf geen default_branch.');
        }

        $branchJson = $this->httpGetJson(
            $repoApi . '/branches/' . rawurlencode($branch),
            $repo['host']
        );

        $sha = isset($branchJson['commit']['sha']) ? $branchJson['commit']['sha'] : null;

        if (!is_string($sha) || $sha === '') {
            throw new GitFetcherException('GitHub API gaf geen branch commit SHA.');
        }

        return array(
            'default_branch' => $branch,
            'head_sha' => $sha,
        );
    }

    /**
     * Look up default branch and head SHA through the Gitea API (v1).
     *
     * Falls back to the branch name "main" when the repository response
     * carries neither "default_branch" nor "default_branch_name".
     *
     * @return array array('default_branch' => string, 'head_sha' => string)
     * @throws GitFetcherException On HTTP failure or an unexpected response.
     */
    private function giteaCurrentHead($repo)
    {
        $repoApi = 'https://' . $repo['host'] . '/api/v1/repos/'
            . rawurlencode($repo['owner'])
            . '/'
            . rawurlencode($repo['repo']);

        $repoJson = $this->httpGetJson($repoApi, $repo['host']);

        $branch = 'main';

        foreach (array('default_branch', 'default_branch_name') as $key) {
            if (isset($repoJson[$key]) && is_string($repoJson[$key]) && $repoJson[$key] !== '') {
                $branch = $repoJson[$key];
                break;
            }
        }

        $branchJson = $this->httpGetJson(
            $repoApi . '/branches/' . rawurlencode($branch),
            $repo['host']
        );

        $sha = $this->extractGiteaBranchSha($branchJson);

        if ($sha === null || $sha === '') {
            throw new GitFetcherException('Gitea API gaf geen branch commit SHA.');
        }

        return array(
            'default_branch' => $branch,
            'head_sha' => $sha,
        );
    }

    /**
     * Pull the commit id out of a Gitea/Forgejo branch response.
     *
     * @return string|null The first hexadecimal id of 7 to 64 characters
     *                     found at one of the known locations, else null.
     */
    private function extractGiteaBranchSha($branchJson)
    {
        /*
         * Gitea/Forgejo variants seen in practice:
         *   commit.id
         *   commit.sha
         *   commit.commit.id
         */
        $paths = array(
            array('commit', 'id'),
            array('commit', 'sha'),
            array('commit', 'commit', 'id'),
        );

        foreach ($paths as $path) {
            $v = $branchJson;

            foreach ($path as $k) {
                if (!is_array($v) || !array_key_exists($k, $v)) {
                    $v = null;
                    break;
                }

                $v = $v[$k];
            }

            /* Up to 64 hex digits so SHA-256 object ids are accepted too;
             * \z so that a trailing newline does not slip through. */
            if (is_string($v) && preg_match('/^[0-9a-f]{7,64}\z/i', $v)) {
                return $v;
            }
        }

        return null;
    }

    /**
     * Download the zip archive that belongs to the given head.
     *
     * @return array array('archive_url' => string, 'bytes' => string)
     * @throws GitFetcherException When neither the SHA URL nor the branch
     *                             URL yields a zip archive.
     */
    private function downloadArchiveForHead($repo, $head)
    {
        /*
         * We prefer to download by exact SHA rather than by branch name, so
         * the zip corresponds exactly to the SHA stored in the metadata.
         */
        $shaUrl = $this->archiveUrl($repo, $head['head_sha']);
        $branchUrl = $this->archiveUrl($repo, $head['default_branch']);

        try {
            return array(
                'archive_url' => $shaUrl,
                'bytes' => $this->fetchZip($shaUrl, $repo['host']),
            );
        } catch (GitFetcherException $e) {
            /*
             * Some Gitea/Forgejo instances accept a branch/tag ref but not a
             * commit SHA in archive/{sha}.zip. In that case fall back to the
             * branch. The SHA check itself remains based on the API.
             */
            return array(
                'archive_url' => $branchUrl,
                'bytes' => $this->fetchZip($branchUrl, $repo['host']),
            );
        }
    }

    /**
     * Fetch a URL and require the response body to be a zip archive.
     *
     * A successful status alone is not enough: a login or error page served
     * with status 200 would otherwise be stored as the package zip.
     *
     * @throws GitFetcherException On HTTP failure or a non-zip body.
     */
    private function fetchZip($url, $host)
    {
        $bytes = $this->httpGet($url, $host, true);

        /* Zip records start with the two signature bytes "PK". */
        if (strncmp($bytes, 'PK', 2) !== 0) {
            throw new GitFetcherException('Response is geen zip-archief: ' . $url);
        }

        return $bytes;
    }

    /**
     * Build the archive download URL for a ref (commit SHA or branch name).
     *
     * @throws GitFetcherException When the ref is an empty string.
     */
    private function archiveUrl($repo, $ref)
    {
        if ($ref === '') {
            throw new GitFetcherException('Lege archive ref.');
        }

        $owner = rawurlencode($repo['owner']);
        $name = rawurlencode($repo['repo']);

        if ($repo['kind'] === 'github') {
            return 'https://api.github.com/repos/' . $owner . '/' . $name
                . '/zipball/' . rawurlencode($ref);
        }

        return 'https://' . $repo['host'] . '/' . $owner . '/' . $name
            . '/archive/' . rawurlencode($ref) . '.zip';
    }

    /**
     * GET a URL and decode the body as a JSON object/array.
     *
     * @throws GitFetcherException On HTTP failure or when the body does not
     *                             decode to an array.
     */
    private function httpGetJson($url, $host)
    {
        $body = $this->httpGet($url, $host, true);
        $json = json_decode($body, true);

        if (!is_array($json)) {
            throw new GitFetcherException('Response is geen JSON: ' . $url);
        }

        return $json;
    }

    /**
     * Request headers shared by both transports: User-Agent, the GitHub
     * API headers for github.com, and an Authorization header when a token
     * is configured for the host.
     */
    private function requestHeaders($host)
    {
        $headers = array(
            'User-Agent: ' . $this->userAgent,
        );

        if ($host === 'github.com') {
            $headers[] = 'Accept: application/vnd.github+json';
            $headers[] = 'X-GitHub-Api-Version: 2022-11-28';
        }

        if (!empty($this->tokensByHost[$host])) {
            $scheme = ($host === 'github.com') ? 'Bearer' : 'token';
            $headers[] = 'Authorization: ' . $scheme . ' ' . $this->tokensByHost[$host];
        }

        return $headers;
    }

    /**
     * Throw when the HTTP status is outside the 2xx range. The message
     * includes the first 500 bytes of the response body.
     *
     * @throws GitFetcherException
     */
    private function failOnBadStatus($status, $url, $body)
    {
        if ($status < 200 || $status >= 300) {
            throw new GitFetcherException(
                'HTTP status ' . $status . ' voor ' . $url . "\n" . substr((string)$body, 0, 500)
            );
        }
    }

    /**
     * Return the status code of the last "HTTP/..." status line in a list
     * of raw response header lines, or 0 when there is none. After
     * redirects the list holds one status line per response; the last one
     * belongs to the final response.
     */
    private function lastHttpStatus($responseHeaders)
    {
        $status = 0;

        foreach ($responseHeaders as $line) {
            if (is_string($line) && preg_match('#^HTTP/\S+\s+(\d{3})#', $line, $m)) {
                $status = (int)$m[1];
            }
        }

        return $status;
    }

    /**
     * Perform a GET request and return the response body.
     *
     * Uses cURL when the extension is loaded and the stream wrapper
     * otherwise.
     *
     * @param string $url             URL to fetch.
     * @param string $host            Host key for headers and token lookup.
     * @param bool   $followRedirects Whether redirects are followed.
     * @return string Response body.
     * @throws GitFetcherException On transport failure or non-2xx status.
     */
    private function httpGet($url, $host, $followRedirects)
    {
        if (!function_exists('curl_init')) {
            return $this->httpGetWithoutCurl($url, $host, $followRedirects);
        }

        $ch = curl_init($url);

        if ($ch === false) {
            throw new GitFetcherException('HTTP request mislukt: ' . $url);
        }

        curl_setopt_array($ch, array(
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => $followRedirects ? true : false,
            /* Bound the redirect chain so a redirect loop cannot keep the
             * request busy until the overall timeout. */
            CURLOPT_MAXREDIRS => 20,
            CURLOPT_CONNECTTIMEOUT => $this->connectTimeout,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_USERAGENT => $this->userAgent,
            CURLOPT_HTTPHEADER => $this->requestHeaders($host),
            /* Keep the body of error responses; the status is checked
             * below so the body can be quoted in the exception. */
            CURLOPT_FAILONERROR => false,
        ));

        $body = curl_exec($ch);

        if ($body === false) {
            $err = curl_error($ch);
            curl_close($ch);
            throw new GitFetcherException('HTTP request mislukt: ' . $err . ' url=' . $url);
        }

        $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        $this->failOnBadStatus($status, $url, $body);

        return $body;
    }

    /**
     * Stream-wrapper fallback for httpGet() when cURL is not available.
     *
     * The connect timeout is not applied here; only the total timeout is
     * passed to the stream context.
     *
     * @param string $url             URL to fetch.
     * @param string $host            Host key for headers and token lookup.
     * @param bool   $followRedirects Whether redirects are followed.
     * @return string Response body.
     * @throws GitFetcherException On transport failure or non-2xx status.
     */
    private function httpGetWithoutCurl($url, $host, $followRedirects = true)
    {
        $ctx = stream_context_create(array(
            'http' => array(
                'method' => 'GET',
                'timeout' => $this->timeout,
                /* Return the body of 4xx/5xx responses too; the status is
                 * checked explicitly below. */
                'ignore_errors' => true,
                'follow_location' => $followRedirects ? 1 : 0,
                'header' => implode("\r\n", $this->requestHeaders($host)) . "\r\n",
            ),
        ));

        $body = @file_get_contents($url, false, $ctx);

        if ($body === false) {
            throw new GitFetcherException('HTTP request mislukt: ' . $url);
        }

        /* Newer PHP versions offer a function for the response headers;
         * older ones only set the local $http_response_header variable. */
        if (function_exists('http_get_last_response_headers')) {
            $responseHeaders = http_get_last_response_headers();
        } else {
            $responseHeaders = isset($http_response_header) ? $http_response_header : array();
        }

        if (!is_array($responseHeaders)) {
            $responseHeaders = array();
        }

        $status = $this->lastHttpStatus($responseHeaders);

        /* Status 0 means no status line could be found; in that case the
         * body is passed on and left to the JSON/zip validation of the
         * callers instead of failing a possibly valid response. */
        if ($status !== 0) {
            $this->failOnBadStatus($status, $url, $body);
        }

        return $body;
    }
}