initial import
This commit is contained in:
+578
@@ -0,0 +1,578 @@
|
||||
<?php
|
||||
/*
 * gitfetcher.php
 *
 * Purpose:
 *   For a given package name and git repository:
 *
 *   1. determine the default branch;
 *   2. determine the current HEAD commit SHA;
 *   3. check data/<package>.json;
 *   4. if the SHA is unchanged and data/<package>.zip exists: download nothing;
 *   5. otherwise: fetch the repository zip and store it as data/<package>.zip.
 *
 * Supported hosts:
 *   - github.com
 *   - git.dijkewijk.nl  (Gitea)
 *   - codeberg.org      (Gitea)
 *
 * GitHub:
 *   - branch SHA via /repos/{owner}/{repo}/branches/{branch}
 *   - zip via        /repos/{owner}/{repo}/zipball/{ref}
 *
 * Gitea/Codeberg:
 *   - repository info via /api/v1/repos/{owner}/{repo}
 *   - branch SHA via      /api/v1/repos/{owner}/{repo}/branches/{branch}
 *   - zip via             /owner/repo/archive/{ref}.zip
 */
|
||||
|
||||
/**
 * Exception type thrown by GitFetcher for every failure it reports
 * (invalid input, unsupported host, HTTP errors, filesystem errors).
 */
class GitFetcherException extends Exception {}
|
||||
|
||||
/**
 * Keeps a local zip snapshot of a git repository's default branch up to date.
 *
 * For each package the data directory holds two files:
 *   <package>.zip   the downloaded archive
 *   <package>.json  metadata (repository URL, default branch, HEAD SHA, ...)
 *
 * The HEAD commit SHA is looked up through the host's HTTP API and compared
 * with the SHA stored in the metadata; the archive is only downloaded again
 * when they differ (or when the zip/metadata is missing).
 *
 * Supported hosts: github.com (GitHub API) and git.dijkewijk.nl / codeberg.org
 * (Gitea API). All failures are reported as GitFetcherException.
 */
class GitFetcher
{
    /** @var string Directory for the zip and metadata files, without trailing slash. */
    private $dataDir;

    /** @var int Overall request timeout in seconds. */
    private $timeout;

    /** @var int Connect timeout in seconds (only used by the cURL transport). */
    private $connectTimeout;

    /** @var string Value of the User-Agent request header. */
    private $userAgent;

    /** @var array Map of repository host name => API token. */
    private $tokensByHost;

    /**
     * @param array $options Optional settings:
     *   - data_dir:        storage directory (default: __DIR__ . '/data')
     *   - timeout:         overall request timeout in seconds (default 180)
     *   - connect_timeout: connect timeout in seconds (default 20)
     *   - user_agent:      User-Agent header value
     *   - tokens:          array host => token; sent as "Bearer" for
     *                      github.com and as "token" for every other host
     */
    public function __construct($options = array())
    {
        $this->dataDir = isset($options['data_dir'])
            ? rtrim((string)$options['data_dir'], '/')
            : __DIR__ . '/data';

        $this->timeout = isset($options['timeout']) ? (int)$options['timeout'] : 180;
        $this->connectTimeout = isset($options['connect_timeout']) ? (int)$options['connect_timeout'] : 20;

        $this->userAgent = isset($options['user_agent'])
            ? (string)$options['user_agent']
            : 'rktsndbx-gitfetcher/1.0';

        $this->tokensByHost = isset($options['tokens']) && is_array($options['tokens'])
            ? $options['tokens']
            : array();
    }

    /**
     * Main entry point: make sure data/<package>.zip matches the repository's
     * current default-branch HEAD.
     *
     * Example:
     *
     *     $gf = new GitFetcher();
     *     $info = $gf->ensurePackageZip(
     *         'html-parsing',
     *         'https://github.com/soegaard/html-parsing'
     *     );
     *
     * Result on disk:
     *
     *     data/html-parsing.zip
     *     data/html-parsing.json
     *
     * @param string $packageName Package name; only [A-Za-z0-9_.+-] is allowed.
     * @param string $repoUrl     Repository URL (https, git+https or git@host:path).
     * @return array Metadata array; 'status' is 'cached' when nothing was
     *               downloaded and 'downloaded' when the zip was (re)fetched.
     * @throws GitFetcherException On invalid input, HTTP failure or write failure.
     */
    public function ensurePackageZip($packageName, $repoUrl)
    {
        $packageName = $this->safePackageName($packageName);
        $repo = $this->parseRepositoryUrl($repoUrl);
        $head = $this->currentHead($repo);

        $zipFile = $this->packageZipFile($packageName);
        $metaFile = $this->packageMetaFile($packageName);

        if ($this->cacheIsCurrent($zipFile, $this->readJsonFile($metaFile), $repoUrl, $head)) {
            return array(
                'status' => 'cached',
                'package' => $packageName,
                'repo_url' => $repoUrl,
                'host' => $repo['host'],
                'owner' => $repo['owner'],
                'repo' => $repo['repo'],
                'default_branch' => $head['default_branch'],
                'head_sha' => $head['head_sha'],
                'zip_file' => $zipFile,
                'meta_file' => $metaFile,
                'zip_bytes' => filesize($zipFile),
                'zip_sha256' => hash_file('sha256', $zipFile),
            );
        }

        // Check the target directory first so an unusable directory is
        // reported before the (potentially large) archive is downloaded.
        $this->ensureDataDir();
        $archive = $this->downloadArchiveForHead($repo, $head);

        // Write to per-process temp files and rename into place, so readers
        // never observe a half-written zip or metadata file.
        $tmpZip = $zipFile . '.tmp.' . getmypid();
        $tmpMeta = $metaFile . '.tmp.' . getmypid();

        if (!$this->writeFileCompletely($tmpZip, $archive['bytes'])) {
            @unlink($tmpZip);
            throw new GitFetcherException('Kan tijdelijke zip niet schrijven: ' . $tmpZip);
        }

        $meta = array(
            'package' => $packageName,
            'repo_url' => $repoUrl,
            'host' => $repo['host'],
            'owner' => $repo['owner'],
            'repo' => $repo['repo'],
            'default_branch' => $head['default_branch'],
            'head_sha' => $head['head_sha'],
            'archive_url' => $archive['archive_url'],
            'zip_file' => $zipFile,
            'zip_bytes' => strlen($archive['bytes']),
            'zip_sha256' => hash('sha256', $archive['bytes']),
            'updated_at' => gmdate('c'),
        );

        $json = json_encode($meta, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

        if ($json === false || !$this->writeFileCompletely($tmpMeta, $json . "\n")) {
            @unlink($tmpZip);
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan tijdelijke metadata niet schrijven: ' . $tmpMeta);
        }

        // Zip first, metadata second: if the second rename fails, the stale
        // metadata no longer matches HEAD and the next call downloads again.
        if (!rename($tmpZip, $zipFile)) {
            @unlink($tmpZip);
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan zip niet plaatsen: ' . $zipFile);
        }

        if (!rename($tmpMeta, $metaFile)) {
            @unlink($tmpMeta);
            throw new GitFetcherException('Kan metadata niet plaatsen: ' . $metaFile);
        }

        $meta['status'] = 'downloaded';

        return $meta;
    }

    /**
     * Check only, without downloading the zip.
     *
     * @param string $packageName Package name.
     * @param string $repoUrl     Repository URL.
     * @return bool True when the stored zip and metadata match the current HEAD.
     * @throws GitFetcherException On invalid input or when HEAD cannot be determined.
     */
    public function packageZipIsCurrent($packageName, $repoUrl)
    {
        $packageName = $this->safePackageName($packageName);
        $repo = $this->parseRepositoryUrl($repoUrl);
        $head = $this->currentHead($repo);

        return $this->cacheIsCurrent(
            $this->packageZipFile($packageName),
            $this->readJsonFile($this->packageMetaFile($packageName)),
            $repoUrl,
            $head
        );
    }

    /**
     * Determine the default branch and its current commit SHA.
     *
     * @param string $repoUrl Repository URL.
     * @return array array('default_branch' => string, 'head_sha' => string)
     * @throws GitFetcherException
     */
    public function currentHeadForRepositoryUrl($repoUrl)
    {
        return $this->currentHead($this->parseRepositoryUrl($repoUrl));
    }

    /**
     * Shared cache test: the zip is readable and the stored metadata records
     * the same repository URL and HEAD SHA as the ones just looked up.
     *
     * @param string     $zipFile Path of the stored zip.
     * @param array|null $meta    Decoded metadata, or null when unavailable.
     * @param mixed      $repoUrl Repository URL exactly as passed by the caller.
     * @param array      $head    Result of currentHead().
     * @return bool
     */
    private function cacheIsCurrent($zipFile, $meta, $repoUrl, $head)
    {
        return is_file($zipFile)
            && is_readable($zipFile)
            && is_array($meta)
            && isset($meta['head_sha'])
            && $meta['head_sha'] === $head['head_sha']
            && isset($meta['repo_url'])
            && $meta['repo_url'] === $repoUrl;
    }

    /**
     * Validate a package name before it is used as part of a file name.
     *
     * @param mixed $packageName
     * @return string The validated name.
     * @throws GitFetcherException When the name contains other characters
     *                             than [A-Za-z0-9_.+-] or is empty.
     */
    private function safePackageName($packageName)
    {
        $packageName = (string)$packageName;

        // \z instead of $: "$" also matches just before a trailing newline,
        // which would let "name\n" through and into the file names.
        if (!preg_match('/^[A-Za-z0-9_.+-]+\z/', $packageName)) {
            throw new GitFetcherException('Ongeldige package naam: ' . $packageName);
        }

        return $packageName;
    }

    /** @return string Path of the zip file for an already validated package name. */
    private function packageZipFile($packageName)
    {
        return $this->dataDir . '/' . $packageName . '.zip';
    }

    /** @return string Path of the metadata file for an already validated package name. */
    private function packageMetaFile($packageName)
    {
        return $this->dataDir . '/' . $packageName . '.json';
    }

    /**
     * Create the data directory when needed and verify it is writable.
     *
     * @throws GitFetcherException
     */
    private function ensureDataDir()
    {
        // The trailing is_dir() tolerates another process creating the
        // directory between our check and our mkdir().
        if (!is_dir($this->dataDir)
            && !@mkdir($this->dataDir, 0755, true)
            && !is_dir($this->dataDir)) {
            throw new GitFetcherException('Kan data directory niet maken: ' . $this->dataDir);
        }

        if (!is_writable($this->dataDir)) {
            throw new GitFetcherException('Data directory is niet schrijfbaar: ' . $this->dataDir);
        }
    }

    /**
     * Write $data to $file and report whether every byte was written.
     *
     * file_put_contents() returns the number of bytes written, which can be
     * lower than the payload size (e.g. on a full disk) without being false.
     *
     * @param string $file
     * @param string $data
     * @return bool
     */
    private function writeFileCompletely($file, $data)
    {
        $written = file_put_contents($file, $data, LOCK_EX);

        return $written !== false && $written === strlen($data);
    }

    /**
     * Read and decode a JSON file.
     *
     * @param string $file
     * @return array|null Decoded array, or null when the file is missing,
     *                    unreadable, empty or does not decode to an array.
     */
    private function readJsonFile($file)
    {
        if (!is_file($file) || !is_readable($file)) {
            return null;
        }

        $raw = file_get_contents($file);

        if ($raw === false || $raw === '') {
            return null;
        }

        $json = json_decode($raw, true);

        return is_array($json) ? $json : null;
    }

    /**
     * Split a repository URL into host kind, host, owner and repository name.
     *
     * Accepted forms: "git@host:owner/repo(.git)", "git+https://...",
     * "git+http://..." and anything parse_url() can split into host and path.
     *
     * @param string $repoUrl
     * @return array array('kind' => 'github'|'gitea', 'host' => string,
     *                     'owner' => string, 'repo' => string)
     * @throws GitFetcherException On malformed URLs and unsupported hosts.
     */
    public function parseRepositoryUrl($repoUrl)
    {
        $repoUrl = trim((string)$repoUrl);

        // SSH form: git@github.com:owner/repo.git
        if (preg_match('/^git@([^:]+):(.+)$/', $repoUrl, $m)) {
            return $this->parseHostAndPath(strtolower($m[1]), trim($m[2], '/'), $repoUrl);
        }

        if (strpos($repoUrl, 'git+https://') === 0) {
            $repoUrl = 'https://' . substr($repoUrl, strlen('git+https://'));
        } elseif (strpos($repoUrl, 'git+http://') === 0) {
            $repoUrl = 'http://' . substr($repoUrl, strlen('git+http://'));
        }

        $p = parse_url($repoUrl);

        if ($p === false || empty($p['host']) || empty($p['path'])) {
            throw new GitFetcherException('Ongeldige repository URL: ' . $repoUrl);
        }

        return $this->parseHostAndPath(
            strtolower($p['host']),
            trim($p['path'], '/'),
            $repoUrl
        );
    }

    /**
     * Map a lower-cased host and a slash-trimmed path onto a repository description.
     *
     * @param string $host        Lower-cased host name.
     * @param string $path        Path without leading/trailing slashes.
     * @param string $originalUrl Only used in error messages.
     * @return array See parseRepositoryUrl().
     * @throws GitFetcherException
     */
    private function parseHostAndPath($host, $path, $originalUrl)
    {
        $kinds = array(
            'github.com' => 'github',
            'git.dijkewijk.nl' => 'gitea',
            'codeberg.org' => 'gitea',
        );

        if (!isset($kinds[$host])) {
            throw new GitFetcherException('Niet-ondersteunde git host: ' . $host);
        }

        $bits = explode('/', $path);

        if (count($bits) < 2) {
            throw new GitFetcherException('Kan owner/repo niet bepalen uit URL: ' . $originalUrl);
        }

        $owner = $bits[0];
        $repoName = $bits[1];

        // Strip ".git" from the repository segment itself, so URLs with extra
        // path segments ("owner/repo.git/tree/main") resolve to "repo" too.
        // The cast covers older PHP versions where substr() can return false.
        if (substr($repoName, -4) === '.git') {
            $repoName = (string)substr($repoName, 0, -4);
        }

        if ($owner === '' || $repoName === '') {
            throw new GitFetcherException('Kan owner/repo niet bepalen uit URL: ' . $originalUrl);
        }

        return array(
            'kind' => $kinds[$host],
            'host' => $host,
            'owner' => $owner,
            'repo' => $repoName,
        );
    }

    /**
     * Look up default branch and HEAD SHA through the API matching the host kind.
     *
     * @param array $repo Result of parseRepositoryUrl().
     * @return array array('default_branch' => string, 'head_sha' => string)
     * @throws GitFetcherException
     */
    private function currentHead($repo)
    {
        if ($repo['kind'] === 'github') {
            return $this->githubCurrentHead($repo);
        }

        return $this->giteaCurrentHead($repo);
    }

    /**
     * GitHub: read default_branch from the repository resource, then the
     * commit SHA from the branch resource.
     *
     * @param array $repo
     * @return array
     * @throws GitFetcherException
     */
    private function githubCurrentHead($repo)
    {
        $repoApi =
            'https://api.github.com/repos/' .
            rawurlencode($repo['owner']) . '/' .
            rawurlencode($repo['repo']);

        $repoJson = $this->httpGetJson($repoApi, $repo['host']);

        if (empty($repoJson['default_branch']) || !is_string($repoJson['default_branch'])) {
            throw new GitFetcherException('GitHub API gaf geen default_branch.');
        }

        $branch = $repoJson['default_branch'];
        $branchJson = $this->httpGetJson(
            $repoApi . '/branches/' . rawurlencode($branch),
            $repo['host']
        );

        if (empty($branchJson['commit']['sha']) || !is_string($branchJson['commit']['sha'])) {
            throw new GitFetcherException('GitHub API gaf geen branch commit SHA.');
        }

        return array(
            'default_branch' => $branch,
            'head_sha' => $branchJson['commit']['sha'],
        );
    }

    /**
     * Gitea: same two-step lookup as GitHub, against /api/v1 on the
     * repository host. Falls back to branch "main" when the repository
     * resource names no default branch.
     *
     * @param array $repo
     * @return array
     * @throws GitFetcherException
     */
    private function giteaCurrentHead($repo)
    {
        $repoApi =
            'https://' . $repo['host'] .
            '/api/v1/repos/' .
            rawurlencode($repo['owner']) . '/' .
            rawurlencode($repo['repo']);

        $repoJson = $this->httpGetJson($repoApi, $repo['host']);

        $branch = null;

        if (!empty($repoJson['default_branch']) && is_string($repoJson['default_branch'])) {
            $branch = $repoJson['default_branch'];
        } elseif (!empty($repoJson['default_branch_name']) && is_string($repoJson['default_branch_name'])) {
            $branch = $repoJson['default_branch_name'];
        }

        if ($branch === null || $branch === '') {
            $branch = 'main';
        }

        $branchJson = $this->httpGetJson(
            $repoApi . '/branches/' . rawurlencode($branch),
            $repo['host']
        );
        $sha = $this->extractGiteaBranchSha($branchJson);

        if ($sha === null || $sha === '') {
            throw new GitFetcherException('Gitea API gaf geen branch commit SHA.');
        }

        return array(
            'default_branch' => $branch,
            'head_sha' => $sha,
        );
    }

    /**
     * Pull the commit id out of a Gitea/Forgejo branch response.
     *
     * Variants seen in practice:
     *   commit.id
     *   commit.sha
     *   commit.commit.id
     *
     * @param mixed $branchJson Decoded branch response.
     * @return string|null First candidate that is a 7-64 digit hex string, else null.
     */
    private function extractGiteaBranchSha($branchJson)
    {
        $paths = array(
            array('commit', 'id'),
            array('commit', 'sha'),
            array('commit', 'commit', 'id'),
        );

        foreach ($paths as $path) {
            $v = $branchJson;

            foreach ($path as $k) {
                if (!is_array($v) || !array_key_exists($k, $v)) {
                    $v = null;
                    break;
                }

                $v = $v[$k];
            }

            // Up to 64 hex digits so SHA-256 object ids are accepted as well
            // as 40-digit SHA-1 ids; \z rejects a trailing newline.
            if (is_string($v) && preg_match('/^[0-9a-f]{7,64}\z/i', $v)) {
                return $v;
            }
        }

        return null;
    }

    /**
     * Download the archive for the given HEAD.
     *
     * The exact commit SHA is tried first so the zip belongs to the SHA that
     * is stored in the metadata. Some Gitea/Forgejo instances accept a
     * branch/tag ref but not a commit SHA in archive/<ref>.zip, so any failure
     * falls back to the branch name. The cache check itself stays based on
     * the SHA reported by the API.
     *
     * @param array $repo
     * @param array $head
     * @return array array('archive_url' => string, 'bytes' => string)
     * @throws GitFetcherException When both attempts fail; the message is the
     *         one of the branch attempt, the SHA attempt is attached as the
     *         previous exception.
     */
    private function downloadArchiveForHead($repo, $head)
    {
        $shaUrl = $this->archiveUrl($repo, $head['head_sha']);
        $branchUrl = $this->archiveUrl($repo, $head['default_branch']);

        try {
            return $this->fetchArchive($shaUrl, $repo['host']);
        } catch (GitFetcherException $shaError) {
            try {
                return $this->fetchArchive($branchUrl, $repo['host']);
            } catch (GitFetcherException $branchError) {
                // Keep the first failure reachable via getPrevious().
                throw new GitFetcherException(
                    $branchError->getMessage(),
                    (int)$branchError->getCode(),
                    $shaError
                );
            }
        }
    }

    /**
     * Fetch one archive URL and verify that the response really is a zip.
     *
     * Without this check an HTML page answered with status 200 (for example
     * after a redirect to a login page) would be stored as <package>.zip and
     * reported as "cached" until the branch HEAD changes.
     *
     * @param string $url
     * @param string $host
     * @return array array('archive_url' => string, 'bytes' => string)
     * @throws GitFetcherException
     */
    private function fetchArchive($url, $host)
    {
        $bytes = $this->httpGet($url, $host, true);
        $magic = substr((string)$bytes, 0, 4);

        // "PK\3\4" starts a local file header, "PK\5\6" an empty archive.
        if ($magic !== "PK\x03\x04" && $magic !== "PK\x05\x06") {
            throw new GitFetcherException('Download is geen zip-archief: ' . $url);
        }

        return array(
            'archive_url' => $url,
            'bytes' => $bytes,
        );
    }

    /**
     * Build the zip download URL for a ref (commit SHA or branch name).
     *
     * @param array  $repo
     * @param string $ref
     * @return string
     * @throws GitFetcherException When $ref is the empty string.
     */
    private function archiveUrl($repo, $ref)
    {
        if ($ref === '') {
            throw new GitFetcherException('Lege archive ref.');
        }

        if ($repo['kind'] === 'github') {
            return
                'https://api.github.com/repos/' .
                rawurlencode($repo['owner']) . '/' .
                rawurlencode($repo['repo']) .
                '/zipball/' .
                rawurlencode($ref);
        }

        return
            'https://' . $repo['host'] . '/' .
            rawurlencode($repo['owner']) . '/' .
            rawurlencode($repo['repo']) .
            '/archive/' .
            rawurlencode($ref) .
            '.zip';
    }

    /**
     * GET a URL and decode the body as a JSON array/object.
     *
     * @param string $url
     * @param string $host Repository host, used for headers and token lookup.
     * @return array
     * @throws GitFetcherException On HTTP failure or a non-array JSON body.
     */
    private function httpGetJson($url, $host)
    {
        $body = $this->httpGet($url, $host, true);
        $json = json_decode($body, true);

        if (!is_array($json)) {
            throw new GitFetcherException('Response is geen JSON: ' . $url);
        }

        return $json;
    }

    /**
     * Request headers for a repository host: User-Agent, the GitHub API
     * headers for github.com, and an Authorization header when a token is
     * configured for the host.
     *
     * @param string $host
     * @return array List of "Name: value" header lines.
     */
    private function requestHeaders($host)
    {
        $headers = array(
            'User-Agent: ' . $this->userAgent,
        );

        if ($host === 'github.com') {
            $headers[] = 'Accept: application/vnd.github+json';
            $headers[] = 'X-GitHub-Api-Version: 2022-11-28';
        }

        if (!empty($this->tokensByHost[$host])) {
            $scheme = ($host === 'github.com') ? 'Bearer' : 'token';
            $headers[] = 'Authorization: ' . $scheme . ' ' . $this->tokensByHost[$host];
        }

        return $headers;
    }

    /**
     * Throw unless the status is 2xx. The message carries the first 500
     * bytes of the response body to aid diagnosis.
     *
     * @param int    $status
     * @param string $url
     * @param string $body
     * @throws GitFetcherException
     */
    private function requireSuccessStatus($status, $url, $body)
    {
        if ($status < 200 || $status >= 300) {
            throw new GitFetcherException(
                'HTTP status ' . $status . ' voor ' . $url . "\n" .
                substr((string)$body, 0, 500)
            );
        }
    }

    /**
     * Perform a GET request and return the response body.
     *
     * Uses cURL when the extension is loaded and the PHP http stream wrapper
     * otherwise; both transports reject non-2xx responses.
     *
     * @param string $url
     * @param string $host            Repository host (headers/token lookup).
     * @param bool   $followRedirects Whether to follow HTTP redirects.
     * @return string Response body.
     * @throws GitFetcherException On transport errors and non-2xx statuses.
     */
    private function httpGet($url, $host, $followRedirects)
    {
        if (!function_exists('curl_init')) {
            return $this->httpGetWithoutCurl($url, $host, $followRedirects);
        }

        $ch = curl_init($url);

        if ($ch === false) {
            throw new GitFetcherException('HTTP request mislukt: curl_init url=' . $url);
        }

        curl_setopt_array($ch, array(
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => $followRedirects ? true : false,
            // Bound redirect chains instead of relying on the libcurl default.
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_CONNECTTIMEOUT => $this->connectTimeout,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_USERAGENT => $this->userAgent,
            CURLOPT_HTTPHEADER => $this->requestHeaders($host),
            CURLOPT_FAILONERROR => false,
        ));

        $body = curl_exec($ch);
        $err = ($body === false) ? curl_error($ch) : '';
        $status = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);

        // From PHP 8.0 on the handle is an object that is released
        // automatically; an explicit close is only needed before that.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }

        if ($body === false) {
            throw new GitFetcherException('HTTP request mislukt: ' . $err . ' url=' . $url);
        }

        $this->requireSuccessStatus($status, $url, $body);

        return $body;
    }

    /**
     * Fallback transport based on file_get_contents() and the http wrapper.
     *
     * 'ignore_errors' makes the wrapper return the body of 4xx/5xx responses
     * as well, so the status code is taken from the response headers and
     * checked explicitly, just like in the cURL transport.
     *
     * @param string $url
     * @param string $host
     * @param bool   $followRedirects
     * @return string Response body.
     * @throws GitFetcherException On transport errors and non-2xx statuses.
     */
    private function httpGetWithoutCurl($url, $host, $followRedirects = true)
    {
        $ctx = stream_context_create(array(
            'http' => array(
                'method' => 'GET',
                'timeout' => $this->timeout,
                'ignore_errors' => true,
                'follow_location' => $followRedirects ? 1 : 0,
                'header' => implode("\r\n", $this->requestHeaders($host)) . "\r\n",
            ),
        ));

        $body = @file_get_contents($url, false, $ctx);

        if ($body === false) {
            throw new GitFetcherException('HTTP request mislukt: ' . $url);
        }

        // Prefer the function where it exists; older PHP versions only offer
        // the $http_response_header variable that the wrapper sets locally.
        if (function_exists('http_get_last_response_headers')) {
            $responseHeaders = http_get_last_response_headers();
        } elseif (isset($http_response_header)) {
            $responseHeaders = $http_response_header;
        } else {
            $responseHeaders = null;
        }

        $this->requireSuccessStatus($this->lastHttpStatus($responseHeaders), $url, $body);

        return $body;
    }

    /**
     * Extract the status code of the final response from a header list.
     *
     * After redirects the list holds the headers of every hop, so the last
     * "HTTP/x yyy" status line is the one that belongs to the returned body.
     *
     * @param mixed $responseHeaders List of raw header lines, or a non-array.
     * @return int Status code, or 0 when no status line was found.
     */
    private function lastHttpStatus($responseHeaders)
    {
        $status = 0;

        if (is_array($responseHeaders)) {
            foreach ($responseHeaders as $line) {
                if (is_string($line) && preg_match('#^HTTP/\S+\s+(\d{3})#', $line, $m)) {
                    $status = (int)$m[1];
                }
            }
        }

        return $status;
    }
}
|
||||
Reference in New Issue
Block a user