2f2e8869d6
Move shared PHP code into private/, move JavaScript files into js/, and block direct access to private/. Remove unused API key and cache artifacts from the working tree.
264 lines
7.5 KiB
PHP
264 lines
7.5 KiB
PHP
<?php

/*
 * b64parts.php
 *
 * Converts a given file, for example a zip, into base64 parts.
 *
 * Rules:
 * - The input file may be binary.
 * - Output parts are plain base64 text.
 * - No chunk_split, so there is no extra newline overhead.
 * - The max part size applies to the base64 text, not to the binary input.
 *
 * Example:
 *
 *     require_once __DIR__ . '/b64parts.php';
 *
 *     $parts = new Base64Parts(__DIR__ . '/data');
 *     $manifest = $parts->splitFile(
 *         __DIR__ . '/data/html-parsing.zip',
 *         'html-parsing'
 *     );
 *
 *     print_r($manifest);
 */

/**
 * Raised for every failure reported by Base64Parts (invalid names, unreadable
 * or empty input, unwritable output, corrupt manifest).
 *
 * Carries no extra state; it exists so callers can catch split/read failures
 * separately from unrelated exceptions.
 */
class Base64PartsException extends Exception
{
}

/**
 * Splits a (possibly binary) file into numbered plain-text base64 parts plus a
 * JSON manifest, and reads those parts and manifests back.
 *
 * Files are stored flat inside the data directory:
 *   <name>.part.<number>.b64   one base64 text part (no newlines)
 *   <name>.parts.json          manifest describing all parts
 *
 * Every non-final part encodes a multiple of 3 binary bytes, so it carries no
 * "=" padding and the parts can be concatenated into one valid base64 string.
 */
class Base64Parts
{
    /** @var string Directory for parts and manifests, without trailing slash. */
    private $dataDir;

    /** @var int Upper bound, in bytes, for the base64 text of a single part. */
    private $maxBase64PartBytes;

    /**
     * @param string   $dataDir            Directory where parts and manifests live.
     * @param int|null $maxBase64PartBytes Maximum size of the base64 text per part;
     *                                     null selects the 2 MiB default.
     *
     * @throws Base64PartsException When the limit is below 1024 bytes.
     */
    public function __construct($dataDir, $maxBase64PartBytes = null)
    {
        $this->dataDir = rtrim((string)$dataDir, '/');

        // Keep the default at 2 MiB unless the fetcher needs smaller parts.
        $this->maxBase64PartBytes = $maxBase64PartBytes === null
            ? 2 * 1024 * 1024
            : (int)$maxBase64PartBytes;

        if ($this->maxBase64PartBytes < 1024) {
            throw new Base64PartsException('maxBase64PartBytes is te klein.');
        }
    }

    /**
     * Split $sourceFile into base64 parts and write the manifest.
     *
     * @param string $sourceFile    Path of the file to split; may be binary.
     * @param string $name          Logical name used as file prefix (see safeName()).
     * @param bool   $clearOldParts Remove existing parts/manifest for $name first.
     *
     * @return array The manifest as written to disk, plus a 'manifest_file' key
     *               holding the manifest path.
     *
     * @throws Base64PartsException On an invalid name, unreadable or empty source,
     *                              or any read/write/encoding failure.
     */
    public function splitFile($sourceFile, $name, $clearOldParts = true)
    {
        $sourceFile = (string)$sourceFile;
        $name = $this->safeName($name);

        if (!is_file($sourceFile) || !is_readable($sourceFile)) {
            throw new Base64PartsException('Bronbestand ontbreekt of is niet leesbaar: ' . $sourceFile);
        }

        $this->ensureDataDir();

        if ($clearOldParts) {
            $this->removeParts($name);
        }

        // Base64 turns 3 binary bytes into exactly 4 text bytes. Keeping the
        // binary chunk a multiple of 3 gives every non-final part a clean
        // base64 length without padding.
        $maxBinaryChunkBytes = intdiv($this->maxBase64PartBytes, 4) * 3;

        // Defensive invariant; the constructor's minimum already guarantees it.
        if ($maxBinaryChunkBytes < 3) {
            throw new Base64PartsException('Berekende binary chunk size is ongeldig.');
        }

        $in = fopen($sourceFile, 'rb');

        if ($in === false) {
            throw new Base64PartsException('Kan bronbestand niet openen: ' . $sourceFile);
        }

        $parts = [];
        $totalBinaryBytes = 0;
        $totalBase64Bytes = 0;

        // Hash the bytes as they are split, so the digest describes exactly the
        // data that ended up in the parts and the source is read only once.
        $hash = hash_init('sha256');

        try {
            for ($nr = 1; ; $nr++) {
                $bin = $this->readChunk($in, $maxBinaryChunkBytes);

                if ($bin === false) {
                    throw new Base64PartsException('Fout bij lezen van: ' . $sourceFile);
                }

                if ($bin === '') {
                    break;
                }

                hash_update($hash, $bin);

                $b64 = base64_encode($bin);
                $binaryBytes = strlen($bin);
                $base64Bytes = strlen($b64);

                // Defensive invariant: the chunk size above makes this impossible.
                if ($base64Bytes > $this->maxBase64PartBytes) {
                    throw new Base64PartsException('Interne fout: base64 part is groter dan maximum.');
                }

                $partNumber = sprintf('%06d', $nr);
                $partFile = $this->partFile($name, $partNumber);

                if (file_put_contents($partFile, $b64, LOCK_EX) === false) {
                    throw new Base64PartsException('Kan part niet schrijven: ' . $partFile);
                }

                $parts[] = [
                    'number' => $partNumber,
                    'file' => $partFile,
                    'basename' => basename($partFile),
                    'binary_bytes' => $binaryBytes,
                    'base64_bytes' => $base64Bytes,
                ];

                $totalBinaryBytes += $binaryBytes;
                $totalBase64Bytes += $base64Bytes;
            }
        } finally {
            // Single close point for the normal path and every error path.
            fclose($in);
        }

        if (count($parts) === 0) {
            throw new Base64PartsException('Bronbestand is leeg: ' . $sourceFile);
        }

        $manifest = [
            'name' => $name,
            'source_file' => $sourceFile,
            'source_bytes' => filesize($sourceFile),
            'source_sha256' => hash_final($hash),
            'max_base64_bytes' => $this->maxBase64PartBytes,
            'binary_chunk_bytes' => $maxBinaryChunkBytes,
            'part_count' => count($parts),
            'total_binary_bytes' => $totalBinaryBytes,
            'total_base64_bytes' => $totalBase64Bytes,
            'parts' => $parts,
            'created_at' => gmdate('c'),
        ];

        $json = json_encode($manifest, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

        // json_encode() returns false for e.g. a path that is not valid UTF-8;
        // without this check a manifest containing only "\n" would be written.
        if ($json === false) {
            throw new Base64PartsException('Kan manifest niet coderen als JSON: ' . json_last_error_msg());
        }

        $manifestFile = $this->manifestFile($name);

        if (file_put_contents($manifestFile, $json . "\n", LOCK_EX) === false) {
            throw new Base64PartsException('Kan manifest niet schrijven: ' . $manifestFile);
        }

        $manifest['manifest_file'] = $manifestFile;

        return $manifest;
    }

    /**
     * Return the base64 text of one part.
     *
     * @param string $name       Logical name used when splitting.
     * @param string $partNumber Exactly six digits, e.g. '000001'.
     *
     * @return string Raw contents of the part file.
     *
     * @throws Base64PartsException On an invalid name or number, or when the part
     *                              is missing or unreadable.
     */
    public function readPart($name, $partNumber)
    {
        $name = $this->safeName($name);
        $partNumber = (string)$partNumber;

        // The D modifier stops "$" from also matching before a trailing newline.
        if (!preg_match('/^[0-9]{6}$/D', $partNumber)) {
            throw new Base64PartsException('Ongeldig partnummer: ' . $partNumber);
        }

        $file = $this->partFile($name, $partNumber);

        if (!is_file($file) || !is_readable($file)) {
            throw new Base64PartsException('Part ontbreekt of is niet leesbaar: ' . $file);
        }

        $txt = file_get_contents($file);

        if ($txt === false) {
            throw new Base64PartsException('Kan part niet lezen: ' . $file);
        }

        return $txt;
    }

    /**
     * Load and decode the manifest written by splitFile().
     *
     * @param string $name Logical name used when splitting.
     *
     * @return array Decoded manifest as an associative array.
     *
     * @throws Base64PartsException When the manifest is missing, unreadable or
     *                              does not decode to an array.
     */
    public function loadManifest($name)
    {
        $name = $this->safeName($name);
        $file = $this->manifestFile($name);

        if (!is_file($file) || !is_readable($file)) {
            throw new Base64PartsException('Manifest ontbreekt: ' . $file);
        }

        $raw = file_get_contents($file);

        if ($raw === false) {
            throw new Base64PartsException('Kan manifest niet lezen: ' . $file);
        }

        $json = json_decode($raw, true);

        if (!is_array($json)) {
            throw new Base64PartsException('Manifest is geen geldige JSON: ' . $file);
        }

        return $json;
    }

    /**
     * Delete all part files and the manifest belonging to $name.
     *
     * Deletion is best-effort: files that cannot be removed are skipped silently.
     *
     * @param string $name Logical name used when splitting.
     *
     * @throws Base64PartsException On an invalid name.
     */
    public function removeParts($name)
    {
        $name = $this->safeName($name);

        if (is_dir($this->dataDir) && is_readable($this->dataDir)) {
            // Exact match on "<name>.part.<digits>.b64". A wildcard would also hit
            // parts of other names that merely start with "<name>.part." (dots are
            // legal in names), and glob() would misread metacharacters in dataDir.
            $pattern = '/^' . preg_quote($name, '/') . '\.part\.[0-9]+\.b64$/D';

            foreach (scandir($this->dataDir) ?: [] as $entry) {
                if (preg_match($pattern, $entry) !== 1) {
                    continue;
                }

                $file = $this->dataDir . '/' . $entry;

                if (is_file($file)) {
                    @unlink($file);
                }
            }
        }

        $manifest = $this->manifestFile($name);

        if (is_file($manifest)) {
            @unlink($manifest);
        }
    }

    /**
     * Read up to $length bytes, continuing after short reads until the chunk is
     * full or the stream ends.
     *
     * A single fread() may return fewer bytes than requested on streams that are
     * not plain files; a short non-final chunk would no longer be a multiple of 3
     * bytes and would put padding in the middle of the concatenated base64.
     *
     * @param resource $handle Open, readable stream.
     * @param int      $length Maximum number of bytes to return; must be positive.
     *
     * @return string|false The bytes read ('' at end of stream), or false on error.
     */
    private function readChunk($handle, $length)
    {
        $buffer = '';

        while (strlen($buffer) < $length && !feof($handle)) {
            $piece = fread($handle, $length - strlen($buffer));

            if ($piece === false) {
                return false;
            }

            if ($piece === '') {
                // No progress; stop instead of spinning.
                break;
            }

            $buffer .= $piece;
        }

        return $buffer;
    }

    /**
     * Make sure the data directory exists and is writable.
     *
     * @throws Base64PartsException When it cannot be created or is not writable.
     */
    private function ensureDataDir()
    {
        // The second is_dir() covers another process creating the directory
        // between our check and our mkdir().
        if (!is_dir($this->dataDir)
            && !mkdir($this->dataDir, 0755, true)
            && !is_dir($this->dataDir)
        ) {
            throw new Base64PartsException('Kan data directory niet maken: ' . $this->dataDir);
        }

        if (!is_writable($this->dataDir)) {
            throw new Base64PartsException('Data directory is niet schrijfbaar: ' . $this->dataDir);
        }
    }

    /**
     * Validate a logical name so it is safe to embed in a file name.
     *
     * Allowed characters: ASCII letters, digits, "_", ".", "+" and "-". No path
     * separators are accepted, so the name cannot leave the data directory.
     *
     * @param mixed $name Candidate name; cast to string before checking.
     *
     * @return string The validated name.
     *
     * @throws Base64PartsException When the name is empty or has other characters.
     */
    private function safeName($name)
    {
        $name = (string)$name;

        // The D modifier stops "$" from accepting a trailing newline.
        if (!preg_match('/^[A-Za-z0-9_.+-]+$/D', $name)) {
            throw new Base64PartsException('Ongeldige naam: ' . $name);
        }

        return $name;
    }

    /**
     * Build the path of a part file.
     *
     * @param string $name       Validated logical name.
     * @param string $partNumber Zero-padded part number.
     *
     * @return string
     */
    private function partFile($name, $partNumber)
    {
        return $this->dataDir . '/' . $name . '.part.' . $partNumber . '.b64';
    }

    /**
     * Build the path of the manifest file.
     *
     * @param string $name Validated logical name.
     *
     * @return string
     */
    private function manifestFile($name)
    {
        return $this->dataDir . '/' . $name . '.parts.json';
    }
}