initial import
This commit is contained in:
+264
@@ -0,0 +1,264 @@
|
||||
<?php
|
||||
/*
|
||||
* b64parts.php
|
||||
*
|
||||
* Zet een gegeven bestand, bijvoorbeeld een zip, om naar base64-parts.
|
||||
*
|
||||
* Regels:
|
||||
* - Inputbestand mag binary zijn.
|
||||
* - Output-parts zijn plain base64 tekst.
|
||||
* - Geen chunk_split; dus geen extra newline-overhead.
|
||||
* - Max part size geldt voor de base64-tekst, niet voor de binary input.
|
||||
*
|
||||
* Voorbeeld:
|
||||
*
|
||||
* require_once __DIR__ . '/b64parts.php';
|
||||
*
|
||||
* $parts = new Base64Parts(__DIR__ . '/data');
|
||||
* $manifest = $parts->splitFile(
|
||||
* __DIR__ . '/data/html-parsing.zip',
|
||||
* 'html-parsing'
|
||||
* );
|
||||
*
|
||||
* print_r($manifest);
|
||||
*/
|
||||
|
||||
/**
 * Exception type thrown by Base64Parts for every failure it reports
 * (invalid names, unreadable input, write errors, broken manifests).
 *
 * It adds nothing to Exception; it exists so callers can catch errors
 * from this library specifically.
 */
class Base64PartsException extends Exception
{
    // Intentionally empty: marker type only.
}
|
||||
|
||||
/**
 * Splits a (possibly binary) file into plain base64 text parts and writes a
 * JSON manifest describing them.
 *
 * Files produced inside the data directory for a split called NAME:
 *   NAME.part.000001.b64, NAME.part.000002.b64, ...  (base64 text, no newlines)
 *   NAME.parts.json                                  (manifest)
 *
 * Every non-final part encodes a multiple of 3 binary bytes, so it carries no
 * base64 padding and the parts can be concatenated and decoded as one string.
 */
class Base64Parts
{
    /** Directory holding parts and manifests, without trailing slash. */
    private $dataDir;

    /** Maximum size in bytes of the base64 text of one part. */
    private $maxBase64PartBytes;

    /**
     * @param string   $dataDir            Directory for part and manifest files.
     * @param int|null $maxBase64PartBytes Maximum base64 bytes per part;
     *                                     null selects the 2 MiB default.
     *
     * @throws Base64PartsException When the limit is below 1024 bytes.
     */
    public function __construct($dataDir, $maxBase64PartBytes = null)
    {
        $this->dataDir = rtrim((string)$dataDir, '/');

        // The limit applies to the base64 text, not to the binary input.
        $this->maxBase64PartBytes = $maxBase64PartBytes === null
            ? 2 * 1024 * 1024
            : (int)$maxBase64PartBytes;

        if ($this->maxBase64PartBytes < 1024) {
            throw new Base64PartsException('maxBase64PartBytes is te klein.');
        }
    }

    /**
     * Split $sourceFile into base64 parts and write the manifest.
     *
     * The source is read exactly once: the SHA-256 and byte count stored in
     * the manifest are computed over the very bytes that went into the parts,
     * so the manifest always matches the part contents.
     *
     * @param string $sourceFile    Path of the file to split (may be binary).
     * @param string $name          Logical name; only [A-Za-z0-9_.+-] allowed.
     * @param bool   $clearOldParts Remove existing parts/manifest of $name first.
     *
     * @return array The manifest as written, plus a 'manifest_file' key with
     *               the manifest path.
     *
     * @throws Base64PartsException On invalid name, unreadable or empty source,
     *                              or any read/write/encode failure.
     */
    public function splitFile($sourceFile, $name, $clearOldParts = true)
    {
        $sourceFile = (string)$sourceFile;
        $name = $this->safeName($name);

        if (!is_file($sourceFile) || !is_readable($sourceFile)) {
            throw new Base64PartsException('Bronbestand ontbreekt of is niet leesbaar: ' . $sourceFile);
        }

        $this->ensureDataDir();

        if ($clearOldParts) {
            $this->removeParts($name);
        }

        // Base64 turns 3 binary bytes into exactly 4 text bytes. Keeping the
        // binary chunk a multiple of 3 gives every non-final part a padding-free
        // base64 length.
        $maxBinaryChunkBytes = intdiv($this->maxBase64PartBytes, 4) * 3;

        if ($maxBinaryChunkBytes < 3) {
            throw new Base64PartsException('Berekende binary chunk size is ongeldig.');
        }

        $in = fopen($sourceFile, 'rb');

        if ($in === false) {
            throw new Base64PartsException('Kan bronbestand niet openen: ' . $sourceFile);
        }

        $parts = array();
        $totalBinaryBytes = 0;
        $totalBase64Bytes = 0;
        $hashContext = hash_init('sha256');

        try {
            for ($nr = 1; ; $nr++) {
                $bin = $this->readChunk($in, $maxBinaryChunkBytes, $sourceFile);

                if ($bin === '') {
                    break; // end of file
                }

                hash_update($hashContext, $bin);
                $b64 = base64_encode($bin);

                // Cannot happen with the chunk size above; kept as a guard.
                if (strlen($b64) > $this->maxBase64PartBytes) {
                    throw new Base64PartsException('Interne fout: base64 part is groter dan maximum.');
                }

                $partNumber = sprintf('%06d', $nr);
                $partFile = $this->partFile($name, $partNumber);

                if (file_put_contents($partFile, $b64, LOCK_EX) === false) {
                    throw new Base64PartsException('Kan part niet schrijven: ' . $partFile);
                }

                $binaryBytes = strlen($bin);
                $base64Bytes = strlen($b64);

                $parts[] = array(
                    'number' => $partNumber,
                    'file' => $partFile,
                    'basename' => basename($partFile),
                    'binary_bytes' => $binaryBytes,
                    'base64_bytes' => $base64Bytes,
                );

                $totalBinaryBytes += $binaryBytes;
                $totalBase64Bytes += $base64Bytes;
            }
        } finally {
            // Single place that releases the handle, on success and on error.
            fclose($in);
        }

        if (count($parts) === 0) {
            throw new Base64PartsException('Bronbestand is leeg: ' . $sourceFile);
        }

        $manifest = array(
            'name' => $name,
            'source_file' => $sourceFile,
            'source_bytes' => $totalBinaryBytes,
            'source_sha256' => hash_final($hashContext),
            'max_base64_bytes' => $this->maxBase64PartBytes,
            'binary_chunk_bytes' => $maxBinaryChunkBytes,
            'part_count' => count($parts),
            'total_binary_bytes' => $totalBinaryBytes,
            'total_base64_bytes' => $totalBase64Bytes,
            'parts' => $parts,
            'created_at' => gmdate('c'),
        );

        $manifestFile = $this->manifestFile($name);
        $encoded = json_encode($manifest, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

        // json_encode returns false e.g. when the source path is not valid
        // UTF-8; without this check a manifest containing only "\n" is written.
        if ($encoded === false) {
            throw new Base64PartsException('Kan manifest niet naar JSON omzetten: ' . json_last_error_msg());
        }

        if (file_put_contents($manifestFile, $encoded . "\n", LOCK_EX) === false) {
            throw new Base64PartsException('Kan manifest niet schrijven: ' . $manifestFile);
        }

        $manifest['manifest_file'] = $manifestFile;

        return $manifest;
    }

    /**
     * Return the base64 text of one part.
     *
     * @param string $name       Logical name used when splitting.
     * @param string $partNumber Zero-padded part number, at least 6 digits
     *                           (e.g. '000001'), as stored in the manifest.
     *
     * @return string Base64 text of the part.
     *
     * @throws Base64PartsException On invalid name/number or unreadable part.
     */
    public function readPart($name, $partNumber)
    {
        $name = $this->safeName($name);
        $partNumber = (string)$partNumber;

        // \z instead of $: '$' would also accept a trailing newline.
        // {6,}: sprintf('%06d') yields more digits beyond part 999999.
        if (!preg_match('/^[0-9]{6,}\z/', $partNumber)) {
            throw new Base64PartsException('Ongeldig partnummer: ' . $partNumber);
        }

        $file = $this->partFile($name, $partNumber);

        if (!is_file($file) || !is_readable($file)) {
            throw new Base64PartsException('Part ontbreekt of is niet leesbaar: ' . $file);
        }

        $txt = file_get_contents($file);

        if ($txt === false) {
            throw new Base64PartsException('Kan part niet lezen: ' . $file);
        }

        return $txt;
    }

    /**
     * Load and decode the manifest written by splitFile().
     *
     * @param string $name Logical name used when splitting.
     *
     * @return array Decoded manifest (associative array).
     *
     * @throws Base64PartsException When the manifest is missing, unreadable
     *                              or does not decode to an array.
     */
    public function loadManifest($name)
    {
        $name = $this->safeName($name);
        $file = $this->manifestFile($name);

        if (!is_file($file) || !is_readable($file)) {
            throw new Base64PartsException('Manifest ontbreekt: ' . $file);
        }

        $raw = file_get_contents($file);

        if ($raw === false) {
            throw new Base64PartsException('Kan manifest niet lezen: ' . $file);
        }

        $json = json_decode($raw, true);

        if (!is_array($json)) {
            throw new Base64PartsException('Manifest is geen geldige JSON: ' . $file);
        }

        return $json;
    }

    /**
     * Delete all part files and the manifest belonging to $name.
     *
     * Deletion is best effort: files that cannot be removed are skipped
     * silently. Only files named exactly NAME.part.<digits>.b64 are touched,
     * so parts of another split whose name merely starts with "NAME.part."
     * are left alone.
     *
     * @param string $name Logical name used when splitting.
     *
     * @throws Base64PartsException When $name is invalid.
     */
    public function removeParts($name)
    {
        $name = $this->safeName($name);

        if (is_dir($this->dataDir) && is_readable($this->dataDir)) {
            $partPattern = '/^' . preg_quote($name, '/') . '\.part\.[0-9]{6,}\.b64\z/';

            // Directory scan + regex instead of glob(): exact matching, and no
            // surprises when the directory path contains glob metacharacters.
            foreach (scandir($this->dataDir) ?: array() as $entry) {
                if (preg_match($partPattern, $entry) !== 1) {
                    continue;
                }

                $file = $this->dataDir . '/' . $entry;

                if (is_file($file)) {
                    @unlink($file);
                }
            }
        }

        $manifest = $this->manifestFile($name);

        if (is_file($manifest)) {
            @unlink($manifest);
        }
    }

    /**
     * Read up to $length bytes, looping over short reads.
     *
     * A single fread() may return fewer bytes than requested on streams that
     * are not plain files; a short chunk that is not a multiple of 3 would put
     * base64 padding in the middle of the part sequence.
     *
     * @param resource $handle     Open read handle.
     * @param int      $length     Number of bytes wanted (> 0).
     * @param string   $sourceFile Path, used in the error message only.
     *
     * @return string Between 0 and $length bytes; '' means end of file.
     *
     * @throws Base64PartsException When fread() reports an error.
     */
    private function readChunk($handle, $length, $sourceFile)
    {
        $buffer = '';

        while (strlen($buffer) < $length && !feof($handle)) {
            $piece = fread($handle, $length - strlen($buffer));

            if ($piece === false) {
                throw new Base64PartsException('Fout bij lezen van: ' . $sourceFile);
            }

            if ($piece === '') {
                break; // nothing more available; avoid spinning
            }

            $buffer .= $piece;
        }

        return $buffer;
    }

    /**
     * Make sure the data directory exists and is writable.
     *
     * @throws Base64PartsException When it cannot be created or written to.
     */
    private function ensureDataDir()
    {
        // The second is_dir() covers the race where another process creates
        // the directory between our check and our mkdir().
        if (!is_dir($this->dataDir)
            && !mkdir($this->dataDir, 0755, true)
            && !is_dir($this->dataDir)
        ) {
            throw new Base64PartsException('Kan data directory niet maken: ' . $this->dataDir);
        }

        if (!is_writable($this->dataDir)) {
            throw new Base64PartsException('Data directory is niet schrijfbaar: ' . $this->dataDir);
        }
    }

    /**
     * Validate a logical name and return it as a string.
     *
     * Allowed characters: letters, digits, '_', '.', '+', '-'. No slashes,
     * so the name cannot leave the data directory.
     *
     * @param string $name Candidate name.
     *
     * @return string The validated name.
     *
     * @throws Base64PartsException When the name contains other characters.
     */
    private function safeName($name)
    {
        $name = (string)$name;

        // \z instead of $: '$' would also accept a trailing newline.
        if (!preg_match('/^[A-Za-z0-9_.+-]+\z/', $name)) {
            throw new Base64PartsException('Ongeldige naam: ' . $name);
        }

        return $name;
    }

    /** Path of the part file for $name with the given zero-padded number. */
    private function partFile($name, $partNumber)
    {
        return $this->dataDir . '/' . $name . '.part.' . $partNumber . '.b64';
    }

    /** Path of the manifest file for $name. */
    private function manifestFile($name)
    {
        return $this->dataDir . '/' . $name . '.parts.json';
    }
}
|
||||
Reference in New Issue
Block a user