splitFile( * __DIR__ . '/data/html-parsing.zip', * 'html-parsing' * ); * * print_r($manifest); */

/**
 * Raised for every failure reported by Base64Parts.
 */
class Base64PartsException extends Exception
{
}

/**
 * Splits a file into base64-encoded text parts of bounded size and records
 * them in a JSON manifest; parts and manifests live side by side in one data
 * directory and are addressed by a validated name.
 */
class Base64Parts
{
    /** @var string Data directory path with trailing slashes stripped. */
    private $dataDir;

    /** @var int Maximum size, in bytes, of the base64 text of a single part. */
    private $maxBase64PartBytes;

    /**
     * @param string   $dataDir            Directory for part files and manifests.
     * @param int|null $maxBase64PartBytes Maximum base64 text size per part;
     *                                     null selects the 2 MiB default.
     *
     * @throws Base64PartsException When the limit is below 1024 bytes.
     */
    public function __construct($dataDir, $maxBase64PartBytes = null)
    {
        $this->dataDir = rtrim((string)$dataDir, '/');

        /*
         * Maximum size of the base64 text per part.
         * Keep this at 2 MiB unless your fetcher needs something smaller.
         */
        $this->maxBase64PartBytes = $maxBase64PartBytes === null
            ? 2 * 1024 * 1024
            : (int)$maxBase64PartBytes;

        if ($this->maxBase64PartBytes < 1024) {
            throw new Base64PartsException('maxBase64PartBytes is te klein.');
        }
    }

    /**
     * Encodes $sourceFile into numbered base64 part files and writes a manifest.
     *
     * @param string $sourceFile    Path of the file to split.
     * @param string $name          Logical name used in part and manifest file names.
     * @param bool   $clearOldParts Remove existing parts and manifest for $name first.
     *
     * @return array The manifest that was written, plus a 'manifest_file' key.
     *
     * @throws Base64PartsException On an invalid name, an unreadable or empty
     *                              source, or any read/write/encode failure.
     */
    public function splitFile($sourceFile, $name, $clearOldParts = true)
    {
        $sourceFile = (string)$sourceFile;
        $name = $this->safeName($name);

        if (!is_file($sourceFile) || !is_readable($sourceFile)) {
            throw new Base64PartsException('Bronbestand ontbreekt of is niet leesbaar: ' . $sourceFile);
        }

        $this->ensureDataDir();

        if ($clearOldParts) {
            $this->removeParts($name);
        }

        /*
         * Base64 turns 3 binary bytes into exactly 4 text bytes.
         * Keeping the binary chunk a multiple of 3 gives every non-final
         * part a clean base64 length without padding.
         */
        $maxBinaryChunkBytes = intdiv($this->maxBase64PartBytes, 4) * 3;
        if ($maxBinaryChunkBytes < 3) {
            throw new Base64PartsException('Berekende binary chunk size is ongeldig.');
        }

        $in = fopen($sourceFile, 'rb');
        if ($in === false) {
            throw new Base64PartsException('Kan bronbestand niet openen: ' . $sourceFile);
        }

        $parts = [];
        $nr = 1;
        $totalBinaryBytes = 0;
        $totalBase64Bytes = 0;

        // Hash the bytes as they are read so the manifest describes exactly
        // the data that ended up in the parts, without a second pass.
        $hashContext = hash_init('sha256');

        try {
            while (!feof($in)) {
                $bin = $this->readChunk($in, $maxBinaryChunkBytes);
                if ($bin === false) {
                    throw new Base64PartsException('Fout bij lezen van: ' . $sourceFile);
                }
                if ($bin === '') {
                    break;
                }

                hash_update($hashContext, $bin);

                $b64 = base64_encode($bin);
                $binaryBytes = strlen($bin);
                $base64Bytes = strlen($b64);

                if ($base64Bytes > $this->maxBase64PartBytes) {
                    throw new Base64PartsException('Interne fout: base64 part is groter dan maximum.');
                }

                $partNumber = sprintf('%06d', $nr);
                $partFile = $this->partFile($name, $partNumber);

                if (file_put_contents($partFile, $b64, LOCK_EX) === false) {
                    throw new Base64PartsException('Kan part niet schrijven: ' . $partFile);
                }

                $parts[] = [
                    'number' => $partNumber,
                    'file' => $partFile,
                    'basename' => basename($partFile),
                    'binary_bytes' => $binaryBytes,
                    'base64_bytes' => $base64Bytes,
                ];

                $totalBinaryBytes += $binaryBytes;
                $totalBase64Bytes += $base64Bytes;
                $nr++;
            }
        } finally {
            // Single release point for the handle on success and on every error path.
            fclose($in);
        }

        if (count($parts) === 0) {
            throw new Base64PartsException('Bronbestand is leeg: ' . $sourceFile);
        }

        $manifest = [
            'name' => $name,
            'source_file' => $sourceFile,
            'source_bytes' => $totalBinaryBytes,
            'source_sha256' => hash_final($hashContext),
            'max_base64_bytes' => $this->maxBase64PartBytes,
            'binary_chunk_bytes' => $maxBinaryChunkBytes,
            'part_count' => count($parts),
            'total_binary_bytes' => $totalBinaryBytes,
            'total_base64_bytes' => $totalBase64Bytes,
            'parts' => $parts,
            'created_at' => gmdate('c'),
        ];

        // json_encode() returns false on failure (for example invalid UTF-8 in
        // the source path); concatenating that would write a blank manifest.
        $json = json_encode($manifest, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
        if ($json === false) {
            throw new Base64PartsException('Kan manifest niet coderen als JSON: ' . json_last_error_msg());
        }

        $manifestFile = $this->manifestFile($name);
        if (file_put_contents($manifestFile, $json . "\n", LOCK_EX) === false) {
            throw new Base64PartsException('Kan manifest niet schrijven: ' . $manifestFile);
        }

        $manifest['manifest_file'] = $manifestFile;

        return $manifest;
    }

    /**
     * Returns the base64 text stored in one part file.
     *
     * @param string $name       Logical name the parts were written under.
     * @param string $partNumber Zero-padded part number of at least six digits.
     *
     * @return string Contents of the part file.
     *
     * @throws Base64PartsException On an invalid name or number, or when the
     *                              part is missing or cannot be read.
     */
    public function readPart($name, $partNumber)
    {
        $name = $this->safeName($name);

        // \z anchors at the true end of the string; "$" would also accept a
        // trailing newline. Six or more digits covers what sprintf('%06d')
        // produces once the counter passes 999999.
        if (!preg_match('/^[0-9]{6,}\z/', (string)$partNumber)) {
            throw new Base64PartsException('Ongeldig partnummer: ' . $partNumber);
        }

        $file = $this->partFile($name, $partNumber);

        if (!is_file($file) || !is_readable($file)) {
            throw new Base64PartsException('Part ontbreekt of is niet leesbaar: ' . $file);
        }

        $txt = file_get_contents($file);
        if ($txt === false) {
            throw new Base64PartsException('Kan part niet lezen: ' . $file);
        }

        return $txt;
    }

    /**
     * Loads and decodes the manifest stored for $name.
     *
     * @param string $name Logical name the manifest was written under.
     *
     * @return array Decoded manifest.
     *
     * @throws Base64PartsException On an invalid name, or when the manifest is
     *                              missing, unreadable or not a JSON array/object.
     */
    public function loadManifest($name)
    {
        $name = $this->safeName($name);
        $file = $this->manifestFile($name);

        if (!is_file($file) || !is_readable($file)) {
            throw new Base64PartsException('Manifest ontbreekt: ' . $file);
        }

        $raw = file_get_contents($file);
        if ($raw === false) {
            throw new Base64PartsException('Kan manifest niet lezen: ' . $file);
        }

        $json = json_decode($raw, true);
        if (!is_array($json)) {
            throw new Base64PartsException('Manifest is geen geldige JSON: ' . $file);
        }

        return $json;
    }

    /**
     * Deletes the part files and the manifest stored for $name.
     *
     * Deletion is best-effort: files that cannot be removed are skipped silently.
     *
     * @param string $name Logical name whose files should be removed.
     *
     * @throws Base64PartsException On an invalid name.
     */
    public function removeParts($name)
    {
        $name = $this->safeName($name);

        // Scan the directory and match exact part names rather than globbing
        // "<name>.part.*.b64": the wildcard would also hit parts of a
        // different name such as "<name>.part.x", and glob metacharacters in
        // the directory path would be misinterpreted.
        $dir = $this->dataDir . '/';
        $entries = (is_dir($dir) && is_readable($dir)) ? scandir($dir) : false;

        if ($entries !== false) {
            $pattern = '/^' . preg_quote($name, '/') . '\.part\.[0-9]+\.b64\z/';

            foreach ($entries as $entry) {
                if (preg_match($pattern, $entry) !== 1) {
                    continue;
                }

                $file = $dir . $entry;
                if (is_file($file)) {
                    @unlink($file);
                }
            }
        }

        $manifest = $this->manifestFile($name);
        if (is_file($manifest)) {
            @unlink($manifest);
        }
    }

    /**
     * Reads up to $length bytes, repeating fread() until the chunk is full or
     * the stream ends, so that a short read cannot produce an undersized
     * non-final chunk.
     *
     * @param resource $handle Open, readable stream.
     * @param int      $length Number of bytes wanted; must be positive.
     *
     * @return string|false Bytes read ('' at end of stream), or false on a read error.
     */
    private function readChunk($handle, $length)
    {
        $buffer = '';

        while (strlen($buffer) < $length && !feof($handle)) {
            $piece = fread($handle, $length - strlen($buffer));
            if ($piece === false) {
                return false;
            }
            if ($piece === '') {
                // Nothing more available right now; stop instead of spinning.
                break;
            }
            $buffer .= $piece;
        }

        return $buffer;
    }

    /**
     * Creates the data directory when needed and verifies it is writable.
     *
     * @throws Base64PartsException When the directory cannot be created or written to.
     */
    private function ensureDataDir()
    {
        if (!is_dir($this->dataDir)) {
            // Re-check after a failed mkdir(): another process may have
            // created the directory in the meantime, which is not an error.
            if (!mkdir($this->dataDir, 0755, true) && !is_dir($this->dataDir)) {
                throw new Base64PartsException('Kan data directory niet maken: ' . $this->dataDir);
            }
        }

        if (!is_writable($this->dataDir)) {
            throw new Base64PartsException('Data directory is niet schrijfbaar: ' . $this->dataDir);
        }
    }

    /**
     * Validates a logical name: only ASCII letters, digits and "_ . + -".
     *
     * @param mixed $name Candidate name; cast to string before checking.
     *
     * @return string The validated name.
     *
     * @throws Base64PartsException When the name is empty or has other characters.
     */
    private function safeName($name)
    {
        $name = (string)$name;

        // \z instead of "$": "$" also matches just before a trailing newline,
        // which would let "name\n" through into file paths.
        if (!preg_match('/^[A-Za-z0-9_.+-]+\z/', $name)) {
            throw new Base64PartsException('Ongeldige naam: ' . $name);
        }

        return $name;
    }

    /**
     * Builds the path of a part file inside the data directory.
     *
     * @param string $name       Validated logical name.
     * @param string $partNumber Zero-padded part number.
     *
     * @return string
     */
    private function partFile($name, $partNumber)
    {
        return $this->dataDir . '/' . $name . '.part.' . $partNumber . '.b64';
    }

    /**
     * Builds the path of the manifest file inside the data directory.
     *
     * @param string $name Validated logical name.
     *
     * @return string
     */
    private function manifestFile($name)
    {
        return $this->dataDir . '/' . $name . '.parts.json';
    }
}