#lang racket/base (require racket/contract racket/path racket/string racket/runtime-path ) (provide audio-format? audio-sniff-format audio-sniff-format/extension audio-sniff-extension audio-format-matches? audio-format-known?) (define audio-formats '(mp3 flac ogg vorbis opus wav aiff mp4 aac alac encrypted-audio ac3 ape wavpack wma matroska)) (define audio-status-formats '(unknown file-not-found file-not-readable not-a-file)) (define known-formats (append audio-formats audio-status-formats)) (define (audio-format? v) (not (eq? (memq v known-formats) #f))) (define sniff-bytes 4096) (define mp4-head-peek-sizes '(4096 8192 16384 32768 65536 131072 153600)) (define mp4-tail-peek-blocks '(4096 8192 16384 32768 65536 65536)) (define (audio-sniff-extension* file) (let ([ext (path-get-extension (build-path file))]) (cond [(not ext) #f] [else (let ([s (string-downcase (bytes->string/utf-8 ext #\?))]) (if (and (> (string-length s) 0) (char=? (string-ref s 0) #\.)) (substring s 1) s))]))) (define (file-readable-status file) (cond [(not (file-exists? file)) 'file-not-found] [else (let ([typ (file-or-directory-type file #f)]) (cond [(not typ) 'file-not-found] [(not (eq? typ 'file)) 'not-a-file] [else #t]))])) (define (read-prefix/status file [n sniff-bytes]) (with-handlers ([exn:fail:filesystem? (lambda (_) 'file-not-readable)]) (call-with-input-file file (lambda (in) (let ([b (read-bytes n in)]) (if (eof-object? b) #"" b))) #:mode 'binary))) (define (read-block/status file start size) (with-handlers ([exn:fail:filesystem? (lambda (_) 'file-not-readable)]) (call-with-input-file file (lambda (in) (file-position in start) (let ([b (read-bytes size in)]) (if (eof-object? b) #"" b))) #:mode 'binary))) (define (u8 b i) (bytes-ref b i)) (define (bytes-prefix? b prefix) (and (>= (bytes-length b) (bytes-length prefix)) (bytes=? (subbytes b 0 (bytes-length prefix)) prefix))) (define (bytes-at? b pos marker) (and (>= (bytes-length b) (+ pos (bytes-length marker))) (bytes=? (subbytes b pos (+ pos (bytes-length marker))) marker))) (define (bytes-contains? b marker) (let ([blen (bytes-length b)] [mlen (bytes-length marker)]) (cond [(zero? mlen) #t] [(< blen mlen) #f] [else (let loop ([i 0]) (cond [(> (+ i mlen) blen) #f] [(bytes=? (subbytes b i (+ i mlen)) marker) #t] [else (loop (add1 i))]))]))) (define asf-guid (bytes #x30 #x26 #xb2 #x75 #x8e #x66 #xcf #x11 #xa6 #xd9 #x00 #xaa #x00 #x62 #xce #x6c)) (define (riff-wave? b) (and (bytes-at? b 0 #"RIFF") (bytes-at? b 8 #"WAVE"))) (define (aiff? b) (and (bytes-at? b 0 #"FORM") (or (bytes-at? b 8 #"AIFF") (bytes-at? b 8 #"AIFC")))) (define (iso-bmff? b) (bytes-at? b 4 #"ftyp")) (define (matroska-or-webm? b) (bytes-prefix? b #"\x1a\x45\xdf\xa3")) (define (ac3? b) (bytes-prefix? b #"\x0b\x77")) (define (ape? b) (bytes-prefix? b #"MAC ")) (define (wavpack? b) (bytes-prefix? b #"wvpk")) (define (sniff-ogg-subtype b) (cond [(bytes-contains? b #"OpusHead") 'opus] [(bytes-contains? b #"\x01vorbis") 'vorbis] [(bytes-contains? b #"vorbis") 'vorbis] [(bytes-contains? b #"fLaC") 'flac] [else 'ogg])) (define (mp4-codec-peek b) (cond [(bytes-contains? b #"enca") 'encrypted-audio] [(bytes-contains? b #"alac") 'alac] [(bytes-contains? b #"mp4a") 'aac] [else #f])) (define (mp4-codec-peek/tail file) (with-handlers ([exn:fail:filesystem? (lambda (_) 'file-not-readable)]) (let ([file-len (file-size file)]) (let loop ([blocks mp4-tail-peek-blocks] [end file-len]) (cond [(null? blocks) #f] [(<= end 0) #f] [else (let* ([block-size (car blocks)] [start (max 0 (- end block-size))] [size (- end start)] [b (read-block/status file start size)]) (cond [(not (bytes? b)) b] [else (let ([r (mp4-codec-peek b)]) (if r r (loop (cdr blocks) start)))]))]))))) (define (mp4-codec-peek/head file) (let loop ([sizes (cdr mp4-head-peek-sizes)]) (cond [(null? sizes) #f] [else (let ([b (read-prefix/status file (car sizes))]) (cond [(not (bytes? b)) b] [else (let ([r (mp4-codec-peek b)]) (if r r (loop (cdr sizes))))]))]))) (define (mp4-codec-peek/file file head) (let ([r0 (mp4-codec-peek head)]) (cond [r0 r0] [else (let ([rtail (mp4-codec-peek/tail file)]) (cond [rtail rtail] [else (let ([rhead (mp4-codec-peek/head file)]) (if rhead rhead #f))]))]))) (define (id3? b) (and (bytes-at? b 0 #"ID3") (>= (bytes-length b) 10) (not (= (u8 b 3) #xff)) (not (= (u8 b 4) #xff)) (zero? (bitwise-and (u8 b 6) #x80)) (zero? (bitwise-and (u8 b 7) #x80)) (zero? (bitwise-and (u8 b 8) #x80)) (zero? (bitwise-and (u8 b 9) #x80)))) (define (synchsafe-size b0 b1 b2 b3) (+ (arithmetic-shift b0 21) (arithmetic-shift b1 14) (arithmetic-shift b2 7) b3)) (define (id3-total-size b) (and (id3? b) (+ 10 (synchsafe-size (u8 b 6) (u8 b 7) (u8 b 8) (u8 b 9))))) (define (valid-mp3-frame-at? b i) (and (>= (bytes-length b) (+ i 4)) (let* ([b0 (u8 b i)] [b1 (u8 b (+ i 1))] [b2 (u8 b (+ i 2))] [b3 (u8 b (+ i 3))] [version (bitwise-and (arithmetic-shift b1 -3) #x03)] [layer (bitwise-and (arithmetic-shift b1 -1) #x03)] [bitrate (bitwise-and (arithmetic-shift b2 -4) #x0f)] [sample-rate (bitwise-and (arithmetic-shift b2 -2) #x03)] [emphasis (bitwise-and b3 #x03)]) (and (= b0 #xff) (= (bitwise-and b1 #xe0) #xe0) (not (= version #b01)) (not (= layer #b00)) (not (= bitrate #b0000)) (not (= bitrate #b1111)) (not (= sample-rate #b11)) (not (= emphasis #b10)))))) (define (mp3-frame-sync? b) (or (valid-mp3-frame-at? b 0) (let ([id3-size (id3-total-size b)]) (and id3-size (< id3-size (bytes-length b)) (valid-mp3-frame-at? b id3-size))))) (define (mp3? b) (or (id3? b) (mp3-frame-sync? b))) (define (aac-adts-sync? b) (and (>= (bytes-length b) 7) (= (u8 b 0) #xff) (= (bitwise-and (u8 b 1) #xf6) #xf0) (= (bitwise-and (u8 b 1) #x06) #x00) (not (= (bitwise-and (arithmetic-shift (u8 b 2) -2) #x0f) #x0f)))) (define (audio-sniff-format* file) (let ([status (file-readable-status file)]) (cond [(not (eq? status #t)) status] [else (let ([b (read-prefix/status file)]) (cond [(symbol? b) b] ;; Hard signatures [(bytes-prefix? b #"fLaC") 'flac] [(bytes-prefix? b #"OggS") (sniff-ogg-subtype b)] [(riff-wave? b) 'wav] [(aiff? b) 'aiff] [(iso-bmff? b) (let ([codec (mp4-codec-peek/file file b)]) (if codec codec 'mp4))] [(bytes-prefix? b asf-guid) 'wma] [(matroska-or-webm? b) 'matroska] [(ac3? b) 'ac3] [(ape? b) 'ape] [(wavpack? b) 'wavpack] ;; Heuristics [(mp3? b) 'mp3] [(aac-adts-sync? b) 'aac] [else 'unknown]))]))) (define (audio-sniff-format/extension* file) (let ([fmt (audio-sniff-format* file)]) (cond [(not (eq? (memq fmt '(file-not-found file-not-readable not-a-file)) #f)) fmt] [(not (eq? fmt 'unknown)) fmt] [else (case (string->symbol (or (audio-sniff-extension* file) "")) [(mp3 mp2 mp1) 'mp3] [(flac) 'flac] [(ogg oga) 'ogg] [(opus) 'opus] [(wav wave) 'wav] [(aif aiff aifc) 'aiff] [(m4a mp4 m4b m4p) 'mp4] [(aac) 'aac] [(alac) 'alac] [(ac3) 'ac3] [(ape) 'ape] [(wv wvp wvpk wavpack) 'wavpack] [(wma asf) 'wma] [(webm mka mkv) 'matroska] [else 'unknown])]))) (define (audio-format-known?* fmt) (not (eq? (memq fmt audio-formats) #f))) (define (audio-format-matches?* file formats) (not (eq? (memq (audio-sniff-format/extension* file) formats) #f))) (define/contract (audio-sniff-extension file) (-> path-string? (or/c string? #f)) (audio-sniff-extension* file)) (define/contract (audio-sniff-format file) (-> path-string? audio-format?) (audio-sniff-format* file)) (define/contract (audio-sniff-format/extension file) (-> path-string? audio-format?) (audio-sniff-format/extension* file)) (define/contract (audio-format-known? fmt) (-> symbol? boolean?) (audio-format-known?* fmt)) (define/contract (audio-format-matches? file formats) (-> path-string? (listof symbol?) boolean?) (audio-format-matches?* file formats)) (define-runtime-path audio-tests "tests") (define (sniff-test) (for-each (λ (ext) (let* ((dir (build-path audio-tests)) (ext* (format ".~a" ext)) (files (map (λ (f) (build-path audio-tests f)) (filter (λ (f) (string-suffix? (format "~a" f) ext*)) (directory-list dir)))) ) (for-each (λ (f) (displayln (format "~a: ~a" f (audio-sniff-format/extension f)))) files) ) ) '(aac adts flac mp3 ogg ts ac3 aiff m4a mp4 ogx wav wv mp2)) )