Files
racket-audio/audio-sniffer.rkt

359 lines
10 KiB
Racket

#lang racket/base
(require racket/contract
racket/path
racket/string
racket/runtime-path
)
(provide audio-format?
audio-sniff-format
audio-sniff-format/extension
audio-sniff-extension
audio-format-matches?
audio-format-known?)
(define audio-formats
'(mp3 flac ogg vorbis opus wav aiff
mp4 aac alac encrypted-audio
ac3 ape wavpack wma matroska))
(define audio-status-formats
'(unknown file-not-found file-not-readable not-a-file))
(define known-formats
(append audio-formats audio-status-formats))
(define (audio-format? v)
(not (eq? (memq v known-formats) #f)))
(define sniff-bytes 4096)
(define mp4-head-peek-sizes
'(4096 8192 16384 32768 65536 131072 153600))
(define mp4-tail-peek-blocks
'(4096 8192 16384 32768 65536 65536))
(define (audio-sniff-extension* file)
(let ([ext (path-get-extension (build-path file))])
(cond
[(not ext) #f]
[else
(let ([s (string-downcase (bytes->string/utf-8 ext #\?))])
(if (and (> (string-length s) 0)
(char=? (string-ref s 0) #\.))
(substring s 1)
s))])))
(define (file-readable-status file)
(cond
[(not (file-exists? file)) 'file-not-found]
[else
(let ([typ (file-or-directory-type file #f)])
(cond
[(not typ) 'file-not-found]
[(not (eq? typ 'file)) 'not-a-file]
[else #t]))]))
(define (read-prefix/status file [n sniff-bytes])
(with-handlers ([exn:fail:filesystem?
(lambda (_) 'file-not-readable)])
(call-with-input-file file
(lambda (in)
(let ([b (read-bytes n in)])
(if (eof-object? b) #"" b)))
#:mode 'binary)))
(define (read-block/status file start size)
(with-handlers ([exn:fail:filesystem?
(lambda (_) 'file-not-readable)])
(call-with-input-file file
(lambda (in)
(file-position in start)
(let ([b (read-bytes size in)])
(if (eof-object? b) #"" b)))
#:mode 'binary)))
(define (u8 b i)
(bytes-ref b i))
(define (bytes-prefix? b prefix)
(and (>= (bytes-length b) (bytes-length prefix))
(bytes=? (subbytes b 0 (bytes-length prefix)) prefix)))
(define (bytes-at? b pos marker)
(and (>= (bytes-length b) (+ pos (bytes-length marker)))
(bytes=? (subbytes b pos (+ pos (bytes-length marker)))
marker)))
(define (bytes-contains? b marker)
(let ([blen (bytes-length b)]
[mlen (bytes-length marker)])
(cond
[(zero? mlen) #t]
[(< blen mlen) #f]
[else
(let loop ([i 0])
(cond
[(> (+ i mlen) blen) #f]
[(bytes=? (subbytes b i (+ i mlen)) marker) #t]
[else (loop (add1 i))]))])))
(define asf-guid
(bytes #x30 #x26 #xb2 #x75 #x8e #x66 #xcf #x11
#xa6 #xd9 #x00 #xaa #x00 #x62 #xce #x6c))
(define (riff-wave? b)
(and (bytes-at? b 0 #"RIFF")
(bytes-at? b 8 #"WAVE")))
(define (aiff? b)
(and (bytes-at? b 0 #"FORM")
(or (bytes-at? b 8 #"AIFF")
(bytes-at? b 8 #"AIFC"))))
(define (iso-bmff? b)
(bytes-at? b 4 #"ftyp"))
(define (matroska-or-webm? b)
(bytes-prefix? b #"\x1a\x45\xdf\xa3"))
(define (ac3? b)
(bytes-prefix? b #"\x0b\x77"))
(define (ape? b)
(bytes-prefix? b #"MAC "))
(define (wavpack? b)
(bytes-prefix? b #"wvpk"))
(define (sniff-ogg-subtype b)
(cond
[(bytes-contains? b #"OpusHead") 'opus]
[(bytes-contains? b #"\x01vorbis") 'vorbis]
[(bytes-contains? b #"vorbis") 'vorbis]
[(bytes-contains? b #"fLaC") 'flac]
[else 'ogg]))
(define (mp4-codec-peek b)
(cond
[(bytes-contains? b #"enca") 'encrypted-audio]
[(bytes-contains? b #"alac") 'alac]
[(bytes-contains? b #"mp4a") 'aac]
[else #f]))
(define (mp4-codec-peek/tail file)
(with-handlers ([exn:fail:filesystem?
(lambda (_) 'file-not-readable)])
(let ([file-len (file-size file)])
(let loop ([blocks mp4-tail-peek-blocks]
[end file-len])
(cond
[(null? blocks) #f]
[(<= end 0) #f]
[else
(let* ([block-size (car blocks)]
[start (max 0 (- end block-size))]
[size (- end start)]
[b (read-block/status file start size)])
(cond
[(not (bytes? b)) b]
[else
(let ([r (mp4-codec-peek b)])
(if r
r
(loop (cdr blocks) start)))]))])))))
(define (mp4-codec-peek/head file)
(let loop ([sizes (cdr mp4-head-peek-sizes)])
(cond
[(null? sizes) #f]
[else
(let ([b (read-prefix/status file (car sizes))])
(cond
[(not (bytes? b)) b]
[else
(let ([r (mp4-codec-peek b)])
(if r
r
(loop (cdr sizes))))]))])))
(define (mp4-codec-peek/file file head)
(let ([r0 (mp4-codec-peek head)])
(cond
[r0 r0]
[else
(let ([rtail (mp4-codec-peek/tail file)])
(cond
[rtail rtail]
[else
(let ([rhead (mp4-codec-peek/head file)])
(if rhead rhead #f))]))])))
(define (id3? b)
(and (bytes-at? b 0 #"ID3")
(>= (bytes-length b) 10)
(not (= (u8 b 3) #xff))
(not (= (u8 b 4) #xff))
(zero? (bitwise-and (u8 b 6) #x80))
(zero? (bitwise-and (u8 b 7) #x80))
(zero? (bitwise-and (u8 b 8) #x80))
(zero? (bitwise-and (u8 b 9) #x80))))
(define (synchsafe-size b0 b1 b2 b3)
(+ (arithmetic-shift b0 21)
(arithmetic-shift b1 14)
(arithmetic-shift b2 7)
b3))
(define (id3-total-size b)
(and (id3? b)
(+ 10
(synchsafe-size (u8 b 6)
(u8 b 7)
(u8 b 8)
(u8 b 9)))))
(define (valid-mp3-frame-at? b i)
(and (>= (bytes-length b) (+ i 4))
(let* ([b0 (u8 b i)]
[b1 (u8 b (+ i 1))]
[b2 (u8 b (+ i 2))]
[b3 (u8 b (+ i 3))]
[version (bitwise-and (arithmetic-shift b1 -3) #x03)]
[layer (bitwise-and (arithmetic-shift b1 -1) #x03)]
[bitrate (bitwise-and (arithmetic-shift b2 -4) #x0f)]
[sample-rate (bitwise-and (arithmetic-shift b2 -2)
#x03)]
[emphasis (bitwise-and b3 #x03)])
(and (= b0 #xff)
(= (bitwise-and b1 #xe0) #xe0)
(not (= version #b01))
(not (= layer #b00))
(not (= bitrate #b0000))
(not (= bitrate #b1111))
(not (= sample-rate #b11))
(not (= emphasis #b10))))))
(define (mp3-frame-sync? b)
(or (valid-mp3-frame-at? b 0)
(let ([id3-size (id3-total-size b)])
(and id3-size
(< id3-size (bytes-length b))
(valid-mp3-frame-at? b id3-size)))))
(define (mp3? b)
(or (id3? b)
(mp3-frame-sync? b)))
(define (aac-adts-sync? b)
(and (>= (bytes-length b) 7)
(= (u8 b 0) #xff)
(= (bitwise-and (u8 b 1) #xf6) #xf0)
(= (bitwise-and (u8 b 1) #x06) #x00)
(not (= (bitwise-and (arithmetic-shift (u8 b 2) -2)
#x0f)
#x0f))))
(define (audio-sniff-format* file)
(let ([status (file-readable-status file)])
(cond
[(not (eq? status #t)) status]
[else
(let ([b (read-prefix/status file)])
(cond
[(symbol? b) b]
;; Hard signatures
[(bytes-prefix? b #"fLaC") 'flac]
[(bytes-prefix? b #"OggS") (sniff-ogg-subtype b)]
[(riff-wave? b) 'wav]
[(aiff? b) 'aiff]
[(iso-bmff? b)
(let ([codec (mp4-codec-peek/file file b)])
(if codec codec 'mp4))]
[(bytes-prefix? b asf-guid) 'wma]
[(matroska-or-webm? b) 'matroska]
[(ac3? b) 'ac3]
[(ape? b) 'ape]
[(wavpack? b) 'wavpack]
;; Heuristics
[(mp3? b) 'mp3]
[(aac-adts-sync? b) 'aac]
[else 'unknown]))])))
(define (audio-sniff-format/extension* file)
(let ([fmt (audio-sniff-format* file)])
(cond
[(not (eq? (memq fmt
'(file-not-found file-not-readable not-a-file))
#f))
fmt]
[(not (eq? fmt 'unknown)) fmt]
[else
(case (string->symbol (or (audio-sniff-extension* file) ""))
[(mp3 mp2 mp1) 'mp3]
[(flac) 'flac]
[(ogg oga) 'ogg]
[(opus) 'opus]
[(wav wave) 'wav]
[(aif aiff aifc) 'aiff]
[(m4a mp4 m4b m4p) 'mp4]
[(aac) 'aac]
[(alac) 'alac]
[(ac3) 'ac3]
[(ape) 'ape]
[(wv wvp wvpk wavpack) 'wavpack]
[(wma asf) 'wma]
[(webm mka mkv) 'matroska]
[else 'unknown])])))
(define (audio-format-known?* fmt)
(not (eq? (memq fmt audio-formats) #f)))
(define (audio-format-matches?* file formats)
(not (eq? (memq (audio-sniff-format/extension* file) formats) #f)))
(define/contract (audio-sniff-extension file)
(-> path-string? (or/c string? #f))
(audio-sniff-extension* file))
(define/contract (audio-sniff-format file)
(-> path-string? audio-format?)
(audio-sniff-format* file))
(define/contract (audio-sniff-format/extension file)
(-> path-string? audio-format?)
(audio-sniff-format/extension* file))
(define/contract (audio-format-known? fmt)
(-> symbol? boolean?)
(audio-format-known?* fmt))
(define/contract (audio-format-matches? file formats)
(-> path-string? (listof symbol?) boolean?)
(audio-format-matches?* file formats))
(define-runtime-path audio-tests "tests")
(define (sniff-test)
(for-each (λ (ext)
(let* ((dir (build-path audio-tests))
(ext* (format ".~a" ext))
(files (map (λ (f)
(build-path audio-tests f))
(filter (λ (f)
(string-suffix? (format "~a" f) ext*))
(directory-list dir))))
)
(for-each (λ (f)
(displayln (format "~a: ~a" f (audio-sniff-format/extension f))))
files)
)
)
'(aac adts flac mp3 ogg ts ac3 aiff m4a mp4 ogx wav wv mp2))
)