diff --git a/info.rkt b/info.rkt index ea0e3e6..417e4f6 100644 --- a/info.rkt +++ b/info.rkt @@ -12,6 +12,10 @@ ("scrbl/audio-decoder.scrbl" () (library)) ("scrbl/flac-decoder.scrbl" () (library)) ("scrbl/mp3-decoder.scrbl" () (library)) + ("scrbl/audio-sniffer.scrbl" () (library)) + ("scrbl/ffmpeg-ffi.scrbl" () (library)) + ("scrbl/ffmpeg-decoder.scrbl" () (library)) + ("scrbl/ffmpeg-c-backend.scrbl" () (library)) ) ) diff --git a/libao.rkt b/libao.rkt index 4a65671..e5cc1ca 100644 --- a/libao.rkt +++ b/libao.rkt @@ -51,7 +51,6 @@ (define (ao-supported-music-format? f) (and (symbol? f) (or (eq? f 'flac) - (eq? f 'mp3) (eq? f 'ao)))) diff --git a/scrbl/audio-sniffer.rkt b/scrbl/audio-sniffer.rkt new file mode 100644 index 0000000..9451239 --- /dev/null +++ b/scrbl/audio-sniffer.rkt @@ -0,0 +1,126 @@ +#lang scribble/manual + +@title{Audio Sniffer} +@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]} + +@defmodule[audio-sniffer] + +This module provides utilities to determine the format of an audio file. +It combines lightweight content sniffing with extension-based fallback. + +The sniffer is designed to be robust against incomplete data and +progressively increases the amount of data inspected. + +@defproc[(audio-sniff-extension [file path-string?]) (or/c string? #f)]{ +Returns the file extension (without dot) or @racket[#f] if none could be determined. + +The result is purely based on the filename and does not inspect file +contents. +} + +@defproc[(audio-sniff-format [file path-string?]) symbol?]{ +Determines the audio format by inspecting the file contents. + +The function reads portions of the file (both head and tail) and tries +to match known signatures. 
+ +Returns a symbol such as: + +@itemlist[ + #:style 'compact + @item{@racket['mp3]} + @item{@racket['flac]} + @item{@racket['ogg]} + @item{@racket['wav]} + @item{@racket['aiff]} + @item{@racket['mp4]} + @item{@racket['unknown]} +] + +If the file cannot be read, a filesystem-related symbol is returned +instead of raising an exception. +} + +@defproc[(audio-sniff-format/extension [file path-string?]) symbol?]{ +Determines the audio format using sniffing, with a fallback to the file +extension. + +If @racket[audio-sniff-format] returns @racket['unknown], the extension +is used as a secondary source. + +This function provides the most practical format detection. +} + +@defproc[(audio-format-known? [fmt symbol?]) boolean?]{ +Returns @racket[#t] if the given format symbol is recognized by the +sniffer. +} + +@defproc[(audio-format-matches? [file path-string?] + [formats (listof symbol?)]) + boolean?]{ +Returns @racket[#t] if the detected format of @racket[file] is a member +of @racket[formats]. + +Detection is performed using @racket[audio-sniff-format/extension]. +} + +@section{Detection Strategy} + +The sniffer follows a layered strategy: + +@itemlist[ + #:style 'compact + @item{Read a small head section of the file (starting at 4096 bytes)} + @item{Inspect tail sections for formats that store metadata at the end (e.g. mp4)} + @item{Increase the head size exponentially if needed} + @item{Fall back to the file extension if no signature is found} +] + +This approach balances performance with robustness, especially for +container formats where identifying markers may not be located at the +start of the file. + +@section{Error Handling} + +Instead of raising exceptions, the sniffer returns symbolic error +conditions when the file cannot be inspected: + +@itemlist[ + #:style 'compact + @item{@racket['file-not-found]} + @item{@racket['file-not-readable]} + @item{@racket['not-a-file]} +] + +These values can be handled by higher-level code without requiring +exception handling. 
+ +@section{Supported Formats} + +The sniffer recognizes a broad range of formats, including: + +@itemlist[ + #:style 'compact + @item{mp3} + @item{flac} + @item{ogg / opus} + @item{wav} + @item{aiff} + @item{mp4 / m4a} + @item{aac} + @item{alac} + @item{ac3} + @item{ape} + @item{wavpack} + @item{wma} + @item{matroska} +] + +Extension fallback supports the same set of formats. + +@section{Notes} + +Sniffing is heuristic in nature. Although detection is generally reliable, +malformed or unusual files may be reported as @racket['unknown]. + diff --git a/scrbl/ffmpeg-c-backend.scrbl b/scrbl/ffmpeg-c-backend.scrbl new file mode 100644 index 0000000..838f16a --- /dev/null +++ b/scrbl/ffmpeg-c-backend.scrbl @@ -0,0 +1,218 @@ +#lang scribble/manual + +@title{FFmpeg Audio Backend} +@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]} + +@section{Overview} + +The FFmpeg audio backend is a small C++ wrapper with a plain C ABI. It hides +the FFmpeg data structures from the caller and exposes a simple +audio-only decoder interface. + +The caller does not handle FFmpeg streams, packets, frames, codec +contexts or resampler objects. A file is opened, the best audio stream is +selected, and decoding is performed by repeatedly calling +@tt{fmpg_decode_next}. + +The output format is fixed: signed 32-bit integer PCM, interleaved, in +native-endian byte order. + +A sample frame is one sample per channel for a single moment in time. For +stereo S32, one sample frame contains two @tt{int32_t} values and +therefore takes 8 bytes. + +@section{Opaque Instance} + +@verbatim|{ +typedef struct fmpg_instance fmpg_instance; +}| + +The decoder instance is opaque. The caller only receives and passes +around a pointer to this type. All FFmpeg state is stored internally. + +@section{Lifecycle} + +@verbatim|{ +fmpg_instance *fmpg_init(void); +}| + +Creates a new decoder instance. 
+ +Before allocating the instance, the backend checks whether the FFmpeg major +versions used at compile time match the FFmpeg major versions available +at runtime. If they do not match, @tt{NULL} is returned. + +Returns a pointer to a new @tt{fmpg_instance}, or @tt{NULL} on failure. + +@verbatim|{ +void fmpg_free(fmpg_instance *instance); +}| + +Frees the decoder instance. If the instance still has an open input, it +is closed as part of destruction. + +@verbatim|{ +int fmpg_open_file(fmpg_instance *instance, const char *filename); +}| + +Opens a media file, selects the best audio stream, initializes the +decoder and initializes the resampler. + +After a successful call, stream information, duration and metadata can be +read using the getter functions. + +Returns @tt{1} on success and @tt{0} on failure. The call fails if the +instance is @tt{NULL}, if a file is already open, if @tt{filename} is +@tt{NULL}, if no usable audio stream is found, or if FFmpeg cannot open +or initialize the file. + +@verbatim|{ +void fmpg_close(fmpg_instance *instance); +}| + +Closes the current file and releases all FFmpeg state owned by the +instance. The instance itself remains valid and may be reused. + +@verbatim|{ +int fmpg_is_open(fmpg_instance *instance); +}| + +Returns @tt{1} if the instance is open and ready to decode. Otherwise +returns @tt{0}. + +@section{Audio Information} + +@verbatim|{ +int fmpg_audio_stream_count(fmpg_instance *instance); +int fmpg_audio_sample_rate(fmpg_instance *instance); +int fmpg_audio_channels(fmpg_instance *instance); +int fmpg_audio_bits_per_sample(fmpg_instance *instance); +int fmpg_audio_bytes_per_sample(fmpg_instance *instance); +int64_t fmpg_duration_ms(fmpg_instance *instance); +int64_t fmpg_duration_samples(fmpg_instance *instance); +}| + +These functions return information about the selected audio stream. 
+ +@itemlist[ + #:style 'compact + @item{@tt{fmpg_audio_stream_count} returns the number of audio streams found in the opened file, or @tt{0}.} + @item{@tt{fmpg_audio_sample_rate} returns the selected stream's sample rate, or @tt{0}.} + @item{@tt{fmpg_audio_channels} returns the selected stream's channel count, or @tt{0}.} + @item{@tt{fmpg_audio_bits_per_sample} always returns @tt{32}.} + @item{@tt{fmpg_audio_bytes_per_sample} always returns @tt{4}.} + @item{@tt{fmpg_duration_ms} returns the duration in milliseconds, or @tt{-1}.} + @item{@tt{fmpg_duration_samples} returns the duration in output sample frames, or @tt{-1}.} +] + +@section{Metadata} + +@verbatim|{ +const char *fmpg_file_title(fmpg_instance *instance); +const char *fmpg_file_author(fmpg_instance *instance); +const char *fmpg_file_album(fmpg_instance *instance); +const char *fmpg_file_genre(fmpg_instance *instance); +const char *fmpg_file_comment(fmpg_instance *instance); +const char *fmpg_file_copyright(fmpg_instance *instance); +int fmpg_file_year(fmpg_instance *instance); +int fmpg_file_track(fmpg_instance *instance); +int64_t fmpg_file_bitrate(fmpg_instance *instance); +}| + +The metadata getters return values read from the container metadata. A +missing string value is returned as an empty string. A missing numeric +value is returned as @tt{-1}. @tt{fmpg_file_author} returns the +@tt{artist} metadata field. + +@section{Decoding} + +@verbatim|{ +int fmpg_decode_next(fmpg_instance *instance); +}| + +Decodes the next block of audio. + +Internally, the backend reads packets from the selected audio stream, feeds +them to the FFmpeg decoder, receives all available decoded frames, +converts them to signed 32-bit interleaved PCM, and concatenates the +result in the instance output buffer. + +Packets from non-selected streams are skipped internally. + +Returns @tt{1} if decoded PCM data is available through +@tt{fmpg_buffer} and @tt{fmpg_buffer_size}. Returns @tt{0} at EOF or on +error. 
+ +@verbatim|{ +int fmpg_seek_ms(fmpg_instance *instance, int64_t target_pos_ms); +}| + +Seeks to an absolute position in milliseconds. + +FFmpeg may seek to a packet before the requested timestamp. After +seeking, this backend discards decoded pre-roll samples until the requested +output sample position is reached, when timestamps are available. + +Returns @tt{1} on success and @tt{0} on failure. + +@section{Output Buffer and Sample Positions} + +@verbatim|{ +const uint8_t *fmpg_buffer(fmpg_instance *instance); +int fmpg_buffer_size(fmpg_instance *instance); +int64_t fmpg_buffer_samples(fmpg_instance *instance); +int64_t fmpg_buffer_start_sample(fmpg_instance *instance); +int64_t fmpg_buffer_end_sample(fmpg_instance *instance); +int64_t fmpg_sample_position(fmpg_instance *instance); +double fmpg_timecode(fmpg_instance *instance); +}| + +@tt{fmpg_buffer} returns a pointer to the current decoded PCM buffer, or +@tt{NULL} if there is no current buffer. The pointer remains valid only +until the next API call that decodes, seeks, closes or frees the +instance. + +@tt{fmpg_buffer_size} returns the size of the current buffer in bytes. +@tt{fmpg_buffer_samples} returns the number of sample frames in the +current buffer. @tt{fmpg_buffer_start_sample} returns the absolute +sample-frame index of the first sample frame in the buffer, and +@tt{fmpg_buffer_end_sample} returns the absolute sample-frame index just +after the current buffer. + +@tt{fmpg_sample_position} returns the current absolute sample position in +the music stream. After a successful @tt{fmpg_decode_next}, this is the +same value as @tt{fmpg_buffer_end_sample}. + +@tt{fmpg_timecode} returns the approximate start time of the current +decoded block in seconds. 
+ +@section{FFmpeg Version Checks} + +@verbatim|{ +const char *fmpg_ffmpeg_version(void); +const char *fmpg_int_version2string(unsigned version); +int fmpg_compatible_ffmpeg(void); +}| + +@tt{fmpg_ffmpeg_version} returns a string describing the FFmpeg versions +used when the backend was compiled. The string includes avformat, avcodec, +swresample and avutil. + +@tt{fmpg_int_version2string} converts an FFmpeg integer version value to +a string of the form @tt{major.minor.micro}. + +@tt{fmpg_compatible_ffmpeg} checks whether the FFmpeg major versions used +at compile time match the FFmpeg major versions available at runtime. +It returns @tt{1} when the versions are compatible and @tt{0} otherwise. + +@section{Decoder Model} + +The backend uses the modern FFmpeg send/receive decoding model. Packets are +sent with @tt{avcodec_send_packet}, decoded frames are received with +@tt{avcodec_receive_frame}, and conversion to the fixed output format is +done with libswresample. + +The public API intentionally avoids exposing these details. From the +caller's perspective, decoding is a sequence of calls to +@tt{fmpg_decode_next}, each followed by reading the current output buffer +and its sample-position metadata. \ No newline at end of file diff --git a/scrbl/ffmpeg-decoder.scrbl b/scrbl/ffmpeg-decoder.scrbl new file mode 100644 index 0000000..a93336e --- /dev/null +++ b/scrbl/ffmpeg-decoder.scrbl @@ -0,0 +1,122 @@ +#lang scribble/manual + +@title{FFmpeg Decoder} +@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]} + +@defmodule[ffmpeg-decoder] + +This module provides an audio decoder based on the FFmpeg audio shim. It +uses the lower-level @racketmodname[racket-sound/ffmpeg-ffi] module and presents a +callback-based decoder interface comparable to the other audio decoders. + +The native FFmpeg layer decodes audio to signed 32-bit interleaved PCM. +The decoder therefore reports 32 bits per sample and 4 bytes per sample +when no more specific information is available. 
+ +@defproc[(ffmpeg-valid? [audio-file any/c]) boolean?]{ +Returns @racket[#t]. + +This predicate is deliberately weak. Existence and extension checks are +expected to be performed by the generic audio-decoder layer. Actual file +validation is done when the FFmpeg shim opens the file. +} + +@defproc[(ffmpeg-open [audio-file (or/c path? string?)] + [cb-stream-info procedure?] + [cb-audio procedure?]) + (or/c any/c #f)]{ +Opens @racket[audio-file] and returns an opaque decoder handle, or +@racket[#f] if the file does not exist. + +If @racket[audio-file] is a path, it is converted to a string before it +is passed to the native layer. + +The @racket[cb-stream-info] callback is called with a mutable hash that +describes the stream. The @racket[cb-audio] callback is called with the +same kind of hash, a PCM buffer pointer and the buffer size in bytes. +} + +@defproc[(ffmpeg-read [handle any/c]) any/c]{ +Starts reading and decoding audio from @racket[handle]. + +This function loops until decoding reaches the end of the stream or +until @racket[ffmpeg-stop] requests termination. During the read loop, +pending seek requests made with @racket[ffmpeg-seek] are applied before +the next native read. + +The stream-info callback is called when format information becomes +available. The audio callback is called as: + +@racketblock[ +(cb-audio info buffer size) +] + +where @racket[info] is a mutable hash, @racket[buffer] is a pointer to +interleaved signed 32-bit PCM data, and @racket[size] is the size of the +buffer in bytes. + +When reading stops, the native FFmpeg instance is closed and deleted. +} + +@defproc[(ffmpeg-seek [handle any/c] + [percentage real?]) + void?]{ +Requests a seek operation. + +The @racket[percentage] argument is interpreted as a percentage of the +total number of samples in the stream. Fractional percentages are +allowed. The actual seek is performed by @racket[ffmpeg-read] before the +next native read call. 
+ +If the total sample count is unknown or invalid, no seek request is made. +} + +@defproc[(ffmpeg-stop [handle any/c]) void?]{ +Requests the read loop to stop. + +This function waits until @racket[ffmpeg-read] has left its read loop. +It polls the internal reading flag with a short sleep interval. +} + +@section{Stream Information} + +The stream-info and audio callbacks receive a mutable hash. The decoder +stores at least the following keys: + +@itemlist[ + #:style 'compact + @item{@racket['sample-rate]} + @item{@racket['channels]} + @item{@racket['bits-per-sample]} + @item{@racket['bytes-per-sample]} + @item{@racket['total-samples]} + @item{@racket['duration]} +] + +For audio callbacks, the hash is also updated with: + +@itemlist[ + #:style 'compact + @item{@racket['sample], the current sample position} + @item{@racket['current-time], the current time in seconds} +] + +If the native layer omits format values, the decoder fills in the most +recent known values. Initial defaults are 44100 Hz, 2 channels, 32 bits +per sample and 4 bytes per sample. + +@section{Decoding Model} + +The decoder keeps a small Racket handle around the native FFmpeg handler. +The handle stores the callbacks, stop and seek state, the current reading +state and the current format hash. + +Seeking is asynchronous with respect to @racket[ffmpeg-seek]: the +function only records the requested target sample. The read loop applies +the pending seek request before decoding the next block. + +@section{Notes} + +The FFmpeg shim output is expected to be signed 32-bit interleaved PCM. +This keeps the decoder interface suitable for a playback pipeline that +feeds decoded audio to libao. 
\ No newline at end of file diff --git a/scrbl/ffmpeg-ffi.scrbl b/scrbl/ffmpeg-ffi.scrbl new file mode 100644 index 0000000..173e01c --- /dev/null +++ b/scrbl/ffmpeg-ffi.scrbl @@ -0,0 +1,160 @@ +#lang scribble/manual + +@title{FFmpeg FFI} +@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]} + +@defmodule[ffmpeg-ffi] + +This module provides the low-level Racket FFI binding for the native +FFmpeg audio shim. The native shim exposes an opaque FFmpeg instance and +keeps all decoder state inside that instance. + +The output format of the native shim is signed 32-bit interleaved PCM. +The buffer returned by the native layer is copied into Racket-managed +memory before it is passed to higher layers. + +@defproc[(fmpg-ffi-decoder-handler) procedure?]{ +Creates a new FFmpeg decoder command handler. + +The returned procedure manages one native FFmpeg instance. Commands are +sent as a symbol followed by command-specific arguments. + +@itemlist[ + #:style 'compact + @item{@racket['new] creates the native FFmpeg instance and returns @racket[#t].} + @item{@racket['delete] frees the native FFmpeg instance and returns @racket[#t].} + @item{@racket['init] opens a file and fetches stream and metadata information.} + @item{@racket['close] closes the currently opened file.} + @item{@racket['format] calls a format callback with the current stream format.} + @item{@racket['info] writes stream information to the sound logger.} + @item{@racket['read] decodes the next audio block.} + @item{@racket['seek] seeks to an absolute PCM sample position.} + @item{@racket['tell] returns the current PCM sample position.} + @item{@racket['file] returns the currently opened filename.} + @item{@racket['metadata] returns a hash with file metadata.} +] +} + +@section{Command Interface} + +The command handler is used as follows: + +@racketblock[ +(define h (fmpg-ffi-decoder-handler)) + +(h 'new) +(h 'init filename) +(h 'read audio-callback format-callback) +(h 'close) +(h 'delete) +] + +The 
@racket['new] command must be called before @racket['init]. A +handler owns at most one native FFmpeg instance. Calling @racket['new] +twice without @racket['delete] raises an error. + +@section{Format Callback} + +The @racket['format] command and the first @racket['read] call report +the stream format by calling the supplied callback as follows: + +@racketblock[ +(format-callback pcm-pos + sample-rate + channels + bits-per-sample + bytes-per-sample + pcm-length) +] + +The @racket[pcm-pos] argument is the current PCM sample position. +The @racket[pcm-length] argument is the total number of PCM samples, or +@racket[-1] when this is not known. + +@section{Reading Audio} + +The @racket['read] command decodes one audio block. It expects an audio +callback and a format callback: + +@racketblock[ +(h 'read audio-callback format-callback) +] + +On the first read, the format callback is called before audio data is +returned. If decoding produces data, the audio callback is called as: + +@racketblock[ +(audio-callback 'data pcm-pos buffer size) +] + +The @racket[pcm-pos] argument is the absolute sample position of the +first sample frame in the buffer. The @racket[buffer] argument points to +a copied PCM buffer, and @racket[size] is the buffer size in bytes. + +When the stream ends, the callback is called as: + +@racketblock[ +(audio-callback 'done -1 #f 0) +] + +The command returns @racket[#t]. + +@section{Seeking} + +The @racket['seek] command takes an absolute PCM sample position: + +@racketblock[ +(h 'seek pcm-pos) +] + +The sample position is converted to milliseconds using the current +sample rate and is then passed to the native FFmpeg shim. After seeking, +the current PCM position is updated from the native decoder. 
+ +@section{Metadata} + +The @racket['metadata] command returns a mutable hash with the following +keys: + +@itemlist[ + #:style 'compact + @item{@racket['title]} + @item{@racket['author]} + @item{@racket['album]} + @item{@racket['genre]} + @item{@racket['comment]} + @item{@racket['copyright]} + @item{@racket['year]} + @item{@racket['track]} + @item{@racket['bitrate]} + @item{@racket['duration-ms]} + @item{@racket['audio-streams]} +] + +Missing string fields are returned as empty strings. Missing numeric +fields are returned as @racket[-1]. + +@section{Native Library} + +The module loads a shared library named @racket["ffmpeg_audio"] or +@racket["libffmpeg_audio"] using @racket[get-lib]. + +The native layer is expected to provide an instance-only FFmpeg API. +The relevant C-side properties are: + +@itemlist[ + #:style 'compact + @item{decoder state is stored in an opaque @tt{fmpg_instance};} + @item{output is signed 32-bit interleaved PCM;} + @item{the native buffer remains valid only until the next decode, seek, + close or free call;} + @item{Racket copies the buffer before passing it upward.} +] + +@section{Errors} + +Native failures are reported as Racket errors. Examples include failure +to allocate the native instance, failure to open a file and failure to +seek to a requested sample position. + +Unknown commands also raise an error. \ No newline at end of file