documentation update

2026-04-28 17:59:48 +02:00
parent 9c3514fd3f
commit f3a842e6b9
6 changed files with 630 additions and 1 deletions
@@ -12,6 +12,10 @@
    ("scrbl/audio-decoder.scrbl" () (library))
    ("scrbl/flac-decoder.scrbl" () (library))
    ("scrbl/mp3-decoder.scrbl" () (library))
+    ("scrbl/audio-sniffer.scrbl" () (library))
+    ("scrbl/ffmpeg-ffi.scrbl" () (library))
+    ("scrbl/ffmpeg-decoder.scrbl" () (library))
+    ("scrbl/ffmpeg-c-backend.scrbl" () (library))
    )
  )

@@ -51,7 +51,6 @@
 (define (ao-supported-music-format? f)
  (and (symbol? f)
       (or (eq? f 'flac)
-           (eq? f 'mp3)
           (eq? f 'ao))))


@@ -0,0 +1,126 @@
+#lang scribble/manual
+
+@title{Audio Sniffer}
+@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]}
+
+@defmodule[audio-sniffer]
+
+This module provides utilities to determine the format of an audio file.
+It combines lightweight content sniffing with extension-based fallback.
+
+The sniffer is designed to be robust against incomplete data and
+progressively increases the amount of data inspected.
+
+@defproc[(audio-sniff-extension [file path-string?]) (or/c string? #f)]{
+Returns the file extension (without dot) or @racket[#f] if none could be determined.
+
+The result is purely based on the filename and does not inspect file
+contents.
+}
+
+@defproc[(audio-sniff-format [file path-string?]) symbol?]{
+Determines the audio format by inspecting the file contents.
+
+The function reads portions of the file (both head and tail) and tries
+to match known signatures.
+
+Returns a symbol such as:
+
+@itemlist[
+  #:style 'compact
+  @item{@racket['mp3]}
+  @item{@racket['flac]}
+  @item{@racket['ogg]}
+  @item{@racket['wav]}
+  @item{@racket['aiff]}
+  @item{@racket['mp4]}
+  @item{@racket['unknown]}
+]
+
+If the file cannot be read, a symbol describing the filesystem problem
+(for example @racket['file-not-found]) is returned instead of raising an exception.
+}
+
+@defproc[(audio-sniff-format/extension [file path-string?]) symbol?]{
+Determines the audio format using sniffing, with a fallback to the file
+extension.
+
+If @racket[audio-sniff-format] returns @racket['unknown], the extension
+is used as a secondary source.
+
+This function provides the most practical format detection.
+}
+
+@defproc[(audio-format-known? [fmt symbol?]) boolean?]{
+Returns @racket[#t] if the given format symbol is recognized by the
+sniffer.
+}
+
+@defproc[(audio-format-matches? [file path-string?]
+                               [formats (listof symbol?)])
+         boolean?]{
+Returns @racket[#t] if the detected format of @racket[file] is a member
+of @racket[formats].
+
+Detection is performed using @racket[audio-sniff-format/extension].
+}
+
+@section{Detection Strategy}
+
+The sniffer follows a layered strategy:
+
+@itemlist[
+  #:style 'compact
+  @item{Read a small head section of the file (initially 4096 bytes)}
+  @item{Inspect tail sections for formats that store metadata at the end (e.g. mp4)}
+  @item{Increase the head size exponentially if needed}
+  @item{Fall back to the file extension if no signature is found}
+]
+
+This approach balances performance with robustness, especially for
+container formats where identifying markers may not be located at the
+start of the file.
+
+@section{Error Handling}
+
+Instead of raising exceptions, the sniffer returns symbolic error
+conditions when the file cannot be inspected:
+
+@itemlist[
+  #:style 'compact
+  @item{@racket['file-not-found]}
+  @item{@racket['file-not-readable]}
+  @item{@racket['not-a-file]}
+]
+
+These values can be handled by higher-level code without requiring
+exception handling.
+
+@section{Supported Formats}
+
+The sniffer recognizes a broad range of formats, including:
+
+@itemlist[
+  #:style 'compact
+  @item{mp3}
+  @item{flac}
+  @item{ogg / opus}
+  @item{wav}
+  @item{aiff}
+  @item{mp4 / m4a}
+  @item{aac}
+  @item{alac}
+  @item{ac3}
+  @item{ape}
+  @item{wavpack}
+  @item{wma}
+  @item{matroska}
+]
+
+Extension fallback supports the same set of formats.
+
+@section{Notes}
+
+Sniffing is heuristic in nature. Although detection is generally reliable,
+malformed or unusual files may still be reported as @racket['unknown].
+
@@ -0,0 +1,218 @@
+#lang scribble/manual
+
+@title{FFmpeg Audio Backend}
+@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]}
+
+@section{Overview}
+
+The FFmpeg audio backend is a small C++ wrapper with a plain C ABI. It hides
+the FFmpeg data structures from the caller and exposes a simple
+audio-only decoder interface.
+
+The caller does not handle FFmpeg streams, packets, frames, codec
+contexts or resampler objects. A file is opened, the best audio stream is
+selected, and decoding is performed by repeatedly calling
+@tt{fmpg_decode_next}.
+
+The output format is fixed: signed 32-bit integer PCM, interleaved, in
+native byte order.
+
+A sample frame consists of one sample for every channel at a single
+point in time. For stereo S32, one sample frame contains two
+@tt{int32_t} values and therefore takes 8 bytes.
+
+@section{Opaque Instance}
+
+@verbatim|{
+typedef struct fmpg_instance fmpg_instance;
+}|
+
+The decoder instance is opaque. The caller only receives and passes
+around a pointer to this type. All FFmpeg state is stored internally.
+
+@section{Lifecycle}
+
+@verbatim|{
+fmpg_instance *fmpg_init(void);
+}|
+
+Creates a new decoder instance.
+
+Before allocating the instance, the backend checks whether the FFmpeg major
+versions used at compile time match the FFmpeg major versions available
+at runtime. If they do not match, @tt{NULL} is returned.
+
+Returns a pointer to a new @tt{fmpg_instance}, or @tt{NULL} on failure.
+
+@verbatim|{
+void fmpg_free(fmpg_instance *instance);
+}|
+
+Frees the decoder instance. If the instance still has an open input, it
+is closed as part of destruction.
+
+@verbatim|{
+int fmpg_open_file(fmpg_instance *instance, const char *filename);
+}|
+
+Opens a media file, selects the best audio stream, initializes the
+decoder and initializes the resampler.
+
+After a successful call, stream information, duration and metadata can be
+read using the getter functions.
+
+Returns @tt{1} on success and @tt{0} on failure. The call fails if the
+instance is @tt{NULL}, if a file is already open, if @tt{filename} is
+@tt{NULL}, if no usable audio stream is found, or if FFmpeg cannot open
+or initialize the file.
+
+@verbatim|{
+void fmpg_close(fmpg_instance *instance);
+}|
+
+Closes the current file and releases all FFmpeg state owned by the
+instance. The instance itself remains valid and may be reused.
+
+@verbatim|{
+int fmpg_is_open(fmpg_instance *instance);
+}|
+
+Returns @tt{1} if the instance is open and ready to decode. Otherwise
+returns @tt{0}.
+
+@section{Audio Information}
+
+@verbatim|{
+int fmpg_audio_stream_count(fmpg_instance *instance);
+int fmpg_audio_sample_rate(fmpg_instance *instance);
+int fmpg_audio_channels(fmpg_instance *instance);
+int fmpg_audio_bits_per_sample(fmpg_instance *instance);
+int fmpg_audio_bytes_per_sample(fmpg_instance *instance);
+int64_t fmpg_duration_ms(fmpg_instance *instance);
+int64_t fmpg_duration_samples(fmpg_instance *instance);
+}|
+
+These functions return information about the opened file and its selected audio stream.
+
+@itemlist[
+  #:style 'compact
+  @item{@tt{fmpg_audio_stream_count} returns the number of audio streams found in the opened file, or @tt{0}.}
+  @item{@tt{fmpg_audio_sample_rate} returns the selected stream's sample rate, or @tt{0}.}
+  @item{@tt{fmpg_audio_channels} returns the selected stream's channel count, or @tt{0}.}
+  @item{@tt{fmpg_audio_bits_per_sample} always returns @tt{32}.}
+  @item{@tt{fmpg_audio_bytes_per_sample} always returns @tt{4}.}
+  @item{@tt{fmpg_duration_ms} returns the duration in milliseconds, or @tt{-1}.}
+  @item{@tt{fmpg_duration_samples} returns the duration in output sample frames, or @tt{-1}.}
+]
+
+@section{Metadata}
+
+@verbatim|{
+const char *fmpg_file_title(fmpg_instance *instance);
+const char *fmpg_file_author(fmpg_instance *instance);
+const char *fmpg_file_album(fmpg_instance *instance);
+const char *fmpg_file_genre(fmpg_instance *instance);
+const char *fmpg_file_comment(fmpg_instance *instance);
+const char *fmpg_file_copyright(fmpg_instance *instance);
+int fmpg_file_year(fmpg_instance *instance);
+int fmpg_file_track(fmpg_instance *instance);
+int64_t fmpg_file_bitrate(fmpg_instance *instance);
+}|
+
+The metadata getters return values read from the container metadata. A
+missing string value is returned as an empty string. A missing numeric
+value is returned as @tt{-1}. @tt{fmpg_file_author} returns the
+@tt{artist} metadata field.
+
+@section{Decoding}
+
+@verbatim|{
+int fmpg_decode_next(fmpg_instance *instance);
+}|
+
+Decodes the next block of audio.
+
+Internally, the backend reads packets from the selected audio stream, feeds
+them to the FFmpeg decoder, receives all available decoded frames,
+converts them to signed 32-bit interleaved PCM, and concatenates the
+result in the instance output buffer.
+
+Packets from non-selected streams are skipped internally.
+
+Returns @tt{1} if decoded PCM data is available through
+@tt{fmpg_buffer} and @tt{fmpg_buffer_size}. Returns @tt{0} at EOF or on
+error.
+
+@verbatim|{
+int fmpg_seek_ms(fmpg_instance *instance, int64_t target_pos_ms);
+}|
+
+Seeks to an absolute position in milliseconds.
+
+FFmpeg may seek to a packet before the requested timestamp. When
+timestamps are available, the backend discards decoded pre-roll samples
+after the seek until the requested output sample position is reached.
+
+Returns @tt{1} on success and @tt{0} on failure.
+
+@section{Output Buffer and Sample Positions}
+
+@verbatim|{
+const uint8_t *fmpg_buffer(fmpg_instance *instance);
+int fmpg_buffer_size(fmpg_instance *instance);
+int64_t fmpg_buffer_samples(fmpg_instance *instance);
+int64_t fmpg_buffer_start_sample(fmpg_instance *instance);
+int64_t fmpg_buffer_end_sample(fmpg_instance *instance);
+int64_t fmpg_sample_position(fmpg_instance *instance);
+double fmpg_timecode(fmpg_instance *instance);
+}|
+
+@tt{fmpg_buffer} returns a pointer to the current decoded PCM buffer, or
+@tt{NULL} if there is no current buffer. The pointer remains valid only
+until the next API call that decodes, seeks, closes or frees the
+instance.
+
+@tt{fmpg_buffer_size} returns the size of the current buffer in bytes.
+@tt{fmpg_buffer_samples} returns the number of sample frames in the
+current buffer. @tt{fmpg_buffer_start_sample} returns the absolute
+sample-frame index of the first sample frame in the buffer, and
+@tt{fmpg_buffer_end_sample} returns the absolute sample-frame index just
+after the current buffer.
+
+@tt{fmpg_sample_position} returns the current absolute sample position in
+the music stream. After a successful @tt{fmpg_decode_next}, this is the
+same value as @tt{fmpg_buffer_end_sample}.
+
+@tt{fmpg_timecode} returns the approximate start time of the current
+decoded block in seconds.
+
+@section{FFmpeg Version Checks}
+
+@verbatim|{
+const char *fmpg_ffmpeg_version(void);
+const char *fmpg_int_version2string(unsigned version);
+int fmpg_compatible_ffmpeg(void);
+}|
+
+@tt{fmpg_ffmpeg_version} returns a string describing the FFmpeg versions
+used when the backend was compiled. The string includes avformat, avcodec,
+swresample and avutil.
+
+@tt{fmpg_int_version2string} converts an FFmpeg integer version value to
+a string of the form @tt{major.minor.micro}.
+
+@tt{fmpg_compatible_ffmpeg} checks whether the FFmpeg major versions used
+at compile time match the FFmpeg major versions available at runtime.
+It returns @tt{1} when the versions are compatible and @tt{0} otherwise.
+
+@section{Decoder Model}
+
+The backend uses the modern FFmpeg send/receive decoding model. Packets are
+sent with @tt{avcodec_send_packet}, decoded frames are received with
+@tt{avcodec_receive_frame}, and conversion to the fixed output format is
+done with libswresample.
+
+The public API intentionally avoids exposing these details. From the
+caller's perspective, decoding is a sequence of calls to
+@tt{fmpg_decode_next}, each followed by reading the current output buffer
+and its sample-position metadata.
@@ -0,0 +1,122 @@
+#lang scribble/manual
+
+@title{FFmpeg Decoder}
+@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]}
+
+@defmodule[ffmpeg-decoder]
+
+This module provides an audio decoder based on the FFmpeg audio shim. It
+uses the lower-level @racketmodname[ffmpeg-ffi] module and presents a
+callback-based decoder interface comparable to the other audio decoders.
+
+The native FFmpeg layer decodes audio to signed 32-bit interleaved PCM.
+The decoder therefore reports 32 bits per sample and 4 bytes per sample
+when no more specific information is available.
+
+@defproc[(ffmpeg-valid? [audio-file any/c]) boolean?]{
+Always returns @racket[#t], whatever the value of @racket[audio-file].
+
+This predicate is deliberately weak. Existence and extension checks are
+expected to be performed by the generic audio-decoder layer. Actual file
+validation is done when the FFmpeg shim opens the file.
+}
+
+@defproc[(ffmpeg-open [audio-file (or/c path? string?)]
+                      [cb-stream-info procedure?]
+                      [cb-audio procedure?])
+         (or/c any/c #f)]{
+Opens @racket[audio-file] and returns an opaque decoder handle, or
+@racket[#f] if the file does not exist.
+
+If @racket[audio-file] is a path, it is converted to a string before it
+is passed to the native layer.
+
+The @racket[cb-stream-info] callback is called with a mutable hash that
+describes the stream. The @racket[cb-audio] callback is called with the
+same kind of hash, a PCM buffer pointer and the buffer size in bytes.
+}
+
+@defproc[(ffmpeg-read [handle any/c]) any/c]{
+Starts reading and decoding audio from @racket[handle].
+
+This function loops until decoding reaches the end of the stream or
+until @racket[ffmpeg-stop] requests termination. During the read loop,
+pending seek requests made with @racket[ffmpeg-seek] are applied before
+the next native read.
+
+The stream-info callback is called when format information becomes
+available. The audio callback is called as:
+
+@racketblock[
+(cb-audio info buffer size)
+]
+
+where @racket[info] is a mutable hash, @racket[buffer] is a pointer to
+interleaved signed 32-bit PCM data, and @racket[size] is the size of the
+buffer in bytes.
+
+When reading stops, the native FFmpeg instance is closed and deleted.
+}
+
+@defproc[(ffmpeg-seek [handle any/c]
+                      [percentage real?])
+         void?]{
+Requests a seek operation.
+
+The @racket[percentage] argument is interpreted as a percentage of the
+total number of samples in the stream. Fractional percentages are
+allowed. The actual seek is performed by @racket[ffmpeg-read] before the
+next native read call.
+
+If the total sample count is unknown or invalid, no seek request is made.
+}
+
+@defproc[(ffmpeg-stop [handle any/c]) void?]{
+Requests the read loop to stop.
+
+This function waits until @racket[ffmpeg-read] has left its read loop.
+It polls the internal reading flag with a short sleep interval.
+}
+
+@section{Stream Information}
+
+The stream-info and audio callbacks receive a mutable hash. The decoder
+stores at least the following keys:
+
+@itemlist[
+  #:style 'compact
+  @item{@racket['sample-rate]}
+  @item{@racket['channels]}
+  @item{@racket['bits-per-sample]}
+  @item{@racket['bytes-per-sample]}
+  @item{@racket['total-samples]}
+  @item{@racket['duration]}
+]
+
+For audio callbacks, the hash is also updated with:
+
+@itemlist[
+  #:style 'compact
+  @item{@racket['sample], the current sample position}
+  @item{@racket['current-time], the current time in seconds}
+]
+
+If the native layer omits format values, the decoder fills in the most
+recent known values. Initial defaults are 44100 Hz, 2 channels, 32 bits
+per sample and 4 bytes per sample.
+
+@section{Decoding Model}
+
+The decoder keeps a small Racket handle around the native FFmpeg handler.
+The handle stores the callbacks, stop and seek state, the current reading
+state and the current format hash.
+
+Seeking is asynchronous with respect to @racket[ffmpeg-seek]: the
+function only records the requested target sample. The read loop applies
+the pending seek request before decoding the next block.
+
+@section{Notes}
+
+The FFmpeg shim output is expected to be signed 32-bit interleaved PCM.
+This keeps the decoder interface suitable for a playback pipeline that
+feeds decoded audio to libao.
@@ -0,0 +1,160 @@
+#lang scribble/manual
+
+@title{FFmpeg FFI}
+@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]}
+
+@defmodule[ffmpeg-ffi]
+
+This module provides the low-level Racket FFI binding for the native
+FFmpeg audio shim. The native shim exposes an opaque FFmpeg instance and
+keeps all decoder state inside that instance.
+
+The output format of the native shim is signed 32-bit interleaved PCM.
+The buffer returned by the native layer is copied into Racket-managed
+memory before it is passed to higher layers.
+
+@defproc[(fmpg-ffi-decoder-handler) procedure?]{
+Creates a new FFmpeg decoder command handler.
+
+The returned procedure manages one native FFmpeg instance. Commands are
+sent as a symbol followed by command-specific arguments.
+
+@itemlist[
+  #:style 'compact
+  @item{@racket['new] creates the native FFmpeg instance and returns @racket[#t].}
+  @item{@racket['delete] frees the native FFmpeg instance and returns @racket[#t].}
+  @item{@racket['init] opens a file and fetches stream and metadata information.}
+  @item{@racket['close] closes the currently opened file.}
+  @item{@racket['format] calls a format callback with the current stream format.}
+  @item{@racket['info] writes stream information to the sound logger.}
+  @item{@racket['read] decodes the next audio block.}
+  @item{@racket['seek] seeks to an absolute PCM sample position.}
+  @item{@racket['tell] returns the current PCM sample position.}
+  @item{@racket['file] returns the currently opened filename.}
+  @item{@racket['metadata] returns a hash with file metadata.}
+]
+}
+
+@section{Command Interface}
+
+The command handler is used as follows:
+
+@racketblock[
+(define h (fmpg-ffi-decoder-handler))
+
+(h 'new)
+(h 'init filename)
+(h 'read audio-callback format-callback)
+(h 'close)
+(h 'delete)
+]
+
+The @racket['new] command must be called before @racket['init]. A
+handler owns at most one native FFmpeg instance. Calling @racket['new]
+twice without @racket['delete] raises an error.
+
+@section{Format Callback}
+
+The @racket['format] command and the first @racket['read] call report
+the stream format by calling the supplied callback as follows:
+
+@racketblock[
+(format-callback pcm-pos
+                 sample-rate
+                 channels
+                 bits-per-sample
+                 bytes-per-sample
+                 pcm-length)
+]
+
+The @racket[pcm-pos] argument is the current PCM sample position.
+The @racket[pcm-length] argument is the total number of PCM samples, or
+@racket[-1] when this is not known.
+
+@section{Reading Audio}
+
+The @racket['read] command decodes one audio block. It expects an audio
+callback and a format callback:
+
+@racketblock[
+(h 'read audio-callback format-callback)
+]
+
+On the first read, the format callback is called before audio data is
+returned. If decoding produces data, the audio callback is called as:
+
+@racketblock[
+(audio-callback 'data pcm-pos buffer size)
+]
+
+The @racket[pcm-pos] argument is the absolute sample position of the
+first sample frame in the buffer. The @racket[buffer] argument points to
+a copied PCM buffer, and @racket[size] is the buffer size in bytes.
+
+When the stream ends, the callback is called as:
+
+@racketblock[
+(audio-callback 'done -1 #f 0)
+]
+
+The command returns @racket[#t].
+
+@section{Seeking}
+
+The @racket['seek] command takes an absolute PCM sample position:
+
+@racketblock[
+(h 'seek pcm-pos)
+]
+
+The sample position is converted to milliseconds using the current
+sample rate and is then passed to the native FFmpeg shim. After seeking,
+the current PCM position is updated from the native decoder.
+
+@section{Metadata}
+
+The @racket['metadata] command returns a mutable hash with the following
+keys:
+
+@itemlist[
+  #:style 'compact
+  @item{@racket['title]}
+  @item{@racket['author]}
+  @item{@racket['album]}
+  @item{@racket['genre]}
+  @item{@racket['comment]}
+  @item{@racket['copyright]}
+  @item{@racket['year]}
+  @item{@racket['track]}
+  @item{@racket['bitrate]}
+  @item{@racket['duration-ms]}
+  @item{@racket['audio-streams]}
+]
+
+Missing string fields are returned as empty strings. Missing numeric
+fields are returned as @racket[-1].
+
+@section{Native Library}
+
+The module loads a shared library named @racket["ffmpeg_audio"] or
+@racket["libffmpeg_audio"] using @racket[get-lib].
+
+The native layer is expected to provide an instance-only FFmpeg API.
+The relevant C-side properties are:
+
+@itemlist[
+  #:style 'compact
+  @item{decoder state is stored in an opaque @tt{fmpg_instance};}
+  @item{output is signed 32-bit interleaved PCM;}
+  @item{the native buffer remains valid only until the next decode, seek,
+        close or free call;}
+  @item{Racket copies the buffer before passing it upward.}
+]
+
+@section{Errors}
+
+Native failures are reported as Racket errors. Examples include failure
+to allocate the native instance, failure to open a file and failure to
+seek to a requested sample position.
+
+Unknown commands also raise an error.