initial import from racket-sound -> racket-audio

2026-05-04 12:07:45 +02:00
parent f500f1711b
commit 87980f508a
28 changed files with 6282 additions and 16 deletions
@@ -0,0 +1,218 @@
+#lang scribble/manual
+
+@title{FFmpeg Audio Backend}
+@author{@author+email["Hans Dijkema" "hans@dijkewijk.nl"]}
+
+@section{Overview}
+
+The FFmpeg audio backend is a small C++ wrapper with a plain C ABI. It hides
+the FFmpeg data structures from the caller and exposes a simple
+audio-only decoder interface.
+
+The caller does not handle FFmpeg streams, packets, frames, codec
+contexts or resampler objects. A file is opened, the best audio stream is
+selected, and decoding is performed by repeatedly calling
+@tt{fmpg_decode_next}.
+
+The output format is fixed: signed 32-bit integer PCM, interleaved, in
+native-endian byte order.
+
+A sample frame is the set of samples, one per channel, that belong to the
+same point in time. For stereo S32, one sample frame contains two
+@tt{int32_t} values and therefore takes 8 bytes.
+
+@section{Opaque Instance}
+
+@verbatim|{
+typedef struct fmpg_instance fmpg_instance;
+}|
+
+The decoder instance is opaque. The caller only receives and passes
+around a pointer to this type. All FFmpeg state is stored internally.
+
+@section{Lifecycle}
+
+@verbatim|{
+fmpg_instance *fmpg_init(void);
+}|
+
+Creates a new decoder instance.
+
+Before allocating the instance, the backend checks whether the FFmpeg major
+versions used at compile time match the FFmpeg major versions available
+at runtime. If they do not match, @tt{NULL} is returned.
+
+Returns a pointer to a new @tt{fmpg_instance}, or @tt{NULL} on failure.
+
+@verbatim|{
+void fmpg_free(fmpg_instance *instance);
+}|
+
+Frees the decoder instance. If the instance still has an open input, it
+is closed as part of destruction.
+
+@verbatim|{
+int fmpg_open_file(fmpg_instance *instance, const char *filename);
+}|
+
+Opens a media file, selects the best audio stream, initializes the
+decoder and initializes the resampler.
+
+After a successful call, stream information, duration and metadata can be
+read using the getter functions.
+
+Returns @tt{1} on success and @tt{0} on failure. The call fails if the
+instance is @tt{NULL}, if a file is already open, if @tt{filename} is
+@tt{NULL}, if no usable audio stream is found, or if FFmpeg cannot open
+or initialize the file.
+
+@verbatim|{
+void fmpg_close(fmpg_instance *instance);
+}|
+
+Closes the current file and releases all FFmpeg state owned by the
+instance. The instance itself remains valid and may be reused.
+
+@verbatim|{
+int fmpg_is_open(fmpg_instance *instance);
+}|
+
+Returns @tt{1} if the instance is open and ready to decode. Otherwise
+returns @tt{0}.
+
+@section{Audio Information}
+
+@verbatim|{
+int fmpg_audio_stream_count(fmpg_instance *instance);
+int fmpg_audio_sample_rate(fmpg_instance *instance);
+int fmpg_audio_channels(fmpg_instance *instance);
+int fmpg_audio_bits_per_sample(fmpg_instance *instance);
+int fmpg_audio_bytes_per_sample(fmpg_instance *instance);
+int64_t fmpg_duration_ms(fmpg_instance *instance);
+int64_t fmpg_duration_samples(fmpg_instance *instance);
+}|
+
+These functions return information about the opened file and its selected
+audio stream.
+
+@itemlist[
+  #:style 'compact
+  @item{@tt{fmpg_audio_stream_count} returns the number of audio streams found in the opened file, or @tt{0}.}
+  @item{@tt{fmpg_audio_sample_rate} returns the selected stream's sample rate, or @tt{0}.}
+  @item{@tt{fmpg_audio_channels} returns the selected stream's channel count, or @tt{0}.}
+  @item{@tt{fmpg_audio_bits_per_sample} always returns @tt{32}.}
+  @item{@tt{fmpg_audio_bytes_per_sample} always returns @tt{4}.}
+  @item{@tt{fmpg_duration_ms} returns the duration in milliseconds, or @tt{-1}.}
+  @item{@tt{fmpg_duration_samples} returns the duration in output sample frames, or @tt{-1}.}
+]
+
+@section{Metadata}
+
+@verbatim|{
+const char *fmpg_file_title(fmpg_instance *instance);
+const char *fmpg_file_author(fmpg_instance *instance);
+const char *fmpg_file_album(fmpg_instance *instance);
+const char *fmpg_file_genre(fmpg_instance *instance);
+const char *fmpg_file_comment(fmpg_instance *instance);
+const char *fmpg_file_copyright(fmpg_instance *instance);
+int fmpg_file_year(fmpg_instance *instance);
+int fmpg_file_track(fmpg_instance *instance);
+int64_t fmpg_file_bitrate(fmpg_instance *instance);
+}|
+
+The metadata getters return values read from the container metadata. A
+missing string value is returned as an empty string. A missing numeric
+value is returned as @tt{-1}. @tt{fmpg_file_author} returns the
+@tt{artist} metadata field.
+
+@section{Decoding}
+
+@verbatim|{
+int fmpg_decode_next(fmpg_instance *instance);
+}|
+
+Decodes the next block of audio.
+
+Internally, the backend reads packets from the selected audio stream, feeds
+them to the FFmpeg decoder, receives all available decoded frames,
+converts them to signed 32-bit interleaved PCM, and concatenates the
+result in the instance output buffer.
+
+Packets from non-selected streams are skipped internally.
+
+Returns @tt{1} if decoded PCM data is available through
+@tt{fmpg_buffer} and @tt{fmpg_buffer_size}. Returns @tt{0} at EOF or on
+error.
+
+@verbatim|{
+int fmpg_seek_ms(fmpg_instance *instance, int64_t target_pos_ms);
+}|
+
+Seeks to an absolute position in milliseconds.
+
+FFmpeg may seek to a packet before the requested timestamp. When
+timestamps are available, this backend then discards decoded pre-roll
+samples until the requested output sample position is reached.
+
+Returns @tt{1} on success and @tt{0} on failure.
+
+@section{Output Buffer and Sample Positions}
+
+@verbatim|{
+const uint8_t *fmpg_buffer(fmpg_instance *instance);
+int fmpg_buffer_size(fmpg_instance *instance);
+int64_t fmpg_buffer_samples(fmpg_instance *instance);
+int64_t fmpg_buffer_start_sample(fmpg_instance *instance);
+int64_t fmpg_buffer_end_sample(fmpg_instance *instance);
+int64_t fmpg_sample_position(fmpg_instance *instance);
+double fmpg_timecode(fmpg_instance *instance);
+}|
+
+@tt{fmpg_buffer} returns a pointer to the current decoded PCM buffer, or
+@tt{NULL} if there is no current buffer. The pointer remains valid only
+until the next API call that decodes, seeks, closes or frees the
+instance.
+
+@tt{fmpg_buffer_size} returns the size of the current buffer in bytes.
+@tt{fmpg_buffer_samples} returns the number of sample frames in the
+current buffer. @tt{fmpg_buffer_start_sample} returns the absolute
+sample-frame index of the first sample frame in the buffer, and
+@tt{fmpg_buffer_end_sample} returns the absolute sample-frame index just
+after the current buffer.
+
+@tt{fmpg_sample_position} returns the current absolute sample-frame
+position in the audio stream. After a successful @tt{fmpg_decode_next},
+this is the same value as @tt{fmpg_buffer_end_sample}.
+
+@tt{fmpg_timecode} returns the approximate start time of the current
+decoded block in seconds.
+
+@section{FFmpeg Version Checks}
+
+@verbatim|{
+const char *fmpg_ffmpeg_version(void);
+const char *fmpg_int_version2string(unsigned version);
+int fmpg_compatible_ffmpeg(void);
+}|
+
+@tt{fmpg_ffmpeg_version} returns a string describing the FFmpeg versions
+used when the backend was compiled. The string includes avformat, avcodec,
+swresample and avutil.
+
+@tt{fmpg_int_version2string} converts an FFmpeg integer version value to
+a string of the form @tt{major.minor.micro}.
+
+@tt{fmpg_compatible_ffmpeg} checks whether the FFmpeg major versions used
+at compile time match the FFmpeg major versions available at runtime.
+It returns @tt{1} when the versions are compatible and @tt{0} otherwise.
+
+@section{Decoder Model}
+
+The backend uses the modern FFmpeg send/receive decoding model. Packets are
+sent with @tt{avcodec_send_packet}, decoded frames are received with
+@tt{avcodec_receive_frame}, and conversion to the fixed output format is
+done with libswresample.
+
+The public API intentionally avoids exposing these details. From the
+caller's perspective, decoding is a sequence of calls to
+@tt{fmpg_decode_next}, each followed by reading the current output buffer
+and its sample-position metadata.