From d7be947886bd11a68d98447ebb59dc5084aa2b14 Mon Sep 17 00:00:00 2001 From: Hans Dijkema Date: Fri, 5 Jun 2026 22:17:10 +0200 Subject: [PATCH] =?UTF-8?q?=C3=92pus=20toevoeging=20via=20xiph=20library?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- audio-decoder.rkt | 48 ++- main.rkt | 3 + opusfile-decoder.rkt | 300 +++++++++++++++++++ scrbl/ffmpeg-definitions2.scrbl | 441 ++++++++++++++++++++++++++++ scrbl/libao-async-ffi-racket2.scrbl | 306 +++++++++++++++++++ 5 files changed, 1082 insertions(+), 16 deletions(-) create mode 100644 opusfile-decoder.rkt create mode 100644 scrbl/ffmpeg-definitions2.scrbl create mode 100644 scrbl/libao-async-ffi-racket2.scrbl diff --git a/audio-decoder.rkt b/audio-decoder.rkt index 6519421..afef104 100644 --- a/audio-decoder.rkt +++ b/audio-decoder.rkt @@ -2,6 +2,7 @@ (require "flac-decoder.rkt" "mp3-decoder.rkt" + "opusfile-decoder.rkt" "ffmpeg-decoder.rkt" "audio-sniffer.rkt" "private/utils.rkt" @@ -22,6 +23,8 @@ make-audio-reader audio-handle? audio-supported-extensions + current-opusfile-output-format + opusfile-output-format? ) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -56,7 +59,18 @@ mp3-stop 'ao)) - ;; FFmpeg decodere + ;; Opus, via Xiph libopusfile + (hash-set! audio-readers + 'opusfile + (make-audio-reader '("opus") + opusfile-valid? + opusfile-open + opusfile-read + opusfile-seek + opusfile-stop + 'ao)) + + ;; FFmpeg decoder (hash-set! audio-readers 'ffmpeg (make-audio-reader '("ogg" "oga" "opus" @@ -229,21 +243,23 @@ (not (null? (filter (λ (e) (string-ci=? ext e)) (audio-reader-exts reader))))) (define reader-for-kind - (make-hash '((mp3 . ffmpeg) ; ffmpeg does a better job on gapless playback... - (flac . flac) - (ogg . ffmpeg) - (vorbis . ffmpeg) - (opus . ffmpeg) - (wav . ffmpeg) - (aiff . ffmpeg) - (mp4 . ffmpeg) - (aac . ffmpeg) - (alac . ffmpeg) - (ac3 . ffmpeg) - (ape . ffmpeg) - (wavpack . ffmpeg) - (wma . 
ffmpeg) - (matroska . ffmpeg)))) + (make-hash + (list (cons 'mp3 'ffmpeg) ; ffmpeg does a better job on gapless playback... + (cons 'flac 'flac) + (cons 'ogg 'ffmpeg) + (cons 'vorbis 'ffmpeg) + (cons 'opus (if (opusfile-available?) 'opusfile 'ffmpeg)) + (cons 'wav 'ffmpeg) + (cons 'aiff 'ffmpeg) + (cons 'mp4 'ffmpeg) + (cons 'aac 'ffmpeg) + (cons 'alac 'ffmpeg) + (cons 'ac3 'ffmpeg) + (cons 'ape 'ffmpeg) + (cons 'wavpack 'ffmpeg) + (cons 'wma 'ffmpeg) + (cons 'matroska 'ffmpeg)))) + (define (find-reader audio-file) diff --git a/main.rkt b/main.rkt index 5c558e4..0104ce4 100644 --- a/main.rkt +++ b/main.rkt @@ -3,10 +3,13 @@ (require "taglib.rkt" "audio-sniffer.rkt" "audio-player.rkt" + "opusfile-decoder.rkt" ) (provide (all-from-out "taglib.rkt") (all-from-out "audio-sniffer.rkt") (all-from-out "audio-player.rkt") + current-opusfile-output-format + opusfile-output-format? ) diff --git a/opusfile-decoder.rkt b/opusfile-decoder.rkt new file mode 100644 index 0000000..4c5c71c --- /dev/null +++ b/opusfile-decoder.rkt @@ -0,0 +1,300 @@ +(module opusfile-decoder racket/base + + (require ffi/unsafe + "private/utils.rkt") + + (provide opusfile-open + opusfile-valid? + opusfile-read + opusfile-stop + opusfile-seek + opusfile-available? + current-opusfile-output-format + opusfile-output-format?) + + ;; Xiph libopusfile backend for Ogg Opus streams. + ;; + ;; By default this backend uses op_read(), which returns signed 16-bit + ;; interleaved PCM. That is the most efficient path for direct libao + ;; playback. For users who prefer the wider decoder output path, set + ;; current-opusfile-output-format to 's24. In that mode the backend uses + ;; op_read_float() and converts the interleaved float output to packed signed + ;; 24-bit PCM in native byte order. + ;; + ;; Opus decode output is always 48 kHz PCM. The original input rate, if + ;; present in metadata, is not the actual decoder output rate. + + (define libopusfile + (with-handlers ([exn:fail? 
(lambda (_) #f)]) + (ffi-lib "libopusfile" '("0" #f)))) + + (define _OggOpusFile _pointer) + + (define default-frames-per-read 4096) + (define opus-sample-rate 48000) + + (define (opusfile-output-format? v) + (or (eq? v 's16) (eq? v 's24))) + + (define current-opusfile-output-format + (make-parameter 's16 + (lambda (v) + (unless (opusfile-output-format? v) + (raise-argument-error 'current-opusfile-output-format "(or/c 's16 's24)" v)) + v))) + + (define (opus-bits-per-sample) + (case (current-opusfile-output-format) + [(s16) 16] + [(s24) 24])) + + (define (opus-bytes-per-sample) + (case (current-opusfile-output-format) + [(s16) 2] + [(s24) 3])) + + (define (ffi-proc name type) + (and libopusfile + (with-handlers ([exn:fail? (lambda (_) #f)]) + (get-ffi-obj name libopusfile type)))) + + (define op_open_file + (ffi-proc "op_open_file" + (_fun _path (err : (_ptr o _int)) + -> (r : _OggOpusFile) + -> (values r err)))) + + (define op_free + (ffi-proc "op_free" + (_fun _OggOpusFile -> _void))) + + (define op_channel_count + (ffi-proc "op_channel_count" + (_fun _OggOpusFile _int -> _int))) + + (define op_pcm_total + (ffi-proc "op_pcm_total" + (_fun _OggOpusFile _int -> _int64))) + + (define op_pcm_seek + (ffi-proc "op_pcm_seek" + (_fun _OggOpusFile _int64 -> _int))) + + (define op_read + (ffi-proc "op_read" + (_fun _OggOpusFile _bytes _int (li : (_ptr o _int)) + -> (r : _int) + -> (values r li)))) + + (define op_read_float + (ffi-proc "op_read_float" + (_fun _OggOpusFile _pointer _int (li : (_ptr o _int)) + -> (r : _int) + -> (values r li)))) + + (define (opusfile-available?) + (and libopusfile + op_open_file + op_free + op_channel_count + op_pcm_total + op_pcm_seek + op_read + op_read_float + #t)) + + (define-struct opusfile-handle + (of cb-info cb-audio + (stop #:mutable) + (seek #:mutable) + (reading #:mutable) + (format #:mutable) + (pcm-pos #:mutable)) + #:transparent) + + (define (raise-opus who fmt . 
args) + (apply error who fmt args)) + + (define (check-libopusfile who) + (unless (opusfile-available?) + (raise-opus who "libopusfile could not be loaded"))) + + (define (correct-format-hash h) + (unless (hash-ref h 'sample-rate #f) + (hash-set! h 'sample-rate opus-sample-rate)) + (unless (hash-ref h 'bits-per-sample #f) + (hash-set! h 'bits-per-sample (opus-bits-per-sample))) + (unless (hash-ref h 'bytes-per-sample #f) + (hash-set! h 'bytes-per-sample (opus-bytes-per-sample))) + (unless (hash-ref h 'sample-format #f) + (hash-set! h 'sample-format (current-opusfile-output-format))) + (unless (hash-ref h 'total-samples #f) + (hash-set! h 'total-samples 0) + (hash-set! h 'duration 0))) + + (define (report-format handle) + (let ((cb (opusfile-handle-cb-info handle))) + (when (procedure? cb) + (cb (opusfile-handle-format handle))))) + + (define (make-format channels total-samples) + (let ((h (make-hash))) + (hash-set! h 'duration (if (and (integer? total-samples) (>= total-samples 0)) + (exact->inexact (/ total-samples opus-sample-rate)) + 0.0)) + (hash-set! h 'sample-rate opus-sample-rate) + (hash-set! h 'channels channels) + (hash-set! h 'bits-per-sample (opus-bits-per-sample)) + (hash-set! h 'bytes-per-sample (opus-bytes-per-sample)) + (hash-set! h 'sample-format (current-opusfile-output-format)) + (hash-set! h 'total-samples total-samples) + h)) + + (define (give-audio handle buffer size) + (let ((h (opusfile-handle-format handle))) + (correct-format-hash h) + (hash-set! h 'sample (opusfile-handle-pcm-pos handle)) + (hash-set! h 'current-time (exact->inexact (/ (opusfile-handle-pcm-pos handle) + opus-sample-rate))) + ((opusfile-handle-cb-audio handle) h buffer size))) + + (define s24-pos-scale #x7FFFFF) + (define s24-neg-scale #x800000) + + (define (clip-sample x) + (cond [(< x -1.0) -1.0] + [(> x 1.0) 1.0] + [else x])) + + (define (float->s24 x) + (let ((y (clip-sample x))) + (if (negative? 
y) + (inexact->exact (round (* y s24-neg-scale))) + (inexact->exact (round (* y s24-pos-scale)))))) + + (define (write-s24-native! bs offset sample) + (let ((v (if (negative? sample) (+ sample #x1000000) sample))) + (if (system-big-endian?) + (begin + (bytes-set! bs offset (bitwise-and (arithmetic-shift v -16) #xFF)) + (bytes-set! bs (+ offset 1) (bitwise-and (arithmetic-shift v -8) #xFF)) + (bytes-set! bs (+ offset 2) (bitwise-and v #xFF))) + (begin + (bytes-set! bs offset (bitwise-and v #xFF)) + (bytes-set! bs (+ offset 1) (bitwise-and (arithmetic-shift v -8) #xFF)) + (bytes-set! bs (+ offset 2) (bitwise-and (arithmetic-shift v -16) #xFF)))))) + + (define (opusfile-valid? audio-file) + (and (opusfile-available?) + (file-exists? audio-file) + #t)) + + (define (opusfile-open audio-file* cb-stream-info cb-audio) + (check-libopusfile 'opusfile-open) + (let ((audio-file (if (path? audio-file*) + (path->string audio-file*) + audio-file*))) + (if (file-exists? audio-file) + (let-values (((of err) (op_open_file audio-file))) + (if of + (let* ((channels (op_channel_count of -1)) + (total-samples (op_pcm_total of -1)) + (fmt (make-format channels total-samples)) + (h (make-opusfile-handle of cb-stream-info cb-audio #f #f #f fmt 0))) + (report-format h) + h) + (raise-opus 'opusfile-open + "could not open Opus file ~a; opusfile error code: ~a" + audio-file err))) + #f))) + + (define (handle-pending-seek! handle) + (unless (eq? (opusfile-handle-seek handle) #f) + (let ((sample (opusfile-handle-seek handle))) + (dbg-sound "Seeking opusfile to sample ~a" sample) + (let ((r (op_pcm_seek (opusfile-handle-of handle) sample))) + (when (negative? r) + (err-sound "opusfile seek error: ~a" r)) + (when (not (negative? r)) + (set-opusfile-handle-pcm-pos! handle sample))) + (set-opusfile-handle-seek! 
handle #f)))) + + (define (read-s16 handle channels) + (let* ((max-samples (* default-frames-per-read channels)) + (buffer (make-bytes (* max-samples 2)))) + (let-values (((read-frames link-index) + (op_read (opusfile-handle-of handle) buffer max-samples))) + (cond [(negative? read-frames) + (values read-frames #f 0)] + [(zero? read-frames) + (values 0 #f 0)] + [else + (let* ((read-samples (* read-frames channels)) + (read-bytes (* read-samples 2)) + (out (if (= read-bytes (bytes-length buffer)) buffer (subbytes buffer 0 read-bytes)))) + (values read-frames out read-bytes))])))) + + (define (read-s24 handle channels) + (let* ((max-samples (* default-frames-per-read channels)) + (float-buffer (malloc _float max-samples 'atomic-interior))) + (let-values (((read-frames link-index) + (op_read_float (opusfile-handle-of handle) float-buffer max-samples))) + (cond [(negative? read-frames) + (values read-frames #f 0)] + [(zero? read-frames) + (values 0 #f 0)] + [else + (let* ((read-samples (* read-frames channels)) + (out (make-bytes (* read-samples 3)))) + (for ([i (in-range read-samples)]) + (write-s24-native! out (* i 3) (float->s24 (ptr-ref float-buffer _float i)))) + (values read-frames out (bytes-length out)))])))) + + (define (read-audio-buffer handle channels) + (case (or (hash-ref (opusfile-handle-format handle) 'sample-format #f) (current-opusfile-output-format)) ; decode in the format announced at open time; the parameter is thread-specific and may differ in the reading thread + [(s16) (read-s16 handle channels)] + [(s24) (read-s24 handle channels)])) + + (define (opusfile-read handle) + (set-opusfile-handle-stop! handle #f) + (set-opusfile-handle-reading! handle #t) + (let loop () + (cond + [(opusfile-handle-stop handle) + (dbg-sound "Stopping opusfile decoding") + (set-opusfile-handle-reading! handle #f) + 'stopped-reading] + [else + (handle-pending-seek! 
handle) + (let ((channels (hash-ref (opusfile-handle-format handle) 'channels 2))) + (let-values (((read-frames out read-bytes) (read-audio-buffer handle channels))) + (cond [(negative? read-frames) + (err-sound "opusfile decode error: ~a" read-frames) + (set-opusfile-handle-stop! 
handle #t) + (loop)] + [(zero? read-frames) + (set-opusfile-handle-stop! handle #t) + (loop)] + [else + (give-audio handle out read-bytes) + (set-opusfile-handle-pcm-pos! handle (+ (opusfile-handle-pcm-pos handle) read-frames)) + (loop)])))])) + (op_free (opusfile-handle-of handle)) + (set-opusfile-handle-reading! handle #f)) + + (define (opusfile-seek handle percentage) + (let* ((fmt (opusfile-handle-format handle)) + (total-samples (hash-ref fmt 'total-samples 0))) + (unless (or (not (real? total-samples)) (<= total-samples 0)) ; skip when the length is unknown: #f, 0, or a negative libopusfile error code such as OP_EINVAL for an unseekable source + (let* ((percentage (max 0 (min 100 percentage))) + (sample (inexact->exact + (round (* (exact->inexact (/ percentage 100.0)) + total-samples))))) + (set-opusfile-handle-seek! handle sample))))) + + (define (opusfile-stop handle) + (set-opusfile-handle-stop! handle #t) + (while (opusfile-handle-reading handle) + (sleep 0.01))) + + ) ; end of module diff --git a/scrbl/ffmpeg-definitions2.scrbl b/scrbl/ffmpeg-definitions2.scrbl new file mode 100644 index 0000000..a9ffdd8 --- /dev/null +++ b/scrbl/ffmpeg-definitions2.scrbl @@ -0,0 +1,441 @@ +#lang scribble/manual + +@(require (for-label racket/base + (except-in racket/contract ->) + racket/path + ffi/unsafe + let-assert + early-return + "../ffmpeg-definitions.rkt" + "../private/cstruct-helper.rkt")) + +@title[#:tag "ffmpeg-definitions"]{FFmpeg Decoder Definitions} +@author[@author+email["Hans Dijkema" "hans@dijkewijk.nl"]] + +@defmodule[racket-audio/ffmpeg-definitions] + +This module provides the direct FFmpeg-backed decoder layer used by the audio +pipeline. It is deliberately small and stateful. A caller creates one decoder +instance, opens one file on it, queries the selected audio stream, repeatedly +asks for the next PCM block, and closes the instance again. + +The module does not expose FFmpeg metadata. 
It only exposes the information +needed for playback: stream count, sample rate, channel count, duration, +bitrate, decoded PCM data, and sample positions. 
The output format is fixed: +interleaved signed 32-bit PCM, four bytes per sample, using FFmpeg's +@tt{AV_SAMPLE_FMT_S32} sample format. + +The FFmpeg libraries are loaded when the module is required. The module checks +that the runtime FFmpeg major versions are in the supported range configured by +the implementation. This binding targets the FFmpeg library major versions +used by FFmpeg 6, 7, and 8: @tt{libavutil} 58 to 60, @tt{libavcodec} 60 to 62, +@tt{libavformat} 60 to 62, and @tt{libswresample} 4 to 6. Unsupported runtime +versions fail early, before a decoder instance is used. + +On Windows, the private library loader may download the bundled sound-library +set into Racket's add-on directory before the FFI libraries are opened. On +Unix-like systems, the FFmpeg libraries are expected to be installed by the +operating system or platform package manager and to be reachable by Racket's +FFI library search path. + +@section{Layering} + +This module is the low-level Racket FFI layer. It is normally wrapped by +@filepath{ffmpeg-ffi.rkt} and then by @filepath{ffmpeg-decoder.rkt}. The first +wrapper adapts this module to the command protocol used by the audio decoder +frontend. The second wrapper exposes the callback-oriented decoder interface +used by the rest of the playback pipeline. + +The distinction matters for buffer lifetime. At this level, +@racket[fmpg-buffer] returns the current buffer owned by the decoder instance. +The adapter in @filepath{ffmpeg-ffi.rkt} copies that buffer before passing it to +@filepath{ffmpeg-decoder.rkt}. Code that uses this module directly must copy +the buffer itself when the bytes must survive the next decoder operation. + +@section{FFmpeg version information} + +@defproc[(ffmpeg-version [lib (or/c 'avutil 'avcodec 'avformat + 'swr 'swresample)]) + (list/c exact-nonnegative-integer? + exact-nonnegative-integer? 
+ exact-nonnegative-integer?)]{ +Returns the runtime version of one FFmpeg library as a three-element list +containing the major, minor, and micro version numbers. The symbols +@racket['swr] and @racket['swresample] both refer to @tt{libswresample}. + +The version is read from FFmpeg's packed integer value. For example, a runtime +value corresponding to @tt{62.28.100} is returned as @racket['(62 28 100)]. +The function raises an exception for an unknown library symbol. +} + +The runtime versions determine which partial FFmpeg struct layouts are safe to +use. If a future FFmpeg major release changes a layout before one of the +fields read by this module, the supported range should be extended only after +the affected partial definitions have been checked. + +@section{Implementation strategy} + +This module talks directly to the FFmpeg shared libraries through Racket's FFI. +There is no C shim that hides FFmpeg's structs or normalizes their layout. The +price of that choice is that the Racket side must know enough of the relevant C +struct layouts to read the fields used by the decoder. The benefit is that the +binding remains a Racket module with direct access to the platform FFmpeg +libraries. + +@subsection{C structs and offsets} + +Small and stable structures, such as @tt{AVRational} and +@tt{AVChannelLayout}, are described with @racket[define-cstruct]. A +@racket[define-cstruct] form describes the C fields to Racket's FFI. Racket +then calculates the correct field offsets for the current platform ABI and +creates the corresponding pointer type, constructor, accessors and mutators. + +The larger FFmpeg structures are handled by @racket[def-cstruct] from +@filepath{private/cstruct-helper.rkt}. Structures such as +@tt{AVCodecParameters}, @tt{AVStream}, @tt{AVFormatContext}, @tt{AVFrame} and +@tt{AVPacket} are large and may differ between FFmpeg major versions. 
The +decoder only needs a few fields from each one, but those fields must still be +read from their exact native offsets. + +The helper solves this by describing the complete field sequence up to the last +field the backend needs. Unnamed entries are used only to advance the offset. +Named entries become generated accessors. Repeated entries such as +@racket[(6 _int)] keep the definition compact while still allowing Racket's FFI +to compute alignment, padding and pointer size correctly. Tail fields after +the last required member are not described. + +The right layout is selected when the module is required, after the runtime +FFmpeg major versions have been read from the libraries. For the supported +range, @tt{_AVCodecParameters} uses one layout for @tt{libavcodec} major +version 60 and another for major versions 61 and 62. Likewise, +@tt{_AVFrame} uses one layout for @tt{libavutil} major version 58 and +another for major versions 59 and 60. The other partial structs used by this +module are defined with a single layout across the supported versions. + +@subsection{Defensive control flow} + +Most FFmpeg calls report ordinary failure through C-style return values or null +pointers. The implementation treats those results as normal control flow. The +@racket[let/assert] form is used for setup paths where each native result must +be checked before the next native call is made. It behaves like a sequential +binding form: each binding can be checked immediately, and a failed check +returns the specified failure value for the whole form. + +That style is used for opening a file, selecting stream information, allocating +the codec context, and initializing the resampler. Predicates such as +@tt{a-!nullptr?}, @tt{a-nullptr?}, @tt{a-true?}, and @tt{a->=?} express the +usual FFmpeg checks directly next to the binding that produced the value. + +The decode and seek paths also use @racket[early-return] where processing must +stop immediately from a nested position. 
This keeps the normal FFmpeg outcomes +away from exception-based control flow while still making cleanup actions local +to the point where a failure can occur. + +@section{Decoder instances} + +A decoder instance is an opaque value returned by @racket[fmpg-init]. Its +structure type and predicate are not exported. Pass the value back to the +functions in this module and do not inspect it directly. The contracts below +therefore use @racket[any/c] for the instance argument. Operationally, that +argument must be a value returned by @racket[fmpg-init]. + +The instance owns native FFmpeg resources: a format context, a codec context, +an audio frame, a resampler, and the Racket byte string used for the current +PCM block. Finalizers are installed as a last line of defence, but callers +should still call @racket[fmpg-close!] explicitly when playback stops or when +the file is no longer needed. Explicit close keeps the lifetime of native +resources predictable. + +@defproc[(fmpg-init) any/c]{ +Creates a new decoder instance. The result is an opaque instance value, or +@racket[#f] if the instance could not be created. + +Creating the instance does not open a file. Use @racket[fmpg-open-file!] +before querying stream information or decoding audio. +} + +@defproc[(fmpg-open-file! [instance any/c] + [filename (or/c path? string?)]) + (integer-in 0 1)]{ +Opens @racket[filename] on @racket[instance], reads the stream information, +selects the best audio stream, initializes the codec context, and initializes +the resampler. + +The function returns @racket[1] on success and @racket[0] on failure. On +failure, partially initialized native state is closed again. A non-string, +non-path filename is treated as an open failure and returns @racket[0]. + +An instance can only have one file open. Close it with @racket[fmpg-close!] +before opening another file on the same instance. +} + +@defproc[(fmpg-close! 
[instance any/c]) void?]{ +Closes @racket[instance] if it is open and releases the native FFmpeg resources +owned by the instance. The codec context, frame and resampler are freed before +the format context is closed. This order avoids keeping decoder pointers that +refer to streams from an already closed container. + +The stored audio information is reset. Calling this function with @racket[#f] +or with an already closed instance is harmless. +} + +@defproc[(fmpg-is-open [instance any/c]) (integer-in 0 1)]{ +Returns @racket[1] when @racket[instance] is ready for decoding and @racket[0] +otherwise. An instance is ready only after a file has been opened, a usable +audio stream has been selected, and the decoder and resampler have been +initialized. +} + +@section{Audio stream information} + +The decoder selects one audio stream for playback using FFmpeg's best-stream +selection. The stream count reports how many audio streams were found in the +container, but decoding is performed only for the selected stream. + +The term @italic{sample} in this module means a sample frame: one time step in +the audio stream, across all channels. For stereo 32-bit output, one sample +frame therefore occupies @racket[(* 2 4)] bytes in the returned PCM buffer. + +@defproc[(fmpg-audio-stream-count [instance any/c]) + exact-nonnegative-integer?]{ +Returns the number of audio streams in the open container. If the instance is +not open, the result is @racket[0]. This count is informational; actual stream +selection is performed during @racket[fmpg-open-file!]. +} + +@deftogether[ +(@defproc[(fmpg-audio-sample-rate [instance any/c]) + exact-nonnegative-integer?] + @defproc[(fmpg-audio-channels [instance any/c]) + exact-nonnegative-integer?])]{ +Return the sample rate and channel count of the selected audio stream. If the +instance is not ready, both functions return @racket[0]. +} + +@deftogether[ +(@defproc[(fmpg-audio-bits-per-sample [instance any/c]) + exact-positive-integer?] 
+ @defproc[(fmpg-audio-bytes-per-sample [instance any/c]) + exact-positive-integer?])]{ +Return the fixed output sample width in bits and bytes. The current output +format is 32-bit signed PCM, so @racket[fmpg-audio-bits-per-sample] returns +@racket[32] and @racket[fmpg-audio-bytes-per-sample] returns @racket[4]. The +values are independent of the input file's original sample format and do not +depend on the instance state. +} + +@deftogether[ +(@defproc[(fmpg-duration-ms [instance any/c]) exact-integer?] + @defproc[(fmpg-duration-samples [instance any/c]) exact-integer?])]{ +Return the duration of the selected audio stream in milliseconds and in sample +frames. If the stream duration is not available, the container duration is +used as a fallback. If no duration can be determined, or when the instance is +not ready, the result is @racket[-1]. +} + +@defproc[(fmpg-file-bitrate [instance any/c]) exact-integer?]{ +Returns the container bitrate in bits per second. If the bitrate is unavailable +or if the instance is not open, the result is @racket[-1]. Only positive +FFmpeg bitrates are passed through as reliable. +} + +@section{Output format} + +The decoder output format is intentionally fixed: + +@itemlist[ + #:style 'compact + @item{sample format: signed 32-bit PCM, @tt{AV_SAMPLE_FMT_S32}} + @item{layout: interleaved} + @item{sample rate: the selected stream's sample rate} + @item{channels: the selected stream's channel count} +] + +This keeps the playback layer simple. The FFmpeg input format may be planar, +floating point, compressed, or otherwise different; @tt{libswresample} converts +the decoded frames to the fixed output format before the bytes are exposed to +Racket. + +@section{Decoding} + +Decoding is block oriented. Each call to @racket[fmpg-decode-next!] clears the +previous PCM block and attempts to produce the next decoded block for the +selected audio stream. 
When the call returns @racket[1], the block can be read +with @racket[fmpg-buffer] and described with the buffer query functions. + +@defproc[(fmpg-decode-next! [instance any/c]) exact-integer?]{ +Decodes until a block of PCM output is available, end of stream is reached, or +an error occurs. The return values are: + +@itemlist[ + #:style 'compact + @item{@racket[1]: a new PCM buffer is available through @racket[fmpg-buffer].} + @item{@racket[0]: decoding is complete and no more PCM is available.} + @item{A negative value: decoding failed or the instance was not ready.} +] + +Internally, the decoder first tries to receive frames that FFmpeg may already +have buffered. If no frame is ready, it reads packets until it finds a packet +for the selected audio stream. Packets from other streams are skipped and +immediately unreferenced. Sent packets are unreferenced after +@tt{avcodec_send_packet}, because the codec has then taken what it needs. + +At end of input, the function drains both the codec and the resampler. This is +necessary because FFmpeg and @tt{libswresample} may still hold delayed samples +even after the demuxer has no more packets. +} + +@section{Decoded buffers} + +The PCM buffer belongs to the decoder instance. It is replaced by the next +call to @racket[fmpg-decode-next!], @racket[fmpg-seek-ms!], or +@racket[fmpg-close!]. Treat the returned byte string as read-only. Copy it if +it must outlive the next decoder operation or if another component may mutate +it. + +@defproc[(fmpg-buffer [instance any/c]) (or/c bytes? #f)]{ +Returns the current decoded PCM block as a byte string, or @racket[#f] when no +PCM block is available. + +The byte string contains interleaved signed 32-bit samples. Its logical frame +count is available as the difference between @racket[fmpg-buffer-end-sample] +and @racket[fmpg-buffer-start-sample]. Its byte size is also available through +@racket[fmpg-buffer-size]. 
+} + +@defproc[(fmpg-buffer-size [instance any/c]) exact-nonnegative-integer?]{ +Returns the number of valid bytes in the current PCM buffer. If no decoder +state is available, or if the size would not fit in the internal integer range, +the function returns @racket[0]. +} + +@deftogether[ +(@defproc[(fmpg-buffer-start-sample [instance any/c]) + exact-nonnegative-integer?] + @defproc[(fmpg-buffer-end-sample [instance any/c]) + exact-nonnegative-integer?] + @defproc[(fmpg-sample-position [instance any/c]) + exact-nonnegative-integer?])]{ +Return sample-frame positions for the current decoder state. + +@racket[fmpg-buffer-start-sample] returns the first sample frame represented by +the current PCM buffer. @racket[fmpg-buffer-end-sample] returns the half-open +end position: the first sample frame after the current buffer. +@racket[fmpg-sample-position] returns the next sample position the decoder +expects to produce. + +These values count sample frames, not individual channel samples. For stereo +audio, one sample frame contains one sample for the left channel and one sample +for the right channel. +} + +@section{Seeking} + +@defproc[(fmpg-seek-ms! [instance any/c] + [target-pos-ms exact-nonnegative-integer?]) + (integer-in 0 1)]{ +Seeks the selected audio stream to @racket[target-pos-ms] milliseconds and +resets the decoder and resampler state. The function returns @racket[1] on +success and @racket[0] on failure. Seeking is allowed only when the instance +is already ready for decoding and the target position is non-negative. + +Seeking uses FFmpeg's backward seek flag. FFmpeg may therefore seek to a packet +position before the requested target. The decoder stores a discard target in +sample frames. During the following decode calls, frames before the target are +dropped, and frames that overlap the target are trimmed so the exposed PCM +buffer starts at, or as close as FFmpeg can provide to, the requested position. 
+ +After a successful seek, the codec buffers are flushed, the resampler is closed +and reinitialized, EOF state is cleared, and sample bookkeeping is reset to the +target position. +} + +@section{Resource ownership} + +The decoder instance owns the native FFmpeg objects it allocates. The codec +pointer returned by FFmpeg is not owned by the instance, but the codec context, +frame, resampler and format context are. They are released by +@racket[fmpg-close!]. Finalizers are registered as a safety net, but callers +should close decoder instances explicitly. + +Temporary native buffers used during resampling are allocated only for the +duration of a conversion step and are always freed before control returns to the +caller. The public PCM buffer is a Racket byte string, so it can safely be +passed to the Racket-side playback backend. + +@section{Use through the decoder frontend} + +The direct API above is normally wrapped by @filepath{ffmpeg-ffi.rkt} and by +@filepath{ffmpeg-decoder.rkt}. The frontend function @tt{ffmpeg-open} returns +a handle or @racket[#f] when the file does not exist. Its stream-info callback +receives a mutable hash with at least these playback keys: + +@racketblock[ +(list 'sample-rate + 'channels + 'bits-per-sample + 'bytes-per-sample + 'total-samples + 'duration)] + +The audio callback receives the same hash extended for the current buffer with +these keys: + +@racketblock[ +(list 'sample + 'current-time)] + +The hash is followed by a copied byte string and its valid byte count. The +copy is made by @filepath{ffmpeg-ffi.rkt}, not by the low-level buffer function +itself. + +The frontend's seek function accepts a percentage of the stream and translates +that percentage to a sample position. The adapter then translates the sample +position to milliseconds and calls @racket[fmpg-seek-ms!]. This is why the +low-level module exposes millisecond seeking while the frontend exposes +percentage seeking. 
+ +@section{Examples} + +The following example opens a file, decodes all PCM blocks, and reports their +byte ranges and sample ranges. A real playback loop would pass each buffer to +the audio output layer before requesting the next block. + +@racketblock[ +(define dec (fmpg-init)) + +(when (and dec (= (fmpg-open-file! dec "track.ogg") 1)) + (printf "~a Hz, ~a channels, ~a ms\n" + (fmpg-audio-sample-rate dec) + (fmpg-audio-channels dec) + (fmpg-duration-ms dec)) + + (let loop () + (case (fmpg-decode-next! dec) + [(1) + (define pcm (fmpg-buffer dec)) + (define size (fmpg-buffer-size dec)) + (define start (fmpg-buffer-start-sample dec)) + (define end (fmpg-buffer-end-sample dec)) + (printf "decoded ~a bytes, samples [~a, ~a)\n" + size start end) + ;; Pass pcm to the audio output layer here, or copy it if needed. + (loop)] + [(0) + (printf "done\n")] + [else + (error "decode error")])) + + (fmpg-close! dec)) +] + +A simple seek flow looks the same after the seek succeeds. The following code +moves to 30 seconds and then requests the next decoded buffer. + +@racketblock[ +(when (= (fmpg-seek-ms! dec 30000) 1) + (when (= (fmpg-decode-next! dec) 1) + (define pcm (fmpg-buffer dec)) + (define start (fmpg-buffer-start-sample dec)) + (printf "first buffer after seek starts at sample ~a\n" start))) +] diff --git a/scrbl/libao-async-ffi-racket2.scrbl b/scrbl/libao-async-ffi-racket2.scrbl new file mode 100644 index 0000000..db8b4b6 --- /dev/null +++ b/scrbl/libao-async-ffi-racket2.scrbl @@ -0,0 +1,306 @@ +#lang scribble/manual + +@(require (for-label racket/base + racket/contract + "../libao-async-ffi-racket.rkt")) + +@title{Pure Racket Asynchronous libao Backend} + +@defmodule[racket-audio/libao-async-ffi-racket] + +This module implements the asynchronous libao playback backend used by +@racketmodname[racket-audio]. 
It provides the same public Racket API as the +older C-backed asynchronous player, but keeps the queueing, buffering, +conversion and worker-thread logic in Racket. The only foreign calls made by +this module are the direct calls into Xiph's libao library. + +The module is intended as a low-level backend. Higher-level player code should +normally use the public audio-player interface instead of calling this module +directly. It is documented here because it defines the exact contract between +decoded PCM data and the libao output path. + +@section{Overview} + +The backend accepts decoded PCM buffers, converts them when needed, groups small +buffers into larger playback chunks, and sends those chunks to libao from a +dedicated Racket worker thread. The worker thread calls @racket[ao_play] as a +blocking foreign call, so other Racket threads and places do not have to wait +for the audio device to accept more data. + +Incoming buffers may be interleaved or planar. Planar buffers, such as those +commonly produced by a FLAC decoder, are converted to interleaved PCM before +playback. If the requested sample width cannot be opened on the selected audio +device, the backend tries lower-width output formats and converts samples before +they are sent to libao. + +The backend also maintains playback position metadata. Each queued buffer is +tagged with a music id, a current playback position and a duration. These +values are used by the higher-level player to report where the audio device is +in the current track. + +@section{Buffer information} + +@defproc[(make-buffer-info [type symbol?] + [sample-bits exact-positive-integer?] + [sample-rate exact-positive-integer?] + [channels exact-positive-integer?] + [endianness symbol?]) + any/c]{ + +Creates a buffer description object for PCM data passed to +@racket[ao_play_async]. + +The @racket[type] field describes the memory layout. 
The supported values are +@racket['interleaved] for normal interleaved PCM and @racket['planar] for planar +PCM. For compatibility with older code, @racket['ao] is treated as interleaved +by convention and @racket['flac] is accepted as planar input. + +The @racket[sample-bits], @racket[sample-rate] and @racket[channels] fields +describe the format of the supplied buffer, not necessarily the format that will +eventually be accepted by the device. The backend may convert the sample width +to the actual device width. + +The @racket[endianness] field must be one of @racket['little-endian], +@racket['big-endian] or @racket['native-endian]. It is used when samples are +converted between different sample widths or byte orders.} + +@defproc[(make-BufferInfo_t [type symbol?] + [sample-bits exact-positive-integer?] + [sample-rate exact-positive-integer?] + [channels exact-positive-integer?] + [endianness symbol?]) + any/c]{ + +Compatibility alias for @racket[make-buffer-info]. The name matches the older +FFI module and the former C structure naming convention.} + +@section{Creating and closing a backend} + +@defproc[(ao_version_async) exact-integer?]{ + +Returns the version number of this asynchronous backend implementation. The +current implementation returns @racket[3]. The value is useful for diagnostics +when multiple asynchronous backend implementations exist.} + +@defproc[(ao_create_async [bits exact-positive-integer?] + [rate exact-positive-integer?] + [channels exact-positive-integer?] + [byte-format symbol?] + [wav-output-file (or/c #f path-string?)]) + any/c]{ + +Opens a libao output device and creates an asynchronous playback handle. + +The @racket[bits], @racket[rate], @racket[channels] and @racket[byte-format] +arguments describe the preferred output format. The byte format must be one of +@racket['little-endian], @racket['big-endian] or @racket['native-endian]. + +When @racket[wav-output-file] is @racket[#f], the default live libao driver is +used. 
When it is a path string, the backend opens libao's @tt{wav} driver and +writes the audio stream to that file instead. + +The backend first tries to open the requested sample width. If that fails and +the requested width is greater than 24 bits, it tries 24-bit output. If that +also fails and the requested width is greater than 16 bits, it tries 16-bit +output. The actual device width can be queried with +@racket[ao_real_output_bits_async]. + +The function returns a playback handle on success and @racket[#f] when no +suitable libao device could be opened.} + +@defproc[(ao_stop_async [handle any/c]) any/c]{ + +Stops the worker thread, clears pending audio, closes the libao device and +invalidates @racket[handle]. + +The stop operation first clears all queued buffers, then queues an internal stop +command, waits for the playback thread to terminate, and finally closes the +underlying libao handle. Calling this function on an already invalid handle is +an error.} + +@section{Submitting audio} + +@defproc[(ao_play_async [handle any/c] + [music-id any/c] + [at-second real?] + [music-duration real?] + [buf-size exact-nonnegative-integer?] + [au-buf (or/c bytes? any/c)] + [info any/c]) + void?]{ + +Queues a PCM buffer for asynchronous playback. + +The @racket[music-id], @racket[at-second] and @racket[music-duration] values are +stored together with the queued buffer. They do not affect sample conversion, +but they allow the player to report the current track id, playback position and +track duration while the worker thread is playing the queued data. + +The @racket[buf-size] argument gives the number of valid bytes in +@racket[au-buf]. The input buffer is copied into backend-owned memory before +the function returns, so the caller may reuse or discard the original byte +string after the call. + +The @racket[info] argument should be created with @racket[make-buffer-info]. If +the buffer is planar, it is converted to interleaved PCM. 
If the buffer's +sample width or byte order differs from the actual libao device format, the +backend converts it before queueing. + +The backend groups smaller buffers into larger playback chunks. This reduces +the number of calls to libao and helps prevent underruns. Buffers with +different @racket[music-id] values are not merged into the same output chunk.} + +@defproc[(ao_clear_async [handle any/c]) any/c]{ + +Clears all queued audio buffers that have not yet been played. + +The current aggregation buffer is also cleared. Already playing audio may still +finish at the device level, depending on what libao and the operating system +have accepted. This operation is used by higher-level code when stopping, +seeking or replacing the current stream.} + +@section{Playback state} + +@defproc[(ao_is_at_second_async [handle any/c]) real?]{ + +Returns the playback position associated with the most recently dequeued buffer. +This value is the @racket[at-second] value supplied to @racket[ao_play_async], +not a sample-accurate query into the audio device.} + +@defproc[(ao_is_at_music_id_async [handle any/c]) any/c]{ + +Returns the music id associated with the most recently dequeued buffer. The +higher-level player uses this value to determine which track the output thread +has reached.} + +@defproc[(ao_music_duration_async [handle any/c]) real?]{ + +Returns the duration associated with the most recently dequeued buffer. This is +the @racket[music-duration] value supplied to @racket[ao_play_async].} + +@defproc[(ao_bufsize_async [handle any/c]) exact-nonnegative-integer?]{ + +Returns the number of queued PCM bytes that have been accepted by the backend +but not yet removed from the asynchronous queue. This is a backend queue size, +not the size of the operating-system or hardware audio buffer.} + +@defproc[(ao_sample_queue_len [handle any/c]) exact-nonnegative-integer?]{ + +Returns the number of queued playback elements waiting in the backend queue. 
+This is mainly useful for diagnostics and tuning.} + +@defproc[(ao_reuse_buf_len [handle any/c]) exact-nonnegative-integer?]{ + +Returns the number of reusable internal buffers currently kept by the backend. +This is a diagnostic value that can help detect excessive allocation or +unexpected buffer retention.} + +@section{Pause and volume} + +@defproc[(ao_pause_async [handle any/c] + [paused (or/c boolean? integer?)]) + void?]{ + +Pauses or resumes the playback worker. + +When @racket[paused] is @racket[#t], or an integer other than @racket[0], the +worker thread is blocked before it dequeues the next element. When +@racket[paused] is @racket[#f] or @racket[0], playback is resumed. + +Pausing does not prevent producers from queueing additional buffers. It only +prevents the worker thread from taking more data from the queue.} + +@defproc[(ao_set_volume_async [handle any/c] + [percentage real?]) + void?]{ + +Sets the output volume as a percentage. + +A value of @racket[100.0] means unchanged volume. Values below +@racket[100.0] attenuate the signal. Values above @racket[100.0] amplify the +signal and are clipped to the signed range of the actual device sample width. + +Internally the value is stored as an integer in hundredths of a percent: for +example, @racket[100.0] becomes @racket[10000]. Values very close to +@racket[100.0] are normalized to exactly @racket[10000] to avoid unnecessary +sample processing.} + +@defproc[(ao_volume_async [handle any/c]) real?]{ + +Returns the currently configured output volume percentage.} + +@section{Output format} + +@defproc[(ao_real_output_bits_async [handle any/c]) + exact-nonnegative-integer?]{ + +Returns the actual sample width opened on the libao device. + +This may be lower than the requested width passed to @racket[ao_create_async]. +For example, a request for 32-bit output may result in a 24-bit or 16-bit device +when the default libao driver cannot open the preferred format. 
In that case, +@racket[ao_play_async] converts the incoming samples before playback.} + +@section{Playback buffer tuning} + +@defproc[(ao-playback-buf-ms) exact-nonnegative-integer?]{ + +Returns the target size, in milliseconds, of the playback chunks that the +backend sends to libao. The default is @racket[150].} + +@defproc[(ao-set-playback-buf-ms! [ms exact-nonnegative-integer?]) + void?]{ + +Sets the target playback chunk size in milliseconds. + +Larger values reduce the number of calls to libao and may help prevent audible +glitches when decoders produce many small buffers. Smaller values reduce +latency but increase scheduling pressure on the Racket worker thread and on the +audio backend.} + +@section{Implementation notes} + +The worker thread is created with its own thread pool and uses libao's +@racket[ao_play] through a blocking FFI call. Before calling libao, the worker +copies the queued bytes into memory allocated with @racket['atomic-interior]. +This is important because a blocking foreign call must not be handed a pointer +to movable Racket memory that could be relocated by the garbage collector while +the foreign function is still using it. + +The backend keeps a small pool of previously allocated buffers. Buffers created +internally for conversion or aggregation can be reused after playback. This +reduces allocation pressure during continuous playback. + +The module initializes libao when the first handle is opened and shuts libao +down when the last handle is closed. This keeps libao lifetime management local +to the backend and avoids repeated global initialization during normal playback. 
+ +@section{Example} + +@racketblock[ +(define h + (ao_create_async 32 44100 2 'native-endian #f)) + +(define info + (make-buffer-info 'interleaved 32 44100 2 'native-endian)) + +(when h + (ao_play_async h + 1 + 0.0 + 180.0 + (bytes-length pcm-bytes) + pcm-bytes + info) + + (ao_set_volume_async h 80.0) + + (ao_pause_async h #t) + (ao_pause_async h #f) + + (ao_stop_async h)) +] + +The example opens the default live libao device, queues one interleaved +32-bit PCM buffer, lowers the volume to 80 percent, briefly pauses and resumes +the worker, and finally closes the backend. Here @racket[pcm-bytes] stands for a byte string of decoded PCM data. Because @racket[ao_stop_async] clears audio that is still queued, a real program would normally wait until the queued audio has been played before stopping. \ No newline at end of file