Compare commits

..

2 Commits

Author SHA1 Message Date
hans cf87fa7ed8 Merge branch 'main' of https://git.dijkewijk.nl/hans/racket-audio 2026-06-05 22:17:30 +02:00
hans d7be947886 Òpus toevoeging via xiph library 2026-06-05 22:17:10 +02:00
5 changed files with 1082 additions and 16 deletions
+32 -16
View File
@@ -2,6 +2,7 @@
(require "flac-decoder.rkt"
"mp3-decoder.rkt"
"opusfile-decoder.rkt"
"ffmpeg-decoder.rkt"
"audio-sniffer.rkt"
"private/utils.rkt"
@@ -22,6 +23,8 @@
make-audio-reader
audio-handle?
audio-supported-extensions
current-opusfile-output-format
opusfile-output-format?
)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -56,7 +59,18 @@
mp3-stop
'ao))
;; FFmpeg decodere
;; Opus, via Xiph libopusfile.
;; Registers a reader under the key 'opusfile in the audio-readers table.
;; The reader claims the "opus" extension and is built from the
;; valid?/open/read/seek/stop procedures provided by "opusfile-decoder.rkt".
;; NOTE(review): the trailing 'ao is presumably the buffer-layout tag that
;; make-audio-reader expects as its last argument -- confirm against its
;; definition, which is not visible here.
(hash-set! audio-readers
'opusfile
(make-audio-reader '("opus")
opusfile-valid?
opusfile-open
opusfile-read
opusfile-seek
opusfile-stop
'ao))
;; FFmpeg decoder
(hash-set! audio-readers
'ffmpeg
(make-audio-reader '("ogg" "oga" "opus"
@@ -229,21 +243,23 @@
(not (null? (filter (λ (e) (string-ci=? ext e)) (audio-reader-exts reader)))))
(define reader-for-kind
(make-hash '((mp3 . ffmpeg) ; ffmpeg does a better job on gapless playback...
(flac . flac)
(ogg . ffmpeg)
(vorbis . ffmpeg)
(opus . ffmpeg)
(wav . ffmpeg)
(aiff . ffmpeg)
(mp4 . ffmpeg)
(aac . ffmpeg)
(alac . ffmpeg)
(ac3 . ffmpeg)
(ape . ffmpeg)
(wavpack . ffmpeg)
(wma . ffmpeg)
(matroska . ffmpeg))))
(make-hash
(list (cons 'mp3 'ffmpeg) ; ffmpeg does a better job on gapless playback...
(cons 'flac 'flac)
(cons 'ogg 'ffmpeg)
(cons 'vorbis 'ffmpeg)
(cons 'opus (if (opusfile-available?) 'opusfile 'ffmpeg))
(cons 'wav 'ffmpeg)
(cons 'aiff 'ffmpeg)
(cons 'mp4 'ffmpeg)
(cons 'aac 'ffmpeg)
(cons 'alac 'ffmpeg)
(cons 'ac3 'ffmpeg)
(cons 'ape 'ffmpeg)
(cons 'wavpack 'ffmpeg)
(cons 'wma 'ffmpeg)
(cons 'matroska 'ffmpeg))))
(define (find-reader audio-file)
+3
View File
@@ -3,10 +3,13 @@
(require "taglib.rkt"
"audio-sniffer.rkt"
"audio-player.rkt"
"opusfile-decoder.rkt"
)
(provide (all-from-out "taglib.rkt")
(all-from-out "audio-sniffer.rkt")
(all-from-out "audio-player.rkt")
current-opusfile-output-format
opusfile-output-format?
)
+300
View File
@@ -0,0 +1,300 @@
(module opusfile-decoder racket/base
(require ffi/unsafe
"private/utils.rkt")
(provide opusfile-open
opusfile-valid?
opusfile-read
opusfile-stop
opusfile-seek
opusfile-available?
current-opusfile-output-format
opusfile-output-format?)
;; Xiph libopusfile backend for Ogg Opus streams.
;;
;; By default this backend uses op_read(), which returns signed 16-bit
;; interleaved PCM. That is the most efficient path for direct libao
;; playback. For users who prefer the wider decoder output path, set
;; current-opusfile-output-format to 's24. In that mode the backend uses
;; op_read_float() and converts the interleaved float output to packed signed
;; 24-bit PCM in native byte order.
;;
;; Opus decode output is always 48 kHz PCM. The original input rate, if
;; present in metadata, is not the actual decoder output rate.
;; Rate of the decoded PCM in Hz; the module treats Opus output as fixed
;; 48 kHz regardless of the original input rate.
(define opus-sample-rate 48000)

;; Number of sample frames requested from the decoder per read call.
(define default-frames-per-read 4096)

;; The OggOpusFile handle is passed around as an untyped pointer.
(define _OggOpusFile _pointer)

;; Shared-library handle, or #f when the library cannot be loaded.
;; Tries the versioned name ("0") first and the unversioned name second.
(define libopusfile
  (with-handlers ([exn:fail? (λ (_exn) #f)])
    (ffi-lib "libopusfile" (list "0" #f))))
;; Predicate for the supported output sample formats: 's16 or 's24.
;; Always returns a boolean.
(define (opusfile-output-format? v)
  (and (memq v '(s16 s24)) #t))

;; Parameter selecting the PCM output format. Defaults to 's16.
;; The guard rejects anything that is not an opusfile-output-format?.
(define current-opusfile-output-format
  (let ([guard
         (λ (v)
           (if (opusfile-output-format? v)
               v
               (raise-argument-error 'current-opusfile-output-format "(or/c 's16 's24)" v)))])
    (make-parameter 's16 guard)))

;; Bits per sample for the currently selected output format.
(define (opus-bits-per-sample)
  (define fmt (current-opusfile-output-format))
  (cond [(eq? fmt 's16) 16]
        [(eq? fmt 's24) 24]))

;; Bytes per sample for the currently selected output format.
(define (opus-bytes-per-sample)
  (define fmt (current-opusfile-output-format))
  (cond [(eq? fmt 's16) 2]
        [(eq? fmt 's24) 3]))
;; Looks up the foreign symbol `name` in libopusfile with FFI type `type`.
;; Returns #f when the library was not loaded or the lookup fails.
(define (ffi-proc name type)
  (cond
    [libopusfile
     (with-handlers ([exn:fail? (λ (_exn) #f)])
       (get-ffi-obj name libopusfile type))]
    [else #f]))
;; Foreign bindings. Each name is bound to the procedure returned by ffi-proc,
;; or to #f when the symbol could not be resolved; opusfile-available? checks
;; that all of them are present.

;; Takes a path; the int argument is an output slot filled by the callee.
;; The wrapper returns two values: the handle pointer and that error code.
(define op_open_file
(ffi-proc "op_open_file"
(_fun _path (err : (_ptr o _int))
-> (r : _OggOpusFile)
-> (values r err))))
;; Takes a handle and returns nothing. opusfile-read calls it once the decode
;; loop has finished.
(define op_free
(ffi-proc "op_free"
(_fun _OggOpusFile -> _void)))
;; Takes a handle and an int, returns an int. opusfile-open passes -1 as the
;; int argument.
;; NOTE(review): -1 presumably selects the current/whole stream rather than a
;; specific link -- confirm against the libopusfile documentation.
(define op_channel_count
(ffi-proc "op_channel_count"
(_fun _OggOpusFile _int -> _int)))
;; Takes a handle and an int, returns a 64-bit integer. opusfile-open passes
;; -1 and stores the result as 'total-samples.
(define op_pcm_total
(ffi-proc "op_pcm_total"
(_fun _OggOpusFile _int -> _int64)))
;; Takes a handle and a 64-bit sample offset, returns an int status.
;; handle-pending-seek! treats a negative result as an error.
(define op_pcm_seek
(ffi-proc "op_pcm_seek"
(_fun _OggOpusFile _int64 -> _int)))
;; Takes a handle, a byte-string buffer and its capacity; the last int is an
;; output slot. The wrapper returns two values: the int result and that slot.
;; read-s16 treats the result as a frame count (per channel) when positive.
(define op_read
(ffi-proc "op_read"
(_fun _OggOpusFile _bytes _int (li : (_ptr o _int))
-> (r : _int)
-> (values r li))))
;; Same shape as op_read, but the buffer is a raw pointer; read-s24 passes
;; memory allocated as an array of _float.
(define op_read_float
(ffi-proc "op_read_float"
(_fun _OggOpusFile _pointer _int (li : (_ptr o _int))
-> (r : _int)
-> (values r li))))
;; #t only when the shared library and every foreign binding used by this
;; module were resolved; #f otherwise.
(define (opusfile-available?)
  (andmap (λ (binding) (and binding #t))
          (list libopusfile
                op_open_file
                op_free
                op_channel_count
                op_pcm_total
                op_pcm_seek
                op_read
                op_read_float)))
;; Decoder handle. The constructor keeps the make- prefix.
(struct opusfile-handle
  (of                  ; native handle pointer returned by op_open_file
   cb-info             ; stream-info callback; called with the format hash if it is a procedure
   cb-audio            ; audio callback; called with the format hash, a buffer and its byte count
   [stop #:mutable]    ; stop request flag, polled by the decode loop
   [seek #:mutable]    ; pending seek target in samples, or #f
   [reading #:mutable] ; #t while the decode loop is running
   [format #:mutable]  ; format hash built by make-format
   [pcm-pos #:mutable]) ; running position in sample frames
  #:constructor-name make-opusfile-handle
  #:transparent)
;; Raises an error attributed to `who`, formatting `fmt` with `args`.
(define (raise-opus who fmt . args)
  (apply error (list* who fmt args)))
;; Guard for entry points: raises (attributed to `who`) when the library or
;; any of its bindings is unavailable; otherwise does nothing.
(define (check-libopusfile who)
  (when (not (opusfile-available?))
    (raise-opus who "libopusfile could not be loaded")))
;; Fills in any playback key that is absent (or mapped to #f) in the mutable
;; format hash `h`. Existing truthy values are left untouched.
(define (correct-format-hash h)
  (define (missing? key)
    (not (hash-ref h key #f)))
  ;; Defaults are thunks so they are only computed when actually needed.
  (define defaults
    (list (cons 'sample-rate      (λ () opus-sample-rate))
          (cons 'bits-per-sample  (λ () (opus-bits-per-sample)))
          (cons 'bytes-per-sample (λ () (opus-bytes-per-sample)))
          (cons 'sample-format    (λ () (current-opusfile-output-format)))))
  (for ([entry (in-list defaults)])
    (when (missing? (car entry))
      (hash-set! h (car entry) ((cdr entry)))))
  ;; An unknown length also resets the duration.
  (when (missing? 'total-samples)
    (hash-set! h 'total-samples 0)
    (hash-set! h 'duration 0)))
;; Passes the handle's format hash to the stream-info callback, if the
;; callback is a procedure.
(define (report-format handle)
  (define notify (opusfile-handle-cb-info handle))
  (when (procedure? notify)
    (notify (opusfile-handle-format handle))))
;; Builds the mutable format hash for a freshly opened stream.
;; The duration is derived from `total-samples` at the fixed output rate and
;; is 0.0 when the total is not a non-negative integer.
(define (make-format channels total-samples)
  (define duration
    (cond [(and (integer? total-samples) (>= total-samples 0))
           (exact->inexact (/ total-samples opus-sample-rate))]
          [else 0.0]))
  (make-hash
   (list (cons 'duration duration)
         (cons 'sample-rate opus-sample-rate)
         (cons 'channels channels)
         (cons 'bits-per-sample (opus-bits-per-sample))
         (cons 'bytes-per-sample (opus-bytes-per-sample))
         (cons 'sample-format (current-opusfile-output-format))
         (cons 'total-samples total-samples))))
;; Delivers one decoded buffer to the audio callback.
;; Before the call, the format hash is completed and stamped with the current
;; position, both as a sample index and as a time in seconds.
(define (give-audio handle buffer size)
  (define fmt (opusfile-handle-format handle))
  (correct-format-hash fmt)
  (define pos (opusfile-handle-pcm-pos handle))
  (hash-set! fmt 'sample pos)
  (hash-set! fmt 'current-time (exact->inexact (/ pos opus-sample-rate)))
  (define deliver (opusfile-handle-cb-audio handle))
  (deliver fmt buffer size))
;; Full-scale magnitudes of a signed 24-bit sample. The positive and negative
;; ranges differ by one, so each sign has its own scale factor.
(define s24-pos-scale #x7FFFFF)
(define s24-neg-scale #x800000)

;; Clamps x to the closed interval [-1.0, 1.0].
(define (clip-sample x)
  (cond [(< x -1.0) -1.0]
        [(> x 1.0) 1.0]
        [else x]))

;; Converts a float sample to an exact signed 24-bit integer in
;; [-#x800000, #x7FFFFF]. Out-of-range input is clipped first.
;; A NaN sample maps to 0 (silence): NaN passes through the clip comparisons
;; unchanged and would otherwise make inexact->exact raise.
(define (float->s24 x)
  (cond
    [(not (= x x)) 0] ; NaN is the only value that is not = to itself
    [else
     (let ((y (clip-sample x)))
       (if (negative? y)
           (inexact->exact (round (* y s24-neg-scale)))
           (inexact->exact (round (* y s24-pos-scale)))))]))
;; Stores `sample` (a signed 24-bit integer) as three bytes at `offset` in the
;; mutable byte string `bs`, in the byte order of the running system.
(define (write-s24-native! bs offset sample)
  ;; Two's-complement encoding of negative values in 24 bits.
  (define unsigned (if (negative? sample) (+ sample #x1000000) sample))
  (define (byte-at shift)
    (bitwise-and (arithmetic-shift unsigned (- shift)) #xFF))
  (define-values (first second third)
    (if (system-big-endian?)
        (values (byte-at 16) (byte-at 8) (byte-at 0))
        (values (byte-at 0) (byte-at 8) (byte-at 16))))
  (bytes-set! bs offset first)
  (bytes-set! bs (+ offset 1) second)
  (bytes-set! bs (+ offset 2) third))
;; #t when the backend is usable and `audio-file` exists on disk.
;; The file contents are not inspected.
(define (opusfile-valid? audio-file)
  (cond
    [(not (opusfile-available?)) #f]
    [else (file-exists? audio-file)]))
;; Opens `audio-file*` (a path or a string) and returns a decoder handle.
;; Returns #f when the file does not exist.
;; Raises when the backend is unavailable or the native open call yields no
;; handle. On success the stream-info callback is invoked once with the
;; format hash before the handle is returned.
(define (opusfile-open audio-file* cb-stream-info cb-audio)
  (check-libopusfile 'opusfile-open)
  (define audio-file
    (cond [(path? audio-file*) (path->string audio-file*)]
          [else audio-file*]))
  (cond
    [(not (file-exists? audio-file)) #f]
    [else
     (define-values (of err) (op_open_file audio-file))
     (unless of
       (raise-opus 'opusfile-open
                   "could not open Opus file ~a; opusfile error code: ~a"
                   audio-file err))
     (define channels (op_channel_count of -1))
     (define total-samples (op_pcm_total of -1))
     (define handle
       (make-opusfile-handle of cb-stream-info cb-audio
                             #f #f #f
                             (make-format channels total-samples)
                             0))
     (report-format handle)
     handle]))
;; Performs the seek queued on the handle, if any, and clears the request.
;; The tracked position only moves to the target when the native seek does
;; not report a negative status; a failure is logged.
(define (handle-pending-seek! handle)
  (define target (opusfile-handle-seek handle))
  (when target
    (dbg-sound "Seeking opusfile to sample ~a" target)
    (let ([status (op_pcm_seek (opusfile-handle-of handle) target)])
      (if (negative? status)
          (err-sound "opusfile seek error: ~a" status)
          (set-opusfile-handle-pcm-pos! handle target)))
    (set-opusfile-handle-seek! handle #f)))
;; Reads one block of 16-bit PCM.
;; Returns three values: the decoder's frame count, the byte string holding
;; exactly the decoded data (or #f), and the number of valid bytes.
;; A zero or negative frame count is passed through with #f and 0.
(define (read-s16 handle channels)
  (define capacity (* default-frames-per-read channels))
  (define pcm (make-bytes (* capacity 2)))
  (define-values (frames _link)
    (op_read (opusfile-handle-of handle) pcm capacity))
  (cond
    [(<= frames 0) (values frames #f 0)]
    [else
     (define byte-count (* frames channels 2))
     ;; Only copy when the decoder filled less than the whole buffer.
     (define out
       (if (= byte-count (bytes-length pcm))
           pcm
           (subbytes pcm 0 byte-count)))
     (values frames out byte-count)]))
;; Reads one block of float PCM and repacks it as signed 24-bit samples.
;; Returns three values: the decoder's frame count, the packed byte string
;; (or #f), and the number of valid bytes.
;; A zero or negative frame count is passed through with #f and 0.
(define (read-s24 handle channels)
  (define capacity (* default-frames-per-read channels))
  (define floats (malloc _float capacity 'atomic-interior))
  (define-values (frames _link)
    (op_read_float (opusfile-handle-of handle) floats capacity))
  (cond
    [(<= frames 0) (values frames #f 0)]
    [else
     (define sample-count (* frames channels))
     (define packed (make-bytes (* sample-count 3)))
     (let fill ([i 0])
       (when (< i sample-count)
         (write-s24-native! packed (* i 3) (float->s24 (ptr-ref floats _float i)))
         (fill (add1 i))))
     (values frames packed (bytes-length packed))]))
;; Reads the next PCM block in the sample format recorded for this handle.
;; Returns the three values produced by read-s16 / read-s24.
;;
;; The format is taken from the handle's format hash, which was filled in
;; when the stream was opened and has already been reported to the stream-info
;; callback. Dispatching on the parameter's value at read time instead could
;; deliver data whose width disagrees with the advertised bits-per-sample,
;; for example when reading happens in a thread with a different
;; parameterization. The parameter is only consulted when the hash carries no
;; format, which mirrors what correct-format-hash would store.
(define (read-audio-buffer handle channels)
  (let ((fmt (or (hash-ref (opusfile-handle-format handle) 'sample-format #f)
                 (current-opusfile-output-format))))
    (case fmt
      [(s16) (read-s16 handle channels)]
      [(s24) (read-s24 handle channels)]
      ;; Fail with a clear message rather than an arity error further up.
      [else (raise-opus 'opusfile-read "unsupported sample format: ~a" fmt)])))
;; Runs the decode loop for `handle` until end of stream, a fatal decode
;; error, or a stop request. Each decoded block is handed to the audio
;; callback via give-audio. Returns void.
;;
;; The native handle is released when the loop ends, so a handle can be read
;; only once.
;;
;; Behaviour notes:
;; - A "hole" result (-3) is logged and decoding continues. libopusfile
;;   documents this code as recoverable by calling the read function again.
;;   NOTE(review): the tracked position is not adjusted for skipped samples.
;; - Cleanup runs through dynamic-wind, so an exception escaping from the
;;   audio callback no longer leaks the native handle or leaves the reading
;;   flag set (which would make opusfile-stop wait forever).
;; - The reading flag is cleared only after the native handle has been freed.
(define (opusfile-read handle)
  (define OP_HOLE -3)
  (define (decode-loop)
    (cond
      [(opusfile-handle-stop handle)
       (dbg-sound "Stopping opusfile decoding")]
      [else
       (handle-pending-seek! handle)
       (let ((channels (hash-ref (opusfile-handle-format handle) 'channels 2)))
         (let-values (((read-frames out read-bytes) (read-audio-buffer handle channels)))
           (cond [(= read-frames OP_HOLE)
                  ;; Recoverable gap in the stream: keep decoding.
                  (err-sound "opusfile: hole in stream (~a), continuing" read-frames)
                  (decode-loop)]
                 [(negative? read-frames)
                  (err-sound "opusfile decode error: ~a" read-frames)
                  (set-opusfile-handle-stop! handle #t)
                  (decode-loop)]
                 [(zero? read-frames)
                  ;; End of stream.
                  (set-opusfile-handle-stop! handle #t)
                  (decode-loop)]
                 [else
                  ;; Deliver first so the callback sees the block's start position.
                  (give-audio handle out read-bytes)
                  (set-opusfile-handle-pcm-pos! handle
                                                (+ (opusfile-handle-pcm-pos handle) read-frames))
                  (decode-loop)])))]))
  (set-opusfile-handle-stop! handle #f)
  (set-opusfile-handle-reading! handle #t)
  (dynamic-wind
   void
   decode-loop
   (lambda ()
     (op_free (opusfile-handle-of handle))
     (set-opusfile-handle-reading! handle #f)))
  (void))
;; Queues a seek to `percentage` (clamped to 0..100) of the stream length.
;; The seek itself is carried out by the decode loop on its next iteration.
;;
;; Nothing is queued when the total length is unknown. Any non-positive or
;; non-numeric total counts as unknown, not just -1 and 0: the native length
;; query reports failures with other negative codes, and those would
;; otherwise turn into a negative seek target.
(define (opusfile-seek handle percentage)
  (let ((total-samples (hash-ref (opusfile-handle-format handle) 'total-samples 0)))
    (when (and (real? total-samples) (positive? total-samples))
      (let* ((pct (max 0 (min 100 percentage)))
             (sample (inexact->exact (round (* (/ pct 100.0) total-samples)))))
        (set-opusfile-handle-seek! handle sample)))))
;; Asks the decode loop to stop and waits until it is no longer reading.
;; Sets the handle's stop flag, which opusfile-read checks between buffers,
;; then polls the reading flag every 10 ms.
;; NOTE(review): `while` is not defined in this module; presumably it is
;; provided by "private/utils.rkt" -- confirm.
;; NOTE(review): the reading flag is only cleared by opusfile-read itself, so
;; calling this from the thread that runs opusfile-read (for example from
;; inside the audio callback) looks like it would never return -- confirm.
(define (opusfile-stop handle)
(set-opusfile-handle-stop! handle #t)
(while (opusfile-handle-reading handle)
(sleep 0.01)))
) ; end of module
+441
View File
@@ -0,0 +1,441 @@
#lang scribble/manual
@(require (for-label racket/base
(except-in racket/contract ->)
racket/path
ffi/unsafe
let-assert
early-return
"../ffmpeg-definitions.rkt"
"../private/cstruct-helper.rkt"))
@title[#:tag "ffmpeg-definitions"]{FFmpeg Decoder Definitions}
@author[@author+email["Hans Dijkema" "hans@dijkewijk.nl"]]
@defmodule[racket-audio/ffmpeg-definitions]
This module provides the direct FFmpeg-backed decoder layer used by the audio
pipeline. It is deliberately small and stateful. A caller creates one decoder
instance, opens one file on it, queries the selected audio stream, repeatedly
asks for the next PCM block, and closes the instance again.
The module does not expose FFmpeg metadata. It only exposes the information
needed for playback: stream count, sample rate, channel count, duration,
bitrate, decoded PCM data, and sample positions. The output format is fixed:
interleaved signed 32-bit PCM, four bytes per sample, using FFmpeg's
@tt{AV_SAMPLE_FMT_S32} sample format.
The FFmpeg libraries are loaded when the module is required. The module checks
that the runtime FFmpeg major versions are in the supported range configured by
the implementation. This binding targets the FFmpeg library major versions
used by FFmpeg 6, 7, and 8: @tt{libavutil} 58 to 60, @tt{libavcodec} 60 to 62,
@tt{libavformat} 60 to 62, and @tt{libswresample} 4 to 6. Unsupported runtime
versions fail early, before a decoder instance is used.
On Windows, the private library loader may download the bundled sound-library
set into Racket's add-on directory before the FFI libraries are opened. On
Unix-like systems, the FFmpeg libraries are expected to be installed by the
operating system or platform package manager and to be reachable by Racket's
FFI library search path.
@section{Layering}
This module is the low-level Racket FFI layer. It is normally wrapped by
@filepath{ffmpeg-ffi.rkt} and then by @filepath{ffmpeg-decoder.rkt}. The first
wrapper adapts this module to the command protocol used by the audio decoder
frontend. The second wrapper exposes the callback-oriented decoder interface
used by the rest of the playback pipeline.
The distinction matters for buffer lifetime. At this level,
@racket[fmpg-buffer] returns the current buffer owned by the decoder instance.
The adapter in @filepath{ffmpeg-ffi.rkt} copies that buffer before passing it to
@filepath{ffmpeg-decoder.rkt}. Code that uses this module directly must copy
the buffer itself when the bytes must survive the next decoder operation.
@section{FFmpeg version information}
@defproc[(ffmpeg-version [lib (or/c 'avutil 'avcodec 'avformat
'swr 'swresample)])
(list/c exact-nonnegative-integer?
exact-nonnegative-integer?
exact-nonnegative-integer?)]{
Returns the runtime version of one FFmpeg library as a three-element list
containing the major, minor, and micro version numbers. The symbols
@racket['swr] and @racket['swresample] both refer to @tt{libswresample}.
The version is read from FFmpeg's packed integer value. For example, a runtime
value corresponding to @tt{62.28.100} is returned as @racket['(62 28 100)].
The function raises an exception for an unknown library symbol.
}
The runtime versions determine which partial FFmpeg struct layouts are safe to
use. If a future FFmpeg major release changes a layout before one of the
fields read by this module, the supported range should be extended only after
the affected partial definitions have been checked.
@section{Implementation strategy}
This module talks directly to the FFmpeg shared libraries through Racket's FFI.
There is no C shim that hides FFmpeg's structs or normalizes their layout. The
price of that choice is that the Racket side must know enough of the relevant C
struct layouts to read the fields used by the decoder. The benefit is that the
binding remains a Racket module with direct access to the platform FFmpeg
libraries.
@subsection{C structs and offsets}
Small and stable structures, such as @tt{AVRational} and
@tt{AVChannelLayout}, are described with @racket[define-cstruct]. A
@racket[define-cstruct] form describes the C fields to Racket's FFI. Racket
then calculates the correct field offsets for the current platform ABI and
creates the corresponding pointer type, constructor, accessors and mutators.
The larger FFmpeg structures are handled by @racket[def-cstruct] from
@filepath{private/cstruct-helper.rkt}. Structures such as
@tt{AVCodecParameters}, @tt{AVStream}, @tt{AVFormatContext}, @tt{AVFrame} and
@tt{AVPacket} are large and may differ between FFmpeg major versions. The
decoder only needs a few fields from each one, but those fields must still be
read from their exact native offsets.
The helper solves this by describing the complete field sequence up to the last
field the backend needs. Unnamed entries are used only to advance the offset.
Named entries become generated accessors. Repeated entries such as
@racket[(6 _int)] keep the definition compact while still allowing Racket's FFI
to compute alignment, padding and pointer size correctly. Tail fields after
the last required member are not described.
The right layout is selected when the module is required, after the runtime
FFmpeg major versions have been read from the libraries. For the supported
range, @tt{_AVCodecParameters} uses one layout for @tt{libavcodec} major
version 60 and another for major versions 61 and 62. Likewise,
@tt{_AVFrame} uses one layout for @tt{libavutil} major version 58 and
another for major versions 59 and 60. The other partial structs used by this
module are defined with a single layout across the supported versions.
@subsection{Defensive control flow}
Most FFmpeg calls report ordinary failure through C-style return values or null
pointers. The implementation treats those results as normal control flow. The
@racket[let/assert] form is used for setup paths where each native result must
be checked before the next native call is made. It behaves like a sequential
binding form: each binding can be checked immediately, and a failed check
returns the specified failure value for the whole form.
That style is used for opening a file, selecting stream information, allocating
the codec context, and initializing the resampler. Predicates such as
@tt{a-!nullptr?}, @tt{a-nullptr?}, @tt{a-true?}, and @tt{a->=?} express the
usual FFmpeg checks directly next to the binding that produced the value.
The decode and seek paths also use @racket[early-return] where processing must
stop immediately from a nested position. This keeps the normal FFmpeg outcomes
away from exception-based control flow while still making cleanup actions local
to the point where a failure can occur.
@section{Decoder instances}
A decoder instance is an opaque value returned by @racket[fmpg-init]. Its
structure type and predicate are not exported. Pass the value back to the
functions in this module and do not inspect it directly. The contracts below
therefore use @racket[any/c] for the instance argument. Operationally, that
argument must be a value returned by @racket[fmpg-init].
The instance owns native FFmpeg resources: a format context, a codec context,
an audio frame, a resampler, and the Racket byte string used for the current
PCM block. Finalizers are installed as a last line of defence, but callers
should still call @racket[fmpg-close!] explicitly when playback stops or when
the file is no longer needed. Explicit close keeps the lifetime of native
resources predictable.
@defproc[(fmpg-init) any/c]{
Creates a new decoder instance. The result is an opaque instance value, or
@racket[#f] if the instance could not be created.
Creating the instance does not open a file. Use @racket[fmpg-open-file!]
before querying stream information or decoding audio.
}
@defproc[(fmpg-open-file! [instance any/c]
[filename (or/c path? string?)])
(integer-in 0 1)]{
Opens @racket[filename] on @racket[instance], reads the stream information,
selects the best audio stream, initializes the codec context, and initializes
the resampler.
The function returns @racket[1] on success and @racket[0] on failure. On
failure, partially initialized native state is closed again. A non-string,
non-path filename is treated as an open failure and returns @racket[0].
An instance can only have one file open. Close it with @racket[fmpg-close!]
before opening another file on the same instance.
}
@defproc[(fmpg-close! [instance any/c]) void?]{
Closes @racket[instance] if it is open and releases the native FFmpeg resources
owned by the instance. The codec context, frame and resampler are freed before
the format context is closed. This order avoids keeping decoder pointers that
refer to streams from an already closed container.
The stored audio information is reset. Calling this function with @racket[#f]
or with an already closed instance is harmless.
}
@defproc[(fmpg-is-open [instance any/c]) (integer-in 0 1)]{
Returns @racket[1] when @racket[instance] is ready for decoding and @racket[0]
otherwise. An instance is ready only after a file has been opened, a usable
audio stream has been selected, and the decoder and resampler have been
initialized.
}
@section{Audio stream information}
The decoder selects one audio stream for playback using FFmpeg's best-stream
selection. The stream count reports how many audio streams were found in the
container, but decoding is performed only for the selected stream.
The term @italic{sample} in this module means a sample frame: one time step in
the audio stream, across all channels. For stereo 32-bit output, one sample
frame therefore occupies @racket[(* 2 4)] bytes in the returned PCM buffer.
@defproc[(fmpg-audio-stream-count [instance any/c])
exact-nonnegative-integer?]{
Returns the number of audio streams in the open container. If the instance is
not open, the result is @racket[0]. This count is informational; actual stream
selection is performed during @racket[fmpg-open-file!].
}
@deftogether[
(@defproc[(fmpg-audio-sample-rate [instance any/c])
exact-nonnegative-integer?]
@defproc[(fmpg-audio-channels [instance any/c])
exact-nonnegative-integer?])]{
Return the sample rate and channel count of the selected audio stream. If the
instance is not ready, both functions return @racket[0].
}
@deftogether[
(@defproc[(fmpg-audio-bits-per-sample [instance any/c])
exact-positive-integer?]
@defproc[(fmpg-audio-bytes-per-sample [instance any/c])
exact-positive-integer?])]{
Return the fixed output sample width in bits and bytes. The current output
format is 32-bit signed PCM, so @racket[fmpg-audio-bits-per-sample] returns
@racket[32] and @racket[fmpg-audio-bytes-per-sample] returns @racket[4]. The
values are independent of the input file's original sample format and do not
depend on the instance state.
}
@deftogether[
(@defproc[(fmpg-duration-ms [instance any/c]) exact-integer?]
@defproc[(fmpg-duration-samples [instance any/c]) exact-integer?])]{
Return the duration of the selected audio stream in milliseconds and in sample
frames. If the stream duration is not available, the container duration is
used as a fallback. If no duration can be determined, or when the instance is
not ready, the result is @racket[-1].
}
@defproc[(fmpg-file-bitrate [instance any/c]) exact-integer?]{
Returns the container bitrate in bits per second. If the bitrate is unavailable
or if the instance is not open, the result is @racket[-1]. Only a positive
bitrate reported by FFmpeg is treated as reliable and passed through.
}
@section{Output format}
The decoder output format is intentionally fixed:
@itemlist[
#:style 'compact
@item{sample format: signed 32-bit PCM, @tt{AV_SAMPLE_FMT_S32}}
@item{layout: interleaved}
@item{sample rate: the selected stream's sample rate}
@item{channels: the selected stream's channel count}
]
This keeps the playback layer simple. The FFmpeg input format may be planar,
floating point, compressed, or otherwise different; @tt{libswresample} converts
the decoded frames to the fixed output format before the bytes are exposed to
Racket.
@section{Decoding}
Decoding is block oriented. Each call to @racket[fmpg-decode-next!] clears the
previous PCM block and attempts to produce the next decoded block for the
selected audio stream. When the call returns @racket[1], the block can be read
with @racket[fmpg-buffer] and described with the buffer query functions.
@defproc[(fmpg-decode-next! [instance any/c]) exact-integer?]{
Decodes until a block of PCM output is available, end of stream is reached, or
an error occurs. The return values are:
@itemlist[
#:style 'compact
@item{@racket[1]: a new PCM buffer is available through @racket[fmpg-buffer].}
@item{@racket[0]: decoding is complete and no more PCM is available.}
@item{A negative value: decoding failed or the instance was not ready.}
]
Internally, the decoder first tries to receive frames that FFmpeg may already
have buffered. If no frame is ready, it reads packets until it finds a packet
for the selected audio stream. Packets from other streams are skipped and
immediately unreferenced. Sent packets are unreferenced after
@tt{avcodec_send_packet}, because the codec has then taken what it needs.
At end of input, the function drains both the codec and the resampler. This is
necessary because FFmpeg and @tt{libswresample} may still hold delayed samples
even after the demuxer has no more packets.
}
@section{Decoded buffers}
The PCM buffer belongs to the decoder instance. It is replaced by the next
call to @racket[fmpg-decode-next!], @racket[fmpg-seek-ms!], or
@racket[fmpg-close!]. Treat the returned byte string as read-only. Copy it if
it must outlive the next decoder operation or if another component may mutate
it.
@defproc[(fmpg-buffer [instance any/c]) (or/c bytes? #f)]{
Returns the current decoded PCM block as a byte string, or @racket[#f] when no
PCM block is available.
The byte string contains interleaved signed 32-bit samples. Its logical frame
count is available as the difference between @racket[fmpg-buffer-end-sample]
and @racket[fmpg-buffer-start-sample]. Its byte size is also available through
@racket[fmpg-buffer-size].
}
@defproc[(fmpg-buffer-size [instance any/c]) exact-nonnegative-integer?]{
Returns the number of valid bytes in the current PCM buffer. If no decoder
state is available, or if the size would not fit in the internal integer range,
the function returns @racket[0].
}
@deftogether[
(@defproc[(fmpg-buffer-start-sample [instance any/c])
exact-nonnegative-integer?]
@defproc[(fmpg-buffer-end-sample [instance any/c])
exact-nonnegative-integer?]
@defproc[(fmpg-sample-position [instance any/c])
exact-nonnegative-integer?])]{
Return sample-frame positions for the current decoder state.
@racket[fmpg-buffer-start-sample] returns the first sample frame represented by
the current PCM buffer. @racket[fmpg-buffer-end-sample] returns the half-open
end position: the first sample frame after the current buffer.
@racket[fmpg-sample-position] returns the next sample position the decoder
expects to produce.
These values count sample frames, not individual channel samples. For stereo
audio, one sample frame contains one sample for the left channel and one sample
for the right channel.
}
@section{Seeking}
@defproc[(fmpg-seek-ms! [instance any/c]
[target-pos-ms exact-nonnegative-integer?])
(integer-in 0 1)]{
Seeks the selected audio stream to @racket[target-pos-ms] milliseconds and
resets the decoder and resampler state. The function returns @racket[1] on
success and @racket[0] on failure. Seeking is allowed only when the instance
is already ready for decoding and the target position is non-negative.
Seeking uses FFmpeg's backward seek flag. FFmpeg may therefore seek to a packet
position before the requested target. The decoder stores a discard target in
sample frames. During the following decode calls, frames before the target are
dropped, and frames that overlap the target are trimmed so the exposed PCM
buffer starts at, or as close as FFmpeg can provide to, the requested position.
After a successful seek, the codec buffers are flushed, the resampler is closed
and reinitialized, EOF state is cleared, and sample bookkeeping is reset to the
target position.
}
@section{Resource ownership}
The decoder instance owns the native FFmpeg objects it allocates. The codec
pointer returned by FFmpeg is not owned by the instance, but the codec context,
frame, resampler and format context are. They are released by
@racket[fmpg-close!]. Finalizers are registered as a safety net, but callers
should close decoder instances explicitly.
Temporary native buffers used during resampling are allocated only for the
duration of a conversion step and are always freed before control returns to the
caller. The public PCM buffer is a Racket byte string, so it can safely be
passed to the Racket-side playback backend.
@section{Use through the decoder frontend}
The direct API above is normally wrapped by @filepath{ffmpeg-ffi.rkt} and by
@filepath{ffmpeg-decoder.rkt}. The frontend function @tt{ffmpeg-open} returns
a handle or @racket[#f] when the file does not exist. Its stream-info callback
receives a mutable hash with at least these playback keys:
@racketblock[
(list 'sample-rate
'channels
'bits-per-sample
'bytes-per-sample
'total-samples
'duration)]
The audio callback receives the same hash extended for the current buffer with
these keys:
@racketblock[
(list 'sample
'current-time)]
The hash is followed by a copied byte string and its valid byte count. The
copy is made by @filepath{ffmpeg-ffi.rkt}, not by the low-level buffer function
itself.
The frontend's seek function accepts a percentage of the stream and translates
that percentage to a sample position. The adapter then translates the sample
position to milliseconds and calls @racket[fmpg-seek-ms!]. This is why the
low-level module exposes millisecond seeking while the frontend exposes
percentage seeking.
@section{Examples}
The following example opens a file, decodes all PCM blocks, and reports their
byte ranges and sample ranges. A real playback loop would pass each buffer to
the audio output layer before requesting the next block.
@racketblock[
(define dec (fmpg-init))
(when (and dec (= (fmpg-open-file! dec "track.ogg") 1))
(printf "~a Hz, ~a channels, ~a ms\n"
(fmpg-audio-sample-rate dec)
(fmpg-audio-channels dec)
(fmpg-duration-ms dec))
(let loop ()
(case (fmpg-decode-next! dec)
[(1)
(define pcm (fmpg-buffer dec))
(define size (fmpg-buffer-size dec))
(define start (fmpg-buffer-start-sample dec))
(define end (fmpg-buffer-end-sample dec))
(printf "decoded ~a bytes, samples [~a, ~a)\n"
size start end)
;; Pass pcm to the audio output layer here, or copy it if needed.
(loop)]
[(0)
(printf "done\n")]
[else
(error "decode error")]))
(fmpg-close! dec))
]
A simple seek flow looks the same after the seek succeeds. The following code
moves to 30 seconds and then requests the next decoded buffer.
@racketblock[
(when (= (fmpg-seek-ms! dec 30000) 1)
(when (= (fmpg-decode-next! dec) 1)
(define pcm (fmpg-buffer dec))
(define start (fmpg-buffer-start-sample dec))
(printf "first buffer after seek starts at sample ~a\n" start)))
]
+306
View File
@@ -0,0 +1,306 @@
#lang scribble/manual
@(require (for-label racket/base
racket/contract
"../libao-async-ffi-racket.rkt"))
@title{Pure Racket Asynchronous libao Backend}
@defmodule[racket-audio/libao-async-ffi-racket]
This module implements the asynchronous libao playback backend used by
@racketmodname[racket-audio]. It provides the same public Racket API as the
older C-backed asynchronous player, but keeps the queueing, buffering,
conversion and worker-thread logic in Racket. The only foreign calls made by
this module are the direct calls into Xiph's libao library.
The module is intended as a low-level backend. Higher-level player code should
normally use the public audio-player interface instead of calling this module
directly. It is documented here because it defines the exact contract between
decoded PCM data and the libao output path.
@section{Overview}
The backend accepts decoded PCM buffers, converts them when needed, groups small
buffers into larger playback chunks, and sends those chunks to libao from a
dedicated Racket worker thread. The worker thread calls @racket[ao_play] as a
blocking foreign call, so other Racket threads and places do not have to wait
for the audio device to accept more data.
Incoming buffers may be interleaved or planar. Planar buffers, such as those
commonly produced by a FLAC decoder, are converted to interleaved PCM before
playback. If the requested sample width cannot be opened on the selected audio
device, the backend tries lower-width output formats and converts samples before
they are sent to libao.
The backend also maintains playback position metadata. Each queued buffer is
tagged with a music id, a current playback position and a duration. These
values are used by the higher-level player to report where the audio device is
in the current track.
@section{Buffer information}
@defproc[(make-buffer-info [type symbol?]
[sample-bits exact-positive-integer?]
[sample-rate exact-positive-integer?]
[channels exact-positive-integer?]
[endianness symbol?])
any/c]{
Creates a buffer description object for PCM data passed to
@racket[ao_play_async].
The @racket[type] field describes the memory layout. The supported values are
@racket['interleaved] for normal interleaved PCM and @racket['planar] for planar
PCM. For compatibility with older code, @racket['ao] is treated as interleaved
by convention and @racket['flac] is accepted as planar input.
The @racket[sample-bits], @racket[sample-rate] and @racket[channels] fields
describe the format of the supplied buffer, not necessarily the format that will
eventually be accepted by the device. The backend may convert the sample width
to the actual device width.
The @racket[endianness] field must be one of @racket['little-endian],
@racket['big-endian] or @racket['native-endian]. It is used when samples are
converted between different sample widths or byte orders.}
@defproc[(make-BufferInfo_t [type symbol?]
[sample-bits exact-positive-integer?]
[sample-rate exact-positive-integer?]
[channels exact-positive-integer?]
[endianness symbol?])
any/c]{
Compatibility alias for @racket[make-buffer-info]. The name matches the older
FFI module and the former C structure naming convention.}
@section{Creating and closing a backend}
@defproc[(ao_version_async) exact-integer?]{
Returns the version number of this asynchronous backend implementation. The
current implementation returns @racket[3]. The value is useful for diagnostics
when multiple asynchronous backend implementations exist.}
@defproc[(ao_create_async [bits exact-positive-integer?]
[rate exact-positive-integer?]
[channels exact-positive-integer?]
[byte-format symbol?]
[wav-output-file (or/c #f path-string?)])
any/c]{
Opens a libao output device and creates an asynchronous playback handle.
The @racket[bits], @racket[rate], @racket[channels] and @racket[byte-format]
arguments describe the preferred output format. The byte format must be one of
@racket['little-endian], @racket['big-endian] or @racket['native-endian].
When @racket[wav-output-file] is @racket[#f], the default live libao driver is
used. When it is a path string, the backend opens libao's @tt{wav} driver and
writes the audio stream to that file instead.
The backend first tries to open the requested sample width. If that fails and
the requested width is greater than 24 bits, it tries 24-bit output. If the
device still could not be opened and the requested width is greater than 16
bits, it tries 16-bit output. The actual device width can be queried with
@racket[ao_real_output_bits_async].
The function returns a playback handle on success and @racket[#f] when no
suitable libao device could be opened.}
@defproc[(ao_stop_async [handle any/c]) any/c]{
Clears pending audio, stops the worker thread, closes the libao device and
invalidates @racket[handle].
The stop operation first clears all queued buffers, then queues an internal stop
command, waits for the playback thread to terminate, and finally closes the
underlying libao handle. Calling this function on an already invalid handle is
an error.}
@section{Submitting audio}
@defproc[(ao_play_async [handle any/c]
[music-id any/c]
[at-second real?]
[music-duration real?]
[buf-size exact-nonnegative-integer?]
[au-buf (or/c bytes? any/c)]
[info any/c])
void?]{
Queues a PCM buffer for asynchronous playback.
The @racket[music-id], @racket[at-second] and @racket[music-duration] values are
stored together with the queued buffer. They do not affect sample conversion,
but they allow the player to report the current track id, playback position and
track duration while the worker thread is playing the queued data.
The @racket[buf-size] argument gives the number of valid bytes in
@racket[au-buf]. The input buffer is copied into backend-owned memory before
the function returns, so the caller may reuse or discard the original byte
string after the call.
The @racket[info] argument should be created with @racket[make-buffer-info]. If
the buffer is planar, it is converted to interleaved PCM. If the buffer's
sample width or byte order differs from the actual libao device format, the
backend converts it before queueing.
The backend groups smaller buffers into larger playback chunks. This reduces
the number of calls to libao and helps prevent underruns. Buffers with
different @racket[music-id] values are not merged into the same output chunk.}
@defproc[(ao_clear_async [handle any/c]) any/c]{
Clears all queued audio buffers that have not yet been played.
The current aggregation buffer is also cleared. Already playing audio may still
finish at the device level, depending on what libao and the operating system
have accepted. This operation is used by higher-level code when stopping,
seeking or replacing the current stream.}
@section{Playback state}
@defproc[(ao_is_at_second_async [handle any/c]) real?]{
Returns the playback position associated with the most recently dequeued buffer.
This value is the @racket[at-second] value supplied to @racket[ao_play_async],
not a sample-accurate query into the audio device.}
@defproc[(ao_is_at_music_id_async [handle any/c]) any/c]{
Returns the music id associated with the most recently dequeued buffer. The
higher-level player uses this value to determine which track the output thread
has reached.}
@defproc[(ao_music_duration_async [handle any/c]) real?]{
Returns the duration associated with the most recently dequeued buffer. This is
the @racket[music-duration] value supplied to @racket[ao_play_async].}
@defproc[(ao_bufsize_async [handle any/c]) exact-nonnegative-integer?]{
Returns the number of queued PCM bytes that have been accepted by the backend
but not yet removed from the asynchronous queue. This is a backend queue size,
not the size of the operating-system or hardware audio buffer.}
@defproc[(ao_sample_queue_len [handle any/c]) exact-nonnegative-integer?]{
Returns the number of queued playback elements waiting in the backend queue.
This is mainly useful for diagnostics and tuning.}
@defproc[(ao_reuse_buf_len [handle any/c]) exact-nonnegative-integer?]{
Returns the number of reusable internal buffers currently kept by the backend.
This is a diagnostic value that can help detect excessive allocation or
unexpected buffer retention.}
@section{Pause and volume}
@defproc[(ao_pause_async [handle any/c]
[paused (or/c boolean? integer?)])
void?]{
Pauses or resumes the playback worker.
When @racket[paused] is @racket[#t], or an integer other than @racket[0], the
worker thread is blocked before it dequeues the next element. When
@racket[paused] is @racket[#f] or @racket[0], playback is resumed.
Pausing does not prevent producers from queueing additional buffers. It only
prevents the worker thread from taking more data from the queue.}
@defproc[(ao_set_volume_async [handle any/c]
[percentage real?])
void?]{
Sets the output volume as a percentage.
A value of @racket[100.0] means unchanged volume. Values below
@racket[100.0] attenuate the signal. Values above @racket[100.0] amplify the
signal; the amplified samples are clipped to the signed range of the actual
device sample width.
Internally the value is stored as an integer in hundredths of a percent: for
example, @racket[100.0] becomes @racket[10000]. Values very close to
@racket[100.0] are normalized to exactly @racket[10000] to avoid unnecessary
sample processing.}
@defproc[(ao_volume_async [handle any/c]) real?]{
Returns the currently configured output volume percentage.}
@section{Output format}
@defproc[(ao_real_output_bits_async [handle any/c])
exact-nonnegative-integer?]{
Returns the actual sample width opened on the libao device.
This may be lower than the requested width passed to @racket[ao_create_async].
For example, a request for 32-bit output may result in a 24-bit or 16-bit device
when the default libao driver cannot open the preferred format. In that case,
@racket[ao_play_async] converts the incoming samples before playback.}
@section{Playback buffer tuning}
@defproc[(ao-playback-buf-ms) exact-nonnegative-integer?]{
Returns the target size, in milliseconds, of the playback chunks that the
backend sends to libao. The default is @racket[150].}
@defproc[(ao-set-playback-buf-ms! [ms exact-nonnegative-integer?])
void?]{
Sets the target playback chunk size in milliseconds.
Larger values reduce the number of calls to libao and may help prevent audible
glitches when decoders produce many small buffers. Smaller values reduce
latency but increase scheduling pressure on the Racket worker thread and on the
audio backend.}
@section{Implementation notes}
The worker thread is created with its own thread pool and uses libao's
@racket[ao_play] through a blocking FFI call. Before calling libao, the worker
copies the queued bytes into memory allocated with @racket['atomic-interior].
This is important because a blocking foreign call must not be handed a pointer
to movable Racket memory that could be relocated by the garbage collector while
the foreign function is still using it.
The backend keeps a small pool of previously allocated buffers. Buffers created
internally for conversion or aggregation can be reused after playback. This
reduces allocation pressure during continuous playback.
The module initializes libao when the first handle is opened and shuts libao
down when the last handle is closed. This keeps libao lifetime management local
to the backend and avoids repeated global initialization during normal playback.
@section{Example}
@racketblock[
(define h
(ao_create_async 32 44100 2 'native-endian #f))
(define info
(make-buffer-info 'interleaved 32 44100 2 'native-endian))
(when h
(ao_play_async h
1
0.0
180.0
(bytes-length pcm-bytes)
pcm-bytes
info)
(ao_set_volume_async h 80.0)
(ao_pause_async h #t)
(ao_pause_async h #f)
(ao_stop_async h))
]
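The example opens the default live libao device, queues one interleaved
32-bit PCM buffer, lowers the volume to 80 percent, briefly pauses and resumes
the worker, and finally closes the backend. Here @racket[pcm-bytes] stands for
a byte string of interleaved 32-bit PCM data supplied by the caller; it is not
defined by this module. Because @racket[ao_stop_async] clears all queued
buffers before it shuts the worker down, a real program that wants the queued
audio to be heard should wait for the queue to drain first, for example by
monitoring @racket[ao_bufsize_async], before calling @racket[ao_stop_async].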