Files
gemigreerd-racket-audio/ffmpeg-definitions.rkt
T
2026-06-08 10:27:05 +02:00

1791 lines
66 KiB
Racket

#lang racket/base
(require ffi/unsafe
ffi/unsafe/define
ffi/unsafe/alloc
(for-syntax racket/base)
"private/utils.rkt"
"private/cstruct-helper.rkt"
let-assert
early-return
)
(provide fmpg-init
fmpg-open-file!
fmpg-close!
fmpg-is-open
fmpg-audio-stream-count
fmpg-audio-sample-rate
fmpg-audio-channels
fmpg-audio-bits-per-sample
fmpg-audio-bytes-per-sample
fmpg-duration-ms
fmpg-duration-samples
fmpg-file-bitrate
fmpg-decode-next!
fmpg-seek-ms!
fmpg-buffer
fmpg-buffer-size
fmpg-buffer-start-sample
fmpg-buffer-end-sample
fmpg-sample-position
ffmpeg-version
;; Shared FFmpeg/swresample bindings for encoder-side PCM conversion.
;; Keeping these exports here prevents a second, divergent FFmpeg FFI
;; version layer in private/pcm-converter.rkt.
AV_SAMPLE_FMT_S32
swr_alloc_set_opts2
swr_init
swr_free
swr_get_out_samples
swr_get_delay
swr_convert
ffmpeg-make-default-channel-layout
ffmpeg-channel-layout-uninit!)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; C - Types & Functions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;; Helpers for constants
;; These helpers mirror FFmpeg macros that normally live in C headers.
;; Racket cannot link macros, so only the needed values are rebuilt
;; here.
;; Builds an FFmpeg fourcc/tag value from four characters.
;; FFmpeg uses these tags, among other things, to make error codes recognizable.
(define (mktag a b c d)
(bitwise-ior (char->integer a)
(arithmetic-shift (char->integer b) 8)
(arithmetic-shift (char->integer c) 16)
(arithmetic-shift (char->integer d) 24)))
;; Converts a fourcc/tag to a negative FFmpeg error code.
;; FFmpeg encodes some errors, such as EOF, as negative tag values.
(define (fferrtag a b c d)
(- (mktag a b c d)))
;; Converts an errno value to an FFmpeg AVERROR code.
;; This keeps comparisons with FFmpeg return values consistent.
(define (AVERROR e) (* -1 e))
;;;; Load libraries and get major library versions.
;; Libraries are resolved in a platform-dependent way. On Windows the
;; FFmpeg DLL names often contain the major version; on Unix-like systems
;; the dynamic linker can usually find the generic soname.
(define libavutil (get-lib (case (system-type 'os)
[(windows) '("avutil-60")]
[else '("avutil" "libavutil")]) '(#f)))
(define libswresample (get-lib (case (system-type 'os)
[(windows) '("swresample-6")]
[else '("swresample" "libswresample")]) '(#f)))
(define libavcodec (get-lib (case (system-type 'os)
[(windows) '("avcodec-62")]
[else '("avcodec" "libavcodec")]) '(#f)))
(define libavformat (get-lib (case (system-type 'os)
[(windows) '("avformat-62")]
[else '("avformat" "libavformat")]) '(#f)))
(define-ffi-definer def-avutil libavutil #:default-make-fail make-not-available)
(define-ffi-definer def-swresample libswresample #:default-make-fail make-not-available)
(define-ffi-definer def-avcodec libavcodec #:default-make-fail make-not-available)
(define-ffi-definer def-avformat libavformat #:default-make-fail make-not-available)
(def-avutil avutil_version (_fun -> _uint))
(def-avcodec avcodec_version (_fun -> _uint))
(def-avformat avformat_version (_fun -> _uint))
(def-swresample swresample_version (_fun -> _uint))
(define avutil-version-major (quotient (avutil_version) 65536))
(define avcodec-version-major (quotient (avcodec_version) 65536))
(define avformat-version-major (quotient (avformat_version) 65536))
(define swresample-version-major (quotient (swresample_version) 65536))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Version check
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Returns the runtime version of an FFmpeg library as a list.
;; The major/minor/micro values are extracted from FFmpeg's packed integer,
;; so the rest of the module can choose version-dependent layouts.
(define (ffmpeg-version lib)
;; FFmpeg packs versions as major<<16 | minor<<8 | micro.
(let ((v (λ (v) (list (quotient v 65536) (remainder (quotient v 256) 256) (remainder v 256)))))
(cond ((eq? lib 'avutil) (v (avutil_version)))
((eq? lib 'avcodec) (v (avcodec_version)))
((eq? lib 'avformat) (v (avformat_version)))
((or (eq? lib 'swr)
(eq? lib 'swresample)) (v (swresample_version)))
(else (error (format "Unknown library '~a" lib)))
)
)
)
;; Formats the runtime version of an FFmpeg library as text.
;; This is mainly used in error messages and logging.
(define (ffmpeg-version-string lib)
(apply format (cons "~a.~a.~a" (ffmpeg-version lib))))
;; Support ffmpeg 6, 7 and 8
;; Checks at load time whether the detected FFmpeg major versions are supported.
;; The struct layouts below are deliberately partial and major-version-dependent;
;; therefore an unknown major version must fail early and loudly.
(define-syntax check-support
(syntax-rules ()
((_ lib version-hash)
(let ((from (car (hash-ref version-hash lib)))
(until (cadr (hash-ref version-hash lib))))
;; Only the major version determines whether the C struct layouts below are safe.
(let ((major-version (car (ffmpeg-version lib))))
(cond
((or (< major-version from) (> major-version until))
(error
(format "Unsupported major version of ffmpeg library ~a: ~a (~a).\nSupported range: ~a - ~a"
'lib major-version (ffmpeg-version-string lib) from until)))
(else
(info-sound "Supported ffmpeg library ~a - version ~a between ~a and ~a"
lib (ffmpeg-version-string lib) from until)
)
)
)
)
)
)
)
(check-support 'avutil valid-ffmpeg-versions)
(check-support 'avcodec valid-ffmpeg-versions)
(check-support 'avformat valid-ffmpeg-versions)
(check-support 'swresample valid-ffmpeg-versions)
;;;; Constants
;; AVRational is small and stable enough to define completely.
;; FFmpeg uses this struct everywhere for time_base and scaling factors.
(define-cstruct _AVRational
([num _int]
[den _int]
)
)
(define EAGAIN 11) ; from errno.h
(define INT_MAX (sub1 (arithmetic-shift 1 31)))
(define AV_NOPTS_VALUE #x8000000000000000)
(define AV_TIME_BASE 1000000)
(define AV_TIME_BASE_Q (make-AVRational 1 AV_TIME_BASE))
(define AVSEEK_FLAG_BACKWARD 1)
(define AVERROR_EAGAIN (AVERROR EAGAIN))
(define AVERROR_EOF (fferrtag #\E #\O #\F #\space))
(define _AVCodec (_cpointer/null 'AVCodec))
(define _AVCodecContext (_cpointer/null 'AVCodecContext))
(define _SwrContext (_cpointer/null 'SwrContext))
;;;; enum constants
(define AV_SAMPLE_FMT_NONE -1)
(define AV_SAMPLE_FMT_S32 2)
(define AVMEDIA_TYPE_AUDIO 1)
;;;; potential enum types
(define _AVCodecID _int)
(define _AVMediaType _int)
(define _AVSampleFormat _int)
;;;; C structure layout strategy
;;
;; Racket's ffi/unsafe `define-cstruct` describes a C struct layout to the
;; FFI. It creates a C type descriptor, a pointer type, and generated
;; accessors such as `AVRational-num` or `AVFrame-nb_samples`. For small,
;; stable structs we use `define-cstruct` directly and list all relevant
;; fields in order. Racket then calculates the field offsets from the C
;; types and the platform ABI.
;;
;; The large FFmpeg structs below are different. Types such as
;; AVCodecParameters, AVStream, AVFormatContext, AVFrame, and AVPacket are
;; public enough to read, but their exact field layout changes between
;; FFmpeg major versions and sometimes depends on compatibility fields.
;; Defining every field would make this module brittle and noisy, while
;; defining only the fields we need without their real offsets would be
;; wrong.
;;
;; The local helper `def-cstruct` from private/cstruct-helper.rkt solves
;; this by separating offset calculation from accessor creation:
;;
;; - `make-offsets` receives the complete field sequence up to the last
;; field this module needs. Unnamed entries, for example `_pointer` or
;; `(6 _int)`, are fields that exist only to move the offset forward.
;; - Named entries, for example `(sample_rate _int)`, are the fields for
;; which this module wants generated accessors.
;; - `make-offsets` expands repeated entries and calls `compute-offsets`,
;; so alignment and pointer size are calculated by Racket's FFI for the
;; current platform instead of being hard-coded.
;; - `def-cstruct` then expands back to `define-cstruct`, but only for the
;; named fields, using explicit `#:offset` clauses. The result is a
;; partial struct with correct offsets and with accessors only for the
;; fields this module actually reads.
;;
;; This does not make FFmpeg layout changes disappear: the field sequence
;; below must still match the headers of the loaded major version. The
;; version checks and version-specific branches are therefore part of the
;; safety model. The benefit is that the Racket side stays small while the
;; generated accessors still read from the same byte offsets as C would.
;;
;;;; struct types and partial struct types.
;;;; the least necessary for ffmpeg and wrappers
;; AVChannelLayout is defined partially because playback only needs nb_channels.
;; The remaining fields keep the offsets correct.
(define-cstruct _AVChannelLayout
([order _int] ; enum AVChannelOrder
[nb_channels _int]
[u (_union _uint64 _pointer)]
[opaque _pointer]
)
)
(def-avutil av_channel_layout_default
(_fun _AVChannelLayout-pointer _int -> _void))
(def-avutil av_channel_layout_uninit
(_fun _AVChannelLayout-pointer -> _void))
(define (ffmpeg-make-default-channel-layout channels)
(let ((p (malloc (ctype-sizeof _AVChannelLayout) 'atomic-interior)))
(av_channel_layout_default p channels)
p))
(define (ffmpeg-channel-layout-uninit! p)
(when p (av_channel_layout_uninit p))
#t)
; _AVCodecParameters:
; codec_type : AVMediaType
; codec_id : int / AVCodecID
; ch_layout : AVChannelLayout
; sample_rate : int
; libavcodec stuff.
;; AVCodecParameters differs between FFmpeg majors. Only the fields needed
;; for audio playback are named; intervening fields are included as
;; padding/offset fields so the accessor reads from the right place.
(def-cstruct
_AVCodecParameters
(codec_type codec_id format ch_layout sample_rate)
(if (= avcodec-version-major 60)
;; ffmpeg 6 definition is different and probably backward compatible with ffmpeg 5
;; This is with old channel layout in compiled, which is the case on linux.
;; Probably always, because of backwards compatibility with ffmpeg 5.0 probably.
(make-offsets
(codec_type _AVMediaType)
(codec_id _AVCodecID)
_uint32 ;; codec_tag
_pointer ;; extradata
_int ;; extradata_size
(format _AVSampleFormat)
_int64 ;; bit_rate
(6 _int) ;; bits_per_coded_sample, bits_per_raw_sample,
;; profile, level, width, height
_AVRational ;; sample_aspect_ratio
(6 _int) ;; field_order, color_range, color_primaries,
;; color_trc, color_space, chroma_location
_int ;; video_delay
;; Only when FF_API_OLD_CHANNEL_LAYOUT is active.
_uint64 ;; channel_layout
_int ;; channels
(sample_rate _int)
(5 _int) ;; block_align, frame_size,
;; initial_padding, trailing_padding, seek_preroll
(ch_layout _AVChannelLayout)
;; framerate, coded_side_data, and nb_coded_side_data follow here,
;; but they are not needed.
)
;;;;;;; ffmpeg 7 and 8. (major versions 61 and 62).
(make-offsets (codec_type _AVMediaType)
(codec_id _AVCodecID)
_int32 ;; codec_tag
_pointer ;; extradata
_int ;; extradata_size
_pointer ;; coded_side_data
_int ;; nb_coded_side_data
;; AVCodecParameters.format
;; audio: enum AVSampleFormat
(format _AVSampleFormat)
_int64 ;; bit_rate
(6 _int) ;; bits_per_coded_sample, bits_per_raw_sample,
;; profile, level, width, height,
(2 _AVRational) ;; sample_aspect_ratio, ramerate
(7 _int) ;; field_order, color_range,
;; color_primaries, color_trc,
;; color_space, chroma_location,
;; video_delay
(ch_layout _AVChannelLayout) ;; ch_layout
(sample_rate _int)) ;; sample_rate
)
)
;; Reads the codec id from AVCodecParameters.
;; The wrapper hides the generated cstruct accessor behind a more stable name.
(define (avcodec-pars-codec_id s)
(AVCodecParameters-codec_id s))
;; Reads the media type from AVCodecParameters.
;; This lets the module distinguish audio streams from video/subtitle streams.
(define (avcodec-pars-codec_type s)
(AVCodecParameters-codec_type s))
;; Reads the sample rate from AVCodecParameters.
;; The decoder uses this value for timing, resampling, and sample positions.
(define (avcodec-pars-sample_rate s)
(AVCodecParameters-sample_rate s))
;; Reads the number of channels from the new AVChannelLayout.
;; Only this value is needed for interleaved PCM output and buffer sizing.
(define (avcodec-pars-channels s)
(AVChannelLayout-nb_channels
(AVCodecParameters-ch_layout s)))
;; Reads the FFmpeg sample format from AVCodecParameters.
;; This is the input format for swresample.
(define (avcodec-pars-format s)
(AVCodecParameters-format s))
; _AVStream:
; codecpar : AVCodecParameters*
; time_base : AVRational
; duration : int64
;; AVStream is defined partially. The needed fields are codecpar, time_base,
;; and duration: enough to open the stream and calculate positions.
(def-cstruct
_AVStream
(codec time_base duration)
(make-offsets
_pointer
(2 _int)
(codec _AVCodecParameters-pointer)
_pointer
(time_base _AVRational)
_int64
(duration _int64))
)
;; Returns the codec parameters of an AVStream.
;; The rest of the code works only with codecpar, not with old codec fields.
(define (avstream-codec s)
(AVStream-codec s))
;; Reads the stream duration in stream time_base ticks.
;; This duration is preferred over the container duration when FFmpeg knows it.
(define (avstream-duration s)
(AVStream-duration s))
;; Reads the time_base of an AVStream.
;; This rational is needed to scale timestamps and duration to seconds/samples.
(define (avstream-time_base s)
(AVStream-time_base s))
; _AVFormatContext:
; nb_streams : uint/int
; streams : AVStream**
; duration : int64
; bit_rate : int64
;; AVFormatContext is defined partially. The module only uses the stream array,
;; stream count, container duration, and bitrate; metadata is deliberately ignored.
(def-cstruct
_AVFormatContext
(nb_streams streams duration bit_rate)
(make-offsets
(5 _pointer)
_int
(nb_streams _int)
(streams _pointer)
_uint
_pointer
_uint
_pointer
_string*/utf-8
_int64
(duration _int64)
(bit_rate _int64))
)
;; Reads how many streams the container contains.
;; This determines the upper bound when searching for audio streams.
(define (avformat_nb_streams s)
(AVFormatContext-nb_streams s))
;; Fetches stream i from AVFormatContext.streams.
;; AVFormatContext.streams is an AVStream**; therefore the array pointer is
;; read first, and then the i-th stream pointer is loaded.
(define (avformat_stream s i)
(let ((streams-ptr (AVFormatContext-streams s)))
(ptr-ref streams-ptr _AVStream-pointer i)
))
;; Reads the container duration in AV_TIME_BASE units.
;; This is the fallback when the selected audio stream has no duration itself.
(define (avformat_duration s)
(AVFormatContext-duration s))
;; Reads the container bitrate.
;; The public API returns -1 when FFmpeg reports no positive bitrate.
(define (avformat_bit_rate s)
(AVFormatContext-bit_rate s))
; AVFrame
; data
; nb_samples
; best_effort_timestamp
;; AVFrame is one of the most sensitive layouts: FFmpeg 6 differs from 7/8.
;; Only data, nb_samples, sample_rate, and best_effort_timestamp are named;
;; the rest is included as offset information.
(def-cstruct
_AVFrame
(data nb_samples sample_rate best_effort_timestamp)
(if (= avutil-version-major 58)
;;; avutil 58 (ffmpeg 6.x)
(make-offsets
(data _pointer)
(7 _pointer) ; data
(8 _int) ; linesize
_pointer ; extended-data
(2 _int) ; width, height
(nb_samples _int) ; nb_samples
(3 _int) ; format / key_frame / enum Picturetype ;;; deprecated FRAME_KEY
_AVRational ; sample_aspect_ratio;
(2 _int64) ; pts / pkt_dts
_AVRational ; time_base
(2 _int) ; coded_picture_number / display_picture_number ;; PICTURE_NUMBER
_int ; quality
_pointer ; opaque
_int ; repeat_pict
(2 _int) ; interlaced_frame / top_field_first ;; INTERLACED_FRAME
_int64 ; reordered_opaque ;; REORDERED_OPAQUE
(sample_rate _int); sample_rate
_int64 ; channel_layout ;; OLD_CHANNEL_LAYOUT
(8 _pointer) ; AVBufferRef *buf[AV_NUM_DATA_POINTERS];
_pointer ; AVBufferRef **extended_buf;
_int ; int nb_extended_buf;
_pointer ; AVFrameSideData **side_data;
_int ; nb_side_data;
(6 _int) ; flags / color_range / color_primaries / color_trc / colorspace / chroma_location
(best_effort_timestamp _int64) ; best_effort_timestamp
)
;;; ffmpeg 7, 8 - avutil 59 and 60.
(make-offsets
(data _pointer)
(7 _pointer) ; data
(8 _int) ; linesize
_pointer ; extended-data
(2 _int) ; width, height
(nb_samples _int) ; nb_samples
(2 _int) ; format / enum Picturetype
_AVRational ; sample_aspect_ratio;
(2 _int64) ; pts / pkt_dts
_AVRational ; time_base
_int ; quality
_pointer ; opaque
_int ; repeat_pict
(sample_rate _int); sample_rate
(8 _pointer) ; AVBufferRef *buf[AV_NUM_DATA_POINTERS];
_pointer ; AVBufferRef **extended_buf;
_int ; int nb_extended_buf;
_pointer ; AVFrameSideData **side_data;
_int ; nb_side_data;
(6 _int) ; flags / color_range / color_primaries / color_trc / colorspace / chroma_location
(best_effort_timestamp _int64) ; best_effort_timestamp
)
)
)
;; Passes the address that swr_convert expects as const uint8_t **.
;; AVFrame starts with data[8], so the frame pointer itself points at data[0].
(define (avframe-data frame)
;; swr_convert wants const uint8_t **.
;; AVFrame starts with uint8_t *data[8], so the frame pointer itself
;; is the address of data[0].
frame)
;; Returns the first data plane of an AVFrame.
;; This is only useful if direct access to plane 0 is needed later.
(define (avframe-data0 frame)
;; Only useful if the first plane pointer itself is ever needed.
(AVFrame-data frame))
;; Reads how many sample frames the current AVFrame contains.
;; This value determines how much input is offered to swresample.
(define (avframe-nb-samples frame)
(AVFrame-nb_samples frame))
;; Reads the best-effort timestamp from an AVFrame.
;; That timestamp is the best basis for sample position and seek correction.
(define (avframe-best-effort-timestamp frame)
(AVFrame-best_effort_timestamp frame))
; AVPacket
; stream_index
;; AVPacket is defined partially because only stream_index is needed.
;; It lets the demuxer skip packets from other streams.
(def-cstruct
_AVPacket
(stream_index)
(make-offsets
_pointer ; buf
_int64 ; pts
_int64 ; dts
_pointer ; data
_int ; size
(stream_index _int)))
;; Reads which stream a packet belongs to.
;; Packets from other streams are ignored before they reach the audio decoder.
(define (avpacket-stream-index pkt)
(AVPacket-stream_index pkt))
;;;;; Now import the needed functions
;; The FFI imports below are the minimal FFmpeg functions for:
;; opening/demuxing, codec initialization, frame decode, resampling, seek, and cleanup.
;; Functions that use C pointer-to-pointer cleanup get a raw binding
;; plus a small Racket wrapper that normalizes the stored pointer to #f.
(def-avcodec avcodec_free_context/raw (_fun (_ptr io _AVCodecContext)
-> (p : _AVCodecContext)
-> p ) #:c-id avcodec_free_context)
;; Frees an AVCodecContext and normalizes the result to #f.
;; FFmpeg sets the C pointer to NULL through pointer-to-pointer; the Racket side then stores #f.
(define (avcodec_free_context ctx)
(if ctx
(begin (avcodec_free_context/raw ctx) #f)
#f))
(def-swresample swr_free/raw (_fun (_ptr io _SwrContext)
-> (p : _SwrContext)
-> p) #:c-id swr_free)
;; Frees a SwrContext and normalizes the result to #f.
;; This prevents decoder-storage from keeping an old native pointer after cleanup.
(define (swr_free ctx)
(if ctx
(begin (swr_free/raw ctx) #f)
#f))
(def-avformat avformat_close_input/raw (_fun (_ptr io _AVFormatContext-pointer/null)
-> (p : _AVFormatContext-pointer/null)
-> p) #:c-id avformat_close_input)
;; Closes an AVFormatContext and normalizes the result to #f.
;; This follows FFmpeg ownership: avformat_close_input also closes the input resource.
(define (avformat_close_input inp)
(if inp
(begin (avformat_close_input/raw inp) #f)
#f))
(def-avformat av_find_best_stream
(_fun _AVFormatContext-pointer
_AVMediaType
_int
_int
_pointer ; AVCodec **decoder_ret, hier gewoon #f meegeven
_int
-> _int))
(def-avcodec avcodec_find_decoder
(_fun _AVCodecID -> _AVCodec))
(def-avcodec avcodec_alloc_context3
(_fun _AVCodec -> _AVCodecContext))
(def-avcodec avcodec_parameters_to_context
(_fun _AVCodecContext
_AVCodecParameters-pointer
-> _int))
(def-avcodec avcodec_open2
(_fun _AVCodecContext
_AVCodec
_pointer ; AVDictionary **options, hier nullptr/#f
-> _int))
(def-avutil av_frame_alloc
(_fun -> _AVFrame-pointer/null))
(def-avutil av_frame_unref
(_fun _AVFrame-pointer -> _void))
(def-avutil av_frame_free/raw
(_fun (_ptr io _AVFrame-pointer/null)
-> (p : _AVFrame-pointer/null)
-> p) #:c-id av_frame_free)
;; Frees an AVFrame and normalizes the result to #f.
;; The wrapper prevents the Racket struct from keeping a dangling frame pointer.
(define (av_frame_free frm)
(if frm
(begin (av_frame_free/raw frm) #f)
#f))
(def-swresample swr_alloc_set_opts2
(_fun (ps : (_ptr io _SwrContext))
_AVChannelLayout-pointer ; out_ch_layout
_AVSampleFormat ; out_sample_fmt
_int ; out_sample_rate
_AVChannelLayout-pointer ; in_ch_layout
_AVSampleFormat ; in_sample_fmt
_int ; in_sample_rate
_int ; log_offset
_pointer ; log_ctx
-> (r : _int)
-> (values r ps)))
(def-swresample swr_init
(_fun _SwrContext -> _int))
(def-avcodec avcodec_parameters_alloc
(_fun -> _AVCodecParameters-pointer/null))
(def-avcodec avcodec_parameters_from_context
(_fun _AVCodecParameters-pointer
_AVCodecContext
-> _int))
(def-avcodec avcodec_parameters_free/raw
(_fun (_ptr io _AVCodecParameters-pointer/null)
-> (p : _AVCodecParameters-pointer/null)
-> p) #:c-id avcodec_parameters_free)
;; Frees temporarily used AVCodecParameters and returns #f.
;; This is used for parameter copies that are only needed during initialization.
(define (avcodec_parameters_free par)
(if par
(begin (avcodec_parameters_free/raw par) #f)
#f))
(def-avformat avformat_open_input
(_fun (ctx : (_ptr io _AVFormatContext-pointer/null))
_string/utf-8 ; filename
_pointer ; AVInputFormat *fmt, hier #f
_pointer ; AVDictionary **options, hier #f
-> (r : _int)
-> (values r ctx)))
(def-avformat avformat_find_stream_info
(_fun _AVFormatContext-pointer
_pointer ; AVDictionary **options, hier #f
-> _int))
(def-avutil av_frame_get_best_effort_timestamp
(_fun _AVFrame-pointer -> _int64))
(def-swresample swr_get_out_samples
(_fun _SwrContext _int -> _int))
(def-swresample swr_convert
(_fun _SwrContext _pointer _int _pointer _int -> _int))
(def-swresample swr_get_delay
(_fun _SwrContext _int64 -> _int64))
(def-avutil av_samples_get_buffer_size
(_fun _pointer _int _int _AVSampleFormat _int -> _int))
(def-avcodec avcodec_receive_frame
(_fun _AVCodecContext _AVFrame-pointer -> _int))
(def-avformat av_read_frame
(_fun _AVFormatContext-pointer _AVPacket-pointer -> _int))
(def-avcodec av_packet_alloc
(_fun -> _AVPacket-pointer/null))
(def-avcodec av_packet_unref
(_fun _AVPacket-pointer -> _void))
(def-avcodec av_packet_free/raw
(_fun (_ptr io _AVPacket-pointer/null)
-> (p : _AVPacket-pointer/null)
-> p) #:c-id av_packet_free)
;; Frees an AVPacket and normalizes the result to #f.
;; Packets are short-lived and are explicitly cleaned up to save native memory.
(define (av_packet_free pkg)
(if pkg
(begin (av_packet_free/raw pkg) #f)
#f))
(def-avcodec avcodec_send_packet
(_fun _AVCodecContext _AVPacket-pointer/null -> _int))
(def-avutil av_rescale
(_fun _int64 _int64 _int64 -> _int64))
(def-avutil av_rescale_q
(_fun _int64 _AVRational _AVRational -> _int64))
(def-avformat av_seek_frame
(_fun _AVFormatContext-pointer _int _int64 _int -> _int))
(def-avcodec avcodec_flush_buffers
(_fun _AVCodecContext -> _void))
(def-swresample swr_close
(_fun _SwrContext -> _void))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Constants
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The module always produces 32-bit signed PCM. This keeps the rest of
;; racket-audio from having to deal with every possible FFmpeg sample format.
(define FMPG_OUTPUT_BITS 32)
(define FMPG_OUTPUT_BYTES 4)
(define FMPG_OUTPUT_FMT AV_SAMPLE_FMT_S32)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Internal structures for ffmpeg decoding
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Creates short aliases for struct accessors and mutators.
;; This keeps the decoder code readable without long Racket struct names.
(define-syntax def-struct-helpers
(syntax-rules ()
((_ (struct-get get struct-set set))
(begin
(define get struct-get)
(define set struct-set))
)
((_ (struct-get get))
(define get struct-get)
)
)
)
;; Defines several short struct helper names at once.
;; This is purely syntactic bookkeeping for the storage structs below.
(define-syntax struct-helpers
(syntax-rules ()
((_ a ...)
(begin
(def-struct-helpers a)
...))))
;;;;;;;;;; audio-info-storage
;; audio-info-storage contains only playback-relevant information.
;; Tags/metadata are deliberately omitted; the player needs rate, channels, stream, and duration.
(define-struct audio-info-storage
(audio-stream-count
selected-stream-index
sample-rate
channels
duration-ms
duration-samples
)
#:mutable
#:transparent
)
(struct-helpers
(audio-info-storage-audio-stream-count ais-stream-count
set-audio-info-storage-audio-stream-count! ais-stream-count!)
(audio-info-storage-selected-stream-index ais-stream-index
set-audio-info-storage-selected-stream-index! ais-stream-index!)
(audio-info-storage-sample-rate ais-rate
set-audio-info-storage-sample-rate! ais-rate!)
(audio-info-storage-channels ais-channels
set-audio-info-storage-channels! ais-channels!)
(audio-info-storage-duration-ms ais-duration-ms
set-audio-info-storage-duration-ms! ais-duration-ms!)
(audio-info-storage-duration-samples ais-duration-samples
set-audio-info-storage-duration-samples! ais-duration-samples!)
)
;; Resets audio-info to a closed/unknown state.
;; This prevents old stream data from remaining visible after close or failed open.
(define (ais-clear! s)
(ais-stream-count! s 0)
(ais-stream-index! s -1)
(ais-rate! s 0)
(ais-channels! s 0)
(ais-duration-ms! s -1)
(ais-duration-samples! s -1)
)
;; Creates an empty audio-info storage structure.
;; The values mean: no stream selected and duration still unknown.
(define (new-audio-info-storage)
(let ((a (make-audio-info-storage 0 -1 0 0 -1 -1)))
a))
;;;;;;;;;;; decoder storage
;; decoder-storage owns the native decode resources and keeps the running
;; sample bookkeeping. PCM is the last produced Racket bytes buffer.
(define-struct decoder-storage
(codec ; _AVCodec - Not owned by decoder-storage, global pointer
codec-ctx ; _AVCodecContext
frame ; _AVFrame
swr-ctx ; _SwrContext
pcm ; bytes (maybe bytes-buffer)
eof-seen ; boolean
decoder-drained ; boolean
timecode ; double
last-samples ; integer
buffer-start-sample ; integer
next-sample-position ; integer
discard-until-sample ; integer
)
#:mutable
#:transparent
)
;; Tests whether the decoder currently has no PCM output buffer.
;; This is used to set the start position and timecode only on first output.
(define (pcm-empty? dec)
(zero? (bytes-length (ds-pcm dec))))
;; Tests whether PCM bytes are available for the caller.
;; The public buffer function uses this to return #f for empty output.
(define (pcm-present? dec)
(positive? (bytes-length (ds-pcm dec))))
;; Combines decoder status with the actual PCM buffer.
;; A positive decode step only counts as success when bytes are present too.
(define (produced-pcm? produced dec)
(and (> produced 0) (pcm-present? dec)))
(struct-helpers
(decoder-storage-codec ds-codec set-decoder-storage-codec! ds-codec!)
(decoder-storage-codec-ctx ds-codec-ctx set-decoder-storage-codec-ctx! ds-codec-ctx!)
(decoder-storage-frame ds-frame set-decoder-storage-frame! ds-frame!)
(decoder-storage-swr-ctx ds-swr-ctx set-decoder-storage-swr-ctx! ds-swr-ctx!)
(decoder-storage-pcm ds-pcm set-decoder-storage-pcm! ds-pcm!)
(decoder-storage-eof-seen ds-eof-seen set-decoder-storage-eof-seen! ds-eof-seen!)
(decoder-storage-decoder-drained ds-drained set-decoder-storage-decoder-drained! ds-drained!)
(decoder-storage-timecode ds-timecode set-decoder-storage-timecode! ds-timecode!)
(decoder-storage-last-samples ds-last-samples set-decoder-storage-last-samples! ds-last-samples!)
(decoder-storage-buffer-start-sample ds-start-sample
set-decoder-storage-buffer-start-sample! ds-start-sample!)
(decoder-storage-next-sample-position ds-next-sample-pos
set-decoder-storage-next-sample-position! ds-next-sample-pos!)
(decoder-storage-discard-until-sample ds-discard-until
set-decoder-storage-discard-until-sample! ds-discard-until!)
)
;; Cleans up all native decoder resources in decoder-storage.
;; Codec context, frame, and resampler are owned by this storage and must go together.
(define (free-ffmpeg s)
(when s
(ds-codec-ctx! s (avcodec_free_context (ds-codec-ctx s)))
(ds-frame! s (av_frame_free (ds-frame s)))
(ds-swr-ctx! s (swr_free (ds-swr-ctx s)))
(reset-decoder-storage! s)
)
)
;; Clears only the last produced PCM output.
;; Decoder and seek state remain intact so the next decode step can continue.
(define (ds-clear-output! s)
(ds-pcm! s (make-bytes 0))
(ds-last-samples! s 0)
(ds-start-sample! s (ds-next-sample-pos s))
)
;; Resets decoder-storage to initial values without opening native resources itself.
;; This is used after cleanup and during reinitialization to wipe old state.
(define (reset-decoder-storage! s)
(ds-codec! s #f)
(ds-pcm! s (make-bytes 0))
(ds-eof-seen! s #f)
(ds-drained! s #f)
(ds-timecode! s 0.0)
(ds-last-samples! s 0)
(ds-start-sample! s 0)
(ds-next-sample-pos! s 0)
(ds-discard-until! s -1))
;; Creates decoder-storage and registers a finalizer for native cleanup.
;; The finalizer is a safety net; normal code closes resources explicitly earlier.
(define (new-decoder-storage)
(let ((ds (make-decoder-storage #f #f #f #f
(make-bytes 0)
#f #f
0.0
0 0 0 -1)))
(register-finalizer ds
(λ (s) (free-ffmpeg s)))
ds))
;;;;;;;;;;;;;; fmpg-instance
;; An fmpg-instance groups the container context, audio-info, and decoder state.
;; The public API always works on such an instance, comparable to a C handle.
(define-struct fmpg-instance
(opened ; boolean
format-ctx ; AVFormatContext
audio-info ; audio-info-storage
decoder ; decoder-storage
)
#:mutable
#:transparent
)
;; Creates a new decode instance with its own format context, info, and decoder state.
;; Here too, a finalizer performs cleanup if the user does not close explicitly.
(define (new-fmpg-instance)
(let ((i (make-fmpg-instance #f #f (new-audio-info-storage) (new-decoder-storage))))
(register-finalizer i
(λ (i)
(let ((ctx (fmpg-instance-format-ctx i)))
(when ctx
(set-fmpg-instance-format-ctx! i (avformat_close_input ctx))))
(set-fmpg-instance-audio-info! i #f)
(set-fmpg-instance-decoder! i #f))
)
i))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Helper functions for ffmpeg decoding
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts an AVRational to an inexact number.
;; FFmpeg timestamps use rationals; Racket can then calculate more easily in seconds.
(define-syntax av_q2d
(syntax-rules ()
((_ a)
(exact->inexact (/ (AVRational-num a) (AVRational-den a)))
)
)
)
;; Counts how many streams in the container are audio streams.
;; This is informative for the API; actual selection is done with av_find_best_stream.
(define (count-audio-streams avformat-ctx)
;; Without a format context there is nothing to count; 0 is the safe API value.
(if (eq? avformat-ctx #f)
0
(let ((count 0)
(nb_streams (avformat_nb_streams avformat-ctx))
(i 0)
)
(while (< i nb_streams)
;; For each stream, inspect codec parameters; only audio counts.
(let* ((stream (avformat_stream avformat-ctx i))
(par (avstream-codec stream))
)
(unless (eq? par #f)
(let ((codec-type (avcodec-pars-codec_type par)))
(when (= codec-type AVMEDIA_TYPE_AUDIO)
(set! count (+ count 1))
)
)
)
)
(set! i (+ i 1))
)
count)
)
)
;; Converts seconds to milliseconds, or -1 for unknown/negative duration.
;; Rounding avoids systematic underestimation from floating point conversion.
(define (milliseconds_from_seconds seconds)
(if (< seconds 0.0)
-1
(inexact->exact (round (+ (* seconds 1000.0) 0.5)))))
;; Converts seconds to sample frames, or -1 for unknown duration/rate.
;; This makes duration and seek positions usable for the audio player.
(define (samples_from_seconds seconds sample-rate)
(if (or (< seconds 0.0) (<= sample-rate 0))
-1
(inexact->exact (round (+ (* seconds sample-rate) 0.5)))))
;; Computes the duration of a stream in seconds.
;; AV_NOPTS_VALUE means FFmpeg does not know a reliable stream duration.
(define (stream_duration_seconds stream)
(if (eq? stream #f)
-1.0
(if (= (avstream-duration stream) AV_NOPTS_VALUE)
-1.0
(* (avstream-duration stream) (av_q2d (avstream-time_base stream)))
)
)
)
;; Computes the container duration in seconds.
;; This is fallback information when the selected stream has no duration.
(define (format_duration_seconds ctx)
(if (eq? ctx #f)
-1
(if (= (avformat_duration ctx) AV_NOPTS_VALUE)
-1.0
(exact->inexact (/ (avformat_duration ctx) AV_TIME_BASE))
)
)
)
;; Converts an FFmpeg timestamp to a sample position.
;; If timestamp, stream, or sample rate is missing, -1 is returned as unknown.
(define (timestamp_to_samples timestamp stream sample_rate)
(if (or (eq? stream #f) (= timestamp AV_NOPTS_VALUE) (<= sample_rate 0))
-1
(let ((seconds (exact->inexact
(* timestamp (av_q2d (avstream-time_base stream))))))
(samples_from_seconds seconds sample_rate))))
;; Accepts both path? and string? values as filenames.
;; Other values become #f so fmpg-open-file! can reject them easily.
(define (filename->string filename)
(cond
[(path? filename) (path->string filename)]
[(string? filename) filename]
[else #f]))
;; Fills audio-info for the best audio stream in the opened file.
;; The function checks stream, codec parameters, type, rate, and channels before
;; the instance is considered usable.
(define (fill-audio-info! self)
(let* ((ctx (fmpg-instance-format-ctx self))
(info (fmpg-instance-audio-info self)))
;; Always start with clean info so a failed inspection leaves no old values behind.
(ais-clear! info)
(ais-stream-count! info (count-audio-streams ctx))
;; Each binding checks a condition required for safe playback:
;; best stream found, stream/parameters exist, type is audio, rate/channels are positive.
(early-return
((best (av_find_best_stream ctx AVMEDIA_TYPE_AUDIO -1 -1 #f 0) ? (< best 0) => #f)
(stream (avformat_stream ctx best) ? (not (a-!nullptr? stream)) => #f)
(par (avstream-codec stream) ? (not (a-!nullptr? par)) => #f)
(codec-type (avcodec-pars-codec_type par) ? (not (= codec-type AVMEDIA_TYPE_AUDIO)) => #f)
(sample-rate (avcodec-pars-sample_rate par) ? (<= sample-rate 0) => #f)
(channels (avcodec-pars-channels par) ? (<= channels 0) => #f)
;; Stream duration is more precise; container duration is the fallback for files without one.
(stream-seconds (stream_duration_seconds stream))
(seconds (if (< stream-seconds 0.0) (format_duration_seconds ctx) stream-seconds)))
;; Only after all checks does the selected stream become visible in audio-info.
(ais-stream-index! info best)
(ais-rate! info sample-rate)
(ais-channels! info channels)
(ais-duration-ms! info (milliseconds_from_seconds seconds))
(ais-duration-samples! info (samples_from_seconds seconds sample-rate))
#t)
)
)
;; Checks whether an instance is ready for decode or query operations.
;; There must be an open file, a selected stream, and an active codec/resampler.
(define (instance-ready? instance)
;; All parts must be present before decode or query is safe.
(let ((ready (and instance
(fmpg-instance-opened instance)
(fmpg-instance-format-ctx instance)
(let ((info (fmpg-instance-audio-info instance)))
(and info
(>= (ais-stream-index info) 0)))
(let ((dec (fmpg-instance-decoder instance)))
(and dec
(ds-codec-ctx dec)
(ds-swr-ctx dec)))
#t)))
(unless ready
(err-sound "instance not ready!"))
ready))
;; Initializes the FFmpeg codec context for the selected audio stream.
;; The decoder is found, a context is allocated, parameters are copied,
;; and the codec is opened before frames can be received.
(define (init-codec-context! self)
(early-return
;; The early-return chain prevents later FFmpeg calls from seeing NULL pointers.
;; Base values come from the selected stream and decoder state.
((dec (fmpg-instance-decoder self))
(info (fmpg-instance-audio-info self))
(ctx (fmpg-instance-format-ctx self))
(stream-index (ais-stream-index info))
(stream (avformat_stream ctx stream-index) ? (not (a-!nullptr? stream)) => #f)
(par (avstream-codec stream) ? (not (a-!nullptr? par)) => #f)
;; The decoder belongs to the codec_id from the stream parameters; the pointer is not owned.
(codec (let ((c (avcodec_find_decoder (avcodec-pars-codec_id par))))
(ds-codec! dec c)
c)
? (not (a-!nullptr? codec)) => #f)
;; The codec context is owned by decoder-storage and is freed explicitly later.
(codec-ctx (let ((c (avcodec_alloc_context3 codec)))
(ds-codec-ctx! dec c)
c)
? (not (a-!nullptr? codec-ctx)) => #f)
;; FFmpeg wants codec parameters in the context before the codec can be opened.
(ret-par (avcodec_parameters_to_context codec-ctx par) ? (< ret-par 0) => #f)
(ret-open (avcodec_open2 codec-ctx codec #f) ? (< ret-open 0) => #f)
(frame (let ((f (av_frame_alloc)))
(ds-frame! dec f)
f)
? (not (a-!nullptr? frame)) => #f))
#t)
)
;; Initializes swresample for interleaved 32-bit signed PCM output.
;; Current codec parameters are copied temporarily from the codec context,
;; so channel layout, input format, and sample rate remain consistent.
(define (init-resampler! self)
(early-return
;; swresample is configured only after the codec context exists.
((dec (fmpg-instance-decoder self))
(codec-ctx (ds-codec-ctx dec) ? (not (a-!nullptr? codec-ctx)) => #f)
(par (avcodec_parameters_alloc) ? (not (a-!nullptr? par)) => #f)
(result
(early-return
((ret-par (avcodec_parameters_from_context par codec-ctx) ? (< ret-par 0) => #f)
;; Use the same channel layout for input and output: only the sample format is normalized.
(layout (AVCodecParameters-ch_layout par))
(channels (AVChannelLayout-nb_channels layout) ? (<= channels 0) => #f)
(rate (avcodec-pars-sample_rate par) ? (<= rate 0) => #f)
(fmt (avcodec-pars-format par))
;; swr_alloc_set_opts2 allocates or reuses the SwrContext via pointer-to-pointer.
(ret-swr
(let-values (((ret swr-ctx)
(swr_alloc_set_opts2 (ds-swr-ctx dec)
layout FMPG_OUTPUT_FMT rate
layout fmt rate
0 #f)))
(ds-swr-ctx! dec swr-ctx)
ret)
? (< ret-swr 0) => #f)
(ret-init (swr_init (ds-swr-ctx dec)) ? (< ret-init 0) => #f))
#t)))
(avcodec_parameters_free par)
result
)
)
;; Reinitializes the codec and resampler for the current instance.
;; First the old native state is freed; then codec and resampler are rebuilt.
(define (init-decoder! self)
(let ((dec (fmpg-instance-decoder self)))
;; Old native state must be gone before the instance reuses the same storage.
(free-ffmpeg dec)
(reset-decoder-storage! dec)
(and (init-codec-context! self)
(init-resampler! self))))
;; Public constructor for an FFmpeg decode instance.
;; Initialization errors are caught so the C-like API can return #f.
(define (fmpg-init)
;; This constructor is intended as a robust boundary: exceptions become #f.
(with-handlers ([exn:fail? (lambda (e) #f)])
(new-fmpg-instance)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; API
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Opens an audio file and initializes all decode state.
;; The function fails safely with 0: at every failed step, the half-open instance is closed.
(define (fmpg-open-file! instance filename)
;; First check the API preconditions: valid instance, not already open,
;; no old format context, and a filename that FFmpeg can open.
(let/assert
((instance instance a-!nullptr? 0)
(opened (fmpg-instance-opened instance) a-false? 0)
(old-ctx (fmpg-instance-format-ctx instance) a-nullptr? 0)
(filename (filename->string filename) string? 0))
(let ((result
(let/assert
;; Opening happens first; the received AVFormatContext is stored directly in the instance
;; so cleanup after later errors can use the same path.
((ret-open (let-values (((r ctx) (avformat_open_input #f filename #f #f)))
(set-fmpg-instance-format-ctx! instance ctx)
r)
(a->=? 0) 0)
(ctx (fmpg-instance-format-ctx instance) a-!nullptr? 0)
;; Stream info is needed before codecpar, duration, and best stream are reliable.
(ret-info (avformat_find_stream_info ctx #f) (a->=? 0) 0)
(info-ok (fill-audio-info! instance) a-true? 0)
(dec-ok (init-decoder! instance) a-true? 0))
(begin
(set-fmpg-instance-opened! instance #t)
1))))
;; Every partial failure rolls back to a closed instance.
(when (zero? result) (fmpg-close! instance))
result)))
;; Closes an instance and cleans up all native resources.
;; Audio-info is cleared so later queries do not return stale metadata.
(define (fmpg-close! instance)
(when instance
;; Decoder resources first, then the container; this way decoder pointers no longer
;; refer to streams from an already-closed format context.
(free-ffmpeg (fmpg-instance-decoder instance))
(set-fmpg-instance-format-ctx! instance
(avformat_close_input
(fmpg-instance-format-ctx instance)))
(set-fmpg-instance-opened! instance #f)
(let ((info (fmpg-instance-audio-info instance)))
(when info (ais-clear! info)))))
;; Returns 1 when the instance is ready to decode, otherwise 0.
;; The API uses integers because the outside interface is C-like.
(define (fmpg-is-open instance)
(if (instance-ready? instance) 1 0))
;; Returns the number of audio streams in the opened file.
;; For a closed or missing instance, 0 is the safe value.
(define (fmpg-audio-stream-count instance)
(if (and instance
(fmpg-instance-opened instance))
(ais-stream-count (fmpg-instance-audio-info instance))
0))
;; Returns the sample rate of the selected audio stream.
;; For an instance that is not ready, 0 is returned.
(define (fmpg-audio-sample-rate instance)
(if (instance-ready? instance)
(ais-rate (fmpg-instance-audio-info instance))
0))
;; Returns the number of channels in the selected audio stream.
;; For an instance that is not ready, 0 is returned.
(define (fmpg-audio-channels instance)
(if (instance-ready? instance)
(ais-channels (fmpg-instance-audio-info instance))
0))
;; Returns the bit depth of the PCM output.
;; The decoder always normalizes to 32-bit signed samples.
(define (fmpg-audio-bits-per-sample instance)
FMPG_OUTPUT_BITS)
;; Returns the number of bytes per PCM sample.
;; This follows FMPG_OUTPUT_BITS and therefore remains constant at 4.
(define (fmpg-audio-bytes-per-sample instance)
FMPG_OUTPUT_BYTES)
;; Returns the selected audio duration in milliseconds.
;; -1 means FFmpeg could not provide a usable duration.
(define (fmpg-duration-ms instance)
(if (instance-ready? instance)
(ais-duration-ms (fmpg-instance-audio-info instance))
-1))
;; Returns the selected audio duration in sample frames.
;; -1 means unknown; the value is computed from stream or container duration.
(define (fmpg-duration-samples instance)
(if (instance-ready? instance)
(ais-duration-samples (fmpg-instance-audio-info instance))
-1))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Decoding
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Appends native PCM bytes to the Racket output buffer.
;; The function checks the pointer and maximum buffer size before copying bytes.
(define (append-bytes! dec src nbytes)
(cond
[(zero? nbytes) #t]
[else
;; src must be a native pointer and the new buffer must not grow beyond INT_MAX.
(let/assert
((src src a-!nullptr? #f)
(pcm (ds-pcm dec))
(old-size (bytes-length pcm))
(new-size (+ old-size nbytes) (a-<=? INT_MAX) #f))
;; Racket bytes are the ownership form that the caller can safely keep/read.
(let ((new-pcm (make-bytes new-size)))
(bytes-copy! new-pcm 0 pcm 0 old-size)
(memcpy new-pcm old-size src 0 nbytes)
(ds-pcm! dec new-pcm)
#t))]))
;; Determines which part of a converted frame must be kept.
;; After seek, FFmpeg may start before the target; this cuts pre-roll samples away.
(define (select-frame-range! dec frame-start out-samples channels)
(let ((keep-start frame-start)
(keep-samples out-samples)
(byte-offset 0)
(dropped-all? #f))
;; discard-until is active only immediately after seek. Negative means: discard nothing.
(when (>= (ds-discard-until dec) 0)
(let* ((target (ds-discard-until dec))
(frame-end (+ frame-start out-samples)))
(cond
;; The whole frame lies before the target position; drop it all and advance position.
[(<= frame-end target)
(ds-next-sample-pos! dec frame-end)
(set! keep-samples 0)
(set! dropped-all? #t)]
[else
;; The frame overlaps the target position; skip leading bytes and keep the rest.
(when (< frame-start target)
(let ((drop (- target frame-start)))
(when (and (> drop 0) (< drop out-samples))
(set! byte-offset (* drop channels FMPG_OUTPUT_BYTES))
(set! keep-samples (- out-samples drop))
(set! keep-start target))))
(ds-discard-until! dec -1)])))
(values keep-start keep-samples byte-offset dropped-all?)))
;; Allocates temporary native output buffers for swr_convert and always frees them.
;; swr_convert expects an array of plane pointers; for interleaved output this is an array
;; with exactly one pointer to the temporary PCM buffer.
(define (call-with-swr-output-buffer max-bytes proc)
(early-return
;; Both allocations are native because swr_convert reads/writes C pointers.
((tmp (malloc max-bytes 'atomic-interior) ? (eq? tmp #f) => #f)
(out-planes (malloc _pointer 1 'atomic-interior)
? (eq? out-planes #f) => #f)
;; Interleaved output has one plane; out-planes[0] points to tmp.
(do (ptr-set! out-planes _pointer 0 tmp))
(result (proc tmp out-planes)))
result)
)
;; Converts a received AVFrame to 32-bit PCM and appends it to the output buffer.
;; Buffer length, timestamp, seek pre-roll, and sample bookkeeping meet here.
(define (append-converted-frame! self frame)
(let/assert
((dec (fmpg-instance-decoder self))
(info (fmpg-instance-audio-info self))
(channels (ais-channels info))
(sample-rate (ais-rate info))
(nb-samples (avframe-nb-samples frame)))
(cond
;; No channels or samples means the frame yields no usable audio, but this is not an error.
[(or (<= channels 0) (<= nb-samples 0)) #t]
[else
(let/assert
;; First ask swresample how much output can be produced and allocate for that.
((max-out-samples (swr_get_out_samples (ds-swr-ctx dec) nb-samples) (a->? 0) #f)
(max-bytes (av_samples_get_buffer_size #f channels max-out-samples FMPG_OUTPUT_FMT 1) (a->? 0) #f))
(call-with-swr-output-buffer
max-bytes
(lambda (tmp out-planes)
(let/assert
;; Convert the frame to the fixed FMPG_OUTPUT_FMT.
((out-samples (swr_convert (ds-swr-ctx dec) out-planes max-out-samples
(avframe-data frame) nb-samples)
(a->=? 0) #f)
(used-bytes (av_samples_get_buffer_size #f channels out-samples FMPG_OUTPUT_FMT 1) (a->=? 0) #f)
(stream-index (ais-stream-index info))
(stream (avformat_stream (fmpg-instance-format-ctx self) stream-index) a-!nullptr? #f)
;; Use the frame timestamp if FFmpeg has one; otherwise continue the current position.
(frame-start0 (timestamp_to_samples (avframe-best-effort-timestamp frame) stream sample-rate))
(frame-start (if (< frame-start0 0) (ds-next-sample-pos dec) frame-start0)))
(let-values (((keep-start keep-samples byte-offset dropped-all?)
(select-frame-range! dec frame-start out-samples channels)))
;; Apply seek trimming before bytes are appended to the PCM buffer.
(cond
[dropped-all? #t]
[(<= keep-samples 0)
(ds-next-sample-pos! dec (+ frame-start out-samples))
#t]
[else
;; The first output in this decode call determines buffer-start and timecode.
(when (pcm-empty? dec)
(ds-start-sample! dec keep-start)
(ds-timecode! dec (/ (exact->inexact keep-start) (exact->inexact sample-rate))))
(let ((keep-bytes (* keep-samples channels FMPG_OUTPUT_BYTES)))
(let/assert
((ok? (append-bytes! dec (ptr-add tmp byte-offset) keep-bytes) (a-!eq? #f) #f))
(ds-last-samples! dec (+ (ds-last-samples dec) keep-samples))
(ds-next-sample-pos! dec (+ keep-start keep-samples))
#t))]))))))])))
;; Receives all frames currently available from the FFmpeg decoder.
;; The function stops at EAGAIN, marks drain at EOF, and converts each received frame.
(define (receive-available-frames! self)
(let ((dec (fmpg-instance-decoder self))
(produced 0))
(let loop ()
;; avcodec_receive_frame may return multiple frames after one packet, so loop until EAGAIN/EOF.
(let ((ret (avcodec_receive_frame (ds-codec-ctx dec) (ds-frame dec))))
(cond
;; EAGAIN means: no frame right now; send more packets first.
[(= ret AVERROR_EAGAIN) produced]
;; EOF means the decoder itself is empty; the resampler may still have delay.
[(= ret AVERROR_EOF) (ds-drained! dec #t) produced]
[(< ret 0)
(err-sound "Got retvalue ~a from avcodec_receive_frame" ret)
-1]
[else
(let ((ok? (append-converted-frame! self (ds-frame dec))))
;; The AVFrame is reused; unref releases FFmpeg's internal buffers.
(av_frame_unref (ds-frame dec))
(if ok?
(begin
(when (> (ds-last-samples dec) 0) (set! produced 1))
(loop))
-1))])))))
;; Reads packets until a packet for the selected audio stream is found.
;; Packets from other streams are immediately unref'ed so they keep no native buffers.
(define (read-selected-audio-packet! self pkt)
(let ((wanted-stream (ais-stream-index (fmpg-instance-audio-info self))))
(let loop ()
;; av_read_frame returns all streams interleaved; filter here on the selected audio stream.
(let ((ret (av_read_frame (fmpg-instance-format-ctx self) pkt)))
(cond
[(= ret AVERROR_EOF)
'eof]
[(< ret 0)
(err-sound "av_read_frame failed: ~a" ret)
'error]
[(= (avpacket-stream-index pkt) wanted-stream)
'packet]
[else
;; Non-audio or non-selected stream: release packet contents immediately and continue.
(av_packet_unref pkt)
(loop)])))))
;; Pulls delayed samples out of swresample after the decoder has been drained.
;; Resamplers may still hold output internally; without this step the end of audio is lost.
(define (drain-resampler! self)
(let* ((dec (fmpg-instance-decoder self))
(info (fmpg-instance-audio-info self))
(channels (ais-channels info))
(sample-rate (ais-rate info))
(sample-rate* (exact->inexact sample-rate))
(swr-ctx (ds-swr-ctx dec)))
(let loop ((produced 0))
(early-return
;; swr_get_delay reports how many samples may still be in the resampler buffer.
((delay (swr_get_delay swr-ctx sample-rate)
? (<= delay 0) => produced)
(max-bytes (av_samples_get_buffer_size #f channels delay FMPG_OUTPUT_FMT 1)
? (<= max-bytes 0) => produced)
(tmp (malloc max-bytes 'atomic-interior)
? (eq? tmp #f) => -1)
(out-planes (malloc _pointer 1 'atomic-interior)
? (eq? out-planes #f) => -1)
(do (ptr-set! out-planes _pointer 0 tmp))
;; Null input with 0 samples asks swresample to flush delayed output.
(out-samples (swr_convert swr-ctx out-planes delay #f 0)
? (<= out-samples 0) => produced)
(used-bytes (av_samples_get_buffer_size #f channels out-samples
FMPG_OUTPUT_FMT 1)
? (< used-bytes 0) => produced)
;; If this is the first output, the buffer belongs to the current next-sample-pos.
(do
(when (pcm-empty? dec)
(let ((start-sample (ds-next-sample-pos dec)))
(ds-start-sample! dec start-sample)
(ds-timecode! dec (/ (exact->inexact start-sample)
sample-rate*)))))
(appended? (append-bytes! dec tmp used-bytes)
? (not appended?) => -1)
)
(ds-last-samples! dec (+ (ds-last-samples dec) out-samples))
(ds-next-sample-pos! dec (+ (ds-next-sample-pos dec)
out-samples))
(loop 1))
)
)
)
;; Decodes until PCM is available, EOF is reached, or an error occurs.
;; Return values: 1 = PCM available, 0 = done/no more PCM, negative = error.
(define (fmpg-decode-next! instance)
(define (r m r . e)
(when (or (eq? r #f) (< r 0))
(err-sound "fmpg-decode-next! : ~a - ~a - ~a" m r e))
r)
;; #f = continue reading/sending packets, 1 = PCM available, negative = error.
(define (receive-result! self dec)
;; Try receiving first; FFmpeg may still have frames ready from previous packets.
(let ((produced (receive-available-frames! self)))
(cond
[(< produced 0) -1]
[(produced-pcm? produced dec) 1]
[else #f])))
(define (send-packet-result! dec pkt)
;; After send_packet the packet may always be unref'ed; the decoder has taken what it needs.
(let ((ret (avcodec_send_packet (ds-codec-ctx dec) pkt)))
(av_packet_unref pkt)
ret))
(early-return
((? (not (instance-ready? instance)) => (r "instance-ready" -1))
(dec (fmpg-instance-decoder instance))
;; Each call produces at most one new public PCM buffer; clear old output first.
(do (ds-clear-output! dec))
;; Before reading a new packet, first receive pending decoder frames.
(received (receive-result! instance dec)
? received => (r "receive-result!" received))
(pkt (av_packet_alloc)
? (eq? pkt #f) => (r "av_packet_alloc" -1))
(packet-result
(let loop ()
(cond
;; If the demuxer already reported EOF, no more packets need to be read.
[(ds-eof-seen dec) #f]
[else
(let ((packet-status (read-selected-audio-packet! instance pkt)))
(cond
[(eq? packet-status 'eof)
(ds-eof-seen! dec #t)
(av_packet_unref pkt)
#f]
[(eq? packet-status 'error)
(av_packet_unref pkt)
-1]
[(eq? packet-status 'packet)
(let ((ret (send-packet-result! dec pkt)))
(cond
;; The decoder asks for receive_frame before more packets may be sent.
[(= ret AVERROR_EAGAIN)
(let ((received (receive-result! instance dec)))
(if received received (loop)))]
[(< ret 0)
ret]
[else
(let ((received (receive-result! instance dec)))
(if received received (loop)))]))]
[else
(err-sound "read-selected-audio-packet!: unexpected result ~a"
packet-status)
-1]))]))
? packet-result => (r "packet-result" packet-result)
~ (av_packet_free pkt))
(do (av_packet_free pkt))
;; When all packets have been read, send NULL to drain the decoder.
(drain-result
(and (not (ds-drained dec))
(let ((ret (avcodec_send_packet (ds-codec-ctx dec) #f)))
(cond
[(= ret AVERROR_EOF)
(ds-drained! dec #t)
#f]
[(= ret AVERROR_EAGAIN)
(receive-result! instance dec)]
[(< ret 0)
ret]
[else
(receive-result! instance dec)])))
? drain-result => (r "drain-result" drain-result))
;; After decoder drain, samples may still be in swresample; flush those too.
(produced (drain-resampler! instance)
? (< produced 0) => (r "drain-resampler!" produced)))
(cond
[(produced-pcm? produced dec) 1]
[else
(dbg-sound "fmpg-decode-next!: eof/no more pcm")
0])))
;; Seeks to a position in milliseconds and resets decoder/resampler state.
;; FFmpeg seeks to a suitable earlier packet position; discard-until then corrects
;; the extra samples to exactly the requested sample position.
(define (fmpg-seek-ms! instance target-pos-ms)
(early-return
;; Seek is allowed only on a decode-ready instance and to a non-negative position.
((? (or (not (instance-ready? instance)) (< target-pos-ms 0)) => 0)
(info (fmpg-instance-audio-info instance))
(dec (fmpg-instance-decoder instance))
(ctx (fmpg-instance-format-ctx instance))
(stream-index (ais-stream-index info))
(stream (avformat_stream ctx stream-index)
? (not (a-!nullptr? stream)) => 0)
;; Convert milliseconds first to microseconds and then to stream time_base.
(pos-us (av_rescale target-pos-ms AV_TIME_BASE 1000))
(stream-ts (av_rescale_q pos-us AV_TIME_BASE_Q (avstream-time_base stream)))
;; BACKWARD seeks to an earlier key/packet point; exact correction happens via discard-until.
(ret-seek (av_seek_frame ctx stream-index stream-ts AVSEEK_FLAG_BACKWARD)
? (< ret-seek 0) => 0)
(target-samples (samples_from_seconds (/ target-pos-ms 1000.0) (ais-rate info)))
;; Old decoder and resampler buffers belong to the position before the seek and must go.
(do (avcodec_flush_buffers (ds-codec-ctx dec))
(swr_close (ds-swr-ctx dec)))
(ret-swr (swr_init (ds-swr-ctx dec))
? (< ret-swr 0) => 0)
(pos (if (>= target-samples 0) target-samples 0)))
;; Reset sample bookkeeping to the target position; later frames may still contain pre-roll.
(ds-pcm! dec (make-bytes 0))
(ds-last-samples! dec 0)
(ds-start-sample! dec pos)
(ds-next-sample-pos! dec pos)
(ds-discard-until! dec target-samples)
(ds-timecode! dec (/ target-pos-ms 1000.0))
(ds-eof-seen! dec #f)
(ds-drained! dec #f)
1)
)
;; Internal helper that safely extracts decoder-storage from an instance.
;; If the instance is #f, #f is returned as well.
(define (fmpg-decoder instance)
(and instance (fmpg-instance-decoder instance)))
;; Returns the last produced PCM buffer.
;; With no decoder or empty output, #f is returned.
(define (fmpg-buffer instance)
(let ((dec (fmpg-decoder instance)))
(if (and dec (not (pcm-empty? dec))) (ds-pcm dec) #f)))
;; Returns the size of the current PCM buffer in bytes.
;; Values above INT_MAX are reported as 0 because the outside interface uses int sizes.
(define (fmpg-buffer-size instance)
(let ((dec (fmpg-decoder instance)))
(if dec
(let ((n (bytes-length (ds-pcm dec))))
(if (> n INT_MAX) 0 n))
0)))
;; Returns how many sample frames are in the current PCM buffer.
;; This is independent of the number of channels.
(define (fmpg-buffer-samples instance)
(let ((dec (fmpg-decoder instance)))
(if dec (ds-last-samples dec) 0)))
;; Returns the sample position at the start of the current PCM buffer.
;; The player can use this to connect playback position to buffer contents.
(define (fmpg-buffer-start-sample instance)
(let ((dec (fmpg-decoder instance)))
(if dec (ds-start-sample dec) 0)))
;; Returns the sample position immediately after the current PCM buffer.
;; This is start position plus the number of sample frames in the buffer.
(define (fmpg-buffer-end-sample instance)
(let ((dec (fmpg-decoder instance)))
(if dec (+ (ds-start-sample dec) (ds-last-samples dec)) 0)))
;; Returns the next sample position the decoder expects to produce.
;; This value continues across decode calls and is reset by seek.
(define (fmpg-sample-position instance)
(let ((dec (fmpg-decoder instance)))
(if dec (ds-next-sample-pos dec) 0)))
;; Returns the timecode in seconds of the current output buffer.
;; With no decoder, 0.0 is the safe fallback.
(define (fmpg-timecode instance)
(let ((dec (fmpg-decoder instance)))
(if dec (ds-timecode dec) 0.0)))
;; Returns the container bitrate, or -1 when it is unknown.
;; Only positive FFmpeg bitrates are passed through as reliable.
(define (fmpg-file-bitrate instance)
(let ((ctx (and instance (fmpg-instance-format-ctx instance))))
(if ctx
(let ((br (avformat_bit_rate ctx)))
(if (> br 0) br -1))
-1)))