#lang racket/base (require ffi/unsafe ffi/unsafe/define ffi/unsafe/alloc (for-syntax racket/base) "private/utils.rkt" "private/cstruct-helper.rkt" let-assert early-return ) (provide fmpg-init fmpg-open-file! fmpg-close! fmpg-is-open fmpg-audio-stream-count fmpg-audio-sample-rate fmpg-audio-channels fmpg-audio-bits-per-sample fmpg-audio-bytes-per-sample fmpg-duration-ms fmpg-duration-samples fmpg-file-bitrate fmpg-decode-next! fmpg-seek-ms! fmpg-buffer fmpg-buffer-size fmpg-buffer-start-sample fmpg-buffer-end-sample fmpg-sample-position ffmpeg-version) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; C - Types & Functions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Helpers for constants ;; These helpers mirror FFmpeg macros that normally live in C headers. ;; Racket cannot link macros, so only the needed values are rebuilt ;; here. ;; Builds an FFmpeg fourcc/tag value from four characters. ;; FFmpeg uses these tags, among other things, to make error codes recognizable. (define (mktag a b c d) (bitwise-ior (char->integer a) (arithmetic-shift (char->integer b) 8) (arithmetic-shift (char->integer c) 16) (arithmetic-shift (char->integer d) 24))) ;; Converts a fourcc/tag to a negative FFmpeg error code. ;; FFmpeg encodes some errors, such as EOF, as negative tag values. (define (fferrtag a b c d) (- (mktag a b c d))) ;; Converts an errno value to an FFmpeg AVERROR code. ;; This keeps comparisons with FFmpeg return values consistent. (define (AVERROR e) (* -1 e)) ;;;; Load libraries and get major library versions. ;; Libraries are resolved in a platform-dependent way. On Windows the ;; FFmpeg DLL names often contain the major version; on Unix-like systems ;; the dynamic linker can usually find the generic soname. (define libavutil (get-lib (case (system-type 'os) [(windows) '("avutil-60")] [else '("avutil" "libavutil")]) '(#f))) (define libswresample (get-lib (case (system-type 'os) [(windows) '("swresample-6")] [else '("swresample" "libswresample")]) '(#f))) (define libavcodec (get-lib (case (system-type 'os) [(windows) '("avcodec-62")] [else '("avcodec" "libavcodec")]) '(#f))) (define libavformat (get-lib (case (system-type 'os) [(windows) '("avformat-62")] [else '("avformat" "libavformat")]) '(#f))) (define-ffi-definer def-avutil libavutil #:default-make-fail make-not-available) (define-ffi-definer def-swresample libswresample #:default-make-fail make-not-available) (define-ffi-definer def-avcodec libavcodec #:default-make-fail make-not-available) (define-ffi-definer def-avformat libavformat #:default-make-fail make-not-available) (def-avutil avutil_version (_fun -> _uint)) (def-avcodec avcodec_version (_fun -> _uint)) (def-avformat avformat_version (_fun -> _uint)) (def-swresample swresample_version (_fun -> _uint)) (define avutil-version-major (quotient (avutil_version) 65536)) (define avcodec-version-major (quotient (avcodec_version) 65536)) (define avformat-version-major (quotient (avformat_version) 65536)) (define swresample-version-major (quotient (swresample_version) 65536)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Version check ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Returns the runtime version of an FFmpeg library as a list. ;; The major/minor/micro values are extracted from FFmpeg's packed integer, ;; so the rest of the module can choose version-dependent layouts. (define (ffmpeg-version lib) ;; FFmpeg packs versions as major<<16 | minor<<8 | micro. (let ((v (λ (v) (list (quotient v 65536) (remainder (quotient v 256) 256) (remainder v 256))))) (cond ((eq? lib 'avutil) (v (avutil_version))) ((eq? lib 'avcodec) (v (avcodec_version))) ((eq? lib 'avformat) (v (avformat_version))) ((or (eq? lib 'swr) (eq? lib 'swresample)) (v (swresample_version))) (else (error (format "Unknown library '~a" lib))) ) ) ) ;; Formats the runtime version of an FFmpeg library as text. ;; This is mainly used in error messages and logging. (define (ffmpeg-version-string lib) (apply format (cons "~a.~a.~a" (ffmpeg-version lib)))) ;; Support ffmpeg 6, 7 and 8 ;; Checks at load time whether the detected FFmpeg major versions are supported. ;; The struct layouts below are deliberately partial and major-version-dependent; ;; therefore an unknown major version must fail early and loudly. (define-syntax check-support (syntax-rules () ((_ lib version-hash) (let ((from (car (hash-ref version-hash lib))) (until (cadr (hash-ref version-hash lib)))) ;; Only the major version determines whether the C struct layouts below are safe. (let ((major-version (car (ffmpeg-version lib)))) (cond ((or (< major-version from) (> major-version until)) (error (format "Unsupported major version of ffmpeg library ~a: ~a (~a).\nSupported range: ~a - ~a" 'lib major-version (ffmpeg-version-string lib) from until))) (else (info-sound "Supported ffmpeg library ~a - version ~a between ~a and ~a" lib (ffmpeg-version-string lib) from until) ) ) ) ) ) ) ) (check-support 'avutil valid-ffmpeg-versions) (check-support 'avcodec valid-ffmpeg-versions) (check-support 'avformat valid-ffmpeg-versions) (check-support 'swresample valid-ffmpeg-versions) ;;;; Constants ;; AVRational is small and stable enough to define completely. ;; FFmpeg uses this struct everywhere for time_base and scaling factors. (define-cstruct _AVRational ([num _int] [den _int] ) ) (define EAGAIN 11) ; from errno.h (define INT_MAX (sub1 (arithmetic-shift 1 31))) (define AV_NOPTS_VALUE #x8000000000000000) (define AV_TIME_BASE 1000000) (define AV_TIME_BASE_Q (make-AVRational 1 AV_TIME_BASE)) (define AVSEEK_FLAG_BACKWARD 1) (define AVERROR_EAGAIN (AVERROR EAGAIN)) (define AVERROR_EOF (fferrtag #\E #\O #\F #\space)) (define _AVCodec (_cpointer/null 'AVCodec)) (define _AVCodecContext (_cpointer/null 'AVCodecContext)) (define _SwrContext (_cpointer/null 'SwrContext)) ;;;; enum constants (define AV_SAMPLE_FMT_NONE -1) (define AV_SAMPLE_FMT_S32 2) (define AVMEDIA_TYPE_AUDIO 1) ;;;; potential enum types (define _AVCodecID _int) (define _AVMediaType _int) (define _AVSampleFormat _int) ;;;; C structure layout strategy ;; ;; Racket's ffi/unsafe `define-cstruct` describes a C struct layout to the ;; FFI. It creates a C type descriptor, a pointer type, and generated ;; accessors such as `AVRational-num` or `AVFrame-nb_samples`. For small, ;; stable structs we use `define-cstruct` directly and list all relevant ;; fields in order. Racket then calculates the field offsets from the C ;; types and the platform ABI. ;; ;; The large FFmpeg structs below are different. Types such as ;; AVCodecParameters, AVStream, AVFormatContext, AVFrame, and AVPacket are ;; public enough to read, but their exact field layout changes between ;; FFmpeg major versions and sometimes depends on compatibility fields. ;; Defining every field would make this module brittle and noisy, while ;; defining only the fields we need without their real offsets would be ;; wrong. ;; ;; The local helper `def-cstruct` from private/cstruct-helper.rkt solves ;; this by separating offset calculation from accessor creation: ;; ;; - `make-offsets` receives the complete field sequence up to the last ;; field this module needs. Unnamed entries, for example `_pointer` or ;; `(6 _int)`, are fields that exist only to move the offset forward. ;; - Named entries, for example `(sample_rate _int)`, are the fields for ;; which this module wants generated accessors. ;; - `make-offsets` expands repeated entries and calls `compute-offsets`, ;; so alignment and pointer size are calculated by Racket's FFI for the ;; current platform instead of being hard-coded. ;; - `def-cstruct` then expands back to `define-cstruct`, but only for the ;; named fields, using explicit `#:offset` clauses. The result is a ;; partial struct with correct offsets and with accessors only for the ;; fields this module actually reads. ;; ;; This does not make FFmpeg layout changes disappear: the field sequence ;; below must still match the headers of the loaded major version. The ;; version checks and version-specific branches are therefore part of the ;; safety model. The benefit is that the Racket side stays small while the ;; generated accessors still read from the same byte offsets as C would. ;; ;;;; struct types and partial struct types. ;;;; the least necessary for ffmpeg and wrappers ;; AVChannelLayout is defined partially because playback only needs nb_channels. ;; The remaining fields keep the offsets correct. (define-cstruct _AVChannelLayout ([order _int] ; enum AVChannelOrder [nb_channels _int] [u (_union _uint64 _pointer)] [opaque _pointer] ) ) ; _AVCodecParameters: ; codec_type : AVMediaType ; codec_id : int / AVCodecID ; ch_layout : AVChannelLayout ; sample_rate : int ; libavcodec stuff. ;; AVCodecParameters differs between FFmpeg majors. Only the fields needed ;; for audio playback are named; intervening fields are included as ;; padding/offset fields so the accessor reads from the right place. (def-cstruct _AVCodecParameters (codec_type codec_id format ch_layout sample_rate) (if (= avcodec-version-major 60) ;; ffmpeg 6 definition is different and probably backward compatible with ffmpeg 5 ;; This is with old channel layout in compiled, which is the case on linux. ;; Probably always, because of backwards compatibility with ffmpeg 5.0 probably. (make-offsets (codec_type _AVMediaType) (codec_id _AVCodecID) _uint32 ;; codec_tag _pointer ;; extradata _int ;; extradata_size (format _AVSampleFormat) _int64 ;; bit_rate (6 _int) ;; bits_per_coded_sample, bits_per_raw_sample, ;; profile, level, width, height _AVRational ;; sample_aspect_ratio (6 _int) ;; field_order, color_range, color_primaries, ;; color_trc, color_space, chroma_location _int ;; video_delay ;; Only when FF_API_OLD_CHANNEL_LAYOUT is active. _uint64 ;; channel_layout _int ;; channels (sample_rate _int) (5 _int) ;; block_align, frame_size, ;; initial_padding, trailing_padding, seek_preroll (ch_layout _AVChannelLayout) ;; framerate, coded_side_data, and nb_coded_side_data follow here, ;; but they are not needed. ) ;;;;;;; ffmpeg 7 and 8. (major versions 61 and 62). (make-offsets (codec_type _AVMediaType) (codec_id _AVCodecID) _int32 ;; codec_tag _pointer ;; extradata _int ;; extradata_size _pointer ;; coded_side_data _int ;; nb_coded_side_data ;; AVCodecParameters.format ;; audio: enum AVSampleFormat (format _AVSampleFormat) _int64 ;; bit_rate (6 _int) ;; bits_per_coded_sample, bits_per_raw_sample, ;; profile, level, width, height, (2 _AVRational) ;; sample_aspect_ratio, ramerate (7 _int) ;; field_order, color_range, ;; color_primaries, color_trc, ;; color_space, chroma_location, ;; video_delay (ch_layout _AVChannelLayout) ;; ch_layout (sample_rate _int)) ;; sample_rate ) ) ;; Reads the codec id from AVCodecParameters. ;; The wrapper hides the generated cstruct accessor behind a more stable name. (define (avcodec-pars-codec_id s) (AVCodecParameters-codec_id s)) ;; Reads the media type from AVCodecParameters. ;; This lets the module distinguish audio streams from video/subtitle streams. (define (avcodec-pars-codec_type s) (AVCodecParameters-codec_type s)) ;; Reads the sample rate from AVCodecParameters. ;; The decoder uses this value for timing, resampling, and sample positions. (define (avcodec-pars-sample_rate s) (AVCodecParameters-sample_rate s)) ;; Reads the number of channels from the new AVChannelLayout. ;; Only this value is needed for interleaved PCM output and buffer sizing. (define (avcodec-pars-channels s) (AVChannelLayout-nb_channels (AVCodecParameters-ch_layout s))) ;; Reads the FFmpeg sample format from AVCodecParameters. ;; This is the input format for swresample. (define (avcodec-pars-format s) (AVCodecParameters-format s)) ; _AVStream: ; codecpar : AVCodecParameters* ; time_base : AVRational ; duration : int64 ;; AVStream is defined partially. The needed fields are codecpar, time_base, ;; and duration: enough to open the stream and calculate positions. (def-cstruct _AVStream (codec time_base duration) (make-offsets _pointer (2 _int) (codec _AVCodecParameters-pointer) _pointer (time_base _AVRational) _int64 (duration _int64)) ) ;; Returns the codec parameters of an AVStream. ;; The rest of the code works only with codecpar, not with old codec fields. (define (avstream-codec s) (AVStream-codec s)) ;; Reads the stream duration in stream time_base ticks. ;; This duration is preferred over the container duration when FFmpeg knows it. (define (avstream-duration s) (AVStream-duration s)) ;; Reads the time_base of an AVStream. ;; This rational is needed to scale timestamps and duration to seconds/samples. (define (avstream-time_base s) (AVStream-time_base s)) ; _AVFormatContext: ; nb_streams : uint/int ; streams : AVStream** ; duration : int64 ; bit_rate : int64 ;; AVFormatContext is defined partially. The module only uses the stream array, ;; stream count, container duration, and bitrate; metadata is deliberately ignored. (def-cstruct _AVFormatContext (nb_streams streams duration bit_rate) (make-offsets (5 _pointer) _int (nb_streams _int) (streams _pointer) _uint _pointer _uint _pointer _string*/utf-8 _int64 (duration _int64) (bit_rate _int64)) ) ;; Reads how many streams the container contains. ;; This determines the upper bound when searching for audio streams. (define (avformat_nb_streams s) (AVFormatContext-nb_streams s)) ;; Fetches stream i from AVFormatContext.streams. ;; AVFormatContext.streams is an AVStream**; therefore the array pointer is ;; read first, and then the i-th stream pointer is loaded. (define (avformat_stream s i) (let ((streams-ptr (AVFormatContext-streams s))) (ptr-ref streams-ptr _AVStream-pointer i) )) ;; Reads the container duration in AV_TIME_BASE units. ;; This is the fallback when the selected audio stream has no duration itself. (define (avformat_duration s) (AVFormatContext-duration s)) ;; Reads the container bitrate. ;; The public API returns -1 when FFmpeg reports no positive bitrate. (define (avformat_bit_rate s) (AVFormatContext-bit_rate s)) ; AVFrame ; data ; nb_samples ; best_effort_timestamp ;; AVFrame is one of the most sensitive layouts: FFmpeg 6 differs from 7/8. ;; Only data, nb_samples, sample_rate, and best_effort_timestamp are named; ;; the rest is included as offset information. (def-cstruct _AVFrame (data nb_samples sample_rate best_effort_timestamp) (if (= avutil-version-major 58) ;;; avutil 58 (ffmpeg 6.x) (make-offsets (data _pointer) (7 _pointer) ; data (8 _int) ; linesize _pointer ; extended-data (2 _int) ; width, height (nb_samples _int) ; nb_samples (3 _int) ; format / key_frame / enum Picturetype ;;; deprecated FRAME_KEY _AVRational ; sample_aspect_ratio; (2 _int64) ; pts / pkt_dts _AVRational ; time_base (2 _int) ; coded_picture_number / display_picture_number ;; PICTURE_NUMBER _int ; quality _pointer ; opaque _int ; repeat_pict (2 _int) ; interlaced_frame / top_field_first ;; INTERLACED_FRAME _int64 ; reordered_opaque ;; REORDERED_OPAQUE (sample_rate _int); sample_rate _int64 ; channel_layout ;; OLD_CHANNEL_LAYOUT (8 _pointer) ; AVBufferRef *buf[AV_NUM_DATA_POINTERS]; _pointer ; AVBufferRef **extended_buf; _int ; int nb_extended_buf; _pointer ; AVFrameSideData **side_data; _int ; nb_side_data; (6 _int) ; flags / color_range / color_primaries / color_trc / colorspace / chroma_location (best_effort_timestamp _int64) ; best_effort_timestamp ) ;;; ffmpeg 7, 8 - avutil 59 and 60. (make-offsets (data _pointer) (7 _pointer) ; data (8 _int) ; linesize _pointer ; extended-data (2 _int) ; width, height (nb_samples _int) ; nb_samples (2 _int) ; format / enum Picturetype _AVRational ; sample_aspect_ratio; (2 _int64) ; pts / pkt_dts _AVRational ; time_base _int ; quality _pointer ; opaque _int ; repeat_pict (sample_rate _int); sample_rate (8 _pointer) ; AVBufferRef *buf[AV_NUM_DATA_POINTERS]; _pointer ; AVBufferRef **extended_buf; _int ; int nb_extended_buf; _pointer ; AVFrameSideData **side_data; _int ; nb_side_data; (6 _int) ; flags / color_range / color_primaries / color_trc / colorspace / chroma_location (best_effort_timestamp _int64) ; best_effort_timestamp ) ) ) ;; Passes the address that swr_convert expects as const uint8_t **. ;; AVFrame starts with data[8], so the frame pointer itself points at data[0]. (define (avframe-data frame) ;; swr_convert wants const uint8_t **. ;; AVFrame starts with uint8_t *data[8], so the frame pointer itself ;; is the address of data[0]. frame) ;; Returns the first data plane of an AVFrame. ;; This is only useful if direct access to plane 0 is needed later. (define (avframe-data0 frame) ;; Only useful if the first plane pointer itself is ever needed. (AVFrame-data frame)) ;; Reads how many sample frames the current AVFrame contains. ;; This value determines how much input is offered to swresample. (define (avframe-nb-samples frame) (AVFrame-nb_samples frame)) ;; Reads the best-effort timestamp from an AVFrame. ;; That timestamp is the best basis for sample position and seek correction. (define (avframe-best-effort-timestamp frame) (AVFrame-best_effort_timestamp frame)) ; AVPacket ; stream_index ;; AVPacket is defined partially because only stream_index is needed. ;; It lets the demuxer skip packets from other streams. (def-cstruct _AVPacket (stream_index) (make-offsets _pointer ; buf _int64 ; pts _int64 ; dts _pointer ; data _int ; size (stream_index _int))) ;; Reads which stream a packet belongs to. ;; Packets from other streams are ignored before they reach the audio decoder. (define (avpacket-stream-index pkt) (AVPacket-stream_index pkt)) ;;;;; Now import the needed functions ;; The FFI imports below are the minimal FFmpeg functions for: ;; opening/demuxing, codec initialization, frame decode, resampling, seek, and cleanup. ;; Functions that use C pointer-to-pointer cleanup get a raw binding ;; plus a small Racket wrapper that normalizes the stored pointer to #f. (def-avcodec avcodec_free_context/raw (_fun (_ptr io _AVCodecContext) -> (p : _AVCodecContext) -> p ) #:c-id avcodec_free_context) ;; Frees an AVCodecContext and normalizes the result to #f. ;; FFmpeg sets the C pointer to NULL through pointer-to-pointer; the Racket side then stores #f. (define (avcodec_free_context ctx) (if ctx (begin (avcodec_free_context/raw ctx) #f) #f)) (def-swresample swr_free/raw (_fun (_ptr io _SwrContext) -> (p : _SwrContext) -> p) #:c-id swr_free) ;; Frees a SwrContext and normalizes the result to #f. ;; This prevents decoder-storage from keeping an old native pointer after cleanup. (define (swr_free ctx) (if ctx (begin (swr_free/raw ctx) #f) #f)) (def-avformat avformat_close_input/raw (_fun (_ptr io _AVFormatContext-pointer/null) -> (p : _AVFormatContext-pointer/null) -> p) #:c-id avformat_close_input) ;; Closes an AVFormatContext and normalizes the result to #f. ;; This follows FFmpeg ownership: avformat_close_input also closes the input resource. (define (avformat_close_input inp) (if inp (begin (avformat_close_input/raw inp) #f) #f)) (def-avformat av_find_best_stream (_fun _AVFormatContext-pointer _AVMediaType _int _int _pointer ; AVCodec **decoder_ret, hier gewoon #f meegeven _int -> _int)) (def-avcodec avcodec_find_decoder (_fun _AVCodecID -> _AVCodec)) (def-avcodec avcodec_alloc_context3 (_fun _AVCodec -> _AVCodecContext)) (def-avcodec avcodec_parameters_to_context (_fun _AVCodecContext _AVCodecParameters-pointer -> _int)) (def-avcodec avcodec_open2 (_fun _AVCodecContext _AVCodec _pointer ; AVDictionary **options, hier nullptr/#f -> _int)) (def-avutil av_frame_alloc (_fun -> _AVFrame-pointer/null)) (def-avutil av_frame_unref (_fun _AVFrame-pointer -> _void)) (def-avutil av_frame_free/raw (_fun (_ptr io _AVFrame-pointer/null) -> (p : _AVFrame-pointer/null) -> p) #:c-id av_frame_free) ;; Frees an AVFrame and normalizes the result to #f. ;; The wrapper prevents the Racket struct from keeping a dangling frame pointer. (define (av_frame_free frm) (if frm (begin (av_frame_free/raw frm) #f) #f)) (def-swresample swr_alloc_set_opts2 (_fun (ps : (_ptr io _SwrContext)) _AVChannelLayout-pointer ; out_ch_layout _AVSampleFormat ; out_sample_fmt _int ; out_sample_rate _AVChannelLayout-pointer ; in_ch_layout _AVSampleFormat ; in_sample_fmt _int ; in_sample_rate _int ; log_offset _pointer ; log_ctx -> (r : _int) -> (values r ps))) (def-swresample swr_init (_fun _SwrContext -> _int)) (def-avcodec avcodec_parameters_alloc (_fun -> _AVCodecParameters-pointer/null)) (def-avcodec avcodec_parameters_from_context (_fun _AVCodecParameters-pointer _AVCodecContext -> _int)) (def-avcodec avcodec_parameters_free/raw (_fun (_ptr io _AVCodecParameters-pointer/null) -> (p : _AVCodecParameters-pointer/null) -> p) #:c-id avcodec_parameters_free) ;; Frees temporarily used AVCodecParameters and returns #f. ;; This is used for parameter copies that are only needed during initialization. (define (avcodec_parameters_free par) (if par (begin (avcodec_parameters_free/raw par) #f) #f)) (def-avformat avformat_open_input (_fun (ctx : (_ptr io _AVFormatContext-pointer/null)) _string/utf-8 ; filename _pointer ; AVInputFormat *fmt, hier #f _pointer ; AVDictionary **options, hier #f -> (r : _int) -> (values r ctx))) (def-avformat avformat_find_stream_info (_fun _AVFormatContext-pointer _pointer ; AVDictionary **options, hier #f -> _int)) (def-avutil av_frame_get_best_effort_timestamp (_fun _AVFrame-pointer -> _int64)) (def-swresample swr_get_out_samples (_fun _SwrContext _int -> _int)) (def-swresample swr_convert (_fun _SwrContext _pointer _int _pointer _int -> _int)) (def-swresample swr_get_delay (_fun _SwrContext _int64 -> _int64)) (def-avutil av_samples_get_buffer_size (_fun _pointer _int _int _AVSampleFormat _int -> _int)) (def-avcodec avcodec_receive_frame (_fun _AVCodecContext _AVFrame-pointer -> _int)) (def-avformat av_read_frame (_fun _AVFormatContext-pointer _AVPacket-pointer -> _int)) (def-avcodec av_packet_alloc (_fun -> _AVPacket-pointer/null)) (def-avcodec av_packet_unref (_fun _AVPacket-pointer -> _void)) (def-avcodec av_packet_free/raw (_fun (_ptr io _AVPacket-pointer/null) -> (p : _AVPacket-pointer/null) -> p) #:c-id av_packet_free) ;; Frees an AVPacket and normalizes the result to #f. ;; Packets are short-lived and are explicitly cleaned up to save native memory. (define (av_packet_free pkg) (if pkg (begin (av_packet_free/raw pkg) #f) #f)) (def-avcodec avcodec_send_packet (_fun _AVCodecContext _AVPacket-pointer/null -> _int)) (def-avutil av_rescale (_fun _int64 _int64 _int64 -> _int64)) (def-avutil av_rescale_q (_fun _int64 _AVRational _AVRational -> _int64)) (def-avformat av_seek_frame (_fun _AVFormatContext-pointer _int _int64 _int -> _int)) (def-avcodec avcodec_flush_buffers (_fun _AVCodecContext -> _void)) (def-swresample swr_close (_fun _SwrContext -> _void)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Constants ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; The module always produces 32-bit signed PCM. This keeps the rest of ;; racket-audio from having to deal with every possible FFmpeg sample format. (define FMPG_OUTPUT_BITS 32) (define FMPG_OUTPUT_BYTES 4) (define FMPG_OUTPUT_FMT AV_SAMPLE_FMT_S32) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Internal structures for ffmpeg decoding ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Creates short aliases for struct accessors and mutators. ;; This keeps the decoder code readable without long Racket struct names. (define-syntax def-struct-helpers (syntax-rules () ((_ (struct-get get struct-set set)) (begin (define get struct-get) (define set struct-set)) ) ((_ (struct-get get)) (define get struct-get) ) ) ) ;; Defines several short struct helper names at once. ;; This is purely syntactic bookkeeping for the storage structs below. (define-syntax struct-helpers (syntax-rules () ((_ a ...) (begin (def-struct-helpers a) ...)))) ;;;;;;;;;; audio-info-storage ;; audio-info-storage contains only playback-relevant information. ;; Tags/metadata are deliberately omitted; the player needs rate, channels, stream, and duration. (define-struct audio-info-storage (audio-stream-count selected-stream-index sample-rate channels duration-ms duration-samples ) #:mutable #:transparent ) (struct-helpers (audio-info-storage-audio-stream-count ais-stream-count set-audio-info-storage-audio-stream-count! ais-stream-count!) (audio-info-storage-selected-stream-index ais-stream-index set-audio-info-storage-selected-stream-index! ais-stream-index!) (audio-info-storage-sample-rate ais-rate set-audio-info-storage-sample-rate! ais-rate!) (audio-info-storage-channels ais-channels set-audio-info-storage-channels! ais-channels!) (audio-info-storage-duration-ms ais-duration-ms set-audio-info-storage-duration-ms! ais-duration-ms!) (audio-info-storage-duration-samples ais-duration-samples set-audio-info-storage-duration-samples! ais-duration-samples!) ) ;; Resets audio-info to a closed/unknown state. ;; This prevents old stream data from remaining visible after close or failed open. (define (ais-clear! s) (ais-stream-count! s 0) (ais-stream-index! s -1) (ais-rate! s 0) (ais-channels! s 0) (ais-duration-ms! s -1) (ais-duration-samples! s -1) ) ;; Creates an empty audio-info storage structure. ;; The values mean: no stream selected and duration still unknown. (define (new-audio-info-storage) (let ((a (make-audio-info-storage 0 -1 0 0 -1 -1))) a)) ;;;;;;;;;;; decoder storage ;; decoder-storage owns the native decode resources and keeps the running ;; sample bookkeeping. PCM is the last produced Racket bytes buffer. (define-struct decoder-storage (codec ; _AVCodec - Not owned by decoder-storage, global pointer codec-ctx ; _AVCodecContext frame ; _AVFrame swr-ctx ; _SwrContext pcm ; bytes (maybe bytes-buffer) eof-seen ; boolean decoder-drained ; boolean timecode ; double last-samples ; integer buffer-start-sample ; integer next-sample-position ; integer discard-until-sample ; integer ) #:mutable #:transparent ) ;; Tests whether the decoder currently has no PCM output buffer. ;; This is used to set the start position and timecode only on first output. (define (pcm-empty? dec) (zero? (bytes-length (ds-pcm dec)))) ;; Tests whether PCM bytes are available for the caller. ;; The public buffer function uses this to return #f for empty output. (define (pcm-present? dec) (positive? (bytes-length (ds-pcm dec)))) ;; Combines decoder status with the actual PCM buffer. ;; A positive decode step only counts as success when bytes are present too. (define (produced-pcm? produced dec) (and (> produced 0) (pcm-present? dec))) (struct-helpers (decoder-storage-codec ds-codec set-decoder-storage-codec! ds-codec!) (decoder-storage-codec-ctx ds-codec-ctx set-decoder-storage-codec-ctx! ds-codec-ctx!) (decoder-storage-frame ds-frame set-decoder-storage-frame! ds-frame!) (decoder-storage-swr-ctx ds-swr-ctx set-decoder-storage-swr-ctx! ds-swr-ctx!) (decoder-storage-pcm ds-pcm set-decoder-storage-pcm! ds-pcm!) (decoder-storage-eof-seen ds-eof-seen set-decoder-storage-eof-seen! ds-eof-seen!) (decoder-storage-decoder-drained ds-drained set-decoder-storage-decoder-drained! ds-drained!) (decoder-storage-timecode ds-timecode set-decoder-storage-timecode! ds-timecode!) (decoder-storage-last-samples ds-last-samples set-decoder-storage-last-samples! ds-last-samples!) (decoder-storage-buffer-start-sample ds-start-sample set-decoder-storage-buffer-start-sample! ds-start-sample!) (decoder-storage-next-sample-position ds-next-sample-pos set-decoder-storage-next-sample-position! ds-next-sample-pos!) (decoder-storage-discard-until-sample ds-discard-until set-decoder-storage-discard-until-sample! ds-discard-until!) ) ;; Cleans up all native decoder resources in decoder-storage. ;; Codec context, frame, and resampler are owned by this storage and must go together. (define (free-ffmpeg s) (when s (ds-codec-ctx! s (avcodec_free_context (ds-codec-ctx s))) (ds-frame! s (av_frame_free (ds-frame s))) (ds-swr-ctx! s (swr_free (ds-swr-ctx s))) (reset-decoder-storage! s) ) ) ;; Clears only the last produced PCM output. ;; Decoder and seek state remain intact so the next decode step can continue. (define (ds-clear-output! s) (ds-pcm! s (make-bytes 0)) (ds-last-samples! s 0) (ds-start-sample! s (ds-next-sample-pos s)) ) ;; Resets decoder-storage to initial values without opening native resources itself. ;; This is used after cleanup and during reinitialization to wipe old state. (define (reset-decoder-storage! s) (ds-codec! s #f) (ds-pcm! s (make-bytes 0)) (ds-eof-seen! s #f) (ds-drained! s #f) (ds-timecode! s 0.0) (ds-last-samples! s 0) (ds-start-sample! s 0) (ds-next-sample-pos! s 0) (ds-discard-until! s -1)) ;; Creates decoder-storage and registers a finalizer for native cleanup. ;; The finalizer is a safety net; normal code closes resources explicitly earlier. (define (new-decoder-storage) (let ((ds (make-decoder-storage #f #f #f #f (make-bytes 0) #f #f 0.0 0 0 0 -1))) (register-finalizer ds (λ (s) (free-ffmpeg s))) ds)) ;;;;;;;;;;;;;; fmpg-instance ;; An fmpg-instance groups the container context, audio-info, and decoder state. ;; The public API always works on such an instance, comparable to a C handle. (define-struct fmpg-instance (opened ; boolean format-ctx ; AVFormatContext audio-info ; audio-info-storage decoder ; decoder-storage ) #:mutable #:transparent ) ;; Creates a new decode instance with its own format context, info, and decoder state. ;; Here too, a finalizer performs cleanup if the user does not close explicitly. (define (new-fmpg-instance) (let ((i (make-fmpg-instance #f #f (new-audio-info-storage) (new-decoder-storage)))) (register-finalizer i (λ (i) (let ((ctx (fmpg-instance-format-ctx i))) (when ctx (set-fmpg-instance-format-ctx! i (avformat_close_input ctx)))) (set-fmpg-instance-audio-info! i #f) (set-fmpg-instance-decoder! i #f)) ) i)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Helper functions for ffmpeg decoding ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Converts an AVRational to an inexact number. ;; FFmpeg timestamps use rationals; Racket can then calculate more easily in seconds. (define-syntax av_q2d (syntax-rules () ((_ a) (exact->inexact (/ (AVRational-num a) (AVRational-den a))) ) ) ) ;; Counts how many streams in the container are audio streams. ;; This is informative for the API; actual selection is done with av_find_best_stream. (define (count-audio-streams avformat-ctx) ;; Without a format context there is nothing to count; 0 is the safe API value. (if (eq? avformat-ctx #f) 0 (let ((count 0) (nb_streams (avformat_nb_streams avformat-ctx)) (i 0) ) (while (< i nb_streams) ;; For each stream, inspect codec parameters; only audio counts. (let* ((stream (avformat_stream avformat-ctx i)) (par (avstream-codec stream)) ) (unless (eq? par #f) (let ((codec-type (avcodec-pars-codec_type par))) (when (= codec-type AVMEDIA_TYPE_AUDIO) (set! count (+ count 1)) ) ) ) ) (set! i (+ i 1)) ) count) ) ) ;; Converts seconds to milliseconds, or -1 for unknown/negative duration. ;; Rounding avoids systematic underestimation from floating point conversion. (define (milliseconds_from_seconds seconds) (if (< seconds 0.0) -1 (inexact->exact (round (+ (* seconds 1000.0) 0.5))))) ;; Converts seconds to sample frames, or -1 for unknown duration/rate. ;; This makes duration and seek positions usable for the audio player. (define (samples_from_seconds seconds sample-rate) (if (or (< seconds 0.0) (<= sample-rate 0)) -1 (inexact->exact (round (+ (* seconds sample-rate) 0.5))))) ;; Computes the duration of a stream in seconds. ;; AV_NOPTS_VALUE means FFmpeg does not know a reliable stream duration. (define (stream_duration_seconds stream) (if (eq? stream #f) -1.0 (if (= (avstream-duration stream) AV_NOPTS_VALUE) -1.0 (* (avstream-duration stream) (av_q2d (avstream-time_base stream))) ) ) ) ;; Computes the container duration in seconds. ;; This is fallback information when the selected stream has no duration. (define (format_duration_seconds ctx) (if (eq? ctx #f) -1 (if (= (avformat_duration ctx) AV_NOPTS_VALUE) -1.0 (exact->inexact (/ (avformat_duration ctx) AV_TIME_BASE)) ) ) ) ;; Converts an FFmpeg timestamp to a sample position. ;; If timestamp, stream, or sample rate is missing, -1 is returned as unknown. (define (timestamp_to_samples timestamp stream sample_rate) (if (or (eq? stream #f) (= timestamp AV_NOPTS_VALUE) (<= sample_rate 0)) -1 (let ((seconds (exact->inexact (* timestamp (av_q2d (avstream-time_base stream)))))) (samples_from_seconds seconds sample_rate)))) ;; Accepts both path? and string? values as filenames. ;; Other values become #f so fmpg-open-file! can reject them easily. (define (filename->string filename) (cond [(path? filename) (path->string filename)] [(string? filename) filename] [else #f])) ;; Fills audio-info for the best audio stream in the opened file. ;; The function checks stream, codec parameters, type, rate, and channels before ;; the instance is considered usable. (define (fill-audio-info! self) (let* ((ctx (fmpg-instance-format-ctx self)) (info (fmpg-instance-audio-info self))) ;; Always start with clean info so a failed inspection leaves no old values behind. (ais-clear! info) (ais-stream-count! info (count-audio-streams ctx)) ;; Each binding checks a condition required for safe playback: ;; best stream found, stream/parameters exist, type is audio, rate/channels are positive. (early-return ((best (av_find_best_stream ctx AVMEDIA_TYPE_AUDIO -1 -1 #f 0) ? (< best 0) => #f) (stream (avformat_stream ctx best) ? (not (a-!nullptr? stream)) => #f) (par (avstream-codec stream) ? (not (a-!nullptr? par)) => #f) (codec-type (avcodec-pars-codec_type par) ? (not (= codec-type AVMEDIA_TYPE_AUDIO)) => #f) (sample-rate (avcodec-pars-sample_rate par) ? (<= sample-rate 0) => #f) (channels (avcodec-pars-channels par) ? (<= channels 0) => #f) ;; Stream duration is more precise; container duration is the fallback for files without one. (stream-seconds (stream_duration_seconds stream)) (seconds (if (< stream-seconds 0.0) (format_duration_seconds ctx) stream-seconds))) ;; Only after all checks does the selected stream become visible in audio-info. (ais-stream-index! info best) (ais-rate! info sample-rate) (ais-channels! info channels) (ais-duration-ms! info (milliseconds_from_seconds seconds)) (ais-duration-samples! info (samples_from_seconds seconds sample-rate)) #t) ) ) ;; Checks whether an instance is ready for decode or query operations. ;; There must be an open file, a selected stream, and an active codec/resampler. (define (instance-ready? instance) ;; All parts must be present before decode or query is safe. (let ((ready (and instance (fmpg-instance-opened instance) (fmpg-instance-format-ctx instance) (let ((info (fmpg-instance-audio-info instance))) (and info (>= (ais-stream-index info) 0))) (let ((dec (fmpg-instance-decoder instance))) (and dec (ds-codec-ctx dec) (ds-swr-ctx dec))) #t))) (unless ready (err-sound "instance not ready!")) ready)) ;; Initializes the FFmpeg codec context for the selected audio stream. ;; The decoder is found, a context is allocated, parameters are copied, ;; and the codec is opened before frames can be received. (define (init-codec-context! self) (early-return ;; The early-return chain prevents later FFmpeg calls from seeing NULL pointers. ;; Base values come from the selected stream and decoder state. ((dec (fmpg-instance-decoder self)) (info (fmpg-instance-audio-info self)) (ctx (fmpg-instance-format-ctx self)) (stream-index (ais-stream-index info)) (stream (avformat_stream ctx stream-index) ? (not (a-!nullptr? stream)) => #f) (par (avstream-codec stream) ? (not (a-!nullptr? par)) => #f) ;; The decoder belongs to the codec_id from the stream parameters; the pointer is not owned. (codec (let ((c (avcodec_find_decoder (avcodec-pars-codec_id par)))) (ds-codec! dec c) c) ? (not (a-!nullptr? codec)) => #f) ;; The codec context is owned by decoder-storage and is freed explicitly later. (codec-ctx (let ((c (avcodec_alloc_context3 codec))) (ds-codec-ctx! dec c) c) ? (not (a-!nullptr? codec-ctx)) => #f) ;; FFmpeg wants codec parameters in the context before the codec can be opened. (ret-par (avcodec_parameters_to_context codec-ctx par) ? (< ret-par 0) => #f) (ret-open (avcodec_open2 codec-ctx codec #f) ? (< ret-open 0) => #f) (frame (let ((f (av_frame_alloc))) (ds-frame! dec f) f) ? (not (a-!nullptr? frame)) => #f)) #t) ) ;; Initializes swresample for interleaved 32-bit signed PCM output. ;; Current codec parameters are copied temporarily from the codec context, ;; so channel layout, input format, and sample rate remain consistent. (define (init-resampler! self) (early-return ;; swresample is configured only after the codec context exists. ((dec (fmpg-instance-decoder self)) (codec-ctx (ds-codec-ctx dec) ? (not (a-!nullptr? codec-ctx)) => #f) (par (avcodec_parameters_alloc) ? (not (a-!nullptr? par)) => #f) (result (early-return ((ret-par (avcodec_parameters_from_context par codec-ctx) ? (< ret-par 0) => #f) ;; Use the same channel layout for input and output: only the sample format is normalized. (layout (AVCodecParameters-ch_layout par)) (channels (AVChannelLayout-nb_channels layout) ? (<= channels 0) => #f) (rate (avcodec-pars-sample_rate par) ? (<= rate 0) => #f) (fmt (avcodec-pars-format par)) ;; swr_alloc_set_opts2 allocates or reuses the SwrContext via pointer-to-pointer. (ret-swr (let-values (((ret swr-ctx) (swr_alloc_set_opts2 (ds-swr-ctx dec) layout FMPG_OUTPUT_FMT rate layout fmt rate 0 #f))) (ds-swr-ctx! dec swr-ctx) ret) ? (< ret-swr 0) => #f) (ret-init (swr_init (ds-swr-ctx dec)) ? (< ret-init 0) => #f)) #t))) (avcodec_parameters_free par) result ) ) ;; Reinitializes the codec and resampler for the current instance. ;; First the old native state is freed; then codec and resampler are rebuilt. (define (init-decoder! self) (let ((dec (fmpg-instance-decoder self))) ;; Old native state must be gone before the instance reuses the same storage. (free-ffmpeg dec) (reset-decoder-storage! dec) (and (init-codec-context! self) (init-resampler! self)))) ;; Public constructor for an FFmpeg decode instance. ;; Initialization errors are caught so the C-like API can return #f. (define (fmpg-init) ;; This constructor is intended as a robust boundary: exceptions become #f. (with-handlers ([exn:fail? (lambda (e) #f)]) (new-fmpg-instance))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; API ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Opens an audio file and initializes all decode state. ;; The function fails safely with 0: at every failed step, the half-open instance is closed. (define (fmpg-open-file! instance filename) ;; First check the API preconditions: valid instance, not already open, ;; no old format context, and a filename that FFmpeg can open. (let/assert ((instance instance a-!nullptr? 0) (opened (fmpg-instance-opened instance) a-false? 0) (old-ctx (fmpg-instance-format-ctx instance) a-nullptr? 0) (filename (filename->string filename) string? 0)) (let ((result (let/assert ;; Opening happens first; the received AVFormatContext is stored directly in the instance ;; so cleanup after later errors can use the same path. ((ret-open (let-values (((r ctx) (avformat_open_input #f filename #f #f))) (set-fmpg-instance-format-ctx! instance ctx) r) (a->=? 0) 0) (ctx (fmpg-instance-format-ctx instance) a-!nullptr? 0) ;; Stream info is needed before codecpar, duration, and best stream are reliable. (ret-info (avformat_find_stream_info ctx #f) (a->=? 0) 0) (info-ok (fill-audio-info! instance) a-true? 0) (dec-ok (init-decoder! instance) a-true? 0)) (begin (set-fmpg-instance-opened! instance #t) 1)))) ;; Every partial failure rolls back to a closed instance. (when (zero? result) (fmpg-close! instance)) result))) ;; Closes an instance and cleans up all native resources. ;; Audio-info is cleared so later queries do not return stale metadata. (define (fmpg-close! instance) (when instance ;; Decoder resources first, then the container; this way decoder pointers no longer ;; refer to streams from an already-closed format context. (free-ffmpeg (fmpg-instance-decoder instance)) (set-fmpg-instance-format-ctx! instance (avformat_close_input (fmpg-instance-format-ctx instance))) (set-fmpg-instance-opened! instance #f) (let ((info (fmpg-instance-audio-info instance))) (when info (ais-clear! info))))) ;; Returns 1 when the instance is ready to decode, otherwise 0. ;; The API uses integers because the outside interface is C-like. (define (fmpg-is-open instance) (if (instance-ready? instance) 1 0)) ;; Returns the number of audio streams in the opened file. ;; For a closed or missing instance, 0 is the safe value. (define (fmpg-audio-stream-count instance) (if (and instance (fmpg-instance-opened instance)) (ais-stream-count (fmpg-instance-audio-info instance)) 0)) ;; Returns the sample rate of the selected audio stream. ;; For an instance that is not ready, 0 is returned. (define (fmpg-audio-sample-rate instance) (if (instance-ready? instance) (ais-rate (fmpg-instance-audio-info instance)) 0)) ;; Returns the number of channels in the selected audio stream. ;; For an instance that is not ready, 0 is returned. (define (fmpg-audio-channels instance) (if (instance-ready? instance) (ais-channels (fmpg-instance-audio-info instance)) 0)) ;; Returns the bit depth of the PCM output. ;; The decoder always normalizes to 32-bit signed samples. (define (fmpg-audio-bits-per-sample instance) FMPG_OUTPUT_BITS) ;; Returns the number of bytes per PCM sample. ;; This follows FMPG_OUTPUT_BITS and therefore remains constant at 4. (define (fmpg-audio-bytes-per-sample instance) FMPG_OUTPUT_BYTES) ;; Returns the selected audio duration in milliseconds. ;; -1 means FFmpeg could not provide a usable duration. (define (fmpg-duration-ms instance) (if (instance-ready? instance) (ais-duration-ms (fmpg-instance-audio-info instance)) -1)) ;; Returns the selected audio duration in sample frames. ;; -1 means unknown; the value is computed from stream or container duration. (define (fmpg-duration-samples instance) (if (instance-ready? instance) (ais-duration-samples (fmpg-instance-audio-info instance)) -1)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Decoding ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Appends native PCM bytes to the Racket output buffer. ;; The function checks the pointer and maximum buffer size before copying bytes. (define (append-bytes! dec src nbytes) (cond [(zero? nbytes) #t] [else ;; src must be a native pointer and the new buffer must not grow beyond INT_MAX. (let/assert ((src src a-!nullptr? #f) (pcm (ds-pcm dec)) (old-size (bytes-length pcm)) (new-size (+ old-size nbytes) (a-<=? INT_MAX) #f)) ;; Racket bytes are the ownership form that the caller can safely keep/read. (let ((new-pcm (make-bytes new-size))) (bytes-copy! new-pcm 0 pcm 0 old-size) (memcpy new-pcm old-size src 0 nbytes) (ds-pcm! dec new-pcm) #t))])) ;; Determines which part of a converted frame must be kept. ;; After seek, FFmpeg may start before the target; this cuts pre-roll samples away. (define (select-frame-range! dec frame-start out-samples channels) (let ((keep-start frame-start) (keep-samples out-samples) (byte-offset 0) (dropped-all? #f)) ;; discard-until is active only immediately after seek. Negative means: discard nothing. (when (>= (ds-discard-until dec) 0) (let* ((target (ds-discard-until dec)) (frame-end (+ frame-start out-samples))) (cond ;; The whole frame lies before the target position; drop it all and advance position. [(<= frame-end target) (ds-next-sample-pos! dec frame-end) (set! keep-samples 0) (set! dropped-all? #t)] [else ;; The frame overlaps the target position; skip leading bytes and keep the rest. (when (< frame-start target) (let ((drop (- target frame-start))) (when (and (> drop 0) (< drop out-samples)) (set! byte-offset (* drop channels FMPG_OUTPUT_BYTES)) (set! keep-samples (- out-samples drop)) (set! keep-start target)))) (ds-discard-until! dec -1)]))) (values keep-start keep-samples byte-offset dropped-all?))) ;; Allocates temporary native output buffers for swr_convert and always frees them. ;; swr_convert expects an array of plane pointers; for interleaved output this is an array ;; with exactly one pointer to the temporary PCM buffer. (define (call-with-swr-output-buffer max-bytes proc) (early-return ;; Both allocations are native because swr_convert reads/writes C pointers. ((tmp (malloc max-bytes 'raw) ? (eq? tmp #f) => #f) (out-planes (malloc _pointer 1 'raw) ? (eq? out-planes #f) => #f ~ (free tmp)) ;; Interleaved output has one plane; out-planes[0] points to tmp. (do (ptr-set! out-planes _pointer 0 tmp)) (result (proc tmp out-planes))) (free out-planes) (free tmp) result) ) ;; Converts a received AVFrame to 32-bit PCM and appends it to the output buffer. ;; Buffer length, timestamp, seek pre-roll, and sample bookkeeping meet here. (define (append-converted-frame! self frame) (let/assert ((dec (fmpg-instance-decoder self)) (info (fmpg-instance-audio-info self)) (channels (ais-channels info)) (sample-rate (ais-rate info)) (nb-samples (avframe-nb-samples frame))) (cond ;; No channels or samples means the frame yields no usable audio, but this is not an error. [(or (<= channels 0) (<= nb-samples 0)) #t] [else (let/assert ;; First ask swresample how much output can be produced and allocate for that. ((max-out-samples (swr_get_out_samples (ds-swr-ctx dec) nb-samples) (a->? 0) #f) (max-bytes (av_samples_get_buffer_size #f channels max-out-samples FMPG_OUTPUT_FMT 1) (a->? 0) #f)) (call-with-swr-output-buffer max-bytes (lambda (tmp out-planes) (let/assert ;; Convert the frame to the fixed FMPG_OUTPUT_FMT. ((out-samples (swr_convert (ds-swr-ctx dec) out-planes max-out-samples (avframe-data frame) nb-samples) (a->=? 0) #f) (used-bytes (av_samples_get_buffer_size #f channels out-samples FMPG_OUTPUT_FMT 1) (a->=? 0) #f) (stream-index (ais-stream-index info)) (stream (avformat_stream (fmpg-instance-format-ctx self) stream-index) a-!nullptr? #f) ;; Use the frame timestamp if FFmpeg has one; otherwise continue the current position. (frame-start0 (timestamp_to_samples (avframe-best-effort-timestamp frame) stream sample-rate)) (frame-start (if (< frame-start0 0) (ds-next-sample-pos dec) frame-start0))) (let-values (((keep-start keep-samples byte-offset dropped-all?) (select-frame-range! dec frame-start out-samples channels))) ;; Apply seek trimming before bytes are appended to the PCM buffer. (cond [dropped-all? #t] [(<= keep-samples 0) (ds-next-sample-pos! dec (+ frame-start out-samples)) #t] [else ;; The first output in this decode call determines buffer-start and timecode. (when (pcm-empty? dec) (ds-start-sample! dec keep-start) (ds-timecode! dec (/ (exact->inexact keep-start) (exact->inexact sample-rate)))) (let ((keep-bytes (* keep-samples channels FMPG_OUTPUT_BYTES))) (let/assert ((ok? (append-bytes! dec (ptr-add tmp byte-offset) keep-bytes) (a-!eq? #f) #f)) (ds-last-samples! dec (+ (ds-last-samples dec) keep-samples)) (ds-next-sample-pos! dec (+ keep-start keep-samples)) #t))]))))))]))) ;; Receives all frames currently available from the FFmpeg decoder. ;; The function stops at EAGAIN, marks drain at EOF, and converts each received frame. (define (receive-available-frames! self) (let ((dec (fmpg-instance-decoder self)) (produced 0)) (let loop () ;; avcodec_receive_frame may return multiple frames after one packet, so loop until EAGAIN/EOF. (let ((ret (avcodec_receive_frame (ds-codec-ctx dec) (ds-frame dec)))) (cond ;; EAGAIN means: no frame right now; send more packets first. [(= ret AVERROR_EAGAIN) produced] ;; EOF means the decoder itself is empty; the resampler may still have delay. [(= ret AVERROR_EOF) (ds-drained! dec #t) produced] [(< ret 0) (err-sound "Got retvalue ~a from avcodec_receive_frame" ret) -1] [else (let ((ok? (append-converted-frame! self (ds-frame dec)))) ;; The AVFrame is reused; unref releases FFmpeg's internal buffers. (av_frame_unref (ds-frame dec)) (if ok? (begin (when (> (ds-last-samples dec) 0) (set! produced 1)) (loop)) -1))]))))) ;; Reads packets until a packet for the selected audio stream is found. ;; Packets from other streams are immediately unref'ed so they keep no native buffers. (define (read-selected-audio-packet! self pkt) (let ((wanted-stream (ais-stream-index (fmpg-instance-audio-info self)))) (let loop () ;; av_read_frame returns all streams interleaved; filter here on the selected audio stream. (let ((ret (av_read_frame (fmpg-instance-format-ctx self) pkt))) (cond [(= ret AVERROR_EOF) 'eof] [(< ret 0) (err-sound "av_read_frame failed: ~a" ret) 'error] [(= (avpacket-stream-index pkt) wanted-stream) 'packet] [else ;; Non-audio or non-selected stream: release packet contents immediately and continue. (av_packet_unref pkt) (loop)]))))) ;; Pulls delayed samples out of swresample after the decoder has been drained. ;; Resamplers may still hold output internally; without this step the end of audio is lost. (define (drain-resampler! self) (let* ((dec (fmpg-instance-decoder self)) (info (fmpg-instance-audio-info self)) (channels (ais-channels info)) (sample-rate (ais-rate info)) (sample-rate* (exact->inexact sample-rate)) (swr-ctx (ds-swr-ctx dec))) (let loop ((produced 0)) (early-return ;; swr_get_delay reports how many samples may still be in the resampler buffer. ((delay (swr_get_delay swr-ctx sample-rate) ? (<= delay 0) => produced) (max-bytes (av_samples_get_buffer_size #f channels delay FMPG_OUTPUT_FMT 1) ? (<= max-bytes 0) => produced) (tmp (malloc max-bytes 'raw) ? (eq? tmp #f) => -1) (out-planes (malloc _pointer 1 'raw) ? (eq? out-planes #f) => -1 ~ (free tmp)) (do (ptr-set! out-planes _pointer 0 tmp)) ;; Null input with 0 samples asks swresample to flush delayed output. (out-samples (swr_convert swr-ctx out-planes delay #f 0) ? (<= out-samples 0) => produced ~ (begin (free out-planes) (free tmp))) (used-bytes (av_samples_get_buffer_size #f channels out-samples FMPG_OUTPUT_FMT 1) ? (< used-bytes 0) => produced ~ (begin (free out-planes) (free tmp))) ;; If this is the first output, the buffer belongs to the current next-sample-pos. (do (when (pcm-empty? dec) (let ((start-sample (ds-next-sample-pos dec))) (ds-start-sample! dec start-sample) (ds-timecode! dec (/ (exact->inexact start-sample) sample-rate*))))) (appended? (append-bytes! dec tmp used-bytes) ? (not appended?) => -1 ~ (begin (free out-planes) (free tmp))) ) (ds-last-samples! dec (+ (ds-last-samples dec) out-samples)) (ds-next-sample-pos! dec (+ (ds-next-sample-pos dec) out-samples)) (free out-planes) (free tmp) (loop 1)) ) ) ) ;; Decodes until PCM is available, EOF is reached, or an error occurs. ;; Return values: 1 = PCM available, 0 = done/no more PCM, negative = error. (define (fmpg-decode-next! instance) (define (r m r . e) (when (or (eq? r #f) (< r 0)) (err-sound "fmpg-decode-next! : ~a - ~a - ~a" m r e)) r) ;; #f = continue reading/sending packets, 1 = PCM available, negative = error. (define (receive-result! self dec) ;; Try receiving first; FFmpeg may still have frames ready from previous packets. (let ((produced (receive-available-frames! self))) (cond [(< produced 0) -1] [(produced-pcm? produced dec) 1] [else #f]))) (define (send-packet-result! dec pkt) ;; After send_packet the packet may always be unref'ed; the decoder has taken what it needs. (let ((ret (avcodec_send_packet (ds-codec-ctx dec) pkt))) (av_packet_unref pkt) ret)) (early-return ((? (not (instance-ready? instance)) => (r "instance-ready" -1)) (dec (fmpg-instance-decoder instance)) ;; Each call produces at most one new public PCM buffer; clear old output first. (do (ds-clear-output! dec)) ;; Before reading a new packet, first receive pending decoder frames. (received (receive-result! instance dec) ? received => (r "receive-result!" received)) (pkt (av_packet_alloc) ? (eq? pkt #f) => (r "av_packet_alloc" -1)) (packet-result (let loop () (cond ;; If the demuxer already reported EOF, no more packets need to be read. [(ds-eof-seen dec) #f] [else (let ((packet-status (read-selected-audio-packet! instance pkt))) (cond [(eq? packet-status 'eof) (ds-eof-seen! dec #t) (av_packet_unref pkt) #f] [(eq? packet-status 'error) (av_packet_unref pkt) -1] [(eq? packet-status 'packet) (let ((ret (send-packet-result! dec pkt))) (cond ;; The decoder asks for receive_frame before more packets may be sent. [(= ret AVERROR_EAGAIN) (let ((received (receive-result! instance dec))) (if received received (loop)))] [(< ret 0) ret] [else (let ((received (receive-result! instance dec))) (if received received (loop)))]))] [else (err-sound "read-selected-audio-packet!: unexpected result ~a" packet-status) -1]))])) ? packet-result => (r "packet-result" packet-result) ~ (av_packet_free pkt)) (do (av_packet_free pkt)) ;; When all packets have been read, send NULL to drain the decoder. (drain-result (and (not (ds-drained dec)) (let ((ret (avcodec_send_packet (ds-codec-ctx dec) #f))) (cond [(= ret AVERROR_EOF) (ds-drained! dec #t) #f] [(= ret AVERROR_EAGAIN) (receive-result! instance dec)] [(< ret 0) ret] [else (receive-result! instance dec)]))) ? drain-result => (r "drain-result" drain-result)) ;; After decoder drain, samples may still be in swresample; flush those too. (produced (drain-resampler! instance) ? (< produced 0) => (r "drain-resampler!" produced))) (cond [(produced-pcm? produced dec) 1] [else (dbg-sound "fmpg-decode-next!: eof/no more pcm") 0]))) ;; Seeks to a position in milliseconds and resets decoder/resampler state. ;; FFmpeg seeks to a suitable earlier packet position; discard-until then corrects ;; the extra samples to exactly the requested sample position. (define (fmpg-seek-ms! instance target-pos-ms) (early-return ;; Seek is allowed only on a decode-ready instance and to a non-negative position. ((? (or (not (instance-ready? instance)) (< target-pos-ms 0)) => 0) (info (fmpg-instance-audio-info instance)) (dec (fmpg-instance-decoder instance)) (ctx (fmpg-instance-format-ctx instance)) (stream-index (ais-stream-index info)) (stream (avformat_stream ctx stream-index) ? (not (a-!nullptr? stream)) => 0) ;; Convert milliseconds first to microseconds and then to stream time_base. (pos-us (av_rescale target-pos-ms AV_TIME_BASE 1000)) (stream-ts (av_rescale_q pos-us AV_TIME_BASE_Q (avstream-time_base stream))) ;; BACKWARD seeks to an earlier key/packet point; exact correction happens via discard-until. (ret-seek (av_seek_frame ctx stream-index stream-ts AVSEEK_FLAG_BACKWARD) ? (< ret-seek 0) => 0) (target-samples (samples_from_seconds (/ target-pos-ms 1000.0) (ais-rate info))) ;; Old decoder and resampler buffers belong to the position before the seek and must go. (do (avcodec_flush_buffers (ds-codec-ctx dec)) (swr_close (ds-swr-ctx dec))) (ret-swr (swr_init (ds-swr-ctx dec)) ? (< ret-swr 0) => 0) (pos (if (>= target-samples 0) target-samples 0))) ;; Reset sample bookkeeping to the target position; later frames may still contain pre-roll. (ds-pcm! dec (make-bytes 0)) (ds-last-samples! dec 0) (ds-start-sample! dec pos) (ds-next-sample-pos! dec pos) (ds-discard-until! dec target-samples) (ds-timecode! dec (/ target-pos-ms 1000.0)) (ds-eof-seen! dec #f) (ds-drained! dec #f) 1) ) ;; Internal helper that safely extracts decoder-storage from an instance. ;; If the instance is #f, #f is returned as well. (define (fmpg-decoder instance) (and instance (fmpg-instance-decoder instance))) ;; Returns the last produced PCM buffer. ;; With no decoder or empty output, #f is returned. (define (fmpg-buffer instance) (let ((dec (fmpg-decoder instance))) (if (and dec (not (pcm-empty? dec))) (ds-pcm dec) #f))) ;; Returns the size of the current PCM buffer in bytes. ;; Values above INT_MAX are reported as 0 because the outside interface uses int sizes. (define (fmpg-buffer-size instance) (let ((dec (fmpg-decoder instance))) (if dec (let ((n (bytes-length (ds-pcm dec)))) (if (> n INT_MAX) 0 n)) 0))) ;; Returns how many sample frames are in the current PCM buffer. ;; This is independent of the number of channels. (define (fmpg-buffer-samples instance) (let ((dec (fmpg-decoder instance))) (if dec (ds-last-samples dec) 0))) ;; Returns the sample position at the start of the current PCM buffer. ;; The player can use this to connect playback position to buffer contents. (define (fmpg-buffer-start-sample instance) (let ((dec (fmpg-decoder instance))) (if dec (ds-start-sample dec) 0))) ;; Returns the sample position immediately after the current PCM buffer. ;; This is start position plus the number of sample frames in the buffer. (define (fmpg-buffer-end-sample instance) (let ((dec (fmpg-decoder instance))) (if dec (+ (ds-start-sample dec) (ds-last-samples dec)) 0))) ;; Returns the next sample position the decoder expects to produce. ;; This value continues across decode calls and is reset by seek. (define (fmpg-sample-position instance) (let ((dec (fmpg-decoder instance))) (if dec (ds-next-sample-pos dec) 0))) ;; Returns the timecode in seconds of the current output buffer. ;; With no decoder, 0.0 is the safe fallback. (define (fmpg-timecode instance) (let ((dec (fmpg-decoder instance))) (if dec (ds-timecode dec) 0.0))) ;; Returns the container bitrate, or -1 when it is unknown. ;; Only positive FFmpeg bitrates are passed through as reliable. (define (fmpg-file-bitrate instance) (let ((ctx (and instance (fmpg-instance-format-ctx instance)))) (if ctx (let ((br (avformat_bit_rate ctx))) (if (> br 0) br -1)) -1)))