diff --git a/ffmpeg-audio/demo_ffmpeg_audio.c b/ffmpeg-audio/demo_ffmpeg_audio.c index 8de21ea..4ecbe46 100644 --- a/ffmpeg-audio/demo_ffmpeg_audio.c +++ b/ffmpeg-audio/demo_ffmpeg_audio.c @@ -8,12 +8,14 @@ #define fprintf fprintf_s #endif -static void write_u16_le(FILE *f, uint16_t v) { +static void write_u16_le(FILE *f, uint16_t v) +{ fputc((int)(v & 0xff), f); fputc((int)((v >> 8) & 0xff), f); } -static void write_u32_le(FILE *f, uint32_t v) { +static void write_u32_le(FILE *f, uint32_t v) +{ fputc((int)(v & 0xff), f); fputc((int)((v >> 8) & 0xff), f); fputc((int)((v >> 16) & 0xff), f); @@ -24,7 +26,8 @@ static int write_wav_header(FILE *f, int sample_rate, int channels, int bits_per_sample, - uint32_t data_size) { + uint32_t data_size) +{ const uint32_t byte_rate = (uint32_t)(sample_rate * channels * bits_per_sample / 8); const uint16_t block_align = @@ -36,7 +39,7 @@ static int write_wav_header(FILE *f, fwrite("fmt ", 1, 4, f); write_u32_le(f, 16); /* fmt chunk size */ - write_u16_le(f, 1); /* 1 = integer PCM */ + write_u16_le(f, 1); /* PCM */ write_u16_le(f, (uint16_t)channels); write_u32_le(f, (uint32_t)sample_rate); write_u32_le(f, byte_rate); @@ -53,7 +56,8 @@ static int rewrite_wav_header(FILE *f, int sample_rate, int channels, int bits_per_sample, - uint32_t data_size) { + uint32_t data_size) +{ if (fseek(f, 0, SEEK_SET) != 0) { return 0; } @@ -65,37 +69,24 @@ static int rewrite_wav_header(FILE *f, data_size); } -static int write_decoder_buffer(FILE *out, - fmpg_decoder *dec, - uint64_t *total_written) { - const uint8_t *buf = fmpg_decoder_buffer(dec); - const int size = fmpg_decoder_buffer_size(dec); - - if (!buf || size <= 0) { - return 1; +static void print_if_present(const char *label, const char *value) +{ + if (value && value[0]) { + printf("%s: %s\n", label, value); } - - if (fwrite(buf, 1, (size_t)size, out) != (size_t)size) { - return 0; - } - - *total_written += (uint64_t)size; - return 1; } -int main(int argc, char **argv) { +int main(int 
argc, char **argv) +{ const char *infile; const char *outfile; - fmpg_instance *ac = NULL; - fmpg_decoder *dec = NULL; + fmpg_instance *fmpg = NULL; FILE *out = NULL; int sample_rate; int channels; int bits_per_sample; - int64_t duration_ms; - int64_t duration_samples; uint64_t total_written = 0; if (argc != 3) { @@ -106,48 +97,38 @@ int main(int argc, char **argv) { infile = argv[1]; outfile = argv[2]; - ac = fmpg_init(); - if (!ac) { - fprintf(stderr, "ac_init failed\n"); + fmpg = fmpg_init(); + if (!fmpg) { + fprintf(stderr, "fmpg_init failed\n"); return 1; } - if (!fmpg_open_file(ac, infile)) { + if (!fmpg_open_file(fmpg, infile)) { fprintf(stderr, "could not open input file: %s\n", infile); - fmpg_free(ac); + fmpg_free(fmpg); return 1; } - sample_rate = fmpg_audio_sample_rate(ac); - channels = fmpg_audio_channels(ac); - bits_per_sample = fmpg_audio_bits_per_sample(ac); - duration_ms = fmpg_duration_ms(ac); - duration_samples = fmpg_duration_samples(ac); + sample_rate = fmpg_audio_sample_rate(fmpg); + channels = fmpg_audio_channels(fmpg); + bits_per_sample = fmpg_audio_bits_per_sample(fmpg); if (sample_rate <= 0 || channels <= 0 || bits_per_sample != 32) { - fprintf(stderr, "invalid audio parameters\n"); - fmpg_free(ac); - return 1; - } - - dec = fmpg_create_decoder(ac); - if (!dec) { - fprintf(stderr, "could not create decoder\n"); - fmpg_free(ac); + fprintf(stderr, "unexpected audio format reported by decoder\n"); + fmpg_free(fmpg); return 1; } out = fopen(outfile, "wb"); if (!out) { fprintf(stderr, "could not open output file: %s\n", outfile); - fmpg_free_decoder(dec); - fmpg_free(ac); + fmpg_free(fmpg); return 1; } /* - * We do not know the final WAV data size yet. - * Write a placeholder header first and patch it at the end. + * The final WAV data size is only known after decoding. Write a temporary + * header now and patch it after the decode loop. 
*/ if (!write_wav_header(out, sample_rate, @@ -156,44 +137,22 @@ int main(int argc, char **argv) { 0)) { fprintf(stderr, "could not write WAV header\n"); fclose(out); - fmpg_free_decoder(dec); - fmpg_free(ac); + fmpg_free(fmpg); return 1; } - for (;;) { - fmpg_package *pkg = fmpg_read_package(ac); + while (fmpg_decode_next(fmpg)) { + const uint8_t *buf = fmpg_buffer(fmpg); + const int size = fmpg_buffer_size(fmpg); - if (!pkg) { - break; - } - - /* - * ac_read_package() now returns only packets from the internally - * selected audio stream. No stream_index test is needed anymore. - */ - if (fmpg_decode_package(pkg, dec)) { - if (!write_decoder_buffer(out, dec, &total_written)) { - fprintf(stderr, "could not write PCM data\n"); - fmpg_free_package(pkg); + if (buf && size > 0) { + if (fwrite(buf, 1, (size_t)size, out) != (size_t)size) { + fprintf(stderr, "write error\n"); fclose(out); - fmpg_free_decoder(dec); - fmpg_free(ac); + fmpg_free(fmpg); return 1; } - } - - fmpg_free_package(pkg); - } - - /* Drain delayed samples from the decoder and resampler. 
*/ - while (fmpg_flush_decoder(dec)) { - if (!write_decoder_buffer(out, dec, &total_written)) { - fprintf(stderr, "could not write flushed PCM data\n"); - fclose(out); - fmpg_free_decoder(dec); - fmpg_free(ac); - return 1; + total_written += (uint64_t)size; } } @@ -209,24 +168,31 @@ int main(int argc, char **argv) { bits_per_sample, (uint32_t)total_written)) { fprintf(stderr, "could not rewrite WAV header\n"); + fclose(out); + fmpg_free(fmpg); + return 1; } fclose(out); - printf("wrote %s\n", outfile); - printf("title: %s\n", fmpg_file_title(ac)); - printf("album: %s\n", fmpg_file_album(ac)); + printf("wrote: %s\n", outfile); printf("sample rate: %d\n", sample_rate); printf("channels: %d\n", channels); printf("sample bits: %d\n", bits_per_sample); - printf("duration ms: %lld\n", (long long)duration_ms); - printf("duration smp:%lld\n", (long long)duration_samples); - printf("decoded smp: %lld\n", (long long)fmpg_decoder_sample_position(dec)); - printf("data bytes: %llu\n", - (unsigned long long)total_written); + printf("data bytes: %llu\n", (unsigned long long)total_written); + printf("samples out: %lld\n", (long long)fmpg_sample_position(fmpg)); - fmpg_free_decoder(dec); - fmpg_free(ac); + if (fmpg_duration_ms(fmpg) >= 0) { + printf("duration ms: %lld\n", (long long)fmpg_duration_ms(fmpg)); + } + if (fmpg_duration_samples(fmpg) >= 0) { + printf("duration smp:%lld\n", (long long)fmpg_duration_samples(fmpg)); + } + print_if_present("title", fmpg_file_title(fmpg)); + print_if_present("artist", fmpg_file_author(fmpg)); + print_if_present("album", fmpg_file_album(fmpg)); + + fmpg_free(fmpg); return 0; } diff --git a/ffmpeg-audio/ffmpeg_audio.cpp b/ffmpeg-audio/ffmpeg_audio.cpp index add0a27..c4fe23e 100644 --- a/ffmpeg-audio/ffmpeg_audio.cpp +++ b/ffmpeg-audio/ffmpeg_audio.cpp @@ -1,18 +1,15 @@ /* - * Audio-only FFmpeg wrapper. + * Audio-only FFmpeg wrapper with a plain C ABI. * - * This file is implemented in C++, but exports a plain C ABI. 
C++ is used only - * internally to make ownership understandable: strings are std::string, decoded - * PCM buffers are std::vector, and FFmpeg objects are released by destructors. + * This implementation intentionally hides FFmpeg concepts from the public API: * - * Public design choices: + * - no stream_index in the API; + * - no AVPacket/package object in the API; + * - no explicit decoder object in the API; + * - no file_info/audio_info structs exposed to C callers. * - * - The caller opens one file. - * - The best audio stream is selected internally. - * - FFmpeg stream_index is not exposed. - * - File metadata is stored in the instance and accessed through getters. - * - Audio output is always signed 32-bit interleaved PCM. - * - There are no callbacks; file IO is handled by FFmpeg. + * Internally the instance owns everything needed to decode one selected audio + * stream. The caller simply opens a file and repeatedly calls fmpg_decode_next(). */ #include "ffmpeg_audio.h" @@ -34,11 +31,10 @@ extern "C" { #include } -static constexpr int AC_AUDIO_OUTPUT_BITS = 32; -static constexpr int AC_AUDIO_OUTPUT_BYTES = 4; -static constexpr AVSampleFormat AC_AUDIO_OUTPUT_FMT = AV_SAMPLE_FMT_S32; +static constexpr int FMPG_OUTPUT_BITS = 32; +static constexpr int FMPG_OUTPUT_BYTES = 4; +static constexpr AVSampleFormat FMPG_OUTPUT_FMT = AV_SAMPLE_FMT_S32; -/* Metadata stored inside fmpg_instance. */ struct file_info_storage { std::string title; std::string author; @@ -51,7 +47,8 @@ struct file_info_storage { int track = -1; int bitrate = -1; - void clear() { + void clear() + { title.clear(); author.clear(); album.clear(); @@ -64,16 +61,16 @@ struct file_info_storage { } }; -/* Audio information for the selected audio stream. */ struct audio_info_storage { int audio_stream_count = 0; int selected_stream_index = -1; /* Internal FFmpeg stream index. 
*/ int sample_rate = 0; int channels = 0; int64_t duration_ms = -1; - int64_t duration_samples = -1; /* Sample frames, not int32_t values. */ + int64_t duration_samples = -1; /* Output sample frames. */ - void clear() { + void clear() + { audio_stream_count = 0; selected_stream_index = -1; sample_rate = 0; @@ -83,33 +80,7 @@ struct audio_info_storage { } }; -struct __fmpg_instance__ { - bool opened = false; - AVFormatContext *format_ctx = nullptr; - file_info_storage file_info; - audio_info_storage audio_info; - - ~__fmpg_instance__() { - if (format_ctx) { - avformat_close_input(&format_ctx); - } - } -}; - -struct __fmpg_package__ { - int64_t pts = AV_NOPTS_VALUE; - AVPacket *packet = nullptr; - - __fmpg_package__() : packet(av_packet_alloc()) {} - - ~__fmpg_package__() { - av_packet_free(&packet); - } -}; - -struct __fmpg_decoder__ { - fmpg_instance *instance = nullptr; - +struct decoder_storage { const AVCodec *codec = nullptr; AVCodecContext *codec_ctx = nullptr; AVFrame *frame = nullptr; @@ -117,14 +88,59 @@ struct __fmpg_decoder__ { std::vector pcm; - double timecode = 0.0; - int64_t last_samples = 0; /* sample frames in current output block */ - int64_t sample_position = 0; /* total sample frames emitted */ + bool eof_seen = false; + bool decoder_drained = false; - ~__fmpg_decoder__() { + double timecode = 0.0; + + int64_t last_samples = 0; + int64_t buffer_start_sample = 0; + int64_t next_sample_position = 0; + + /* >= 0 while a seek has requested us to discard decoded pre-roll samples. 
*/ + int64_t discard_until_sample = -1; + + void clear_output() + { + pcm.clear(); + last_samples = 0; + buffer_start_sample = next_sample_position; + } + + void free_ffmpeg() + { avcodec_free_context(&codec_ctx); av_frame_free(&frame); swr_free(&swr_ctx); + codec = nullptr; + pcm.clear(); + eof_seen = false; + decoder_drained = false; + timecode = 0.0; + last_samples = 0; + buffer_start_sample = 0; + next_sample_position = 0; + discard_until_sample = -1; + } + + ~decoder_storage() + { + free_ffmpeg(); + } +}; + +struct __fmpg_instance__ { + bool opened = false; + AVFormatContext *format_ctx = nullptr; + file_info_storage file_info; + audio_info_storage audio_info; + decoder_storage decoder; + + ~__fmpg_instance__() + { + if (format_ctx) { + avformat_close_input(&format_ctx); + } } }; @@ -133,13 +149,13 @@ static const char *string_c_str(const std::string &s) return s.empty() ? "" : s.c_str(); } -static std::string get_metadata_string(const AVFormatContext *ctx, const char *key) +static std::string get_metadata_string(const AVFormatContext *ctx, + const char *key) { const AVDictionaryEntry *entry = av_dict_get(ctx->metadata, key, nullptr, 0); - return entry && entry->value ? 
std::string(entry->value) : std::string(); } @@ -150,29 +166,25 @@ static int get_metadata_int(const AVFormatContext *ctx, const char *key) key, nullptr, 0); - if (!entry || !entry->value || !*entry->value) { return -1; } - return std::atoi(entry->value); } static int count_audio_streams(const AVFormatContext *ctx) { - int count = 0; - if (!ctx) { return 0; } + int count = 0; for (unsigned i = 0; i < ctx->nb_streams; ++i) { const AVCodecParameters *par = ctx->streams[i]->codecpar; if (par && par->codec_type == AVMEDIA_TYPE_AUDIO) { ++count; } } - return count; } @@ -181,7 +193,6 @@ static int64_t milliseconds_from_seconds(double seconds) if (seconds < 0.0) { return -1; } - return static_cast(seconds * 1000.0 + 0.5); } @@ -190,7 +201,6 @@ static int64_t samples_from_seconds(double seconds, int sample_rate) if (seconds < 0.0 || sample_rate <= 0) { return -1; } - return static_cast(seconds * static_cast(sample_rate) + 0.5); } @@ -200,7 +210,6 @@ static double stream_duration_seconds(const AVStream *stream) if (!stream || stream->duration == AV_NOPTS_VALUE) { return -1.0; } - return static_cast(stream->duration) * av_q2d(stream->time_base); } @@ -209,8 +218,21 @@ static double format_duration_seconds(const AVFormatContext *ctx) if (!ctx || ctx->duration == AV_NOPTS_VALUE) { return -1.0; } + return static_cast(ctx->duration) / + static_cast(AV_TIME_BASE); +} - return static_cast(ctx->duration) / static_cast(AV_TIME_BASE); +static int64_t timestamp_to_samples(int64_t timestamp, + const AVStream *stream, + int sample_rate) +{ + if (!stream || timestamp == AV_NOPTS_VALUE || sample_rate <= 0) { + return -1; + } + + const double seconds = static_cast(timestamp) * + av_q2d(stream->time_base); + return samples_from_seconds(seconds, sample_rate); } static void fill_file_metadata(fmpg_instance *self) @@ -244,7 +266,6 @@ static bool fill_audio_info(fmpg_instance *self) -1, nullptr, 0); - if (best < 0) { return false; } @@ -261,12 +282,6 @@ static bool 
fill_audio_info(fmpg_instance *self) self->audio_info.sample_rate = par->sample_rate; self->audio_info.channels = par->ch_layout.nb_channels; - /* - * Duration can come from the selected audio stream or from the container. - * Stream duration is preferred because it is tied to the audio stream's own - * time base. Some containers only provide container-level duration, so that - * is the fallback. - */ double seconds = stream_duration_seconds(stream); if (seconds < 0.0) { seconds = format_duration_seconds(ctx); @@ -282,10 +297,71 @@ static bool fill_audio_info(fmpg_instance *self) static bool instance_ready(const fmpg_instance *instance) { return instance && instance->opened && instance->format_ctx && - instance->audio_info.selected_stream_index >= 0; + instance->audio_info.selected_stream_index >= 0 && + instance->decoder.codec_ctx && instance->decoder.swr_ctx; } -fmpg_instance *fmpg_init(void) { +static bool init_codec_context(fmpg_instance *self) +{ + decoder_storage &dec = self->decoder; + const int stream_index = self->audio_info.selected_stream_index; + const AVCodecParameters *par = + self->format_ctx->streams[stream_index]->codecpar; + + dec.codec = avcodec_find_decoder(par->codec_id); + if (!dec.codec) { + return false; + } + + dec.codec_ctx = avcodec_alloc_context3(dec.codec); + if (!dec.codec_ctx) { + return false; + } + + if (avcodec_parameters_to_context(dec.codec_ctx, par) < 0) { + return false; + } + + if (avcodec_open2(dec.codec_ctx, dec.codec, nullptr) < 0) { + return false; + } + + dec.frame = av_frame_alloc(); + return dec.frame != nullptr; +} + +static bool init_resampler(fmpg_instance *self) +{ + decoder_storage &dec = self->decoder; + const AVChannelLayout *layout = &dec.codec_ctx->ch_layout; + + if (layout->nb_channels <= 0 || dec.codec_ctx->sample_rate <= 0) { + return false; + } + + if (swr_alloc_set_opts2(&dec.swr_ctx, + layout, + FMPG_OUTPUT_FMT, + dec.codec_ctx->sample_rate, + layout, + dec.codec_ctx->sample_fmt, + 
dec.codec_ctx->sample_rate, + 0, + nullptr) < 0) { + return false; + } + + return swr_init(dec.swr_ctx) >= 0; +} + +static bool init_decoder(fmpg_instance *self) +{ + self->decoder.free_ffmpeg(); + return init_codec_context(self) && init_resampler(self); +} + +fmpg_instance *fmpg_init(void) +{ try { return new fmpg_instance(); } catch (...) { @@ -319,7 +395,7 @@ int fmpg_open_file(fmpg_instance *instance, const char *filename) fill_file_metadata(instance); - if (!fill_audio_info(instance)) { + if (!fill_audio_info(instance) || !init_decoder(instance)) { fmpg_close(instance); return 0; } @@ -334,6 +410,8 @@ void fmpg_close(fmpg_instance *instance) return; } + instance->decoder.free_ffmpeg(); + if (instance->format_ctx) { avformat_close_input(&instance->format_ctx); } @@ -366,12 +444,12 @@ int fmpg_audio_channels(fmpg_instance *instance) int fmpg_audio_bits_per_sample(fmpg_instance *) { - return AC_AUDIO_OUTPUT_BITS; + return FMPG_OUTPUT_BITS; } int fmpg_audio_bytes_per_sample(fmpg_instance *) { - return AC_AUDIO_OUTPUT_BYTES; + return FMPG_OUTPUT_BYTES; } int64_t fmpg_duration_ms(fmpg_instance *instance) @@ -394,7 +472,8 @@ const char *fmpg_file_author(fmpg_instance *instance) return instance ? string_c_str(instance->file_info.author) : ""; } -const char *fmpg_file_album(fmpg_instance *instance) { +const char *fmpg_file_album(fmpg_instance *instance) +{ return instance ? string_c_str(instance->file_info.album) : ""; } @@ -413,7 +492,8 @@ const char *fmpg_file_copyright(fmpg_instance *instance) return instance ? string_c_str(instance->file_info.copyright) : ""; } -int fmpg_file_year(fmpg_instance *instance) { +int fmpg_file_year(fmpg_instance *instance) +{ return instance ? instance->file_info.year : -1; } @@ -427,166 +507,41 @@ int fmpg_file_bitrate(fmpg_instance *instance) return instance ? 
instance->file_info.bitrate : -1; } -fmpg_package *fmpg_read_package(fmpg_instance *instance) -{ - if (!instance_ready(instance)) { - return nullptr; - } - - const int wanted_stream = instance->audio_info.selected_stream_index; - - for (;;) { - fmpg_package *pkg = nullptr; - - try { - pkg = new fmpg_package(); - } catch (...) { - return nullptr; - } - - if (!pkg->packet) { - delete pkg; - return nullptr; - } - - const int ret = av_read_frame(instance->format_ctx, pkg->packet); - if (ret < 0) { - delete pkg; - return nullptr; - } - - if (pkg->packet->stream_index != wanted_stream) { - delete pkg; - continue; - } - - pkg->pts = pkg->packet->dts != AV_NOPTS_VALUE - ? pkg->packet->dts - : pkg->packet->pts; - return pkg; - } -} - -void fmpg_free_package(fmpg_package *package) -{ - delete package; -} - -static bool init_codec_context(fmpg_decoder *dec, const AVCodecParameters *par) -{ - dec->codec = avcodec_find_decoder(par->codec_id); - if (!dec->codec) { - return false; - } - - dec->codec_ctx = avcodec_alloc_context3(dec->codec); - if (!dec->codec_ctx) { - return false; - } - - if (avcodec_parameters_to_context(dec->codec_ctx, par) < 0) { - return false; - } - - return avcodec_open2(dec->codec_ctx, dec->codec, nullptr) >= 0; -} - -static bool init_resampler(fmpg_decoder *dec) -{ - const AVChannelLayout *layout = &dec->codec_ctx->ch_layout; - - if (layout->nb_channels <= 0 || dec->codec_ctx->sample_rate <= 0) { - return false; - } - - if (swr_alloc_set_opts2(&dec->swr_ctx, - layout, - AC_AUDIO_OUTPUT_FMT, - dec->codec_ctx->sample_rate, - layout, - dec->codec_ctx->sample_fmt, - dec->codec_ctx->sample_rate, - 0, - nullptr) < 0) { - return false; - } - - return swr_init(dec->swr_ctx) >= 0; -} - -fmpg_decoder *fmpg_create_decoder(fmpg_instance *instance) -{ - if (!instance_ready(instance)) { - return nullptr; - } - - fmpg_decoder *dec = nullptr; - - try { - dec = new fmpg_decoder(); - } catch (...) 
{ - return nullptr; - } - - dec->instance = instance; - - const int stream_index = instance->audio_info.selected_stream_index; - const AVCodecParameters *par = instance->format_ctx->streams[stream_index]->codecpar; - - if (!init_codec_context(dec, par)) { - delete dec; - return nullptr; - } - - dec->frame = av_frame_alloc(); - if (!dec->frame) { - delete dec; - return nullptr; - } - - if (!init_resampler(dec)) { - delete dec; - return nullptr; - } - - return dec; -} - -void fmpg_free_decoder(fmpg_decoder *decoder) -{ - delete decoder; -} - -static bool append_bytes(fmpg_decoder *dec, const uint8_t *src, size_t bytes) +static bool append_bytes(decoder_storage &dec, + const uint8_t *src, + size_t bytes) { if (!bytes) { return true; } if (bytes > static_cast(std::numeric_limits::max()) - - dec->pcm.size()) { + dec.pcm.size()) { return false; } try { - const size_t old_size = dec->pcm.size(); - dec->pcm.resize(old_size + bytes); - std::memcpy(dec->pcm.data() + old_size, src, bytes); + const size_t old_size = dec.pcm.size(); + dec.pcm.resize(old_size + bytes); + std::memcpy(dec.pcm.data() + old_size, src, bytes); return true; } catch (...) 
{ return false; } } -static bool append_converted_frame(fmpg_decoder *dec, const AVFrame *frame) +static bool append_converted_frame(fmpg_instance *self, + const AVFrame *frame) { - const int channels = dec->codec_ctx->ch_layout.nb_channels; + decoder_storage &dec = self->decoder; + const int channels = self->audio_info.channels; + const int sample_rate = self->audio_info.sample_rate; if (channels <= 0 || frame->nb_samples <= 0) { return true; } - const int max_out_samples = swr_get_out_samples(dec->swr_ctx, + const int max_out_samples = swr_get_out_samples(dec.swr_ctx, frame->nb_samples); if (max_out_samples <= 0) { return false; @@ -595,7 +550,7 @@ static bool append_converted_frame(fmpg_decoder *dec, const AVFrame *frame) const int max_bytes = av_samples_get_buffer_size(nullptr, channels, max_out_samples, - AC_AUDIO_OUTPUT_FMT, + FMPG_OUTPUT_FMT, 1); if (max_bytes <= 0) { return false; @@ -604,7 +559,7 @@ static bool append_converted_frame(fmpg_decoder *dec, const AVFrame *frame) std::vector tmp(static_cast(max_bytes)); uint8_t *out_planes[1] = { tmp.data() }; - const int out_samples = swr_convert(dec->swr_ctx, + const int out_samples = swr_convert(dec.swr_ctx, out_planes, max_out_samples, const_cast(frame->data), @@ -616,29 +571,92 @@ static bool append_converted_frame(fmpg_decoder *dec, const AVFrame *frame) const int used_bytes = av_samples_get_buffer_size(nullptr, channels, out_samples, - AC_AUDIO_OUTPUT_FMT, + FMPG_OUTPUT_FMT, 1); if (used_bytes < 0) { return false; } - if (!append_bytes(dec, tmp.data(), static_cast(used_bytes))) { + const int stream_index = self->audio_info.selected_stream_index; + const AVStream *stream = self->format_ctx->streams[stream_index]; + + int64_t frame_start = timestamp_to_samples(frame->best_effort_timestamp, + stream, + sample_rate); + if (frame_start < 0) { + frame_start = dec.next_sample_position; + } + + int64_t keep_start = frame_start; + int keep_samples = out_samples; + size_t byte_offset = 0; + + /* + * After seeking, 
FFmpeg may first return decoded samples from before the + * requested position. Discard them so public sample positions refer to the + * actual music position requested by the caller. + */ + if (dec.discard_until_sample >= 0) { + const int64_t target = dec.discard_until_sample; + const int64_t frame_end = frame_start + out_samples; + + if (frame_end <= target) { + dec.next_sample_position = frame_end; + return true; + } + + if (frame_start < target) { + const int64_t drop = target - frame_start; + if (drop > 0 && drop < out_samples) { + byte_offset = static_cast(drop) * + static_cast(channels) * + FMPG_OUTPUT_BYTES; + keep_samples = static_cast(out_samples - drop); + keep_start = target; + } + } + + dec.discard_until_sample = -1; + } + + if (keep_samples <= 0) { + dec.next_sample_position = frame_start + out_samples; + return true; + } + + if (dec.pcm.empty()) { + dec.buffer_start_sample = keep_start; + dec.timecode = static_cast(keep_start) / + static_cast(sample_rate); + } + + const size_t keep_bytes = static_cast(keep_samples) * + static_cast(channels) * + FMPG_OUTPUT_BYTES; + + if (!append_bytes(dec, tmp.data() + byte_offset, keep_bytes)) { return false; } - dec->last_samples += out_samples; - dec->sample_position += out_samples; + dec.last_samples += keep_samples; + dec.next_sample_position = keep_start + keep_samples; return true; } -static int receive_available_frames(fmpg_decoder *dec) +static int receive_available_frames(fmpg_instance *self) { + decoder_storage &dec = self->decoder; int produced = 0; for (;;) { - const int ret = avcodec_receive_frame(dec->codec_ctx, dec->frame); + const int ret = avcodec_receive_frame(dec.codec_ctx, dec.frame); - if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { + if (ret == AVERROR(EAGAIN)) { + return produced; + } + + if (ret == AVERROR_EOF) { + dec.decoder_drained = true; return produced; } @@ -646,77 +664,44 @@ static int receive_available_frames(fmpg_decoder *dec) return -1; } - if (!append_converted_frame(dec, 
dec->frame)) { - av_frame_unref(dec->frame); + if (!append_converted_frame(self, dec.frame)) { + av_frame_unref(dec.frame); return -1; } - produced = 1; - av_frame_unref(dec->frame); + produced = dec.last_samples > 0 ? 1 : produced; + av_frame_unref(dec.frame); } } -static void update_timecode_from_packet(fmpg_decoder *dec, const fmpg_package *pkg) +static bool read_selected_audio_packet(fmpg_instance *self, AVPacket *pkt) { - if (!dec || !pkg || pkg->pts == AV_NOPTS_VALUE) { - return; - } - - const int stream_index = dec->instance->audio_info.selected_stream_index; - AVStream *stream = dec->instance->format_ctx->streams[stream_index]; - dec->timecode = pkg->pts * av_q2d(stream->time_base); -} - -int fmpg_decode_package(fmpg_package *package, fmpg_decoder *decoder) -{ - if (!package || !decoder || !package->packet) { - return 0; - } - - decoder->pcm.clear(); - decoder->last_samples = 0; - update_timecode_from_packet(decoder, package); - - int ret = avcodec_send_packet(decoder->codec_ctx, package->packet); - - if (ret == AVERROR(EAGAIN)) { - if (receive_available_frames(decoder) < 0) { - return 0; - } - ret = avcodec_send_packet(decoder->codec_ctx, package->packet); - } - - if (ret < 0) { - return 0; - } - - return receive_available_frames(decoder) > 0 ? 
1 : 0; -} - -int fmpg_flush_decoder(fmpg_decoder *decoder) -{ - if (!decoder) { - return 0; - } - - decoder->pcm.clear(); - decoder->last_samples = 0; - - const int ret = avcodec_send_packet(decoder->codec_ctx, nullptr); - if (ret < 0 && ret != AVERROR_EOF) { - return 0; - } - - const int produced = receive_available_frames(decoder); - if (produced < 0) { - return 0; - } - - const int channels = decoder->codec_ctx->ch_layout.nb_channels; + const int wanted_stream = self->audio_info.selected_stream_index; for (;;) { - const int delay = static_cast(swr_get_delay(decoder->swr_ctx, - decoder->codec_ctx->sample_rate)); + const int ret = av_read_frame(self->format_ctx, pkt); + if (ret < 0) { + return false; + } + + if (pkt->stream_index == wanted_stream) { + return true; + } + + av_packet_unref(pkt); + } +} + +static int drain_resampler(fmpg_instance *self) +{ + decoder_storage &dec = self->decoder; + const int channels = self->audio_info.channels; + const int sample_rate = self->audio_info.sample_rate; + int produced = 0; + + for (;;) { + const int delay = static_cast(swr_get_delay(dec.swr_ctx, + sample_rate)); if (delay <= 0) { break; } @@ -724,7 +709,7 @@ int fmpg_flush_decoder(fmpg_decoder *decoder) const int max_bytes = av_samples_get_buffer_size(nullptr, channels, delay, - AC_AUDIO_OUTPUT_FMT, + FMPG_OUTPUT_FMT, 1); if (max_bytes <= 0) { break; @@ -733,7 +718,7 @@ int fmpg_flush_decoder(fmpg_decoder *decoder) std::vector tmp(static_cast(max_bytes)); uint8_t *out_planes[1] = { tmp.data() }; - const int out_samples = swr_convert(decoder->swr_ctx, + const int out_samples = swr_convert(dec.swr_ctx, out_planes, delay, nullptr, @@ -745,80 +730,196 @@ int fmpg_flush_decoder(fmpg_decoder *decoder) const int used_bytes = av_samples_get_buffer_size(nullptr, channels, out_samples, - AC_AUDIO_OUTPUT_FMT, + FMPG_OUTPUT_FMT, 1); - if (used_bytes < 0 || - !append_bytes(decoder, - tmp.data(), - static_cast(used_bytes))) { + if (used_bytes < 0) { break; } - decoder->last_samples += 
out_samples; - decoder->sample_position += out_samples; + if (dec.pcm.empty()) { + dec.buffer_start_sample = dec.next_sample_position; + dec.timecode = static_cast(dec.buffer_start_sample) / + static_cast(sample_rate); + } + + if (!append_bytes(dec, tmp.data(), static_cast(used_bytes))) { + return -1; + } + + dec.last_samples += out_samples; + dec.next_sample_position += out_samples; + produced = 1; } - return decoder->pcm.empty() ? 0 : 1; + return produced; } -int fmpg_seek_ms(fmpg_decoder *decoder, int64_t target_pos_ms) +int fmpg_decode_next(fmpg_instance *instance) { - if (!decoder || !instance_ready(decoder->instance)) { + if (!instance_ready(instance)) { return 0; } - const int stream_index = decoder->instance->audio_info.selected_stream_index; - AVStream *stream = decoder->instance->format_ctx->streams[stream_index]; + decoder_storage &dec = instance->decoder; + dec.clear_output(); + + /* First return any frames that are already pending in the decoder. */ + int produced = receive_available_frames(instance); + if (produced < 0) { + return 0; + } + if (produced > 0 && !dec.pcm.empty()) { + return 1; + } + + AVPacket *pkt = av_packet_alloc(); + if (!pkt) { + return 0; + } + + while (!dec.eof_seen) { + if (!read_selected_audio_packet(instance, pkt)) { + dec.eof_seen = true; + av_packet_unref(pkt); + break; + } + + int ret = avcodec_send_packet(dec.codec_ctx, pkt); + av_packet_unref(pkt); + + if (ret == AVERROR(EAGAIN)) { + produced = receive_available_frames(instance); + if (produced < 0) { + av_packet_free(&pkt); + return 0; + } + if (produced > 0 && !dec.pcm.empty()) { + av_packet_free(&pkt); + return 1; + } + continue; + } + + if (ret < 0) { + av_packet_free(&pkt); + return 0; + } + + produced = receive_available_frames(instance); + if (produced < 0) { + av_packet_free(&pkt); + return 0; + } + if (produced > 0 && !dec.pcm.empty()) { + av_packet_free(&pkt); + return 1; + } + } + + av_packet_free(&pkt); + + if (!dec.decoder_drained) { + const int ret = 
avcodec_send_packet(dec.codec_ctx, nullptr); + if (ret < 0 && ret != AVERROR_EOF) { + return 0; + } + + produced = receive_available_frames(instance); + if (produced < 0) { + return 0; + } + if (produced > 0 && !dec.pcm.empty()) { + return 1; + } + } + + produced = drain_resampler(instance); + return produced > 0 && !dec.pcm.empty() ? 1 : 0; +} + +int fmpg_seek_ms(fmpg_instance *instance, int64_t target_pos_ms) +{ + if (!instance_ready(instance) || target_pos_ms < 0) { + return 0; + } + + const int stream_index = instance->audio_info.selected_stream_index; + AVStream *stream = instance->format_ctx->streams[stream_index]; const int64_t pos_us = av_rescale(target_pos_ms, AV_TIME_BASE, 1000); const int64_t stream_ts = av_rescale_q(pos_us, AV_TIME_BASE_Q, stream->time_base); - if (av_seek_frame(decoder->instance->format_ctx, + if (av_seek_frame(instance->format_ctx, stream_index, stream_ts, AVSEEK_FLAG_BACKWARD) < 0) { return 0; } - decoder->timecode = target_pos_ms / 1000.0; - decoder->pcm.clear(); - decoder->last_samples = 0; - decoder->sample_position = samples_from_seconds(decoder->timecode, - decoder->instance->audio_info.sample_rate); + decoder_storage &dec = instance->decoder; + const int64_t target_samples = samples_from_seconds(target_pos_ms / 1000.0, + instance->audio_info.sample_rate); - avcodec_flush_buffers(decoder->codec_ctx); - - swr_close(decoder->swr_ctx); - return swr_init(decoder->swr_ctx) >= 0 ? 1 : 0; -} - -const uint8_t *fmpg_decoder_buffer(fmpg_decoder *decoder) -{ - return decoder && !decoder->pcm.empty() ? decoder->pcm.data() : nullptr; -} - -int fmpg_decoder_buffer_size(fmpg_decoder *decoder) { - if (!decoder || decoder->pcm.size() > - static_cast(std::numeric_limits::max())) { + avcodec_flush_buffers(dec.codec_ctx); + swr_close(dec.swr_ctx); + if (swr_init(dec.swr_ctx) < 0) { return 0; } - return static_cast(decoder->pcm.size()); + dec.pcm.clear(); + dec.last_samples = 0; + dec.buffer_start_sample = target_samples >= 0 ? 
target_samples : 0; + dec.next_sample_position = target_samples >= 0 ? target_samples : 0; + dec.discard_until_sample = target_samples; + dec.timecode = target_pos_ms / 1000.0; + dec.eof_seen = false; + dec.decoder_drained = false; + + return 1; } -double fmpg_decoder_timecode(fmpg_decoder *decoder) +const uint8_t *fmpg_buffer(fmpg_instance *instance) { - return decoder ? decoder->timecode : 0.0; + return instance && !instance->decoder.pcm.empty() + ? instance->decoder.pcm.data() + : nullptr; } -int64_t fmpg_decoder_last_samples(fmpg_decoder *decoder) +int fmpg_buffer_size(fmpg_instance *instance) { - return decoder ? decoder->last_samples : 0; + if (!instance || instance->decoder.pcm.size() > + static_cast(std::numeric_limits::max())) { + return 0; + } + return static_cast(instance->decoder.pcm.size()); } -int64_t fmpg_decoder_sample_position(fmpg_decoder *decoder) +int64_t fmpg_buffer_samples(fmpg_instance *instance) { - return decoder ? decoder->sample_position : 0; + return instance ? instance->decoder.last_samples : 0; +} + +int64_t fmpg_buffer_start_sample(fmpg_instance *instance) +{ + return instance ? instance->decoder.buffer_start_sample : 0; +} + +int64_t fmpg_buffer_end_sample(fmpg_instance *instance) +{ + if (!instance) { + return 0; + } + return instance->decoder.buffer_start_sample + instance->decoder.last_samples; +} + +int64_t fmpg_sample_position(fmpg_instance *instance) +{ + return instance ? instance->decoder.next_sample_position : 0; +} + +double fmpg_timecode(fmpg_instance *instance) +{ + return instance ? instance->decoder.timecode : 0.0; } diff --git a/ffmpeg-audio/ffmpeg_audio.h b/ffmpeg-audio/ffmpeg_audio.h index 78b4d28..cdfdcb2 100644 --- a/ffmpeg-audio/ffmpeg_audio.h +++ b/ffmpeg-audio/ffmpeg_audio.h @@ -20,243 +20,135 @@ extern "C" { /* * Audio-only FFmpeg wrapper. * - * The implementation is C++, but this header is plain C-compatible. All - * public object types are opaque. 
The caller never sees FFmpeg's AVFormatContext, - * AVCodecContext, AVPacket, stream_index, or std::string objects. + * The implementation is C++, but the exported API is plain C. The caller never + * sees FFmpeg objects, stream indices, packets, decoders, or C++ objects. * - * Output audio format is deliberately fixed: + * Output audio format is fixed: * * signed 32-bit integer PCM * interleaved / packed * native endian * - * This makes the API easy to bind from Racket and straightforward to feed into - * libao. Source formats such as MP3 float/planar output are converted internally. + * A sample frame means one sample moment across all channels. For stereo S32, + * one sample frame is two int32_t values, therefore 8 bytes. */ typedef struct __fmpg_instance__ fmpg_instance; -typedef struct __fmpg_decoder__ fmpg_decoder; -typedef struct __fmpg_package__ fmpg_package; /* ------------------------------------------------------------------------- */ /* Lifecycle */ /* ------------------------------------------------------------------------- */ -/* - * Create an empty decoder instance. - * - * Return: - * instance pointer, or NULL on allocation failure. - */ FFMPEG_EXTERN fmpg_instance *fmpg_init(void); - -/* - * Close any open file and free the instance. - * - * It is safe to pass NULL. - */ FFMPEG_EXTERN void fmpg_free(fmpg_instance *instance); /* - * Open a media file and select the best audio stream. + * Open a media file, select the best audio stream, and create the internal + * decoder/resampler for that stream. * - * The selected stream index is kept inside the instance. The public API does - * not expose FFmpeg stream indices. After this function succeeds, metadata, - * duration, sample rate and channel count are available through the getters - * below. + * After success, metadata, duration, sample rate and channel count are + * available through the getters below. 
* - * Return: - * 1 on success - * 0 on failure or if no usable audio stream was found + * Return: 1 on success, 0 on failure. */ -FFMPEG_EXTERN int fmpg_open_file(fmpg_instance *instance, - const char *filename); +FFMPEG_EXTERN int fmpg_open_file(fmpg_instance *instance, const char *filename); -/* Close the current file, if any, and reset instance-owned information. */ FFMPEG_EXTERN void fmpg_close(fmpg_instance *instance); - -/* Return 1 if a file is open, 0 otherwise. */ FFMPEG_EXTERN int fmpg_is_open(fmpg_instance *instance); /* ------------------------------------------------------------------------- */ /* Audio information */ /* ------------------------------------------------------------------------- */ -/* - * The number of audio streams found in the container. - * - * The decoder currently uses the best stream selected by FFmpeg. This count is - * informational; stream selection is intentionally not part of the public API. - */ FFMPEG_EXTERN int fmpg_audio_stream_count(fmpg_instance *instance); - -/* Output sample rate in Hz, for example 44100 or 48000. */ FFMPEG_EXTERN int fmpg_audio_sample_rate(fmpg_instance *instance); - -/* Number of output channels, for example 1 or 2. */ FFMPEG_EXTERN int fmpg_audio_channels(fmpg_instance *instance); - -/* Always 32: samples are signed 32-bit integer PCM. */ FFMPEG_EXTERN int fmpg_audio_bits_per_sample(fmpg_instance *instance); - -/* Always 4: one output sample occupies four bytes. */ FFMPEG_EXTERN int fmpg_audio_bytes_per_sample(fmpg_instance *instance); -/* - * Duration in milliseconds, or -1 if unknown. - * - * This value is known after ac_open_file() succeeds, as far as FFmpeg can know - * it from the container/stream metadata. Some streams do not contain exact - * duration information; in that case this getter returns -1. - */ +/* Duration in milliseconds, or -1 if unknown. */ FFMPEG_EXTERN int64_t fmpg_duration_ms(fmpg_instance *instance); -/* - * Duration expressed as output sample frames, or -1 if unknown. 
- * - * A sample frame means one sample moment across all channels. For stereo, one - * sample frame contains two int32_t values: left and right. This is usually the - * most useful duration unit for playback and progress calculations. - * - * PCM int32_t values in the whole output would be: - * - * ac_duration_samples(instance) * ac_audio_channels(instance) - */ +/* Duration in output sample frames, or -1 if unknown. */ FFMPEG_EXTERN int64_t fmpg_duration_samples(fmpg_instance *instance); /* ------------------------------------------------------------------------- */ /* Metadata */ /* ------------------------------------------------------------------------- */ -/* - * Metadata is owned by the instance and available after ac_open_file(). - * Returned strings are never NULL. Missing metadata is returned as "". - * - * Pointers remain valid until ac_close() or ac_free() is called for the - * instance. Do not free the returned strings. - */ FFMPEG_EXTERN const char *fmpg_file_title(fmpg_instance *instance); FFMPEG_EXTERN const char *fmpg_file_author(fmpg_instance *instance); FFMPEG_EXTERN const char *fmpg_file_album(fmpg_instance *instance); FFMPEG_EXTERN const char *fmpg_file_genre(fmpg_instance *instance); FFMPEG_EXTERN const char *fmpg_file_comment(fmpg_instance *instance); FFMPEG_EXTERN const char *fmpg_file_copyright(fmpg_instance *instance); - -/* Return -1 if the field is unknown. */ FFMPEG_EXTERN int fmpg_file_year(fmpg_instance *instance); FFMPEG_EXTERN int fmpg_file_track(fmpg_instance *instance); - -/* Container-level bitrate in bits/second, or -1 if unknown. */ FFMPEG_EXTERN int fmpg_file_bitrate(fmpg_instance *instance); /* ------------------------------------------------------------------------- */ -/* Packet reading */ +/* Decoding */ /* ------------------------------------------------------------------------- */ /* - * Read the next compressed packet from the selected audio stream. + * Decode the next block of audio. 
* - * Non-audio packets and packets from non-selected streams are skipped - * internally. The caller therefore no longer has to inspect stream_index. + * Internally this reads compressed packets from the selected audio stream, + * feeds them to the FFmpeg decoder, receives all available decoded frames, + * converts them to signed 32-bit interleaved PCM, and concatenates them in the + * instance output buffer. + * + * Non-selected streams are skipped internally. The caller does not handle + * stream_index, packets, or decoder objects. * * Return: - * package pointer, or NULL at EOF or on read error. + * 1 if PCM data is available through fmpg_buffer()/fmpg_buffer_size() + * 0 at EOF or on error */ -FFMPEG_EXTERN fmpg_package *fmpg_read_package(fmpg_instance *instance); - -/* Free a package returned by ac_read_package(). Safe to pass NULL. */ -FFMPEG_EXTERN void fmpg_free_package(fmpg_package *package); - -/* ------------------------------------------------------------------------- */ -/* Decoder */ -/* ------------------------------------------------------------------------- */ - -/* - * Create a decoder for the selected audio stream. - * - * The stream is the one selected during ac_open_file(). The caller does not - * pass a stream index. - */ -FFMPEG_EXTERN fmpg_decoder *fmpg_create_decoder(fmpg_instance *instance); - -/* Free decoder and all FFmpeg decoder/resampler state. Safe to pass NULL. */ -FFMPEG_EXTERN void fmpg_free_decoder(fmpg_decoder *decoder); - -/* - * Decode one compressed audio package. - * - * Modern FFmpeg decoding is packet-in, frame-out. One compressed packet can - * produce zero, one, or multiple decoded frames. This function receives all - * available frames, converts them to signed 32-bit interleaved PCM, and - * concatenates them into the decoder output buffer. 
- * Return: - * 1 if PCM data was produced - * 0 if no PCM data was produced or an error occurred - */ -FFMPEG_EXTERN int fmpg_decode_package(fmpg_package *package, fmpg_decoder *decoder); - -/* - * Flush delayed decoder/resampler samples after EOF. - * - * Call this repeatedly after ac_read_package() returns NULL, until this - * function returns 0. - */ -FFMPEG_EXTERN int fmpg_flush_decoder(fmpg_decoder *decoder); +FFMPEG_EXTERN int fmpg_decode_next(fmpg_instance *instance); /* * Seek to an absolute position in milliseconds. * - * The compressed decoder buffer, decoded output buffer and resampler state are - * reset. After seeking, continue reading packages and decoding as usual. + * FFmpeg may seek to a packet before the requested timestamp. This wrapper + * decodes and discards pre-roll samples until the requested output sample is + * reached, when timestamps are available. That makes the exposed sample + * position match the music position as closely as FFmpeg's timestamps allow. * - * Return: - * 1 on success - * 0 on failure + * Return: 1 on success, 0 on failure. */ -FFMPEG_EXTERN int fmpg_seek_ms(fmpg_decoder *decoder, int64_t target_pos_ms); +FFMPEG_EXTERN int fmpg_seek_ms(fmpg_instance *instance, int64_t target_pos_ms); /* ------------------------------------------------------------------------- */ -/* Decoder output */ +/* Output buffer and sample positions */ /* ------------------------------------------------------------------------- */ -/* - * Pointer to the current decoded PCM buffer. - * - * Format: - * int32_t samples - * interleaved by channel - * native endian - * - * The pointer remains valid until the next ac_decode_package(), - * ac_flush_decoder(), ac_seek_ms(), or ac_free_decoder() call for this decoder. - */ -FFMPEG_EXTERN const uint8_t *fmpg_decoder_buffer(fmpg_decoder *decoder); +/* Pointer to the current decoded PCM buffer. Valid until the next API call that + * decodes, seeks, closes, or frees the instance. 
*/ +FFMPEG_EXTERN const uint8_t *fmpg_buffer(fmpg_instance *instance); /* Size of the current decoded PCM buffer in bytes. */ -FFMPEG_EXTERN int fmpg_decoder_buffer_size(fmpg_decoder *decoder); +FFMPEG_EXTERN int fmpg_buffer_size(fmpg_instance *instance); + +/* Number of sample frames in the current decoded PCM buffer. */ +FFMPEG_EXTERN int64_t fmpg_buffer_samples(fmpg_instance *instance); + +/* Absolute sample-frame index of the first sample frame in the current buffer. */ +FFMPEG_EXTERN int64_t fmpg_buffer_start_sample(fmpg_instance *instance); + +/* Absolute sample-frame index just after the current buffer. */ +FFMPEG_EXTERN int64_t fmpg_buffer_end_sample(fmpg_instance *instance); /* - * Approximate timecode of the current decoded block in seconds. + * Current absolute sample position in the music stream. * - * This is based on packet timestamps. It is useful for progress indication, - * but exact sample counting should use ac_decoder_sample_position(). + * This is the same as fmpg_buffer_end_sample() after a successful + * fmpg_decode_next(): it points just after the last produced sample frame. */ -FFMPEG_EXTERN double fmpg_decoder_timecode(fmpg_decoder *decoder); +FFMPEG_EXTERN int64_t fmpg_sample_position(fmpg_instance *instance); -/* - * Number of output sample frames produced by the last decode/flush call. - * - * A sample frame contains one sample for each channel. For stereo S32, one - * sample frame is 8 bytes. - */ -FFMPEG_EXTERN int64_t fmpg_decoder_last_samples(fmpg_decoder *decoder); - -/* - * Running count of output sample frames produced since decoder creation or - * the most recent successful seek. - */ -FFMPEG_EXTERN int64_t fmpg_decoder_sample_position(fmpg_decoder *decoder); +/* Approximate start time of the current decoded block in seconds. */ +FFMPEG_EXTERN double fmpg_timecode(fmpg_instance *instance); #ifdef __cplusplus }