From 1a8646fd8b7fae102e5fc089d66e59b5cb518fbb Mon Sep 17 00:00:00 2001
From: Hans Dijkema <hans@dijkewijk.nl>
Date: Sun, 26 Apr 2026 11:49:42 +0200
Subject: [PATCH] ffmpeg-audio: second API (fmpg_* names, internal audio stream selection)

---
 ffmpeg-audio/demo_ffmpeg_audio.c | 147 +++---
 ffmpeg-audio/ffmpeg_audio.cpp    | 778 +++++++++++++++----------------
 ffmpeg-audio/ffmpeg_audio.h      | 294 +++++++++---
 3 files changed, 694 insertions(+), 525 deletions(-)

diff --git a/ffmpeg-audio/demo_ffmpeg_audio.c b/ffmpeg-audio/demo_ffmpeg_audio.c
index e49cd46..8de21ea 100644
--- a/ffmpeg-audio/demo_ffmpeg_audio.c
+++ b/ffmpeg-audio/demo_ffmpeg_audio.c
@@ -3,7 +3,6 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
 
 #ifdef WIN32
 #define fprintf fprintf_s
@@ -26,18 +25,18 @@ static int write_wav_header(FILE *f,
                             int channels,
                             int bits_per_sample,
                             uint32_t data_size) {
-    uint32_t byte_rate =
+    const uint32_t byte_rate =
         (uint32_t)(sample_rate * channels * bits_per_sample / 8);
-    uint16_t block_align =
+    const uint16_t block_align =
         (uint16_t)(channels * bits_per_sample / 8);
 
     fwrite("RIFF", 1, 4, f);
-    write_u32_le(f, 36 + data_size);
+    write_u32_le(f, 36u + data_size);
     fwrite("WAVE", 1, 4, f);
 
     fwrite("fmt ", 1, 4, f);
     write_u32_le(f, 16);              /* fmt chunk size */
-    write_u16_le(f, 1);               /* PCM */
+    write_u16_le(f, 1);               /* 1 = integer PCM */
     write_u16_le(f, (uint16_t)channels);
     write_u32_le(f, (uint32_t)sample_rate);
     write_u32_le(f, byte_rate);
@@ -66,16 +65,37 @@ static int rewrite_wav_header(FILE *f,
                             data_size);
 }
 
+/* Append the decoder's current PCM block to out and add its size to the total. */
+static int write_decoder_buffer(FILE *out,
+                                fmpg_decoder *dec,
+                                uint64_t *total_written) {
+    const uint8_t *pcm = fmpg_decoder_buffer(dec);
+    const int pcm_bytes = fmpg_decoder_buffer_size(dec);
+
+    /* An empty block is not an error; only a short write reports failure (0). */
+    if (pcm != NULL && pcm_bytes > 0) {
+        const size_t stored = fwrite(pcm, 1, (size_t)pcm_bytes, out);
+        if (stored != (size_t)pcm_bytes) {
+            return 0;
+        }
+        *total_written += (uint64_t)pcm_bytes;
+    }
+    return 1;
+}
+
 int main(int argc, char **argv) {
     const char *infile;
     const char *outfile;
 
-    fmpg_instance * ac = NULL;
-    fmpg_decoder * dec = NULL;
+    fmpg_instance *ac = NULL;
+    fmpg_decoder *dec = NULL;
     FILE *out = NULL;
 
-    int stream;
-    fmpg_audio_info info;
+    int sample_rate;
+    int channels;
+    int bits_per_sample;
+    int64_t duration_ms;
+    int64_t duration_samples;
     uint64_t total_written = 0;
 
     if (argc != 3) {
@@ -86,44 +106,42 @@ int main(int argc, char **argv) {
     infile = argv[1];
     outfile = argv[2];
 
-    ac = ac_init();
+    ac = fmpg_init();
     if (!ac) {
         fprintf(stderr, "ac_init failed\n");
         return 1;
     }
 
-    if (!ac_open_file(ac, infile)) {
+    if (!fmpg_open_file(ac, infile)) {
         fprintf(stderr, "could not open input file: %s\n", infile);
-        ac_free(ac);
+        fmpg_free(ac);
         return 1;
     }
 
-    stream = ac_get_default_audio_stream(ac);
-    if (stream < 0) {
-        fprintf(stderr, "no audio stream found\n");
-        ac_free(ac);
+    sample_rate = fmpg_audio_sample_rate(ac);
+    channels = fmpg_audio_channels(ac);
+    bits_per_sample = fmpg_audio_bits_per_sample(ac);
+    duration_ms = fmpg_duration_ms(ac);
+    duration_samples = fmpg_duration_samples(ac);
+
+    if (sample_rate <= 0 || channels <= 0 || bits_per_sample != 32) {
+        fprintf(stderr, "invalid audio parameters\n");
+        fmpg_free(ac);
         return 1;
     }
 
-    memset(&info, 0, sizeof(info));
-    if (!ac_get_audio_info(ac, stream, &info)) {
-        fprintf(stderr, "could not get audio info\n");
-        ac_free(ac);
-        return 1;
-    }
-
-    dec = ac_create_decoder(ac, stream);
+    dec = fmpg_create_decoder(ac);
     if (!dec) {
         fprintf(stderr, "could not create decoder\n");
-        ac_free(ac);
+        fmpg_free(ac);
         return 1;
     }
 
     out = fopen(outfile, "wb");
     if (!out) {
         fprintf(stderr, "could not open output file: %s\n", outfile);
-        ac_free_decoder(dec);
-        ac_free(ac);
+        fmpg_free_decoder(dec);
+        fmpg_free(ac);
         return 1;
     }
 
@@ -132,49 +150,50 @@ int main(int argc, char **argv) {
      * Write a placeholder header first and patch it at the end.
      */
     if (!write_wav_header(out,
-                          info.sample_rate,
-                          info.channels,
-                          32,
+                          sample_rate,
+                          channels,
+                          bits_per_sample,
                           0)) {
         fprintf(stderr, "could not write WAV header\n");
         fclose(out);
-        ac_free_decoder(dec);
-        ac_free(ac);
+        fmpg_free_decoder(dec);
+        fmpg_free(ac);
         return 1;
     }
 
     for (;;) {
-        fmpg_package * pkg = ac_read_package(ac);
+        fmpg_package *pkg = fmpg_read_package(ac);
 
         if (!pkg) {
             break;
         }
 
-        if (ac_package_stream_index(pkg) == stream) {
-            if (ac_decode_package(pkg, dec)) {
-                const uint8_t *buf = ac_decoder_buffer(dec);
-                int size = ac_decoder_buffer_size(dec);
-
-                if (buf && size > 0) {
-                    fwrite(buf, 1, (size_t)size, out);
-                    total_written += (uint64_t)size;
-                }
+        /*
+         * fmpg_read_package() now returns only packets from the internally
+         * selected audio stream. No stream_index test is needed anymore.
+         */
+        if (fmpg_decode_package(pkg, dec)) {
+            if (!write_decoder_buffer(out, dec, &total_written)) {
+                fprintf(stderr, "could not write PCM data\n");
+                fmpg_free_package(pkg);
+                fclose(out);
+                fmpg_free_decoder(dec);
+                fmpg_free(ac);
+                return 1;
             }
         }
 
-        ac_free_package(pkg);
+        fmpg_free_package(pkg);
     }
 
-    /*
-     * Drain delayed samples from the decoder and resampler.
-     */
-    while (ac_flush_decoder(dec)) {
-        const uint8_t *buf = ac_decoder_buffer(dec);
-        int size = ac_decoder_buffer_size(dec);
-
-        if (buf && size > 0) {
-            fwrite(buf, 1, (size_t)size, out);
-            total_written += (uint64_t)size;
+    /* Drain delayed samples from the decoder and resampler. */
+    while (fmpg_flush_decoder(dec)) {
+        if (!write_decoder_buffer(out, dec, &total_written)) {
+            fprintf(stderr, "could not write flushed PCM data\n");
+            fclose(out);
+            fmpg_free_decoder(dec);
+            fmpg_free(ac);
+            return 1;
         }
     }
 
@@ -185,23 +204,29 @@ int main(int argc, char **argv) {
     }
 
     if (!rewrite_wav_header(out,
-                            info.sample_rate,
-                            info.channels,
-                            32,
+                            sample_rate,
+                            channels,
+                            bits_per_sample,
                             (uint32_t)total_written)) {
         fprintf(stderr, "could not rewrite WAV header\n");
     }
 
     fclose(out);
-    ac_free_decoder(dec);
-    ac_free(ac);
 
     printf("wrote %s\n", outfile);
-    printf("sample rate: %d\n", info.sample_rate);
-    printf("channels:    %d\n", info.channels);
-    printf("sample bits: %d\n", 32);
+    printf("title:       %s\n", fmpg_file_title(ac));
+    printf("album:       %s\n", fmpg_file_album(ac));
+    printf("sample rate: %d\n", sample_rate);
+    printf("channels:    %d\n", channels);
+    printf("sample bits: %d\n", bits_per_sample);
+    printf("duration ms: %lld\n", (long long)duration_ms);
+    printf("duration smp:%lld\n", (long long)duration_samples);
+    printf("decoded smp: %lld\n", (long long)fmpg_decoder_sample_position(dec));
     printf("data bytes:  %llu\n",
            (unsigned long long)total_written);
 
+    fmpg_free_decoder(dec);
+    fmpg_free(ac);
+
     return 0;
 }
diff --git a/ffmpeg-audio/ffmpeg_audio.cpp b/ffmpeg-audio/ffmpeg_audio.cpp
index 096d96a..add0a27 100644
--- a/ffmpeg-audio/ffmpeg_audio.cpp
+++ b/ffmpeg-audio/ffmpeg_audio.cpp
@@ -1,43 +1,18 @@
 /*
- * Acinerella audio-only decoder.
+ * Audio-only FFmpeg wrapper.
  *
- * This file is intentionally written as C++ internally, but exports a stable
- * C ABI. That gives us RAII, std::string and std::vector internally, while a
- * C or Racket FFI caller still sees a simple C interface.
+ * This file is implemented in C++, but exports a plain C ABI. C++ is used only
+ * internally to make ownership understandable: strings are std::string, decoded
+ * PCM buffers are std::vector, and FFmpeg objects are released by destructors.
  *
- * What this decoder does:
+ * Public design choices:
  *
- *   1. Open a media file with FFmpeg/libavformat.
- *   2. Find audio streams.
- *   3. Read compressed packets from the container.
- *   4. Decode packets with the modern avcodec_send_packet() /
- *      avcodec_receive_frame() API.
- *   5. Convert decoded audio to one predictable output format:
- *
- *          signed 32-bit integer PCM
- *          interleaved / packed
- *          native endian
- *
- *      This is suitable for feeding to libao as 32-bit PCM.
- *
- * Important FFmpeg vocabulary:
- *
- *   Container/demuxer:
- *      The file format layer: mp3, mp4/m4a, ogg, wav, etc.
- *      FFmpeg represents this with AVFormatContext.
- *
- *   Stream:
- *      A file may contain one or more streams. For this audio-only API we only
- *      care about streams whose codec_type is AVMEDIA_TYPE_AUDIO.
- *
- *   Packet:
- *      Compressed data belonging to one stream. One packet may decode to zero,
- *      one, or multiple decoded frames.
- *
- *   Frame:
- *      Decoded audio samples, but not necessarily in the format we want. MP3,
- *      for example, may decode to planar float. We therefore use libswresample
- *      to normalize everything to signed 32-bit interleaved PCM.
+ *   - The caller opens one file.
+ *   - The best audio stream is selected internally.
+ *   - FFmpeg stream_index is not exposed.
+ *   - File metadata is stored in the instance and accessed through getters.
+ *   - Audio output is always signed 32-bit interleaved PCM.
+ *   - There are no callbacks; file IO is handled by FFmpeg.
  */
 
 #include "ffmpeg_audio.h"
@@ -63,18 +38,8 @@ static constexpr int AC_AUDIO_OUTPUT_BITS = 32;
 static constexpr int AC_AUDIO_OUTPUT_BYTES = 4;
 static constexpr AVSampleFormat AC_AUDIO_OUTPUT_FMT = AV_SAMPLE_FMT_S32;
 
-/*
- * Metadata.
- *
- * This used to be the kind of place where C code often used fixed-size arrays:
- *
- *   char title[512];
- *
- * That is simple, but truncates long UTF-8 metadata and wastes space. Since the
- * implementation is C++, std::string is the natural representation. The public
- * C API only exposes const char* getters.
- */
-struct __fmpg_file_info__ {
+/* Metadata stored inside fmpg_instance. */
+struct file_info_storage {
     std::string title;
     std::string author;
     std::string album;
@@ -84,7 +49,6 @@ struct __fmpg_file_info__ {
 
     int year = -1;
     int track = -1;
-    int64_t duration = -1; /* milliseconds */
     int bitrate = -1;
 
     void clear() {
@@ -96,21 +60,34 @@ struct __fmpg_file_info__ {
         copyright.clear();
         year = -1;
         track = -1;
-        duration = -1;
         bitrate = -1;
     }
 };
 
-/*
- * __fmpg_instance__ owns the opened media file.
- *
- * AVFormatContext is FFmpeg's demuxer/container object. It knows which streams
- * the file contains and can read compressed packets from it.
- */
+/*
+ * Properties of the selected audio stream, plus the file's audio stream count.
+ * The member initializers are the "nothing selected" state; clear() restores it.
+ */
+struct audio_info_storage {
+    int audio_stream_count = 0;
+    int selected_stream_index = -1; /* Internal FFmpeg stream index. */
+    int sample_rate = 0;
+    int channels = 0;
+    int64_t duration_ms = -1;
+    int64_t duration_samples = -1;  /* Sample frames, not int32_t values. */
+
+    /* Reset every field to its default member initializer. */
+    void clear()
+    {
+        *this = audio_info_storage();
+    }
+};
+
 struct __fmpg_instance__ {
     bool opened = false;
     AVFormatContext *format_ctx = nullptr;
-    fmpg_file_info info;
+    file_info_storage file_info;
+    audio_info_storage audio_info;
 
     ~__fmpg_instance__() {
         if (format_ctx) {
@@ -119,14 +96,7 @@ struct __fmpg_instance__ {
     }
 };
 
-/*
- * A package wraps one FFmpeg AVPacket.
- *
- * The old Acinerella name was "package". FFmpeg calls this a packet. It is not
- * decoded audio yet; it is compressed data read from the container.
- */
 struct __fmpg_package__ {
-    int stream_index = -1;
     int64_t pts = AV_NOPTS_VALUE;
     AVPacket *packet = nullptr;
 
@@ -137,21 +107,19 @@ struct __fmpg_package__ {
     }
 };
 
-/*
- * __fmpg_decoder__ owns the actual audio decoder and resampler for one stream.
- */
 struct __fmpg_decoder__ {
     fmpg_instance *instance = nullptr;
-    int stream_index = -1;
 
     const AVCodec *codec = nullptr;
     AVCodecContext *codec_ctx = nullptr;
     AVFrame *frame = nullptr;
     SwrContext *swr_ctx = nullptr;
 
-    fmpg_audio_info audio_info{};
     std::vector<uint8_t> pcm;
+
     double timecode = 0.0;
+    int64_t last_samples = 0;      /* sample frames in current output block */
+    int64_t sample_position = 0;   /* total sample frames emitted */
 
     ~__fmpg_decoder__() {
         avcodec_free_context(&codec_ctx);
@@ -160,26 +128,28 @@ struct __fmpg_decoder__ {
     }
 };
 
-static const char *empty_if_null(const char *s) {
-    return s ? s : "";
-}
-
-static const char *string_c_str(const std::string &s) {
+static const char *string_c_str(const std::string &s)
+{
     return s.empty() ? "" : s.c_str();
 }
 
-static std::string get_metadata_string(const AVFormatContext *ctx,
-                                       const char *key) {
-    const AVDictionaryEntry *entry =
-        av_dict_get(ctx->metadata, key, nullptr, 0);
+static std::string get_metadata_string(const AVFormatContext *ctx, const char *key)
+{
+    const AVDictionaryEntry *entry = av_dict_get(ctx->metadata,
+                                                 key,
+                                                 nullptr,
+                                                 0);
 
     return entry && entry->value ? std::string(entry->value)
                                  : std::string();
 }
 
-static int get_metadata_int(const AVFormatContext *ctx, const char *key) {
-    const AVDictionaryEntry *entry =
-        av_dict_get(ctx->metadata, key, nullptr, 0);
+static int get_metadata_int(const AVFormatContext *ctx, const char *key)
+{
+    const AVDictionaryEntry *entry = av_dict_get(ctx->metadata,
+                                                 key,
+                                                 nullptr,
+                                                 0);
 
     if (!entry || !entry->value || !*entry->value) {
         return -1;
@@ -188,34 +158,134 @@ static int get_metadata_int(const AVFormatContext *ctx, const char *key) {
     return std::atoi(entry->value);
 }
 
-static void fill_metadata(fmpg_instance *self) {
+/* Count the streams in ctx whose codec type is audio; a null ctx counts as 0. */
+static int count_audio_streams(const AVFormatContext *ctx)
+{
+    if (ctx == nullptr) {
+        return 0;
+    }
+
+    int audio_streams = 0;
+    for (unsigned idx = 0; idx != ctx->nb_streams; ++idx) {
+        const AVCodecParameters *params = ctx->streams[idx]->codecpar;
+        const bool is_audio = params != nullptr &&
+                              params->codec_type == AVMEDIA_TYPE_AUDIO;
+
+        audio_streams += is_audio ? 1 : 0;
+    }
+    return audio_streams;
+}
+
+/* Round a duration in seconds to whole milliseconds; negative input gives -1. */
+static int64_t milliseconds_from_seconds(double seconds)
+{
+    const bool unknown = seconds < 0.0;
+
+    return unknown ? -1
+                   : static_cast<int64_t>(seconds * 1000.0 + 0.5);
+}
+
+/* Seconds -> sample frames at sample_rate, rounded to nearest; -1 if unknown. */
+static int64_t samples_from_seconds(double seconds, int sample_rate)
+{
+    const bool unknown = seconds < 0.0 || sample_rate <= 0;
+    const double rate = static_cast<double>(sample_rate);
+
+    return unknown ? -1
+                   : static_cast<int64_t>(seconds * rate + 0.5);
+}
+
+/* Stream duration in seconds; -1.0 if stream is null or duration is AV_NOPTS_VALUE. */
+static double stream_duration_seconds(const AVStream *stream)
+{
+    if (stream == nullptr || stream->duration == AV_NOPTS_VALUE) {
+        return -1.0;
+    }
+    return av_q2d(stream->time_base) * static_cast<double>(stream->duration);
+}
+
+/* Container duration in seconds; -1.0 if ctx is null or duration is AV_NOPTS_VALUE. */
+static double format_duration_seconds(const AVFormatContext *ctx)
+{
+    const bool known = ctx != nullptr && ctx->duration != AV_NOPTS_VALUE;
+    const double ticks_per_second = static_cast<double>(AV_TIME_BASE);
+
+    return known ? static_cast<double>(ctx->duration) / ticks_per_second : -1.0;
+}
+
+static void fill_file_metadata(fmpg_instance *self)
+{
     AVFormatContext *ctx = self->format_ctx;
 
-    self->info.clear();
-    self->info.title = get_metadata_string(ctx, "title");
-    self->info.author = get_metadata_string(ctx, "artist");
-    self->info.album = get_metadata_string(ctx, "album");
-    self->info.genre = get_metadata_string(ctx, "genre");
-    self->info.comment = get_metadata_string(ctx, "comment");
-    self->info.copyright = get_metadata_string(ctx, "copyright");
-    self->info.year = get_metadata_int(ctx, "year");
-    self->info.track = get_metadata_int(ctx, "track");
-    self->info.bitrate = static_cast<int>(ctx->bit_rate);
-
-    self->info.duration =
-        ctx->duration == AV_NOPTS_VALUE
-            ? -1
-            : ctx->duration * 1000 / AV_TIME_BASE;
+    self->file_info.clear();
+    self->file_info.title = get_metadata_string(ctx, "title");
+    self->file_info.author = get_metadata_string(ctx, "artist");
+    self->file_info.album = get_metadata_string(ctx, "album");
+    self->file_info.genre = get_metadata_string(ctx, "genre");
+    self->file_info.comment = get_metadata_string(ctx, "comment");
+    self->file_info.copyright = get_metadata_string(ctx, "copyright");
+    self->file_info.year = get_metadata_int(ctx, "year");
+    self->file_info.track = get_metadata_int(ctx, "track");
+    self->file_info.bitrate = ctx->bit_rate > 0
+                                  ? static_cast<int>(ctx->bit_rate)
+                                  : -1;
 }
 
-static bool valid_stream_index(const fmpg_instance *instance, int stream_index)
+/*
+ * Pick the best audio stream of self->format_ctx and cache its properties in
+ * self->audio_info. Returns false when no usable audio stream is found; on
+ * failure only audio_stream_count has been set.
+ */
+static bool fill_audio_info(fmpg_instance *self)
+{
+    AVFormatContext *const fmt = self->format_ctx;
+    audio_info_storage &audio = self->audio_info;
+
+    audio.clear();
+    audio.audio_stream_count = count_audio_streams(fmt);
+
+    const int chosen = av_find_best_stream(fmt, AVMEDIA_TYPE_AUDIO,
+                                           -1, -1, nullptr, 0);
+    if (chosen < 0) {
+        return false;
+    }
+
+    AVStream *const stream = fmt->streams[chosen];
+    const AVCodecParameters *const params = stream->codecpar;
+    if (params == nullptr || params->codec_type != AVMEDIA_TYPE_AUDIO) {
+        return false;
+    }
+    if (params->sample_rate <= 0 || params->ch_layout.nb_channels <= 0) {
+        return false;
+    }
+
+    audio.selected_stream_index = chosen;
+    audio.sample_rate = params->sample_rate;
+    audio.channels = params->ch_layout.nb_channels;
+
+    /*
+     * The stream's own duration is preferred because it is expressed in that
+     * stream's time base. Some containers only report a container-level
+     * duration, so that is used when the stream has none.
+     */
+    const double stream_seconds = stream_duration_seconds(stream);
+    const double seconds = stream_seconds < 0.0
+                               ? format_duration_seconds(fmt)
+                               : stream_seconds;
+
+    audio.duration_ms = milliseconds_from_seconds(seconds);
+    audio.duration_samples = samples_from_seconds(seconds, audio.sample_rate);
+
+    return true;
+}
+
+static bool instance_ready(const fmpg_instance *instance)
 {
     return instance && instance->opened && instance->format_ctx &&
-           stream_index >= 0 &&
-           stream_index < static_cast<int>(instance->format_ctx->nb_streams);
+           instance->audio_info.selected_stream_index >= 0;
 }
 
-fmpg_instance *  ac_init(void) {
+fmpg_instance *fmpg_init(void) {
     try {
         return new fmpg_instance();
     } catch (...) {
@@ -223,43 +293,43 @@ fmpg_instance *  ac_init(void) {
     }
 }
 
-void  ac_free(fmpg_instance * instance) {
+void fmpg_free(fmpg_instance *instance)
+{
     delete instance;
 }
 
-int  ac_open_file(fmpg_instance * instance,
-                                       const char *filename) {
+int fmpg_open_file(fmpg_instance *instance, const char *filename)
+{
     if (!instance || instance->opened || !filename) {
         return 0;
     }
 
-    /*
-     * avformat_open_input opens the file and guesses the container format.
-     * The codec is not opened here. This is only the demuxing layer.
-     */
     if (avformat_open_input(&instance->format_ctx,
-                            empty_if_null(filename),
+                            filename,
                             nullptr,
                             nullptr) < 0) {
-        ac_close(instance);
+        fmpg_close(instance);
         return 0;
     }
 
-    /*
-     * Read enough packets to discover stream metadata such as sample rate,
-     * channel layout, codec id, duration and tags.
-     */
     if (avformat_find_stream_info(instance->format_ctx, nullptr) < 0) {
-        ac_close(instance);
+        fmpg_close(instance);
+        return 0;
+    }
+
+    fill_file_metadata(instance);
+
+    if (!fill_audio_info(instance)) {
+        fmpg_close(instance);
         return 0;
     }
 
-    fill_metadata(instance);
     instance->opened = true;
     return 1;
 }
 
-void  ac_close(fmpg_instance * instance) {
+void fmpg_close(fmpg_instance *instance)
+{
     if (!instance) {
         return;
     }
@@ -269,179 +339,139 @@ void  ac_close(fmpg_instance * instance) {
     }
 
     instance->opened = false;
-    instance->info.clear();
+    instance->file_info.clear();
+    instance->audio_info.clear();
 }
 
-int  ac_is_open(fmpg_instance * instance)
+int fmpg_is_open(fmpg_instance *instance)
 {
-    return instance && instance->opened ? 1 : 0;
+    return instance_ready(instance) ? 1 : 0;
 }
 
-int ac_get_audio_stream_count(fmpg_instance * instance)
+int fmpg_audio_stream_count(fmpg_instance *instance)
 {
-    if (!instance || !instance->opened || !instance->format_ctx) {
-        return 0;
+    return instance && instance->opened ? instance->audio_info.audio_stream_count
+                                        : 0;
+}
+
+int fmpg_audio_sample_rate(fmpg_instance *instance)
+{
+    return instance_ready(instance) ? instance->audio_info.sample_rate : 0;
+}
+
+int fmpg_audio_channels(fmpg_instance *instance)
+{
+    return instance_ready(instance) ? instance->audio_info.channels : 0;
+}
+
+int fmpg_audio_bits_per_sample(fmpg_instance *)
+{
+    return AC_AUDIO_OUTPUT_BITS;
+}
+
+int fmpg_audio_bytes_per_sample(fmpg_instance *)
+{
+    return AC_AUDIO_OUTPUT_BYTES;
+}
+
+int64_t fmpg_duration_ms(fmpg_instance *instance)
+{
+    return instance_ready(instance) ? instance->audio_info.duration_ms : -1;
+}
+
+int64_t fmpg_duration_samples(fmpg_instance *instance)
+{
+    return instance_ready(instance) ? instance->audio_info.duration_samples : -1;
+}
+
+const char *fmpg_file_title(fmpg_instance *instance)
+{
+    return instance ? string_c_str(instance->file_info.title) : "";
+}
+
+const char *fmpg_file_author(fmpg_instance *instance)
+{
+    return instance ? string_c_str(instance->file_info.author) : "";
+}
+
+const char *fmpg_file_album(fmpg_instance *instance) {
+    return instance ? string_c_str(instance->file_info.album) : "";
+}
+
+const char *fmpg_file_genre(fmpg_instance *instance)
+{
+    return instance ? string_c_str(instance->file_info.genre) : "";
+}
+
+const char *fmpg_file_comment(fmpg_instance *instance)
+{
+    return instance ? string_c_str(instance->file_info.comment) : "";
+}
+
+const char *fmpg_file_copyright(fmpg_instance *instance)
+{
+    return instance ? string_c_str(instance->file_info.copyright) : "";
+}
+
+int fmpg_file_year(fmpg_instance *instance) {
+    return instance ? instance->file_info.year : -1;
+}
+
+int fmpg_file_track(fmpg_instance *instance)
+{
+    return instance ? instance->file_info.track : -1;
+}
+
+int fmpg_file_bitrate(fmpg_instance *instance)
+{
+    return instance ? instance->file_info.bitrate : -1;
+}
+
+fmpg_package *fmpg_read_package(fmpg_instance *instance)
+{
+    if (!instance_ready(instance)) {
+        return nullptr;
     }
 
-    int count = 0;
+    const int wanted_stream = instance->audio_info.selected_stream_index;
 
-    for (unsigned i = 0; i < instance->format_ctx->nb_streams; ++i) {
-        const AVCodecParameters *par =
-            instance->format_ctx->streams[i]->codecpar;
+    for (;;) {
+        fmpg_package *pkg = nullptr;
 
-        if (par && par->codec_type == AVMEDIA_TYPE_AUDIO) {
-            ++count;
+        try {
+            pkg = new fmpg_package();
+        } catch (...) {
+            return nullptr;
         }
+
+        if (!pkg->packet) {
+            delete pkg;
+            return nullptr;
+        }
+
+        const int ret = av_read_frame(instance->format_ctx, pkg->packet);
+        if (ret < 0) {
+            delete pkg;
+            return nullptr;
+        }
+
+        if (pkg->packet->stream_index != wanted_stream) {
+            delete pkg;
+            continue;
+        }
+
+        pkg->pts = pkg->packet->dts != AV_NOPTS_VALUE
+                       ? pkg->packet->dts
+                       : pkg->packet->pts;
+        return pkg;
     }
-
-    return count;
 }
 
-int ac_get_default_audio_stream(fmpg_instance * instance)
-{
-    if (!instance || !instance->opened || !instance->format_ctx) {
-        return -1;
-    }
-
-    const int idx = av_find_best_stream(instance->format_ctx,
-                                        AVMEDIA_TYPE_AUDIO,
-                                        -1,
-                                        -1,
-                                        nullptr,
-                                        0);
-
-    return idx >= 0 ? idx : -1;
-}
-
-int ac_get_audio_info(fmpg_instance * instance, int stream_index, fmpg_audio_info *info)
-{
-    if (!info) {
-        return 0;
-    }
-
-    std::memset(info, 0, sizeof(*info));
-
-    if (!valid_stream_index(instance, stream_index)) {
-        return 0;
-    }
-
-    const AVCodecParameters *par =
-        instance->format_ctx->streams[stream_index]->codecpar;
-
-    if (!par || par->codec_type != AVMEDIA_TYPE_AUDIO) {
-        return 0;
-    }
-
-    info->sample_rate = par->sample_rate;
-    info->channels = par->ch_layout.nb_channels;
-    info->bits_per_sample = AC_AUDIO_OUTPUT_BITS;
-    info->bytes_per_sample = AC_AUDIO_OUTPUT_BYTES;
-
-    return info->sample_rate > 0 && info->channels > 0 ? 1 : 0;
-}
-
-const fmpg_file_info *ac_get_file_info(fmpg_instance * instance)
-{
-    return instance ? &instance->info : nullptr;
-}
-
-const char * ac_file_info_title(const fmpg_file_info *info)
-{
-    return info ? string_c_str(info->title) : "";
-}
-
-const char *ac_file_info_author(const fmpg_file_info *info)
-{
-    return info ? string_c_str(info->author) : "";
-}
-
-const char *ac_file_info_album(const fmpg_file_info *info)
-{
-    return info ? string_c_str(info->album) : "";
-}
-
-const char *ac_file_info_genre(const fmpg_file_info *info)
-{
-    return info ? string_c_str(info->genre) : "";
-}
-
-const char *ac_file_info_comment(const fmpg_file_info *info)
-{
-    return info ? string_c_str(info->comment) : "";
-}
-
-const char *ac_file_info_copyright(const fmpg_file_info *info)
-{
-    return info ? string_c_str(info->copyright) : "";
-}
-
-int ac_file_info_year(const fmpg_file_info *info)
-{
-    return info ? info->year : -1;
-}
-
-int ac_file_info_track(const fmpg_file_info *info)
-{
-    return info ? info->track : -1;
-}
-
-int64_t ac_file_info_duration(const fmpg_file_info *info)
-{
-    return info ? info->duration : -1;
-}
-
-int ac_file_info_bitrate(const fmpg_file_info *info)
-{
-    return info ? info->bitrate : -1;
-}
-
-fmpg_package * ac_read_package(fmpg_instance * instance)
-{
-    if (!instance || !instance->opened || !instance->format_ctx) {
-        return nullptr;
-    }
-
-    fmpg_package *pkg = nullptr;
-
-    try {
-        pkg = new fmpg_package();
-    } catch (...) {
-        return nullptr;
-    }
-
-    if (!pkg->packet) {
-        delete pkg;
-        return nullptr;
-    }
-
-    /*
-     * av_read_frame reads one compressed packet. This may be audio, video,
-     * subtitles, or another stream type. The caller can inspect stream_index
-     * and only feed audio packets to the matching decoder.
-     */
-    if (av_read_frame(instance->format_ctx, pkg->packet) < 0) {
-        delete pkg;
-        return nullptr;
-    }
-
-    pkg->stream_index = pkg->packet->stream_index;
-    pkg->pts = pkg->packet->dts != AV_NOPTS_VALUE
-                   ? pkg->packet->dts
-                   : pkg->packet->pts;
-
-    return pkg;
-}
-
-void ac_free_package(fmpg_package * package)
+void fmpg_free_package(fmpg_package *package)
 {
     delete package;
 }
 
-int ac_package_stream_index(fmpg_package * package)
-{
-    return package ? package->stream_index : -1;
-}
-
 static bool init_codec_context(fmpg_decoder *dec, const AVCodecParameters *par)
 {
     dec->codec = avcodec_find_decoder(par->codec_id);
@@ -454,21 +484,11 @@ static bool init_codec_context(fmpg_decoder *dec, const AVCodecParameters *par)
         return false;
     }
 
-    /*
-     * Copy stream codec parameters into the active decoder context.
-     */
     if (avcodec_parameters_to_context(dec->codec_ctx, par) < 0) {
         return false;
     }
 
-    /*
-     * Open the actual decoder. From this point on, packets can be sent to it.
-     */
-    if (avcodec_open2(dec->codec_ctx, dec->codec, nullptr) < 0) {
-        return false;
-    }
-
-    return true;
+    return avcodec_open2(dec->codec_ctx, dec->codec, nullptr) >= 0;
 }
 
 static bool init_resampler(fmpg_decoder *dec)
@@ -479,10 +499,6 @@ static bool init_resampler(fmpg_decoder *dec)
         return false;
     }
 
-    /*
-     * We do not change sample rate or channel layout. We only normalize the
-     * sample format to signed 32-bit integer PCM.
-     */
     if (swr_alloc_set_opts2(&dec->swr_ctx,
                             layout,
                             AC_AUDIO_OUTPUT_FMT,
@@ -498,14 +514,9 @@ static bool init_resampler(fmpg_decoder *dec)
     return swr_init(dec->swr_ctx) >= 0;
 }
 
-fmpg_decoder * ac_create_decoder(fmpg_instance * instance, int stream_index)
+fmpg_decoder *fmpg_create_decoder(fmpg_instance *instance)
 {
-    if (!valid_stream_index(instance, stream_index)) {
-        return nullptr;
-    }
-
-    fmpg_audio_info info{};
-    if (!ac_get_audio_info(instance, stream_index, &info)) {
+    if (!instance_ready(instance)) {
         return nullptr;
     }
 
@@ -518,11 +529,9 @@ fmpg_decoder * ac_create_decoder(fmpg_instance * instance, int stream_index)
     }
 
     dec->instance = instance;
-    dec->stream_index = stream_index;
-    dec->audio_info = info;
 
-    const AVCodecParameters *par =
-        instance->format_ctx->streams[stream_index]->codecpar;
+    const int stream_index = instance->audio_info.selected_stream_index;
+    const AVCodecParameters *par = instance->format_ctx->streams[stream_index]->codecpar;
 
     if (!init_codec_context(dec, par)) {
         delete dec;
@@ -543,7 +552,7 @@ fmpg_decoder * ac_create_decoder(fmpg_instance * instance, int stream_index)
     return dec;
 }
 
-void ac_free_decoder(fmpg_decoder * decoder)
+void fmpg_free_decoder(fmpg_decoder *decoder)
 {
     delete decoder;
 }
@@ -577,25 +586,17 @@ static bool append_converted_frame(fmpg_decoder *dec, const AVFrame *frame)
         return true;
     }
 
-    /*
-     * swr_get_out_samples gives a safe upper bound for the number of output
-     * samples. The resampler can have internal delay, so this is safer than
-     * assuming input sample count equals output sample count.
-     */
-    const int max_out_samples =
-        swr_get_out_samples(dec->swr_ctx, frame->nb_samples);
-
+    const int max_out_samples = swr_get_out_samples(dec->swr_ctx,
+                                                    frame->nb_samples);
     if (max_out_samples <= 0) {
         return false;
     }
 
-    const int max_bytes =
-        av_samples_get_buffer_size(nullptr,
-                                   channels,
-                                   max_out_samples,
-                                   AC_AUDIO_OUTPUT_FMT,
-                                   1);
-
+    const int max_bytes = av_samples_get_buffer_size(nullptr,
+                                                     channels,
+                                                     max_out_samples,
+                                                     AC_AUDIO_OUTPUT_FMT,
+                                                     1);
     if (max_bytes <= 0) {
         return false;
     }
@@ -603,32 +604,31 @@ static bool append_converted_frame(fmpg_decoder *dec, const AVFrame *frame)
     std::vector<uint8_t> tmp(static_cast<size_t>(max_bytes));
     uint8_t *out_planes[1] = { tmp.data() };
 
-    /*
-     * swr_convert performs the actual conversion to S32 interleaved PCM.
-     */
-    const int out_samples =
-        swr_convert(dec->swr_ctx,
-                    out_planes,
-                    max_out_samples,
-                    const_cast<const uint8_t **>(frame->data),
-                    frame->nb_samples);
-
+    const int out_samples = swr_convert(dec->swr_ctx,
+                                        out_planes,
+                                        max_out_samples,
+                                        const_cast<const uint8_t **>(frame->data),
+                                        frame->nb_samples);
     if (out_samples < 0) {
         return false;
     }
 
-    const int used_bytes =
-        av_samples_get_buffer_size(nullptr,
-                                   channels,
-                                   out_samples,
-                                   AC_AUDIO_OUTPUT_FMT,
-                                   1);
-
+    const int used_bytes = av_samples_get_buffer_size(nullptr,
+                                                      channels,
+                                                      out_samples,
+                                                      AC_AUDIO_OUTPUT_FMT,
+                                                      1);
     if (used_bytes < 0) {
         return false;
     }
 
-    return append_bytes(dec, tmp.data(), static_cast<size_t>(used_bytes));
+    if (!append_bytes(dec, tmp.data(), static_cast<size_t>(used_bytes))) {
+        return false;
+    }
+
+    dec->last_samples += out_samples;
+    dec->sample_position += out_samples;
+    return true;
 }
 
 static int receive_available_frames(fmpg_decoder *dec)
@@ -662,29 +662,21 @@ static void update_timecode_from_packet(fmpg_decoder *dec, const fmpg_package *p
         return;
     }
 
-    AVStream *stream = dec->instance->format_ctx->streams[pkg->stream_index];
+    const int stream_index = dec->instance->audio_info.selected_stream_index;
+    AVStream *stream = dec->instance->format_ctx->streams[stream_index];
     dec->timecode = pkg->pts * av_q2d(stream->time_base);
 }
 
-int ac_decode_package(fmpg_package * package, fmpg_decoder * decoder)
+int fmpg_decode_package(fmpg_package *package, fmpg_decoder *decoder)
 {
-    if (!package || !decoder || !package->packet ||
-        package->stream_index != decoder->stream_index) {
+    if (!package || !decoder || !package->packet) {
         return 0;
     }
 
     decoder->pcm.clear();
+    decoder->last_samples = 0;
     update_timecode_from_packet(decoder, package);
 
-    /*
-     * Modern FFmpeg decoding is a two-step queue-like API:
-     *
-     *   1. send compressed packet
-     *   2. receive all decoded frames currently available
-     *
-     * A single packet can produce multiple frames, especially with codecs that
-     * buffer internally. We concatenate all produced PCM blocks.
-     */
     int ret = avcodec_send_packet(decoder->codec_ctx, package->packet);
 
     if (ret == AVERROR(EAGAIN)) {
@@ -701,18 +693,15 @@ int ac_decode_package(fmpg_package * package, fmpg_decoder * decoder)
     return receive_available_frames(decoder) > 0 ? 1 : 0;
 }
 
-int ac_flush_decoder(fmpg_decoder * decoder)
+int fmpg_flush_decoder(fmpg_decoder *decoder)
 {
     if (!decoder) {
         return 0;
     }
 
     decoder->pcm.clear();
+    decoder->last_samples = 0;
 
-    /*
-     * Sending NULL tells FFmpeg that no more input is coming and that delayed
-     * decoded frames should be drained.
-     */
     const int ret = avcodec_send_packet(decoder->codec_ctx, nullptr);
     if (ret < 0 && ret != AVERROR_EOF) {
         return 0;
@@ -723,25 +712,20 @@ int ac_flush_decoder(fmpg_decoder * decoder)
         return 0;
     }
 
-    /* Drain possible delayed samples from libswresample as well. */
     const int channels = decoder->codec_ctx->ch_layout.nb_channels;
 
     for (;;) {
-        const int delay =
-            static_cast<int>(swr_get_delay(decoder->swr_ctx,
-                                           decoder->codec_ctx->sample_rate));
-
+        const int delay = static_cast<int>(swr_get_delay(decoder->swr_ctx,
+                                                        decoder->codec_ctx->sample_rate));
         if (delay <= 0) {
             break;
         }
 
-        const int max_bytes =
-            av_samples_get_buffer_size(nullptr,
-                                       channels,
-                                       delay,
-                                       AC_AUDIO_OUTPUT_FMT,
-                                       1);
-
+        const int max_bytes = av_samples_get_buffer_size(nullptr,
+                                                         channels,
+                                                         delay,
+                                                         AC_AUDIO_OUTPUT_FMT,
+                                                         1);
         if (max_bytes <= 0) {
             break;
         }
@@ -749,46 +733,50 @@ int ac_flush_decoder(fmpg_decoder * decoder)
         std::vector<uint8_t> tmp(static_cast<size_t>(max_bytes));
         uint8_t *out_planes[1] = { tmp.data() };
 
-        const int out_samples =
-            swr_convert(decoder->swr_ctx,
-                        out_planes,
-                        delay,
-                        nullptr,
-                        0);
-
+        const int out_samples = swr_convert(decoder->swr_ctx,
+                                            out_planes,
+                                            delay,
+                                            nullptr,
+                                            0);
         if (out_samples <= 0) {
             break;
         }
 
-        const int used_bytes =
-            av_samples_get_buffer_size(nullptr,
-                                       channels,
-                                       out_samples,
-                                       AC_AUDIO_OUTPUT_FMT,
-                                       1);
-
+        const int used_bytes = av_samples_get_buffer_size(nullptr,
+                                                          channels,
+                                                          out_samples,
+                                                          AC_AUDIO_OUTPUT_FMT,
+                                                          1);
         if (used_bytes < 0 ||
-            !append_bytes(decoder, tmp.data(), static_cast<size_t>(used_bytes))) {
+            !append_bytes(decoder,
+                          tmp.data(),
+                          static_cast<size_t>(used_bytes))) {
             break;
         }
+
+        decoder->last_samples += out_samples;
+        decoder->sample_position += out_samples;
     }
 
     return decoder->pcm.empty() ? 0 : 1;
 }
 
-int ac_seek_ms(fmpg_decoder * decoder, int64_t target_pos_ms)
+int fmpg_seek_ms(fmpg_decoder *decoder, int64_t target_pos_ms)
 {
-    if (!decoder || !decoder->instance || !decoder->instance->format_ctx) {
+    if (!decoder || !instance_ready(decoder->instance)) {
         return 0;
     }
 
-    AVStream *stream = decoder->instance->format_ctx->streams[decoder->stream_index];
+    const int stream_index = decoder->instance->audio_info.selected_stream_index;
+    AVStream *stream = decoder->instance->format_ctx->streams[stream_index];
 
     const int64_t pos_us = av_rescale(target_pos_ms, AV_TIME_BASE, 1000);
-    const int64_t stream_ts = av_rescale_q(pos_us, AV_TIME_BASE_Q, stream->time_base);
+    const int64_t stream_ts = av_rescale_q(pos_us,
+                                           AV_TIME_BASE_Q,
+                                           stream->time_base);
 
     if (av_seek_frame(decoder->instance->format_ctx,
-                      decoder->stream_index,
+                      stream_index,
                       stream_ts,
                       AVSEEK_FLAG_BACKWARD) < 0) {
         return 0;
@@ -796,37 +784,41 @@ int ac_seek_ms(fmpg_decoder * decoder, int64_t target_pos_ms)
 
     decoder->timecode = target_pos_ms / 1000.0;
     decoder->pcm.clear();
+    decoder->last_samples = 0;
+    decoder->sample_position = samples_from_seconds(decoder->timecode,
+                                                    decoder->instance->audio_info.sample_rate);
 
-    /* Old buffered data no longer belongs to the new seek position. */
     avcodec_flush_buffers(decoder->codec_ctx);
 
-    /* Reset resampler delay/state too. */
     swr_close(decoder->swr_ctx);
     return swr_init(decoder->swr_ctx) >= 0 ? 1 : 0;
 }
 
-const uint8_t *ac_decoder_buffer(fmpg_decoder * decoder)
+const uint8_t *fmpg_decoder_buffer(fmpg_decoder *decoder)
 {
     return decoder && !decoder->pcm.empty() ? decoder->pcm.data() : nullptr;
 }
 
-int ac_decoder_buffer_size(fmpg_decoder * decoder)
-{
-    if (!decoder ||
-        decoder->pcm.size() >
-            static_cast<size_t>(std::numeric_limits<int>::max())) {
+int fmpg_decoder_buffer_size(fmpg_decoder *decoder) {
+    if (!decoder || decoder->pcm.size() >
+                        static_cast<size_t>(std::numeric_limits<int>::max())) {
         return 0;
     }
 
     return static_cast<int>(decoder->pcm.size());
 }
 
-double ac_decoder_timecode(fmpg_decoder * decoder)
+double fmpg_decoder_timecode(fmpg_decoder *decoder)
 {
     return decoder ? decoder->timecode : 0.0;
 }
 
-int ac_decoder_stream_index(fmpg_decoder * decoder)
+int64_t fmpg_decoder_last_samples(fmpg_decoder *decoder)
 {
-    return decoder ? decoder->stream_index : -1;
+    return decoder ? decoder->last_samples : 0;
+}
+
+int64_t fmpg_decoder_sample_position(fmpg_decoder *decoder)
+{
+    return decoder ? decoder->sample_position : 0;
 }
diff --git a/ffmpeg-audio/ffmpeg_audio.h b/ffmpeg-audio/ffmpeg_audio.h
index 56d6870..78b4d28 100644
--- a/ffmpeg-audio/ffmpeg_audio.h
+++ b/ffmpeg-audio/ffmpeg_audio.h
@@ -18,93 +18,245 @@ extern "C" {
 #endif
 
 /*
- * Audio-only Acinerella API.
+ * Audio-only FFmpeg wrapper.
  *
- * The implementation may be C++, but this header is plain C-compatible.
- * All public structs below are opaque handles, except ac_audio_info.
+ * The implementation is C++, but this header is plain C-compatible. All
+ * public object types are opaque. The caller never sees FFmpeg's AVFormatContext,
+ * AVCodecContext, AVPacket, stream_index, or std::string objects.
+ *
+ * Output audio format is deliberately fixed:
+ *
+ *   signed 32-bit integer PCM
+ *   interleaved / packed
+ *   native endian
+ *
+ * This makes the API easy to bind from Racket and straightforward to feed into
+ * libao. Source formats such as MP3 float/planar output are converted internally.
  */
 
 typedef struct __fmpg_instance__ fmpg_instance;
-typedef struct __fmpg_decoder__ fmpg_decoder;
-typedef struct __fmpg_package__ fmpg_package;
-typedef struct __fmpg_file_info__ fmpg_file_info;
+typedef struct __fmpg_decoder__  fmpg_decoder;
+typedef struct __fmpg_package__  fmpg_package;
 
-typedef struct __fmpg_audio_info__ {
-    int sample_rate;
-    int channels;
-    int bits_per_sample;     /* Always 32. */
-    int bytes_per_sample;    /* Always 4. */
-} fmpg_audio_info;
-
-/* Lifecycle */
-FFMPEG_EXTERN fmpg_instance *  ac_init(void);
-FFMPEG_EXTERN void ac_free(fmpg_instance * instance);
-
-FFMPEG_EXTERN int  ac_open_file(fmpg_instance * instance, const char *filename);
-FFMPEG_EXTERN void ac_close(fmpg_instance * instance);
-FFMPEG_EXTERN int  ac_is_open(fmpg_instance * instance);
-
-/* Audio stream discovery */
-FFMPEG_EXTERN int  ac_get_audio_stream_count(fmpg_instance * instance);
-FFMPEG_EXTERN int  ac_get_default_audio_stream(fmpg_instance * instance);
-FFMPEG_EXTERN int  ac_get_audio_info(fmpg_instance * instance, int stream_index, fmpg_audio_info *info);
-
-/* Metadata. The returned strings are owned by the instance. */
-FFMPEG_EXTERN const fmpg_file_info * ac_get_file_info(fmpg_instance * instance);
-
-FFMPEG_EXTERN const char * ac_file_info_title(const fmpg_file_info *info);
-FFMPEG_EXTERN const char * ac_file_info_author(const fmpg_file_info *info);
-FFMPEG_EXTERN const char * ac_file_info_album(const fmpg_file_info *info);
-FFMPEG_EXTERN const char * ac_file_info_genre(const fmpg_file_info *info);
-FFMPEG_EXTERN const char * ac_file_info_comment(const fmpg_file_info *info);
-FFMPEG_EXTERN const char * ac_file_info_copyright(const fmpg_file_info *info);
-
-FFMPEG_EXTERN int  ac_file_info_year(const fmpg_file_info *info);
-FFMPEG_EXTERN int  ac_file_info_track(const fmpg_file_info *info);
-FFMPEG_EXTERN int64_t  ac_file_info_duration(const fmpg_file_info *info);
-FFMPEG_EXTERN int  ac_file_info_bitrate(const fmpg_file_info *info);
-
-/* Packet reading */
-FFMPEG_EXTERN fmpg_package *  ac_read_package(fmpg_instance * instance);
-FFMPEG_EXTERN void  ac_free_package(fmpg_package * package);
-FFMPEG_EXTERN int  ac_package_stream_index(fmpg_package * package);
-
-/* Decoder */
-FFMPEG_EXTERN fmpg_decoder * ac_create_decoder(fmpg_instance * instance, int stream_index);
-
-FFMPEG_EXTERN void  ac_free_decoder(fmpg_decoder * decoder);
+/* ------------------------------------------------------------------------- */
+/* Lifecycle                                                                 */
+/* ------------------------------------------------------------------------- */
 
 /*
- * Decode one compressed packet.
+ * Create an empty decoder instance.
  *
- * Returns 1 if PCM data was produced, 0 otherwise.
- *
- * Output format:
- *   signed 32-bit integer PCM
- *   interleaved
- *   native endian
- *
- * Example stereo layout:
- *   L0 R0 L1 R1 L2 R2 ...
+ * Return:
+ *   instance pointer, or NULL on allocation failure.
  */
-FFMPEG_EXTERN int  ac_decode_package(fmpg_package * package, fmpg_decoder * decoder);
+FFMPEG_EXTERN fmpg_instance *fmpg_init(void);
+
+/*
+ * Close any open file and free the instance.
+ *
+ * It is safe to pass NULL.
+ */
+FFMPEG_EXTERN void fmpg_free(fmpg_instance *instance);
+
+/*
+ * Open a media file and select the best audio stream.
+ *
+ * The selected stream index is kept inside the instance. The public API does
+ * not expose FFmpeg stream indices. After this function succeeds, metadata,
+ * duration, sample rate and channel count are available through the getters
+ * below.
+ *
+ * Return:
+ *   1 on success
+ *   0 on failure or if no usable audio stream was found
+ */
+FFMPEG_EXTERN int fmpg_open_file(fmpg_instance *instance,
+                               const char *filename);
+
+/* Close the current file, if any, and reset instance-owned information. */
+FFMPEG_EXTERN void fmpg_close(fmpg_instance *instance);
+
+/* Return 1 if a file is open, 0 otherwise. */
+FFMPEG_EXTERN int fmpg_is_open(fmpg_instance *instance);
+
+/* ------------------------------------------------------------------------- */
+/* Audio information                                                         */
+/* ------------------------------------------------------------------------- */
+
+/*
+ * The number of audio streams found in the container.
+ *
+ * The decoder currently uses the best stream selected by FFmpeg. This count is
+ * informational; stream selection is intentionally not part of the public API.
+ */
+FFMPEG_EXTERN int fmpg_audio_stream_count(fmpg_instance *instance);
+
+/* Output sample rate in Hz, for example 44100 or 48000. */
+FFMPEG_EXTERN int fmpg_audio_sample_rate(fmpg_instance *instance);
+
+/* Number of output channels, for example 1 or 2. */
+FFMPEG_EXTERN int fmpg_audio_channels(fmpg_instance *instance);
+
+/* Always 32: samples are signed 32-bit integer PCM. */
+FFMPEG_EXTERN int fmpg_audio_bits_per_sample(fmpg_instance *instance);
+
+/* Always 4: one output sample occupies four bytes. */
+FFMPEG_EXTERN int fmpg_audio_bytes_per_sample(fmpg_instance *instance);
+
+/*
+ * Duration in milliseconds, or -1 if unknown.
+ *
+ * This value is known after fmpg_open_file() succeeds, as far as FFmpeg can
+ * know it from the container/stream metadata. Some streams do not contain
+ * exact duration information; in that case this getter returns -1.
+ */
+FFMPEG_EXTERN int64_t fmpg_duration_ms(fmpg_instance *instance);
+
+/*
+ * Duration expressed as output sample frames, or -1 if unknown.
+ *
+ * A sample frame means one sample moment across all channels. For stereo, one
+ * sample frame contains two int32_t values: left and right. This is usually the
+ * most useful duration unit for playback and progress calculations.
+ *
+ * PCM int32_t values in the whole output would be:
+ *
+ *   fmpg_duration_samples(instance) * fmpg_audio_channels(instance)
+ */
+FFMPEG_EXTERN int64_t fmpg_duration_samples(fmpg_instance *instance);
+
+/* ------------------------------------------------------------------------- */
+/* Metadata                                                                  */
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Metadata is owned by the instance and available after fmpg_open_file().
+ * Returned strings are never NULL. Missing metadata is returned as "".
+ *
+ * Pointers remain valid until fmpg_close() or fmpg_free() is called for the
+ * instance. Do not free the returned strings.
+ */
+FFMPEG_EXTERN const char *fmpg_file_title(fmpg_instance *instance);
+FFMPEG_EXTERN const char *fmpg_file_author(fmpg_instance *instance);
+FFMPEG_EXTERN const char *fmpg_file_album(fmpg_instance *instance);
+FFMPEG_EXTERN const char *fmpg_file_genre(fmpg_instance *instance);
+FFMPEG_EXTERN const char *fmpg_file_comment(fmpg_instance *instance);
+FFMPEG_EXTERN const char *fmpg_file_copyright(fmpg_instance *instance);
+
+/* Return -1 if the field is unknown. */
+FFMPEG_EXTERN int fmpg_file_year(fmpg_instance *instance);
+FFMPEG_EXTERN int fmpg_file_track(fmpg_instance *instance);
+
+/* Container-level bitrate in bits/second, or -1 if unknown. */
+FFMPEG_EXTERN int fmpg_file_bitrate(fmpg_instance *instance);
+
+/* ------------------------------------------------------------------------- */
+/* Packet reading                                                            */
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Read the next compressed packet from the selected audio stream.
+ *
+ * Non-audio packets and packets from non-selected streams are skipped
+ * internally. The caller therefore no longer has to inspect stream_index.
+ *
+ * Return:
+ *   package pointer, or NULL at EOF or on read error.
+ */
+FFMPEG_EXTERN fmpg_package *fmpg_read_package(fmpg_instance *instance);
+
+/* Free a package returned by fmpg_read_package(). Safe to pass NULL. */
+FFMPEG_EXTERN void fmpg_free_package(fmpg_package *package);
+
+/* ------------------------------------------------------------------------- */
+/* Decoder                                                                   */
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Create a decoder for the selected audio stream.
+ *
+ * The stream is the one selected during fmpg_open_file(). The caller does not
+ * pass a stream index.
+ */
+FFMPEG_EXTERN fmpg_decoder *fmpg_create_decoder(fmpg_instance *instance);
+
+/* Free decoder and all FFmpeg decoder/resampler state. Safe to pass NULL. */
+FFMPEG_EXTERN void fmpg_free_decoder(fmpg_decoder *decoder);
+
+/*
+ * Decode one compressed audio package.
+ *
+ * Modern FFmpeg decoding is packet-in, frame-out. One compressed packet can
+ * produce zero, one, or multiple decoded frames. This function receives all
+ * available frames, converts them to signed 32-bit interleaved PCM, and
+ * concatenates them into the decoder output buffer.
+ *
+ * Return:
+ *   1 if PCM data was produced
+ *   0 if no PCM data was produced or an error occurred
+ */
+FFMPEG_EXTERN int fmpg_decode_package(fmpg_package *package, fmpg_decoder *decoder);
 
 /*
  * Flush delayed decoder/resampler samples after EOF.
  *
- * Call this repeatedly after ac_read_package() returns NULL,
- * until it returns 0.
+ * Call this repeatedly after fmpg_read_package() returns NULL, until this
+ * function returns 0.
  */
-FFMPEG_EXTERN int  ac_flush_decoder(fmpg_decoder * decoder);
+FFMPEG_EXTERN int fmpg_flush_decoder(fmpg_decoder *decoder);
 
-/* Seek to absolute position in milliseconds. */
-FFMPEG_EXTERN int  ac_seek_ms(fmpg_decoder * decoder, int64_t target_pos_ms);
+/*
+ * Seek to an absolute position in milliseconds.
+ *
+ * The compressed decoder buffer, decoded output buffer and resampler state are
+ * reset. After seeking, continue reading packages and decoding as usual.
+ *
+ * Return:
+ *   1 on success
+ *   0 on failure
+ */
+FFMPEG_EXTERN int fmpg_seek_ms(fmpg_decoder *decoder, int64_t target_pos_ms);
 
-/* Decoder output */
-FFMPEG_EXTERN const uint8_t * ac_decoder_buffer(fmpg_decoder * decoder);
-FFMPEG_EXTERN int  ac_decoder_buffer_size(fmpg_decoder * decoder);
-FFMPEG_EXTERN double  ac_decoder_timecode(fmpg_decoder * decoder);
-FFMPEG_EXTERN int  ac_decoder_stream_index(fmpg_decoder * decoder);
+/* ------------------------------------------------------------------------- */
+/* Decoder output                                                            */
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Pointer to the current decoded PCM buffer.
+ *
+ * Format:
+ *   int32_t samples
+ *   interleaved by channel
+ *   native endian
+ *
+ * Valid until this decoder's next fmpg_decode_package(), fmpg_flush_decoder(),
+ * fmpg_seek_ms(), or fmpg_free_decoder() call.
+ */
+FFMPEG_EXTERN const uint8_t *fmpg_decoder_buffer(fmpg_decoder *decoder);
+
+/* Size of the current decoded PCM buffer in bytes. */
+FFMPEG_EXTERN int fmpg_decoder_buffer_size(fmpg_decoder *decoder);
+
+/*
+ * Approximate timecode of the current decoded block in seconds.
+ *
+ * This is based on packet timestamps. It is useful for progress indication,
+ * but exact sample counting should use fmpg_decoder_sample_position().
+ */
+FFMPEG_EXTERN double fmpg_decoder_timecode(fmpg_decoder *decoder);
+
+/*
+ * Number of output sample frames produced by the last decode/flush call.
+ *
+ * A sample frame contains one sample for each channel. For stereo S32, one
+ * sample frame is 8 bytes.
+ */
+FFMPEG_EXTERN int64_t fmpg_decoder_last_samples(fmpg_decoder *decoder);
+
+/*
+ * Position in output sample frames: advanced by every decode/flush call and
+ * re-based to the sample frame of the target time by fmpg_seek_ms().
+ */
+FFMPEG_EXTERN int64_t fmpg_decoder_sample_position(fmpg_decoder *decoder);
 
 #ifdef __cplusplus
 }