833 lines
21 KiB
C++
833 lines
21 KiB
C++
/*
 * Acinerella audio-only decoder.
 *
 * This file is intentionally written as C++ internally, but exports a stable
 * C ABI. That gives us RAII, std::string and std::vector internally, while a
 * C or Racket FFI caller still sees a simple C interface.
 *
 * What this decoder does:
 *
 *   1. Open a media file with FFmpeg/libavformat.
 *   2. Find audio streams.
 *   3. Read compressed packets from the container.
 *   4. Decode packets with the modern avcodec_send_packet() /
 *      avcodec_receive_frame() API.
 *   5. Convert decoded audio to one predictable output format:
 *
 *        signed 32-bit integer PCM
 *        interleaved / packed
 *        native endian
 *
 * This is suitable for feeding to libao as 32-bit PCM.
 *
 * Important FFmpeg vocabulary:
 *
 *   Container/demuxer:
 *     The file format layer: mp3, mp4/m4a, ogg, wav, etc.
 *     FFmpeg represents this with AVFormatContext.
 *
 *   Stream:
 *     A file may contain one or more streams. For this audio-only API we only
 *     care about streams whose codec_type is AVMEDIA_TYPE_AUDIO.
 *
 *   Packet:
 *     Compressed data belonging to one stream. One packet may decode to zero,
 *     one, or multiple decoded frames.
 *
 *   Frame:
 *     Decoded audio samples, but not necessarily in the format we want. MP3,
 *     for example, may decode to planar float. We therefore use libswresample
 *     to normalize everything to signed 32-bit interleaved PCM.
 */
|
|
|
|
#include "ffmpeg_audio.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstdint>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <limits>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
extern "C" {
|
|
#include <libavcodec/avcodec.h>
|
|
#include <libavformat/avformat.h>
|
|
#include <libavutil/avutil.h>
|
|
#include <libavutil/channel_layout.h>
|
|
#include <libavutil/samplefmt.h>
|
|
#include <libswresample/swresample.h>
|
|
}
|
|
|
|
static constexpr int AC_AUDIO_OUTPUT_BITS = 32;
|
|
static constexpr int AC_AUDIO_OUTPUT_BYTES = 4;
|
|
static constexpr AVSampleFormat AC_AUDIO_OUTPUT_FMT = AV_SAMPLE_FMT_S32;
|
|
|
|
/*
 * File-level metadata.
 *
 * Plain C code would typically store tags in fixed-size buffers such as
 *
 *     char title[512];
 *
 * which truncates long UTF-8 values and wastes space for short ones. The
 * implementation is C++, so every text field is a std::string instead; the
 * public C API only hands out const char* through getter functions.
 *
 * Numeric fields hold -1 until a value has been stored.
 */
struct __fmpg_file_info__ {
    std::string title;
    std::string author;
    std::string album;
    std::string genre;
    std::string comment;
    std::string copyright;

    int year = -1;
    int track = -1;
    int64_t duration = -1; /* milliseconds */
    int bitrate = -1;

    /* Put every field back to its default: empty text, -1 numbers. */
    void clear() {
        *this = __fmpg_file_info__();
    }
};
|
|
|
|
/*
|
|
* __fmpg_instance__ owns the opened media file.
|
|
*
|
|
* AVFormatContext is FFmpeg's demuxer/container object. It knows which streams
|
|
* the file contains and can read compressed packets from it.
|
|
*/
|
|
struct __fmpg_instance__ {
|
|
bool opened = false;
|
|
AVFormatContext *format_ctx = nullptr;
|
|
fmpg_file_info info;
|
|
|
|
~__fmpg_instance__() {
|
|
if (format_ctx) {
|
|
avformat_close_input(&format_ctx);
|
|
}
|
|
}
|
|
};
|
|
|
|
/*
|
|
* A package wraps one FFmpeg AVPacket.
|
|
*
|
|
* The old Acinerella name was "package". FFmpeg calls this a packet. It is not
|
|
* decoded audio yet; it is compressed data read from the container.
|
|
*/
|
|
struct __fmpg_package__ {
|
|
int stream_index = -1;
|
|
int64_t pts = AV_NOPTS_VALUE;
|
|
AVPacket *packet = nullptr;
|
|
|
|
__fmpg_package__() : packet(av_packet_alloc()) {}
|
|
|
|
~__fmpg_package__() {
|
|
av_packet_free(&packet);
|
|
}
|
|
};
|
|
|
|
/*
|
|
* __fmpg_decoder__ owns the actual audio decoder and resampler for one stream.
|
|
*/
|
|
struct __fmpg_decoder__ {
|
|
fmpg_instance *instance = nullptr;
|
|
int stream_index = -1;
|
|
|
|
const AVCodec *codec = nullptr;
|
|
AVCodecContext *codec_ctx = nullptr;
|
|
AVFrame *frame = nullptr;
|
|
SwrContext *swr_ctx = nullptr;
|
|
|
|
fmpg_audio_info audio_info{};
|
|
std::vector<uint8_t> pcm;
|
|
double timecode = 0.0;
|
|
|
|
~__fmpg_decoder__() {
|
|
avcodec_free_context(&codec_ctx);
|
|
av_frame_free(&frame);
|
|
swr_free(&swr_ctx);
|
|
}
|
|
};
|
|
|
|
/* Map a null C string to "", pass any other pointer through unchanged. */
static const char *empty_if_null(const char *s) {
    if (s == nullptr) {
        return "";
    }
    return s;
}
|
|
|
|
/* C-string view of s; an empty string yields the literal "". */
static const char *string_c_str(const std::string &s) {
    if (!s.empty()) {
        return s.c_str();
    }
    return "";
}
|
|
|
|
static std::string get_metadata_string(const AVFormatContext *ctx,
|
|
const char *key) {
|
|
const AVDictionaryEntry *entry =
|
|
av_dict_get(ctx->metadata, key, nullptr, 0);
|
|
|
|
return entry && entry->value ? std::string(entry->value)
|
|
: std::string();
|
|
}
|
|
|
|
static int get_metadata_int(const AVFormatContext *ctx, const char *key) {
|
|
const AVDictionaryEntry *entry =
|
|
av_dict_get(ctx->metadata, key, nullptr, 0);
|
|
|
|
if (!entry || !entry->value || !*entry->value) {
|
|
return -1;
|
|
}
|
|
|
|
return std::atoi(entry->value);
|
|
}
|
|
|
|
/*
 * Populate self->info from the container-level metadata of the opened file.
 *
 * - Text tags are copied as-is; missing tags become empty strings.
 * - year: the "year" key is tried first. FFmpeg's generic tag name for the
 *   release date is "date", so that key is used as a fallback; the integer
 *   parser takes the leading number of values such as "2004-05-01".
 * - bitrate: clamped to INT_MAX instead of being narrowed blindly.
 * - duration: converted from AV_TIME_BASE units to milliseconds by dividing,
 *   which gives the same truncated result as multiply-then-divide but cannot
 *   overflow. -1 when the container reports no duration.
 *
 * May throw std::bad_alloc from the string assignments.
 */
static void fill_metadata(fmpg_instance *self) {
    const AVFormatContext *ctx = self->format_ctx;

    self->info.clear();
    self->info.title = get_metadata_string(ctx, "title");
    self->info.author = get_metadata_string(ctx, "artist");
    self->info.album = get_metadata_string(ctx, "album");
    self->info.genre = get_metadata_string(ctx, "genre");
    self->info.comment = get_metadata_string(ctx, "comment");
    self->info.copyright = get_metadata_string(ctx, "copyright");

    self->info.year = get_metadata_int(ctx, "year");
    if (self->info.year < 0) {
        self->info.year = get_metadata_int(ctx, "date");
    }
    self->info.track = get_metadata_int(ctx, "track");

    const int64_t int_max = std::numeric_limits<int>::max();
    self->info.bitrate =
        static_cast<int>(std::min<int64_t>(ctx->bit_rate, int_max));

    if (ctx->duration == AV_NOPTS_VALUE) {
        self->info.duration = -1;
    } else {
        self->info.duration = ctx->duration / (AV_TIME_BASE / 1000);
    }
}
|
|
|
|
static bool valid_stream_index(const fmpg_instance *instance, int stream_index)
|
|
{
|
|
return instance && instance->opened && instance->format_ctx &&
|
|
stream_index >= 0 &&
|
|
stream_index < static_cast<int>(instance->format_ctx->nb_streams);
|
|
}
|
|
|
|
/*
 * Allocate a new, not yet opened instance.
 * Returns nullptr if construction throws; no exception leaves this function.
 */
fmpg_instance *ac_init(void) {
    fmpg_instance *created = nullptr;

    try {
        created = new fmpg_instance();
    } catch (...) {
        created = nullptr;
    }

    return created;
}
|
|
|
|
/* Destroy an instance created by ac_init(). Passing nullptr is a no-op. */
void ac_free(fmpg_instance *instance) {
    if (instance != nullptr) {
        delete instance;
    }
}
|
|
|
|
/*
 * Open `filename` for demuxing and read its metadata.
 *
 * Returns 1 on success and 0 on failure. Fails when `instance` or `filename`
 * is null, when the instance already has a file open, when FFmpeg cannot open
 * or probe the file, or when copying the metadata runs out of memory. After a
 * failure the instance is left closed.
 */
int ac_open_file(fmpg_instance *instance, const char *filename) {
    if (!instance || instance->opened || !filename) {
        return 0;
    }

    /*
     * avformat_open_input opens the file and guesses the container format.
     * The codec is not opened here. This is only the demuxing layer.
     */
    if (avformat_open_input(&instance->format_ctx, filename, nullptr,
                            nullptr) < 0) {
        ac_close(instance);
        return 0;
    }

    /*
     * Read enough packets to discover stream metadata such as sample rate,
     * channel layout, codec id, duration and tags.
     */
    if (avformat_find_stream_info(instance->format_ctx, nullptr) < 0) {
        ac_close(instance);
        return 0;
    }

    /*
     * fill_metadata builds std::string values and may throw. This function is
     * part of the C ABI, so the exception must not propagate to the caller.
     */
    try {
        fill_metadata(instance);
    } catch (...) {
        ac_close(instance);
        return 0;
    }

    instance->opened = true;
    return 1;
}
|
|
|
|
/*
 * Close the file held by `instance`, if any, and reset its metadata.
 * Safe to call with nullptr or on an instance that is not open.
 */
void ac_close(fmpg_instance *instance) {
    if (instance == nullptr) {
        return;
    }

    if (instance->format_ctx != nullptr) {
        avformat_close_input(&instance->format_ctx);
    }

    instance->info.clear();
    instance->opened = false;
}
|
|
|
|
/* 1 when `instance` is non-null and has a file open, otherwise 0. */
int ac_is_open(fmpg_instance *instance)
{
    if (instance == nullptr) {
        return 0;
    }
    return instance->opened ? 1 : 0;
}
|
|
|
|
/*
 * Number of streams in the opened file whose codec type is audio.
 * Returns 0 when the instance is null or not open.
 */
int ac_get_audio_stream_count(fmpg_instance *instance)
{
    const bool usable =
        instance != nullptr && instance->opened && instance->format_ctx;
    if (!usable) {
        return 0;
    }

    const AVFormatContext *ctx = instance->format_ctx;
    int audio_streams = 0;
    unsigned idx = 0;

    while (idx < ctx->nb_streams) {
        const AVCodecParameters *params = ctx->streams[idx]->codecpar;
        if (params != nullptr && params->codec_type == AVMEDIA_TYPE_AUDIO) {
            audio_streams += 1;
        }
        ++idx;
    }

    return audio_streams;
}
|
|
|
|
/*
 * Index of the audio stream FFmpeg considers the best choice, or -1 when the
 * instance is not open or av_find_best_stream reports an error.
 */
int ac_get_default_audio_stream(fmpg_instance *instance)
{
    const bool usable =
        instance != nullptr && instance->opened && instance->format_ctx;
    if (!usable) {
        return -1;
    }

    const int best = av_find_best_stream(instance->format_ctx,
                                         AVMEDIA_TYPE_AUDIO,
                                         -1,
                                         -1,
                                         nullptr,
                                         0);
    if (best < 0) {
        return -1;
    }

    return best;
}
|
|
|
|
/*
 * Describe the output format of audio stream `stream_index`.
 *
 * `*info` is zeroed first, then filled with the stream's sample rate and
 * channel count plus the fixed output sample size. Returns 1 when the stream
 * is an audio stream with a positive sample rate and channel count, else 0.
 */
int ac_get_audio_info(fmpg_instance *instance, int stream_index, fmpg_audio_info *info)
{
    if (info == nullptr) {
        return 0;
    }

    std::memset(info, 0, sizeof(*info));

    const AVCodecParameters *params = nullptr;
    if (valid_stream_index(instance, stream_index)) {
        params = instance->format_ctx->streams[stream_index]->codecpar;
    }

    if (params == nullptr || params->codec_type != AVMEDIA_TYPE_AUDIO) {
        return 0;
    }

    info->sample_rate = params->sample_rate;
    info->channels = params->ch_layout.nb_channels;
    info->bits_per_sample = AC_AUDIO_OUTPUT_BITS;
    info->bytes_per_sample = AC_AUDIO_OUTPUT_BYTES;

    if (info->sample_rate <= 0 || info->channels <= 0) {
        return 0;
    }

    return 1;
}
|
|
|
|
/* Pointer to the instance's metadata record, or nullptr for a null instance. */
const fmpg_file_info *ac_get_file_info(fmpg_instance *instance)
{
    if (instance == nullptr) {
        return nullptr;
    }
    return &instance->info;
}
|
|
|
|
const char * ac_file_info_title(const fmpg_file_info *info)
|
|
{
|
|
return info ? string_c_str(info->title) : "";
|
|
}
|
|
|
|
const char *ac_file_info_author(const fmpg_file_info *info)
|
|
{
|
|
return info ? string_c_str(info->author) : "";
|
|
}
|
|
|
|
const char *ac_file_info_album(const fmpg_file_info *info)
|
|
{
|
|
return info ? string_c_str(info->album) : "";
|
|
}
|
|
|
|
const char *ac_file_info_genre(const fmpg_file_info *info)
|
|
{
|
|
return info ? string_c_str(info->genre) : "";
|
|
}
|
|
|
|
const char *ac_file_info_comment(const fmpg_file_info *info)
|
|
{
|
|
return info ? string_c_str(info->comment) : "";
|
|
}
|
|
|
|
const char *ac_file_info_copyright(const fmpg_file_info *info)
|
|
{
|
|
return info ? string_c_str(info->copyright) : "";
|
|
}
|
|
|
|
int ac_file_info_year(const fmpg_file_info *info)
|
|
{
|
|
return info ? info->year : -1;
|
|
}
|
|
|
|
int ac_file_info_track(const fmpg_file_info *info)
|
|
{
|
|
return info ? info->track : -1;
|
|
}
|
|
|
|
int64_t ac_file_info_duration(const fmpg_file_info *info)
|
|
{
|
|
return info ? info->duration : -1;
|
|
}
|
|
|
|
int ac_file_info_bitrate(const fmpg_file_info *info)
|
|
{
|
|
return info ? info->bitrate : -1;
|
|
}
|
|
|
|
/*
 * Read the next compressed packet from the container.
 *
 * Returns a newly allocated package the caller releases with
 * ac_free_package(), or nullptr when the instance is not open, allocation
 * fails, or av_read_frame reports an error or end of file.
 */
fmpg_package *ac_read_package(fmpg_instance *instance)
{
    const bool usable =
        instance != nullptr && instance->opened && instance->format_ctx;
    if (!usable) {
        return nullptr;
    }

    fmpg_package *result = nullptr;
    try {
        result = new fmpg_package();
    } catch (...) {
        return nullptr;
    }

    /*
     * av_read_frame reads one compressed packet. This may be audio, video,
     * subtitles, or another stream type. The caller can inspect stream_index
     * and only feed audio packets to the matching decoder.
     */
    const bool read_ok =
        result->packet != nullptr &&
        av_read_frame(instance->format_ctx, result->packet) >= 0;

    if (!read_ok) {
        delete result;
        return nullptr;
    }

    const AVPacket *raw = result->packet;
    result->stream_index = raw->stream_index;

    /* The decode timestamp is preferred; pts is the fallback. */
    if (raw->dts != AV_NOPTS_VALUE) {
        result->pts = raw->dts;
    } else {
        result->pts = raw->pts;
    }

    return result;
}
|
|
|
|
/* Release a package returned by ac_read_package(). nullptr is a no-op. */
void ac_free_package(fmpg_package *package)
{
    if (package != nullptr) {
        delete package;
    }
}
|
|
|
|
/* Stream index the package belongs to, or -1 when `package` is null. */
int ac_package_stream_index(fmpg_package *package)
{
    if (package == nullptr) {
        return -1;
    }
    return package->stream_index;
}
|
|
|
|
/*
 * Find, allocate, configure and open the decoder for the given stream
 * parameters. The results are stored in dec->codec and dec->codec_ctx.
 * Returns false at the first step that fails; whatever was stored up to that
 * point stays in `dec`.
 */
static bool init_codec_context(fmpg_decoder *dec, const AVCodecParameters *par)
{
    const AVCodec *found = avcodec_find_decoder(par->codec_id);
    dec->codec = found;
    if (found == nullptr) {
        return false;
    }

    AVCodecContext *ctx = avcodec_alloc_context3(found);
    dec->codec_ctx = ctx;
    if (ctx == nullptr) {
        return false;
    }

    /* Copy stream codec parameters into the active decoder context. */
    const bool configured = avcodec_parameters_to_context(ctx, par) >= 0;

    /* Open the actual decoder. From this point on, packets can be sent to it. */
    return configured && avcodec_open2(ctx, found, nullptr) >= 0;
}
|
|
|
|
/*
 * Create and initialise dec->swr_ctx.
 *
 * Sample rate and channel layout are passed through unchanged; only the
 * sample format is converted to AC_AUDIO_OUTPUT_FMT. Returns false when the
 * codec context reports no channels or no sample rate, or when libswresample
 * rejects the configuration.
 */
static bool init_resampler(fmpg_decoder *dec)
{
    const AVCodecContext *codec = dec->codec_ctx;
    const AVChannelLayout *channel_layout = &codec->ch_layout;
    const int rate = codec->sample_rate;

    if (channel_layout->nb_channels <= 0 || rate <= 0) {
        return false;
    }

    const int alloc_status = swr_alloc_set_opts2(&dec->swr_ctx,
                                                 channel_layout,
                                                 AC_AUDIO_OUTPUT_FMT,
                                                 rate,
                                                 channel_layout,
                                                 codec->sample_fmt,
                                                 rate,
                                                 0,
                                                 nullptr);
    if (alloc_status < 0) {
        return false;
    }

    const int init_status = swr_init(dec->swr_ctx);
    return init_status >= 0;
}
|
|
|
|
/*
 * Create a decoder for audio stream `stream_index` of an opened instance.
 *
 * Returns a decoder the caller releases with ac_free_decoder(), or nullptr
 * when the index is invalid, the stream is not usable audio, or any of the
 * setup steps (codec, frame, resampler) fails.
 */
fmpg_decoder *ac_create_decoder(fmpg_instance *instance, int stream_index)
{
    if (!valid_stream_index(instance, stream_index)) {
        return nullptr;
    }

    fmpg_audio_info stream_info{};
    if (!ac_get_audio_info(instance, stream_index, &stream_info)) {
        return nullptr;
    }

    fmpg_decoder *created = nullptr;
    try {
        created = new fmpg_decoder();
    } catch (...) {
        return nullptr;
    }

    created->instance = instance;
    created->stream_index = stream_index;
    created->audio_info = stream_info;

    const AVCodecParameters *params =
        instance->format_ctx->streams[stream_index]->codecpar;

    /* Each step runs only if the previous one succeeded. */
    bool ready = init_codec_context(created, params);
    if (ready) {
        created->frame = av_frame_alloc();
        ready = created->frame != nullptr;
    }
    if (ready) {
        ready = init_resampler(created);
    }

    if (!ready) {
        delete created;
        return nullptr;
    }

    return created;
}
|
|
|
|
/* Release a decoder returned by ac_create_decoder(). nullptr is a no-op. */
void ac_free_decoder(fmpg_decoder *decoder)
{
    if (decoder != nullptr) {
        delete decoder;
    }
}
|
|
|
|
/*
 * Append `bytes` bytes from `src` to dec->pcm.
 *
 * Returns false, leaving the buffer as it was, when the result would exceed
 * INT_MAX bytes or when growing the buffer throws.
 */
static bool append_bytes(fmpg_decoder *dec, const uint8_t *src, size_t bytes)
{
    if (bytes == 0) {
        return true;
    }

    /* The buffer size is reported through an int, so cap it there. */
    const size_t limit = static_cast<size_t>(std::numeric_limits<int>::max());
    const size_t room = limit - dec->pcm.size();
    if (bytes > room) {
        return false;
    }

    try {
        dec->pcm.insert(dec->pcm.end(), src, src + bytes);
    } catch (...) {
        return false;
    }

    return true;
}
|
|
|
|
/*
 * Convert one decoded frame to the output sample format and append the
 * result to dec->pcm.
 *
 * Returns true on success, including the cases where there is nothing to
 * append (empty frame, or the resampler produced no samples for this call).
 * Returns false when a size computation fails, the conversion fails, memory
 * cannot be allocated, or the output buffer would grow past its limit.
 */
static bool append_converted_frame(fmpg_decoder *dec, const AVFrame *frame)
{
    const int channels = dec->codec_ctx->ch_layout.nb_channels;

    if (channels <= 0 || frame->nb_samples <= 0) {
        return true;
    }

    /*
     * swr_get_out_samples gives a safe upper bound for the number of output
     * samples. The resampler can have internal delay, so this is safer than
     * assuming input sample count equals output sample count.
     */
    const int max_out_samples =
        swr_get_out_samples(dec->swr_ctx, frame->nb_samples);

    if (max_out_samples <= 0) {
        return false;
    }

    const int max_bytes =
        av_samples_get_buffer_size(nullptr,
                                   channels,
                                   max_out_samples,
                                   AC_AUDIO_OUTPUT_FMT,
                                   1);

    if (max_bytes <= 0) {
        return false;
    }

    /*
     * This helper runs underneath C ABI entry points, so an allocation
     * failure is reported as false instead of being allowed to propagate.
     */
    std::vector<uint8_t> tmp;
    try {
        tmp.resize(static_cast<size_t>(max_bytes));
    } catch (...) {
        return false;
    }

    uint8_t *out_planes[1] = { tmp.data() };

    /*
     * swr_convert performs the actual conversion to S32 interleaved PCM.
     * extended_data is used rather than data: for planar audio with more
     * channels than the fixed data[] array holds, only extended_data lists
     * every channel plane.
     */
    const int out_samples =
        swr_convert(dec->swr_ctx,
                    out_planes,
                    max_out_samples,
                    const_cast<const uint8_t **>(frame->extended_data),
                    frame->nb_samples);

    if (out_samples < 0) {
        return false;
    }

    /*
     * Zero output samples is not an error; the resampler merely buffered the
     * input. It must be handled before the size query below, which rejects a
     * sample count of zero.
     */
    if (out_samples == 0) {
        return true;
    }

    const int used_bytes =
        av_samples_get_buffer_size(nullptr,
                                   channels,
                                   out_samples,
                                   AC_AUDIO_OUTPUT_FMT,
                                   1);

    if (used_bytes < 0) {
        return false;
    }

    return append_bytes(dec, tmp.data(), static_cast<size_t>(used_bytes));
}
|
|
|
|
/*
 * Pull every frame the decoder currently has ready, converting and appending
 * each one to dec->pcm.
 *
 * Returns 1 if at least one frame was received, 0 if none was available, and
 * -1 on a decoder error or when a frame could not be converted/appended.
 */
static int receive_available_frames(fmpg_decoder *dec)
{
    bool got_frame = false;
    int status = avcodec_receive_frame(dec->codec_ctx, dec->frame);

    while (status >= 0) {
        const bool stored = append_converted_frame(dec, dec->frame);
        av_frame_unref(dec->frame);

        if (!stored) {
            return -1;
        }

        got_frame = true;
        status = avcodec_receive_frame(dec->codec_ctx, dec->frame);
    }

    /* "Need more input" and "fully drained" both end the loop normally. */
    if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
        return got_frame ? 1 : 0;
    }

    return -1;
}
|
|
|
|
/*
 * Set dec->timecode (seconds) from the package timestamp, using the time base
 * of the stream the package belongs to.
 *
 * Does nothing when either argument is null, the package carries no
 * timestamp, or the decoder's instance is no longer open / the package's
 * stream index is outside the instance's stream table. The last check keeps
 * a decoder that outlived ac_close() from dereferencing a null format
 * context.
 */
static void update_timecode_from_packet(fmpg_decoder *dec, const fmpg_package *pkg)
{
    if (!dec || !pkg || pkg->pts == AV_NOPTS_VALUE) {
        return;
    }

    if (!valid_stream_index(dec->instance, pkg->stream_index)) {
        return;
    }

    const AVStream *stream =
        dec->instance->format_ctx->streams[pkg->stream_index];
    dec->timecode = static_cast<double>(pkg->pts) * av_q2d(stream->time_base);
}
|
|
|
|
/*
 * Decode one package with the given decoder.
 *
 * The decoder's PCM buffer is cleared first and then receives the converted
 * audio of every frame this call produced. Returns 1 when at least one frame
 * was received, 0 when no frame was produced, the arguments are invalid, the
 * package belongs to a different stream, or FFmpeg reported an error.
 */
int ac_decode_package(fmpg_package *package, fmpg_decoder *decoder)
{
    if (!package || !decoder || !package->packet ||
        package->stream_index != decoder->stream_index) {
        return 0;
    }

    decoder->pcm.clear();
    update_timecode_from_packet(decoder, package);

    /*
     * Modern FFmpeg decoding is a two-step queue-like API:
     *
     *   1. send compressed packet
     *   2. receive all decoded frames currently available
     *
     * A single packet can produce multiple frames, especially with codecs that
     * buffer internally. We concatenate all produced PCM blocks.
     */
    int ret = avcodec_send_packet(decoder->codec_ctx, package->packet);

    /*
     * EAGAIN means the decoder wants its output read before it accepts more
     * input. Frames drained here are appended to the PCM buffer, so they are
     * remembered and count towards the return value.
     */
    int drained = 0;
    if (ret == AVERROR(EAGAIN)) {
        drained = receive_available_frames(decoder);
        if (drained < 0) {
            return 0;
        }
        ret = avcodec_send_packet(decoder->codec_ctx, package->packet);
    }

    if (ret < 0) {
        return 0;
    }

    const int produced = receive_available_frames(decoder);
    if (produced < 0) {
        return 0;
    }

    return (drained > 0 || produced > 0) ? 1 : 0;
}
|
|
|
|
/*
 * Drain the decoder and the resampler at end of input.
 *
 * The decoder's PCM buffer is cleared first and then receives whatever
 * delayed audio is still held by the codec and by libswresample. Returns 1
 * when the buffer ends up non-empty, otherwise 0 (including null decoder and
 * decoder errors).
 */
int ac_flush_decoder(fmpg_decoder *decoder)
{
    if (!decoder) {
        return 0;
    }

    decoder->pcm.clear();

    /*
     * Sending NULL tells FFmpeg that no more input is coming and that delayed
     * decoded frames should be drained.
     */
    const int ret = avcodec_send_packet(decoder->codec_ctx, nullptr);
    if (ret < 0 && ret != AVERROR_EOF) {
        return 0;
    }

    if (receive_available_frames(decoder) < 0) {
        return 0;
    }

    /* Drain possible delayed samples from libswresample as well. */
    const int channels = decoder->codec_ctx->ch_layout.nb_channels;

    /* One scratch buffer reused by every pass of the loop. */
    std::vector<uint8_t> tmp;

    for (;;) {
        const int delay =
            static_cast<int>(swr_get_delay(decoder->swr_ctx,
                                           decoder->codec_ctx->sample_rate));

        if (delay <= 0) {
            break;
        }

        const int max_bytes =
            av_samples_get_buffer_size(nullptr,
                                       channels,
                                       delay,
                                       AC_AUDIO_OUTPUT_FMT,
                                       1);

        if (max_bytes <= 0) {
            break;
        }

        /*
         * This is a C ABI entry point: an allocation failure ends the drain
         * instead of letting the exception reach the caller.
         */
        try {
            tmp.resize(static_cast<size_t>(max_bytes));
        } catch (...) {
            break;
        }

        uint8_t *out_planes[1] = { tmp.data() };

        /* A null input asks the resampler to emit what it has buffered. */
        const int out_samples =
            swr_convert(decoder->swr_ctx,
                        out_planes,
                        delay,
                        nullptr,
                        0);

        if (out_samples <= 0) {
            break;
        }

        const int used_bytes =
            av_samples_get_buffer_size(nullptr,
                                       channels,
                                       out_samples,
                                       AC_AUDIO_OUTPUT_FMT,
                                       1);

        if (used_bytes < 0 ||
            !append_bytes(decoder, tmp.data(), static_cast<size_t>(used_bytes))) {
            break;
        }
    }

    return decoder->pcm.empty() ? 0 : 1;
}
|
|
|
|
/*
 * Seek the decoder's stream to `target_pos_ms` milliseconds.
 *
 * Returns 1 on success. Returns 0 when the decoder is null, its instance is
 * not open, its stream index is not valid for the instance's current file,
 * the seek itself fails, or the resampler cannot be re-initialised. On a
 * successful seek the PCM buffer is cleared, the timecode is set to the
 * requested position, and codec and resampler state are reset.
 */
int ac_seek_ms(fmpg_decoder *decoder, int64_t target_pos_ms)
{
    /*
     * The stream table is indexed below, so the index is validated against
     * the instance as it is now, not as it was when the decoder was created.
     */
    if (!decoder ||
        !valid_stream_index(decoder->instance, decoder->stream_index)) {
        return 0;
    }

    AVFormatContext *ctx = decoder->instance->format_ctx;
    const AVStream *stream = ctx->streams[decoder->stream_index];

    /* milliseconds -> AV_TIME_BASE units -> stream time base */
    const int64_t pos_us = av_rescale(target_pos_ms, AV_TIME_BASE, 1000);
    const int64_t stream_ts =
        av_rescale_q(pos_us, AV_TIME_BASE_Q, stream->time_base);

    if (av_seek_frame(ctx,
                      decoder->stream_index,
                      stream_ts,
                      AVSEEK_FLAG_BACKWARD) < 0) {
        return 0;
    }

    decoder->timecode = target_pos_ms / 1000.0;
    decoder->pcm.clear();

    /* Old buffered data no longer belongs to the new seek position. */
    avcodec_flush_buffers(decoder->codec_ctx);

    /* Reset resampler delay/state too. */
    swr_close(decoder->swr_ctx);
    return swr_init(decoder->swr_ctx) >= 0 ? 1 : 0;
}
|
|
|
|
/*
 * Start of the decoder's PCM buffer, or nullptr when the decoder is null or
 * the buffer is empty.
 */
const uint8_t *ac_decoder_buffer(fmpg_decoder *decoder)
{
    if (decoder == nullptr || decoder->pcm.empty()) {
        return nullptr;
    }
    return decoder->pcm.data();
}
|
|
|
|
/*
 * Size of the decoder's PCM buffer in bytes. Returns 0 for a null decoder or
 * when the size does not fit into an int.
 */
int ac_decoder_buffer_size(fmpg_decoder *decoder)
{
    if (decoder == nullptr) {
        return 0;
    }

    const size_t bytes = decoder->pcm.size();
    const size_t limit = static_cast<size_t>(std::numeric_limits<int>::max());

    if (bytes > limit) {
        return 0;
    }

    return static_cast<int>(bytes);
}
|
|
|
|
/* Decoder's current timecode, or 0.0 when `decoder` is null. */
double ac_decoder_timecode(fmpg_decoder *decoder)
{
    if (decoder == nullptr) {
        return 0.0;
    }
    return decoder->timecode;
}
|
|
|
|
/* Stream index the decoder was created for, or -1 when `decoder` is null. */
int ac_decoder_stream_index(fmpg_decoder *decoder)
{
    if (decoder == nullptr) {
        return -1;
    }
    return decoder->stream_index;
}
|