citra/src/core/dumping/ffmpeg_backend.cpp
zhupengfei 4161163d9c
ffmpeg: Correctly handle sample rates
Previously, we just used the native sample rate for encoding. However, some encoders like libmp3lame doesn't support it. Therefore, we now use a supported sample rate (preferring the native one if possible).

FFmpeg requires audio data to be sent in a sequence of frames, each containing the same specific number of samples. Previously, we buffered input samples in FFmpegBackend. However, as the source and destination sample rates can now be different, we should buffer resampled data instead. swresample have an internal input buffer, so we now just forward all data to it and 'gradually' receive resampled data, at most one frame_size at a time. When there is not enough resampled data to form a frame, we will record the current offset and request for less data on the next call.

Additionally, this commit also fixes a flaw. When an encoder supports variable frame sizes, its frame size is reported to be 0, which breaks our buffering system. Now we treat variable frame size encoders as having a frame size of 160 (the size of a HLE audio frame).
2020-02-27 16:55:13 +08:00

595 lines
18 KiB
C++

// Copyright 2018 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/param_package.h"
#include "core/dumping/ffmpeg_backend.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
extern "C" {
#include <libavutil/opt.h>
}
namespace VideoDumper {
void InitializeFFmpegLibraries() {
static bool initialized = false;
if (initialized)
return;
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 9, 100)
av_register_all();
#endif
avformat_network_init();
initialized = true;
}
AVDictionary* ToAVDictionary(const std::string& serialized) {
Common::ParamPackage param_package{serialized};
AVDictionary* result = nullptr;
for (const auto& [key, value] : param_package) {
av_dict_set(&result, key.c_str(), value.c_str(), 0);
}
return result;
}
FFmpegStream::~FFmpegStream() {
Free();
}
bool FFmpegStream::Init(AVFormatContext* format_context_) {
InitializeFFmpegLibraries();
format_context = format_context_;
return true;
}
void FFmpegStream::Free() {
codec_context.reset();
}
void FFmpegStream::Flush() {
SendFrame(nullptr);
}
void FFmpegStream::WritePacket(AVPacket& packet) {
if (packet.pts != static_cast<s64>(AV_NOPTS_VALUE)) {
packet.pts = av_rescale_q(packet.pts, codec_context->time_base, stream->time_base);
}
if (packet.dts != static_cast<s64>(AV_NOPTS_VALUE)) {
packet.dts = av_rescale_q(packet.dts, codec_context->time_base, stream->time_base);
}
packet.stream_index = stream->index;
av_interleaved_write_frame(format_context, &packet);
}
void FFmpegStream::SendFrame(AVFrame* frame) {
// Initialize packet
AVPacket packet;
av_init_packet(&packet);
packet.data = nullptr;
packet.size = 0;
// Encode frame
if (avcodec_send_frame(codec_context.get(), frame) < 0) {
LOG_ERROR(Render, "Frame dropped: could not send frame");
return;
}
int error = 1;
while (error >= 0) {
error = avcodec_receive_packet(codec_context.get(), &packet);
if (error == AVERROR(EAGAIN) || error == AVERROR_EOF)
return;
if (error < 0) {
LOG_ERROR(Render, "Frame dropped: could not encode audio");
return;
} else {
// Write frame to video file
WritePacket(packet);
}
}
}
FFmpegVideoStream::~FFmpegVideoStream() {
Free();
}
bool FFmpegVideoStream::Init(AVFormatContext* format_context, AVOutputFormat* output_format,
const Layout::FramebufferLayout& layout_) {
InitializeFFmpegLibraries();
if (!FFmpegStream::Init(format_context))
return false;
layout = layout_;
frame_count = 0;
// Initialize video codec
const AVCodec* codec = avcodec_find_encoder_by_name(Settings::values.video_encoder.c_str());
codec_context.reset(avcodec_alloc_context3(codec));
if (!codec || !codec_context) {
LOG_ERROR(Render, "Could not find video encoder or allocate video codec context");
return false;
}
// Configure video codec context
codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
codec_context->bit_rate = Settings::values.video_bitrate;
codec_context->width = layout.width;
codec_context->height = layout.height;
codec_context->time_base.num = 1;
codec_context->time_base.den = 60;
codec_context->gop_size = 12;
codec_context->pix_fmt = codec->pix_fmts ? codec->pix_fmts[0] : AV_PIX_FMT_YUV420P;
if (output_format->flags & AVFMT_GLOBALHEADER)
codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
AVDictionary* options = ToAVDictionary(Settings::values.video_encoder_options);
if (avcodec_open2(codec_context.get(), codec, &options) < 0) {
LOG_ERROR(Render, "Could not open video codec");
return false;
}
if (av_dict_count(options) != 0) { // Successfully set options are removed from the dict
char* buf = nullptr;
av_dict_get_string(options, &buf, ':', ';');
LOG_WARNING(Render, "Video encoder options not found: {}", buf);
}
// Create video stream
stream = avformat_new_stream(format_context, codec);
if (!stream || avcodec_parameters_from_context(stream->codecpar, codec_context.get()) < 0) {
LOG_ERROR(Render, "Could not create video stream");
return false;
}
// Allocate frames
current_frame.reset(av_frame_alloc());
scaled_frame.reset(av_frame_alloc());
scaled_frame->format = codec_context->pix_fmt;
scaled_frame->width = layout.width;
scaled_frame->height = layout.height;
if (av_frame_get_buffer(scaled_frame.get(), 1) < 0) {
LOG_ERROR(Render, "Could not allocate frame buffer");
return false;
}
// Create SWS Context
auto* context = sws_getCachedContext(
sws_context.get(), layout.width, layout.height, pixel_format, layout.width, layout.height,
codec_context->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
if (context != sws_context.get())
sws_context.reset(context);
return true;
}
void FFmpegVideoStream::Free() {
FFmpegStream::Free();
current_frame.reset();
scaled_frame.reset();
sws_context.reset();
}
void FFmpegVideoStream::ProcessFrame(VideoFrame& frame) {
if (frame.width != layout.width || frame.height != layout.height) {
LOG_ERROR(Render, "Frame dropped: resolution does not match");
return;
}
// Prepare frame
current_frame->data[0] = frame.data.data();
current_frame->linesize[0] = frame.stride;
current_frame->format = pixel_format;
current_frame->width = layout.width;
current_frame->height = layout.height;
// Scale the frame
if (sws_context) {
sws_scale(sws_context.get(), current_frame->data, current_frame->linesize, 0, layout.height,
scaled_frame->data, scaled_frame->linesize);
}
scaled_frame->pts = frame_count++;
// Encode frame
SendFrame(scaled_frame.get());
}
FFmpegAudioStream::~FFmpegAudioStream() {
Free();
}
bool FFmpegAudioStream::Init(AVFormatContext* format_context) {
InitializeFFmpegLibraries();
if (!FFmpegStream::Init(format_context))
return false;
frame_count = 0;
// Initialize audio codec
const AVCodec* codec = avcodec_find_encoder_by_name(Settings::values.audio_encoder.c_str());
codec_context.reset(avcodec_alloc_context3(codec));
if (!codec || !codec_context) {
LOG_ERROR(Render, "Could not find audio encoder or allocate audio codec context");
return false;
}
// Configure audio codec context
codec_context->codec_type = AVMEDIA_TYPE_AUDIO;
codec_context->bit_rate = Settings::values.audio_bitrate;
if (codec->sample_fmts) {
codec_context->sample_fmt = AV_SAMPLE_FMT_NONE;
// Use any planar format
const AVSampleFormat* ptr = codec->sample_fmts;
while ((*ptr) != -1) {
if (av_sample_fmt_is_planar((*ptr))) {
codec_context->sample_fmt = (*ptr);
break;
}
ptr++;
}
if (codec_context->sample_fmt == AV_SAMPLE_FMT_NONE) {
LOG_ERROR(Render, "Specified audio encoder does not support any planar format");
return false;
}
} else {
codec_context->sample_fmt = AV_SAMPLE_FMT_S16P;
}
if (codec->supported_samplerates) {
codec_context->sample_rate = codec->supported_samplerates[0];
// Prefer native sample rate if supported
const int* ptr = codec->supported_samplerates;
while ((*ptr)) {
if ((*ptr) == AudioCore::native_sample_rate) {
codec_context->sample_rate = AudioCore::native_sample_rate;
break;
}
ptr++;
}
} else {
codec_context->sample_rate = AudioCore::native_sample_rate;
}
codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
codec_context->channels = 2;
AVDictionary* options = ToAVDictionary(Settings::values.audio_encoder_options);
if (avcodec_open2(codec_context.get(), codec, &options) < 0) {
LOG_ERROR(Render, "Could not open audio codec");
return false;
}
if (av_dict_count(options) != 0) { // Successfully set options are removed from the dict
char* buf = nullptr;
av_dict_get_string(options, &buf, ':', ';');
LOG_WARNING(Render, "Audio encoder options not found: {}", buf);
}
if (codec_context->frame_size) {
frame_size = static_cast<u64>(codec_context->frame_size);
} else { // variable frame size support
frame_size = std::tuple_size<AudioCore::StereoFrame16>::value;
}
// Create audio stream
stream = avformat_new_stream(format_context, codec);
if (!stream || avcodec_parameters_from_context(stream->codecpar, codec_context.get()) < 0) {
LOG_ERROR(Render, "Could not create audio stream");
return false;
}
// Allocate frame
audio_frame.reset(av_frame_alloc());
audio_frame->format = codec_context->sample_fmt;
audio_frame->channel_layout = codec_context->channel_layout;
audio_frame->channels = codec_context->channels;
// Allocate SWR context
auto* context =
swr_alloc_set_opts(nullptr, codec_context->channel_layout, codec_context->sample_fmt,
codec_context->sample_rate, codec_context->channel_layout,
AV_SAMPLE_FMT_S16P, AudioCore::native_sample_rate, 0, nullptr);
if (!context) {
LOG_ERROR(Render, "Could not create SWR context");
return false;
}
swr_context.reset(context);
if (swr_init(swr_context.get()) < 0) {
LOG_ERROR(Render, "Could not init SWR context");
return false;
}
// Allocate resampled data
int error =
av_samples_alloc_array_and_samples(&resampled_data, nullptr, codec_context->channels,
frame_size, codec_context->sample_fmt, 0);
if (error < 0) {
LOG_ERROR(Render, "Could not allocate samples storage");
return false;
}
return true;
}
void FFmpegAudioStream::Free() {
FFmpegStream::Free();
audio_frame.reset();
swr_context.reset();
// Free resampled data
if (resampled_data) {
av_freep(&resampled_data[0]);
}
av_freep(&resampled_data);
}
void FFmpegAudioStream::ProcessFrame(const VariableAudioFrame& channel0,
const VariableAudioFrame& channel1) {
ASSERT_MSG(channel0.size() == channel1.size(),
"Frames of the two channels must have the same number of samples");
const auto sample_size = av_get_bytes_per_sample(codec_context->sample_fmt);
std::array<const u8*, 2> src_data = {reinterpret_cast<const u8*>(channel0.data()),
reinterpret_cast<const u8*>(channel1.data())};
std::array<u8*, 2> dst_data = {resampled_data[0] + sample_size * offset,
resampled_data[1] + sample_size * offset};
auto resampled_count = swr_convert(swr_context.get(), dst_data.data(), frame_size - offset,
src_data.data(), channel0.size());
if (resampled_count < 0) {
LOG_ERROR(Render, "Audio frame dropped: Could not resample data");
return;
}
offset += resampled_count;
if (offset < frame_size) { // Still not enough to form a frame
return;
}
while (true) {
// Prepare frame
audio_frame->nb_samples = frame_size;
audio_frame->data[0] = resampled_data[0];
audio_frame->data[1] = resampled_data[1];
audio_frame->pts = frame_count * frame_size;
frame_count++;
SendFrame(audio_frame.get());
// swr_convert buffers input internally. Try to get more resampled data
resampled_count = swr_convert(swr_context.get(), resampled_data, frame_size, nullptr, 0);
if (resampled_count < 0) {
LOG_ERROR(Render, "Audio frame dropped: Could not resample data");
return;
}
if (static_cast<u64>(resampled_count) < frame_size) {
offset = resampled_count;
break;
}
}
}
void FFmpegAudioStream::Flush() {
// Send the last samples
audio_frame->nb_samples = offset;
audio_frame->data[0] = resampled_data[0];
audio_frame->data[1] = resampled_data[1];
audio_frame->pts = frame_count * frame_size;
SendFrame(audio_frame.get());
FFmpegStream::Flush();
}
FFmpegMuxer::~FFmpegMuxer() {
Free();
}
bool FFmpegMuxer::Init(const std::string& path, const Layout::FramebufferLayout& layout) {
InitializeFFmpegLibraries();
if (!FileUtil::CreateFullPath(path)) {
return false;
}
// Get output format
const auto format = Settings::values.output_format;
auto* output_format = av_guess_format(format.c_str(), path.c_str(), nullptr);
if (!output_format) {
LOG_ERROR(Render, "Could not get format {}", format);
return false;
}
// Initialize format context
auto* format_context_raw = format_context.get();
if (avformat_alloc_output_context2(&format_context_raw, output_format, nullptr, path.c_str()) <
0) {
LOG_ERROR(Render, "Could not allocate output context");
return false;
}
format_context.reset(format_context_raw);
if (!video_stream.Init(format_context.get(), output_format, layout))
return false;
if (!audio_stream.Init(format_context.get()))
return false;
AVDictionary* options = ToAVDictionary(Settings::values.format_options);
// Open video file
if (avio_open(&format_context->pb, path.c_str(), AVIO_FLAG_WRITE) < 0 ||
avformat_write_header(format_context.get(), &options)) {
LOG_ERROR(Render, "Could not open {}", path);
return false;
}
if (av_dict_count(options) != 0) { // Successfully set options are removed from the dict
char* buf = nullptr;
av_dict_get_string(options, &buf, ':', ';');
LOG_WARNING(Render, "Format options not found: {}", buf);
}
LOG_INFO(Render, "Dumping frames to {} ({}x{})", path, layout.width, layout.height);
return true;
}
void FFmpegMuxer::Free() {
video_stream.Free();
audio_stream.Free();
format_context.reset();
}
void FFmpegMuxer::ProcessVideoFrame(VideoFrame& frame) {
video_stream.ProcessFrame(frame);
}
void FFmpegMuxer::ProcessAudioFrame(const VariableAudioFrame& channel0,
const VariableAudioFrame& channel1) {
audio_stream.ProcessFrame(channel0, channel1);
}
void FFmpegMuxer::FlushVideo() {
video_stream.Flush();
}
void FFmpegMuxer::FlushAudio() {
audio_stream.Flush();
}
void FFmpegMuxer::WriteTrailer() {
av_write_trailer(format_context.get());
}
FFmpegBackend::FFmpegBackend() = default;
FFmpegBackend::~FFmpegBackend() {
ASSERT_MSG(!IsDumping(), "Dumping must be stopped first");
if (video_processing_thread.joinable())
video_processing_thread.join();
if (audio_processing_thread.joinable())
audio_processing_thread.join();
ffmpeg.Free();
}
bool FFmpegBackend::StartDumping(const std::string& path, const Layout::FramebufferLayout& layout) {
InitializeFFmpegLibraries();
if (!ffmpeg.Init(path, layout)) {
ffmpeg.Free();
return false;
}
video_layout = layout;
if (video_processing_thread.joinable())
video_processing_thread.join();
video_processing_thread = std::thread([&] {
event1.Set();
while (true) {
event2.Wait();
current_buffer = (current_buffer + 1) % 2;
next_buffer = (current_buffer + 1) % 2;
event1.Set();
// Process this frame
auto& frame = video_frame_buffers[current_buffer];
if (frame.width == 0 && frame.height == 0) {
// An empty frame marks the end of frame data
ffmpeg.FlushVideo();
break;
}
ffmpeg.ProcessVideoFrame(frame);
}
// Finish audio execution first if not done yet
if (audio_processing_thread.joinable())
audio_processing_thread.join();
EndDumping();
});
if (audio_processing_thread.joinable())
audio_processing_thread.join();
audio_processing_thread = std::thread([&] {
VariableAudioFrame channel0, channel1;
while (true) {
channel0 = audio_frame_queues[0].PopWait();
channel1 = audio_frame_queues[1].PopWait();
if (channel0.empty()) {
// An empty frame marks the end of frame data
ffmpeg.FlushAudio();
break;
}
ffmpeg.ProcessAudioFrame(channel0, channel1);
}
});
VideoCore::g_renderer->PrepareVideoDumping();
is_dumping = true;
return true;
}
void FFmpegBackend::AddVideoFrame(VideoFrame frame) {
event1.Wait();
video_frame_buffers[next_buffer] = std::move(frame);
event2.Set();
}
void FFmpegBackend::AddAudioFrame(AudioCore::StereoFrame16 frame) {
std::array<VariableAudioFrame, 2> refactored_frame;
for (auto& channel : refactored_frame) {
channel.resize(frame.size());
}
for (std::size_t i = 0; i < frame.size(); i++) {
refactored_frame[0][i] = frame[i][0];
refactored_frame[1][i] = frame[i][1];
}
ffmpeg.ProcessAudioFrame(refactored_frame[0], refactored_frame[1]);
}
void FFmpegBackend::AddAudioSample(const std::array<s16, 2>& sample) {
ffmpeg.ProcessAudioFrame({sample[0]}, {sample[1]});
}
void FFmpegBackend::StopDumping() {
is_dumping = false;
VideoCore::g_renderer->CleanupVideoDumping();
// Flush the video processing queue
AddVideoFrame(VideoFrame());
for (auto i : {0, 1}) {
// Flush the audio processing queue
audio_frame_queues[i].Push(VariableAudioFrame());
}
// Wait until processing ends
processing_ended.Wait();
}
bool FFmpegBackend::IsDumping() const {
return is_dumping.load(std::memory_order_relaxed);
}
Layout::FramebufferLayout FFmpegBackend::GetLayout() const {
return video_layout;
}
void FFmpegBackend::EndDumping() {
LOG_INFO(Render, "Ending frame dumping");
ffmpeg.WriteTrailer();
ffmpeg.Free();
processing_ended.Set();
}
} // namespace VideoDumper