From 70c35642a4c9748cbd7dbc90c41267a04aa4d7a1 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Sun, 6 Nov 2022 11:19:24 +0100 Subject: [PATCH] recording: initial audio recording --- rpcs3/Emu/Audio/audio_resampler.cpp | 3 +- rpcs3/Emu/Cell/Modules/cellAudio.cpp | 24 +- rpcs3/Emu/Cell/Modules/cellRec.cpp | 113 ++- rpcs3/Emu/Io/recording_config.h | 33 +- rpcs3/emucore.vcxproj | 2 +- rpcs3/emucore.vcxproj.filters | 2 +- rpcs3/rpcs3qt/gs_frame.cpp | 35 +- rpcs3/util/image_sink.h | 54 -- rpcs3/util/media_utils.cpp | 1004 ++++++++++++++++++-------- rpcs3/util/media_utils.h | 17 +- rpcs3/util/video_provider.cpp | 103 ++- rpcs3/util/video_provider.h | 16 +- rpcs3/util/video_sink.h | 104 +++ 13 files changed, 1036 insertions(+), 474 deletions(-) delete mode 100644 rpcs3/util/image_sink.h create mode 100644 rpcs3/util/video_sink.h diff --git a/rpcs3/Emu/Audio/audio_resampler.cpp b/rpcs3/Emu/Audio/audio_resampler.cpp index c2d318db40..b029172dfa 100644 --- a/rpcs3/Emu/Audio/audio_resampler.cpp +++ b/rpcs3/Emu/Audio/audio_resampler.cpp @@ -33,8 +33,7 @@ void audio_resampler::put_samples(const f32* buf, u32 sample_cnt) std::pair audio_resampler::get_samples(u32 sample_cnt) { - f32 *const buf = resampler.bufBegin(); - return std::make_pair(buf, resampler.receiveSamples(sample_cnt)); + return std::make_pair(resampler.bufBegin(), resampler.receiveSamples(sample_cnt)); } u32 audio_resampler::samples_available() const diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.cpp b/rpcs3/Emu/Cell/Modules/cellAudio.cpp index c46b09b8a5..f9083c8fc4 100644 --- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp @@ -5,6 +5,7 @@ #include "Emu/Cell/lv2/sys_process.h" #include "Emu/Cell/lv2/sys_event.h" #include "cellAudio.h" +#include "util/video_provider.h" #include @@ -69,7 +70,7 @@ void cell_audio_config::reset(bool backend_changed) const AudioFreq freq = AudioFreq::FREQ_48K; const AudioSampleSize sample_size = raw.convert_to_s16 ? 
AudioSampleSize::S16 : AudioSampleSize::FLOAT; - const auto [req_ch_cnt, downmix] = AudioBackend::get_channel_count_and_downmixer(0); // CELL_AUDIO_OUT_PRIMARY + const auto& [req_ch_cnt, downmix] = AudioBackend::get_channel_count_and_downmixer(0); // CELL_AUDIO_OUT_PRIMARY f64 cb_frame_len = 0.0; u32 ch_cnt = 2; @@ -276,16 +277,23 @@ void audio_ringbuffer::process_resampled_data() { if (!cfg.time_stretching_enabled) return; - const auto [buffer, samples] = resampler.get_samples(static_cast(cb_ringbuf.get_free_size() / (cfg.audio_sample_size * static_cast(cfg.backend_ch_cnt)))); + const auto& [buffer, samples] = resampler.get_samples(static_cast(cb_ringbuf.get_free_size() / (cfg.audio_sample_size * static_cast(cfg.backend_ch_cnt)))); commit_data(buffer, samples); } void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt) { - sample_cnt *= cfg.audio_channels; + const u32 sample_cnt_in = sample_cnt * cfg.audio_channels; + const u32 sample_cnt_out = sample_cnt * static_cast(cfg.backend_ch_cnt); // Dump audio if enabled - m_dump.WriteData(buf, sample_cnt * static_cast(AudioSampleSize::FLOAT)); + m_dump.WriteData(buf, sample_cnt_in * static_cast(AudioSampleSize::FLOAT)); + + // Record audio if enabled + if (utils::video_provider& provider = g_fxo->get(); provider.can_consume_sample()) + { + provider.present_samples(reinterpret_cast(buf), sample_cnt, static_cast(cfg.audio_channels)); + } if (cfg.backend_ch_cnt < AudioChannelCnt{cfg.audio_channels}) { @@ -293,11 +301,11 @@ void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt) { if (cfg.backend_ch_cnt == AudioChannelCnt::SURROUND_5_1) { - AudioBackend::downmix(sample_cnt, buf, buf); + AudioBackend::downmix(sample_cnt_in, buf, buf); } else if (cfg.backend_ch_cnt == AudioChannelCnt::STEREO) { - AudioBackend::downmix(sample_cnt, buf, buf); + AudioBackend::downmix(sample_cnt_in, buf, buf); } else { @@ -308,7 +316,7 @@ void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt) { if (cfg.backend_ch_cnt == 
AudioChannelCnt::STEREO) { - AudioBackend::downmix(sample_cnt, buf, buf); + AudioBackend::downmix(sample_cnt_in, buf, buf); } else { @@ -321,8 +329,6 @@ void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt) } } - const u32 sample_cnt_out = sample_cnt / cfg.audio_channels * static_cast(cfg.backend_ch_cnt); - if (cfg.backend->get_convert_to_s16()) { AudioBackend::convert_to_s16(sample_cnt_out, buf, buf); diff --git a/rpcs3/Emu/Cell/Modules/cellRec.cpp b/rpcs3/Emu/Cell/Modules/cellRec.cpp index 53165ef6a5..ebdeffb49f 100644 --- a/rpcs3/Emu/Cell/Modules/cellRec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellRec.cpp @@ -140,34 +140,26 @@ struct rec_param constexpr u32 rec_framerate = 30; // Always 30 fps -class rec_image_sink : public utils::image_sink +class rec_video_sink : public utils::video_sink { public: - rec_image_sink() : utils::image_sink() + rec_video_sink() : utils::video_sink() { m_framerate = rec_framerate; + m_sample_rate = 44100; // TODO } void stop(bool flush = true) override { - cellRec.notice("Stopping image sink. flush=%d", flush); + cellRec.notice("Stopping video sink. flush=%d", flush); std::lock_guard lock(m_mtx); m_flush = flush; m_frames_to_encode.clear(); + m_samples_to_encode.clear(); has_error = false; } - void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) override - { - std::lock_guard lock(m_mtx); - - if (m_flush) - return; - - m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame)); - } - encoder_frame get_frame() { std::lock_guard lock(m_mtx); @@ -196,7 +188,7 @@ struct rec_info vm::bptr video_input_buffer{}; // Used by the game to inject a frame right before it would render a frame to the screen. 
vm::bptr audio_input_buffer{}; // Used by the game to inject audio: 2-channel interleaved (left-right) * 256 samples * sizeof(f32) at 48000 kHz - std::vector video_ringbuffer; + std::vector video_ringbuffer; std::vector audio_ringbuffer; usz video_ring_pos = 0; usz video_ring_frame_count = 0; @@ -209,9 +201,9 @@ struct rec_info return pos; } - std::shared_ptr image_sink; + std::shared_ptr video_sink; std::shared_ptr encoder; - std::unique_ptr>> image_provider_thread; + std::unique_ptr>> video_provider_thread; atomic_t paused = false; s64 last_pts = -1; @@ -240,9 +232,9 @@ struct rec_info void set_video_params(s32 video_format); void set_audio_params(s32 audio_format); - void start_image_provider(); - void pause_image_provider(); - void stop_image_provider(bool flush); + void start_video_provider(); + void pause_video_provider(); + void stop_video_provider(bool flush); }; void rec_info::set_video_params(s32 video_format) @@ -507,29 +499,29 @@ void rec_info::set_audio_params(s32 audio_format) cellRec.notice("set_audio_params: audio_format=0x%x, audio_codec_id=%d, sample_rate=%d, audio_bps=%d", audio_format, audio_codec_id, sample_rate, audio_bps); } -void rec_info::start_image_provider() +void rec_info::start_video_provider() { const bool was_paused = paused.exchange(false); utils::video_provider& video_provider = g_fxo->get(); - if (image_provider_thread && was_paused) + if (video_provider_thread && was_paused) { // Resume const u64 pause_time_end = get_system_time(); ensure(pause_time_end > pause_time_start); pause_time_total += (pause_time_end - pause_time_start); video_provider.set_pause_time(pause_time_total / 1000); - cellRec.notice("Resuming image provider."); + cellRec.notice("Resuming video provider."); return; } - cellRec.notice("Starting image provider."); + cellRec.notice("Starting video provider."); recording_time_start = get_system_time(); pause_time_total = 0; video_provider.set_pause_time(0); - image_provider_thread = std::make_unique>>("cellRec Image 
Provider", [this]() + video_provider_thread = std::make_unique>>("cellRec video provider", [this]() { const bool use_internal_audio = param.audio_input == CELL_REC_PARAM_AUDIO_INPUT_DISABLE || param.audio_input_mix_vol < 100; const bool use_external_audio = param.audio_input != CELL_REC_PARAM_AUDIO_INPUT_DISABLE && param.audio_input_mix_vol > 0; @@ -537,7 +529,7 @@ void rec_info::start_image_provider() const bool use_ring_buffer = param.ring_sec > 0; const usz frame_size = input_format.pitch * input_format.height; - cellRec.notice("image_provider_thread: use_ring_buffer=%d, video_ringbuffer_size=%d, audio_ringbuffer_size=%d, ring_sec=%d, frame_size=%d, use_external_video=%d, use_external_audio=%d, use_internal_audio=%d", use_ring_buffer, video_ringbuffer.size(), audio_ringbuffer.size(), param.ring_sec, frame_size, use_external_video, use_external_audio, use_internal_audio); + cellRec.notice("video_provider_thread: use_ring_buffer=%d, video_ringbuffer_size=%d, audio_ringbuffer_size=%d, ring_sec=%d, frame_size=%d, use_external_video=%d, use_external_audio=%d, use_internal_audio=%d", use_ring_buffer, video_ringbuffer.size(), audio_ringbuffer.size(), param.ring_sec, frame_size, use_external_video, use_external_audio, use_internal_audio); while (thread_ctrl::state() != thread_state::aborting && encoder) { @@ -575,7 +567,7 @@ void rec_info::start_image_provider() { if (use_ring_buffer) { - utils::image_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()]; + utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()]; frame_data.pts = pts; frame_data.width = input_format.width; frame_data.height = input_format.height; @@ -595,14 +587,14 @@ void rec_info::start_image_provider() last_pts = pts; } } - else if (use_ring_buffer && image_sink) + else if (use_ring_buffer && video_sink) { - utils::image_sink::encoder_frame frame = image_sink->get_frame(); + utils::video_sink::encoder_frame frame = video_sink->get_frame(); if (const 
s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_pts && frame.data.size() > 0) { ensure(frame.data.size() == frame_size); - utils::image_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()]; + utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()]; frame_data = std::move(frame); frame_data.pts = pts; last_pts = pts; @@ -635,34 +627,34 @@ void rec_info::start_image_provider() } // Update recording time - recording_time_total = encoder->get_timestamp_ms(encoder->last_pts()); + recording_time_total = encoder->get_timestamp_ms(encoder->last_video_pts()); thread_ctrl::wait_for(100); } }); } -void rec_info::pause_image_provider() +void rec_info::pause_video_provider() { cellRec.notice("Pausing image provider."); - if (image_provider_thread) + if (video_provider_thread) { paused = true; pause_time_start = get_system_time(); } } -void rec_info::stop_image_provider(bool flush) +void rec_info::stop_video_provider(bool flush) { - cellRec.notice("Stopping image provider."); + cellRec.notice("Stopping video provider."); - if (image_provider_thread) + if (video_provider_thread) { - auto& thread = *image_provider_thread; + auto& thread = *video_provider_thread; thread = thread_state::aborting; thread(); - image_provider_thread.reset(); + video_provider_thread.reset(); } if (flush && param.ring_sec > 0 && !video_ringbuffer.empty()) @@ -680,7 +672,7 @@ void rec_info::stop_image_provider(bool flush) for (usz i = 0; i < frame_count; i++) { const usz pos = (start_offset + i) % video_ringbuffer.size(); - utils::image_sink::encoder_frame& frame_data = video_ringbuffer[pos]; + utils::video_sink::encoder_frame& frame_data = video_ringbuffer[pos]; encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, encoder->get_timestamp_ms(frame_data.pts - start_pts)); // TODO: add audio data to encoder @@ -1073,7 +1065,7 @@ error_code cellRecOpen(vm::cptr pDirName, vm::cptr 
pFileName, vm::cp rec.audio_ringbuffer.resize(audio_ring_buffer_size); rec.audio_ring_step = audio_size_per_sample; rec.video_ringbuffer.resize(video_ring_buffer_size, {}); - rec.image_sink = std::make_shared(); + rec.video_sink = std::make_shared(); } rec.encoder = std::make_shared(); @@ -1082,6 +1074,7 @@ error_code cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cp rec.encoder->set_video_bitrate(rec.video_bps); rec.encoder->set_video_codec(rec.video_codec_id); rec.encoder->set_sample_rate(rec.sample_rate); + rec.encoder->set_audio_channels(rec.channels); rec.encoder->set_audio_bitrate(rec.audio_bps); rec.encoder->set_audio_codec(rec.audio_codec_id); rec.encoder->set_output_format(rec.output_format); @@ -1114,12 +1107,12 @@ error_code cellRecClose(s32 isDiscard) if (isDiscard) { // No need to flush - rec.stop_image_provider(false); + rec.stop_video_provider(false); rec.encoder->stop(false); - if (rec.image_sink) + if (rec.video_sink) { - rec.image_sink->stop(false); + rec.video_sink->stop(false); } if (fs::is_file(rec.param.filename)) @@ -1135,18 +1128,18 @@ error_code cellRecClose(s32 isDiscard) else { // Flush to make sure we encode all remaining frames - rec.stop_image_provider(true); + rec.stop_video_provider(true); rec.encoder->stop(true); - rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_pts()); + rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_video_pts()); - if (rec.image_sink) + if (rec.video_sink) { - rec.image_sink->stop(true); + rec.video_sink->stop(true); } const s64 start_pts = rec.encoder->get_pts(rec.param.scene_metadata.start_time); const s64 end_pts = rec.encoder->get_pts(rec.param.scene_metadata.end_time); - const s64 last_pts = rec.encoder->last_pts(); + const s64 last_pts = rec.encoder->last_video_pts(); is_valid_range = start_pts >= 0 && end_pts <= last_pts; } @@ -1157,7 +1150,7 @@ error_code cellRecClose(s32 isDiscard) g_fxo->need(); utils::video_provider& video_provider = 
g_fxo->get(); - // Release the image sink if it was used + // Release the video sink if it was used if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE) { const recording_mode old_mode = g_recording_mode.exchange(recording_mode::stopped); @@ -1167,15 +1160,15 @@ error_code cellRecClose(s32 isDiscard) cellRec.error("cellRecClose: Unexpected recording mode %s found while stopping video capture.", old_mode); } - if (!video_provider.set_image_sink(nullptr, recording_mode::cell)) + if (!video_provider.set_video_sink(nullptr, recording_mode::cell)) { - cellRec.error("cellRecClose failed to release image sink"); + cellRec.error("cellRecClose failed to release video sink"); } } rec.param = {}; rec.encoder.reset(); - rec.image_sink.reset(); + rec.video_sink.reset(); rec.audio_ringbuffer.clear(); rec.video_ringbuffer.clear(); rec.state = rec_state::closed; @@ -1207,7 +1200,7 @@ error_code cellRecStop() sysutil_register_cb([&rec](ppu_thread& ppu) -> s32 { - // Disable image sink if it was used + // Disable video sink if it was used if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE) { const recording_mode old_mode = g_recording_mode.exchange(recording_mode::stopped); @@ -1219,12 +1212,12 @@ error_code cellRecStop() } // cellRecStop actually just pauses the recording - rec.pause_image_provider(); + rec.pause_video_provider(); ensure(!!rec.encoder); rec.encoder->pause(true); - rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_pts()); + rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_video_pts()); rec.state = rec_state::stopped; rec.cb(ppu, CELL_REC_STATUS_STOP, CELL_OK, rec.cbUserData); @@ -1254,15 +1247,15 @@ error_code cellRecStart() g_fxo->need(); utils::video_provider& video_provider = g_fxo->get(); - // Setup an image sink if it is needed + // Setup a video sink if it is needed if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE) { if (rec.param.ring_sec <= 0) { // Regular recording - 
if (!video_provider.set_image_sink(rec.encoder, recording_mode::cell)) + if (!video_provider.set_video_sink(rec.encoder, recording_mode::cell)) { - cellRec.error("Failed to set image sink"); + cellRec.error("Failed to set video sink"); rec.cb(ppu, CELL_REC_STATUS_ERR, CELL_REC_ERROR_FATAL, rec.cbUserData); return CELL_OK; } @@ -1270,9 +1263,9 @@ error_code cellRecStart() else { // Ringbuffer recording - if (!video_provider.set_image_sink(rec.image_sink, recording_mode::cell)) + if (!video_provider.set_video_sink(rec.video_sink, recording_mode::cell)) { - cellRec.error("Failed to set image sink"); + cellRec.error("Failed to set video sink"); rec.cb(ppu, CELL_REC_STATUS_ERR, CELL_REC_ERROR_FATAL, rec.cbUserData); return CELL_OK; } @@ -1287,7 +1280,7 @@ error_code cellRecStart() g_recording_mode = recording_mode::stopped; } - rec.start_image_provider(); + rec.start_video_provider(); if (rec.encoder->has_error) { diff --git a/rpcs3/Emu/Io/recording_config.h b/rpcs3/Emu/Io/recording_config.h index f1e2e58242..e08e73acfe 100644 --- a/rpcs3/Emu/Io/recording_config.h +++ b/rpcs3/Emu/Io/recording_config.h @@ -8,14 +8,31 @@ struct cfg_recording final : cfg::node bool load(); void save() const; - cfg::uint<0, 60> framerate{this, "Framerate", 30}; - cfg::uint<0, 7680> width{this, "Width", 1280}; - cfg::uint<0, 4320> height{this, "Height", 720}; - cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P - cfg::uint<0, 32813> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4 - cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000}; - cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2}; - cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12}; + struct node_video : cfg::node + { + node_video(cfg::node* _this) : cfg::node(_this, "Video") {} + + cfg::uint<0, 60> framerate{this, "Framerate", 30}; + cfg::uint<0, 7680> width{this, "Width", 1280}; + cfg::uint<0, 4320> height{this, "Height", 720}; + cfg::uint<0, 
192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P + cfg::uint<0, 0xFFFF> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4 + cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000}; + cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2}; + cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12}; + + } video{ this }; + + struct node_audio : cfg::node + { + node_audio(cfg::node* _this) : cfg::node(_this, "Audio") {} + + cfg::uint<0x10000, 0x17000> audio_codec{this, "AVCodecID", 86019}; // AVCodecID::AV_CODEC_ID_AC3 + cfg::uint<0, 8> channels{this, "Channels", 2}; + cfg::uint<0, 25000000> audio_bps{this, "Audio Bitrate", 320000}; + cfg::uint<0, 25000000> sample_rate{this, "Sample Rate", 48000}; + + } audio{ this }; const std::string path; }; diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index cc7c60f51d..0d7a9a74ea 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -618,7 +618,7 @@ - + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 5a11ef535f..447430681e 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2275,7 +2275,7 @@ Utilities - + Utilities diff --git a/rpcs3/rpcs3qt/gs_frame.cpp b/rpcs3/rpcs3qt/gs_frame.cpp index f14213cb52..31b81518e5 100644 --- a/rpcs3/rpcs3qt/gs_frame.cpp +++ b/rpcs3/rpcs3qt/gs_frame.cpp @@ -12,6 +12,7 @@ #include "Emu/IdManager.h" #include "Emu/Cell/Modules/cellScreenshot.h" #include "Emu/Cell/Modules/cellVideoOut.h" +#include "Emu/Cell/Modules/cellAudio.h" #include "Emu/RSX/rsx_utils.h" #include "Emu/RSX/Overlays/overlay_message.h" #include "Emu/Io/recording_config.h" @@ -445,9 +446,9 @@ void gs_frame::toggle_recording() { m_video_encoder->stop(); - if (!video_provider.set_image_sink(nullptr, recording_mode::rpcs3)) + if (!video_provider.set_video_sink(nullptr, recording_mode::rpcs3)) { - gui_log.warning("The video provider could not release the image sink. 
A sink with higher priority must have been set."); + gui_log.warning("The video provider could not release the video sink. A sink with higher priority must have been set."); } // Play a sound @@ -489,21 +490,23 @@ void gs_frame::toggle_recording() video_path += "recording_" + date_time::current_time_narrow<'_'>() + ".mp4"; utils::video_encoder::frame_format output_format{}; - output_format.av_pixel_format = static_cast(g_cfg_recording.pixel_format.get()); - output_format.width = g_cfg_recording.width; - output_format.height = g_cfg_recording.height; - output_format.pitch = g_cfg_recording.width * 4; + output_format.av_pixel_format = static_cast(g_cfg_recording.video.pixel_format.get()); + output_format.width = g_cfg_recording.video.width; + output_format.height = g_cfg_recording.video.height; + output_format.pitch = g_cfg_recording.video.width * 4; m_video_encoder->set_path(video_path); - m_video_encoder->set_framerate(g_cfg_recording.framerate); - m_video_encoder->set_video_bitrate(g_cfg_recording.video_bps); - m_video_encoder->set_video_codec(g_cfg_recording.video_codec); - m_video_encoder->set_max_b_frames(g_cfg_recording.max_b_frames); - m_video_encoder->set_gop_size(g_cfg_recording.gop_size); + m_video_encoder->set_framerate(g_cfg_recording.video.framerate); + m_video_encoder->set_video_bitrate(g_cfg_recording.video.video_bps); + m_video_encoder->set_video_codec(g_cfg_recording.video.video_codec); + m_video_encoder->set_max_b_frames(g_cfg_recording.video.max_b_frames); + m_video_encoder->set_gop_size(g_cfg_recording.video.gop_size); m_video_encoder->set_output_format(output_format); - m_video_encoder->set_sample_rate(0); // TODO - m_video_encoder->set_audio_bitrate(0); // TODO - m_video_encoder->set_audio_codec(0); // TODO + m_video_encoder->set_sample_rate(g_cfg_recording.audio.sample_rate); + //m_video_encoder->set_audio_channels(static_cast(g_fxo->get().cfg.backend_ch_cnt)); + 
m_video_encoder->set_audio_channels(static_cast(g_fxo->get().cfg.audio_channels)); + m_video_encoder->set_audio_bitrate(g_cfg_recording.audio.audio_bps); + m_video_encoder->set_audio_codec(g_cfg_recording.audio.audio_codec); m_video_encoder->encode(); if (m_video_encoder->has_error) @@ -513,9 +516,9 @@ void gs_frame::toggle_recording() return; } - if (!video_provider.set_image_sink(m_video_encoder, recording_mode::rpcs3)) + if (!video_provider.set_video_sink(m_video_encoder, recording_mode::rpcs3)) { - gui_log.warning("The video provider could not set the image sink. A sink with higher priority must have been set."); + gui_log.warning("The video provider could not set the video sink. A sink with higher priority must have been set."); rsx::overlays::queue_message(tr("Recording not possible").toStdString()); m_video_encoder->stop(); return; diff --git a/rpcs3/util/image_sink.h b/rpcs3/util/image_sink.h deleted file mode 100644 index 3c23eca514..0000000000 --- a/rpcs3/util/image_sink.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "util/types.hpp" -#include "util/atomic.hpp" -#include "Utilities/mutex.h" - -#include -#include - -namespace utils -{ - class image_sink - { - public: - image_sink() = default; - - virtual void stop(bool flush = true) = 0; - virtual void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) = 0; - - s64 get_pts(usz timestamp_ms) const - { - return static_cast(std::round((timestamp_ms * m_framerate) / 1000.f)); - } - - usz get_timestamp_ms(s64 pts) const - { - return static_cast(std::round((pts * 1000) / static_cast(m_framerate))); - } - - atomic_t has_error{false}; - - struct encoder_frame - { - encoder_frame() = default; - encoder_frame(usz timestamp_ms, u32 pitch, u32 width, u32 height, s32 av_pixel_format, std::vector&& data) - : timestamp_ms(timestamp_ms), pitch(pitch), width(width), height(height), av_pixel_format(av_pixel_format), data(std::move(data)) - {} - - s64 pts = -1; // 
Optional - usz timestamp_ms = 0; - u32 pitch = 0; - u32 width = 0; - u32 height = 0; - s32 av_pixel_format = 0; // NOTE: Make sure this is a valid AVPixelFormat - std::vector data; - }; - - protected: - shared_mutex m_mtx; - std::deque m_frames_to_encode; - atomic_t m_flush = false; - u32 m_framerate = 0; - }; -} diff --git a/rpcs3/util/media_utils.cpp b/rpcs3/util/media_utils.cpp index 3dddd5b0f8..cc6207d369 100644 --- a/rpcs3/util/media_utils.cpp +++ b/rpcs3/util/media_utils.cpp @@ -32,6 +32,28 @@ LOG_CHANNEL(media_log, "Media"); namespace utils { + template + static inline void write_byteswapped(const u8* src, u8* dst) + { + *reinterpret_cast(dst) = *reinterpret_cast*>(src); + } + + template + static inline void copy_samples(const u8* src, u8* dst, usz sample_count, bool swap_endianness) + { + if (swap_endianness) + { + for (usz i = 0; i < sample_count; i++) + { + write_byteswapped(src + i * sizeof(T), dst + i * sizeof(T)); + } + } + else + { + std::memcpy(dst, src, sample_count * sizeof(T)); + } + } + template <> std::string media_info::get_metadata(const std::string& key, const std::string& def) const { @@ -204,11 +226,19 @@ namespace utils struct scoped_av { - AVFormatContext* format = nullptr; - const AVCodec* codec = nullptr; - AVCodecContext* context = nullptr; - AVFrame* frame = nullptr; - AVStream* stream = nullptr; + struct ctx + { + const AVCodec* codec = nullptr; + AVCodecContext* context = nullptr; + AVStream* stream = nullptr; + AVPacket* packet = nullptr; + AVFrame* frame = nullptr; + }; + + ctx audio{}; + ctx video{}; + + AVFormatContext* format_context = nullptr; SwrContext* swr = nullptr; SwsContext* sws = nullptr; std::function kill_callback = nullptr; @@ -216,21 +246,38 @@ namespace utils ~scoped_av() { // Clean up - if (frame) + if (audio.frame) { - av_frame_unref(frame); - av_frame_free(&frame); + av_frame_unref(audio.frame); + av_frame_free(&audio.frame); + } + if (video.frame) + { + av_frame_unref(video.frame); + 
av_frame_free(&video.frame); + } + if (audio.packet) + { + av_packet_unref(audio.packet); + av_packet_free(&audio.packet); + } + if (video.packet) + { + av_packet_unref(video.packet); + av_packet_free(&video.packet); } if (swr) swr_free(&swr); if (sws) sws_freeContext(sws); - if (context) - avcodec_close(context); + if (audio.context) + avcodec_close(audio.context); + if (video.context) + avcodec_close(video.context); // AVCodec is managed by libavformat, no need to free it // see: https://stackoverflow.com/a/18047320 - if (format) - avformat_free_context(format); + if (format_context) + avformat_free_context(format_context); //if (stream) // av_free(stream); if (kill_callback) @@ -238,6 +285,53 @@ namespace utils } }; + // check that a given sample format is supported by the encoder + static bool check_sample_fmt(const AVCodec* codec, enum AVSampleFormat sample_fmt) + { + for (const AVSampleFormat* p = codec->sample_fmts; p && *p != AV_SAMPLE_FMT_NONE; p++) + { + if (*p == sample_fmt) + { + return true; + } + } + return false; + } + + // just pick the highest supported samplerate + static int select_sample_rate(const AVCodec* codec) + { + if (!codec->supported_samplerates) + return 44100; + + int best_samplerate = 0; + for (const int* samplerate = codec->supported_samplerates; samplerate && *samplerate != 0; samplerate++) + { + if (!best_samplerate || abs(44100 - *samplerate) < abs(44100 - best_samplerate)) + { + best_samplerate = *samplerate; + } + } + return best_samplerate; + } + + // select layout with the highest channel count + static const AVChannelLayout* select_channel_layout(const AVCodec* codec, int channels) + { + constexpr AVChannelLayout empty_ch_layout = {}; + + for (const AVChannelLayout* ch_layout = codec->ch_layouts; + ch_layout && memcmp(ch_layout, &empty_ch_layout, sizeof(AVChannelLayout)) != 0; + ch_layout++) + { + if (ch_layout->nb_channels == channels) + { + return ch_layout; + } + } + return nullptr; + } + audio_decoder::audio_decoder() { } 
@@ -295,14 +389,14 @@ namespace utils scoped_av av; // Get format from audio file - av.format = avformat_alloc_context(); - if (int err = avformat_open_input(&av.format, path.c_str(), nullptr, nullptr); err < 0) + av.format_context = avformat_alloc_context(); + if (int err = avformat_open_input(&av.format_context, path.c_str(), nullptr, nullptr); err < 0) { media_log.error("audio_decoder: Could not open file '%s'. Error: %d='%s'", path, err, av_error_to_string(err)); has_error = true; return; } - if (int err = avformat_find_stream_info(av.format, nullptr); err < 0) + if (int err = avformat_find_stream_info(av.format_context, nullptr); err < 0) { media_log.error("audio_decoder: Could not retrieve stream info from file '%s'. Error: %d='%s'", path, err, av_error_to_string(err)); has_error = true; @@ -312,11 +406,11 @@ namespace utils // Find the first audio stream AVStream* stream = nullptr; unsigned int stream_index; - for (stream_index = 0; stream_index < av.format->nb_streams; stream_index++) + for (stream_index = 0; stream_index < av.format_context->nb_streams; stream_index++) { - if (av.format->streams[stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) + if (av.format_context->streams[stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - stream = av.format->streams[stream_index]; + stream = av.format_context->streams[stream_index]; break; } } @@ -328,8 +422,8 @@ namespace utils } // Find decoder - av.codec = avcodec_find_decoder(stream->codecpar->codec_id); - if (!av.codec) + av.audio.codec = avcodec_find_decoder(stream->codecpar->codec_id); + if (!av.audio.codec) { media_log.error("audio_decoder: Failed to find decoder for stream #%u in file '%s'", stream_index, path); has_error = true; @@ -337,8 +431,8 @@ namespace utils } // Allocate context - av.context = avcodec_alloc_context3(av.codec); - if (!av.context) + av.audio.context = avcodec_alloc_context3(av.audio.codec); + if (!av.audio.context) { media_log.error("audio_decoder: Failed to 
allocate context for stream #%u in file '%s'", stream_index, path); has_error = true; @@ -346,7 +440,7 @@ namespace utils } // Open decoder - if (int err = avcodec_open2(av.context, av.codec, nullptr); err < 0) + if (int err = avcodec_open2(av.audio.context, av.audio.codec, nullptr); err < 0) { media_log.error("audio_decoder: Failed to open decoder for stream #%u in file '%s'. Error: %d='%s'", stream_index, path, err, av_error_to_string(err)); has_error = true; @@ -389,8 +483,8 @@ namespace utils } // Prepare to read data - av.frame = av_frame_alloc(); - if (!av.frame) + av.audio.frame = av_frame_alloc(); + if (!av.audio.frame) { media_log.error("audio_decoder: Error allocating the frame"); has_error = true; @@ -403,9 +497,9 @@ namespace utils std::unique_ptr packet_(packet); // Iterate through frames - while (thread_ctrl::state() != thread_state::aborting && av_read_frame(av.format, packet) >= 0) + while (thread_ctrl::state() != thread_state::aborting && av_read_frame(av.format_context, packet) >= 0) { - if (int err = avcodec_send_packet(av.context, packet); err < 0) + if (int err = avcodec_send_packet(av.audio.context, packet); err < 0) { media_log.error("audio_decoder: Queuing error: %d='%s'", err, av_error_to_string(err)); has_error = true; @@ -414,7 +508,7 @@ namespace utils while (thread_ctrl::state() != thread_state::aborting) { - if (int err = avcodec_receive_frame(av.context, av.frame); err < 0) + if (int err = avcodec_receive_frame(av.audio.context, av.audio.frame); err < 0) { if (err == AVERROR(EAGAIN) || err == averror_eof) break; @@ -427,7 +521,7 @@ namespace utils // Resample frames u8* buffer; const int align = 1; - const int buffer_size = av_samples_alloc(&buffer, nullptr, dst_channels, av.frame->nb_samples, dst_format, align); + const int buffer_size = av_samples_alloc(&buffer, nullptr, dst_channels, av.audio.frame->nb_samples, dst_format, align); if (buffer_size < 0) { media_log.error("audio_decoder: Error allocating buffer: %d='%s'", buffer_size, 
av_error_to_string(buffer_size)); @@ -435,7 +529,7 @@ namespace utils return; } - const int frame_count = swr_convert(av.swr, &buffer, av.frame->nb_samples, const_cast(av.frame->data), av.frame->nb_samples); + const int frame_count = swr_convert(av.swr, &buffer, av.audio.frame->nb_samples, const_cast(av.audio.frame->data), av.audio.frame->nb_samples); if (frame_count < 0) { media_log.error("audio_decoder: Error converting frame: %d='%s'", frame_count, av_error_to_string(frame_count)); @@ -450,25 +544,10 @@ namespace utils std::scoped_lock lock(m_mtx); data.resize(m_size + buffer_size); - if (m_swap_endianness) - { - // The format is float 32bit per channel. - const auto write_byteswapped = [](const void* src, void* dst) -> void - { - *static_cast(dst) = *static_cast*>(src); - }; + // The format is float 32bit per channel. + copy_samples(buffer, &data[m_size], buffer_size / sizeof(f32), m_swap_endianness); - for (size_t i = 0; i < (buffer_size - sizeof(f32)); i += sizeof(f32)) - { - write_byteswapped(buffer + i, data.data() + m_size + i); - } - } - else - { - memcpy(&data[m_size], buffer, buffer_size); - } - - const s64 timestamp_ms = stream->time_base.den ? (1000 * av.frame->best_effort_timestamp * stream->time_base.num) / stream->time_base.den : 0; + const s64 timestamp_ms = stream->time_base.den ? 
(1000 * av.audio.frame->best_effort_timestamp * stream->time_base.num) / stream->time_base.den : 0; timestamps_ms.push_back({m_size, timestamp_ms}); m_size += buffer_size; } @@ -476,7 +555,7 @@ namespace utils if (buffer) av_free(buffer); - media_log.notice("audio_decoder: decoded frame_count=%d buffer_size=%d timestamp_us=%d", frame_count, buffer_size, av.frame->best_effort_timestamp); + media_log.notice("audio_decoder: decoded frame_count=%d buffer_size=%d timestamp_us=%d", frame_count, buffer_size, av.audio.frame->best_effort_timestamp); } } }; @@ -535,7 +614,7 @@ namespace utils } video_encoder::video_encoder() - : utils::image_sink() + : utils::video_sink() { } @@ -549,9 +628,9 @@ namespace utils return m_path; } - s64 video_encoder::last_pts() const + s64 video_encoder::last_video_pts() const { - return m_last_pts; + return m_last_video_pts; } void video_encoder::set_path(const std::string& path) @@ -594,6 +673,11 @@ namespace utils m_sample_rate = sample_rate; } + void video_encoder::set_audio_channels(u32 channels) + { + m_channels = channels; + } + void video_encoder::set_audio_bitrate(u32 bitrate) { m_audio_bitrate_bps = bitrate; @@ -604,16 +688,6 @@ namespace utils m_audio_codec_id = codec_id; } - void video_encoder::add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) - { - // Do not allow new frames while flushing - if (m_flush) - return; - - std::lock_guard lock(m_mtx); - m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame)); - } - void video_encoder::pause(bool flush) { if (m_thread) @@ -658,6 +732,7 @@ namespace utils std::lock_guard lock(m_mtx); m_frames_to_encode.clear(); + m_samples_to_encode.clear(); has_error = false; m_flush = false; m_paused = false; @@ -675,7 +750,8 @@ namespace utils return; } - m_last_pts = 0; + m_last_audio_pts = 0; + m_last_video_pts = 0; stop(); @@ -692,7 +768,21 @@ namespace utils { m_running = true; - // TODO: audio encoding + 
av_log_set_callback([](void* avcl, int level, const char* fmt, va_list vl) -> void + { + constexpr int line_size = 1024; + char line[line_size]{}; + int print_prefix = 1; + + if (int err = av_log_format_line2(avcl, level, fmt, vl, line, line_size, &print_prefix); err < 0) + { + media_log.error("av_log: av_log_format_line2 failed. Error: %d='%s'", err, av_error_to_string(err)); + return; + } + + media_log.error("av_log: %s", line); + }); + av_log_set_level(AV_LOG_TRACE); // Reset variables at all costs scoped_av av; @@ -702,38 +792,38 @@ namespace utils m_running = false; }; - const AVPixelFormat out_format = static_cast(m_out_format.av_pixel_format); - const char* av_output_format = nullptr; - - const auto find_format = [&](const AVCodec* codec) -> const char* + // Let's list the encoders first + std::vector audio_codecs; + std::vector video_codecs; + void* opaque = nullptr; + while (const AVCodec* codec = av_codec_iterate(&opaque)) { - if (!codec) - return nullptr; + if (codec->type == AVMediaType::AVMEDIA_TYPE_AUDIO) + { + media_log.notice("video_encoder: Found audio codec %d = %s", static_cast(codec->id), codec->name); + audio_codecs.push_back(codec); + } + else if (codec->type == AVMediaType::AVMEDIA_TYPE_VIDEO) + { + media_log.notice("video_encoder: Found video codec %d = %s", static_cast(codec->id), codec->name); + video_codecs.push_back(codec); + } + } + const AVPixelFormat out_pix_format = static_cast(m_out_format.av_pixel_format); + + const auto find_format = [&](AVCodecID video_codec, AVCodecID audio_codec) -> const AVOutputFormat* + { // Try to find a preferable output format std::vector oformats; void* opaque = nullptr; for (const AVOutputFormat* oformat = av_muxer_iterate(&opaque); !!oformat; oformat = av_muxer_iterate(&opaque)) { - if (avformat_query_codec(oformat, codec->id, FF_COMPLIANCE_STRICT) == 1) + if (avformat_query_codec(oformat, video_codec, FF_COMPLIANCE_STRICT) == 1 && + avformat_query_codec(oformat, audio_codec, FF_COMPLIANCE_STRICT) == 
1) { - media_log.notice("video_encoder: Found output format '%s'", oformat->name); - - switch (codec->id) - { - case AV_CODEC_ID_MPEG4: - if (strcmp(oformat->name, "avi") == 0) - return oformat->name; - break; - case AV_CODEC_ID_H264: - case AV_CODEC_ID_MJPEG: - // TODO - break; - default: - break; - } - + media_log.notice("video_encoder: Found output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(video_codec), static_cast(audio_codec)); oformats.push_back(oformat); } } @@ -742,168 +832,294 @@ namespace utils if (!oformats.empty() && oformats.front()) { const AVOutputFormat* oformat = oformats.front(); - media_log.notice("video_encoder: Falling back to output format '%s'", oformat->name); - return oformat->name; + media_log.notice("video_encoder: Falling back to output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(video_codec), static_cast(audio_codec)); + return oformat; } return nullptr; }; - AVCodecID used_codec = static_cast(m_video_codec_id); + const AVOutputFormat* out_format = find_format(static_cast(m_video_codec_id), static_cast(m_audio_codec_id)); - // Find specified codec first - if (const AVCodec* encoder = avcodec_find_encoder(used_codec); !!encoder) + if (out_format) { - media_log.success("video_encoder: Found requested video_codec %d = %s", static_cast(used_codec), encoder->name); - av_output_format = find_format(encoder); - - if (av_output_format) - { - media_log.success("video_encoder: Found requested output format '%s'", av_output_format); - } - else - { - media_log.error("video_encoder: Could not find a format for the requested video_codec %d = %s", static_cast(used_codec), encoder->name); - } + media_log.success("video_encoder: Found requested output format '%s'", out_format->name); } else { - media_log.error("video_encoder: Could not find requested video_codec %d", static_cast(used_codec)); - } + media_log.error("video_encoder: Could not find a format for the requested video_codec %d and 
audio_codec %d", m_video_codec_id, m_audio_codec_id); - // Fallback to some other codec - if (!av_output_format) - { - void* opaque = nullptr; - for (const AVCodec* codec = av_codec_iterate(&opaque); !!codec; codec = av_codec_iterate(&opaque)) + // Fallback to some other codec + for (const AVCodec* video_codec : video_codecs) { - if (av_codec_is_encoder(codec)) + for (const AVCodec* audio_codec : audio_codecs) { - media_log.notice("video_encoder: Found video_codec %d = %s", static_cast(codec->id), codec->name); - av_output_format = find_format(codec); + out_format = find_format(video_codec->id, audio_codec->id); - if (av_output_format) + if (out_format) { - media_log.success("video_encoder: Found fallback output format '%s'", av_output_format); + media_log.success("video_encoder: Found fallback output format '%s'", out_format->name); break; } } + + if (out_format) + { + break; + } } } - if (!av_output_format) + if (!out_format) { media_log.error("video_encoder: Could not find any output format"); has_error = true; return; } - if (int err = avformat_alloc_output_context2(&av.format, nullptr, av_output_format, path.c_str()); err < 0) + if (int err = avformat_alloc_output_context2(&av.format_context, out_format, nullptr, nullptr); err < 0) { - media_log.error("video_encoder: avformat_alloc_output_context2 failed. Error: %d='%s'", err, av_error_to_string(err)); + media_log.error("video_encoder: avformat_alloc_output_context2 for '%s' failed. Error: %d='%s'", out_format->name, err, av_error_to_string(err)); has_error = true; return; } - if (!av.format) + if (!av.format_context) { media_log.error("video_encoder: avformat_alloc_output_context2 failed"); has_error = true; return; } - if (!(av.codec = avcodec_find_encoder(av.format->oformat->video_codec))) + const auto create_context = [this, &av](AVCodecID codec_id, bool is_video) -> bool + { + const std::string type = is_video ? "video" : "audio"; + scoped_av::ctx& ctx = is_video ? 
av.video : av.audio; + + if (is_video) + { + if (!(ctx.codec = avcodec_find_encoder(av.format_context->oformat->video_codec))) + { + media_log.error("video_encoder: avcodec_find_encoder for video failed. video_codev=%d", static_cast(av.format_context->oformat->video_codec)); + return false; + } + } + else + { + if (!(ctx.codec = avcodec_find_encoder(av.format_context->oformat->audio_codec))) + { + media_log.error("video_encoder: avcodec_find_encoder for audio failed. audio_codec=%d", static_cast(av.format_context->oformat->audio_codec)); + return false; + } + } + + if (!(ctx.stream = avformat_new_stream(av.format_context, nullptr))) + { + media_log.error("video_encoder: avformat_new_stream for %s failed", type); + return false; + } + + ctx.stream->id = is_video ? 0 : 1; + + if (!(ctx.context = avcodec_alloc_context3(ctx.codec))) + { + media_log.error("video_encoder: avcodec_alloc_context3 for %s failed", type); + return false; + } + + if (av.format_context->oformat->flags & AVFMT_GLOBALHEADER) + { + ctx.context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + + return true; + }; + + if (!create_context(static_cast(m_video_codec_id), true)) { - media_log.error("video_encoder: avcodec_find_encoder failed"); has_error = true; return; } - if (!(av.stream = avformat_new_stream(av.format, nullptr))) + if (!create_context(static_cast(m_audio_codec_id), false)) { - media_log.error("video_encoder: avformat_new_stream failed"); has_error = true; return; } - av.stream->id = static_cast(av.format->nb_streams - 1); + media_log.error("video_encoder: using audio_codec = %d", static_cast(av.format_context->oformat->audio_codec)); + media_log.error("video_encoder: using sample_rate = %d", m_sample_rate); + media_log.error("video_encoder: using audio_bitrate = %d", m_audio_bitrate_bps); + media_log.error("video_encoder: using audio channels = %d", m_channels); + media_log.error("video_encoder: using video_codec = %d", static_cast(av.format_context->oformat->video_codec)); + 
media_log.error("video_encoder: using video_bitrate = %d", m_video_bitrate_bps); + media_log.error("video_encoder: using out width = %d", m_out_format.width); + media_log.error("video_encoder: using out height = %d", m_out_format.height); + media_log.error("video_encoder: using framerate = %d", m_framerate); + media_log.error("video_encoder: using gop_size = %d", m_gop_size); + media_log.error("video_encoder: using max_b_frames = %d", m_max_b_frames); - if (!(av.context = avcodec_alloc_context3(av.codec))) + // select audio parameters supported by the encoder + if (av.audio.context) { - media_log.error("video_encoder: avcodec_alloc_context3 failed"); - has_error = true; - return; + if (const AVChannelLayout* ch_layout = select_channel_layout(av.audio.codec, m_channels)) + { + if (int err = av_channel_layout_copy(&av.audio.context->ch_layout, ch_layout); err != 0) + { + media_log.error("video_encoder: av_channel_layout_copy failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + } + else + { + media_log.error("video_encoder: select_channel_layout returned nullptr"); + has_error = true; + return; + } + + m_sample_rate = select_sample_rate(av.audio.codec); + + av.audio.context->codec_id = av.format_context->oformat->audio_codec; + av.audio.context->codec_type = AVMEDIA_TYPE_AUDIO; + av.audio.context->bit_rate = m_audio_bitrate_bps; + av.audio.context->sample_rate = m_sample_rate; + av.audio.context->time_base = {.num = 1, .den = av.audio.context->sample_rate}; + av.audio.context->sample_fmt = AV_SAMPLE_FMT_FLTP; // AV_SAMPLE_FMT_FLT is not supported in regular AC3 + av.audio.stream->time_base = av.audio.context->time_base; + + // check that the encoder supports the format + if (!check_sample_fmt(av.audio.codec, av.audio.context->sample_fmt)) + { + media_log.error("video_encoder: Audio encoder does not support sample format %s", av_get_sample_fmt_name(av.audio.context->sample_fmt)); + has_error = true; + return; + } + + if (int err = 
avcodec_open2(av.audio.context, av.audio.codec, nullptr); err != 0) + { + media_log.error("video_encoder: avcodec_open2 for audio failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (!(av.audio.packet = av_packet_alloc())) + { + media_log.error("video_encoder: av_packet_alloc for audio packet failed"); + has_error = true; + return; + } + + if (!(av.audio.frame = av_frame_alloc())) + { + media_log.error("video_encoder: av_frame_alloc for audio frame failed"); + has_error = true; + return; + } + + av.audio.frame->format = AV_SAMPLE_FMT_FLTP; + av.audio.frame->nb_samples = av.audio.context->frame_size; + + if (int err = av_channel_layout_copy(&av.audio.frame->ch_layout, &av.audio.context->ch_layout); err < 0) + { + media_log.error("video_encoder: av_channel_layout_copy for audio frame failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (int err = av_frame_get_buffer(av.audio.frame, 0); err < 0) + { + media_log.error("video_encoder: av_frame_get_buffer for audio frame failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (int err = avcodec_parameters_from_context(av.audio.stream->codecpar, av.audio.context); err < 0) + { + media_log.error("video_encoder: avcodec_parameters_from_context for audio failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } } - media_log.notice("video_encoder: using video_codec = %d", static_cast(av.format->oformat->video_codec)); - media_log.notice("video_encoder: using video_bitrate = %d", m_video_bitrate_bps); - media_log.notice("video_encoder: using out width = %d", m_out_format.width); - media_log.notice("video_encoder: using out height = %d", m_out_format.height); - media_log.notice("video_encoder: using framerate = %d", m_framerate); - media_log.notice("video_encoder: using gop_size = %d", m_gop_size); - media_log.notice("video_encoder: using max_b_frames = %d", m_max_b_frames); - - av.context->codec_id = av.format->oformat->video_codec; - av.context->bit_rate = m_video_bitrate_bps; - av.context->width = static_cast(m_out_format.width); - av.context->height = static_cast(m_out_format.height); - av.context->time_base = {.num = 1, .den = static_cast(m_framerate)}; - av.context->framerate = {.num = static_cast(m_framerate), .den = 1}; - av.context->pix_fmt = out_format; - av.context->gop_size = m_gop_size; - av.context->max_b_frames = m_max_b_frames; - - if (av.format->oformat->flags & AVFMT_GLOBALHEADER) + // select video parameters supported by the encoder + if (av.video.context) { - av.context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + av.video.context->codec_id = av.format_context->oformat->video_codec; + av.video.context->codec_type = AVMEDIA_TYPE_VIDEO; + av.video.context->frame_number = 0; + av.video.context->bit_rate = m_video_bitrate_bps; + av.video.context->width = static_cast(m_out_format.width); + av.video.context->height = static_cast(m_out_format.height); + av.video.context->time_base = {.num = 1, .den = static_cast(m_framerate)}; + av.video.context->framerate = {.num = static_cast(m_framerate), .den = 1}; + av.video.context->pix_fmt = out_pix_format; + av.video.context->gop_size = m_gop_size; + av.video.context->max_b_frames = m_max_b_frames; + av.video.stream->time_base = 
av.video.context->time_base; + + if (int err = avcodec_open2(av.video.context, av.video.codec, nullptr); err != 0) + { + media_log.error("video_encoder: avcodec_open2 for video failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (!(av.video.packet = av_packet_alloc())) + { + media_log.error("video_encoder: av_packet_alloc for video packet failed"); + has_error = true; + return; + } + + if (!(av.video.frame = av_frame_alloc())) + { + media_log.error("video_encoder: av_frame_alloc for video frame failed"); + has_error = true; + return; + } + + av.video.frame->format = av.video.context->pix_fmt; + av.video.frame->width = av.video.context->width; + av.video.frame->height = av.video.context->height; + + if (int err = av_frame_get_buffer(av.video.frame, 0); err < 0) + { + media_log.error("video_encoder: av_frame_get_buffer for video frame failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (int err = avcodec_parameters_from_context(av.video.stream->codecpar, av.video.context); err < 0) + { + media_log.error("video_encoder: avcodec_parameters_from_context for video failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } } - if (int err = avcodec_open2(av.context, av.codec, nullptr); err != 0) + media_log.error("video_encoder: av_dump_format"); + for (u32 i = 0; i < av.format_context->nb_streams; i++) { - media_log.error("video_encoder: avcodec_open2 failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; + av_dump_format(av.format_context, i, path.c_str(), 1); } - if (!(av.frame = av_frame_alloc())) + // open the output file, if needed + if (!(av.format_context->flags & AVFMT_NOFILE)) { - media_log.error("video_encoder: av_frame_alloc failed"); - has_error = true; - return; + if (int err = avio_open(&av.format_context->pb, path.c_str(), AVIO_FLAG_WRITE); err != 0) + { + media_log.error("video_encoder: avio_open failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } } - av.frame->format = av.context->pix_fmt; - av.frame->width = av.context->width; - av.frame->height = av.context->height; - - if (int err = av_frame_get_buffer(av.frame, 32); err < 0) - { - media_log.error("video_encoder: av_frame_get_buffer failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; - } - - if (int err = avcodec_parameters_from_context(av.stream->codecpar, av.context); err < 0) - { - media_log.error("video_encoder: avcodec_parameters_from_context failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; - } - - av_dump_format(av.format, 0, path.c_str(), 1); - - if (int err = avio_open(&av.format->pb, path.c_str(), AVIO_FLAG_WRITE); err != 0) - { - media_log.error("video_encoder: avio_open failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; - } - - if (int err = avformat_write_header(av.format, nullptr); err < 0) + if (int err = avformat_write_header(av.format_context, nullptr); err < 0) { media_log.error("video_encoder: avformat_write_header failed. Error: %d='%s'", err, av_error_to_string(err)); - if (int err = avio_close(av.format->pb); err != 0) + if (int err = avio_close(av.format_context->pb); err != 0) { media_log.error("video_encoder: avio_close failed. 
Error: %d='%s'", err, av_error_to_string(err)); } @@ -912,21 +1128,11 @@ namespace utils return; } - const auto flush = [&]() + const auto flush = [&](scoped_av::ctx& ctx) { - while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error) + while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error && ctx.context) { - AVPacket* packet = av_packet_alloc(); - std::unique_ptr packet_(packet); - - if (!packet) - { - media_log.error("video_encoder: av_packet_alloc failed"); - has_error = true; - return; - } - - if (int err = avcodec_receive_packet(av.context, packet); err < 0) + if (int err = avcodec_receive_packet(ctx.context, ctx.packet); err < 0) { if (err == AVERROR(EAGAIN) || err == averror_eof) break; @@ -936,133 +1142,361 @@ namespace utils return; } - av_packet_rescale_ts(packet, av.context->time_base, av.stream->time_base); - packet->stream_index = av.stream->index; + av_packet_rescale_ts(ctx.packet, ctx.context->time_base, ctx.stream->time_base); + ctx.packet->stream_index = ctx.stream->index; - if (int err = av_interleaved_write_frame(av.format, packet); err < 0) + if (int err = av_write_frame(av.format_context, ctx.packet); err < 0) { - media_log.error("video_encoder: av_interleaved_write_frame failed. Error: %d='%s'", err, av_error_to_string(err)); + media_log.error("video_encoder: av_write_frame failed. Error: %d='%s'", err, av_error_to_string(err)); has_error = true; return; } } }; - s64 last_pts = -1; + u32 audio_sample_remainder = 0; + s64 last_audio_pts = -1; + s64 last_audio_frame_pts = 0; + s64 last_video_pts = -1; + + // Allocate audio buffer for our audio frame + std::vector audio_samples; + u32 audio_samples_sample_count = 0; + const bool sample_fmt_is_planar = av.audio.context && av_sample_fmt_is_planar(av.audio.context->sample_fmt) != 0; + const int sample_fmt_bytes = av.audio.context ? 
av_get_bytes_per_sample(av.audio.context->sample_fmt) : 0; + ensure(sample_fmt_bytes == sizeof(f32)); // We only support FLT or FLTP for now + + if (av.audio.frame) + { + audio_samples.resize(av.audio.frame->nb_samples * av.audio.frame->ch_layout.nb_channels * sizeof(f32)); + last_audio_frame_pts -= av.audio.frame->nb_samples; + } + + encoder_sample last_samples; + u32 leftover_sample_count = 0; while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error) { + // Fetch video frame encoder_frame frame_data; + bool got_frame = false; { m_mtx.lock(); if (m_frames_to_encode.empty()) { m_mtx.unlock(); + } + else + { + frame_data = std::move(m_frames_to_encode.front()); + m_frames_to_encode.pop_front(); + m_mtx.unlock(); - if (m_flush) + // Calculate presentation timestamp. + const s64 pts = get_pts(frame_data.timestamp_ms); + + // We need to skip this frame if it has the same timestamp. + if (pts <= last_video_pts) { - m_flush = false; + media_log.trace("video_encoder: skipping frame. last_pts=%d, pts=%d", last_video_pts, pts); + } + else if (av.video.context) + { + media_log.trace("video_encoder: adding new frame. timestamp=%d", frame_data.timestamp_ms); - if (!m_paused) + got_frame = true; + + if (int err = av_frame_make_writable(av.video.frame); err < 0) { - // We only stop the thread after a flush if we are not paused + media_log.error("video_encoder: av_frame_make_writable failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; break; } + + u8* in_data[4]{}; + int in_line[4]{}; + + const AVPixelFormat in_format = static_cast(frame_data.av_pixel_format); + + if (int ret = av_image_fill_linesizes(in_line, in_format, frame_data.width); ret < 0) + { + fmt::throw_exception("video_encoder: av_image_fill_linesizes failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); + } + + if (int ret = av_image_fill_pointers(in_data, in_format, frame_data.height, frame_data.data.data(), in_line); ret < 0) + { + fmt::throw_exception("video_encoder: av_image_fill_pointers failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); + } + + // Update the context in case the frame format has changed + av.sws = sws_getCachedContext(av.sws, frame_data.width, frame_data.height, in_format, + av.video.context->width, av.video.context->height, out_pix_format, SWS_BICUBIC, nullptr, nullptr, nullptr); + if (!av.sws) + { + media_log.error("video_encoder: sws_getCachedContext failed"); + has_error = true; + break; + } + + if (int err = sws_scale(av.sws, in_data, in_line, 0, frame_data.height, av.video.frame->data, av.video.frame->linesize); err < 0) + { + media_log.error("video_encoder: sws_scale failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + break; + } + + av.video.frame->pts = pts; + + if (int err = avcodec_send_frame(av.video.context, av.video.frame); err < 0) + { + media_log.error("video_encoder: avcodec_send_frame for video failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + break; + } + + flush(av.video); + + last_video_pts = av.video.frame->pts; + m_last_video_pts = last_video_pts; + } + } + } + + // Fetch audio sample + encoder_sample sample_data; + bool got_sample = false; + { + m_audio_mtx.lock(); + + if (m_samples_to_encode.empty()) + { + m_audio_mtx.unlock(); + } + else + { + sample_data = std::move(m_samples_to_encode.front()); + m_samples_to_encode.pop_front(); + m_audio_mtx.unlock(); + + if (sample_data.channels != av.audio.frame->ch_layout.nb_channels) + { + fmt::throw_exception("video_encoder: Audio sample channel count %d does not match frame channel count %d", sample_data.channels, av.audio.frame->ch_layout.nb_channels); } - // We only actually pause after we process all frames - const u64 sleeptime = m_paused ? 10000 : 1; - thread_ctrl::wait_for(sleeptime); - continue; + // Calculate presentation timestamp. + const s64 pts = get_audio_pts(sample_data.timestamp_us); + + // We need to skip this frame if it has the same timestamp. + if (pts <= last_audio_pts) + { + media_log.error("video_encoder: skipping sample. last_pts=%d, pts=%d", last_audio_pts, pts); + } + else if (av.audio.context) + { + media_log.trace("video_encoder: adding new sample. timestamp_us=%d", sample_data.timestamp_us); + + static constexpr bool swap_endianness = false; + + const auto send_frame = [&]() + { + if (audio_samples_sample_count < static_cast(av.audio.frame->nb_samples)) + { + return; + } + + audio_samples_sample_count = 0; + got_sample = true; + + if (int err = av_frame_make_writable(av.audio.frame); err < 0) + { + media_log.error("video_encoder: av_frame_make_writable failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (sample_fmt_is_planar) + { + const int channels = av.audio.frame->ch_layout.nb_channels; + const int samples = av.audio.frame->nb_samples; + + for (int ch = 0; ch < channels; ch++) + { + f32* dst = reinterpret_cast(av.audio.frame->data[ch]); + + for (int sample = 0; sample < samples; sample++) + { + dst[sample] = *reinterpret_cast(&audio_samples[(sample * channels + ch) * sizeof(f32)]); + } + } + } + else + { + std::memcpy(av.audio.frame->data[0], audio_samples.data(), audio_samples.size()); + } + + av.audio.frame->pts = last_audio_frame_pts + av.audio.frame->nb_samples; + + if (int err = avcodec_send_frame(av.audio.context, av.audio.frame); err < 0) + { + media_log.error("video_encoder: avcodec_send_frame failed: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + flush(av.audio); + + last_audio_frame_pts = av.audio.frame->pts; + }; + + const auto add_encoder_sample = [&](bool add_new_sample, u32 silence_to_add = 0) + { + const auto update_last_pts = [&](u32 samples_to_add) + { + const u32 sample_count = audio_sample_remainder + samples_to_add; + const u32 pts_to_add = sample_count / m_samples_per_block; + audio_sample_remainder = sample_count % m_samples_per_block; + last_audio_pts += pts_to_add; + }; + + // Copy as many old samples to our audio frame as possible + if (leftover_sample_count > 0) + { + const u32 samples_to_add = std::min(leftover_sample_count, av.audio.frame->nb_samples - audio_samples_sample_count); + + if (samples_to_add > 0) + { + const u8* src = &last_samples.data[(last_samples.sample_count - leftover_sample_count) * last_samples.channels * sizeof(f32)]; + u8* dst = &audio_samples[audio_samples_sample_count * last_samples.channels * sizeof(f32)]; + copy_samples(src, dst, samples_to_add * last_samples.channels, swap_endianness); + audio_samples_sample_count += samples_to_add; + leftover_sample_count -= samples_to_add; + 
update_last_pts(samples_to_add); + } + + if (samples_to_add < leftover_sample_count) + { + media_log.error("video_encoder: audio frame buffer is already filled entirely by last sample package..."); + } + } + else if (silence_to_add > 0) + { + const u32 samples_to_add = std::min(silence_to_add, av.audio.frame->nb_samples - audio_samples_sample_count); + + if (samples_to_add > 0) + { + u8* dst = &audio_samples[audio_samples_sample_count * av.audio.frame->ch_layout.nb_channels * sizeof(f32)]; + std::memset(dst, 0, samples_to_add * sample_data.channels * sizeof(f32)); + audio_samples_sample_count += samples_to_add; + update_last_pts(samples_to_add); + } + } + else if (add_new_sample) + { + // Copy as many new samples to our audio frame as possible + const u32 samples_to_add = std::min(sample_data.sample_count, av.audio.frame->nb_samples - audio_samples_sample_count); + + if (samples_to_add > 0) + { + const u8* src = sample_data.data.data(); + u8* dst = &audio_samples[audio_samples_sample_count * sample_data.channels * sizeof(f32)]; + copy_samples(src, dst, samples_to_add * sample_data.channels, swap_endianness); + audio_samples_sample_count += samples_to_add; + update_last_pts(samples_to_add); + } + + if (samples_to_add < sample_data.sample_count) + { + // Save this sample package for the next loop if it wasn't fully used. + leftover_sample_count = sample_data.sample_count - samples_to_add; + } + else + { + // Mark this sample package as fully used. 
+ leftover_sample_count = 0; + } + + last_samples = std::move(sample_data); + } + + send_frame(); + }; + + for (u32 sample = 0; !has_error;) + { + if (leftover_sample_count > 0) + { + // Add leftover samples + add_encoder_sample(false); + } + else if (pts > (last_audio_pts + 1)) + { + // Add silence to fill the gap + const u32 silence_to_add = pts - (last_audio_pts + 1); + add_encoder_sample(false, silence_to_add); + } + else if (sample == 0) + { + // Add new samples + add_encoder_sample(true); + sample++; + } + else + { + break; + } + } + + m_last_audio_pts = last_audio_pts; + } + } + } + + if (!got_frame && !got_sample) + { + if (m_flush) + { + m_flush = false; + + if (!m_paused) + { + // We only stop the thread after a flush if we are not paused + break; + } } - frame_data = std::move(m_frames_to_encode.front()); - m_frames_to_encode.pop_front(); - - m_mtx.unlock(); - - media_log.trace("video_encoder: adding new frame. timestamp=%d", frame_data.timestamp_ms); - } - - // Calculate presentation timestamp. - const s64 pts = get_pts(frame_data.timestamp_ms); - - // We need to skip this frame if it has the same timestamp. - if (pts <= last_pts) - { - media_log.notice("video_encoder: skipping frame. last_pts=%d, pts=%d", last_pts, pts); + // We only actually pause after we process all frames + const u64 sleeptime_us = m_paused ? 10000 : 1; + thread_ctrl::wait_for(sleeptime_us); continue; } - - if (int err = av_frame_make_writable(av.frame); err < 0) - { - media_log.error("video_encoder: av_frame_make_writable failed. 
Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - break; - } - - u8* in_data[4]{}; - int in_line[4]{}; - - const AVPixelFormat in_format = static_cast(frame_data.av_pixel_format); - - if (int ret = av_image_fill_linesizes(in_line, in_format, frame_data.width); ret < 0) - { - fmt::throw_exception("video_encoder: av_image_fill_linesizes failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); - } - - if (int ret = av_image_fill_pointers(in_data, in_format, frame_data.height, frame_data.data.data(), in_line); ret < 0) - { - fmt::throw_exception("video_encoder: av_image_fill_pointers failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); - } - - // Update the context in case the frame format has changed - av.sws = sws_getCachedContext(av.sws, frame_data.width, frame_data.height, in_format, - av.context->width, av.context->height, out_format, SWS_BICUBIC, nullptr, nullptr, nullptr); - if (!av.sws) - { - media_log.error("video_encoder: sws_getCachedContext failed"); - has_error = true; - break; - } - - if (int err = sws_scale(av.sws, in_data, in_line, 0, frame_data.height, av.frame->data, av.frame->linesize); err < 0) - { - media_log.error("video_encoder: sws_scale failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - break; - } - - av.frame->pts = pts; - - if (int err = avcodec_send_frame(av.context, av.frame); err < 0) - { - media_log.error("video_encoder: avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - break; - } - - flush(); - - last_pts = av.frame->pts; - - m_last_pts = last_pts; } - if (int err = avcodec_send_frame(av.context, nullptr); err != 0) + if (av.video.context) { - media_log.error("video_encoder: final avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err)); + if (int err = avcodec_send_frame(av.video.context, nullptr); err != 0) + { + media_log.error("video_encoder: final avcodec_send_frame failed. 
Error: %d='%s'", err, av_error_to_string(err)); + } } - flush(); + if (av.audio.context) + { + if (int err = avcodec_send_frame(av.audio.context, nullptr); err != 0) + { + media_log.error("video_encoder: final avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err)); + } + } - if (int err = av_write_trailer(av.format); err != 0) + flush(av.video); + flush(av.audio); + + if (int err = av_write_trailer(av.format_context); err != 0) { media_log.error("video_encoder: av_write_trailer failed. Error: %d='%s'", err, av_error_to_string(err)); } - if (int err = avio_close(av.format->pb); err != 0) + if (int err = avio_close(av.format_context->pb); err != 0) { media_log.error("video_encoder: avio_close failed. Error: %d='%s'", err, av_error_to_string(err)); } diff --git a/rpcs3/util/media_utils.h b/rpcs3/util/media_utils.h index 2718a80617..8bbea8c7ee 100644 --- a/rpcs3/util/media_utils.h +++ b/rpcs3/util/media_utils.h @@ -88,7 +88,7 @@ namespace utils std::unique_ptr>> m_thread; }; - class video_encoder : public utils::image_sink + class video_encoder : public utils::video_sink { public: video_encoder(); @@ -108,7 +108,7 @@ namespace utils }; std::string path() const; - s64 last_pts() const; + s64 last_video_pts() const; void set_path(const std::string& path); void set_framerate(u32 framerate); @@ -118,16 +118,17 @@ namespace utils void set_max_b_frames(s32 max_b_frames); void set_gop_size(s32 gop_size); void set_sample_rate(u32 sample_rate); + void set_audio_channels(u32 channels); void set_audio_bitrate(u32 bitrate); void set_audio_codec(s32 codec_id); - void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) override; void pause(bool flush = true); void stop(bool flush = true) override; void encode(); private: std::string m_path; - s64 m_last_pts = 0; + s64 m_last_audio_pts = 0; + s64 m_last_video_pts = 0; // Thread control std::unique_ptr>> m_thread; @@ -136,14 +137,14 @@ namespace utils // Video 
parameters u32 m_video_bitrate_bps = 0; - s32 m_video_codec_id = 12; // AV_CODEC_ID_MPEG4; + s32 m_video_codec_id = 12; // AV_CODEC_ID_MPEG4 s32 m_max_b_frames = 2; s32 m_gop_size = 12; frame_format m_out_format{}; // Audio parameters - u32 m_sample_rate = 48000; - u32 m_audio_bitrate_bps = 96000; - s32 m_audio_codec_id = 86018; // AV_CODEC_ID_AAC + u32 m_channels = 2; + u32 m_audio_bitrate_bps = 320000; + s32 m_audio_codec_id = 86019; // AV_CODEC_ID_AC3 }; } diff --git a/rpcs3/util/video_provider.cpp b/rpcs3/util/video_provider.cpp index d919137733..d86da5ecf1 100644 --- a/rpcs3/util/video_provider.cpp +++ b/rpcs3/util/video_provider.cpp @@ -34,37 +34,37 @@ namespace utils g_recording_mode = recording_mode::stopped; } - bool video_provider::set_image_sink(std::shared_ptr sink, recording_mode type) + bool video_provider::set_video_sink(std::shared_ptr sink, recording_mode type) { - media_log.notice("video_provider: setting new image sink. sink=%d, type=%s", !!sink, type); + media_log.notice("video_provider: setting new video sink. sink=%d, type=%s", !!sink, type); if (type == recording_mode::stopped) { // Prevent misuse. type is supposed to be a valid state. 
- media_log.error("video_provider: cannot set image sink with type %s", type); + media_log.error("video_provider: cannot set video sink with type %s", type); return false; } std::lock_guard lock(m_mutex); - if (m_image_sink) + if (m_video_sink) { // cell has preference if (m_type == recording_mode::cell && m_type != type) { - media_log.warning("video_provider: cannot set image sink with type %s if type %s is active", type, m_type); + media_log.warning("video_provider: cannot set video sink with type %s if type %s is active", type, m_type); return false; } - if (m_type != type || m_image_sink != sink) + if (m_type != type || m_video_sink != sink) { - media_log.warning("video_provider: stopping current image sink of type %s", m_type); - m_image_sink->stop(); + media_log.warning("video_provider: stopping current video sink of type %s", m_type); + m_video_sink->stop(); } } m_type = sink ? type : recording_mode::stopped; - m_image_sink = sink; + m_video_sink = sink; if (m_type == recording_mode::stopped) { @@ -84,19 +84,17 @@ namespace utils { std::lock_guard lock(m_mutex); - if (!m_image_sink) + if (!m_video_sink) return false; const usz timestamp_ms = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms; - const s64 pts = m_image_sink->get_pts(timestamp_ms); - return pts > m_last_pts_incoming; + const s64 pts = m_video_sink->get_pts(timestamp_ms); + return pts > m_last_video_pts_incoming; } - void video_provider::present_frame(std::vector& data, u32 pitch, u32 width, u32 height, bool is_bgra) + recording_mode video_provider::check_state() { - std::lock_guard lock(m_mutex); - - if (!m_image_sink || m_image_sink->has_error) + if (!m_video_sink || m_video_sink->has_error) { g_recording_mode = recording_mode::stopped; rsx::overlays::queue_message(localized_string_id::RECORDING_ABORTED); @@ -105,33 +103,86 @@ namespace utils if (g_recording_mode == recording_mode::stopped) { m_active = false; - return; + return g_recording_mode; } if 
(!m_active.exchange(true)) { m_current_encoder_frame = 0; - m_last_pts_incoming = -1; + m_last_video_pts_incoming = -1; + m_last_audio_pts_incoming = -1; } - if (m_current_encoder_frame == 0) + if (m_current_encoder_frame == 0 && m_current_encoder_sample == 0) { m_encoder_start = steady_clock::now(); } - // Calculate presentation timestamp. - const usz timestamp_ms = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms; - const s64 pts = m_image_sink->get_pts(timestamp_ms); + return g_recording_mode; + } - // We can just skip this frame if it has the same timestamp. - if (pts <= m_last_pts_incoming) + void video_provider::present_frame(std::vector& data, u32 pitch, u32 width, u32 height, bool is_bgra) + { + std::lock_guard lock(m_mutex); + + if (check_state() == recording_mode::stopped) { return; } - m_last_pts_incoming = pts; + // Calculate presentation timestamp. + const usz timestamp_ms = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms; + const s64 pts = m_video_sink->get_pts(timestamp_ms); + // We can just skip this frame if it has the same timestamp. + if (pts <= m_last_video_pts_incoming) + { + return; + } + + m_last_video_pts_incoming = pts; m_current_encoder_frame++; - m_image_sink->add_frame(data, pitch, width, height, is_bgra ? AVPixelFormat::AV_PIX_FMT_BGRA : AVPixelFormat::AV_PIX_FMT_RGBA, timestamp_ms); + m_video_sink->add_frame(data, pitch, width, height, is_bgra ? 
AVPixelFormat::AV_PIX_FMT_BGRA : AVPixelFormat::AV_PIX_FMT_RGBA, timestamp_ms); + } + + bool video_provider::can_consume_sample() + { + std::lock_guard lock(m_mutex); + + if (!m_video_sink) + return false; + + const usz timestamp_us = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count() - (m_pause_time_ms * 1000ull); + const s64 pts = m_video_sink->get_audio_pts(timestamp_us); + return pts > m_last_audio_pts_incoming; + } + + void video_provider::present_samples(u8* buf, u32 sample_count, u16 channels) + { + if (!buf || !sample_count || !channels) + { + return; + } + + std::lock_guard lock(m_mutex); + + if (check_state() == recording_mode::stopped) + { + return; + } + + // Calculate presentation timestamp. + const usz timestamp_us = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count() - (m_pause_time_ms * 1000ull); + const s64 pts = m_video_sink->get_audio_pts(timestamp_us); + + // We can just skip this sample if it has the same timestamp. + if (pts <= m_last_audio_pts_incoming) + { + return; + } + + m_last_audio_pts_incoming = pts; + m_current_encoder_sample += sample_count; + m_video_sink->add_audio_samples(buf, sample_count, channels, timestamp_us); } } diff --git a/rpcs3/util/video_provider.h b/rpcs3/util/video_provider.h index 31a051a112..93955ab571 100644 --- a/rpcs3/util/video_provider.h +++ b/rpcs3/util/video_provider.h @@ -1,6 +1,6 @@ #pragma once -#include "image_sink.h" +#include "video_sink.h" enum class recording_mode { @@ -17,19 +17,27 @@ namespace utils video_provider() = default; ~video_provider(); - bool set_image_sink(std::shared_ptr sink, recording_mode type); + bool set_video_sink(std::shared_ptr sink, recording_mode type); void set_pause_time(usz pause_time_ms); + bool can_consume_frame(); void present_frame(std::vector& data, u32 pitch, u32 width, u32 height, bool is_bgra); + bool can_consume_sample(); + void present_samples(u8* buf, u32 sample_count, u16 channels); + private: + recording_mode 
check_state(); + recording_mode m_type = recording_mode::stopped; - std::shared_ptr m_image_sink; + std::shared_ptr m_video_sink; shared_mutex m_mutex{}; atomic_t m_active{false}; atomic_t m_current_encoder_frame{0}; + atomic_t m_current_encoder_sample{0}; steady_clock::time_point m_encoder_start{}; - s64 m_last_pts_incoming = -1; + s64 m_last_video_pts_incoming = -1; + s64 m_last_audio_pts_incoming = -1; usz m_pause_time_ms = 0; }; diff --git a/rpcs3/util/video_sink.h b/rpcs3/util/video_sink.h new file mode 100644 index 0000000000..d3d69c82d1 --- /dev/null +++ b/rpcs3/util/video_sink.h @@ -0,0 +1,104 @@ +#pragma once + +#include "util/types.hpp" +#include "util/atomic.hpp" +#include "Utilities/mutex.h" + +#include +#include + +namespace utils +{ + class video_sink + { + public: + video_sink() = default; + + virtual void stop(bool flush = true) = 0; + + void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) + { + // Do not allow new frames while flushing + if (m_flush) + return; + + std::lock_guard lock(m_mtx); + m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame)); + } + + void add_audio_samples(u8* buf, u32 sample_count, u16 channels, usz timestamp_us) + { + // Do not allow new samples while flushing + if (m_flush || !buf || !sample_count || !channels) + return; + + std::vector sample(buf, buf + sample_count * channels * sizeof(f32)); + std::lock_guard lock(m_audio_mtx); + m_samples_to_encode.emplace_back(timestamp_us, sample_count, channels, std::move(sample)); + } + + s64 get_pts(usz timestamp_ms) const + { + return static_cast(std::round((timestamp_ms * m_framerate) / 1000.f)); + } + + s64 get_audio_pts(usz timestamp_us) const + { + static constexpr f32 us_per_sec = 1000000.0f; + const f32 us_per_block = us_per_sec / (m_sample_rate / static_cast(m_samples_per_block)); + return static_cast(std::ceil(timestamp_us / us_per_block)); + } + + usz get_timestamp_ms(s64 
pts) const + { + return static_cast(std::round((pts * 1000) / static_cast(m_framerate))); + } + + usz get_audio_timestamp_us(s64 pts) const + { + return static_cast(std::round((pts * 1000) / static_cast(m_sample_rate))); + } + + atomic_t has_error{false}; + + struct encoder_frame + { + encoder_frame() = default; + encoder_frame(usz timestamp_ms, u32 pitch, u32 width, u32 height, s32 av_pixel_format, std::vector&& data) + : timestamp_ms(timestamp_ms), pitch(pitch), width(width), height(height), av_pixel_format(av_pixel_format), data(std::move(data)) + {} + + s64 pts = -1; // Optional + usz timestamp_ms = 0; + u32 pitch = 0; + u32 width = 0; + u32 height = 0; + s32 av_pixel_format = 0; // NOTE: Make sure this is a valid AVPixelFormat + std::vector data; + }; + + struct encoder_sample + { + encoder_sample() = default; + encoder_sample(usz timestamp_us, u32 sample_count, u16 channels, std::vector&& data) + : timestamp_us(timestamp_us), sample_count(sample_count), channels(channels), data(std::move(data)) + { + } + + usz timestamp_us = 0; + u32 sample_count = 0; + u16 channels = 0; + std::vector data; + }; + + protected: + shared_mutex m_mtx; + std::deque m_frames_to_encode; + shared_mutex m_audio_mtx; + std::deque m_samples_to_encode; + atomic_t m_flush = false; + u32 m_framerate = 30; + u32 m_sample_rate = 48000; + static constexpr u32 m_samples_per_block = 256; + }; +}