use crate::av::avio::{AVIOWriter, AVIO}; use crate::av::decoder::Decoder; use crate::av::dictionary::Dictionary; use crate::av::encoder::Encoder; use crate::av::format::Format; use crate::av::mov_avc::{annex_b_to_avc, AvcDecoderConfigurationRecord, NalBuffer, NalUnitType}; use crate::av::packet::Packet; use crate::av::resampler::Resampler; use crate::av::scaler::Scaler; use crate::av::stream::Stream; use crate::av::xcoder::XCoder; use crate::av::Rational; use crate::utils::SortedFrameBuffer; use ffmpeg_sys_next::AVMediaType::{AVMEDIA_TYPE_AUDIO, AVMEDIA_TYPE_VIDEO}; use ffmpeg_sys_next::AVPictureType::AV_PICTURE_TYPE_I; use ffmpeg_sys_next::AVPixelFormat::AV_PIX_FMT_YUV420P; use ffmpeg_sys_next::{avio_wb32, AVIOContext, AVPixelFormat, AV_TIME_BASE, SWS_BILINEAR}; use num_rational::Ratio; use std::cmp::min; use std::fs::{DirBuilder, File}; use std::io::Write; use std::ops::Mul; use std::option::Option::Some; pub struct AudioTranscoder { encoder: Encoder, decoder: Decoder, frame_buffer: SortedFrameBuffer, output_stream: Stream, input_stream: Stream, format: Format, output_path: String, resampler: Resampler, segment: u32, last_pts: i64, current_pts: i64, seconds_per_segment: u32, } const SECONDS_PER_SEGMENT: u32 = 5; impl AudioTranscoder { fn process_packet(&mut self, packet: Packet) -> Result<(), String> { self.decoder .send_packet(&packet) .map_err(|err| format!("Failed sending audio packet: {}", err))?; while let Some(frame) = self.decoder.read_frame()? { let resampled = self.resampler.convert(&frame)?; let resampled_pts = resampled.pts(); self.frame_buffer.insert_frame(resampled); let mut offset: f64 = 0f64; while let Some(drain_resampled) = self.resampler.drain()? { offset += (drain_resampled.nb_samples() as f64 / self.encoder.sample_rate() as f64) * self.input_stream.time_base().den() as f64; drain_resampled.set_pts(resampled_pts + offset as i64); self.frame_buffer.insert_frame(drain_resampled); } self.encode()?; } Ok(()) } #[inline] fn input_stream(&self) -> &Stream { &self.input_stream } fn start_segment(&self, force: bool) { if self.segment == 0 && !force { return; } unsafe { let pb = (*self.format.as_mut_ptr()).pb; avio_wb32(pb, 24); ffio_wfourcc(pb, b"styp"); ffio_wfourcc(pb, b"msdh"); avio_wb32(pb, 0); /* minor */ ffio_wfourcc(pb, b"msdh"); ffio_wfourcc(pb, b"msix"); } } fn write_header(&mut self) -> Result<(), String> { File::create(format!("{}/audio-init.mp4", &self.output_path)) .unwrap() .write_all(&mut self.format.avio_inner_mut().unwrap().buffer()) .map_err(|_| "Failed to write audio init segment".to_string())?; self.start_segment(true); self.format.write_packet_null()?; Ok(()) } fn write_segment(&mut self) -> Result<(), String> { if self.segment == 0 { self.write_header()?; } if let Some(avio) = self.format.avio.as_mut() { avio.flush() } let mut segment = self.format.avio_inner_mut().unwrap().buffer(); File::create(format!( "{}/audio-segment-{:0>5}.m4s", self.output_path, self.segment )) .unwrap() .write_all(&mut segment) .map_err(|_| format!("Failed to write audio segment {}", self.segment))?; self.start_segment(false); Ok(()) } fn encode(&mut self) -> Result<(), String> { while let Some(frame) = self.frame_buffer.pop_first() { let pts_passed = frame.pts() - self.last_pts; if pts_passed <= 0 && self.last_pts != 0 { println!("WARN: new frame out of order"); } if (pts_passed + self.current_pts) > (self.seconds_per_segment as i32 * self.input_stream.time_base().den()) as i64 { self.format.write_packet_null()?; self.write_segment()?; self.segment += 1; self.current_pts = (pts_passed + self.current_pts) % self.input_stream.time_base().den() as i64; } else { self.current_pts += pts_passed } self.last_pts = frame.pts(); self.encoder.send_audio_frame(&frame)?; while let Some(new_packet) = self.encoder.read_packet()? { new_packet.set_pts(frame.pts()); new_packet.set_dts(new_packet.pts()); new_packet.set_duration(pts_passed); new_packet.rescale( self.input_stream.time_base(), self.output_stream.time_base(), ); new_packet.set_stream(0); new_packet.data(); self.format.write_packet(&new_packet)?; } } Ok(()) } } pub struct Transcoder { input: Format<()>, input_video: Stream, input_frame_rate: Ratio, video_output: Format, output_path: String, output_video: Stream, video_frame_buffer: SortedFrameBuffer, video_encoder: Encoder, video_decoder: Decoder, frame_scaler: Scaler, last_pts: i64, current_pts: i64, video_segment: u32, extra_data: Option>, audio: Option, segment_target: Option, input_video_index: Option, input_audio_index: Option, seconds_per_segment: u32, } impl Transcoder { pub fn create( input_path: &str, output_path: &str, video_index: Option, audio_index: Option, ) -> Result { let input = Format::open(input_path, None).expect("Failed to open media"); let output_path = output_path.to_string(); let input_video = input .stream(AVMEDIA_TYPE_VIDEO, video_index, None) .map_err(|err| format!("Failed to find video stream: {}", err))? .ok_or("Failed to find video stream".to_string())?; let input_audio = input .stream(AVMEDIA_TYPE_AUDIO, audio_index, Some(input_video.index())) .map_err(|err| format!("Failed to find audio stream: {}", err))?; if audio_index.is_some() && input_audio.is_none() { return Err("Failed to find audio stream".to_string()); } let input_frame_rate = input_video.avg_frame_rate(&input).unwrap().to_num(); let mut options = Dictionary::new(); options.set("movflags", "+dash+delay_moov+skip_trailer+frag_custom"); let video_avio = AVIO::writer(Buffer::default()); let video_output = Format::output_avio(video_avio, "mp4")?; let output_video = video_output.new_stream("h264")?; output_video.params().copy_from(&input_video.params())?; output_video.set_sample_aspect_ratio(input_video.sample_aspect_ratio()); video_output.set_flags(input.flags()); let audio = if let Some(input_stream) = input_audio { let audio_avio = AVIO::writer(Buffer::default()); let format = Format::output_avio(audio_avio, "mp4")?; let output_stream = format.new_stream("aac")?; format.set_flags(input.flags()); let decoder = input_stream.decoder(None).ok_or(format!( "Couldn't find encoder for input audio with codec: {:?}", input_stream.codec() ))?; decoder.configure(&input_stream)?; decoder.open()?; decoder.set_time_base(input_stream.time_base()); let encoder = output_stream .encoder(None) .ok_or("Couldn't find encoder for AAC".to_string())?; encoder.set_channels(decoder.channels()); encoder.set_channel_layout(decoder.channel_layout()); encoder.set_sample_format( encoder .sample_formats() .first() .copied() .ok_or("No sample formats found for AAC")?, ); encoder.set_sample_rate(decoder.sample_rate()); encoder.set_time_base(input_stream.time_base()); encoder.set_time_base(Ratio::new(1, decoder.sample_rate())); output_stream.set_time_base(encoder.time_base()); encoder.set_frame_size(decoder.frame_size() * 4); encoder.open()?; encoder.configure(&output_stream)?; let resampler = Resampler::from_coder(&decoder, &encoder); let mut audio_options = Dictionary::new(); audio_options.copy_from(&mut options)?; format.init_output(audio_options)?; Some(AudioTranscoder { frame_buffer: SortedFrameBuffer::new(), encoder, decoder, format, output_stream, input_stream, output_path: output_path.to_string(), current_pts: 0, last_pts: 0, segment: 0, resampler, seconds_per_segment: SECONDS_PER_SEGMENT, }) } else { None }; let video_decoder = input_video.decoder_select(None, |d| { d.configure(&input_video)?; d.open()?; Ok(()) })?; let video_encoder = output_video.encoder_select(None, |encoder| { encoder.set_pixel_format(AVPixelFormat::AV_PIX_FMT_YUV420P); encoder.set_color_range(video_decoder.color_range()); encoder.set_color_primaries(video_decoder.color_primaries()); encoder.set_color_trc(video_decoder.color_trc()); encoder.set_height(video_decoder.height()); encoder.set_width(video_decoder.width()); encoder.set_time_base(input_frame_rate.invert()); output_video.set_time_base(encoder.time_base()); encoder.open()?; encoder.configure(&output_video)?; Ok(()) })?; let param = output_video.params(); param.set_height(video_decoder.height()); param.set_width(video_decoder.width()); param.set_pixel_format(AV_PIX_FMT_YUV420P); video_output.init_output(options)?; let frame_scaler = Scaler::from_coder(&video_decoder, &video_encoder, SWS_BILINEAR); let video_frame_buffer = SortedFrameBuffer::new(); Ok(Transcoder { input, input_video, input_frame_rate, video_output, output_video, audio, segment_target: None, input_video_index: None, output_path, video_encoder, video_decoder, frame_scaler, video_frame_buffer, last_pts: 0, current_pts: 0, video_segment: 0, extra_data: None, input_audio_index: None, seconds_per_segment: SECONDS_PER_SEGMENT, }) } pub fn seek(&self, segment: u32, stream: Option<&Stream>) -> Result<(), String> { self.input.seek(segment * self.seconds_per_segment, stream) } pub fn open(&mut self) -> Result<(), String> { DirBuilder::new() .recursive(true) .create(self.output_path.to_string()) .map_err(|_| "Failed to create target directory".to_string())?; self.video_output.write_header(None).unwrap(); self.input_video_index = Some(self.input_video.index()); self.input_audio_index = self.audio.as_ref().map(|ia| ia.input_stream.index()); Ok(()) } pub fn duration(&self) -> i64 { self.input.duration() } pub fn duration_secs(&self) -> f64 { self.input.duration() as f64 / AV_TIME_BASE as f64 } pub fn transcode(&mut self) -> Result { Ok(if let Some(packet) = self.input.next_packet() { if self.input_video_index == Some(packet.stream()) { self.video_process_packet(packet)?; } else if self.input_audio_index == Some(packet.stream()) { // Safe assumption if let Some(ref mut audio) = &mut self.audio { audio.process_packet(packet)?; } } true } else { false }) } #[inline] pub fn video_stream(&self) -> &Stream { &self.input_video } #[inline] pub fn audio_stream(&self) -> Option<&Stream> { if let Some(ref audio) = self.audio { Some(audio.input_stream()) } else { None } } pub fn segment(&self) -> u32 { min( self.video_segment, self.audio.as_ref().map_or(u32::MAX, |audio| audio.segment), ) } pub fn finish(&mut self) -> Result<(), String> { self.encode_video()?; self.video_output.write_trailer()?; self.video_write_segment()?; Ok(()) } fn video_process_packet(&mut self, packet: Packet) -> Result<(), String> { self.video_decoder.send_packet(&packet)?; while let Some(frame) = self.video_decoder.read_frame()? { let segment: u32 = (self .input_video .time_base() .to_num() .mul(packet.pts() as i32) .to_integer() / self.seconds_per_segment as i32) as u32; if let Some(target) = self.segment_target { if target - 1 < segment { continue; } } let pts = frame.pts(); let frame = self .frame_scaler .scale(frame) .expect("Failed to scale video frame"); frame.set_pts(pts); self.video_frame_buffer.insert_frame(frame); self.encode_video()?; } Ok(()) } fn video_write_header(&mut self) -> Result<(), String> { File::create(format!("{}/video-init.mp4", &self.output_path)) .unwrap() .write_all(&mut self.video_output.avio_inner_mut().unwrap().buffer()) .map_err(|_| "Failed to write video init segment".to_string())?; self.start_segment(true); self.video_output.write_packet_null()?; Ok(()) } fn video_write_segment(&mut self) -> Result<(), String> { if self.video_segment == 0 { self.video_write_header()?; } if let Some(avio) = self.video_output.avio.as_mut() { avio.flush() } let segment = self.video_output.avio_inner_mut().unwrap().buffer(); let mut segment = annex_b_to_avc(segment, 4)?; File::create(format!( "{}/video-segment-{:0>5}.m4s", self.output_path, self.video_segment )) .unwrap() .write_all(&mut segment) .map_err(|_| format!("Failed to write video segment {}", self.video_segment))?; self.start_segment(false); self.video_encoder.flush(); Ok(()) } fn encode_video(&mut self) -> Result<(), String> { while let Some(frame) = self.video_frame_buffer.pop_first() { let pts_passed = frame.pts() - self.last_pts; if (pts_passed + self.current_pts) > (self.seconds_per_segment * self.input_frame_rate.den() as u32) as i64 { self.video_output.write_packet_null()?; self.video_write_segment()?; self.video_segment += 1; self.current_pts = (pts_passed + self.current_pts) % self.input_frame_rate.den() as i64; frame.set_pict_type(AV_PICTURE_TYPE_I); } else { self.current_pts += pts_passed } self.last_pts = frame.pts(); self.video_encoder.send_video_frame(&frame)?; while let Some(new_packet) = self.video_encoder.read_packet()? { if self.extra_data.is_none() { let items = NalBuffer::from_stream(new_packet.data())?; let mut sps = None; let mut pss = None; for item in items { if item.nal_unit.nal_unit_type == NalUnitType::PictureParameterSet { pss = Some(item.to_vec()); } else if item.nal_unit.nal_unit_type == NalUnitType::SequenceParameterSet { sps = Some(item.to_vec()); } } if let Some(sps) = sps { if let Some(pss) = pss { let config = AvcDecoderConfigurationRecord::from_parameter_sets(sps, pss); self.extra_data = Some(config.to_bytes()?); } } if let Some(extra_data) = &self.extra_data { self.output_video.params().set_extra_data(extra_data); self.output_video .params() .set_sample_aspect_ratio(Ratio::new(1, 1)); } } new_packet.set_pts(frame.pts()); new_packet.set_dts(new_packet.pts()); new_packet.set_duration(pts_passed); new_packet.rescale(self.input_video.time_base(), self.output_video.time_base()); new_packet.set_stream(0); self.video_output.write_packet(&new_packet)?; } } Ok(()) } fn start_segment(&self, force: bool) { if self.video_segment == 0 && !force { return; } unsafe { let pb = (*self.video_output.as_mut_ptr()).pb; avio_wb32(pb, 24); ffio_wfourcc(pb, b"styp"); ffio_wfourcc(pb, b"msdh"); avio_wb32(pb, 0); /* minor */ ffio_wfourcc(pb, b"msdh"); ffio_wfourcc(pb, b"msix"); } } pub fn stop(self) {} } #[derive(Default, Debug)] struct Buffer { buffer: Vec, } impl Buffer { fn buffer(&mut self) -> Vec { std::mem::replace(&mut self.buffer, vec![]) } } impl AVIOWriter for Buffer { fn write(&mut self, mut buffer: Vec) { self.buffer.append(&mut buffer) } } #[inline] fn ffio_wfourcc(pb: *mut AVIOContext, tag: &[u8]) { let mut nr: u32 = 0; nr |= tag[3] as u32; nr |= (tag[2] as u32) << 8; nr |= (tag[1] as u32) << 16; nr |= (tag[0] as u32) << 24; unsafe { avio_wb32(pb, nr) } }