finish up audio transcoder

2020-07-10 23:58:34 +02:00 · 2020-07-10 23:58:34 +02:00 · fe171a56be
commit fe171a56be
parent 75ce22c581
9 changed files with 171 additions and 26 deletions
--- a/src/av/avio.rs
+++ b/src/av/avio.rs
@@ -35,6 +35,7 @@ unsafe extern "C" fn read_packet_with_inner(
    let items = avio.data.read(buffer_size);
    let len = min(items.len(), buffer_size as usize);
    buffer.copy_from(items.as_ptr().cast(), len);
+    std::mem::forget(avio);
    len as i32
 }

@@ -52,7 +53,7 @@ unsafe extern "C" fn seek_with_inner(ctx: *mut c_void, offset: i64, whence: i32)
    println!("ok");
    let mut avio: Box<AVIOInner<Box<dyn AVIOSeekable>>> = Box::from_raw(ctx.cast());
    avio.data.seek(offset, whence);
-
+    std::mem::forget(avio);
    0
 }

--- a/src/av/dictionary.rs
+++ b/src/av/dictionary.rs
@@ -1,4 +1,7 @@
-use ffmpeg_sys_next::{av_dict_free, av_dict_get, av_dict_set, av_dict_set_int, AVDictionary};
+use crate::av::verify_response;
+use ffmpeg_sys_next::{
+    av_dict_copy, av_dict_free, av_dict_get, av_dict_set, av_dict_set_int, AVDictionary,
+};
 use std::ffi::CString;
 use std::ptr::{null, null_mut};

@@ -25,6 +28,14 @@ impl Dictionary {
        }
    }

+    pub fn copy_from(&mut self, from: &mut Dictionary) -> Result<(), String> {
+        verify_response("Failed to copy dictionary", unsafe {
+            av_dict_copy(&mut self.ptr, from.as_ptr(), 0)
+        })?;
+
+        Ok(())
+    }
+
    pub fn disown(&mut self) {
        self.owned = false;
    }
--- a/src/av/encoder.rs
+++ b/src/av/encoder.rs
@@ -41,9 +41,6 @@ impl Encoder {
        //         self.sample_format()
        //     ));
        // }
-
-        println!("{:?} / {}", frame.nb_samples(), self.frame_size());
-
        self.send_frame(frame)
    }

--- a/src/av/frame.rs
+++ b/src/av/frame.rs
@@ -84,6 +84,31 @@ impl Frame {
        unsafe { std::mem::transmute::<_, AVSampleFormat>((*self.ptr).format) }
    }

+    #[inline]
+    pub fn sample_rate(&self) -> i32 {
+        unsafe { (*self.ptr).sample_rate }
+    }
+
+    #[inline]
+    pub fn set_sample_rate(&self, sample_rate: i32) {
+        unsafe { (*self.ptr).sample_rate = sample_rate }
+    }
+
+    #[inline]
+    pub fn channel_layout(&self) -> u64 {
+        unsafe { (*self.ptr).channel_layout }
+    }
+
+    #[inline]
+    pub fn set_channel_layout(&self, channel_layout: u64) {
+        unsafe { (*self.ptr).channel_layout = channel_layout }
+    }
+
+    #[inline]
+    pub fn set_nb_samples(&self, nb_samples: i32) {
+        unsafe { (*self.ptr).nb_samples = nb_samples }
+    }
+
    #[inline]
    pub fn nb_samples(&self) -> i32 {
        unsafe { (*self.ptr).nb_samples }
@@ -99,6 +124,23 @@ impl Frame {
        self.set_width(width);
        self.set_height(height);

+        self.allocate()
+    }
+
+    pub fn allocate_audio(
+        &self,
+        sample_format: AVSampleFormat,
+        channel_layout: u64,
+        nb_samples: i32,
+    ) -> Result<(), String> {
+        self.set_sample_format(sample_format);
+        self.set_channel_layout(channel_layout);
+        self.set_nb_samples(nb_samples);
+
+        self.allocate()
+    }
+
+    fn allocate(&self) -> Result<(), String> {
        verify_response("Failed to allocate frame", unsafe {
            av_frame_get_buffer(self.ptr, 32)
        })?;
--- a/src/av/resampler.rs
+++ b/src/av/resampler.rs
@@ -2,15 +2,26 @@ use crate::av::as_ptr::{AsMutPtr, AsPtr};
 use crate::av::decoder::Decoder;
 use crate::av::encoder::Encoder;
 use crate::av::frame::Frame;
+use crate::av::verify_response;
 use crate::av::xcoder::XCoder;
 use ffmpeg_sys_next::{
-    swr_alloc, swr_alloc_set_opts, swr_convert_frame, swr_get_delay, AVFrame, SwrContext,
+    swr_alloc, swr_alloc_set_opts, swr_convert_frame, swr_get_delay, AVFrame, AVSampleFormat,
+    SwrContext,
 };
 use std::ptr::{null, null_mut};

 pub struct Resampler {
    ptr: *mut SwrContext,
-    output_sample_rate: i64,
+    input: StreamConfig,
+    output: StreamConfig,
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct StreamConfig {
+    sample_rate: i32,
+    sample_format: AVSampleFormat,
+    channel_layout: u64,
+    frame_size: i32,
 }

 impl Resampler {
@@ -31,30 +42,57 @@ impl Resampler {

            Resampler {
                ptr: swr,
-                output_sample_rate: encoder.sample_rate() as i64,
+                input: StreamConfig {
+                    sample_rate: decoder.sample_rate(),
+                    sample_format: decoder.sample_format(),
+                    channel_layout: decoder.channel_layout(),
+                    frame_size: decoder.frame_size(),
+                },
+                output: StreamConfig {
+                    sample_rate: encoder.sample_rate(),
+                    sample_format: encoder.sample_format(),
+                    channel_layout: encoder.channel_layout(),
+                    frame_size: encoder.frame_size(),
+                },
            }
        }
    }

-    pub fn convert(&self, input: Frame) -> Frame {
+    pub fn convert(&self, input: &Frame) -> Result<Frame, String> {
        let output = Frame::alloc();
-        unsafe {
-            swr_convert_frame(self.ptr, output.as_mut_ptr(), input.as_ptr());
-        }
+        output.allocate_audio(
+            self.output.sample_format,
+            self.output.channel_layout,
+            self.output.frame_size,
+        )?;
+        output.set_sample_rate(self.output.sample_rate);

-        output
+        verify_response("Failed to resample frame", unsafe {
+            swr_convert_frame(self.ptr, output.as_mut_ptr(), input.as_ptr())
+        })?;
+
+        output.set_pts((input.pts() * input.sample_rate() as i64) / output.sample_rate() as i64);
+
+        Ok(output)
    }

-    pub fn drain(&self) -> Option<Frame> {
-        if 0 < unsafe { swr_get_delay(self.ptr, self.output_sample_rate) } {
+    pub fn drain(&self) -> Result<Option<Frame>, String> {
+        if 0 < unsafe { swr_get_delay(self.ptr, self.output.sample_rate as i64) } {
            let output = Frame::alloc();
-            unsafe {
-                swr_convert_frame(self.ptr, output.as_mut_ptr(), null());
-            }
+            output.allocate_audio(
+                self.output.sample_format,
+                self.output.channel_layout,
+                self.output.frame_size,
+            )?;
+            output.set_sample_rate(self.output.sample_rate);

-            Some(output)
+            verify_response("Failed to resample remaining frame", unsafe {
+                swr_convert_frame(self.ptr, output.as_mut_ptr(), null())
+            })?;
+
+            Ok(Some(output))
        } else {
-            None
+            Ok(None)
        }
    }
 }
--- a/src/av/xcoder.rs
+++ b/src/av/xcoder.rs
@@ -4,9 +4,9 @@ use crate::av::{verify_response, Rational};
 use crate::av_err2str;
 use ffmpeg_sys_next::AVPixelFormat::AV_PIX_FMT_NONE;
 use ffmpeg_sys_next::{
-    av_malloc, avcodec_open2, AVCodecContext, AVColorPrimaries, AVColorRange,
-    AVColorTransferCharacteristic, AVPixelFormat, AVRational, AVSampleFormat, AVERROR, AVERROR_EOF,
-    EAGAIN,
+    av_malloc, avcodec_flush_buffers, avcodec_open2, AVCodecContext, AVCodecID, AVColorPrimaries,
+    AVColorRange, AVColorTransferCharacteristic, AVPixelFormat, AVRational, AVSampleFormat,
+    AVERROR, AVERROR_EOF, EAGAIN,
 };
 use num_rational::Ratio;
 use std::ffi::CStr;
@@ -52,6 +52,11 @@ pub trait XCoder: Sized {
        Ok(())
    }

+    #[inline]
+    fn codec(&self) -> AVCodecID {
+        unsafe { (*(*self.as_ptr()).codec).id }
+    }
+
    #[inline]
    fn time_base(&self) -> Ratio<i32> {
        unsafe { (*self.as_ptr()).time_base.to_num() }
@@ -211,6 +216,12 @@ pub trait XCoder: Sized {
        }
    }

+    fn flush(&self) {
+        unsafe {
+            avcodec_flush_buffers(self.as_mut_ptr());
+        }
+    }
+
    fn set_private_data(&self, key: &str, value: &str) {
        unsafe {
            Dictionary {
--- a/src/transcoder.rs
+++ b/src/transcoder.rs
@@ -58,7 +58,17 @@ impl AudioTranscoder {
            .send_packet(&packet)
            .map_err(|err| format!("Failed sending audio packet: {}", err))?;
        while let Some(frame) = self.decoder.read_frame()? {
-            self.frame_buffer.insert_frame(frame);
+            let resampled = self.resampler.convert(&frame)?;
+            let resampled_pts = resampled.pts();
+            self.frame_buffer.insert_frame(resampled);
+            let mut offset: f64 = 0f64;
+            while let Some(drain_resampled) = self.resampler.drain()? {
+                offset += (drain_resampled.nb_samples() as f64 / self.encoder.sample_rate() as f64)
+                    * self.input_stream.time_base().den() as f64;
+                drain_resampled.set_pts(resampled_pts + offset as i64);
+                self.frame_buffer.insert_frame(drain_resampled);
+            }
+
            self.encode()?;
        }

@@ -120,6 +130,10 @@ impl AudioTranscoder {
    fn encode(&mut self) -> Result<(), String> {
        while let Some(frame) = self.frame_buffer.pop_first() {
            let pts_passed = frame.pts() - self.last_pts;
+            if pts_passed <= 0 && self.last_pts != 0 {
+                println!("WARN: new frame out of order");
+            }
+
            if (pts_passed + self.current_pts) > (5 * self.input_stream.time_base().den()) as i64 {
                self.format.write_packet_null()?;
                println!(
@@ -145,6 +159,7 @@ impl AudioTranscoder {
                    self.output_stream.time_base(),
                );
                new_packet.set_stream(0);
+                new_packet.data();
                self.format.write_packet(&new_packet)?;
            }
        }
@@ -197,6 +212,7 @@ impl Transcoder {
            ))?;
            decoder.configure(&input_stream)?;
            decoder.open()?;
+            decoder.set_time_base(input_stream.time_base());

            let encoder = output_stream
                .encoder(None)
@@ -217,14 +233,30 @@ impl Transcoder {
            encoder.set_frame_size(decoder.frame_size() * 4);
            encoder.open()?;
            encoder.configure(&output_stream)?;
+            println!(
+                "audio input stream: {}/{}",
+                input_stream.time_base().num(),
+                input_stream.time_base().den()
+            );

            println!(
-                "audio decoder[{}] -> audio encoder[{}]",
+                "audio decoder[{}] ({:?} {}hz | {}/{}) -> audio encoder[{}] ({:?} {}hz | {}/{})",
                decoder.name(),
-                encoder.name()
+                decoder.codec(),
+                decoder.sample_rate(),
+                decoder.time_base().num(),
+                decoder.time_base().den(),
+                encoder.name(),
+                encoder.codec(),
+                encoder.sample_rate(),
+                encoder.time_base().num(),
+                encoder.time_base().den(),
            );

            let resampler = Resampler::from_coder(&decoder, &encoder);
+            let mut audio_options = Dictionary::new();
+            audio_options.copy_from(&mut options);
+            format.init_output(audio_options);

            Some(AudioTranscoder {
                frame_buffer: SortedFrameBuffer::new(),
@@ -376,6 +408,8 @@ impl Transcoder {
        .map_err(|_| format!("Failed to write video segment {}", self.video_segment))?;
        self.start_segment(false);

+        self.video_encoder.flush();
+
        Ok(())
    }

--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -87,6 +87,10 @@ impl<K: Copy + Debug + Eq + Hash + Ord, V: Debug> SortedBuffer<K, V> {
        self.open = false;
    }

+    pub fn first_key(&mut self) -> Option<K> {
+        self.sorted_keys.first().copied()
+    }
+
    pub fn pop_first(&mut self) -> Option<V> {
        if self.sorted_keys.len() == 0 || (self.open && self.buffer_size >= self.items.len()) {
            return None;
--- a/test/test.mpd
+++ b/test/test.mpd
@@ -13,5 +13,12 @@
                                 startNumber="0"/>
            </Representation>
        </AdaptationSet>
+        <AdaptationSet contentType="audio">
+            <Representation mimeType="audio/mp4" codecs="mp4a.40.2" >
+                <SegmentTemplate media="audio-segment-$Number%05d$.m4s"
+                                 initialization="audio-init.mp4" duration="5000" timescale="1000"
+                                 startNumber="0"/>
+            </Representation>
+        </AdaptationSet>
    </Period>
 </MPD>