Newest 'ffmpeg' Questions - Stack Overflow

http://stackoverflow.com/questions/tagged/ffmpeg

Les articles publiés sur le site

  • How to render two videos with alpha channel in real time in pygame with synched audio ?

    21 décembre, par Francesco Calderone

    I need to play two videos with synched sound in real-time with Pygame. Pygame does not currently support video streams, so I am using a ffmpeg subprocess. The first video is a prores422_hq. This is a background video with no alpha channel. The second video is a prores4444 overlay video with an alpha channel, and it needs to be played in real-time on top of the first video (with transparency). All of this needs synched sound from the first base video only.

    I have tried many libraries, including pymovie, pyav and opencv. The best result so far is to use a subprocess with ffmpeg.

    ffmpeg -i testing/stefano_prores422_hq.mov -stream_loop -1 -i testing/key_prores4444.mov -filter_complex "[1:v]format=rgba,colorchannelmixer=aa=1.0[overlay];[0:v][overlay]overlay" -f nut pipe:1 | ffplay -

    When running this in the terminal and playing with ffplay, everything is perfect, the overlay looks good, no dropped frames, and the sound is in synch.

    However, trying to feed that to pygame via a subprocess creates either video delays and drop frames or audio not in synch.

    EXAMPLE ONE:

    # SOUND IS NOT SYNCHED - sound is played via ffplay
    import pygame
    import subprocess
    import numpy as np
    import sys
    
    def main():
        pygame.init()
        screen_width, screen_height = 1920, 1080
        screen = pygame.display.set_mode((screen_width, screen_height))
        pygame.display.set_caption("PyGame + FFmpeg Overlay with Audio")
        clock = pygame.time.Clock()
    
        # LAUNCH AUDIO-ONLY SUBPROCESS
        audio_cmd = [
            "ffplay",
            "-nodisp",          # no video window
            "-autoexit",        # exit when video ends
            "-loglevel", "quiet",
            "testing/stefano_prores422_hq.mov"
        ]
        audio_process = subprocess.Popen(audio_cmd)
    
        # LAUNCH VIDEO-OVERLAY SUBPROCESS
        ffmpeg_command = [
            "ffmpeg",
            "-re",                        # read at native frame rate
            "-i", "testing/stefano_prores422_hq.mov",
            "-stream_loop", "-1",         # loop alpha video
            "-i", "testing/key_prores4444.mov",
            "-filter_complex",
            "[1:v]format=rgba,colorchannelmixer=aa=1.0[overlay];"  # ensure alpha channel
            "[0:v][overlay]overlay",      # overlay second input onto first
            "-f", "rawvideo",             # output raw video
            "-pix_fmt", "rgba",           # RGBA format
            "pipe:1"                      # write to STDOUT
        ]
        video_process = subprocess.Popen(
            ffmpeg_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL
        )
        frame_size = screen_width * screen_height * 4  # RGBA = 4 bytes/pixel
        running = True
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                    break
    
            raw_frame = video_process.stdout.read(frame_size)
    
            if len(raw_frame) < frame_size:
                running = False
                break
            # Convert raw bytes -> NumPy array -> PyGame surface
            frame_array = np.frombuffer(raw_frame, dtype=np.uint8)
            frame_array = frame_array.reshape((screen_height, screen_width, 4))
            frame_surface = pygame.image.frombuffer(frame_array.tobytes(), 
                                                    (screen_width, screen_height), 
                                                    "RGBA")
            screen.blit(frame_surface, (0, 0))
            pygame.display.flip()
            clock.tick(25)
        video_process.terminate()
        video_process.wait()
        audio_process.terminate()
        audio_process.wait()
        pygame.quit()
        sys.exit()
    
    if __name__ == "__main__":
        main()
    
    

    EXAMPLE TWO

    # NO VIDEO OVERLAY - SOUND SYNCHED
    import ffmpeg
    import pygame
    import sys
    import numpy as np
    import tempfile
    import os
    
    def extract_audio(input_file, output_file):
        """Extract audio from video file to temporary WAV file"""
        (
            ffmpeg
            .input(input_file)
            .output(output_file, acodec='pcm_s16le', ac=2, ar='44100')
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
    
    def get_video_fps(input_file):
        probe = ffmpeg.probe(input_file)
        video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
        fps_str = video_info.get('r_frame_rate', '25/1')
        num, den = map(int, fps_str.split('/'))
        return num / den
    
    input_file = "testing/stefano_prores422_hq.mov"
    
    # Create temporary WAV file
    temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    temp_audio.close()
    extract_audio(input_file, temp_audio.name)
    
    probe = ffmpeg.probe(input_file)
    video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
    width = int(video_info['width'])
    height = int(video_info['height'])
    fps = get_video_fps(input_file)
    
    process = (
        ffmpeg
        .input(input_file)
        .output('pipe:', format='rawvideo', pix_fmt='rgb24')
        .run_async(pipe_stdout=True)
    )
    
    pygame.init()
    pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=4096)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((width, height))
    
    pygame.mixer.music.load(temp_audio.name)
    pygame.mixer.music.play()
    
    frame_count = 0
    start_time = pygame.time.get_ticks()
    
    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.mixer.music.stop()
                os.unlink(temp_audio.name)
                sys.exit()
    
        in_bytes = process.stdout.read(width * height * 3)
        if not in_bytes:
            break
    
        # Calculate timing for synchronization
        expected_frame_time = frame_count * (1000 / fps)
        actual_time = pygame.time.get_ticks() - start_time
        
        if actual_time < expected_frame_time:
            pygame.time.wait(int(expected_frame_time - actual_time))
        
        in_frame = (
            np.frombuffer(in_bytes, dtype="uint8")
            .reshape([height, width, 3])
        )
        out_frame = pygame.surfarray.make_surface(np.transpose(in_frame, (1, 0, 2)))
        screen.blit(out_frame, (0, 0))
        pygame.display.flip()
        
        frame_count += 1
    
    pygame.mixer.music.stop()
    process.wait()
    pygame.quit()
    os.unlink(temp_audio.name)
    

    I also tried using pygame mixer and a separate mp3 audio file, but that didn't work either. Any help on how to synch the sound while keeping the playback of both videos to 25 FPS would be greatly appreciated!!!

  • Rounded corners in subtitle (Advanced Substation Alpha [.ass])

    21 décembre, par Leander Mihm

    Is it possible to have rounded corners with the BorderStyle 4 in .ass (Advanced Substation Alpha)? I only found out that the BorderStyle 4 exists, because I was looking at this stackoverflow. Is there any good and complete documentation of the Advanced Substation Alpha format?

    I'm currently using the following configuration:

    BorderStyle=4 Outline=10

    Preview of the current state

  • How to render two videos with alpha channel in real time in pygame ?

    21 décembre, par Francesco Calderone

    I need to play two videos with synched sound in real-time with Pygame. Pygame does not currently support video streams, so I am using a ffmpeg subprocess. The first video is a prores422_hq. This is a background video with no alpha channel. The second video is a prores4444 overlay video with an alpha channel, and it needs to be played in real-time on top of the first video (with transparency). All of this needs synched sound from the first base video only.

    I have tried many libraries, including pymovie, pyav and opencv. The best result so far is to use a subprocess with ffmpeg.

    ffmpeg -i testing/stefano_prores422_hq.mov -stream_loop -1 -i testing/key_prores4444.mov -filter_complex "[1:v]format=rgba,colorchannelmixer=aa=1.0[overlay];[0:v][overlay]overlay" -f nut pipe:1 | ffplay -

    When running this in the terminal and playing with ffplay, everything is perfect, the overlay looks good, no dropped frames, and the sound is in synch.

    However, trying to feed that to pygame via a subprocess creates either video delays and drop frames or audio not in synch.

    EXAMPLE ONE:

    # SOUND IS NOT SYNCHED - sound is played via ffplay
    import pygame
    import subprocess
    import numpy as np
    import sys
    
    def main():
        pygame.init()
        screen_width, screen_height = 1920, 1080
        screen = pygame.display.set_mode((screen_width, screen_height))
        pygame.display.set_caption("PyGame + FFmpeg Overlay with Audio")
        clock = pygame.time.Clock()
    
        # LAUNCH AUDIO-ONLY SUBPROCESS
        audio_cmd = [
            "ffplay",
            "-nodisp",          # no video window
            "-autoexit",        # exit when video ends
            "-loglevel", "quiet",
            "testing/stefano_prores422_hq.mov"
        ]
        audio_process = subprocess.Popen(audio_cmd)
    
        # LAUNCH VIDEO-OVERLAY SUBPROCESS
        ffmpeg_command = [
            "ffmpeg",
            "-re",                        # read at native frame rate
            "-i", "testing/stefano_prores422_hq.mov",
            "-stream_loop", "-1",         # loop alpha video
            "-i", "testing/key_prores4444.mov",
            "-filter_complex",
            "[1:v]format=rgba,colorchannelmixer=aa=1.0[overlay];"  # ensure alpha channel
            "[0:v][overlay]overlay",      # overlay second input onto first
            "-f", "rawvideo",             # output raw video
            "-pix_fmt", "rgba",           # RGBA format
            "pipe:1"                      # write to STDOUT
        ]
        video_process = subprocess.Popen(
            ffmpeg_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL
        )
        frame_size = screen_width * screen_height * 4  # RGBA = 4 bytes/pixel
        running = True
        while running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                    break
    
            raw_frame = video_process.stdout.read(frame_size)
    
            if len(raw_frame) < frame_size:
                running = False
                break
            # Convert raw bytes -> NumPy array -> PyGame surface
            frame_array = np.frombuffer(raw_frame, dtype=np.uint8)
            frame_array = frame_array.reshape((screen_height, screen_width, 4))
            frame_surface = pygame.image.frombuffer(frame_array.tobytes(), 
                                                    (screen_width, screen_height), 
                                                    "RGBA")
            screen.blit(frame_surface, (0, 0))
            pygame.display.flip()
            clock.tick(25)
        video_process.terminate()
        video_process.wait()
        audio_process.terminate()
        audio_process.wait()
        pygame.quit()
        sys.exit()
    
    if __name__ == "__main__":
        main()
    
    

    EXAMPLE TWO

    # NO VIDEO OVERLAY - SOUND SYNCHED
    import ffmpeg
    import pygame
    import sys
    import numpy as np
    import tempfile
    import os
    
    def extract_audio(input_file, output_file):
        """Extract audio from video file to temporary WAV file"""
        (
            ffmpeg
            .input(input_file)
            .output(output_file, acodec='pcm_s16le', ac=2, ar='44100')
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
    
    def get_video_fps(input_file):
        probe = ffmpeg.probe(input_file)
        video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
        fps_str = video_info.get('r_frame_rate', '25/1')
        num, den = map(int, fps_str.split('/'))
        return num / den
    
    input_file = "testing/stefano_prores422_hq.mov"
    
    # Create temporary WAV file
    temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    temp_audio.close()
    extract_audio(input_file, temp_audio.name)
    
    probe = ffmpeg.probe(input_file)
    video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
    width = int(video_info['width'])
    height = int(video_info['height'])
    fps = get_video_fps(input_file)
    
    process = (
        ffmpeg
        .input(input_file)
        .output('pipe:', format='rawvideo', pix_fmt='rgb24')
        .run_async(pipe_stdout=True)
    )
    
    pygame.init()
    pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=4096)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((width, height))
    
    pygame.mixer.music.load(temp_audio.name)
    pygame.mixer.music.play()
    
    frame_count = 0
    start_time = pygame.time.get_ticks()
    
    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.mixer.music.stop()
                os.unlink(temp_audio.name)
                sys.exit()
    
        in_bytes = process.stdout.read(width * height * 3)
        if not in_bytes:
            break
    
        # Calculate timing for synchronization
        expected_frame_time = frame_count * (1000 / fps)
        actual_time = pygame.time.get_ticks() - start_time
        
        if actual_time < expected_frame_time:
            pygame.time.wait(int(expected_frame_time - actual_time))
        
        in_frame = (
            np.frombuffer(in_bytes, dtype="uint8")
            .reshape([height, width, 3])
        )
        out_frame = pygame.surfarray.make_surface(np.transpose(in_frame, (1, 0, 2)))
        screen.blit(out_frame, (0, 0))
        pygame.display.flip()
        
        frame_count += 1
    
    pygame.mixer.music.stop()
    process.wait()
    pygame.quit()
    os.unlink(temp_audio.name)
    

    I also tried using pygame mixer and a separate mp3 audio file, but that didn't work either. Any help on how to synch the sound while keeping the playback of both videos to 25 FPS would be greatly appreciated!!!

  • How to improve the fluency of rtsp streaming through ffmpeg (processing 16 pictures at the same time)

    21 décembre, par Ling Yun

    When the button is clicked, I create 16 threads in Qt, and then pass the rtsp data address and the label to be rendered to the process, and then the process does this: run:

    
    void rtspthread::run()
    {
    
        while(!shouldStop){
            openRtspStream(rtspUrl.toUtf8().constData(),index);
        }
    
        qDebug() << "RTSP stream stopped.";
        emit finished();  
    }
    
    

    open input stream:

    void rtspthread::openRtspStream(const char* rtspUrl,int index)
    
    {
    
        AVDictionary *options = nullptr;
        AVFrame *pFrameRGB = nullptr;
        uint8_t *pOutBuffer = nullptr;
        struct SwsContext *swsContext;
        AVFormatContext *pFormatCtx = nullptr;
        pFormatCtx = avformat_alloc_context();
        av_dict_set(&options, "rtsp_transport", "tcp", 0);
        av_dict_set(&options, "maxrate", "4000k", 0);
        if (avformat_open_input(&pFormatCtx, rtspUrl, nullptr, &options) != 0) {
            printf("Couldn't open stream file.\n");
            return;
        }
    
        if (avformat_find_stream_info(pFormatCtx, NULL)<0)
        {
            printf("Couldn't find stream information.\n");
            return;
        }
        int videoStreamIndex = -1;
        for (int i = 0; i < pFormatCtx->nb_streams; i++) {
            if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
                videoStreamIndex = i;
                break;
            }
        }
        if (videoStreamIndex!=-1){
            AVStream* videoStream = pFormatCtx->streams[videoStreamIndex];
            
            AVCodecParameters* codecpar = videoStream->codecpar;
            const AVCodec* videoCodec = avcodec_find_decoder(codecpar->codec_id);
    
            AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
    
            avcodec_parameters_to_context(videoCodecContext,codecpar);
    
            avcodec_open2(videoCodecContext,videoCodec,nullptr);
    
            AVPixelFormat srcPixFmt = videoCodecContext->pix_fmt;
            QLabel* label = this->parentWidget->findChild("videoLabel");
            int targetWidth = label->width();
            int targetHeight = label->height();
            
            pOutBuffer = (uint8_t*)av_malloc(av_image_get_buffer_size(AV_PIX_FMT_RGB32,
                                                                        videoCodecContext->width,
                                                                        videoCodecContext->height, 1));
    
            
            pFrameRGB = av_frame_alloc();
            av_image_fill_arrays(pFrameRGB->data, pFrameRGB->linesize, pOutBuffer,
                                 AV_PIX_FMT_RGB32, videoCodecContext->width, videoCodecContext->height, 1);
    
    
            swsContext= sws_getContext(
                videoCodecContext->width,videoCodecContext->height,srcPixFmt,
                targetWidth, targetHeight,AV_PIX_FMT_RGB32,
                SWS_BICUBIC,nullptr,nullptr,nullptr
                );
            
            AVPacket packet;
            AVFrame* frame = av_frame_alloc();
            int frameCounter = 0;
            while (av_read_frame(pFormatCtx, &packet) >= 0) {
                if (shouldStop) {
                    break;
                }
                if (packet.stream_index == videoStreamIndex) {
                    
                    int ret = avcodec_send_packet(videoCodecContext,&packet);
                    int rets = avcodec_receive_frame(videoCodecContext, frame);
                    if (rets < 0) {
                        qDebug() << "Error receiving frame from codec context";
                    }
                    
                    sws_scale(swsContext, frame->data, frame->linesize, 0, videoCodecContext->height,
                              pFrameRGB->data, pFrameRGB->linesize);
    
                    
                    QImage img(pFrameRGB->data[0], targetWidth, targetHeight,
                               pFrameRGB->linesize[0], QImage::Format_RGB32);
                    
                    qDebug() << index;
    
                    emit frameReady(img.copy(),index);
    
    
                    QThread::msleep(30);  // 控制帧率
                }
                av_packet_unref(&packet);
    
            }
            av_frame_free(&frame);
            av_frame_free(&pFrameRGB);
            sws_freeContext(swsContext);
            avcodec_free_context(&videoCodecContext);
            avformat_close_input(&pFormatCtx);
            avformat_free_context(pFormatCtx);
    
        }
    
    
    }
    
    

    The video is stuck and has snow screen. I want to lower the resolution and reduce the snow screen. The server cannot change the resolution.

  • ffprobe newer version detect audio codec incorrectly

    21 décembre, par alancc

    I find a strange problem.

    I have a test video with h264 video codec and aac audio codec. It is at https://drive.google.com/file/d/1YAyz5cO0kb9r0MgahCpISR4bZ_1_n8PL/view?usp=sharing

    I build a ffmpeg version by myself, its version is:

    ffprobe version 7.0.2 Copyright (c) 2007-2024 the FFmpeg developers
      built with gcc 14.1.0 (Rev3, Built by MSYS2 project)
      configuration: --enable-shared
      libavutil      59.  8.100 / 59.  8.100
      libavcodec     61.  3.100 / 61.  3.100
      libavformat    61.  1.100 / 61.  1.100
      libavdevice    61.  1.100 / 61.  1.100
      libavfilter    10.  1.100 / 10.  1.100
      libswscale      8.  1.100 /  8.  1.100
      libswresample   5.  1.100 /  5.  1.100
    

    I then use ffprobe to get its info:

    ffprobe -v quiet -print_format ini -show_streams -show_packets test_h264.mp4 > test_h264.ini
    

    Then I get an ini file which shows the audio codec as MP2:

    [streams.stream.0]
    index=0
    codec_name=mp2
    codec_long_name=MP2 (MPEG audio layer 2)
    profile=unknown
    codec_type=audio
    codec_tag_string=mp4a
    codec_tag=0x6134706d
    sample_fmt=fltp
    sample_rate=44100
    channels=2
    channel_layout=stereo
    bits_per_sample=0
    initial_padding=0
    id=0x1
    r_frame_rate=0/0
    avg_frame_rate=0/0
    time_base=1/44100
    start_pts=2788
    start_time=0.063220
    duration_ts=435455
    duration=9.874263
    bit_rate=127706
    max_bit_rate=N/A
    bits_per_raw_sample=N/A
    nb_frames=378
    nb_read_frames=N/A
    nb_read_packets=378
    

    Another developer he uses his version of ffprobe:

    ffprobe version 2023-02-22-git-d5cc7acff1-full_build-www.gyan.dev Copyright (c) 2007-2023 the FFmpeg developers  
    

    Based on the year, my version(2024) should be newer than his(2023), but his version of ffprobe can get the audio codec properly:

    [streams.stream.1]
    index=1
    codec_name=aac
    codec_long_name=AAC (Advanced Audio Coding)
    profile=LC
    codec_type=audio
    codec_tag_string=mp4a
    codec_tag=0x6134706d
    sample_fmt=fltp
    sample_rate=44100
    channels=2
    channel_layout=stereo
    bits_per_sample=0
    initial_padding=0
    id=0x2
    r_frame_rate=0/0
    avg_frame_rate=0/0
    time_base=1/44100
    start_pts=1764
    start_time=0.040000
    duration_ts=436480
    duration=9.897506
    bit_rate=111733
    max_bit_rate=N/A
    bits_per_raw_sample=N/A
    nb_frames=427
    nb_read_frames=N/A
    nb_read_packets=427
    extradata_size=5
    

    Why?

    I also tried a ffprobe version on ubuntu with the following version:

    ffprobe version 6.1.1-3ubuntu5 Copyright (c) 2007-2023 the FFmpeg developers
      built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
      configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-openal --enable-opencl --enable-opengl --disable-sndio --enable-libvpl --disable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-ladspa --enable-libbluray --enable-libjack --enable-libpulse --enable-librabbitmq --enable-librist --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libx264 --enable-libzmq --enable-libzvbi --enable-lv2 --enable-sdl2 --enable-libplacebo --enable-librav1e --enable-pocketsphinx --enable-librsvg --enable-libjxl --enable-shared
      libavutil      58. 29.100 / 58. 29.100
      libavcodec     60. 31.102 / 60. 31.102
      libavformat    60. 16.100 / 60. 16.100
      libavdevice    60.  3.100 / 60.  3.100
      libavfilter     9. 12.100 /  9. 12.100
      libswscale      7.  5.100 /  7.  5.100
      libswresample   4. 12.100 /  4. 12.100
      libpostproc    57.  3.100 / 57.  3.100
    

    It will detect the audio as aac properly, but with different parameters, for example, bit_rate is 111733(developer) but 110399(ubuntu). But this parameter comes from the same file so should be the same.

    [streams.stream.1]
    index=1
    codec_name=aac
    codec_long_name=AAC (Advanced Audio Coding)
    profile=LC
    codec_type=audio
    codec_tag_string=mp4a
    codec_tag=0x6134706d
    sample_fmt=fltp
    sample_rate=44100
    channels=2
    channel_layout=stereo
    bits_per_sample=0
    initial_padding=0
    id=0x2
    r_frame_rate=0/0
    avg_frame_rate=0/0
    time_base=1/44100
    start_pts=0
    start_time=0.000000
    duration_ts=441353
    duration=10.008005
    bit_rate=110399
    max_bit_rate=N/A
    bits_per_raw_sample=N/A
    nb_frames=432
    nb_read_frames=N/A
    nb_read_packets=432
    extradata_size=5