Newest 'ffmpeg' Questions - Stack Overflow
Articles published on the site
-
How to render two videos with alpha channel in real time in pygame with synced audio?
21 December, by Francesco Calderone
I need to play two videos with synced sound in real time with Pygame. Pygame does not currently support video streams, so I am using an ffmpeg subprocess. The first video is ProRes 422 HQ; this is a background video with no alpha channel. The second video is a ProRes 4444 overlay video with an alpha channel, and it needs to be played in real time on top of the first video (with transparency). All of this needs synced sound from the first (base) video only.
I have tried many libraries, including pymovie, pyav, and opencv. The best result so far has come from using an ffmpeg subprocess.
ffmpeg -i testing/stefano_prores422_hq.mov -stream_loop -1 -i testing/key_prores4444.mov -filter_complex "[1:v]format=rgba,colorchannelmixer=aa=1.0[overlay];[0:v][overlay]overlay" -f nut pipe:1 | ffplay -
When I run this in the terminal and play through ffplay, everything is perfect: the overlay looks good, no frames are dropped, and the sound is in sync.
However, feeding that to pygame via a subprocess produces either delayed video with dropped frames, or audio that is out of sync.
EXAMPLE ONE:
# SOUND IS NOT SYNCHED - sound is played via ffplay
import pygame
import subprocess
import numpy as np
import sys

def main():
    pygame.init()
    screen_width, screen_height = 1920, 1080
    screen = pygame.display.set_mode((screen_width, screen_height))
    pygame.display.set_caption("PyGame + FFmpeg Overlay with Audio")
    clock = pygame.time.Clock()

    # LAUNCH AUDIO-ONLY SUBPROCESS
    audio_cmd = [
        "ffplay",
        "-nodisp",            # no video window
        "-autoexit",          # exit when video ends
        "-loglevel", "quiet",
        "testing/stefano_prores422_hq.mov"
    ]
    audio_process = subprocess.Popen(audio_cmd)

    # LAUNCH VIDEO-OVERLAY SUBPROCESS
    ffmpeg_command = [
        "ffmpeg",
        "-re",                            # read at native frame rate
        "-i", "testing/stefano_prores422_hq.mov",
        "-stream_loop", "-1",             # loop alpha video
        "-i", "testing/key_prores4444.mov",
        "-filter_complex",
        "[1:v]format=rgba,colorchannelmixer=aa=1.0[overlay];"  # ensure alpha channel
        "[0:v][overlay]overlay",          # overlay second input onto first
        "-f", "rawvideo",                 # output raw video
        "-pix_fmt", "rgba",               # RGBA format
        "pipe:1"                          # write to STDOUT
    ]
    video_process = subprocess.Popen(
        ffmpeg_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL
    )

    frame_size = screen_width * screen_height * 4  # RGBA = 4 bytes/pixel

    running = True
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                break

        raw_frame = video_process.stdout.read(frame_size)
        if len(raw_frame) < frame_size:
            running = False
            break

        # Convert raw bytes -> NumPy array -> PyGame surface
        frame_array = np.frombuffer(raw_frame, dtype=np.uint8)
        frame_array = frame_array.reshape((screen_height, screen_width, 4))
        frame_surface = pygame.image.frombuffer(frame_array.tobytes(),
                                                (screen_width, screen_height),
                                                "RGBA")
        screen.blit(frame_surface, (0, 0))
        pygame.display.flip()
        clock.tick(25)

    video_process.terminate()
    video_process.wait()
    audio_process.terminate()
    audio_process.wait()
    pygame.quit()
    sys.exit()

if __name__ == "__main__":
    main()
EXAMPLE TWO:
# NO VIDEO OVERLAY - SOUND SYNCHED
import ffmpeg
import pygame
import sys
import numpy as np
import tempfile
import os

def extract_audio(input_file, output_file):
    """Extract audio from video file to temporary WAV file"""
    (
        ffmpeg
        .input(input_file)
        .output(output_file, acodec='pcm_s16le', ac=2, ar='44100')
        .overwrite_output()
        .run(capture_stdout=True, capture_stderr=True)
    )

def get_video_fps(input_file):
    probe = ffmpeg.probe(input_file)
    video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
    fps_str = video_info.get('r_frame_rate', '25/1')
    num, den = map(int, fps_str.split('/'))
    return num / den

input_file = "testing/stefano_prores422_hq.mov"

# Create temporary WAV file
temp_audio = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
temp_audio.close()
extract_audio(input_file, temp_audio.name)

probe = ffmpeg.probe(input_file)
video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
width = int(video_info['width'])
height = int(video_info['height'])
fps = get_video_fps(input_file)

process = (
    ffmpeg
    .input(input_file)
    .output('pipe:', format='rawvideo', pix_fmt='rgb24')
    .run_async(pipe_stdout=True)
)

pygame.init()
pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=4096)
clock = pygame.time.Clock()
screen = pygame.display.set_mode((width, height))

pygame.mixer.music.load(temp_audio.name)
pygame.mixer.music.play()

frame_count = 0
start_time = pygame.time.get_ticks()

while True:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.mixer.music.stop()
            os.unlink(temp_audio.name)
            sys.exit()

    in_bytes = process.stdout.read(width * height * 3)
    if not in_bytes:
        break

    # Calculate timing for synchronization
    expected_frame_time = frame_count * (1000 / fps)
    actual_time = pygame.time.get_ticks() - start_time
    if actual_time < expected_frame_time:
        pygame.time.wait(int(expected_frame_time - actual_time))

    in_frame = (
        np.frombuffer(in_bytes, dtype="uint8")
        .reshape([height, width, 3])
    )
    out_frame = pygame.surfarray.make_surface(np.transpose(in_frame, (1, 0, 2)))
    screen.blit(out_frame, (0, 0))
    pygame.display.flip()
    frame_count += 1

pygame.mixer.music.stop()
process.wait()
pygame.quit()
os.unlink(temp_audio.name)
I also tried using pygame.mixer with a separate MP3 audio file, but that didn't work either. Any help on how to sync the sound while keeping both videos playing at 25 FPS would be greatly appreciated!
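One possible direction (a rough, untested sketch, assuming 25 FPS, 1920x1080 frames, and the file paths from above; base_audio.wav is a hypothetical WAV extracted beforehand from the base video, e.g. with ffmpeg -i testing/stefano_prores422_hq.mov base_audio.wav) is to combine the two examples: keep the single ffmpeg overlay pipe for video, play the extracted audio with pygame.mixer, and pace every frame against pygame.mixer.music.get_pos() instead of a fixed clock.tick(25). The -re flag is dropped here because the read loop itself does the pacing:

# Sketch: pace the overlay pipe against pygame's audio clock.
import subprocess
import numpy as np
import pygame

WIDTH, HEIGHT, FPS = 1920, 1080, 25
FRAME_SIZE = WIDTH * HEIGHT * 4  # RGBA = 4 bytes/pixel

ffmpeg_command = [
    "ffmpeg",
    "-i", "testing/stefano_prores422_hq.mov",
    "-stream_loop", "-1",
    "-i", "testing/key_prores4444.mov",
    "-filter_complex",
    "[1:v]format=rgba,colorchannelmixer=aa=1.0[overlay];"
    "[0:v][overlay]overlay",
    "-f", "rawvideo", "-pix_fmt", "rgba", "pipe:1",
]

pygame.init()
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.mixer.init()
pygame.mixer.music.load("base_audio.wav")  # hypothetical pre-extracted audio

video = subprocess.Popen(ffmpeg_command, stdout=subprocess.PIPE,
                         stderr=subprocess.DEVNULL)
pygame.mixer.music.play()

frame_index = 0
running = True
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    raw = video.stdout.read(FRAME_SIZE)
    if len(raw) < FRAME_SIZE:
        break

    # Compare this frame's ideal timestamp to the audio clock: drop frames
    # that are already more than one frame late, wait for frames that are early.
    frame_ms = frame_index * 1000 / FPS
    frame_index += 1
    audio_ms = pygame.mixer.music.get_pos()  # ms since play() started
    if frame_ms < audio_ms - 1000 / FPS:
        continue                              # late: skip drawing this frame
    if frame_ms > audio_ms:
        pygame.time.wait(int(frame_ms - audio_ms))

    surface = pygame.image.frombuffer(raw, (WIDTH, HEIGHT), "RGBA")
    screen.blit(surface, (0, 0))
    pygame.display.flip()

video.terminate()
pygame.quit()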
-
Rounded corners in subtitles (Advanced SubStation Alpha [.ass])
21 December, by Leander Mihm
Is it possible to have rounded corners with BorderStyle 4 in .ass (Advanced SubStation Alpha)? I only found out that BorderStyle 4 exists because I came across it in another Stack Overflow post. Is there any good, complete documentation of the Advanced SubStation Alpha format?
I'm currently using the following configuration:
BorderStyle=4
Outline=10
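For what it's worth, neither border style seems to offer corner rounding: BorderStyle=3 is the spec's opaque box, and BorderStyle=4 is, as far as I can tell, a libass extension that draws the box per line rather than per event. A common workaround is to draw the rounded box yourself with ASS drawing commands on a lower layer and put the text on a layer above it. A hypothetical example (the coordinates, times, 200x100 box size, and 12px corner radius are all made up for illustration):

Dialogue: 0,0:00:00.00,0:00:05.00,Default,,0,0,0,,{\an7\pos(860,900)\c&H000000&\p1}m 12 0 l 188 0 b 200 0 200 12 200 12 l 200 88 b 200 100 188 100 188 100 l 12 100 b 0 100 0 88 0 88 l 0 12 b 0 0 12 0 12 0{\p0}
Dialogue: 1,0:00:00.00,0:00:05.00,Default,,0,0,0,,{\an5\pos(960,950)}Subtitle text

As for documentation, the "ASS Tags" pages in the Aegisub documentation are probably the closest thing to a complete reference for the format as rendered by libass.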
-
How to improve the smoothness of RTSP streaming through ffmpeg (processing 16 streams at the same time)
21 December, by Ling Yun
When the button is clicked, I create 16 threads in Qt and pass each one its RTSP stream address and the label it should render into. Each thread then does this in run():
void rtspthread::run()
{
    while (!shouldStop) {
        openRtspStream(rtspUrl.toUtf8().constData(), index);
    }
    qDebug() << "RTSP stream stopped.";
    emit finished();
}
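A side note (an assumption about intent, not a confirmed fix): if openRtspStream returns because of a connection error, this loop reconnects immediately and will spin at full speed. A short, hypothetical back-off between attempts is a cheap safeguard:

void rtspthread::run()
{
    while (!shouldStop) {
        openRtspStream(rtspUrl.toUtf8().constData(), index);
        if (!shouldStop)
            QThread::msleep(500); // hypothetical pause before reconnecting
    }
    qDebug() << "RTSP stream stopped.";
    emit finished();
}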
Opening the input stream:
void rtspthread::openRtspStream(const char* rtspUrl, int index)
{
    AVDictionary *options = nullptr;
    AVFrame *pFrameRGB = nullptr;
    uint8_t *pOutBuffer = nullptr;
    struct SwsContext *swsContext;
    AVFormatContext *pFormatCtx = nullptr;

    pFormatCtx = avformat_alloc_context();
    av_dict_set(&options, "rtsp_transport", "tcp", 0);
    av_dict_set(&options, "maxrate", "4000k", 0);

    if (avformat_open_input(&pFormatCtx, rtspUrl, nullptr, &options) != 0) {
        printf("Couldn't open stream file.\n");
        return;
    }
    if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
        printf("Couldn't find stream information.\n");
        return;
    }

    int videoStreamIndex = -1;
    for (int i = 0; i < pFormatCtx->nb_streams; i++) {
        if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            videoStreamIndex = i;
            break;
        }
    }

    if (videoStreamIndex != -1) {
        AVStream* videoStream = pFormatCtx->streams[videoStreamIndex];
        AVCodecParameters* codecpar = videoStream->codecpar;
        const AVCodec* videoCodec = avcodec_find_decoder(codecpar->codec_id);
        AVCodecContext* videoCodecContext = avcodec_alloc_context3(videoCodec);
        avcodec_parameters_to_context(videoCodecContext, codecpar);
        avcodec_open2(videoCodecContext, videoCodec, nullptr);

        AVPixelFormat srcPixFmt = videoCodecContext->pix_fmt;

        QLabel* label = this->parentWidget->findChild<QLabel*>("videoLabel");
        int targetWidth = label->width();
        int targetHeight = label->height();

        pOutBuffer = (uint8_t*)av_malloc(av_image_get_buffer_size(
            AV_PIX_FMT_RGB32, videoCodecContext->width, videoCodecContext->height, 1));
        pFrameRGB = av_frame_alloc();
        av_image_fill_arrays(pFrameRGB->data, pFrameRGB->linesize, pOutBuffer,
                             AV_PIX_FMT_RGB32,
                             videoCodecContext->width, videoCodecContext->height, 1);

        swsContext = sws_getContext(
            videoCodecContext->width, videoCodecContext->height, srcPixFmt,
            targetWidth, targetHeight, AV_PIX_FMT_RGB32,
            SWS_BICUBIC, nullptr, nullptr, nullptr
        );

        AVPacket packet;
        AVFrame* frame = av_frame_alloc();
        int frameCounter = 0;

        while (av_read_frame(pFormatCtx, &packet) >= 0) {
            if (shouldStop) {
                break;
            }
            if (packet.stream_index == videoStreamIndex) {
                int ret = avcodec_send_packet(videoCodecContext, &packet);
                int rets = avcodec_receive_frame(videoCodecContext, frame);
                if (rets < 0) {
                    qDebug() << "Error receiving frame from codec context";
                }
                sws_scale(swsContext, frame->data, frame->linesize, 0,
                          videoCodecContext->height,
                          pFrameRGB->data, pFrameRGB->linesize);
                QImage img(pFrameRGB->data[0], targetWidth, targetHeight,
                           pFrameRGB->linesize[0], QImage::Format_RGB32);
                qDebug() << index;
                emit frameReady(img.copy(), index);
                QThread::msleep(30); // control the frame rate
            }
            av_packet_unref(&packet);
        }

        av_frame_free(&frame);
        av_frame_free(&pFrameRGB);
        sws_freeContext(swsContext);
        avcodec_free_context(&videoCodecContext);
        avformat_close_input(&pFormatCtx);
        avformat_free_context(pFormatCtx);
    }
}
The video stutters and shows a snowy, corrupted picture. I want to lower the resolution to reduce the snow, but the resolution cannot be changed on the server side.
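Two things in the loop above may contribute to the corruption (a sketch under assumptions, reusing the variable names from the code above; not a verified fix): the RGB buffer is allocated at the source dimensions while sws_scale writes target-sized rows, and avcodec_receive_frame can legitimately return AVERROR(EAGAIN) before a frame is ready, yet sws_scale runs regardless:

// Size the RGB frame for the *target* dimensions so its linesize matches
// what sws_scale writes and what QImage expects.
pOutBuffer = (uint8_t*)av_malloc(av_image_get_buffer_size(
    AV_PIX_FMT_RGB32, targetWidth, targetHeight, 1));
av_image_fill_arrays(pFrameRGB->data, pFrameRGB->linesize, pOutBuffer,
                     AV_PIX_FMT_RGB32, targetWidth, targetHeight, 1);

// Drain the decoder and only scale frames that were actually produced.
if (avcodec_send_packet(videoCodecContext, &packet) == 0) {
    while (avcodec_receive_frame(videoCodecContext, frame) == 0) {
        sws_scale(swsContext, frame->data, frame->linesize, 0,
                  videoCodecContext->height,
                  pFrameRGB->data, pFrameRGB->linesize);
        QImage img(pFrameRGB->data[0], targetWidth, targetHeight,
                   pFrameRGB->linesize[0], QImage::Format_RGB32);
        emit frameReady(img.copy(), index);
    }
}

The fixed QThread::msleep(30) also paces all 16 streams blindly; pacing each stream by its frames' pts against the stream's time_base would be more robust.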
-
Newer ffprobe version detects audio codec incorrectly
21 December, by alancc
I have run into a strange problem.
I have a test video with h264 video codec and aac audio codec. It is at https://drive.google.com/file/d/1YAyz5cO0kb9r0MgahCpISR4bZ_1_n8PL/view?usp=sharing
I built an ffmpeg version myself; its version is:
ffprobe version 7.0.2 Copyright (c) 2007-2024 the FFmpeg developers
built with gcc 14.1.0 (Rev3, Built by MSYS2 project)
configuration: --enable-shared
libavutil      59.  8.100 / 59.  8.100
libavcodec     61.  3.100 / 61.  3.100
libavformat    61.  1.100 / 61.  1.100
libavdevice    61.  1.100 / 61.  1.100
libavfilter    10.  1.100 / 10.  1.100
libswscale      8.  1.100 /  8.  1.100
libswresample   5.  1.100 /  5.  1.100
I then use ffprobe to get its info:
ffprobe -v quiet -print_format ini -show_streams -show_packets test_h264.mp4 > test_h264.ini
Then I get an ini file which shows the audio codec as MP2:
[streams.stream.0]
index=0
codec_name=mp2
codec_long_name=MP2 (MPEG audio layer 2)
profile=unknown
codec_type=audio
codec_tag_string=mp4a
codec_tag=0x6134706d
sample_fmt=fltp
sample_rate=44100
channels=2
channel_layout=stereo
bits_per_sample=0
initial_padding=0
id=0x1
r_frame_rate=0/0
avg_frame_rate=0/0
time_base=1/44100
start_pts=2788
start_time=0.063220
duration_ts=435455
duration=9.874263
bit_rate=127706
max_bit_rate=N/A
bits_per_raw_sample=N/A
nb_frames=378
nb_read_frames=N/A
nb_read_packets=378
Another developer uses his own version of ffprobe:
ffprobe version 2023-02-22-git-d5cc7acff1-full_build-www.gyan.dev Copyright (c) 2007-2023 the FFmpeg developers
Based on the year, my version (2024) should be newer than his (2023), but his ffprobe detects the audio codec properly:
[streams.stream.1]
index=1
codec_name=aac
codec_long_name=AAC (Advanced Audio Coding)
profile=LC
codec_type=audio
codec_tag_string=mp4a
codec_tag=0x6134706d
sample_fmt=fltp
sample_rate=44100
channels=2
channel_layout=stereo
bits_per_sample=0
initial_padding=0
id=0x2
r_frame_rate=0/0
avg_frame_rate=0/0
time_base=1/44100
start_pts=1764
start_time=0.040000
duration_ts=436480
duration=9.897506
bit_rate=111733
max_bit_rate=N/A
bits_per_raw_sample=N/A
nb_frames=427
nb_read_frames=N/A
nb_read_packets=427
extradata_size=5
Why?
I also tried an ffprobe build on Ubuntu with the following version:
ffprobe version 6.1.1-3ubuntu5 Copyright (c) 2007-2023 the FFmpeg developers
built with gcc 13 (Ubuntu 13.2.0-23ubuntu3)
configuration: --prefix=/usr --extra-version=3ubuntu5 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --disable-omx --enable-gnutls --enable-libaom --enable-libass --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libharfbuzz --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-openal --enable-opencl --enable-opengl --disable-sndio --enable-libvpl --disable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-ladspa --enable-libbluray --enable-libjack --enable-libpulse --enable-librabbitmq --enable-librist --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libx264 --enable-libzmq --enable-libzvbi --enable-lv2 --enable-sdl2 --enable-libplacebo --enable-librav1e --enable-pocketsphinx --enable-librsvg --enable-libjxl --enable-shared
libavutil      58. 29.100 / 58. 29.100
libavcodec     60. 31.102 / 60. 31.102
libavformat    60. 16.100 / 60. 16.100
libavdevice    60.  3.100 / 60.  3.100
libavfilter     9. 12.100 /  9. 12.100
libswscale      7.  5.100 /  7.  5.100
libswresample   4. 12.100 /  4. 12.100
libpostproc    57.  3.100 / 57.  3.100
It detects the audio as AAC properly, but with different parameters; for example, bit_rate is 111733 (developer) but 110399 (Ubuntu), even though this parameter comes from the same file and so should be identical. (Note that the two outputs also disagree on start_time and duration, so a bitrate derived from stream size over duration would differ accordingly.)
[streams.stream.1]
index=1
codec_name=aac
codec_long_name=AAC (Advanced Audio Coding)
profile=LC
codec_type=audio
codec_tag_string=mp4a
codec_tag=0x6134706d
sample_fmt=fltp
sample_rate=44100
channels=2
channel_layout=stereo
bits_per_sample=0
initial_padding=0
id=0x2
r_frame_rate=0/0
avg_frame_rate=0/0
time_base=1/44100
start_pts=0
start_time=0.000000
duration_ts=441353
duration=10.008005
bit_rate=110399
max_bit_rate=N/A
bits_per_raw_sample=N/A
nb_frames=432
nb_read_frames=N/A
nb_read_packets=432
extradata_size=5
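Not an answer, but a way to narrow this down, using only standard ffprobe/ffmpeg options and the same file name as above: compare what each build reports per stream without decoding, and confirm the self-built FFmpeg includes an AAC decoder at all (the native one is built by default, so its absence would point at the build rather than the file):

ffprobe -v error -show_entries stream=index,codec_name,codec_tag_string -of ini test_h264.mp4
ffmpeg -hide_banner -decoders | grep -i aac

Comparing against an official 7.x binary (for example the gyan.dev builds the other developer used) would also separate "newer version" from "different build configuration" as the cause.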