
Recherche avancée
Autres articles (44)
-
Les formats acceptés
28 janvier 2010, parLes commandes suivantes permettent d’avoir des informations sur les formats et codecs gérés par l’installation locale de ffmpeg :
ffmpeg -codecs ffmpeg -formats
Les formats vidéo acceptés en entrée
Cette liste est non exhaustive, elle met en exergue les principaux formats utilisés : h264 : H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 m4v : raw MPEG-4 video format flv : Flash Video (FLV) / Sorenson Spark / Sorenson H.263 Theora wmv :
Les formats vidéos de sortie possibles
Dans un premier temps on (...) -
Les vidéos
21 avril 2011, parComme les documents de type "audio", Mediaspip affiche dans la mesure du possible les vidéos grâce à la balise html5 <video>.
Un des inconvénients de cette balise est qu’elle n’est pas reconnue correctement par certains navigateurs (Internet Explorer pour ne pas le nommer) et que chaque navigateur ne gère en natif que certains formats de vidéos.
Son avantage principal quant à lui est de bénéficier de la prise en charge native de vidéos dans les navigateurs et donc de se passer de l’utilisation de Flash et (...) -
Déploiements possibles
31 janvier 2010, parDeux types de déploiements sont envisageables, dépendant de deux aspects : La méthode d’installation envisagée (en standalone ou en ferme) ; Le nombre d’encodages journaliers et la fréquentation envisagés ;
L’encodage de vidéos est un processus lourd consommant énormément de ressources système (CPU et RAM), il est nécessaire de prendre tout cela en considération. Ce système n’est donc possible que sur un ou plusieurs serveurs dédiés.
Version mono serveur
La version mono serveur consiste à n’utiliser qu’une (...)
Sur d’autres sites (4775)
-
FFmpeg fails to draw text
6 avril 2024, par Edoardo Balducci — I've rarely used ffmpeg before, so sorry if the question is too dumb.
I have a problem adding a text layer to a video frame using ffmpeg.


This is my current code :


import subprocess
from PyQt5.QtGui import QPixmap, QImage
from PyQt5.QtWidgets import QLabel

class VideoThumbnailLabel(QLabel):
    """Label that displays a video thumbnail with the duration drawn on it.

    The thumbnail is produced by ``ffmpeg`` and the duration is read with
    ``ffprobe``; both executables must be available on ``PATH``.
    """

    def __init__(self, file_path, *args, **kwargs):
        """Build the label and load the thumbnail for ``file_path``.

        Args:
            file_path: Path of the video file to thumbnail.
            *args: Forwarded unchanged to ``QLabel``.
            **kwargs: Forwarded to ``QLabel``, except for the optional
                ``video`` keyword: an object exposing ``title`` and
                ``description`` that is used for the tooltip.
        """
        # Imported locally because the visible import block does not bring
        # ``Qt`` into scope, yet ``Qt.KeepAspectRatio`` is needed below.
        from PyQt5.QtCore import Qt

        # The original referenced an undefined ``video`` name (NameError).
        # Accept it as an optional keyword so existing call sites keep working.
        video = kwargs.pop("video", None)
        super().__init__(*args, **kwargs)
        self.video = video

        video_duration = self.get_video_duration(file_path)
        thumbnail_path = self.get_thumbnail(file_path, video_duration)
        if thumbnail_path:
            self.setPixmap(
                QPixmap(thumbnail_path).scaled(160, 90, Qt.KeepAspectRatio)
            )
        if video is not None:
            self.setToolTip(f"{video.title}\n{video.description}")

    def get_video_duration(self, video_path):
        """Return the duration of the video in whole seconds.

        Returns 0 when ``ffprobe`` cannot be run, exits with an error, or
        prints something that is not a number.
        """
        command = [
            'ffprobe', '-v', 'error', '-show_entries',
            'format=duration', '-of',
            'default=noprint_wrappers=1:nokey=1', video_path
        ]
        try:
            result = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
        except OSError as e:
            # Raised when the ffprobe executable cannot be started.
            print(f"Error getting video duration: {e}")
            return 0

        if result.returncode != 0:
            print(f"ffprobe error: {result.stderr}")
            return 0

        try:
            # Truncated to an integer so it can be formatted as HH:MM:SS.
            return int(float(result.stdout))
        except ValueError as e:
            print(f"Error getting video duration: {e}")
            return 0

    def get_thumbnail(self, video_path, duration):
        """Generate a thumbnail with the video duration overlaid.

        Args:
            video_path: Path of the video file.
            duration: Duration in seconds; converted to ``int`` before
                formatting.

        Returns:
            The path of the generated JPEG, or ``None`` if ffmpeg failed.
        """
        output_path = "thumbnail.jpg"  # Temporary thumbnail file (overwritten on each call)
        duration = int(duration)
        duration_str = (
            f"{duration // 3600:02d}:{(duration % 3600) // 60:02d}:{duration % 60:02d}"
        )
        # ':' separates options inside a filter description, so every colon
        # in the drawn text must be escaped. Unescaped, the pieces after the
        # first colon are taken as further drawtext options, which produces
        # "Both text and text file provided".
        overlay_text = f"Duration: {duration_str}".replace(":", "\\:")
        command = [
            'ffmpeg', '-i', video_path,
            '-ss', '00:00:01',  # Time to take the screenshot
            '-frames:v', '1',  # Number of frames to capture
            '-vf', f"drawtext=text='{overlay_text}':x=10:y=10:fontsize=24:fontcolor=white",
            '-q:v', '2',  # Output quality
            '-y',  # Overwrite output files without asking
            output_path
        ]
        try:
            # text=True so that stderr is logged as readable text, not bytes.
            result = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
        except OSError as e:
            # Raised when the ffmpeg executable cannot be started.
            print(f"Error generating thumbnail with duration: {e}")
            return None

        if result.returncode != 0:
            print(f"ffmpeg error: {result.stderr}")
            return None
        return output_path



and it is used like this :


# Lay the thumbnails out on a grid that is three columns wide.
for index, video in enumerate(self.videos):
    row, column = divmod(index, 3)
    thumbnail = VideoThumbnailLabel(video.file)
    # ``v=video`` binds the current video at definition time so every
    # thumbnail reports its own video rather than the last one of the loop.
    thumbnail.mousePressEvent = lambda event, v=video: self.onThumbnailClick(v)
    self.layout.addWidget(thumbnail, row, column)



I'm facing a problem where I am not able to get the thumbnail if I try to add the duration (I've tested it without the draw filter and worked fine)


I get this error (printed from
result.stderr
when the return code is non-zero) that I'm not able to comprehend:

ffmpeg error: b"ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers\n built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)\n configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.1.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopenvino --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox --enable-neon\n libavutil 58. 29.100 / 58. 29.100\n libavcodec 60. 31.102 / 60. 31.102\n libavformat 60. 16.100 / 60. 16.100\n libavdevice 60. 3.100 / 60. 3.100\n libavfilter 9. 12.100 / 9. 12.100\n libswscale 7. 5.100 / 7. 5.100\n libswresample 4. 12.100 / 4. 12.100\n libpostproc 57. 3.100 / 57. 3.100\nInput #0, mov,mp4,m4a,3gp,3g2,mj2, from '/Users/edoardo/Projects/work/test/BigBuckBunny.mp4':\n Metadata:\n major_brand : mp42\n minor_version : 0\n compatible_brands: isomavc1mp42\n creation_time : 2010-01-10T08:29:06.000000Z\n Duration: 00:09:56.47, start: 0.000000, bitrate: 2119 kb/s\n Stream #0:0[0x1](und): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 125 kb/s (default)\n Metadata:\n creation_time : 2010-01-10T08:29:06.000000Z\n handler_name : (C) 2007 Google Inc. 
v08.13.2007.\n vendor_id : [0][0][0][0]\n Stream #0:1[0x2](und): Video: h264 (High) (avc1 / 0x31637661), yuv420p(progressive), 1280x720 [SAR 1:1 DAR 16:9], 1991 kb/s, 24 fps, 24 tbr, 24k tbn (default)\n Metadata:\n creation_time : 2010-01-10T08:29:06.000000Z\n handler_name : (C) 2007 Google Inc. v08.13.2007.\n vendor_id : [0][0][0][0]\n[Parsed_drawtext_0 @ 0x60000331cd10] Both text and text file provided. Please provide only one\n[AVFilterGraph @ 0x600002018000] Error initializing filters\n[vost#0:0/mjpeg @ 0x13ce0c7e0] Error initializing a simple filtergraph\nError opening output file thumbnail.jpg.\nError opening output files: Invalid argument\n"



I've installed both
ffmpeg
and ffprobe
on my machine:

┌(edoardomacbook-air)-[~/Projects/work/tests-scripts] 
└─ $ ffmpeg -version && ffprobe -version 2 ⚙ 
ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.1.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopenvino --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox --enable-neon
libavutil 58. 29.100 / 58. 29.100
libavcodec 60. 31.102 / 60. 31.102
libavformat 60. 16.100 / 60. 16.100
libavdevice 60. 3.100 / 60. 3.100
libavfilter 9. 12.100 / 9. 12.100
libswscale 7. 5.100 / 7. 5.100
libswresample 4. 12.100 / 4. 12.100
libpostproc 57. 3.100 / 57. 3.100
ffprobe version 6.1.1 Copyright (c) 2007-2023 the FFmpeg developers
built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/6.1.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopenvino --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox --enable-audiotoolbox --enable-neon
libavutil 58. 29.100 / 58. 29.100
libavcodec 60. 31.102 / 60. 31.102
libavformat 60. 16.100 / 60. 16.100
libavdevice 60. 3.100 / 60. 3.100
libavfilter 9. 12.100 / 9. 12.100
libswscale 7. 5.100 / 7. 5.100
libswresample 4. 12.100 / 4. 12.100
libpostproc 57. 3.100 / 57. 3.100



Does anyone see the problem ?



P.S.: I know that I haven't provided a minimal reproducible example, but since I don't know where the problem lies, I didn't want to exclude anything.


-
FFmpeg RTSP drop rate increases when frame rate is reduced
13 avril 2024, par Avishka PereraI need to read an RTSP stream, process the images individually in Python, and then write the images back to an RTSP stream. As the RTSP server, I am using Mediamtx [1]. For streaming, I am using FFmpeg [2].


I have the following code that works perfectly fine. For simplification purposes, I am streaming three generated images.


import time
import numpy as np
import subprocess

# Frame geometry and rate of the generated stream.
width, height = 640, 480
fps = 25
rtsp_server_address = "rtsp://localhost:8554/mystream"

ffmpeg_cmd = [
    "ffmpeg",
    # No "-re": the loop below already paces the writes in real time, and
    # the FFmpeg documentation advises against "-re" for live input.
    "-f",
    "rawvideo",
    "-pix_fmt",
    "rgb24",
    "-s",
    f"{width}x{height}",
    # Input option: tell the rawvideo demuxer the real rate of the piped
    # frames. Without it the demuxer assumes 25 fps (see "25 tbr" in the
    # log), so any other output rate makes ffmpeg drop or duplicate frames.
    "-framerate",
    str(fps),
    "-i",
    "-",
    "-r",
    str(fps),
    "-avoid_negative_ts",
    "make_zero",
    "-vcodec",
    "libx264",
    "-threads",
    "4",
    "-f",
    "rtsp",
    rtsp_server_address,
]
colors = np.array(
    [
        [255, 0, 0],
        [0, 255, 0],
        [0, 0, 255],
    ]
).reshape(3, 1, 1, 3)
# Raw video frames are stored row-major, i.e. (height, width, channels).
images = (np.ones((3, height, width, 3)) * colors).astype(np.uint8)

if __name__ == "__main__":

    process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
    start = time.time()
    exported = 0
    try:
        while True:
            exported += 1
            # Sleep until the wall-clock time at which this frame is due.
            next_time = start + exported / fps
            now = time.time()
            if next_time > now:
                time.sleep(next_time - now)

            image_bytes = images[exported % 3].tobytes()

            process.stdin.write(image_bytes)
            process.stdin.flush()
    except BrokenPipeError:
        # ffmpeg exited and closed its end of the pipe; its own log output
        # explains why.
        print("ffmpeg closed its input pipe; stopping the stream")
    except KeyboardInterrupt:
        print("Interrupted; stopping the stream")
    finally:
        # Always close the pipe and reap the child, even on errors.
        try:
            process.stdin.close()
        except BrokenPipeError:
            pass
        process.wait()



The issue is that I need to run this at 10 fps, because the processing step is heavy and I can only afford 10 fps. However, when I reduce the frame rate from 25 to 10, the drop rate increases from 0% to 100%, and after a few iterations I get a
BrokenPipeError: [Errno 32] Broken pipe
. Refer to the appendix for the complete log.

As an alternative, I can use OpenCV compiled from source with GStreamer [3], but I prefer using FFmpeg to make the shipping process simple. Since compiling OpenCV from source can be tedious and dependent on the system.


References


[1] Mediamtx (formerly rtsp-simple-server) : https://github.com/bluenviron/mediamtx


[2] FFmpeg : https://github.com/FFmpeg/FFmpeg


[3] Compile OpenCV with GStreamer : https://github.com/bluenviron/mediamtx?tab=readme-ov-file#opencv


Appendix


Creating the source stream


To instantiate the unprocessed stream, I use the following command. This streams the content of my webcam as an RTSP stream.


ffmpeg -video_size 1280x720 -i /dev/video0 -avoid_negative_ts make_zero -vcodec libx264 -r 10 -f rtsp rtsp://localhost:8554/webcam



Error log


ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
 built with gcc 12.3.0 (conda-forge gcc 12.3.0-5)
 configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1712656518955/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1712656518955/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1712656518955/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1712656518955/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1712656518955/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --enable-libopus --pkg-config=/home/conda/feedstock_root/build_artifacts/ffmpeg_1712656518955/_build_env/bin/pkg-config
 libavutil 58. 29.100 / 58. 29.100
 libavcodec 60. 31.102 / 60. 31.102
 libavformat 60. 16.100 / 60. 16.100
 libavdevice 60. 3.100 / 60. 3.100
 libavfilter 9. 12.100 / 9. 12.100
 libswscale 7. 5.100 / 7. 5.100
 libswresample 4. 12.100 / 4. 12.100
 libpostproc 57. 3.100 / 57. 3.100
Input #0, rawvideo, from 'fd:':
 Duration: N/A, start: 0.000000, bitrate: 184320 kb/s
 Stream #0:0: Video: rawvideo (RGB[24] / 0x18424752), rgb24, 640x480, 184320 kb/s, 25 tbr, 25 tbn
Stream mapping:
 Stream #0:0 -> #0:0 (rawvideo (native) -> h264 (libx264))
[libx264 @ 0x5e2ef8b01340] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x5e2ef8b01340] profile High 4:4:4 Predictive, level 2.2, 4:4:4, 8-bit
[libx264 @ 0x5e2ef8b01340] 264 - core 164 r3095 baee400 - H.264/MPEG-4 AVC codec - Copyleft 2003-2022 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=4 threads=4 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=10 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
Output #0, rtsp, to 'rtsp://localhost:8554/mystream':
 Metadata:
 encoder : Lavf60.16.100
 Stream #0:0: Video: h264, yuv444p(tv, progressive), 640x480, q=2-31, 10 fps, 90k tbn
 Metadata:
 encoder : Lavc60.31.102 libx264
 Side data:
 cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: N/A
[vost#0:0/libx264 @ 0x5e2ef8b01080] Error submitting a packet to the muxer: Broken pipe 
[out#0/rtsp @ 0x5e2ef8afd780] Error muxing a packet
[out#0/rtsp @ 0x5e2ef8afd780] video:1kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: unknown
frame= 1 fps=0.1 q=-1.0 Lsize=N/A time=00:00:04.70 bitrate=N/A dup=0 drop=70 speed=0.389x 
[libx264 @ 0x5e2ef8b01340] frame I:16 Avg QP: 6.00 size: 147
[libx264 @ 0x5e2ef8b01340] frame P:17 Avg QP: 9.94 size: 101
[libx264 @ 0x5e2ef8b01340] frame B:17 Avg QP: 9.94 size: 64
[libx264 @ 0x5e2ef8b01340] consecutive B-frames: 50.0% 0.0% 42.0% 8.0%
[libx264 @ 0x5e2ef8b01340] mb I I16..4: 81.3% 18.7% 0.0%
[libx264 @ 0x5e2ef8b01340] mb P I16..4: 52.9% 0.0% 0.0% P16..4: 0.0% 0.0% 0.0% 0.0% 0.0% skip:47.1%
[libx264 @ 0x5e2ef8b01340] mb B I16..4: 0.0% 5.9% 0.0% B16..8: 0.1% 0.0% 0.0% direct: 0.0% skip:94.0% L0:56.2% L1:43.8% BI: 0.0%
[libx264 @ 0x5e2ef8b01340] 8x8 transform intra:15.4% inter:100.0%
[libx264 @ 0x5e2ef8b01340] coded y,u,v intra: 0.0% 0.0% 0.0% inter: 0.0% 0.0% 0.0%
[libx264 @ 0x5e2ef8b01340] i16 v,h,dc,p: 97% 0% 3% 0%
[libx264 @ 0x5e2ef8b01340] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 0% 0% 100% 0% 0% 0% 0% 0% 0%
[libx264 @ 0x5e2ef8b01340] Weighted P-Frames: Y:52.9% UV:52.9%
[libx264 @ 0x5e2ef8b01340] ref P L0: 88.9% 0.0% 0.0% 11.1%
[libx264 @ 0x5e2ef8b01340] kb/s:8.27
Conversion failed!
Traceback (most recent call last):
 File "/home/avishka/projects/read-process-stream/minimal-ffmpeg-error.py", line 58, in <module>
 process.stdin.write(image_bytes)
BrokenPipeError: [Errno 32] Broken pipe
</module>


-
ffmpeg failed to load audio file
14 avril 2024, par Vaishnav GhengeFailed to load audio: ffmpeg version 5.1.4-0+deb12u1 Copyright (c) Failed to load audio: ffmpeg version 5.1.4-0+deb12u1 Copyright (c) 2000-2023 the FFmpeg developers
 built with gcc 12 (Debian 12.2.0-14)
 configuration: --prefix=/usr --extra-version=0+deb12u1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libglslang --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librist --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --disable-sndio --enable-libjxl --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-libplacebo --enable-librav1e --enable-shared
 libavutil 57. 28.100 / 57. 28.100
 libavcodec 59. 37.100 / 59. 37.100
 libavformat 59. 27.100 / 59. 27.100
 libavdevice 59. 7.100 / 59. 7.100
 libavfilter 8. 44.100 / 8. 44.100
 libswscale 6. 7.100 / 6. 7.100
 libswresample 4. 7.100 / 4. 7.100
 libpostproc 56. 6.100 / 56. 6.100
/tmp/tmpjlchcpdm.wav: Invalid data found when processing input



backend :



@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Accept an uploaded WAV file and queue it for transcription.

    Expects a multipart form upload with the file under ``audio_file``.

    Returns:
        A JSON response with ``task_id`` and ``status`` when the upload was
        queued, or a JSON ``error`` message with HTTP status 400 when the
        upload is missing, unnamed, too large, or not named ``*.wav``.
    """
    # Check if audio file is present in the request
    if 'audio_file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    audio_file = request.files.get('audio_file')

    # Check if audio_file is sent in files
    if not audio_file:
        return jsonify({"error": "`audio_file` is missing in request.files"}), 400

    # Check if the file is present
    if audio_file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    filename = secure_filename(audio_file.filename)

    # Read the contents of the audio file
    contents = audio_file.read()

    max_file_size = 500 * 1024 * 1024
    if len(contents) > max_file_size:
        return jsonify({"error": "File is too large"}), 400

    # Check if the file extension suggests it's a WAV file.
    # Nothing is written to disk in this handler, so there is nothing to
    # delete here; the previous os.remove() on a never-saved path raised
    # FileNotFoundError and turned this 400 into a 500.
    if not filename.lower().endswith('.wav'):
        return jsonify({"error": "Only WAV files are supported"}), 400

    print(f"\033[92m{filename}\033[0m")

    # Call Celery task asynchronously
    result = transcribe_audio.delay(contents)

    return jsonify({
        "task_id": result.id,
        "status": "pending"
    })


@celery_app.task
def transcribe_audio(contents):
    """Transcribe raw audio bytes by writing them to a temporary WAV file.

    Args:
        contents: The bytes of the uploaded audio file.

    Returns:
        ``(transcription, elapsed_seconds)`` on success, or the error
        message as a string if anything failed.
    """
    # Local import so this block does not depend on the file's import list.
    import os

    temp_path = None
    try:
        # Create a temporary file to save the audio data. The ``with`` block
        # ends before transcription so the data is flushed and the file is
        # closed when the transcriber opens it by path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            temp_path = temp_audio.name
            temp_audio.write(contents)

        print(f"\033[92mFile temporary path: {temp_path}\033[0m")
        transcribe_start_time = time.time()

        # Transcribe the audio
        transcription = transcribe_with_whisper(temp_path)

        transcribe_end_time = time.time()
        print(f"\033[92mTranscripted text: {transcription}\033[0m")

        return transcription, transcribe_end_time - transcribe_start_time

    except Exception as e:
        # Task boundary: report the failure as the task result.
        print(f"\033[92mError: {e}\033[0m")
        return str(e)

    finally:
        # delete=False means nobody else removes the file; without this
        # every task leaves one file behind in the temp directory.
        # NOTE(review): assumes transcribe_with_whisper no longer needs the
        # file once it has returned — confirm.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass



frontend :


// Ask for microphone access once on mount, start recording, and publish the
// stream and recorder to component state. On unmount, stop both.
useEffect(() => {
  // The cleanup below must not read the `recorder` / `stream` state: with an
  // empty dependency list it would only ever see their values from the first
  // render. Keep local references to what this effect created instead.
  let activeRecorder: MediaRecorder | null = null;
  let activeStream: MediaStream | null = null;
  let cancelled = false;

  navigator.mediaDevices.getUserMedia({audio: true})
    .then((audioStream) => {
      if (cancelled) {
        // The component unmounted before access was granted: release the
        // microphone straight away.
        audioStream.getTracks().forEach(track => track.stop());
        return;
      }

      const mediaRecorder = new MediaRecorder(audioStream);

      // Append every non-empty chunk to the chunk list.
      mediaRecorder.ondataavailable = e => {
        if (e.data.size > 0) {
          setChunks(prevChunks => [...prevChunks, e.data]);
        }
      };

      mediaRecorder.onerror = (e) => {
        console.log("error: ", e);
      };

      mediaRecorder.onstart = () => {
        console.log("started");
      };

      activeRecorder = mediaRecorder;
      activeStream = audioStream;

      mediaRecorder.start();

      setStream(audioStream);
      setRecorder(mediaRecorder);
    })
    .catch((error) => {
      // Permission denied or no input device available.
      console.error("Could not start audio recording:", error);
    });

  return () => {
    cancelled = true;

    if (activeRecorder && activeRecorder.state === 'recording') {
      activeRecorder.stop();
    }

    if (activeStream) {
      activeStream.getTracks().forEach(track => track.stop());
    }
  };
}, []);

 // Every 8 seconds, ask the active recorder to emit the audio captured so
 // far; the timer is re-created whenever `recorder` changes.
 useEffect(() => {
   const requestChunk = () => {
     if (recorder && recorder.state === 'recording') {
       recorder.requestData(); // Trigger data available event
     }
   };

   const intervalId = setInterval(requestChunk, 8000); // Adjust the interval as needed

   const stopPolling = () => {
     if (intervalId) {
       console.log("Interval cleared");
       clearInterval(intervalId);
     }
   };

   return stopPolling;
 }, [recorder]);

 // Whenever the chunk list changes, hand the newest chunk over for upload.
 useEffect(() => {
   const processAudio = async () => {
     if (chunks.length === 0) {
       return;
     }

     // Only the most recent chunk is sent to the server for transcription.
     const newestChunk = chunks[chunks.length - 1];
     convertBlobToAudioFile(new Blob([newestChunk]));
   };

   void processAudio();
 }, [chunks]);

 // Read a Blob into an ArrayBuffer and pass the buffer on to
 // sendAudioToFlask. No audio format conversion happens here: the bytes are
 // forwarded exactly as they were recorded.
 const convertBlobToAudioFile = useCallback((blob: Blob) => {
   const fileReader = new FileReader();

   fileReader.onload = () => {
     // `result` is the ArrayBuffer holding the blob's bytes.
     sendAudioToFlask(fileReader.result as ArrayBuffer);
   };

   fileReader.readAsArrayBuffer(blob);
 }, []);

 // POST the audio bytes to the /transcribe endpoint as multipart form data
 // and remember the returned task id for later polling.
 const sendAudioToFlask = useCallback((audioBuffer: ArrayBuffer) => {
   const payload = new FormData();
   const audioBlob = new Blob([audioBuffer]);
   payload.append('audio_file', audioBlob, `speech_audio.wav`);

   console.log(payload.get("audio_file"));

   const rememberTask = (data: { task_id: string, status: string }) => {
     pendingTaskIdsRef.current.push(data.task_id);
   };

   const reportFailure = (error: unknown) => {
     console.error('Error sending audio to Flask server:', error);
   };

   fetch('http://34.87.75.138:8000/transcribe', {
     method: 'POST',
     body: payload
   })
     .then(response => response.json())
     .then(rememberTask)
     .catch(reportFailure);
 }, []);



I was trying to pass the audio from the frontend to the Whisper model, which runs in the Flask app.