# Coverage for src/image_utils/video_utils.py: 97% (36 statements)
# Report generated by coverage.py v7.2.5, created at 2023-05-19 16:15 -0700
from enum import Enum, unique
from pathlib import Path
from typing import Final, Set

import numpy as np
# Several functions were taken from: https://github.com/argoverse/av2-api
@unique
class VideoCodecs(str, Enum):
    """Available video codecs for encoding mp4 videos.

    Members subclass ``str``, so each one compares equal to its raw codec
    name (e.g. ``VideoCodecs.LIBX264 == "libx264"``). ``@unique`` rejects
    accidental duplicate values at class-creation time.

    NOTE: The codecs available are dependent on the FFmpeg build that
        you are using. We recommend defaulting to LIBX264.
    """

    LIBX264 = "libx264"  # https://en.wikipedia.org/wiki/Advanced_Video_Coding
    LIBX265 = "libx265"  # https://en.wikipedia.org/wiki/High_Efficiency_Video_Coding
    HEVC_VIDEOTOOLBOX = "hevc_videotoolbox"  # macOS GPU acceleration.
# Codecs for which write_video sets the "hvc1" codec tag on the output stream.
HIGH_EFFICIENCY_VIDEO_CODECS: Final[Set[VideoCodecs]] = {VideoCodecs.LIBX265, VideoCodecs.HEVC_VIDEOTOOLBOX}
def crop_video_to_even_dims(video: np.ndarray) -> np.ndarray:
    """Crop a video tensor (4d nd-array) along the height and width dimensions to ensure even dimensions.

    Note: typical "pad" or "crop" filters do not function properly with pypi AV's stream configuration options.

    Args:
        video: (N,H1,W1,3) array representing N RGB frames of identical dimensions, where H1 and W1 may be odd.

    Returns:
        (N,H2,W2,3) array representing N RGB frames of identical dimensions, where H2 and W2 are even.
        The crop is performed on the far right column and/or bottom row of each frame.
        The result is a slice (view) of the input array, not a copy.

    Raises:
        ValueError: If `video` is not a 4-dimensional array.
    """
    if video.ndim != 4:
        raise ValueError(f"Expected a 4d (N,H,W,3) video array, but received an array of shape {video.shape}.")

    _, height, width, _ = video.shape

    # Subtracting the remainder mod 2 drops exactly one row/column when the size is odd, none when even.
    even_height = height - height % 2
    even_width = width - width % 2

    return video[:, :even_height, :even_width, :]
def write_video(
    video: np.ndarray,
    dst: Path,
    codec: VideoCodecs = VideoCodecs.LIBX264,
    fps: int = 10,
    crf: int = 27,
    preset: str = "veryfast",
) -> None:
    """Use the FFMPEG Python bindings to encode a video from a sequence of RGB frames.

    Reference: https://github.com/PyAV-Org/PyAV

    Args:
        video: (N,H,W,3) Array representing N RGB frames of identical dimensions.
            Frames with an odd height or width are cropped by one row/column before encoding.
        dst: Path of the output video file. Missing parent directories are created.
        codec: Name of the codec.
        fps: Frame rate for video.
        crf: Constant rate factor (CRF) parameter of video, controlling the quality.
            Lower values would result in better quality, at the expense of higher file sizes.
            For x264, the valid Constant Rate Factor (crf) range is 0-51.
        preset: File encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
            Higher compression efficiency often translates to slower video encoding speed, at file write time.

    Raises:
        ValueError: If `video` is not a 4-dimensional array (the shape cannot be unpacked).
    """
    _, height, width, _ = video.shape

    # Crop, if the height or width is odd (avoid "height not divisible by 2" error).
    if height % 2 != 0 or width % 2 != 0:
        video = crop_video_to_even_dims(video)
        _, height, width, _ = video.shape

    dst.parent.mkdir(parents=True, exist_ok=True)

    # Local import: PyAV is only required when this function is actually called.
    import av

    with av.open(str(dst), "w") as output:
        stream = output.add_stream(codec, fps)
        if codec in HIGH_EFFICIENCY_VIDEO_CODECS:
            # NOTE(review): presumably tags HEVC output for Apple/QuickTime player compatibility -- confirm.
            stream.codec_tag = "hvc1"
        stream.width = width
        stream.height = height
        stream.options = {
            "crf": str(crf),
            "hwaccel": "auto",
            "movflags": "+faststart",
            "preset": preset,
            "profile:v": "main",
        }

        for img in video:
            frame = av.VideoFrame.from_ndarray(img, format="rgb24")
            output.mux(stream.encode(frame))

        # Encoding `None` flushes any packets still buffered inside the encoder.
        output.mux(stream.encode(None))