Coverage for src/image_utils/video_utils.py: 97%

36 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-19 16:15 -0700

1import numpy as np 

2from pathlib import Path 

3from typing import Final, Set 

4from enum import Enum, unique 

5 

6# Several functions were taken from: https://github.com/argoverse/av2-api 

7 

8 

@unique
class VideoCodecs(str, Enum):
    """FFmpeg encoder names that may be used when encoding mp4 videos.

    NOTE: Which of these encoders are usable depends on the FFmpeg build
    installed on the machine. LIBX264 is the recommended default.
    """

    # H.264: https://en.wikipedia.org/wiki/Advanced_Video_Coding
    LIBX264 = "libx264"
    # H.265: https://en.wikipedia.org/wiki/High_Efficiency_Video_Coding
    LIBX265 = "libx265"
    # macOS GPU acceleration.
    HEVC_VIDEOTOOLBOX = "hevc_videotoolbox"

20 

21 

# Codecs whose streams write_video tags with the "hvc1" codec tag.
HIGH_EFFICIENCY_VIDEO_CODECS: Final[Set[VideoCodecs]] = {
    VideoCodecs.LIBX265,
    VideoCodecs.HEVC_VIDEOTOOLBOX,
}

23 

24 

def crop_video_to_even_dims(video: np.ndarray) -> np.ndarray:
    """Trim a 4d video array so that its frame height and width are both even.

    Note: typical "pad" or "crop" filters do not function properly with pypi AV's
    stream configuration options, so the trimming is done on the array itself.

    Args:
        video: (N,H1,W1,3) array representing N RGB frames of identical dimensions,
            where H1 and W1 may be odd.

    Returns:
        (N,H2,W2,3) array representing N RGB frames of identical dimensions, where
            H2 and W2 are even. When a dimension is odd, its last row (bottom) or
            last column (far right) is dropped from every frame.
    """
    _, num_rows, num_cols, _ = video.shape

    # Round each spatial size down to the nearest multiple of two.
    even_rows = (num_rows // 2) * 2
    even_cols = (num_cols // 2) * 2

    return video[:, :even_rows, :even_cols, :]

45 

46 

def write_video(
    video: np.ndarray,
    dst: Path,
    codec: VideoCodecs = VideoCodecs.LIBX264,
    fps: int = 10,
    crf: int = 27,
    preset: str = "veryfast",
) -> None:
    """Encode a sequence of RGB frames into a video file via the FFmpeg Python bindings.

    Reference: https://github.com/PyAV-Org/PyAV

    Args:
        video: (N,H,W,3) Array representing N RGB frames of identical dimensions.
            If H or W is odd, the frames are cropped to even dimensions first.
        dst: Path of the output video file. Missing parent directories are created.
        codec: Name of the codec.
        fps: Frame rate for video.
        crf: Constant rate factor (CRF) parameter of video, controlling the quality.
            Lower values would result in better quality, at the expense of higher file sizes.
            For x264, the valid Constant Rate Factor (crf) range is 0-51.
        preset: File encoding speed. Options range from "ultrafast", ..., "fast", ..., "medium", ..., "slow", ...
            Higher compression efficiency often translates to slower video encoding speed, at file write time.
    """
    _, height, width, _ = video.shape

    # The encoder rejects odd frame sizes ("height not divisible by 2"), so trim first.
    if height % 2 or width % 2:
        video = crop_video_to_even_dims(video)
        _, height, width, _ = video.shape

    dst.parent.mkdir(parents=True, exist_ok=True)

    # Imported here, so PyAV is only required when a video is actually written.
    import av

    pixel_format = "rgb24"
    encoder_options = {
        "crf": str(crf),
        "hwaccel": "auto",
        "movflags": "+faststart",
        "preset": preset,
        "profile:v": "main",
    }

    with av.open(str(dst), "w") as container:
        video_stream = container.add_stream(codec, fps)
        if codec in HIGH_EFFICIENCY_VIDEO_CODECS:
            video_stream.codec_tag = "hvc1"
        video_stream.width = width
        video_stream.height = height
        video_stream.options = encoder_options

        for image in video:
            frame = av.VideoFrame.from_ndarray(image, format=pixel_format)
            container.mux(video_stream.encode(frame))

        # Encoding None drains whatever packets the encoder is still holding.
        container.mux(video_stream.encode(None))