Module deepsport_utilities.utils

Expand source code
from dataclasses import dataclass
from datetime import datetime
from enum import IntFlag
import io
import os
import random
import struct
import subprocess
from urllib import request
import errno

import cv2
import imageio
from matplotlib import pyplot as plt
import m3u8
import numpy as np

from mlworkflow import Dataset, AugmentedDataset, SideRunner, TransformedDataset, FilteredDataset
from aleatorpy import pseudo_random



# This object is defined both in here and in experimentator repository
# Any change here should be reported in experimentator as well
class SubsetType(IntFlag):
    TRAIN = 1
    EVAL  = 2

# This object is defined both in here and in experimentator repository
# Any change here should be reported in experimentator as well
class Subset:
    def __init__(self, name: str, subset_type: SubsetType, dataset: Dataset, keys=None, repetitions=1, desc=None):
        keys = keys if keys is not None else dataset.keys.all()
        assert isinstance(keys, (tuple, list)), f"Received instance of {type(keys)} for subset {name}"
        self.name = name
        self.type = subset_type
        self.dataset = FilteredDataset(dataset, predicate=lambda k,v: v is not None)
        self._keys = keys
        self.keys = keys
        self.repetitions = repetitions
        self.desc = desc
        self.is_training = self.type == SubsetType.TRAIN
        loop = None if self.is_training else repetitions
        self.shuffled_keys = pseudo_random(evolutive=self.is_training)(self.shuffled_keys)
        self.dataset.query_item = pseudo_random(loop=loop, input_dependent=True)(self.dataset.query_item)

    def shuffled_keys(self): # pylint: disable=method-hidden
        keys = self.keys * self.repetitions
        return random.sample(keys, len(keys)) if self.is_training else keys

    def __len__(self):
        return len(self.keys)*self.repetitions

    def __str__(self):
        return f"Subset<{self.name}>({len(self)})"

def gamma_correction(image, gammas=np.array([1.0, 1.0, 1.0])):
    image = image.astype(np.float32)
    image = image ** (1/gammas)
    image = np.clip(image, 0, 255).astype(np.uint8)
    return image



def find(path, dirs=None, verbose=True):
    if os.path.isabs(path):
        if not os.path.isfile(path) and not os.path.isdir(path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
        return path

    dirs = dirs or [os.getcwd(), *os.getenv("DATA_PATH", "").split(":")]
    for dirname in dirs:
        if dirname is None:
            continue
        tmp_path = os.path.join(dirname, path)
        if os.path.isfile(tmp_path) or os.path.isdir(tmp_path):
            if verbose:
                print("{} found in {}".format(path, tmp_path))
            return tmp_path

    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                "{} (searched in {})".format(path, dirs))


class DefaultDict(dict):
    def __init__(self, factory):
        self.factory = factory
    def __missing__(self, key):
        self[key] = self.factory(key)
        return self[key]

class LazyGeneratorBackedList(list):
    def __init__(self, gen):
        self.gen = gen
    def next(self):
        item = next(self.gen, None)
        if item is None:
            raise StopIteration
        self.append(item)
    def __getitem__(self, i):
        while i < -len(self) or len(self) <= i:
            self.next()
        return super().__getitem__(i)

class DefaultList(list):
    def __init__(self, *args, default=None, default_factory=None):
        super().__init__(*args)
        self.default_factory = default_factory or (lambda x: default)
    def __getitem__(self, i):
        if i < -len(self) or len(self) <= i:
            return self.default_factory(i)
        return super().__getitem__(i)




class DelayedCallback:
    def __init__(self, callback, timedelta):
        self.timedelta = timedelta
        self.last = datetime.now()
        self.callback = callback
    def __call__(self):
        now = datetime.now()
        if now - self.last > self.timedelta:
            self.last = now
            self.callback()
    def __del__(self):
        try:
            self.callback()
        except:
            pass


class TolerentDataset(AugmentedDataset):
    def __init__(self, parent, retry=0):
        super().__init__(parent)
        self.retry = retry
    def augment(self, root_key, root_item):
        retry = self.retry
        while root_item is None and retry:
            root_item = self.parent.query_item(root_key)
            retry -= 1
        return root_item


class MergedDataset(Dataset):
    def __init__(self, *ds):
        self.ds = ds
        self.cache = {}
    def yield_keys(self):
        for ds in self.ds:
            for key in ds.yield_keys():
                self.cache[key] = ds
                yield key
    def query_item(self, key):
        return self.cache[key].query_item(key)

class VideoReaderDataset(Dataset):
    cap = None
    def __init__(self, filename, scale_factor=None, output_shape=None):
        raise
        # TODO: use instead
        # vid = imageio.get_reader("/home/gva/KS-FR-STCHAMOND_93815_concatenated.mp4",  'ffmpeg')
        # nums = [0, 1, 2]
        # for num in nums:
        #     image = vid.get_data(num)
        assert not scale_factor or not output_shape, "You cannot provide both 'scale_factor' and 'output_shape' arguments."
        self.cap = cv2.VideoCapture(filename)
        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        shape = tuple([int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))])
        if scale_factor:
            shape = tuple(int(x*scale_factor) for x in shape)
        elif output_shape:
            shape = output_shape
        self.shape = tuple(x-x%2 for x in shape) # make sure shape is even
    def __del__(self):
        if self.cap is not None:
            self.cap.release()
    def yield_keys(self):
        yield from range(self.frame_count)
    def query_item(self, i):
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        _, frame = self.cap.read()
        if frame is None:
            return None
        frame = cv2.resize(frame, self.shape)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return frame

class M3u8PlaylistDataset(Dataset):
    def __init__(self, filename, download_folder=None):
        self.playlist = m3u8.load(filename)
        self.download_folder = download_folder
    def yield_keys(self):
        yield from self.playlist.segments
    def query_item(self, key):
        if self.download_folder is not None:
            filename = os.path.join(self.download_folder, os.path.basename(key.uri))
            request.urlretrieve(key.uri, filename)
            return filename
        return key.uri

class VideoFileNameToDatasetReaderTransform():
    def __init__(self, **kwargs):
        self.kwargs = kwargs
    def __call__(self, key, filename):
        return VideoReaderDataset(filename, **self.kwargs)

class VideoFromPlaylistDataset(AugmentedDataset):
    def augment(self, root_key, dataset):
        for key in dataset.yield_keys():
            item = dataset.query_item(key)
            if item is not None:
                yield (root_key, root_key.uri, key), item

def VideoDataset(filename, **kwargs):
    folder = os.path.dirname(filename)
    supported_formats = {
        ".m3u8": lambda name: VideoFromPlaylistDataset(
            TransformedDataset(
                M3u8PlaylistDataset(name, download_folder=folder),
                [VideoFileNameToDatasetReaderTransform(**kwargs)]
            )
        ),
        ".mp4": lambda name: VideoReaderDataset(name, **kwargs) # pylint: disable=unnecessary-lambda
    }
    return supported_formats[os.path.splitext(filename)[1]](filename)


class DatasetSamplerDataset(Dataset):
    def __init__(self, dataset, count):
        self.parent = dataset
        self.keys = random.sample(list(dataset.keys.all()), count)
    def yield_keys(self):
        for key in self.keys:
            yield key
    def query_item(self, key):
        return self.parent.query_item(key)


def concatenate_chunks(output_filename, *chunk_urls):
    side_runner = SideRunner(10)
    for chunk_url in chunk_urls:
        side_runner.run_async(subprocess.run, ["wget", chunk_url])
    side_runner.collect_runs()

    command = [
        'ffmpeg',
        '-y',
        '-protocol_whitelist "concat,file,http,https,tcp,tls"',
        '-i "concat:{}"'.format("|".join([url[url.rfind("/")+1:] for url in chunk_urls])),
        '-c:a copy',
        '-c:v copy',
        '-movflags faststart',
        output_filename
    ]
    os.system(" ".join(command))
    #subprocess.run(command) # For obscure reason, subprocess doesn't work here

@dataclass
class BoundingBox:
    x: int
    y: int
    w: int
    h: int
    @property
    def x_slice(self):
        return slice(int(self.x), int(self.x+self.w), None)
    @property
    def y_slice(self):
        return slice(int(self.y), int(self.y+self.h), None)

    def increase_box(self, max_width, max_height, aspect_ratio=None, margin=0, padding=0):
        """ Adapt the bounding-box s.t. it
                - is increased by `margin` on all directions
                - lies within the source image of size `max_width`x`max_height`
                - has the aspect ratio given by `aspect_ratio` (if not None)
                - contains the original bounding-box (box is increased if necessary, up to source image limits)
            Arguments:
                max_width (int)      - width of input image
                max_height (int)     - height of input image
                aspect_ratio (float) - output aspect-ratio
                margin (int)         - margin in pixels to be added on 4 sides
            Returns:
                x_slice (slice) - the horizontal slice
                y_slice (slice) - the vertical slice
        """
        top   = max(-padding,           int(self.y-margin))
        bot   = min(max_height+padding, int(self.y+self.h+margin))
        left  = max(-padding,           int(self.x-margin))
        right = min(max_width+padding,  int(self.x+self.w+margin))

        if aspect_ratio is None:
            return slice(left, right, None), slice(top, bot, None)

        if padding:
            raise NotImplementedError("increase_box method doesn't support padding when aspect ratio is given")

        w = right - left
        h = bot - top
        if w/h > aspect_ratio: # box is wider
            h = int(w/aspect_ratio)
            if h > max_height: # box is too wide
                h = max_height
                w = int(max_height*aspect_ratio)
                left = max_width//2 - w//2
                return slice(left, w, None), slice(0, h, None)
            cy = (bot+top)//2
            if cy + h//2 > max_height: # box hits the top
                return slice(left, right, None), slice(0, h, None)
            if cy - h//2 < 0: # box hits the bot
                return slice(left, right, None), slice(max_height-h, max_height, None)
            return slice(left, right, None), slice(cy-h//2, cy-h//2+h, None)

        if w/h < aspect_ratio: # box is taller
            w = int(h*aspect_ratio)
            if w > max_width: # box is too tall
                w = max_width
                h = int(max_width/aspect_ratio)
                top = max_height//2 - h//2
                return slice(0, w, None), slice(top, top+h, None)
            cx = (left+right)//2
            if cx + w//2 > max_width: # box hits the right
                return slice(max_width-w, max_width, None), slice(top, bot, None)
            if cx - w//2 < 0: # box hits the left
                return slice(0, w, None), slice(top, bot, None)
            return slice(cx-w//2, cx-w//2+w, None), slice(top, bot, None)

        # else: good aspect_ratio
        return slice(left, right, None), slice(top, bot, None)



class BalancedSubest(Subset):
    """
    """
    def __init(self, balancing_attr, *args, **kwargs):
        super().__init__(self, *args, **kwargs)
        self.balancing_attr = balancing_attr
    def shuffled_keys(self):
        # logic
        return None

class VideoMaker():
    format_map = {
        ".mp4": 'mp4v',
        ".avi": 'XVID',
        ".mpeg4": 'H264'
    }
    writer = None
    def __init__(self, filename="output.mp4", framerate=15):
        self.filename = filename
        self.framerate = framerate
        self.fourcc = cv2.VideoWriter_fourcc(*self.format_map[os.path.splitext(filename)[1]])
    def __enter__(self):
        return self
    def __call__(self, image):
        if self.writer is None:
            shape = (image.shape[1], image.shape[0])
            self.writer = cv2.VideoWriter(filename=self.filename, fourcc=self.fourcc, fps=self.framerate, frameSize=shape, apiPreference=cv2.CAP_FFMPEG)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        self.writer.write(image)
    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.writer:
            self.writer.release()
            self.writer = None
            print("{} successfully written".format(self.filename))
    def __del__(self):
        if self.writer:
            self.writer.release()
            self.writer = None
            print("{} successfully written".format(self.filename))


def blend(image, saliency, alpha=1.0, beta=0.5, gamma=0.0):
    #assert image.dtype == np.uint8 and image.shape[2] == 3
    #assert saliency.dtype == np.uint8

    if len(saliency.shape) == 2 or saliency.shape[2] == 1:
        saliency = np.dstack((saliency, saliency, saliency))
    return cv2.addWeighted(image, alpha, saliency, beta, gamma)

color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
color_cycle_rgb = list(map(lambda color: list(map(lambda c: int(c, 16), [color[1:3], color[3:5], color[5:7]])), color_cycle))

# Image is 2D numpy array, q is quality 0-100
def jpegBlur(im, q):
    buf = io.BytesIO()
    imageio.imwrite(buf,im,format='jpg',quality=q)
    s = buf.getbuffer()
    return imageio.imread(s,format='jpg')

def setdefaultattr(obj, name, value):
    if not hasattr(obj, name):
        setattr(obj, name, value)
    return getattr(obj, name)

def split_equally(d, K):
    """ splits equally the keys of d given their values
        arguments:
            d (dict) - A dict {"label1": 30, "label2": 45, "label3": 22, ... "label<N>": 14}
            K (int)  - The number of split to make
        returns:
            A list of 'K' lists splitting equally the values of 'd':
            e.g. [[label1, label12, label19], [label2, label15], [label3, label10, label11], ...]
            where
            ```
               d["label1"]+d["label12"]+d["label19"]  ~=  d["label2"]+d["label15"]  ~=  d["label3"]+d["label10"]+d["label11]
            ```
    """
    s = sorted(d.items(), key=lambda kv: kv[1])
    f = [{"count": 0, "list": []} for _ in range(K)]
    while s:
        arena_label, count = s.pop(-1)
        index, _ = min(enumerate(f), key=(lambda x: x[1]["count"]))
        f[index]["count"] += count
        f[index]["list"].append(arena_label)
    return [x["list"] for x in f]


class MJPEGReader:
    def __init__(self, filename):
        self.fd = open(f"{filename}.idx", "rb")
        self.cap = cv2.VideoCapture(filename)
        self.header, self.version = struct.unpack("QI", self.fd.read(12))
    def __del__(self):
        if self.cap:
            self.cap.release()
    def __iter__(self):
        return self
    def __next__(self):
        try:
            tvsec, tvusec, offset, frame_idx, other = struct.unpack("IIQII", self.fd.read(24))
        except:
            raise StopIteration
        found, image = self.cap.read()
        timestamp = round(tvsec*1000+tvusec/1000)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if image is not None else None
        return timestamp, offset, frame_idx, other, image

def colorify_heatmap(heatmap, colormap="jet"):
    return (plt.get_cmap(colormap)(heatmap)[...,0:3]*255).astype(np.uint8)

Functions

def VideoDataset(filename, **kwargs)
Expand source code
def VideoDataset(filename, **kwargs):
    folder = os.path.dirname(filename)
    supported_formats = {
        ".m3u8": lambda name: VideoFromPlaylistDataset(
            TransformedDataset(
                M3u8PlaylistDataset(name, download_folder=folder),
                [VideoFileNameToDatasetReaderTransform(**kwargs)]
            )
        ),
        ".mp4": lambda name: VideoReaderDataset(name, **kwargs) # pylint: disable=unnecessary-lambda
    }
    return supported_formats[os.path.splitext(filename)[1]](filename)
def blend(image, saliency, alpha=1.0, beta=0.5, gamma=0.0)
Expand source code
def blend(image, saliency, alpha=1.0, beta=0.5, gamma=0.0):
    #assert image.dtype == np.uint8 and image.shape[2] == 3
    #assert saliency.dtype == np.uint8

    if len(saliency.shape) == 2 or saliency.shape[2] == 1:
        saliency = np.dstack((saliency, saliency, saliency))
    return cv2.addWeighted(image, alpha, saliency, beta, gamma)
def colorify_heatmap(heatmap, colormap='jet')
Expand source code
def colorify_heatmap(heatmap, colormap="jet"):
    return (plt.get_cmap(colormap)(heatmap)[...,0:3]*255).astype(np.uint8)
def concatenate_chunks(output_filename, *chunk_urls)
Expand source code
def concatenate_chunks(output_filename, *chunk_urls):
    side_runner = SideRunner(10)
    for chunk_url in chunk_urls:
        side_runner.run_async(subprocess.run, ["wget", chunk_url])
    side_runner.collect_runs()

    command = [
        'ffmpeg',
        '-y',
        '-protocol_whitelist "concat,file,http,https,tcp,tls"',
        '-i "concat:{}"'.format("|".join([url[url.rfind("/")+1:] for url in chunk_urls])),
        '-c:a copy',
        '-c:v copy',
        '-movflags faststart',
        output_filename
    ]
    os.system(" ".join(command))
def find(path, dirs=None, verbose=True)
Expand source code
def find(path, dirs=None, verbose=True):
    if os.path.isabs(path):
        if not os.path.isfile(path) and not os.path.isdir(path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
        return path

    dirs = dirs or [os.getcwd(), *os.getenv("DATA_PATH", "").split(":")]
    for dirname in dirs:
        if dirname is None:
            continue
        tmp_path = os.path.join(dirname, path)
        if os.path.isfile(tmp_path) or os.path.isdir(tmp_path):
            if verbose:
                print("{} found in {}".format(path, tmp_path))
            return tmp_path

    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                "{} (searched in {})".format(path, dirs))
def gamma_correction(image, gammas=array([1., 1., 1.]))
Expand source code
def gamma_correction(image, gammas=np.array([1.0, 1.0, 1.0])):
    image = image.astype(np.float32)
    image = image ** (1/gammas)
    image = np.clip(image, 0, 255).astype(np.uint8)
    return image
def jpegBlur(im, q)
Expand source code
def jpegBlur(im, q):
    buf = io.BytesIO()
    imageio.imwrite(buf,im,format='jpg',quality=q)
    s = buf.getbuffer()
    return imageio.imread(s,format='jpg')
def setdefaultattr(obj, name, value)
Expand source code
def setdefaultattr(obj, name, value):
    if not hasattr(obj, name):
        setattr(obj, name, value)
    return getattr(obj, name)
def split_equally(d, K)

splits equally the keys of d given their values arguments: d (dict) - A dict {"label1": 30, "label2": 45, "label3": 22, … "label": 14} K (int) - The number of split to make returns: A list of 'K' lists splitting equally the values of 'd': e.g. [[label1, label12, label19], [label2, label15], [label3, label10, label11], …] where d["label1"]+d["label12"]+d["label19"] ~= d["label2"]+d["label15"] ~= d["label3"]+d["label10"]+d["label11]

Expand source code
def split_equally(d, K):
    """ splits equally the keys of d given their values
        arguments:
            d (dict) - A dict {"label1": 30, "label2": 45, "label3": 22, ... "label<N>": 14}
            K (int)  - The number of split to make
        returns:
            A list of 'K' lists splitting equally the values of 'd':
            e.g. [[label1, label12, label19], [label2, label15], [label3, label10, label11], ...]
            where
            ```
               d["label1"]+d["label12"]+d["label19"]  ~=  d["label2"]+d["label15"]  ~=  d["label3"]+d["label10"]+d["label11]
            ```
    """
    s = sorted(d.items(), key=lambda kv: kv[1])
    f = [{"count": 0, "list": []} for _ in range(K)]
    while s:
        arena_label, count = s.pop(-1)
        index, _ = min(enumerate(f), key=(lambda x: x[1]["count"]))
        f[index]["count"] += count
        f[index]["list"].append(arena_label)
    return [x["list"] for x in f]

Classes

class BalancedSubest (name: str, subset_type: SubsetType, dataset: mlworkflow.datasets.Dataset, keys=None, repetitions=1, desc=None)
Expand source code
class BalancedSubest(Subset):
    """
    """
    def __init(self, balancing_attr, *args, **kwargs):
        super().__init__(self, *args, **kwargs)
        self.balancing_attr = balancing_attr
    def shuffled_keys(self):
        # logic
        return None

Ancestors

Methods

def shuffled_keys(self)
Expand source code
def shuffled_keys(self):
    # logic
    return None
class BoundingBox (x: int, y: int, w: int, h: int)

BoundingBox(x: int, y: int, w: int, h: int)

Expand source code
class BoundingBox:
    x: int
    y: int
    w: int
    h: int
    @property
    def x_slice(self):
        return slice(int(self.x), int(self.x+self.w), None)
    @property
    def y_slice(self):
        return slice(int(self.y), int(self.y+self.h), None)

    def increase_box(self, max_width, max_height, aspect_ratio=None, margin=0, padding=0):
        """ Adapt the bounding-box s.t. it
                - is increased by `margin` on all directions
                - lies within the source image of size `max_width`x`max_height`
                - has the aspect ratio given by `aspect_ratio` (if not None)
                - contains the original bounding-box (box is increased if necessary, up to source image limits)
            Arguments:
                max_width (int)      - width of input image
                max_height (int)     - height of input image
                aspect_ratio (float) - output aspect-ratio
                margin (int)         - margin in pixels to be added on 4 sides
            Returns:
                x_slice (slice) - the horizontal slice
                y_slice (slice) - the vertical slice
        """
        top   = max(-padding,           int(self.y-margin))
        bot   = min(max_height+padding, int(self.y+self.h+margin))
        left  = max(-padding,           int(self.x-margin))
        right = min(max_width+padding,  int(self.x+self.w+margin))

        if aspect_ratio is None:
            return slice(left, right, None), slice(top, bot, None)

        if padding:
            raise NotImplementedError("increase_box method doesn't support padding when aspect ratio is given")

        w = right - left
        h = bot - top
        if w/h > aspect_ratio: # box is wider
            h = int(w/aspect_ratio)
            if h > max_height: # box is too wide
                h = max_height
                w = int(max_height*aspect_ratio)
                left = max_width//2 - w//2
                return slice(left, w, None), slice(0, h, None)
            cy = (bot+top)//2
            if cy + h//2 > max_height: # box hits the top
                return slice(left, right, None), slice(0, h, None)
            if cy - h//2 < 0: # box hits the bot
                return slice(left, right, None), slice(max_height-h, max_height, None)
            return slice(left, right, None), slice(cy-h//2, cy-h//2+h, None)

        if w/h < aspect_ratio: # box is taller
            w = int(h*aspect_ratio)
            if w > max_width: # box is too tall
                w = max_width
                h = int(max_width/aspect_ratio)
                top = max_height//2 - h//2
                return slice(0, w, None), slice(top, top+h, None)
            cx = (left+right)//2
            if cx + w//2 > max_width: # box hits the right
                return slice(max_width-w, max_width, None), slice(top, bot, None)
            if cx - w//2 < 0: # box hits the left
                return slice(0, w, None), slice(top, bot, None)
            return slice(cx-w//2, cx-w//2+w, None), slice(top, bot, None)

        # else: good aspect_ratio
        return slice(left, right, None), slice(top, bot, None)

Class variables

var h : int
var w : int
var x : int
var y : int

Instance variables

var x_slice
Expand source code
@property
def x_slice(self):
    return slice(int(self.x), int(self.x+self.w), None)
var y_slice
Expand source code
@property
def y_slice(self):
    return slice(int(self.y), int(self.y+self.h), None)

Methods

def increase_box(self, max_width, max_height, aspect_ratio=None, margin=0, padding=0)

Adapt the bounding-box s.t. it - is increased by margin on all directions - lies within the source image of size max_widthxmax_height - has the aspect ratio given by aspect_ratio (if not None) - contains the original bounding-box (box is increased if necessary, up to source image limits)

Arguments

max_width (int) - width of input image max_height (int) - height of input image aspect_ratio (float) - output aspect-ratio margin (int) - margin in pixels to be added on 4 sides

Returns

x_slice (slice) - the horizontal slice y_slice (slice) - the vertical slice

Expand source code
def increase_box(self, max_width, max_height, aspect_ratio=None, margin=0, padding=0):
    """ Adapt the bounding-box s.t. it
            - is increased by `margin` on all directions
            - lies within the source image of size `max_width`x`max_height`
            - has the aspect ratio given by `aspect_ratio` (if not None)
            - contains the original bounding-box (box is increased if necessary, up to source image limits)
        Arguments:
            max_width (int)      - width of input image
            max_height (int)     - height of input image
            aspect_ratio (float) - output aspect-ratio
            margin (int)         - margin in pixels to be added on 4 sides
        Returns:
            x_slice (slice) - the horizontal slice
            y_slice (slice) - the vertical slice
    """
    top   = max(-padding,           int(self.y-margin))
    bot   = min(max_height+padding, int(self.y+self.h+margin))
    left  = max(-padding,           int(self.x-margin))
    right = min(max_width+padding,  int(self.x+self.w+margin))

    if aspect_ratio is None:
        return slice(left, right, None), slice(top, bot, None)

    if padding:
        raise NotImplementedError("increase_box method doesn't support padding when aspect ratio is given")

    w = right - left
    h = bot - top
    if w/h > aspect_ratio: # box is wider
        h = int(w/aspect_ratio)
        if h > max_height: # box is too wide
            h = max_height
            w = int(max_height*aspect_ratio)
            left = max_width//2 - w//2
            return slice(left, w, None), slice(0, h, None)
        cy = (bot+top)//2
        if cy + h//2 > max_height: # box hits the top
            return slice(left, right, None), slice(0, h, None)
        if cy - h//2 < 0: # box hits the bot
            return slice(left, right, None), slice(max_height-h, max_height, None)
        return slice(left, right, None), slice(cy-h//2, cy-h//2+h, None)

    if w/h < aspect_ratio: # box is taller
        w = int(h*aspect_ratio)
        if w > max_width: # box is too tall
            w = max_width
            h = int(max_width/aspect_ratio)
            top = max_height//2 - h//2
            return slice(0, w, None), slice(top, top+h, None)
        cx = (left+right)//2
        if cx + w//2 > max_width: # box hits the right
            return slice(max_width-w, max_width, None), slice(top, bot, None)
        if cx - w//2 < 0: # box hits the left
            return slice(0, w, None), slice(top, bot, None)
        return slice(cx-w//2, cx-w//2+w, None), slice(top, bot, None)

    # else: good aspect_ratio
    return slice(left, right, None), slice(top, bot, None)
class DatasetSamplerDataset (dataset, count)
Expand source code
class DatasetSamplerDataset(Dataset):
    def __init__(self, dataset, count):
        self.parent = dataset
        self.keys = random.sample(list(dataset.keys.all()), count)
    def yield_keys(self):
        for key in self.keys:
            yield key
    def query_item(self, key):
        return self.parent.query_item(key)

Ancestors

  • mlworkflow.datasets.Dataset

Methods

def query_item(self, key)

Returns a tuple for one item, typically (Xi, Yi), or (Xi,)

Expand source code
def query_item(self, key):
    return self.parent.query_item(key)
def yield_keys(self)
Expand source code
def yield_keys(self):
    for key in self.keys:
        yield key
class DefaultDict (factory)

dict() -> new empty dictionary dict(mapping) -> new dictionary initialized from a mapping object's (key, value) pairs dict(iterable) -> new dictionary initialized as if via: d = {} for k, v in iterable: d[k] = v dict(**kwargs) -> new dictionary initialized with the name=value pairs in the keyword argument list. For example: dict(one=1, two=2)

Expand source code
class DefaultDict(dict):
    def __init__(self, factory):
        self.factory = factory
    def __missing__(self, key):
        self[key] = self.factory(key)
        return self[key]

Ancestors

  • builtins.dict
class DefaultList (*args, default=None, default_factory=None)

Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.

Expand source code
class DefaultList(list):
    def __init__(self, *args, default=None, default_factory=None):
        super().__init__(*args)
        self.default_factory = default_factory or (lambda x: default)
    def __getitem__(self, i):
        if i < -len(self) or len(self) <= i:
            return self.default_factory(i)
        return super().__getitem__(i)

Ancestors

  • builtins.list
class DelayedCallback (callback, timedelta)
Expand source code
class DelayedCallback:
    def __init__(self, callback, timedelta):
        self.timedelta = timedelta
        self.last = datetime.now()
        self.callback = callback
    def __call__(self):
        now = datetime.now()
        if now - self.last > self.timedelta:
            self.last = now
            self.callback()
    def __del__(self):
        try:
            self.callback()
        except:
            pass
class LazyGeneratorBackedList (gen)

Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list. The argument must be an iterable if specified.

Expand source code
class LazyGeneratorBackedList(list):
    def __init__(self, gen):
        self.gen = gen
    def next(self):
        item = next(self.gen, None)
        if item is None:
            raise StopIteration
        self.append(item)
    def __getitem__(self, i):
        while i < -len(self) or len(self) <= i:
            self.next()
        return super().__getitem__(i)

Ancestors

  • builtins.list

Methods

def next(self)
Expand source code
def next(self):
    item = next(self.gen, None)
    if item is None:
        raise StopIteration
    self.append(item)
class M3u8PlaylistDataset (filename, download_folder=None)
Expand source code
class M3u8PlaylistDataset(Dataset):
    def __init__(self, filename, download_folder=None):
        self.playlist = m3u8.load(filename)
        self.download_folder = download_folder
    def yield_keys(self):
        yield from self.playlist.segments
    def query_item(self, key):
        if self.download_folder is not None:
            filename = os.path.join(self.download_folder, os.path.basename(key.uri))
            request.urlretrieve(key.uri, filename)
            return filename
        return key.uri

Ancestors

  • mlworkflow.datasets.Dataset

Methods

def query_item(self, key)

Returns a tuple for one item, typically (Xi, Yi), or (Xi,)

Expand source code
def query_item(self, key):
    if self.download_folder is not None:
        filename = os.path.join(self.download_folder, os.path.basename(key.uri))
        request.urlretrieve(key.uri, filename)
        return filename
    return key.uri
def yield_keys(self)
Expand source code
def yield_keys(self):
    yield from self.playlist.segments
class MJPEGReader (filename)
Expand source code
class MJPEGReader:
    def __init__(self, filename):
        self.fd = open(f"{filename}.idx", "rb")
        self.cap = cv2.VideoCapture(filename)
        self.header, self.version = struct.unpack("QI", self.fd.read(12))
    def __del__(self):
        if self.cap:
            self.cap.release()
    def __iter__(self):
        return self
    def __next__(self):
        try:
            tvsec, tvusec, offset, frame_idx, other = struct.unpack("IIQII", self.fd.read(24))
        except:
            raise StopIteration
        found, image = self.cap.read()
        timestamp = round(tvsec*1000+tvusec/1000)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) if image is not None else None
        return timestamp, offset, frame_idx, other, image
class MergedDataset (*ds)
Expand source code
class MergedDataset(Dataset):
    def __init__(self, *ds):
        self.ds = ds
        self.cache = {}
    def yield_keys(self):
        for ds in self.ds:
            for key in ds.yield_keys():
                self.cache[key] = ds
                yield key
    def query_item(self, key):
        return self.cache[key].query_item(key)

Ancestors

  • mlworkflow.datasets.Dataset

Methods

def query_item(self, key)

Returns a tuple for one item, typically (Xi, Yi), or (Xi,)

Expand source code
def query_item(self, key):
    return self.cache[key].query_item(key)
def yield_keys(self)
Expand source code
def yield_keys(self):
    for ds in self.ds:
        for key in ds.yield_keys():
            self.cache[key] = ds
            yield key
class Subset (name: str, subset_type: SubsetType, dataset: mlworkflow.datasets.Dataset, keys=None, repetitions=1, desc=None)
Expand source code
class Subset:
    def __init__(self, name: str, subset_type: SubsetType, dataset: Dataset, keys=None, repetitions=1, desc=None):
        keys = keys if keys is not None else dataset.keys.all()
        assert isinstance(keys, (tuple, list)), f"Received instance of {type(keys)} for subset {name}"
        self.name = name
        self.type = subset_type
        self.dataset = FilteredDataset(dataset, predicate=lambda k,v: v is not None)
        self._keys = keys
        self.keys = keys
        self.repetitions = repetitions
        self.desc = desc
        self.is_training = self.type == SubsetType.TRAIN
        loop = None if self.is_training else repetitions
        self.shuffled_keys = pseudo_random(evolutive=self.is_training)(self.shuffled_keys)
        self.dataset.query_item = pseudo_random(loop=loop, input_dependent=True)(self.dataset.query_item)

    def shuffled_keys(self): # pylint: disable=method-hidden
        keys = self.keys * self.repetitions
        return random.sample(keys, len(keys)) if self.is_training else keys

    def __len__(self):
        return len(self.keys)*self.repetitions

    def __str__(self):
        return f"Subset<{self.name}>({len(self)})"

Subclasses

Methods

def shuffled_keys(self)
Expand source code
def shuffled_keys(self): # pylint: disable=method-hidden
    keys = self.keys * self.repetitions
    return random.sample(keys, len(keys)) if self.is_training else keys
class SubsetType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Expand source code
class SubsetType(IntFlag):
    TRAIN = 1
    EVAL  = 2

Ancestors

  • enum.IntFlag
  • builtins.int
  • enum.Flag
  • enum.Enum

Class variables

var EVAL
var TRAIN
class TolerentDataset (parent, retry=0)

"Augments" a dataset in the sense that it can produce many child items from one root item of the dataset. The root key must be retrievable from the child key. By convention, the root key is in the first element of the child key. This is overridable with the root_key method.

>>> class PermutingDataset(AugmentedDataset):
...     def augment(self, root_key, root_item):
...         yield (root_key, 0), root_item
...         yield (root_key, 1), root_item[::-1]
>>> d = DictDataset({0: ("Denzel", "Washington"), 1: ("Tom", "Hanks")})
>>> d = PermutingDataset(d)
>>> new_keys = d.keys()
>>> new_keys
((0, 0), (0, 1), (1, 0), (1, 1))
>>> d.query(new_keys)
(array(['Denzel', 'Washington', 'Tom', 'Hanks'], ...),
 array(['Washington', 'Denzel', 'Hanks', 'Tom'], ...))
Expand source code
class TolerentDataset(AugmentedDataset):
    def __init__(self, parent, retry=0):
        super().__init__(parent)
        self.retry = retry
    def augment(self, root_key, root_item):
        retry = self.retry
        while root_item is None and retry:
            root_item = self.parent.query_item(root_key)
            retry -= 1
        return root_item

Ancestors

  • mlworkflow.datasets.AugmentedDataset
  • mlworkflow.datasets.Dataset

Methods

def augment(self, root_key, root_item)
Expand source code
def augment(self, root_key, root_item):
    retry = self.retry
    while root_item is None and retry:
        root_item = self.parent.query_item(root_key)
        retry -= 1
    return root_item
class VideoFileNameToDatasetReaderTransform (**kwargs)
Expand source code
class VideoFileNameToDatasetReaderTransform():
    def __init__(self, **kwargs):
        self.kwargs = kwargs
    def __call__(self, key, filename):
        return VideoReaderDataset(filename, **self.kwargs)
class VideoFromPlaylistDataset (parent)

"Augments" a dataset in the sense that it can produce many child items from one root item of the dataset. The root key must be retrievable from the child key. By convention, the root key is in the first element of the child key. This is overridable with the root_key method.

>>> class PermutingDataset(AugmentedDataset):
...     def augment(self, root_key, root_item):
...         yield (root_key, 0), root_item
...         yield (root_key, 1), root_item[::-1]
>>> d = DictDataset({0: ("Denzel", "Washington"), 1: ("Tom", "Hanks")})
>>> d = PermutingDataset(d)
>>> new_keys = d.keys()
>>> new_keys
((0, 0), (0, 1), (1, 0), (1, 1))
>>> d.query(new_keys)
(array(['Denzel', 'Washington', 'Tom', 'Hanks'], ...),
 array(['Washington', 'Denzel', 'Hanks', 'Tom'], ...))
Expand source code
class VideoFromPlaylistDataset(AugmentedDataset):
    def augment(self, root_key, dataset):
        for key in dataset.yield_keys():
            item = dataset.query_item(key)
            if item is not None:
                yield (root_key, root_key.uri, key), item

Ancestors

  • mlworkflow.datasets.AugmentedDataset
  • mlworkflow.datasets.Dataset

Methods

def augment(self, root_key, dataset)
Expand source code
def augment(self, root_key, dataset):
    for key in dataset.yield_keys():
        item = dataset.query_item(key)
        if item is not None:
            yield (root_key, root_key.uri, key), item
class VideoMaker (filename='output.mp4', framerate=15)
Expand source code
class VideoMaker():
    format_map = {
        ".mp4": 'mp4v',
        ".avi": 'XVID',
        ".mpeg4": 'H264'
    }
    writer = None
    def __init__(self, filename="output.mp4", framerate=15):
        self.filename = filename
        self.framerate = framerate
        self.fourcc = cv2.VideoWriter_fourcc(*self.format_map[os.path.splitext(filename)[1]])
    def __enter__(self):
        return self
    def __call__(self, image):
        if self.writer is None:
            shape = (image.shape[1], image.shape[0])
            self.writer = cv2.VideoWriter(filename=self.filename, fourcc=self.fourcc, fps=self.framerate, frameSize=shape, apiPreference=cv2.CAP_FFMPEG)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        self.writer.write(image)
    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.writer:
            self.writer.release()
            self.writer = None
            print("{} successfully written".format(self.filename))
    def __del__(self):
        if self.writer:
            self.writer.release()
            self.writer = None
            print("{} successfully written".format(self.filename))

Class variables

var format_map
var writer
class VideoReaderDataset (filename, scale_factor=None, output_shape=None)
Expand source code
class VideoReaderDataset(Dataset):
    cap = None
    def __init__(self, filename, scale_factor=None, output_shape=None):
        raise
        # TODO: use instead
        # vid = imageio.get_reader("/home/gva/KS-FR-STCHAMOND_93815_concatenated.mp4",  'ffmpeg')
        # nums = [0, 1, 2]
        # for num in nums:
        #     image = vid.get_data(num)
        assert not scale_factor or not output_shape, "You cannot provide both 'scale_factor' and 'output_shape' arguments."
        self.cap = cv2.VideoCapture(filename)
        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        shape = tuple([int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))])
        if scale_factor:
            shape = tuple(int(x*scale_factor) for x in shape)
        elif output_shape:
            shape = output_shape
        self.shape = tuple(x-x%2 for x in shape) # make sure shape is even
    def __del__(self):
        if self.cap is not None:
            self.cap.release()
    def yield_keys(self):
        yield from range(self.frame_count)
    def query_item(self, i):
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        _, frame = self.cap.read()
        if frame is None:
            return None
        frame = cv2.resize(frame, self.shape)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return frame

Ancestors

  • mlworkflow.datasets.Dataset

Class variables

var cap

Methods

def query_item(self, i)

Returns a tuple for one item, typically (Xi, Yi), or (Xi,)

Expand source code
def query_item(self, i):
    self.cap.set(cv2.CAP_PROP_POS_FRAMES, i)
    _, frame = self.cap.read()
    if frame is None:
        return None
    frame = cv2.resize(frame, self.shape)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    return frame
def yield_keys(self)
Expand source code
def yield_keys(self):
    yield from range(self.frame_count)