A simple library to download, slice and search NFL game footage on a play-by-play basis.
This library comes with preloaded play-by-play meta data, which describes the start time of each play in the game footage. However, the actual footage does not come with this library and is not released by me. This package therefore provides utilities to batch download NFL Game Footage from the original source.
Once game footage is downloaded, you can use this library to search
plays and construct a playlist to play in vlc
with the
nflvid.vlc submodule.
""" A simple library to download, slice and search NFL game footage on a play-by-play basis. This library comes with preloaded play-by-play meta data, which describes the start time of each play in the game footage. However, the actual footage does not come with this library and is not released by me. This package therefore provides utilities to batch download NFL Game Footage from the original source. Once game footage is downloaded, you can use this library to search plays and construct a playlist to play in `vlc` with the [nflvid.vlc](http://pdoc.burntsushi.net/nflvid/vlc.m.html) submodule. """ import gzip import math import os import os.path as path import socket import sys import tempfile import urllib2 import bs4 import eventlet _httplib2 = eventlet.import_patched('httplib2') import eventlet.green.subprocess as subprocess from nflgame import OrderedDict __pdoc__ = {} __broadcast_cache = {} # game eid -> play id -> Play __coach_cache = {} # game eid -> play id -> Play _xmlf = path.join(path.split(__file__)[0], 'pbp-xml', '%s.xml.gz') _xml_base_url = 'http://e2.cdnl3.neulion.com/nfl/edl/nflgr/%d/%s.xml' _coach_url = 'rtmp://neulionms.fcod.llnwd.net/a5306/e1/mp4:' \ 'u/nfl/nfl/coachtapes/%s/%s_all_1600' _coach_url = ( 'rtmp://neulionms.fcod.llnwd.net', 'a5306/e1', 'mp4:u/nfl/nfl/coachtapes/%s/%s_all_1600', ) _broadcast_url = 'http://nlds82.cdnl3nl.neulion.com/nlds_vod/nfl/vod/' \ '%s/%s/%s/%s/2_%s_%s_%s_%s_h_whole_%d_%s.mp4.m3u8' def _eprint(s): print >> sys.stderr, s def broadcast_urls(gobj, quality='1600'): """ Returns possible HTTP Live Stream URLs (an m3u8 file) for the given game and quality. Use `nflvid.broadcast_url_status` to determine if it's a valid URL or not. Alternatively, use `nflvid.first_valid_broadcast_url` to retrieve the first valid URL. The kludge here is that the broadcast URLs can vary slightly and unpredictably from game to game. I haven't discovered a reliable means of accurately predicting which URL is correct. Note that it is unlikely any URL returned will be valid for preseason or postseason games. """ month, day = gobj.eid[4:6], gobj.eid[6:8] return [ _broadcast_url % (gobj.season(), month, day, gobj.gamekey, gobj.gamekey, gobj.away.lower(), gobj.home.lower(), gobj.season(), i, quality) for i in range(1, 4) ] def broadcast_url_status(url): """ Returns the HTTP status as a string for the given broadcast URL. A broadcast URL should be considered valid if and only if its HTTP status is `200`. """ resp, _ = _httplib2.Http().request(url, 'HEAD') return resp['status'] def first_valid_broadcast_url(urls): """ Returns the first valid broadcast URL in the list. If there is no valid broadcast URL, then `None` is returned. """ for url in urls: if broadcast_url_status(url) == '200': return url return None def coach_url(gobj): """ Returns the rtmp URL as a triple for the coach footage of the given game. The elements of the triple are: (rtmp server, rtmp app name, rtmp playpath) Coach video only comes in 1600 quality. """ return ( _coach_url[0], _coach_url[1], _coach_url[2] % (gobj.season(), gobj.gamekey), ) def footage_full(footage_dir, eid): """ Returns the path to the full video for a given game inside an nflvid footage directory. If the full footage doesn't exist, then None is returned. """ fp = _full_path(footage_dir, eid) if not os.access(fp, os.R_OK): return None return fp def footage_plays(footage_play_dir, eid): """ Returns a list of all footage broken down by play inside an nflvid footage directory. The list is sorted numerically by play id. If no footage breakdown exists for the game provided, then an empty list is returned. """ fp = _play_path(footage_play_dir, eid) if not os.access(fp, os.R_OK): return [] return sorted(os.listdir(fp), key=lambda s: int(s[0:-4])) def footage_play(footage_play_dir, eid, playid, stat=True): """ Returns a file path to an existing play slice in the footage play directory for the game and play given. If the file for the play is not readable, then `None` is returned. If `stat` is `False`, then the file's access will not be checked. """ gamedir = _play_path(footage_play_dir, eid) fp = path.join(gamedir, '%04d.mp4' % int(playid)) if stat and not os.access(fp, os.R_OK): return None return fp def _full_path(footage_dir, eid): return path.join(footage_dir, '%s.mp4' % eid) def _play_path(footage_play_dir, eid): return path.join(footage_play_dir, '%s' % eid) def _nice_game(gobj): return '(Season: %s, Week: %s, %s)' \ % (gobj.schedule['year'], gobj.schedule['week'], gobj) def unsliced_plays(footage_play_dir, gobj, coach=True, dry_run=False): """ Scans the game directory inside footage_play_dir and returns a list of plays that haven't been sliced yet. In particular, a play is only considered sliced if the following file is readable, assuming {playid} is its play id: {footage_play_dir}/{eid}/{playid}.mp4 All plays for the game given that don't fit this criteria will be returned in the list. If the list is empty, then all plays for the game have been sliced. Alternatively, `None` can be returned if there was a problem retrieving the play-by-play meta data. If `coach` is `False`, then play timings for broadcast footage will be used instead of coach timings. If `dry_run` is `True`, then only the first 10 plays of the game are sliced. """ ps = plays(gobj, coach) outdir = _play_path(footage_play_dir, gobj.eid) unsliced = [] if ps is None: return None for i, p in enumerate(ps.values()): if dry_run and i >= 10: break pid = p.idstr() if not os.access(path.join(outdir, '%s.mp4' % pid), os.R_OK): unsliced.append(p) return unsliced def slice(footage_play_dir, full_footage_file, gobj, coach=True, threads=4, dry_run=False): """ Uses `ffmpeg` to slice the given footage file into play-by-play pieces. The `full_footage_file` should be a path to a full game downloaded with `nflvid-footage` and `gobj` should be the corresponding `nflgame.game.Game` object. The `footage_play_dir` is where the pieces will be saved: {footage_play_dir}/{eid}/{playid}.mp4 This function will not duplicate work. If a video file exists for a particular play, then slice will not regenerate it. Note that this function uses an `eventlet` green pool to run multiple `ffmpeg` instances simultaneously. The maximum number of threads to use is specified by `threads`. This function only terminates when all threads have finished processing. If `coach` is `False`, then play timings for broadcast footage will be used instead of coach timings. If `dry_run` is `True`, then only the first 10 plays of the game are sliced. """ outdir = _play_path(footage_play_dir, gobj.eid) if not os.access(outdir, os.R_OK): os.makedirs(outdir) unsliced = unsliced_plays(footage_play_dir, gobj, coach, dry_run) if unsliced is None or len(unsliced) == 0: # Only show an annoying error message if there are no sliced # plays on disk. if not footage_plays(footage_play_dir, gobj.eid): _eprint( 'There are no unsliced plays remaining for game %s %s.\n' 'If they have not been sliced yet, then the XML play-by-play ' 'meta data may not be available or is corrupt.' % (gobj, _nice_game(gobj))) return max_dur = 0 if coach else 25 pool = eventlet.greenpool.GreenPool(threads) for p in unsliced: pool.spawn_n(slice_play, footage_play_dir, full_footage_file, gobj, p, max_dur, coach) pool.waitall() _eprint('DONE slicing game %s %s' % (gobj.eid, _nice_game(gobj))) def artificial_slice(footage_play_dir, gobj, gobj_play): """ Creates a video file that contains a single static image with a textual description of the play. The purpose is to provide some representation of a play even if its video form doesn't exist. (Or more likely, the play-by-play meta data for that play is corrupt.) This function requires the use of ImageMagick's `convert` with pango support. Note that `gobj_play` is an `nflgame.game.Play` object and not a `nflvid.Play` object. """ outdir = _play_path(footage_play_dir, gobj.eid) outpath = path.join(outdir, '%04d.mp4' % int(gobj_play.playid)) pango = '<span size="20000" foreground="white">' with tempfile.NamedTemporaryFile(mode='w+', suffix='.png') as tmp: cmd = ['convert', '-size', '640x480', # size of coach footage. configurable? '-background', 'black', 'pango:\n\n\n\n\n\n\n\n\n\n%s%s</span>' % (pango, gobj_play), tmp.name, ] _run_command(cmd) cmd = ['ffmpeg', '-f', 'image2', '-loop', '1', '-r:v', '7', '-i', tmp.name, '-pix_fmt', 'yuv420p', '-an', '-t', '10', outpath, ] _run_command(cmd) def slice_play(footage_play_dir, full_footage_file, gobj, play, max_duration=0, cut_scoreboard=True): """ This is just like `nflvid.slice`, but it only slices the play provided. In typical cases, `nflvid.slice` should be used since it makes sure not to duplicate work. This function will not check if the play-by-play directory for `gobj` has been created. `max_duration` is used to cap the length of a play. This drastically cuts down on the storage requirements of a game at the cost of potentially missing longer plays. This is particularly useful if you are slicing broadcast footage, where imposing a cap at about 15 seconds can decrease storage requirements by more than half without missing much. When `cut_scoreboard` is `True`, the first 3.0 seconds of the play will be clipped to remove the scoreboard view. """ outdir = _play_path(footage_play_dir, gobj.eid) st = play.start outpath = path.join(outdir, '%s.mp4' % play.idstr()) et = play.end if et is None: # Probably the last play of the game. et = st.add_seconds(40) if max_duration > 0 and (et.seconds() - st.seconds()) > max_duration: et = st.add_seconds(max_duration) if cut_scoreboard: st = st.add_seconds(3.0) dr = PlayTime(seconds=et.fractional() - st.fractional()) start_time = '%02d:%02d:%02d.%d' % (st.hh, st.mm, st.ss, st.milli) duration = '%02d:%02d:%02d.%d' % (dr.hh, dr.mm, dr.ss, dr.milli) cmd = ['ffmpeg', '-ss', start_time, '-t', duration, '-i', full_footage_file, '-acodec', 'copy', '-vcodec', 'copy', outpath, ] _run_command(cmd) def download_broadcast(footage_dir, gobj, quality='1600', dry_run=False): """ Starts an `ffmpeg` process to download the full broadcast of the given game with the quality provided. The qualities available are: 400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best. The footage will be saved to the following path: footage_dir/{eid}.mp4 If footage is already at that path, then an `exceptions.LookupError` is raised. A full game's worth of broadcast footage at a quality of 1600 is about **2GB**. """ fp = _full_path(footage_dir, gobj.eid) if os.access(fp, os.R_OK): raise LookupError('Footage path "%s" already exists.' % fp) urls = broadcast_urls(gobj, quality) url = first_valid_broadcast_url(urls) if url is None: _eprint('BAD URLs for game %s: %s' % (_nice_game(gobj), ', '.join(urls))) _eprint('FAILED to download game %s' % _nice_game(gobj)) return cmd = ['ffmpeg', '-i', url] if dry_run: cmd += ['-t', '30'] cmd += ['-absf', 'aac_adtstoasc', # no idea. ffmpeg says I need it though. '-acodec', 'copy', '-vcodec', 'copy', fp, ] _eprint('Downloading game %s %s' % (gobj.eid, _nice_game(gobj))) if not _run_command(cmd): _eprint('FAILED to download game %s' % _nice_game(gobj)) else: _eprint('DONE with game %s %s' % (gobj.eid, _nice_game(gobj))) def download_coach(footage_dir, gobj, dry_run=False): """ Starts an `rtmpdump` process to download the full coach footage of the given game. Currently, the only quality available is 1600. The footage will be saved to the following path: footage_dir/{eid}.mp4 If footage is already at that path, then an `exceptions.LookupError` is raised. A full game's worth of footage at a quality of 1600 is about **1GB**. """ fp = _full_path(footage_dir, gobj.eid) if os.access(fp, os.R_OK): raise LookupError('Footage path "%s" already exists.' % fp) server, app, path = coach_url(gobj) cmd = ['rtmpdump', '--rtmp', server, '--app', app, '--playpath', path, '--timeout', '60', ] if dry_run: cmd += ['--stop', '30'] cmd += ['-o', fp] _eprint('Downloading game %s %s' % (gobj.eid, _nice_game(gobj))) status = _run_command(cmd) if status is None: _eprint('DONE (incomplete) with game %s %s' % (gobj.eid, _nice_game(gobj))) elif not status: _eprint('FAILED to download game %s %s' % (gobj.eid, _nice_game(gobj))) else: _eprint('DONE with game %s %s' % (gobj.eid, _nice_game(gobj))) def _run_command(cmd): try: p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) output = p.communicate()[0].strip() if p.returncode != 0: err = subprocess.CalledProcessError(p.returncode, cmd) err.output = output raise err except subprocess.CalledProcessError, e: # A hack for rtmpdump... if e.returncode == 2 and cmd[0] == 'rtmpdump': return None indent = lambda s: '\n'.join(map(lambda l: ' %s' % l, s.split('\n'))) _eprint("Could not run '%s' (exit code %d):\n%s" % (' '.join(cmd), e.returncode, indent(e.output))) return False except OSError, e: _eprint("Could not run '%s' (errno: %d): %s" % (' '.join(cmd), e.errno, e.strerror)) return False return True def plays(gobj, coach=True): """ Returns an ordered dictionary of all plays for a particular game with timings for the coach footage. If `coach` is `False`, then the timings will be for the broadcast footage. The game `gobj` must be an `nflgame.game.Game` object. If there is a problem retrieving the data, `None` is returned. If the game is over, then the XML data is saved to disk. """ if coach: cache = __coach_cache else: cache = __broadcast_cache if gobj.game_over() and gobj.eid in cache: return cache[gobj.eid] rawxml = _get_xml_data(gobj.eid, gobj.gamekey) ps = _xml_plays(rawxml, coach) if ps is None: return None if len(ps) == 0: _eprint('Could not find timing nodes in XML data, ' 'which provide the start time of each play.') return None __broadcast_cache[gobj.eid] = ps # Save the XML data to disk if the game is over. fp = _xmlf % gobj.eid if gobj.game_over() and not os.access(fp, os.R_OK): try: print >> gzip.open(fp, 'w+'), rawxml, except IOError: _eprint('Could not cache XML data. Please make ' '"%s" writable.' % path.dirname(fp)) return ps def play(gobj, playid, coach=True): """ Returns a `nflvid.Play` object given a game and a play id with timings for the coach footage. If `coach` is `False`, then the timings will be for the broadcast footage. The game `gobj` must be an `nflgame.game.Game` object. If a play with the given id does not exist, `None` is returned. """ return plays(gobj).get(playid, None) class Play (object): """ Represents the start and end timings of single play in coach or broadcast footage. """ def __init__(self, start, end, playid): self.start = start """ Corresponds to the `ArchiveTCIN` or `CATIN` field in the source data. `ArchiveTCIN` is used for broadcast footage while `CATIN` is used for coach footage. """ self.end = end """ The end time of the play. This is typically the start time of the next play (from `ArchiveTCIN` or `CATIN`). When the next play isn't available, this is `None`. """ self.playid = playid """ A numeric play identifier that serves as a foreign key from an `nflgame.game.Play` object to a `nflvid.Play` object. """ def idstr(self): """Returns a string play id padded with zeroes.""" return '%04d' % int(self.playid) def __str__(self): return '(%s, %s, %s)' % (self.playid, self.start, self.end) class PlayTime (object): """ Represents a footage time point retrieved from the source XML meta data. """ __pdoc__['hh'] = 'The hour portion of the play time.' __pdoc__['mm'] = 'The minutes portion of the play time.' __pdoc__['ss'] = 'The seconds portion of the play time.' __pdoc__['milli'] = 'The milliseconds portion of the play time.' def __init__(self, point=None, seconds=None): """ Construct a PlayTime object given a `point` in time in the format `HH:MM:SS:MMM` where `MMM` can be either 2 or 3 digits. Alternatively, `seconds` can be provided (which may be a float). """ if seconds is not None: milli = int(1000 * (seconds - math.floor(seconds))) seconds = int(math.floor(seconds)) hh = seconds / 3600 seconds -= hh * 3600 mm = seconds / 60 seconds -= mm * 60 ss = seconds self.hh, self.mm, self.ss, self.milli = hh, mm, ss, milli self.__point = '%02d:%02d:%02d:%03d' % (hh, mm, ss, milli) return self.__point = point self.__coach = False try: parts = self.__point.split(':') if len(parts[3]) == 3: self.__coach = True parts = map(int, parts) except ValueError: assert False, 'Bad play time format: %s' % self.__point if len(parts) != 4: assert False, 'Expected 4 parts but got %d in: %s' \ % (len(parts), self.__point) self.hh, self.mm, self.ss, self.milli = parts # I believe milliseconds is given in tens of milliseconds # for the ArchiveTCIN node. But the CATIN node (coach timing) # provides regular milliseconds. if not self.__coach: self.milli *= 10 def add_seconds(self, seconds): """ Returns a new PlayTime with `seconds` (int or float) added to self. """ return PlayTime(seconds=self.fractional() + seconds) def seconds(self): """ Returns this time point rounded to the nearest second. """ secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss if self.milli >= 50: secs += 1 return secs def fractional(self): """ Returns this time point as fractional seconds based on milliseconds. """ secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss secs = (1000 * secs) + self.milli return float(secs) / 1000.0 def __cmp__(self, other): return cmp(self.fractional(), other.fractional()) def __sub__(self, other): """ Returns the difference rounded to nearest second between two time points. The `other` time point must take place before the current time point. """ assert other <= self, '%s is not <= than %s' % (other, self) return int(round(self.fractional() - other.fractional())) def __str__(self): return self.__point def _xml_plays(data, coach=True): """ Parses the XML raw string `data` given into an ordered dictionary of `nflvid.Play` objects corresponding to coach play timings. If `coach` is set to `False`, then play timings for the broadcast are retrieved. The dictionary is keyed by play id. """ if data is None: return None soup = bs4.BeautifulSoup(data) # For broadcast timings, we need to subtract the offset given from # the play start timings. try: boffset = int(soup.find('dataset').get('offset', 0)) except ValueError: boffset = 0 # Load everything into a list first, since we need to look ahead to see # the next play's start time to compute the current play's duration. rows = [] for row in soup.find_all('row'): playid = row.find('id') if not playid: playid = row.get('playid', None) if not playid: continue playid = playid.strip() else: playid = playid.get_text().strip() if coach: start = row.find('catin') else: start = row.find('archivetcin') if not start: continue start = PlayTime(start.get_text().strip()) if not coach: start = start.add_seconds(-boffset) # If this start doesn't procede the last start time, skip it. if len(rows) > 0 and start < rows[-1][1]: continue rows.append((playid, start, row)) # A predicate for determining whether to ignore a row or not in our final # result set. For example, timeouts take a lot of time but aren't needed # for play-by-play footage. def ignore(row): if 'playdescription' in row.attrs: if row['playdescription'].lower().startswith('timeout'): return True if row['playdescription'].lower().startswith('two-minute'): return True # Did we miss anything? if 'preplaybyplay' in row.attrs: if row['preplaybyplay'].lower().startswith('timeout'): return True return False d = OrderedDict() for i, (playid, start, row) in enumerate(rows): if ignore(row): continue end = None if i < len(rows) - 1: end = rows[i+1][1] d[playid] = Play(start, end, playid) return d def _get_xml_data(eid=None, gamekey=None, fpath=None): """ Returns the XML play data corresponding to the game given. A game must be specified in one of two ways: by providing the `eid` and `gamekey` or by providing the file path `fpath` to a gzipped XML file. If the XML data is already on disk, it is read, decompressed and returned. Otherwise, the XML data is downloaded from the NFL web site. If the data doesn't exist yet or there was an error, `nflvid._get_xml_data` returns None. """ assert (eid is not None and gamekey is not None) or fpath is not None if fpath is not None: return gzip.open(fpath).read() fpath = _xmlf % eid if os.access(fpath, os.R_OK): return gzip.open(fpath).read() try: year = int(eid[0:4]) month = int(eid[4:6]) if month <= 3: year -= 1 u = _xml_base_url % (year, gamekey) # The year and the game key. return urllib2.urlopen(u, timeout=10).read() except urllib2.HTTPError, e: _eprint(e) except socket.timeout, e: _eprint(e) return None
def artificial_slice(
footage_play_dir, gobj, gobj_play)
Creates a video file that contains a single static image with a textual description of the play. The purpose is to provide some representation of a play even if its video form doesn't exist. (Or more likely, the play-by-play meta data for that play is corrupt.)
This function requires the use of ImageMagick's convert
with
pango support.
Note that gobj_play
is an nflgame.game.Play
object and not a
Play
object.
def artificial_slice(footage_play_dir, gobj, gobj_play): """ Creates a video file that contains a single static image with a textual description of the play. The purpose is to provide some representation of a play even if its video form doesn't exist. (Or more likely, the play-by-play meta data for that play is corrupt.) This function requires the use of ImageMagick's `convert` with pango support. Note that `gobj_play` is an `nflgame.game.Play` object and not a `nflvid.Play` object. """ outdir = _play_path(footage_play_dir, gobj.eid) outpath = path.join(outdir, '%04d.mp4' % int(gobj_play.playid)) pango = '<span size="20000" foreground="white">' with tempfile.NamedTemporaryFile(mode='w+', suffix='.png') as tmp: cmd = ['convert', '-size', '640x480', # size of coach footage. configurable? '-background', 'black', 'pango:\n\n\n\n\n\n\n\n\n\n%s%s</span>' % (pango, gobj_play), tmp.name, ] _run_command(cmd) cmd = ['ffmpeg', '-f', 'image2', '-loop', '1', '-r:v', '7', '-i', tmp.name, '-pix_fmt', 'yuv420p', '-an', '-t', '10', outpath, ] _run_command(cmd)
def broadcast_url_status(
url)
Returns the HTTP status as a string for the given broadcast URL. A
broadcast URL should be considered valid if and only if its HTTP
status is 200
.
def broadcast_url_status(url): """ Returns the HTTP status as a string for the given broadcast URL. A broadcast URL should be considered valid if and only if its HTTP status is `200`. """ resp, _ = _httplib2.Http().request(url, 'HEAD') return resp['status']
def broadcast_urls(
gobj, quality='1600')
Returns possible HTTP Live Stream URLs (an m3u8 file) for the given
game and quality. Use broadcast_url_status
to determine
if it's a valid URL or not. Alternatively, use
first_valid_broadcast_url
to retrieve the first valid URL.
The kludge here is that the broadcast URLs can vary slightly and unpredictably from game to game. I haven't discovered a reliable means of accurately predicting which URL is correct.
Note that it is unlikely any URL returned will be valid for preseason or postseason games.
def broadcast_urls(gobj, quality='1600'): """ Returns possible HTTP Live Stream URLs (an m3u8 file) for the given game and quality. Use `nflvid.broadcast_url_status` to determine if it's a valid URL or not. Alternatively, use `nflvid.first_valid_broadcast_url` to retrieve the first valid URL. The kludge here is that the broadcast URLs can vary slightly and unpredictably from game to game. I haven't discovered a reliable means of accurately predicting which URL is correct. Note that it is unlikely any URL returned will be valid for preseason or postseason games. """ month, day = gobj.eid[4:6], gobj.eid[6:8] return [ _broadcast_url % (gobj.season(), month, day, gobj.gamekey, gobj.gamekey, gobj.away.lower(), gobj.home.lower(), gobj.season(), i, quality) for i in range(1, 4) ]
def coach_url(
gobj)
Returns the rtmp URL as a triple for the coach footage of the given game. The elements of the triple are:
(rtmp server, rtmp app name, rtmp playpath)
Coach video only comes in 1600 quality.
def coach_url(gobj): """ Returns the rtmp URL as a triple for the coach footage of the given game. The elements of the triple are: (rtmp server, rtmp app name, rtmp playpath) Coach video only comes in 1600 quality. """ return ( _coach_url[0], _coach_url[1], _coach_url[2] % (gobj.season(), gobj.gamekey), )
def download_broadcast(
footage_dir, gobj, quality='1600', dry_run=False)
Starts an ffmpeg
process to download the full broadcast of the
given game with the quality provided. The qualities available are:
400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best.
The footage will be saved to the following path:
footage_dir/{eid}.mp4
If footage is already at that path, then an
exceptions.LookupError
is raised.
A full game's worth of broadcast footage at a quality of 1600 is about 2GB.
def download_broadcast(footage_dir, gobj, quality='1600', dry_run=False): """ Starts an `ffmpeg` process to download the full broadcast of the given game with the quality provided. The qualities available are: 400, 800, 1200, 1600, 2400, 3000, 4500 with 4500 being the best. The footage will be saved to the following path: footage_dir/{eid}.mp4 If footage is already at that path, then an `exceptions.LookupError` is raised. A full game's worth of broadcast footage at a quality of 1600 is about **2GB**. """ fp = _full_path(footage_dir, gobj.eid) if os.access(fp, os.R_OK): raise LookupError('Footage path "%s" already exists.' % fp) urls = broadcast_urls(gobj, quality) url = first_valid_broadcast_url(urls) if url is None: _eprint('BAD URLs for game %s: %s' % (_nice_game(gobj), ', '.join(urls))) _eprint('FAILED to download game %s' % _nice_game(gobj)) return cmd = ['ffmpeg', '-i', url] if dry_run: cmd += ['-t', '30'] cmd += ['-absf', 'aac_adtstoasc', # no idea. ffmpeg says I need it though. '-acodec', 'copy', '-vcodec', 'copy', fp, ] _eprint('Downloading game %s %s' % (gobj.eid, _nice_game(gobj))) if not _run_command(cmd): _eprint('FAILED to download game %s' % _nice_game(gobj)) else: _eprint('DONE with game %s %s' % (gobj.eid, _nice_game(gobj)))
def download_coach(
footage_dir, gobj, dry_run=False)
Starts an rtmpdump
process to download the full coach footage of
the given game. Currently, the only quality available is 1600.
The footage will be saved to the following path:
footage_dir/{eid}.mp4
If footage is already at that path, then an
exceptions.LookupError
is raised.
A full game's worth of footage at a quality of 1600 is about 1GB.
def download_coach(footage_dir, gobj, dry_run=False): """ Starts an `rtmpdump` process to download the full coach footage of the given game. Currently, the only quality available is 1600. The footage will be saved to the following path: footage_dir/{eid}.mp4 If footage is already at that path, then an `exceptions.LookupError` is raised. A full game's worth of footage at a quality of 1600 is about **1GB**. """ fp = _full_path(footage_dir, gobj.eid) if os.access(fp, os.R_OK): raise LookupError('Footage path "%s" already exists.' % fp) server, app, path = coach_url(gobj) cmd = ['rtmpdump', '--rtmp', server, '--app', app, '--playpath', path, '--timeout', '60', ] if dry_run: cmd += ['--stop', '30'] cmd += ['-o', fp] _eprint('Downloading game %s %s' % (gobj.eid, _nice_game(gobj))) status = _run_command(cmd) if status is None: _eprint('DONE (incomplete) with game %s %s' % (gobj.eid, _nice_game(gobj))) elif not status: _eprint('FAILED to download game %s %s' % (gobj.eid, _nice_game(gobj))) else: _eprint('DONE with game %s %s' % (gobj.eid, _nice_game(gobj)))
def first_valid_broadcast_url(
urls)
Returns the first valid broadcast URL in the list. If there is no
valid broadcast URL, then None
is returned.
def first_valid_broadcast_url(urls): """ Returns the first valid broadcast URL in the list. If there is no valid broadcast URL, then `None` is returned. """ for url in urls: if broadcast_url_status(url) == '200': return url return None
def footage_full(
footage_dir, eid)
Returns the path to the full video for a given game inside an nflvid footage directory.
If the full footage doesn't exist, then None is returned.
def footage_full(footage_dir, eid): """ Returns the path to the full video for a given game inside an nflvid footage directory. If the full footage doesn't exist, then None is returned. """ fp = _full_path(footage_dir, eid) if not os.access(fp, os.R_OK): return None return fp
def footage_play(
footage_play_dir, eid, playid, stat=True)
Returns a file path to an existing play slice in the footage play directory for the game and play given.
If the file for the play is not readable, then None
is returned.
If stat
is False
, then the file's access will not be checked.
def footage_play(footage_play_dir, eid, playid, stat=True): """ Returns a file path to an existing play slice in the footage play directory for the game and play given. If the file for the play is not readable, then `None` is returned. If `stat` is `False`, then the file's access will not be checked. """ gamedir = _play_path(footage_play_dir, eid) fp = path.join(gamedir, '%04d.mp4' % int(playid)) if stat and not os.access(fp, os.R_OK): return None return fp
def footage_plays(
footage_play_dir, eid)
Returns a list of all footage broken down by play inside an nflvid footage directory. The list is sorted numerically by play id.
If no footage breakdown exists for the game provided, then an empty list is returned.
def footage_plays(footage_play_dir, eid): """ Returns a list of all footage broken down by play inside an nflvid footage directory. The list is sorted numerically by play id. If no footage breakdown exists for the game provided, then an empty list is returned. """ fp = _play_path(footage_play_dir, eid) if not os.access(fp, os.R_OK): return [] return sorted(os.listdir(fp), key=lambda s: int(s[0:-4]))
def play(
gobj, playid, coach=True)
Returns a Play
object given a game and a play id with
timings for the coach footage. If coach
is False
, then the
timings will be for the broadcast footage.
The game gobj
must be an nflgame.game.Game
object.
If a play with the given id does not exist, None
is returned.
def play(gobj, playid, coach=True): """ Returns a `nflvid.Play` object given a game and a play id with timings for the coach footage. If `coach` is `False`, then the timings will be for the broadcast footage. The game `gobj` must be an `nflgame.game.Game` object. If a play with the given id does not exist, `None` is returned. """ return plays(gobj).get(playid, None)
def plays(
gobj, coach=True)
Returns an ordered dictionary of all plays for a particular game
with timings for the coach footage. If coach
is False
, then the
timings will be for the broadcast footage.
The game gobj
must be an nflgame.game.Game
object.
If there is a problem retrieving the data, None
is returned.
If the game is over, then the XML data is saved to disk.
def plays(gobj, coach=True): """ Returns an ordered dictionary of all plays for a particular game with timings for the coach footage. If `coach` is `False`, then the timings will be for the broadcast footage. The game `gobj` must be an `nflgame.game.Game` object. If there is a problem retrieving the data, `None` is returned. If the game is over, then the XML data is saved to disk. """ if coach: cache = __coach_cache else: cache = __broadcast_cache if gobj.game_over() and gobj.eid in cache: return cache[gobj.eid] rawxml = _get_xml_data(gobj.eid, gobj.gamekey) ps = _xml_plays(rawxml, coach) if ps is None: return None if len(ps) == 0: _eprint('Could not find timing nodes in XML data, ' 'which provide the start time of each play.') return None __broadcast_cache[gobj.eid] = ps # Save the XML data to disk if the game is over. fp = _xmlf % gobj.eid if gobj.game_over() and not os.access(fp, os.R_OK): try: print >> gzip.open(fp, 'w+'), rawxml, except IOError: _eprint('Could not cache XML data. Please make ' '"%s" writable.' % path.dirname(fp)) return ps
def slice(
footage_play_dir, full_footage_file, gobj, coach=True, threads=4, dry_run=False)
Uses ffmpeg
to slice the given footage file into play-by-play
pieces. The full_footage_file
should be a path to a full
game downloaded with nflvid-footage
and gobj
should be the
corresponding nflgame.game.Game
object.
The footage_play_dir
is where the pieces will be saved:
{footage_play_dir}/{eid}/{playid}.mp4
This function will not duplicate work. If a video file exists for a particular play, then slice will not regenerate it.
Note that this function uses an eventlet
green pool to run
multiple ffmpeg
instances simultaneously. The maximum number
of threads to use is specified by threads
. This function only
terminates when all threads have finished processing.
If coach
is False
, then play timings for broadcast footage will
be used instead of coach timings.
If dry_run
is True
, then only the first 10 plays of the game
are sliced.
def slice(footage_play_dir, full_footage_file, gobj, coach=True, threads=4, dry_run=False): """ Uses `ffmpeg` to slice the given footage file into play-by-play pieces. The `full_footage_file` should be a path to a full game downloaded with `nflvid-footage` and `gobj` should be the corresponding `nflgame.game.Game` object. The `footage_play_dir` is where the pieces will be saved: {footage_play_dir}/{eid}/{playid}.mp4 This function will not duplicate work. If a video file exists for a particular play, then slice will not regenerate it. Note that this function uses an `eventlet` green pool to run multiple `ffmpeg` instances simultaneously. The maximum number of threads to use is specified by `threads`. This function only terminates when all threads have finished processing. If `coach` is `False`, then play timings for broadcast footage will be used instead of coach timings. If `dry_run` is `True`, then only the first 10 plays of the game are sliced. """ outdir = _play_path(footage_play_dir, gobj.eid) if not os.access(outdir, os.R_OK): os.makedirs(outdir) unsliced = unsliced_plays(footage_play_dir, gobj, coach, dry_run) if unsliced is None or len(unsliced) == 0: # Only show an annoying error message if there are no sliced # plays on disk. if not footage_plays(footage_play_dir, gobj.eid): _eprint( 'There are no unsliced plays remaining for game %s %s.\n' 'If they have not been sliced yet, then the XML play-by-play ' 'meta data may not be available or is corrupt.' % (gobj, _nice_game(gobj))) return max_dur = 0 if coach else 25 pool = eventlet.greenpool.GreenPool(threads) for p in unsliced: pool.spawn_n(slice_play, footage_play_dir, full_footage_file, gobj, p, max_dur, coach) pool.waitall() _eprint('DONE slicing game %s %s' % (gobj.eid, _nice_game(gobj)))
def slice_play(
footage_play_dir, full_footage_file, gobj, play, max_duration=0, cut_scoreboard=True)
This is just like slice
, but it only slices the play
provided. In typical cases, slice
should be used since it
makes sure not to duplicate work.
This function will not check if the play-by-play directory for
gobj
has been created.
max_duration
is used to cap the length of a play. This
drastically cuts down on the storage requirements of a game at the
cost of potentially missing longer plays. This is particularly
useful if you are slicing broadcast footage, where imposing a cap
at about 15 seconds can decrease storage requirements by more than
half without missing much.
When cut_scoreboard
is True
, the first 3.0 seconds of the play
will be clipped to remove the scoreboard view.
def slice_play(footage_play_dir, full_footage_file, gobj, play, max_duration=0, cut_scoreboard=True): """ This is just like `nflvid.slice`, but it only slices the play provided. In typical cases, `nflvid.slice` should be used since it makes sure not to duplicate work. This function will not check if the play-by-play directory for `gobj` has been created. `max_duration` is used to cap the length of a play. This drastically cuts down on the storage requirements of a game at the cost of potentially missing longer plays. This is particularly useful if you are slicing broadcast footage, where imposing a cap at about 15 seconds can decrease storage requirements by more than half without missing much. When `cut_scoreboard` is `True`, the first 3.0 seconds of the play will be clipped to remove the scoreboard view. """ outdir = _play_path(footage_play_dir, gobj.eid) st = play.start outpath = path.join(outdir, '%s.mp4' % play.idstr()) et = play.end if et is None: # Probably the last play of the game. et = st.add_seconds(40) if max_duration > 0 and (et.seconds() - st.seconds()) > max_duration: et = st.add_seconds(max_duration) if cut_scoreboard: st = st.add_seconds(3.0) dr = PlayTime(seconds=et.fractional() - st.fractional()) start_time = '%02d:%02d:%02d.%d' % (st.hh, st.mm, st.ss, st.milli) duration = '%02d:%02d:%02d.%d' % (dr.hh, dr.mm, dr.ss, dr.milli) cmd = ['ffmpeg', '-ss', start_time, '-t', duration, '-i', full_footage_file, '-acodec', 'copy', '-vcodec', 'copy', outpath, ] _run_command(cmd)
def unsliced_plays(
footage_play_dir, gobj, coach=True, dry_run=False)
Scans the game directory inside footage_play_dir and returns a list of plays that haven't been sliced yet. In particular, a play is only considered sliced if the following file is readable, assuming {playid} is its play id:
{footage_play_dir}/{eid}/{playid}.mp4
All plays for the game given that don't fit this criteria will be returned in the list.
If the list is empty, then all plays for the game have been sliced.
Alternatively, None
can be returned if there was a problem
retrieving the play-by-play meta data.
If coach
is False
, then play timings for broadcast footage will
be used instead of coach timings.
If dry_run
is True
, then only the first 10 plays of the game
are sliced.
def unsliced_plays(footage_play_dir, gobj, coach=True, dry_run=False): """ Scans the game directory inside footage_play_dir and returns a list of plays that haven't been sliced yet. In particular, a play is only considered sliced if the following file is readable, assuming {playid} is its play id: {footage_play_dir}/{eid}/{playid}.mp4 All plays for the game given that don't fit this criteria will be returned in the list. If the list is empty, then all plays for the game have been sliced. Alternatively, `None` can be returned if there was a problem retrieving the play-by-play meta data. If `coach` is `False`, then play timings for broadcast footage will be used instead of coach timings. If `dry_run` is `True`, then only the first 10 plays of the game are sliced. """ ps = plays(gobj, coach) outdir = _play_path(footage_play_dir, gobj.eid) unsliced = [] if ps is None: return None for i, p in enumerate(ps.values()): if dry_run and i >= 10: break pid = p.idstr() if not os.access(path.join(outdir, '%s.mp4' % pid), os.R_OK): unsliced.append(p) return unsliced
class Play
Represents the start and end timings of single play in coach or broadcast footage.
class Play (object): """ Represents the start and end timings of single play in coach or broadcast footage. """ def __init__(self, start, end, playid): self.start = start """ Corresponds to the `ArchiveTCIN` or `CATIN` field in the source data. `ArchiveTCIN` is used for broadcast footage while `CATIN` is used for coach footage. """ self.end = end """ The end time of the play. This is typically the start time of the next play (from `ArchiveTCIN` or `CATIN`). When the next play isn't available, this is `None`. """ self.playid = playid """ A numeric play identifier that serves as a foreign key from an `nflgame.game.Play` object to a `nflvid.Play` object. """ def idstr(self): """Returns a string play id padded with zeroes.""" return '%04d' % int(self.playid) def __str__(self): return '(%s, %s, %s)' % (self.playid, self.start, self.end)
var end
The end time of the play. This is typically the start time of
the next play (from ArchiveTCIN
or CATIN
). When the next
play isn't available, this is None
.
var playid
A numeric play identifier that serves as a foreign key from an
nflgame.game.Play
object to a Play
object.
var start
Corresponds to the ArchiveTCIN
or CATIN
field in the source
data. ArchiveTCIN
is used for broadcast footage while CATIN
is used for coach footage.
def __init__(
self, start, end, playid)
def __init__(self, start, end, playid): self.start = start """ Corresponds to the `ArchiveTCIN` or `CATIN` field in the source data. `ArchiveTCIN` is used for broadcast footage while `CATIN` is used for coach footage. """ self.end = end """ The end time of the play. This is typically the start time of the next play (from `ArchiveTCIN` or `CATIN`). When the next play isn't available, this is `None`. """ self.playid = playid """ A numeric play identifier that serves as a foreign key from an `nflgame.game.Play` object to a `nflvid.Play` object. """
def idstr(
self)
Returns a string play id padded with zeroes.
def idstr(self): """Returns a string play id padded with zeroes.""" return '%04d' % int(self.playid)
class PlayTime
Represents a footage time point retrieved from the source XML meta data.
class PlayTime (object): """ Represents a footage time point retrieved from the source XML meta data. """ __pdoc__['hh'] = 'The hour portion of the play time.' __pdoc__['mm'] = 'The minutes portion of the play time.' __pdoc__['ss'] = 'The seconds portion of the play time.' __pdoc__['milli'] = 'The milliseconds portion of the play time.' def __init__(self, point=None, seconds=None): """ Construct a PlayTime object given a `point` in time in the format `HH:MM:SS:MMM` where `MMM` can be either 2 or 3 digits. Alternatively, `seconds` can be provided (which may be a float). """ if seconds is not None: milli = int(1000 * (seconds - math.floor(seconds))) seconds = int(math.floor(seconds)) hh = seconds / 3600 seconds -= hh * 3600 mm = seconds / 60 seconds -= mm * 60 ss = seconds self.hh, self.mm, self.ss, self.milli = hh, mm, ss, milli self.__point = '%02d:%02d:%02d:%03d' % (hh, mm, ss, milli) return self.__point = point self.__coach = False try: parts = self.__point.split(':') if len(parts[3]) == 3: self.__coach = True parts = map(int, parts) except ValueError: assert False, 'Bad play time format: %s' % self.__point if len(parts) != 4: assert False, 'Expected 4 parts but got %d in: %s' \ % (len(parts), self.__point) self.hh, self.mm, self.ss, self.milli = parts # I believe milliseconds is given in tens of milliseconds # for the ArchiveTCIN node. But the CATIN node (coach timing) # provides regular milliseconds. if not self.__coach: self.milli *= 10 def add_seconds(self, seconds): """ Returns a new PlayTime with `seconds` (int or float) added to self. """ return PlayTime(seconds=self.fractional() + seconds) def seconds(self): """ Returns this time point rounded to the nearest second. """ secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss if self.milli >= 50: secs += 1 return secs def fractional(self): """ Returns this time point as fractional seconds based on milliseconds. """ secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss secs = (1000 * secs) + self.milli return float(secs) / 1000.0 def __cmp__(self, other): return cmp(self.fractional(), other.fractional()) def __sub__(self, other): """ Returns the difference rounded to nearest second between two time points. The `other` time point must take place before the current time point. """ assert other <= self, '%s is not <= than %s' % (other, self) return int(round(self.fractional() - other.fractional())) def __str__(self): return self.__point
def __init__(
self, point=None, seconds=None)
Construct a PlayTime object given a point
in time in the
format HH:MM:SS:MMM
where MMM
can be either 2 or 3 digits.
Alternatively, seconds
can be provided (which may be a
float).
def __init__(self, point=None, seconds=None): """ Construct a PlayTime object given a `point` in time in the format `HH:MM:SS:MMM` where `MMM` can be either 2 or 3 digits. Alternatively, `seconds` can be provided (which may be a float). """ if seconds is not None: milli = int(1000 * (seconds - math.floor(seconds))) seconds = int(math.floor(seconds)) hh = seconds / 3600 seconds -= hh * 3600 mm = seconds / 60 seconds -= mm * 60 ss = seconds self.hh, self.mm, self.ss, self.milli = hh, mm, ss, milli self.__point = '%02d:%02d:%02d:%03d' % (hh, mm, ss, milli) return self.__point = point self.__coach = False try: parts = self.__point.split(':') if len(parts[3]) == 3: self.__coach = True parts = map(int, parts) except ValueError: assert False, 'Bad play time format: %s' % self.__point if len(parts) != 4: assert False, 'Expected 4 parts but got %d in: %s' \ % (len(parts), self.__point) self.hh, self.mm, self.ss, self.milli = parts # I believe milliseconds is given in tens of milliseconds # for the ArchiveTCIN node. But the CATIN node (coach timing) # provides regular milliseconds. if not self.__coach: self.milli *= 10
def add_seconds(
self, seconds)
Returns a new PlayTime with seconds
(int or float) added to
self.
def add_seconds(self, seconds): """ Returns a new PlayTime with `seconds` (int or float) added to self. """ return PlayTime(seconds=self.fractional() + seconds)
def fractional(
self)
Returns this time point as fractional seconds based on milliseconds.
def fractional(self): """ Returns this time point as fractional seconds based on milliseconds. """ secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss secs = (1000 * secs) + self.milli return float(secs) / 1000.0
def seconds(
self)
Returns this time point rounded to the nearest second.
def seconds(self): """ Returns this time point rounded to the nearest second. """ secs = (self.hh * 60 * 60) + (self.mm * 60) + self.ss if self.milli >= 50: secs += 1 return secs
This submodule exposes a couple of convenience functions for opening a
sequence of plays with vlc
.
This module is for picky users where just running
vlc play1.mp4 play2.mp4 ...
isn't enough. Namely, this module instructs
vlc to write a text marquee for each play describing the current game
situ...
Documentation generated by
pdoc
0.1.5.
pdoc is in the public domain with the
UNLICENSE.