Package nflgame :: Module game
[frames] | no frames]

Source Code for Module nflgame.game

  1  from collections import namedtuple 
  2  import os 
  3  import os.path as path 
  4  import gzip 
  5  import json 
  6  import sys 
  7  import urllib2 
  8   
  9  from nflgame import OrderedDict 
 10  import nflgame.player 
 11  import nflgame.seq 
 12  import nflgame.statmap 
 13   
 14  _MAX_INT = sys.maxint 
 15   
 16  _jsonf = path.join(path.split(__file__)[0], 'gamecenter-json', '%s.json.gz') 
 17  _json_base_url = "http://www.nfl.com/liveupdate/game-center/%s/%s_gtd.json" 
 18   
 19  GameDiff = namedtuple('GameDiff', ['plays', 'players']) 
 20  """ 
 21  Represents the difference between two points in time of the same game 
 22  in terms of plays and player statistics. 
 23  """ 
 24   
 25  TeamStats = namedtuple('TeamStats', 
 26                         ['first_downs', 'total_yds', 'passing_yds', 
 27                          'rushing_yds', 'penalty_cnt', 'penalty_yds', 
 28                          'turnovers', 'punt_cnt', 'punt_yds', 'punt_avg', 
 29                          'pos_time']) 
 30  """A collection of team statistics for an entire game.""" 
 31   
 32   
class FieldPosition (object):
    """
    Represents field position.

    The representation here is an integer offset where the 50 yard line
    corresponds to '0'. Being in the own territory corresponds to a negative
    offset while being in the opponent's territory corresponds to a positive
    offset.

    e.g., NE has the ball on the NE 45, the offset is -5.
    e.g., NE has the ball on the NYG 2, the offset is 48.

    Instances compare, sort and hash by their offset.
    """
    def __new__(cls, pos_team, yardline):
        # An empty/missing yardline has no position at all: the "constructor"
        # then evaluates to None instead of an instance (and __init__ is
        # not run).
        if not yardline:
            return None
        return object.__new__(cls)

    def __init__(self, pos_team, yardline):
        """
        pos_team is the team on offense, and yardline is a string formatted
        like 'team-territory yard-line'. e.g., "NE 32". The midfield line is
        given as just '50'.
        """
        if yardline == '50':
            self.offset = 0
            return

        territory, yd_str = yardline.split()
        yd = int(yd_str)
        if territory == pos_team:
            self.offset = -(50 - yd)
        else:
            self.offset = 50 - yd

    def __cmp__(self, other):
        # Three-way comparison (-1, 0 or 1) written without the `cmp`
        # builtin, which is not available on Python 3.
        return (self.offset > other.offset) - (self.offset < other.offset)

    # Python 3 ignores __cmp__, so the rich comparisons are spelled out.
    # They order positions exactly like __cmp__ does.
    def __eq__(self, other):
        return self.offset == other.offset

    def __ne__(self, other):
        return self.offset != other.offset

    def __lt__(self, other):
        return self.offset < other.offset

    def __le__(self, other):
        return self.offset <= other.offset

    def __gt__(self, other):
        return self.offset > other.offset

    def __ge__(self, other):
        return self.offset >= other.offset

    def __hash__(self):
        # Keep hashing consistent with equality: equal offsets hash equally.
        return hash(self.offset)

    def __str__(self):
        return '%d' % self.offset
71 72
class PossessionTime (object):
    """
    Represents the amount of time a drive lasted in (minutes, seconds).

    Instances compare, sort and hash by their (minutes, seconds) pair and
    can be added to and subtracted from one another.
    """
    def __init__(self, clock):
        """
        clock is a string formatted like 'minutes:seconds'. e.g., "3:07".
        A ValueError is raised if it does not split into two integers.
        """
        self.clock = clock
        self.minutes, self.seconds = map(int, self.clock.split(':'))

    @staticmethod
    def _from_total_seconds(total_seconds):
        """
        Builds a PossessionTime lasting total_seconds seconds, with its
        clock string zero padded like "02:15".
        """
        new_time = PossessionTime('0:00')
        # divmod floors, so minutes stays an integer even when true
        # division is in effect (Python 3, or `from __future__ import
        # division`), where `total_seconds / 60` would yield a float.
        new_time.minutes, new_time.seconds = divmod(total_seconds, 60)
        new_time.clock = '%.2d:%.2d' % (new_time.minutes, new_time.seconds)
        return new_time

    def total_seconds(self):
        """
        Returns the total number of seconds that this possession lasted for.
        """
        return self.seconds + self.minutes * 60

    def __cmp__(self, other):
        # Three-way comparison (-1, 0 or 1) written without the `cmp`
        # builtin, which is not available on Python 3.
        a, b = (self.minutes, self.seconds), (other.minutes, other.seconds)
        return (a > b) - (a < b)

    # Python 3 ignores __cmp__, so the rich comparisons are spelled out.
    # They order times exactly like __cmp__ does.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __hash__(self):
        # Keep hashing consistent with equality.
        return hash((self.minutes, self.seconds))

    def __add__(self, other):
        """Returns a new PossessionTime that is the sum of both times."""
        return PossessionTime._from_total_seconds(
            self.total_seconds() + other.total_seconds())

    def __sub__(self, other):
        """
        Returns a new PossessionTime that is this time minus other.
        other must not be longer than this time.
        """
        assert self >= other, \
            'Cannot subtract a longer possession time (%s) from a ' \
            'shorter one (%s).' % (other, self)
        return PossessionTime._from_total_seconds(
            self.total_seconds() - other.total_seconds())

    def __str__(self):
        return self.clock
110 111
class GameClock (object):
    """
    Represents the current time in a game. Namely, it keeps track of the
    quarter and clock time. Also, GameClock can represent whether
    the game hasn't started yet, is half time or if it's over.

    Clocks are ordered chronologically: a later point in the game compares
    greater than an earlier one.
    """
    def __init__(self, qtr, clock):
        """
        qtr is either a quarter number (an integer or a numeric string) or
        one of the strings 'Pregame', 'Halftime', 'Final' or
        'final overtime'. clock is a string formatted like 'minutes:seconds'.

        A ValueError is raised if clock does not split into two integers,
        and an AssertionError is raised if qtr is an unrecognized string.
        """
        self.qtr = qtr
        self.clock = clock

        # Make it easy for comparison.
        self.__minutes, self.__seconds = map(int, self.clock.split(':'))
        try:
            self.__qtr = int(self.qtr)
            if self.__qtr >= 3:
                self.__qtr += 1  # Let halftime be quarter 3
        except ValueError:
            if self.is_pregame():
                self.__qtr = 0
            elif self.is_halftime():
                self.__qtr = 3
            elif self.is_final():
                # Sorts after every real quarter. sys.maxint only exists
                # on Python 2, hence the fallback.
                self.__qtr = getattr(sys, 'maxint', sys.maxsize)
            else:
                # Raised explicitly (rather than `assert False`) so that an
                # unknown value is still rejected when asserts are
                # stripped with -O, instead of leaving the clock half
                # initialized.
                raise AssertionError('Unknown QTR value: "%s"' % self.qtr)

    def is_pregame(self):
        return self.qtr == 'Pregame'

    def is_halftime(self):
        return self.qtr == 'Halftime'

    def is_final(self):
        return self.qtr == 'Final' or self.qtr == 'final overtime'

    def _sort_key(self):
        # Quarters count up while the clock counts down, so the clock
        # components are negated: less time remaining means "later".
        return (self.__qtr, -self.__minutes, -self.__seconds)

    def __cmp__(self, other):
        # Three-way comparison (-1, 0 or 1) written without the `cmp`
        # builtin, which is not available on Python 3.
        a, b = self._sort_key(), other._sort_key()
        return (a > b) - (a < b)

    # Python 3 ignores __cmp__, so the rich comparisons are spelled out.
    # They order clocks exactly like __cmp__ does.
    def __eq__(self, other):
        return self._sort_key() == other._sort_key()

    def __ne__(self, other):
        return self._sort_key() != other._sort_key()

    def __lt__(self, other):
        return self._sort_key() < other._sort_key()

    def __le__(self, other):
        return self._sort_key() <= other._sort_key()

    def __gt__(self, other):
        return self._sort_key() > other._sort_key()

    def __ge__(self, other):
        return self._sort_key() >= other._sort_key()

    def __hash__(self):
        # Keep hashing consistent with equality.
        return hash(self._sort_key())

    def __str__(self):
        """
        Returns a nicely formatted string indicating the current time of the
        game. Examples include "Q1 10:52", "Q4 1:25", "Pregame", "Halftime"
        and "Final".
        """
        try:
            q = int(self.qtr)
            return 'Q%d %s' % (q, self.clock)
        except ValueError:
            return self.qtr
168 169
class Game (object):
    """
    Game represents a single pre- or regular-season game. It provides a window
    into the statistics of every player that played into the game, along with
    the winner of the game, the score and a list of all the scoring plays.

    The `players` and `drives` attributes are built lazily from the JSON data
    the first time they are accessed.
    """

    def __new__(cls, eid=None, fpath=None):
        # If we can't get a valid JSON data, exit out and return None.
        # (In that case __init__ is not run.)
        try:
            rawData = _get_json_data(eid, fpath)
        except urllib2.URLError:
            return None
        if rawData is None or rawData.strip() == '{}':
            return None
        game = object.__new__(cls)
        game.rawData = rawData
        return game

    def __init__(self, eid=None, fpath=None):
        """
        Creates a new Game instance given a game identifier.

        The game identifier is used by NFL.com's GameCenter live update web
        pages. It is used to construct a URL to download JSON data for the
        game.

        If eid is None, the JSON data is instead read from fpath and the
        game identifier is taken from the data itself.

        If the game has been completed, the JSON data will be cached to disk
        so that subsequent accesses will not re-download the data but instead
        read it from disk.

        When the JSON data is written to disk, it is compressed using gzip.
        """

        if eid is not None:
            self.eid = eid
            self.data = json.loads(self.rawData)[self.eid]
        else:  # For when we have rawData (fpath) and no eid.
            self.eid = None
            self.data = json.loads(self.rawData)
            # The first key that maps to a dictionary is taken to be the
            # game identifier.
            for k, v in self.data.items():
                if isinstance(v, dict):
                    self.eid = k
                    self.data = v
                    break
            assert self.eid is not None

        # Home and team cumulative statistics.
        self.home = self.data['home']['abbr']
        self.away = self.data['away']['abbr']
        self.stats_home = _json_team_stats(self.data['home']['stats']['team'])
        self.stats_away = _json_team_stats(self.data['away']['stats']['team'])

        # Load up some simple static values.
        self.time = GameClock(self.data['qtr'], self.data['clock'])
        self.down = _tryint(self.data['down'])
        self.togo = _tryint(self.data['togo'])
        self.score_home = int(self.data['home']['score']['T'])
        self.score_away = int(self.data['away']['score']['T'])
        # Per-quarter scores become score_home_q1 ... score_away_q5.
        for q in (1, 2, 3, 4, 5):
            for team in ('home', 'away'):
                score = self.data[team]['score'][str(q)]
                setattr(self, 'score_%s_q%d' % (team, q), int(score))

        if not self.game_over():
            self.winner = None
            self.loser = None
        else:
            if self.score_home > self.score_away:
                self.winner = self.home
                self.loser = self.away
            elif self.score_away > self.score_home:
                self.winner = self.away
                self.loser = self.home
            else:
                # A tie: both attributes name both teams.
                self.winner = '%s/%s' % (self.home, self.away)
                self.loser = '%s/%s' % (self.home, self.away)

        # Load the scoring summary into a simple list of strings.
        self.scores = []
        for k in sorted(map(int, self.data['scrsummary'])):
            play = self.data['scrsummary'][str(k)]
            s = '%s - Q%d - %s - %s' \
                % (play['team'], play['qtr'], play['type'], play['desc'])
            self.scores.append(s)

        # Check to see if the game is over, and if so, cache the data.
        # self.eid (not the eid argument) is used because the argument is
        # None when the game was loaded from fpath.
        if self.game_over() and not os.access(_jsonf % self.eid, os.R_OK):
            self.save()

    def is_home(self, team):
        """Returns true if team (i.e., 'NE') is the home team."""
        return team == self.home

    def game_over(self):
        """game_over returns true if the game is no longer being played."""
        return self.time.is_final()

    def playing(self):
        """playing returns true if the game is currently being played."""
        return not self.time.is_pregame() and not self.time.is_final()

    def save(self, fpath=None):
        """
        Save the JSON data to fpath. This is done automatically if the
        game is over.

        If fpath is None, the data is written to this game's cache file.
        The data is gzip compressed. If the file cannot be written, a
        message is printed to stderr and no exception is raised.
        """
        if fpath is None:
            fpath = _jsonf % self.eid
        try:
            gz = gzip.open(fpath, 'w+')
            # Close explicitly so the gzip trailer is flushed and the
            # handle is released even if the write fails.
            try:
                gz.write(self.rawData)
            finally:
                gz.close()
        except IOError:
            sys.stderr.write("Could not cache JSON data. Please "
                             "make '%s' writable.\n"
                             % os.path.dirname(fpath))

    def nice_score(self):
        """
        Returns a string of the score of the game.
        e.g., "NE (32) vs. NYG (0)".
        """
        return '%s (%d) vs. %s (%d)' \
               % (self.home, self.score_home, self.away, self.score_away)

    def max_player_stats(self):
        """
        Returns a GenPlayers sequence of player statistics that combines
        game statistics and play statistics by taking the max value of
        each corresponding statistic.

        This is useful when accuracy is desirable. Namely, using only
        play-by-play data or using only game statistics can be unreliable.
        That is, both are inconsistently correct.

        Taking the max values of each statistic reduces the chance of being
        wrong (particularly for stats that are in both play-by-play data
        and game statistics), but does not eliminate them.

        Only players that appear in both the game statistics and the
        play-by-play data are included.
        """
        game_players = list(self.players)

        # Index the play-by-play statistics by player id, keeping the first
        # entry seen for each id, so every game player is matched with one
        # lookup instead of a scan over all play players.
        play_stats_by_id = {}
        for pplay in self.drives.plays().players():
            play_stats_by_id.setdefault(pplay.playerid, pplay)

        max_players = OrderedDict()
        for pgame in game_players:
            pplay = play_stats_by_id.get(pgame.playerid)
            if pplay is None:
                continue

            newp = nflgame.player.GamePlayerStats(pgame.playerid,
                                                  pgame.name,
                                                  pgame.home)
            maxstats = dict(pgame._stats)
            for stat, val in pplay._stats.items():
                maxstats[stat] = max(val, maxstats.get(stat, -_MAX_INT))

            newp._add_stats(maxstats)
            max_players[pgame.playerid] = newp
        return nflgame.seq.GenPlayerStats(max_players)

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails. 'players' and
        # 'drives' are parsed on first access and then stored on the
        # instance, so this runs at most once for each of them.
        if name == 'players':
            self.__players = _json_game_player_stats(self.data)
            self.players = nflgame.seq.GenPlayerStats(self.__players)
            return self.players
        if name == 'drives':
            self.__drives = _json_drives(self, self.home, self.data['drives'])
            self.drives = nflgame.seq.GenDrives(self.__drives)
            return self.drives
        # Special ("dunder") names are probed by protocols such as copying
        # and pickling; report them as missing rather than handing back a
        # None that looks like a real hook.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        # Any other unknown attribute reads as None.
        return None

    def __sub__(self, other):
        """Returns diff(other, self): what this game has that other lacks."""
        return diff(other, self)

    def __str__(self):
        return self.nice_score()
345 346
def diff(before, after):
    """
    Computes what changed between two snapshots of the same game.

    The result is a GameDiff namedtuple with two attributes, plays and
    players, each holding *only* what is present in the after game but
    absent from the before game.

    This is handy for alerting, since every play statistic is then seen
    exactly once (assuming NFL.com behaves itself).

    XXX: An assertion requires after's game clock to be the same as or later
    than before's game clock. It may need to go if NFL.com allows its game
    clock to be rolled back due to corrections from refs.
    """
    assert after.time >= before.time, \
        'When diffing two games, "after" (%s) must be later or the ' \
        'same time as "before" (%s).' % (after.time, before.time)
    assert after.eid == before.eid

    after_plays = list(after.drives.plays())
    before_plays = list(before.drives.plays())
    new_plays = [play for play in after_plays if play not in before_plays]

    # New plays alone are not sufficient: a play may show up a second time
    # when its description is revised (late call? play review? etc.), so
    # the per-player statistics from the play data are diffed as well.
    changed = OrderedDict()
    after_players = list(after.drives.players())
    before_players = list(before.drives.players())
    for current in after_players:
        earlier = [old for old in before_players
                   if current.playerid == old.playerid]
        if not earlier:
            # Player was not seen before: everything they have is new.
            changed[current.playerid] = current
            continue
        for old in earlier:
            delta = current - old
            if delta is not None:
                changed[current.playerid] = delta

    return GameDiff(plays=new_plays,
                    players=nflgame.seq.GenPlayerStats(changed))
394 395
class Drive (object):
    """
    Drive represents a single drive in an NFL game. It contains a list
    of all plays that happened in the drive, in chronological order.
    It also contains meta information about the drive such as the start
    and stop times and field position, length of possession, the number
    of first downs and a short descriptive string of the result of the
    drive.

    """
    def __init__(self, game, drive_num, home_team, data):
        """
        game is the Game this drive belongs to, drive_num is the position
        of the drive within the game, home_team is the abbreviation of the
        home team and data is the JSON entry for the drive.

        If data is None, only `game` and `drive_num` are set; this is used
        by __add__ to create a blank drive that it then fills in.
        """
        # Set before the early return so that even a blank drive carries
        # the values it was constructed with.
        self.game = game
        self.drive_num = drive_num
        if data is None:
            return
        self.team = data['posteam']
        self.home = self.team == home_team
        self.first_downs = int(data['fds'])
        self.result = data['result']
        self.penalty_yds = int(data['penyds'])
        self.total_yds = int(data['ydsgained'])
        self.pos_time = PossessionTime(data['postime'])
        self.play_cnt = int(data['numplays'])
        self.field_start = FieldPosition(self.team, data['start']['yrdln'])
        self.time_start = GameClock(data['start']['qtr'],
                                    data['start']['time'])

        # Play ids in ascending order; used for both lookups below.
        play_ids = sorted(map(int, data['plays']))

        # When the game is over, the yardline isn't reported. So find the
        # last play that does report a yardline.
        if data['end']['yrdln'].strip():
            self.field_end = FieldPosition(self.team, data['end']['yrdln'])
        else:
            self.field_end = None
            for pid in reversed(play_ids):
                yrdln = data['plays'][str(pid)]['yrdln'].strip()
                if yrdln:
                    self.field_end = FieldPosition(self.team, yrdln)
                    break
            if self.field_end is None:
                # No play reports a yardline: fall back to midfield.
                self.field_end = FieldPosition(self.team, '50')

        # When a drive lasts from Q1 to Q2 or Q3 to Q4, the 'end' doesn't
        # seem to change to the proper quarter. So look at the last play and
        # use that quarter instead.
        lastplayid = str(play_ids[-1])
        endqtr = data['plays'][lastplayid]['qtr']
        self.time_end = GameClock(endqtr, data['end']['time'])

        self.__plays = _json_plays(self, data['plays'])
        self.plays = nflgame.seq.GenPlays(self.__plays)

    def __add__(self, other):
        """
        Adds the statistics of two drives together.

        Both drives must belong to the same team. The plays of the new
        drive are the plays of this drive followed by those of other.

        Note that once two drives are added, the following fields
        automatically get None values: result, field_start, field_end,
        time_start and time_end.
        """
        assert self.team == other.team, \
            'Cannot add drives from different teams "%s" and "%s".' \
            % (self.team, other.team)
        new_drive = Drive(None, 0, '', None)
        new_drive.team = self.team
        new_drive.home = self.home
        new_drive.first_downs = self.first_downs + other.first_downs
        new_drive.penalty_yds = self.penalty_yds + other.penalty_yds
        new_drive.total_yds = self.total_yds + other.total_yds
        new_drive.pos_time = self.pos_time + other.pos_time
        new_drive.play_cnt = self.play_cnt + other.play_cnt
        new_drive.__plays = self.__plays + other.__plays
        # Expose the combined plays the same way __init__ does; without
        # this a summed drive has no `plays` attribute at all.
        new_drive.plays = nflgame.seq.GenPlays(new_drive.__plays)
        new_drive.result = None
        new_drive.field_start = None
        new_drive.field_end = None
        new_drive.time_start = None
        new_drive.time_end = None
        return new_drive

    def __str__(self):
        return '%s (Start: %s, End: %s) %s' \
               % (self.team, self.time_start, self.time_end, self.result)
478 479
class Play (object):
    """
    Play represents a single play. It contains a list of all players
    that participated in the play (including offense, defense and special
    teams). The play also includes meta information about what down it
    is, field position, clock time, etc.

    Play objects also contain team-level statistics, such as whether the
    play was a first down, a fourth down failure, etc.

    Any statistic that was not recorded for the play reads as 0.
    """
    def __init__(self, drive, playid, data):
        """
        drive is the Drive this play belongs to, playid is the identifier
        of the play and data is the JSON entry for the play.
        """
        self.data = data
        self.drive = drive
        self.playid = playid
        self.team = data['posteam']
        self.home = self.drive.home
        self.desc = data['desc']
        self.note = data['note']
        self.down = int(data['down'])
        self.yards_togo = int(data['ydstogo'])
        self.touchdown = 'touchdown' in self.desc.lower()

        # Without a team in possession there is no clock or yardline.
        if not self.team:
            self.time, self.yardline = None, None
        else:
            self.time = GameClock(data['qtr'], data['time'])
            self.yardline = FieldPosition(self.team, data['yrdln'])

        # Load team statistics directly into the Play instance.
        # Things like third down attempts, first downs, etc.
        # (These are the entries stored under the player id '0'.)
        if '0' in data['players']:
            for info in data['players']['0']:
                if info['statId'] not in nflgame.statmap.idmap:
                    continue
                statvals = nflgame.statmap.values(info['statId'],
                                                  info['yards'])
                for k, v in statvals.items():
                    self.__dict__[k] = self.__dict__.get(k, 0) + v

        # Load the sequence of "events" in a play into a list of dictionaries.
        self.events = _json_play_events(data['players'])

        # Now load cumulative player data for this play into
        # a GenPlayerStats generator. We then flatten this data
        # and add it to the play itself so that plays can be
        # filter by these statistics.
        self.__players = _json_play_players(self, data['players'])
        self.players = nflgame.seq.GenPlayerStats(self.__players)
        for p in self.players:
            for k, v in p.stats.items():
                # Sometimes we may see duplicate statistics (like tackle
                # assists). Let's just overwrite in this case, since this
                # data is from the perspective of the play. i.e., there
                # is one assisted tackle rather than two.
                self.__dict__[k] = v

    def has_player(self, playerid):
        """Whether a player with id playerid participated in this play."""
        return playerid in self.__players

    def __str__(self):
        if self.team:
            if self.down != 0:
                return '(%s, %s, %d and %d) %s' \
                       % (self.team, self.data['yrdln'],
                          self.down, self.yards_togo, self.desc)
            else:
                return '(%s, %s) %s' \
                       % (self.team, self.data['yrdln'], self.desc)
        return self.desc

    def __eq__(self, other):
        """
        We use the play description to determine equality because the
        play description can be changed. (Like when a play is reversed.)
        """
        return self.playid == other.playid and self.desc == other.desc

    def __ne__(self, other):
        # Python 2 does not derive != from ==; without this, two plays
        # could be both "equal" and "not equal" at the same time.
        return not self.__eq__(other)

    def __hash__(self):
        # Keep hashing consistent with __eq__ (and keep plays hashable on
        # Python 3, where defining __eq__ alone removes the default hash).
        return hash((self.playid, self.desc))

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails.
        # Special ("dunder") names are probed by protocols such as copying
        # and pickling; report them as missing rather than handing back a
        # 0 that looks like a real hook.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        # Every statistic that was not recorded for this play reads as 0.
        return 0
560 561
def _json_team_stats(data):
    """
    Builds a TeamStats namedtuple out of a team stats JSON entry.

    Every statistic is converted to an integer, except for the time of
    possession, which becomes a PossessionTime.
    """
    # (TeamStats field, JSON key) pairs for the integer statistics.
    int_fields = (
        ('first_downs', 'totfd'),
        ('total_yds', 'totyds'),
        ('passing_yds', 'pyds'),
        ('rushing_yds', 'ryds'),
        ('penalty_cnt', 'pen'),
        ('penalty_yds', 'penyds'),
        ('turnovers', 'trnovr'),
        ('punt_cnt', 'pt'),
        ('punt_yds', 'ptyds'),
        ('punt_avg', 'ptavg'),
    )
    fields = dict((field, int(data[key])) for field, key in int_fields)
    fields['pos_time'] = PossessionTime(data['top'])
    return TeamStats(**fields)
578 579
580 -def _json_drives(game, home_team, data):
581 """ 582 Takes a home or away JSON entry and converts it to a list of Drive 583 objects. 584 """ 585 drive_nums = [] 586 for drive_num in data: 587 try: 588 drive_nums.append(int(drive_num)) 589 except: 590 pass 591 drives = [] 592 playids = set() # Plays can be repeated! Ah! 593 for i, drive_num in enumerate(sorted(drive_nums), 1): 594 repeat_drive = False 595 for playid in data[str(drive_num)]['plays']: 596 if playid in playids: 597 repeat_drive = True 598 break 599 playids.add(playid) 600 if repeat_drive: 601 continue 602 drives.append(Drive(game, i, home_team, data[str(drive_num)])) 603 return drives
604 605
606 -def _json_plays(drive, data):
607 """ 608 Takes a single JSON drive entry (data) and converts it to a list 609 of Play objects. 610 """ 611 plays = [] 612 for playid in map(str, sorted(map(int, data))): 613 plays.append(Play(drive, playid, data[playid])) 614 return plays
615 616
def _json_play_players(play, data):
    """
    Converts a single JSON play entry (data) into an OrderedDict that maps
    player ids to their statistics for that play.

    play is the Play instance the data belongs to; its game is consulted to
    decide whether a player is on the home team. Entries filed under the
    player id '0' are skipped.
    """
    by_player = OrderedDict()
    for playerid, statcats in data.iteritems():
        if playerid == '0':
            continue
        for info in statcats:
            stat_id = info['statId']
            if stat_id not in nflgame.statmap.idmap:
                continue
            entry = by_player.get(playerid)
            if entry is None:
                # First known statistic for this player in this play.
                is_home = play.drive.game.is_home(info['clubcode'])
                entry = nflgame.player.PlayPlayerStats(playerid,
                                                       info['playerName'],
                                                       is_home)
                by_player[playerid] = entry
            entry._add_stats(nflgame.statmap.values(stat_id, info['yards']))
    return by_player
641 642
def _json_play_events(data):
    """
    Converts a single JSON play entry (data) into a list of event
    dictionaries, ordered by their 'sequence' number.

    Besides its statistics, every event carries 'playerid' (None for
    entries filed under the id '0'), 'playername' (None when empty) and
    'team'.
    """
    sequenced = []
    for playerid, statcats in data.iteritems():
        for info in statcats:
            if info['statId'] not in nflgame.statmap.idmap:
                continue
            event = nflgame.statmap.values(info['statId'], info['yards'])
            if playerid == '0':
                event['playerid'] = None
            else:
                event['playerid'] = playerid
            event['playername'] = info['playerName'] or None
            event['team'] = info['clubcode']
            sequenced.append((int(info['sequence']), event))
    # Sort on the sequence number only (a stable sort, so ties keep the
    # order in which they were collected).
    sequenced.sort(key=lambda pair: pair[0])
    return [event for _, event in sequenced]
658 659
def _json_game_player_stats(data):
    """
    Walks the 'home' and 'away' team stats and builds an OrderedDict that
    maps each player id to an nflgame.player.GamePlayerStats instance
    holding that player's total game statistics.

    Statistic names are prefixed with their category, e.g. a 'yds' entry
    in the 'passing' category is stored as 'passing_yds'.
    """
    players = OrderedDict()
    for team in ('home', 'away'):
        team_stats = data[team]['stats']
        is_home = team == 'home'
        for category in nflgame.statmap.categories:
            if category not in team_stats:
                continue
            for pid, raw in team_stats[category].iteritems():
                # Everything except the player's name is a statistic.
                stats = dict(('%s_%s' % (category, key), val)
                             for key, val in raw.iteritems()
                             if key != 'name')
                if pid not in players:
                    players[pid] = nflgame.player.GamePlayerStats(
                        pid, raw['name'], is_home)
                players[pid]._add_stats(stats)
    return players
684 685
686 -def _get_json_data(eid=None, fpath=None):
687 """ 688 Returns the JSON data corresponding to the game represented by eid. 689 690 If the JSON data is already on disk, it is read, decompressed and returned. 691 692 Otherwise, the JSON data is downloaded from the NFL web site. If the data 693 doesn't exist yet or there was an error, _get_json_data returns None. 694 695 If eid is None, then the JSON data is read from the file at fpath. 696 """ 697 assert eid is not None or fpath is not None 698 699 if fpath is not None: 700 return gzip.open(fpath).read() 701 702 fpath = _jsonf % eid 703 if os.access(fpath, os.R_OK): 704 return gzip.open(fpath).read() 705 try: 706 return urllib2.urlopen(_json_base_url % (eid, eid)).read() 707 except urllib2.HTTPError: 708 pass 709 return None
710 711
712 -def _tryint(v):
713 """ 714 Tries to convert v to an integer. If it fails, return 0. 715 """ 716 try: 717 return int(v) 718 except: 719 return 0
720