1 from collections import namedtuple
2 import os
3 import os.path as path
4 import gzip
5 import json
6 import sys
7 import urllib2
8
9 from nflgame import OrderedDict
10 import nflgame.player
11 import nflgame.seq
12 import nflgame.statmap
13
# Largest native integer; its negation serves as the "no value yet" floor
# when the maximum of two player statistics is taken elsewhere in this module.
_MAX_INT = sys.maxint

# Path template for the gzip-compressed JSON cache of a game. The files live
# in the 'gamecenter-json' directory next to this module, and '%s' is filled
# with a game identifier (eid).
_jsonf = path.join(path.split(__file__)[0], 'gamecenter-json', '%s.json.gz')
# URL template for a game's JSON data; both '%s' slots receive the eid.
_json_base_url = "http://www.nfl.com/liveupdate/game-center/%s/%s_gtd.json"

# Return type of diff(): the plays and player statistics that differ between
# two snapshots of one game.
GameDiff = namedtuple('GameDiff', ['plays', 'players'])
"""
Represents the difference between two points in time of the same game
in terms of plays and player statistics.
"""

# Per-team totals for a game. All fields are integers except pos_time, which
# is built as a PossessionTime where this tuple is constructed.
TeamStats = namedtuple('TeamStats',
                       ['first_downs', 'total_yds', 'passing_yds',
                        'rushing_yds', 'penalty_cnt', 'penalty_yds',
                        'turnovers', 'punt_cnt', 'punt_yds', 'punt_avg',
                        'pos_time'])
"""A collection of team statistics for an entire game."""
31
32
34 """
35 Represents field position.
36
37 The representation here is an integer offset where the 50 yard line
38 corresponds to '0'. Being in the own territory corresponds to a negative
39 offset while being in the opponent's territory corresponds to a positive
40 offset.
41
42 e.g., NE has the ball on the NE 45, the offset is -5.
43 e.g., NE has the ball on the NYG 2, the offset is 48.
44 """
def __new__(cls, pos_team, yardline):
    """
    Allocates an instance only when a yard line is actually available.

    When yardline is empty (or otherwise falsy), None is returned in
    place of an instance. pos_team is accepted to mirror the constructor
    arguments but is not consulted here.
    """
    return object.__new__(cls) if yardline else None
49
51 """
52 pos_team is the team on offense, and yardline is a string formatted
53 like 'team-territory yard-line'. e.g., "NE 32".
54 """
55 if yardline == '50':
56 self.offset = 0
57 return
58
59 territory, yd_str = yardline.split()
60 yd = int(yd_str)
61 if territory == pos_team:
62 self.offset = -(50 - yd)
63 else:
64 self.offset = 50 - yd
65
67 return cmp(self.offset, other.offset)
68
70 return '%d' % self.offset
71
72
74 """
75 Represents the amount of time a drive lasted in (minutes, seconds).
76 """
78 self.clock = clock
79 self.minutes, self.seconds = map(int, self.clock.split(':'))
80
82 """
83 Returns the total number of seconds that this possession lasted for.
84 """
85 return self.seconds + self.minutes * 60
86
88 a, b = (self.minutes, self.seconds), (other.minutes, other.seconds)
89 return cmp(a, b)
90
98
107
110
111
113 """
114 Represents the current time in a game. Namely, it keeps track of the
115 quarter and clock time. Also, GameClock can represent whether
116 the game hasn't started yet, is half time or if it's over.
117 """
119 self.qtr = qtr
120 self.clock = clock
121
122
123
124 self.__minutes, self.__seconds = map(int, self.clock.split(':'))
125
126
127 try:
128 self.__qtr = int(self.qtr)
129 if self.__qtr >= 3:
130 self.__qtr += 1
131 except ValueError:
132 if self.is_pregame():
133 self.__qtr = 0
134 elif self.is_halftime():
135 self.__qtr = 3
136 elif self.is_final():
137 self.__qtr = sys.maxint
138 else:
139 assert False, 'Unknown QTR value: "%s"' % self.qtr
140
142 return self.qtr == 'Pregame'
143
145 return self.qtr == 'Halftime'
146
148 return self.qtr == 'Final' or self.qtr == 'final overtime'
149
151 if self.__qtr != other.__qtr:
152 return cmp(self.__qtr, other.__qtr)
153 elif self.__minutes != other.__minutes:
154 return cmp(other.__minutes, self.__minutes)
155 return cmp(other.__seconds, self.__seconds)
156
158 """
159 Returns a nicely formatted string indicating the current time of the
160 game. Examples include "Q1 10:52", "Q4 1:25", "Pregame", "Halftime"
161 and "Final".
162 """
163 try:
164 q = int(self.qtr)
165 return 'Q%d %s' % (q, self.clock)
166 except ValueError:
167 return self.qtr
168
169
170 -class Game (object):
171 """
172 Game represents a single pre- or regular-season game. It provides a window
173 into the statistics of every player that played into the game, along with
174 the winner of the game, the score and a list of all the scoring plays.
175 """
176
def __new__(cls, eid=None, fpath=None):
    """
    Fetches the raw JSON for a game before any instance is initialized.

    Returns None instead of an instance when fetching the data raises
    urllib2.URLError, when no data is available, or when the data is just
    an empty JSON object ('{}'). Otherwise the raw text is attached to
    the new instance as rawData so that __init__ can parse it.
    """
    try:
        raw = _get_json_data(eid, fpath)
    except urllib2.URLError:
        return None

    has_data = raw is not None and raw.strip() != '{}'
    if not has_data:
        return None

    instance = object.__new__(cls)
    instance.rawData = raw
    return instance
188
def __init__(self, eid=None, fpath=None):
    """
    Creates a new Game instance given a game identifier.

    The game identifier is used by NFL.com's GameCenter live update web
    pages. It is used to construct a URL to download JSON data for the
    game.

    If the game has been completed, the JSON data will be cached to disk
    so that subsequent accesses will not re-download the data but instead
    read it from disk.

    When the JSON data is written to disk, it is compressed using gzip.

    If eid is None, the identifier is taken from the JSON itself: the
    first top-level key whose value is a dictionary. fpath is not used
    here; the raw JSON has already been loaded into self.rawData by
    __new__.
    """
    parsed = json.loads(self.rawData)
    if eid is not None:
        self.eid = eid
        self.data = parsed[self.eid]
    else:
        self.eid = None
        self.data = parsed
        for k, v in parsed.iteritems():
            if isinstance(v, dict):
                self.eid = k
                self.data = v
                break
        assert self.eid is not None

    # Home and team cumulative statistics.
    self.home = self.data['home']['abbr']
    self.away = self.data['away']['abbr']
    self.stats_home = _json_team_stats(self.data['home']['stats']['team'])
    self.stats_away = _json_team_stats(self.data['away']['stats']['team'])

    # Current state of the game and the score, in total and per quarter
    # (quarter 5 is read from the same score table as quarters 1-4).
    self.time = GameClock(self.data['qtr'], self.data['clock'])
    self.down = _tryint(self.data['down'])
    self.togo = _tryint(self.data['togo'])
    self.score_home = int(self.data['home']['score']['T'])
    self.score_away = int(self.data['away']['score']['T'])
    for q in (1, 2, 3, 4, 5):
        for team in ('home', 'away'):
            score = self.data[team]['score'][str(q)]
            self.__dict__['score_%s_q%d' % (team, q)] = int(score)

    if not self.game_over():
        self.winner = None
        # Set explicitly so that winner and loser are always defined
        # together.
        self.loser = None
    else:
        if self.score_home > self.score_away:
            self.winner = self.home
            self.loser = self.away
        elif self.score_away > self.score_home:
            self.winner = self.away
            self.loser = self.home
        else:
            # A tie: both teams are reported as winner and as loser.
            self.winner = '%s/%s' % (self.home, self.away)
            self.loser = '%s/%s' % (self.home, self.away)

    # One descriptive string per scoring play, ordered by numeric key.
    self.scores = []
    for k in sorted(map(int, self.data['scrsummary'])):
        play = self.data['scrsummary'][str(k)]
        s = '%s - Q%d - %s - %s' \
            % (play['team'], play['qtr'], play['type'], play['desc'])
        self.scores.append(s)

    # Cache finished games on disk. The check must use self.eid rather
    # than the eid argument: when the game was loaded from fpath the
    # argument is None, which would probe a bogus 'None.json.gz' path and
    # rewrite the cache on every load.
    if self.game_over() and not os.access(_jsonf % self.eid, os.R_OK):
        self.save()
258
260 """Returns true if team (i.e., 'NE') is the home team."""
261 return team == self.home
262
264 """game_over returns true if the game is no longer being played."""
265 return self.time.is_final()
266
268 """playing returns true if the game is currently being played."""
269 return not self.time.is_pregame() and not self.time.is_final()
270
def save(self, fpath=None):
    """
    Save the JSON data to fpath. This is done automatically if the
    game is over.

    When fpath is None, the default cache path for this game's eid is
    used. The data (self.rawData) is written gzip-compressed. An IOError
    while opening or writing the file is not propagated; a message asking
    for the directory to be made writable is printed to stderr instead.
    """
    if fpath is None:
        fpath = _jsonf % self.eid
    try:
        out = gzip.open(fpath, 'wb')
        try:
            out.write(self.rawData)
        finally:
            # Close explicitly: the gzip trailer is only written on close,
            # so relying on garbage collection can leave a truncated file.
            out.close()
    except IOError:
        sys.stderr.write("Could not cache JSON data. Please "
                         "make '%s' writable.\n"
                         % os.path.dirname(fpath))
284
286 """
287 Returns a string of the score of the game.
288 e.g., "NE (32) vs. NYG (0)".
289 """
290 return '%s (%d) vs. %s (%d)' \
291 % (self.home, self.score_home, self.away, self.score_away)
292
294 """
295 Returns a GenPlayers sequence of player statistics that combines
296 game statistics and play statistics by taking the max value of
297 each corresponding statistic.
298
299 This is useful when accuracy is desirable. Namely, using only
300 play-by-play data or using only game statistics can be unreliable.
301 That is, both are inconsistently correct.
302
303 Taking the max values of each statistic reduces the chance of being
304 wrong (particularly for stats that are in both play-by-play data
305 and game statistics), but does not eliminate them.
306 """
307 game_players = list(self.players)
308 play_players = list(self.drives.plays().players())
309 max_players = OrderedDict()
310 for pgame in game_players:
311 for pplay in play_players:
312 if pgame.playerid != pplay.playerid:
313 continue
314
315 newp = nflgame.player.GamePlayerStats(pgame.playerid,
316 pgame.name,
317 pgame.home)
318 maxstats = {}
319 for stat, val in pgame._stats.iteritems():
320 maxstats[stat] = val
321 for stat, val in pplay._stats.iteritems():
322 maxstats[stat] = max([val, maxstats.get(stat, -_MAX_INT)])
323
324 newp._add_stats(maxstats)
325 max_players[pgame.playerid] = newp
326
327 break
328 return nflgame.seq.GenPlayerStats(max_players)
329
331 if name == 'players':
332 self.__players = _json_game_player_stats(self.data)
333 self.players = nflgame.seq.GenPlayerStats(self.__players)
334 return self.players
335 if name == 'drives':
336 self.__drives = _json_drives(self, self.home, self.data['drives'])
337 self.drives = nflgame.seq.GenDrives(self.__drives)
338 return self.drives
339
341 return diff(other, self)
342
345
346
def diff(before, after):
    """
    Computes what changed in a game between two snapshots of it.

    The result is a GameDiff namedtuple with two attributes, plays and
    players, each holding *only* what is present in the after snapshot
    but not in the before snapshot. A player seen in both snapshots is
    reported through the result of subtracting the old statistics from
    the new ones (and is left out when that subtraction yields None); a
    player seen only in after is reported as is.

    This is useful for sending alerts where you're guaranteed to see each
    play statistic only once (assuming NFL.com behaves itself).

    XXX: An assertion requires after's game clock to be the same as or
    later than before's game clock. This may need to be removed if
    NFL.com allows its game clock to be rolled back due to corrections
    from refs. A second assertion requires both snapshots to have the
    same eid.
    """
    assert after.time >= before.time, \
        'When diffing two games, "after" (%s) must be later or the ' \
        'same time as "before" (%s).' % (after.time, before.time)
    assert after.eid == before.eid

    later_plays = list(after.drives.plays())
    earlier_plays = list(before.drives.plays())
    new_plays = [p for p in later_plays if p not in earlier_plays]

    changed = OrderedDict()
    later_players = list(after.drives.players())
    earlier_players = list(before.drives.players())
    for current in later_players:
        pid = current.playerid
        previous = [old for old in earlier_players if old.playerid == pid]
        if not previous:
            # Brand new player: everything they have is new.
            changed[pid] = current
            continue
        for old in previous:
            delta = current - old
            if delta is not None:
                changed[pid] = delta

    return GameDiff(plays=new_plays,
                    players=nflgame.seq.GenPlayerStats(changed))
394
395
397 """
398 Drive represents a single drive in an NFL game. It contains a list
399 of all plays that happened in the drive, in chronological order.
400 It also contains meta information about the drive such as the start
401 and stop times and field position, length of possession, the number
402 of first downs and a short descriptive string of the result of the
403 drive.
404
405 """
def __init__(self, game, drive_num, home_team, data):
    """
    Builds a drive from its JSON entry.

    game is the object this drive belongs to, drive_num is the number
    assigned to the drive, home_team is the abbreviation of the home team
    and data is the JSON entry for the drive. When data is None, nothing
    is initialized; the caller is then responsible for filling in the
    attributes (as __add__ does).
    """
    if data is None:
        return

    self.game = game
    self.drive_num = drive_num
    self.team = data['posteam']
    self.home = self.team == home_team
    self.first_downs = int(data['fds'])
    self.result = data['result']
    self.penalty_yds = int(data['penyds'])
    self.total_yds = int(data['ydsgained'])
    self.pos_time = PossessionTime(data['postime'])
    self.play_cnt = int(data['numplays'])

    start = data['start']
    self.field_start = FieldPosition(self.team, start['yrdln'])
    self.time_start = GameClock(start['qtr'], start['time'])

    end = data['end']
    end_yardline = end['yrdln']
    if end_yardline.strip():
        self.field_end = FieldPosition(self.team, end_yardline)
    else:
        # The drive itself reports no ending yard line. Walk the plays
        # from the highest play id down and take the first yard line that
        # is not blank; settle for midfield when none of them has one.
        self.field_end = None
        newest_first = sorted(map(int, data['plays'].keys()), reverse=True)
        for pid in newest_first:
            candidate = data['plays'][str(pid)]['yrdln'].strip()
            if candidate:
                break
        else:
            candidate = '50'
        self.field_end = FieldPosition(self.team, candidate)

    # The ending quarter is taken from the play with the highest id,
    # while the ending clock time comes from the drive's own 'end' entry.
    last_play_id = str(sorted(map(int, data['plays'].keys()))[-1])
    end_qtr = data['plays'][last_play_id]['qtr']
    self.time_end = GameClock(end_qtr, end['time'])

    self.__plays = _json_plays(self, data['plays'])
    self.plays = nflgame.seq.GenPlays(self.__plays)
447
449 """
450 Adds the statistics of two drives together.
451
452 Note that once two drives are added, the following fields
453 automatically get None values: result, field_start, field_end,
454 time_start and time_end.
455 """
456 assert self.team == other.team, \
457 'Cannot add drives from different teams "%s" and "%s".' \
458 % (self.team, other.team)
459 new_drive = Drive(None, 0, '', None)
460 new_drive.team = self.team
461 new_drive.home = self.home
462 new_drive.first_downs = self.first_downs + other.first_downs
463 new_drive.penalty_yds = self.penalty_yds + other.penalty_yds
464 new_drive.total_yds = self.total_yds + other.total_yds
465 new_drive.pos_time = self.pos_time + other.pos_time
466 new_drive.play_cnt = self.play_cnt + other.play_cnt
467 new_drive.__plays = self.__plays + other.__plays
468 new_drive.result = None
469 new_drive.field_start = None
470 new_drive.field_end = None
471 new_drive.time_start = None
472 new_drive.time_end = None
473 return new_drive
474
476 return '%s (Start: %s, End: %s) %s' \
477 % (self.team, self.time_start, self.time_end, self.result)
478
479
480 -class Play (object):
481 """
482 Play represents a single play. It contains a list of all players
483 that participated in the play (including offense, defense and special
484 teams). The play also includes meta information about what down it
485 is, field position, clock time, etc.
486
487 Play objects also contain team-level statistics, such as whether the
488 play was a first down, a fourth down failure, etc.
489 """
def __init__(self, drive, playid, data):
    """
    Builds a play from its JSON entry.

    drive is the drive this play belongs to, playid is the play's
    identifier and data is the JSON entry for the play. Besides the meta
    information (team, description, down, clock, yard line), every
    statistic found in the play is exposed as an attribute on the
    instance.
    """
    self.data = data
    self.drive = drive
    self.playid = playid
    self.team = data['posteam']
    self.home = self.drive.home
    self.desc = data['desc']
    self.note = data['note']
    self.down = int(data['down'])
    self.yards_togo = int(data['ydstogo'])
    self.touchdown = 'touchdown' in self.desc.lower()

    if self.team:
        self.time = GameClock(data['qtr'], data['time'])
        self.yardline = FieldPosition(self.team, data['yrdln'])
    else:
        # Without a team in possession, no clock or yard line is parsed.
        self.time = self.yardline = None

    attrs = self.__dict__

    # Entries stored under player id '0' are accumulated directly onto
    # the play; repeated statistics are summed.
    if '0' in data['players']:
        for info in data['players']['0']:
            stat_id = info['statId']
            if stat_id in nflgame.statmap.idmap:
                found = nflgame.statmap.values(stat_id, info['yards'])
                for stat, amount in found.iteritems():
                    attrs[stat] = attrs.get(stat, 0) + amount

    # Every statistical event of the play.
    self.events = _json_play_events(data['players'])

    # Per-player statistics. Each player's statistics are also copied
    # onto the play itself; note that these overwrite (rather than add
    # to) any attribute of the same name.
    self.__players = _json_play_players(self, data['players'])
    self.players = nflgame.seq.GenPlayerStats(self.__players)
    for player in self.players:
        for stat, amount in player.stats.iteritems():
            attrs[stat] = amount
535
537 """Whether a player with id playerid participated in this play."""
538 return playerid in self.__players
539
541 if self.team:
542 if self.down != 0:
543 return '(%s, %s, %d and %d) %s' \
544 % (self.team, self.data['yrdln'],
545 self.down, self.yards_togo, self.desc)
546 else:
547 return '(%s, %s) %s' \
548 % (self.team, self.data['yrdln'], self.desc)
549 return self.desc
550
552 """
553 We use the play description to determine equality because the
554 play description can be changed. (Like when a play is reversed.)
555 """
556 return self.playid == other.playid and self.desc == other.desc
557
560
561
563 """
564 Takes a team stats JSON entry and converts it to a TeamStats namedtuple.
565 """
566 return TeamStats(
567 first_downs=int(data['totfd']),
568 total_yds=int(data['totyds']),
569 passing_yds=int(data['pyds']),
570 rushing_yds=int(data['ryds']),
571 penalty_cnt=int(data['pen']),
572 penalty_yds=int(data['penyds']),
573 turnovers=int(data['trnovr']),
574 punt_cnt=int(data['pt']),
575 punt_yds=int(data['ptyds']),
576 punt_avg=int(data['ptavg']),
577 pos_time=PossessionTime(data['top']))
578
579
581 """
582 Takes a home or away JSON entry and converts it to a list of Drive
583 objects.
584 """
585 drive_nums = []
586 for drive_num in data:
587 try:
588 drive_nums.append(int(drive_num))
589 except:
590 pass
591 drives = []
592 playids = set()
593 for i, drive_num in enumerate(sorted(drive_nums), 1):
594 repeat_drive = False
595 for playid in data[str(drive_num)]['plays']:
596 if playid in playids:
597 repeat_drive = True
598 break
599 playids.add(playid)
600 if repeat_drive:
601 continue
602 drives.append(Drive(game, i, home_team, data[str(drive_num)]))
603 return drives
604
605
607 """
608 Takes a single JSON drive entry (data) and converts it to a list
609 of Play objects.
610 """
611 plays = []
612 for playid in map(str, sorted(map(int, data))):
613 plays.append(Play(drive, playid, data[playid]))
614 return plays
615
616
641
642
644 """
645 Takes a single JSON play entry (data) and converts it to a list of events.
646 """
647 temp = list()
648 for playerid, statcats in data.iteritems():
649 for info in statcats:
650 if info['statId'] not in nflgame.statmap.idmap:
651 continue
652 statvals = nflgame.statmap.values(info['statId'], info['yards'])
653 statvals['playerid'] = None if playerid == '0' else playerid
654 statvals['playername'] = info['playerName'] or None
655 statvals['team'] = info['clubcode']
656 temp.append((int(info['sequence']), statvals))
657 return [t[1] for t in sorted(temp, key=lambda t: t[0])]
658
659
661 """
662 Parses the 'home' and 'away' team stats and returns an OrderedDict
663 mapping player id to their total game statistics as instances of
664 nflgame.player.GamePlayerStats.
665 """
666 players = OrderedDict()
667 for team in ('home', 'away'):
668 for category in nflgame.statmap.categories:
669 if category not in data[team]['stats']:
670 continue
671 for pid, raw in data[team]['stats'][category].iteritems():
672 stats = {}
673 for k, v in raw.iteritems():
674 if k == 'name':
675 continue
676 stats['%s_%s' % (category, k)] = v
677 if pid not in players:
678 home = team == 'home'
679 players[pid] = nflgame.player.GamePlayerStats(pid,
680 raw['name'],
681 home)
682 players[pid]._add_stats(stats)
683 return players
684
685
687 """
688 Returns the JSON data corresponding to the game represented by eid.
689
690 If the JSON data is already on disk, it is read, decompressed and returned.
691
692 Otherwise, the JSON data is downloaded from the NFL web site. If the data
693 doesn't exist yet or there was an error, _get_json_data returns None.
694
695 If eid is None, then the JSON data is read from the file at fpath.
696 """
697 assert eid is not None or fpath is not None
698
699 if fpath is not None:
700 return gzip.open(fpath).read()
701
702 fpath = _jsonf % eid
703 if os.access(fpath, os.R_OK):
704 return gzip.open(fpath).read()
705 try:
706 return urllib2.urlopen(_json_base_url % (eid, eid)).read()
707 except urllib2.HTTPError:
708 pass
709 return None
710
711
713 """
714 Tries to convert v to an integer. If it fails, return 0.
715 """
716 try:
717 return int(v)
718 except:
719 return 0
720