Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Travel Mode Choice""" 

2from statsmodels.datasets import utils as du 

3 

# Declares 'restructuredtext' as the docstring markup format for this module.
4__docformat__ = 'restructuredtext' 

5 

# Licensing statement for the data set.
6COPYRIGHT = """This is public domain.""" 

# The title is taken directly from the module docstring.
7TITLE = __doc__ 

# Citation for the data: the originating publications and the download URL.
8SOURCE = """ 

9Greene, W.H. and D. Hensher (1997) Multinomial logit and discrete choice models 

10in Greene, W. H. (1997) LIMDEP version 7.0 user's manual revised, Plainview, 

11New York econometric software, Inc. 

12Download from on-line complements to Greene, W.H. (2011) Econometric Analysis, 

13Prentice Hall, 7th Edition (data table F18-2) 

14http://people.stern.nyu.edu/wgreene/Text/Edition7/TableF18-2.csv 

15""" 

16 

# Short summary of what the data set is used for.
17DESCRSHORT = """Data used to study travel mode choice between Australian cities 

18""" 

19 

# Extended description: the study, the sampling scheme and the origin of the
# level-of-service data.
20DESCRLONG = """The data, collected as part of a 1987 intercity mode choice 

21study, are a sub-sample of 210 non-business trips between Sydney, Canberra and 

22Melbourne in which the traveler chooses a mode from four alternatives (plane, 

23car, bus and train). The sample, 840 observations, is choice based with 

24over-sampling of the less popular modes (plane, train and bus) and under-sampling 

25of the more popular mode, car. The level of service data was derived from highway 

26and transport networks in Sydney, Melbourne, non-metropolitan N.S.W. and Victoria, 

27including the Australian Capital Territory.""" 

28 

# Observation/variable counts followed by a definition of each variable.
# NOTE(review): the text states "Number of variables: 8" but defines nine
# names (individual, mode, choice, ttme, invc, invt, gc, hinc, psize) --
# confirm which count is intended.
29NOTE = """:: 

30 

31 Number of observations: 840 Observations On 4 Modes for 210 Individuals. 

32 Number of variables: 8 

33 Variable name definitions:: 

34 

35 individual = 1 to 210 

36 mode = 

37 1 - air 

38 2 - train 

39 3 - bus 

40 4 - car 

41 choice = 

42 0 - no 

43 1 - yes 

44 ttme = terminal waiting time for plane, train and bus (minutes); 0 

45 for car. 

46 invc = in vehicle cost for all stages (dollars). 

47 invt = travel time (in-vehicle time) for all stages (minutes). 

48 gc = generalized cost measure:invc+(invt*value of travel time savings) 

49 (dollars). 

50 hinc = household income ($1000s). 

51 psize = traveling group size in mode chosen (number).""" 

52 

53 

def load(as_pandas=None):
    """
    Load the modechoice data and return a Dataset class instance.

    The data is first loaded through ``load_pandas`` and the result is then
    handed to ``du.as_numpy_dataset`` together with ``as_pandas``.

    Parameters
    ----------
    as_pandas : bool
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)

70 

71 

def load_pandas():
    """
    Load the modechoice data and return a Dataset class instance.

    The column at index 2 is passed to ``du.process_pandas`` as the
    endogenous variable and the columns at indices 3 through 8 as the
    exogenous variables.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    endog_column = 2
    exog_columns = list(range(3, 9))  # indices 3, 4, 5, 6, 7, 8
    raw = _get_data()
    return du.process_pandas(raw, endog_idx=endog_column,
                             exog_idx=exog_columns)

83 

84 

def _get_data():
    """
    Read ``modechoice.csv`` through ``du.load_csv``.

    The file is parsed with ``;`` as the field separator and with
    ``convert_float=True``.  This module's ``__file__`` is passed as the
    first argument (presumably so the helper can locate the CSV relative
    to this module -- confirm against ``du.load_csv``).
    """
    csv_options = {'sep': ';', 'convert_float': True}
    return du.load_csv(__file__, 'modechoice.csv', **csv_options)