Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Nile River Flows.""" 

2import pandas as pd 

3 

4from statsmodels.datasets import utils as du 

5 

__docformat__ = 'restructuredtext'

# Descriptive metadata for the Nile dataset, kept as module-level strings.
# NOTE(review): TITLE and DESCRSHORT spell the location "Ashwan" while NOTE
# spells it "Aswan"; the user-facing spellings are left untouched here.
COPYRIGHT = """This is public domain."""
TITLE = """Nile River flows at Ashwan 1871-1970"""
SOURCE = """
This data is first analyzed in:

    Cobb, G. W. 1978. "The Problem of the Nile: Conditional Solution to a
        Changepoint Problem." *Biometrika*. 65.2, 243-51.
"""

DESCRSHORT = """This dataset contains measurements on the annual flow of
the Nile as measured at Ashwan for 100 years from 1871-1970."""

# The long description is the short one plus a remark on the changepoint.
DESCRLONG = DESCRSHORT + " There is an apparent changepoint near 1898."

# suggested notes
# The variable names listed here must match the column names the loaders
# use ('year' and 'volume').
NOTE = """::

    Number of observations: 100
    Number of variables: 2
    Variable name definitions:

        year - the year of the observations
        volume - the discharge at Aswan in 10^8, m^3
"""

32 

33 

def load(as_pandas=None):
    """
    Load the Nile river flow data as a Dataset class instance.

    Parameters
    ----------
    as_pandas : bool, optional
        Passed through unchanged to ``du.as_numpy_dataset``. Flag selecting
        between pandas DataFrames and Series (True) and numpy recarrays
        and arrays. Defaults to None.

    Returns
    -------
    Dataset instance:
        Whatever ``du.as_numpy_dataset`` returns for the pandas dataset.
        See DATASET_PROPOSAL.txt for more information.
    """
    # Build the pandas-backed dataset first, then let the shared utility
    # decide on the final container type.
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)

50 

51 

def load_pandas():
    """
    Load the Nile data and return a Dataset built from pandas objects.

    Returns
    -------
    Dataset
        ``data`` is the frame returned by ``_get_data``, ``names`` its
        column names, and ``endog`` the ``volume`` column as a Series
        labelled by the integer-valued ``year`` column.
    """
    data = _get_data()
    # TODO: time series
    # Hand pandas the raw values rather than the Series itself: building a
    # Series from another Series together with an explicit index reindexes
    # by label, so the row labels of ``data`` would be looked up among the
    # years and every non-matching entry would become NaN.
    endog = pd.Series(data['volume'].values,
                      index=data['year'].astype(int),
                      name='volume')
    dataset = du.Dataset(data=data, names=list(data.columns), endog=endog,
                         endog_name='volume')
    return dataset

58 

59 

def _get_data():
    """Load ``nile.csv`` through ``du.load_csv`` and cast the result to float."""
    raw = du.load_csv(__file__, 'nile.csv')
    return raw.astype(float)