Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Breast Cancer Data""" 

2from statsmodels.datasets import utils as du 

3 

4__docformat__ = 'restructuredtext' 

5 

6COPYRIGHT = """???""" 

7TITLE = """Breast Cancer Data""" 

8SOURCE = """ 

9This is the breast cancer data used in Owen's empirical likelihood. It is taken from 

10Rice, J.A. Mathematical Statistics and Data Analysis. 

11http://www.cengage.com/statistics/discipline_content/dataLibrary.html 

12""" 

13 

14DESCRSHORT = """Breast Cancer and county population""" 

15 

16DESCRLONG = """The number of breast cancer observances in various counties""" 

17 

18#suggested notes 

19NOTE = """:: 

20 

21 Number of observations: 301 

22 Number of variables: 2 

23 Variable name definitions: 

24 

25 cancer - The number of breast cancer observances 

26 population - The population of the county 

27 

28""" 

29 

30 

def load_pandas():
    """
    Read the raw cancer data and pass it through ``du.process_pandas``.

    Returns
    -------
    The object produced by ``du.process_pandas`` when called with
    column 0 as ``endog_idx`` and ``exog_idx=None``.
    """
    return du.process_pandas(_get_data(), endog_idx=0, exog_idx=None)

34 

35 

def load(as_pandas=None):
    """
    Load the breast cancer data and return a Dataset class instance.

    Parameters
    ----------
    as_pandas : bool
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.
        Defaults to None and is forwarded unchanged to
        ``du.as_numpy_dataset``.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    # Always build the pandas-backed result first; the conversion helper
    # decides what to hand back based on ``as_pandas``.
    pandas_dataset = load_pandas()
    return du.as_numpy_dataset(pandas_dataset, as_pandas=as_pandas)

52 

53 

def _get_data():
    """
    Load ``cancer.csv`` via ``du.load_csv`` with ``convert_float=True``.

    This module's ``__file__`` is passed as the first argument --
    presumably so the helper can locate the CSV next to this module
    (TODO confirm against ``du.load_csv``).
    """
    csv_name = 'cancer.csv'
    return du.load_csv(__file__, csv_name, convert_float=True)