Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2 

3"""Multivariate analysis of variance 

4 

5author: Yichuan Liu 

6""" 

7import numpy as np 

8 

9from statsmodels.compat.pandas import Substitution 

10from statsmodels.base.model import Model 

11from .multivariate_ols import MultivariateTestResults 

12from .multivariate_ols import _multivariate_ols_fit 

13from .multivariate_ols import _multivariate_ols_test, _hypotheses_doc 

14 

15__docformat__ = 'restructuredtext en' 

16 

17 

class MANOVA(Model):
    """
    Multivariate Analysis of Variance

    The implementation of MANOVA is based on multivariate regression and does
    not assume that the explanatory variables are categorical. Any type of
    variables as in regression is allowed.

    Parameters
    ----------
    endog : array_like
        Dependent variables. A nobs x k_endog array where nobs is
        the number of observations and k_endog is the number of dependent
        variables.
    exog : array_like
        Independent variables. A nobs x k_exog array where nobs is the
        number of observations and k_exog is the number of independent
        variables. An intercept is not included by default and should be added
        by the user. Models specified using a formula include an intercept by
        default.

    Attributes
    ----------
    endog : ndarray
        See Parameters.
    exog : ndarray
        See Parameters.

    Notes
    -----
    MANOVA is used through the `mv_test` function, and `fit` is not used.

    The ``from_formula`` interface is the recommended method to specify
    a model and simplifies testing without needing to manually configure
    the contrast matrices.

    References
    ----------
    .. [*] ftp://public.dhe.ibm.com/software/analytics/spss/documentation/
       statistics/20.0/en/client/Manuals/IBM_SPSS_Statistics_Algorithms.pdf
    """
    _formula_max_endog = None

    def __init__(self, endog, exog, missing='none', hasconst=None, **kwargs):
        """
        Validate the inputs and fit the underlying multivariate regression.

        Parameters
        ----------
        endog : array_like
            Dependent variables; must be two-dimensional with at least two
            columns.
        exog : array_like
            Independent variables.
        missing : str
            Forwarded unchanged to the parent ``Model`` constructor.
        hasconst : None or bool
            Forwarded unchanged to the parent ``Model`` constructor.
        **kwargs
            Forwarded unchanged to the parent ``Model`` constructor.

        Raises
        ------
        ValueError
            If ``endog`` does not provide more than one dependent variable.
        """
        # np.shape works for any array_like (nested lists, DataFrames,
        # ndarrays), whereas ``endog.shape`` requires an array attribute.
        endog_shape = np.shape(endog)
        if len(endog_shape) < 2 or endog_shape[1] < 2:
            raise ValueError('There must be more than one dependent variable'
                             ' to fit MANOVA!')
        super(MANOVA, self).__init__(endog, exog, missing=missing,
                                     hasconst=hasconst, **kwargs)
        # The regression is fitted once here and reused by every mv_test call.
        self._fittedmod = _multivariate_ols_fit(self.endog, self.exog)

    def fit(self):
        """
        Not implemented; use `mv_test` on the MANOVA instance instead.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError('fit is not needed to use MANOVA. Call '
                                  'mv_test directly on a MANOVA instance.')

    @Substitution(hypotheses_doc=_hypotheses_doc)
    def mv_test(self, hypotheses=None):
        """
        Linear hypotheses testing

        Parameters
        ----------
        %(hypotheses_doc)s

        Returns
        -------
        results: MultivariateTestResults

        Notes
        -----
        Testing the linear hypotheses

            L * params * M = 0

        where `params` is the regression coefficient matrix for the
        linear model y = x * params

        If the model is not specified using the formula interface, then the
        hypotheses test each included exogenous variable, one at a time. In
        most applications with categorical variables, the ``from_formula``
        interface should be preferred when specifying a model since it
        provides knowledge about the model when specifying the hypotheses.
        """
        if hypotheses is None:
            k_exog = self.exog.shape[1]
            if (hasattr(self, 'data') and self.data is not None and
                    hasattr(self.data, 'design_info')):
                # Formula interface: one hypothesis per model term, with the
                # contrast selecting the identity rows of that term's columns.
                terms = self.data.design_info.term_name_slices
                hypotheses = []
                for key, term_slice in terms.items():
                    L_contrast = np.eye(k_exog)[term_slice, :]
                    hypotheses.append([key, L_contrast, None])
            else:
                # Array interface: test each exog column on its own, using
                # the generated names x0, x1, ...
                hypotheses = []
                for i in range(k_exog):
                    name = 'x%d' % (i)
                    L = np.zeros([1, k_exog])
                    L[0, i] = 1
                    hypotheses.append([name, L, None])

        results = _multivariate_ols_test(hypotheses, self._fittedmod,
                                         self.exog_names, self.endog_names)

        return MultivariateTestResults(results, self.endog_names,
                                       self.exog_names)