Package InversionTest :: Module InversionDistribution
[hide private]
[frames] | no frames]

Source Code for Module InversionTest.InversionDistribution

  1  """ 
  2  Properties of the permutations (null distribution) for inversions 
  3  of a totally-ordered sequence.  Includes: max, mean, variance, 
  4  similarity, correlation, and counts of permutations with k inversions 
  5  (called I_n[k] in some literature). 
  6   
  7  Author: Benjamin D. Nye 
  8  License: Apache License V2.0 
  9  """ 
10 11 # Basic Properties 12 #------------------------------------------ 13 -def maxInversions(n):
14 """ 15 Maximum inversions for a permutation of a sequence of n elements 16 Note: This will always return an integer 17 @param n: Number of elements 18 @type n: int 19 @return: Maximum number of inversions between n elements 20 @rtype: int 21 """ 22 return (n*(n-1))/2
23
24 -def meanOfInversions(n):
25 """ 26 Mean (expected) inversions for a permutation of a sequence of n elements 27 Note: This returns a float, as the expectation maybe fractional. 28 @param n: Number of elements 29 @type n: int 30 @return: Mean number of inversions across all permutations 31 @rtype: float 32 """ 33 return (n*(n-1))/4.0
34
35 -def varianceOfInversions(n):
36 """ 37 Variance of inversions for a permutation of a sequence of n elements 38 Note: This returns a float, as the variance maybe fractional. 39 @param n: Number of elements 40 @type n: int 41 @return: Variance of inversions across all permutations 42 @rtype: float 43 """ 44 return (n*(n-1.0)*(2.0*n + 5.0))/72.0
45
46 -def correlationByInversions(inversions, maxInversions):
47 """ 48 Correlation between sequences, given the inversions and max possible 49 A correlation of -1 is the worst, while 1 is the best. This value 50 is equivalent to a Kendall's Tau correlation. 51 @param inversions: Number of inversions 52 @type inversions: int 53 @param maxInversions: Maximum possible inversions 54 @type maxInversions: int 55 @return: Correlation, in [-1, 1] 56 @rtype: float 57 """ 58 sim = similarityByInversions(inversions, maxInversions) 59 if sim is None: 60 return None 61 else: 62 return 2.0*sim - 1.0
63
64 -def similarityByInversions(inversions, maxInversions):
65 """ 66 Similarity between sequences, given the inversions and max possible 67 A similarity of 0 is the worst, while 1 is the best. 68 @param inversions: Number of inversions 69 @type inversions: int 70 @param maxInversions: Maximum possible inversions 71 @type maxInversions: int 72 @return: Similarity, in [0, 1] 73 @rtype: float 74 """ 75 if maxInversions == 0: 76 return None 77 else: 78 return 1.0 - float(inversions)/maxInversions
79
80 # Counts of Permutations with k Inversions 81 #------------------------------------------ 82 -def rawInversionCounts(n):
83 """ 84 Get the count of the number of permutations with k inversions, 85 as a vector of Inv(n,k) in the form [Inv(n,0), Inv(n,1), ..., Inv(n,k_max)] 86 where n is the length of the sequence and k_max is the maximum inversions 87 Note: This implementation uses the InversionCountsCache to boost speed 88 @param n: Number of elements in the sequence 89 @type n: int 90 @return: List of inversion counts, where index k gives the # of permutations with k inversions 91 @rtype: list of int 92 """ 93 if n <= 0: 94 return [] 95 elif n <= 1: 96 return [1] 97 else: 98 value = InversionCountsCache.fetchCachedRawInversionCounts(n) 99 if value is None: 100 startingPoint = None 101 cache = True 102 cacheSize = InversionCountsCache.getCacheSize() 103 if n >= cacheSize: 104 maxCacheN = cacheSize-1 105 startingCounts = InversionCountsCache.fetchCachedRawInversionCounts(maxCacheN) 106 if startingCounts is not None: 107 startingPoint = (maxCacheN, startingCounts) 108 cache = False 109 value = _directCalcRawInversionCounts(n, startingPoint, cache) 110 return value
111
112 -def _directCalcRawInversionCounts(n, startingPoint=None, cache=True):
113 """ 114 Calculate the count of the number of permutations with k inversions, 115 as a vector of Inv(n,k) in the form [Inv(n,0), Inv(n,1), ..., Inv(n,k_max)] 116 where n is the length of the sequence and k_max is the maximum inversions. 117 Note: This implementation can be bootstrapped using a startingPoint, to avoid 118 calculating known inversions (as the algorithm works through iterative transforms) 119 @param n: Number of elements in the sequence 120 @type n: int 121 @param startingPoint: Pair of (x, rawInversionCounts(x)) where x<n. Used to bootstrap the calculation. 122 @type startingPoint: tuple of (int, list of int) 123 @param cache: If True, cache the calculated values. Else, ignore the cache. 124 @type cache: bool 125 @return: List of inversion counts, where index k gives the # of permutations with k inversions 126 @rtype: list of int 127 """ 128 absoluteMax = maxInversions(n)+1 129 if startingPoint is None or startingPoint[0] > n: 130 startingN = 2 131 inversionCounts = [1]+[0]*int(absoluteMax-1) 132 else: 133 startingN, inversionCounts = startingPoint 134 for i in xrange(startingN,n+1): 135 aMax = int(maxInversions(i)+1) 136 inversionCounts[0:aMax] = (sum(inversionCounts[max(0,j-(i-1)):j+1]) for j in xrange(0,aMax)) 137 if cache: 138 InversionCountsCache.cacheRawInversionCounts(i, inversionCounts[0:aMax]) 139 return inversionCounts
140
141 142 # Cache Management 143 #------------------------------------------ 144 -class InversionCountsCache(object):
145 """ A cache for managing stored inversion count calculations """ 146 # Optimization Variables 147 # Note: Cache increases by 10x for each next 100 entries (200~ 200MB, 300~2GB) 148 DEFAULT_CACHE_SIZE = 100 149 EXACT_DISTRIBUTION_CACHE = [None]*DEFAULT_CACHE_SIZE 150 151 @classmethod
152 - def getCacheSize(cls):
153 """ 154 Get the cache size 155 @return: Size of the cache 156 @rtype: int 157 """ 158 return len(cls.EXACT_DISTRIBUTION_CACHE)
159 160 @classmethod
162 """ 163 Set the cache size 164 @param n: New size of the cache. By default, resets to DEFAULT_CACHE_SIZE 165 @type n: int 166 """ 167 if n != cls.getCacheSize(): 168 if n > cls.getCacheSize(): 169 cls.EXACT_DISTRIBUTION_CACHE.extend([None]*(n-cls.getCacheSize())) 170 else: 171 del cls.EXACT_DISTRIBUTION_CACHE[n:]
172 173 @classmethod
175 """ 176 Fetch a value from the cache by index n, if available. 177 @param n: Index of the cache to read 178 @type n: int 179 @return: Cached value at index n, or None if out of bounds 180 @rtype: list of int or None 181 """ 182 if n < cls.getCacheSize() and n >= 0: 183 value = cls.EXACT_DISTRIBUTION_CACHE[n] 184 if value is not None: 185 return list(value) 186 else: 187 return None 188 else: 189 return None
190 191 @classmethod
192 - def cacheRawInversionCounts(cls, n, counts):
193 """ 194 Cache a value from the cache at index n 195 @param n: Index of the cache to write 196 @type n: int 197 @param counts: List of counts of permutations with k inversions 198 @type counts: list of int 199 """ 200 if n < cls.getCacheSize() and n >= 0: 201 cls.EXACT_DISTRIBUTION_CACHE[n] = tuple(counts)
202 203 @classmethod
204 - def clear(cls):
205 """ 206 Clear all cached values. For general use, this should be unnecessary. 207 """ 208 cls.EXACT_DISTRIBUTION_CACHE[:] = (None for i in xrange(len(cls.EXACT_DISTRIBUTION_CACHE)))
209