Package InversionTest ::
Module InversionDistribution
|
|
1 """
2 Properties of the permutations (null distribution) for inversions
3 of a totally-ordered sequence. Includes: max, mean, variance,
4 similarity, correlation, and counts of permutations with k inversions
5 (called I_n[k] in some literature).
6
7 Author: Benjamin D. Nye
8 License: Apache License V2.0
9 """
14 """
15 Maximum inversions for a permutation of a sequence of n elements
16 Note: This will always return an integer
17 @param n: Number of elements
18 @type n: int
19 @return: Maximum number of inversions between n elements
20 @rtype: int
21 """
22 return (n*(n-1))/2
23
25 """
26 Mean (expected) inversions for a permutation of a sequence of n elements
27 Note: This returns a float, as the expectation maybe fractional.
28 @param n: Number of elements
29 @type n: int
30 @return: Mean number of inversions across all permutations
31 @rtype: float
32 """
33 return (n*(n-1))/4.0
34
36 """
37 Variance of inversions for a permutation of a sequence of n elements
38 Note: This returns a float, as the variance maybe fractional.
39 @param n: Number of elements
40 @type n: int
41 @return: Variance of inversions across all permutations
42 @rtype: float
43 """
44 return (n*(n-1.0)*(2.0*n + 5.0))/72.0
45
47 """
48 Correlation between sequences, given the inversions and max possible
49 A correlation of -1 is the worst, while 1 is the best. This value
50 is equivalent to a Kendall's Tau correlation.
51 @param inversions: Number of inversions
52 @type inversions: int
53 @param maxInversions: Maximum possible inversions
54 @type maxInversions: int
55 @return: Correlation, in [-1, 1]
56 @rtype: float
57 """
58 sim = similarityByInversions(inversions, maxInversions)
59 if sim is None:
60 return None
61 else:
62 return 2.0*sim - 1.0
63
65 """
66 Similarity between sequences, given the inversions and max possible
67 A similarity of 0 is the worst, while 1 is the best.
68 @param inversions: Number of inversions
69 @type inversions: int
70 @param maxInversions: Maximum possible inversions
71 @type maxInversions: int
72 @return: Similarity, in [0, 1]
73 @rtype: float
74 """
75 if maxInversions == 0:
76 return None
77 else:
78 return 1.0 - float(inversions)/maxInversions
79
83 """
84 Get the count of the number of permutations with k inversions,
85 as a vector of Inv(n,k) in the form [Inv(n,0), Inv(n,1), ..., Inv(n,k_max)]
86 where n is the length of the sequence and k_max is the maximum inversions
87 Note: This implementation uses the InversionCountsCache to boost speed
88 @param n: Number of elements in the sequence
89 @type n: int
90 @return: List of inversion counts, where index k gives the # of permutations with k inversions
91 @rtype: list of int
92 """
93 if n <= 0:
94 return []
95 elif n <= 1:
96 return [1]
97 else:
98 value = InversionCountsCache.fetchCachedRawInversionCounts(n)
99 if value is None:
100 startingPoint = None
101 cache = True
102 cacheSize = InversionCountsCache.getCacheSize()
103 if n >= cacheSize:
104 maxCacheN = cacheSize-1
105 startingCounts = InversionCountsCache.fetchCachedRawInversionCounts(maxCacheN)
106 if startingCounts is not None:
107 startingPoint = (maxCacheN, startingCounts)
108 cache = False
109 value = _directCalcRawInversionCounts(n, startingPoint, cache)
110 return value
111
113 """
114 Calculate the count of the number of permutations with k inversions,
115 as a vector of Inv(n,k) in the form [Inv(n,0), Inv(n,1), ..., Inv(n,k_max)]
116 where n is the length of the sequence and k_max is the maximum inversions.
117 Note: This implementation can be bootstrapped using a startingPoint, to avoid
118 calculating known inversions (as the algorithm works through iterative transforms)
119 @param n: Number of elements in the sequence
120 @type n: int
121 @param startingPoint: Pair of (x, rawInversionCounts(x)) where x<n. Used to bootstrap the calculation.
122 @type startingPoint: tuple of (int, list of int)
123 @param cache: If True, cache the calculated values. Else, ignore the cache.
124 @type cache: bool
125 @return: List of inversion counts, where index k gives the # of permutations with k inversions
126 @rtype: list of int
127 """
128 absoluteMax = maxInversions(n)+1
129 if startingPoint is None or startingPoint[0] > n:
130 startingN = 2
131 inversionCounts = [1]+[0]*int(absoluteMax-1)
132 else:
133 startingN, inversionCounts = startingPoint
134 for i in xrange(startingN,n+1):
135 aMax = int(maxInversions(i)+1)
136 inversionCounts[0:aMax] = (sum(inversionCounts[max(0,j-(i-1)):j+1]) for j in xrange(0,aMax))
137 if cache:
138 InversionCountsCache.cacheRawInversionCounts(i, inversionCounts[0:aMax])
139 return inversionCounts
140
145 """ A cache for managing stored inversion count calculations """
146
147
148 DEFAULT_CACHE_SIZE = 100
149 EXACT_DISTRIBUTION_CACHE = [None]*DEFAULT_CACHE_SIZE
150
151 @classmethod
153 """
154 Get the cache size
155 @return: Size of the cache
156 @rtype: int
157 """
158 return len(cls.EXACT_DISTRIBUTION_CACHE)
159
160 @classmethod
172
173 @classmethod
175 """
176 Fetch a value from the cache by index n, if available.
177 @param n: Index of the cache to read
178 @type n: int
179 @return: Cached value at index n, or None if out of bounds
180 @rtype: list of int or None
181 """
182 if n < cls.getCacheSize() and n >= 0:
183 value = cls.EXACT_DISTRIBUTION_CACHE[n]
184 if value is not None:
185 return list(value)
186 else:
187 return None
188 else:
189 return None
190
191 @classmethod
193 """
194 Cache a value from the cache at index n
195 @param n: Index of the cache to write
196 @type n: int
197 @param counts: List of counts of permutations with k inversions
198 @type counts: list of int
199 """
200 if n < cls.getCacheSize() and n >= 0:
201 cls.EXACT_DISTRIBUTION_CACHE[n] = tuple(counts)
202
203 @classmethod
209