Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/groupby/categorical.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numpy as np
3from pandas.core.algorithms import unique1d
4from pandas.core.arrays.categorical import (
5 Categorical,
6 CategoricalDtype,
7 _recode_for_categories,
8)
def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
    """
    Prepare a Categorical so that it can be used as a groupby key.

    Three cases are handled:

    * ``observed=True``: build a new Categorical whose categories are
      only those actually present in the data.
    * ``observed=False`` and ``sort=True``: ``c`` is used unchanged.
    * ``observed=False`` and ``sort=False``: the categories of ``c`` are
      reordered to follow ``c.unique()``, with the categories that do
      not appear in the data appended afterwards, so that the groupby
      result lists groups in as-encountered order (GH-8868) while still
      containing every category (GH-13179).

    Parameters
    ----------
    c : Categorical
    sort : bool
        The value of the sort parameter groupby was called with.
    observed : bool
        Account only for the observed values.

    Returns
    -------
    Categorical
        The categorical to group on: a recoded copy, or ``c`` itself
        when ``observed=False`` and ``sort=True``.
    Categorical or None
        ``c`` when ``observed=True``, otherwise None.
    """
    if not observed:
        if sort:
            # Codes already follow the order of c.categories.
            return c, None

        # Categories in the order given by .unique() ...
        appearance = c.unique()

        # ... then every category of c that .unique() did not keep, so
        # that no category is lost from the groupby result.
        not_seen = ~c.categories.isin(appearance.categories)
        complete = appearance.add_categories(c.categories[not_seen])

        return c.reorder_categories(complete.categories), None

    # Only the codes that occur in the data matter; -1 is the code pandas
    # uses for missing values, so it never maps to a category.
    present = unique1d(c.codes)
    present = present[present != -1]
    if c.ordered:
        # An ordered categorical keeps its categories in their original
        # relative order.
        present = np.sort(present)

    # Re-express the codes relative to the reduced set of categories.
    observed_categories = c.categories.take(present)
    new_codes = _recode_for_categories(
        c.codes, c.categories, observed_categories
    )

    # Wrap the new codes and categories in a fresh Categorical and hand
    # back the original alongside it.
    new_dtype = CategoricalDtype(observed_categories, ordered=c.ordered)
    return Categorical(new_codes, dtype=new_dtype, fastpath=True), c
def recode_from_groupby(c: Categorical, sort: bool, ci):
    """
    Undo the recoding done by recode_for_groupby on a result index.

    Parameters
    ----------
    c : Categorical
        Supplies the full set of categories (``c.categories``).
    sort : bool
        The value of the sort parameter groupby was called with.
    ci : CategoricalIndex
        The codes / categories to recode.

    Returns
    -------
    CategoricalIndex
        With ``sort=True``, ``ci`` with its categories set to
        ``c.categories``. With ``sort=False``, ``ci`` with the categories
        of ``c`` that it lacks appended after its existing ones.
    """
    if not sort:
        # Keep the existing category order of ci and append whatever
        # categories of c it does not have yet.
        absent = ~c.categories.isin(ci.categories)
        return ci.add_categories(c.categories[absent])

    # Sorting was requested: go back to the category order of c.
    return ci.set_categories(c.categories)