>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(a="float32"), name="t")
>>> expr = t.select(s.where(lambda col: col.get_name() == "a"))
>>> expr.columns
['a']
Choose Table columns based on dtype, regex, and other criteria
selectors.where(predicate)
Select columns that satisfy `predicate`.
Use this selector when one of the other selectors does not meet your needs.
Name | Type | Description | Default |
---|---|---|---|
predicate | `Callable[[ir.Value], bool]` | A callable that accepts an ibis value expression and returns a `bool` | required |
selectors.numeric()
Return numeric columns.
selectors.of_type(dtype)
Select columns of type `dtype`.
Name | Type | Description | Default |
---|---|---|---|
dtype | `dt.DataType \| str \| type[dt.DataType]` | `DataType` instance, `str` or `DataType` class | required |
Select according to a specific `DataType` instance
>>> import ibis
>>> import ibis.expr.datatypes as dt
>>> import ibis.selectors as s
>>> t = ibis.table(
... dict(name="string", siblings="array<string>", parents="array<int64>")
... )
>>> expr = t.select(s.of_type(dt.Array(dt.string)))
>>> expr.columns
['siblings']
Strings are also accepted
Abstract/unparametrized types may also be specified by their string name (e.g. "integer" for any integer type), or by passing in a `DataType` class instead. The following options are equivalent.
selectors.startswith(prefixes)
Select columns whose name starts with one of `prefixes`.
Name | Type | Description | Default |
---|---|---|---|
prefixes | `str \| tuple[str, ...]` | Prefixes to compare column names against | required |
selectors.endswith(suffixes)
Select columns whose name ends with one of `suffixes`.
Name | Type | Description | Default |
---|---|---|---|
suffixes | `str \| tuple[str, ...]` | Suffixes to compare column names against | required |
selectors.contains(needles, how=any)
Return columns whose name contains `needles`.
Name | Type | Description | Default |
---|---|---|---|
needles | `str \| tuple[str, ...]` | One or more strings to search for in column names | required |
how | `Callable[[Iterable[bool]], bool]` | A boolean reduction to allow the configuration of how `needles` are summarized. | `any` |
Select columns that contain either `"a"` or `"b"`
>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(
... dict(
... a="int64", b="string", c="float", d="array<int16>", ab="struct<x: int>"
... )
... )
>>> expr = t.select(s.contains(("a", "b")))
>>> expr.columns
['a', 'b', 'ab']
Select columns that contain all of `"a"` and `"b"`, that is, both `"a"` and `"b"` must be in each column's name to match.
selectors.matches(regex)
Return columns whose name matches the regular expression `regex`.
Name | Type | Description | Default |
---|---|---|---|
regex | `str \| re.Pattern` | A string or `re.Pattern` object | required |
selectors.any_of(*predicates)
Include columns satisfying any of `predicates`.
selectors.all_of(*predicates)
Include columns satisfying all of `predicates`.
selectors.c(*names)
Select specific column names.
selectors.across(selector, func, names=None)
Apply data transformations across multiple columns.
Name | Type | Description | Default |
---|---|---|---|
selector | `Selector \| Iterable[str] \| str` | An expression that selects columns on which the transformation function will be applied, an iterable of `str` column names or a single `str` column name. | required |
func | `Deferred \| Callable[[ir.Value], ir.Value] \| Mapping[str \| None, Deferred \| Callable[[ir.Value], ir.Value]]` | A function (or dictionary of functions) to use to transform the data. | required |
names | `str \| Callable[[str, str \| None], str] \| None` | A lambda function or a format string to name the columns created by the transformation function. | `None` |
Type | Description |
---|---|
Across | An Across selector object |
>>> import ibis
>>> ibis.options.interactive = True
>>> from ibis import _, selectors as s
>>> t = ibis.examples.penguins.fetch()
>>> t.select(s.startswith("bill")).mutate(
... s.across(s.numeric(), dict(centered=_ - _.mean()), names="{fn}_{col}")
... )
/Users/cody/repos/ibis-birdbrain/venv/lib/python3.11/site-packages/google/auth/_default.py:76: UserWarning:
Your application has authenticated using end user credentials from Google Cloud SDK without a quota project. You might receive a "quota exceeded" or "API not enabled" error. See the following page for troubleshooting: https://cloud.google.com/docs/authentication/adc-troubleshooting/user-creds.
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ bill_length_mm ┃ bill_depth_mm ┃ centered_bill_length_mm ┃ centered_bill_depth_mm ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩
│ float64        │ float64       │ float64                 │ float64                │
├────────────────┼───────────────┼─────────────────────────┼────────────────────────┤
│           39.1 │          18.7 │                -4.82193 │                1.54883 │
│           39.5 │          17.4 │                -4.42193 │                0.24883 │
│           40.3 │          18.0 │                -3.62193 │                0.84883 │
│            nan │           nan │                     nan │                    nan │
│           36.7 │          19.3 │                -7.22193 │                2.14883 │
│           39.3 │          20.6 │                -4.62193 │                3.44883 │
│           38.9 │          17.8 │                -5.02193 │                0.64883 │
│           39.2 │          19.6 │                -4.72193 │                2.44883 │
│           34.1 │          18.1 │                -9.82193 │                0.94883 │
│           42.0 │          20.2 │                -1.92193 │                3.04883 │
│              … │             … │                       … │                      … │
└────────────────┴───────────────┴─────────────────────────┴────────────────────────┘
selectors.if_any(selector, predicate)
Return the disjunction of `predicate` applied on all `selector` columns.
Name | Type | Description | Default |
---|---|---|---|
selector | `Selector` | A column selector | required |
predicate | `Deferred \| Callable` | A callable or deferred object defining a predicate to apply to each column from `selector`. | required |
>>> import ibis
>>> from ibis import selectors as s, _
>>> ibis.options.interactive = True
>>> penguins = ibis.examples.penguins.fetch()
>>> cols = s.across(s.endswith("_mm"), (_ - _.mean()) / _.std())
>>> expr = penguins.mutate(cols).filter(s.if_any(s.endswith("_mm"), _.abs() > 2))
>>> expr_by_hand = penguins.mutate(cols).filter(
... (_.bill_length_mm.abs() > 2)
... | (_.bill_depth_mm.abs() > 2)
... | (_.flipper_length_mm.abs() > 2)
... )
>>> expr.equals(expr_by_hand)
True
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string │ float64        │ float64       │ float64           │ int64       │ string │ int64 │
├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Biscoe │      -1.102918 │      0.733585 │         -2.056265 │        3150 │ female │  2007 │
│ Gentoo  │ Biscoe │       1.113200 │     -0.430972 │          2.068326 │        5700 │ male   │  2007 │
│ Gentoo  │ Biscoe │       2.871441 │     -0.076542 │          2.068326 │        6050 │ male   │  2007 │
│ Gentoo  │ Biscoe │       1.900745 │     -0.734769 │          2.139439 │        5650 │ male   │  2008 │
│ Gentoo  │ Biscoe │       1.076570 │     -0.177807 │          2.068326 │        5700 │ male   │  2008 │
│ Gentoo  │ Biscoe │       0.856789 │     -0.582871 │          2.068326 │        5800 │ male   │  2008 │
│ Gentoo  │ Biscoe │       1.497815 │     -0.076542 │          2.068326 │        5550 │ male   │  2009 │
│ Gentoo  │ Biscoe │       1.387925 │     -0.430972 │          2.068326 │        5500 │ male   │  2009 │
│ Gentoo  │ Biscoe │       2.047266 │     -0.582871 │          2.068326 │        5850 │ male   │  2009 │
│ Adelie  │ Dream  │      -2.165189 │     -0.836035 │         -0.918447 │        3050 │ female │  2009 │
│ …       │ …      │              … │             … │                 … │           … │ …      │     … │
└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
selectors.if_all(selector, predicate)
Return the conjunction of `predicate` applied on all `selector` columns.
Name | Type | Description | Default |
---|---|---|---|
selector | `Selector` | A column selector | required |
predicate | `Deferred \| Callable` | A callable or deferred object defining a predicate to apply to each column from `selector`. | required |
>>> import ibis
>>> from ibis import selectors as s, _
>>> ibis.options.interactive = True
>>> penguins = ibis.examples.penguins.fetch()
>>> cols = s.across(s.endswith("_mm"), (_ - _.mean()) / _.std())
>>> expr = penguins.mutate(cols).filter(s.if_all(s.endswith("_mm"), _.abs() > 1))
>>> expr_by_hand = penguins.mutate(cols).filter(
... (_.bill_length_mm.abs() > 1)
... & (_.bill_depth_mm.abs() > 1)
... & (_.flipper_length_mm.abs() > 1)
... )
>>> expr.equals(expr_by_hand)
True
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ float64           │ int64       │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Dream     │      -1.157863 │      1.088015 │         -1.416243 │        3300 │ female │  2007 │
│ Adelie  │ Torgersen │      -1.231123 │      1.138648 │         -1.202902 │        3900 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.149830 │     -1.443630 │          1.214962 │        5700 │ male   │  2007 │
│ Gentoo  │ Biscoe    │       1.039940 │     -1.089200 │          1.072735 │        4750 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.131515 │     -1.089200 │          1.712757 │        5000 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.241405 │     -1.089200 │          1.570530 │        5550 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.351295 │     -1.494263 │          1.214962 │        5300 │ male   │  2009 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
selectors.r
Ranges of columns.
selectors.first()
Return the first column of a table.
selectors.last()
Return the last column of a table.
selectors.all()
Return every column from a table.