narwhals.selectors
The following selectors are all supported. In addition, just like in Polars, the following set operations are supported:
- set intersection:
&
- set union:
|
- set difference:
-
- complement:
~
boolean() -> Selector
Select boolean columns.
Returns:
Type | Description |
---|---|
Selector
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> import narwhals.selectors as ncs
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function to select boolean dtypes:
>>> def agnostic_select_boolean(df_native: IntoFrameT) -> IntoFrameT:
... df_nw = nw.from_native(df_native)
... return df_nw.select(ncs.boolean()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_select_boolean:
>>> agnostic_select_boolean(df_pd)
c
0 False
1 True
>>> agnostic_select_boolean(df_pl)
shape: (2, 1)
┌───────┐
│ c │
│ --- │
│ bool │
╞═══════╡
│ false │
│ true │
└───────┘
>>> agnostic_select_boolean(df_pa)
pyarrow.Table
c: bool
----
c: [[false,true]]
by_dtype(*dtypes: DType | type[DType] | Iterable[DType | type[DType]]) -> Selector
Select columns based on their dtype.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dtypes
|
DType | type[DType] | Iterable[DType | type[DType]]
|
one or more data types to select |
()
|
Returns:
Type | Description |
---|---|
Selector
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> import narwhals.selectors as ncs
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function to select int64 and float64 dtypes and multiply each value by 2:
>>> def agnostic_select_by_dtype(df_native: IntoFrameT) -> IntoFrameT:
... df_nw = nw.from_native(df_native)
... return df_nw.select(ncs.by_dtype(nw.Int64, nw.Float64) * 2).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_select_by_dtype:
>>> agnostic_select_by_dtype(df_pd)
a c
0 2 8.2
1 4 4.6
>>> agnostic_select_by_dtype(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a ┆ c │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2 ┆ 8.2 │
│ 4 ┆ 4.6 │
└─────┴─────┘
>>> agnostic_select_by_dtype(df_pa)
pyarrow.Table
a: int64
c: double
----
a: [[2,4]]
c: [[8.2,4.6]]
categorical() -> Selector
Select categorical columns.
Returns:
Type | Description |
---|---|
Selector
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> import narwhals.selectors as ncs
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function that first converts column "b" to categorical, and then selects categorical dtypes:
>>> def agnostic_select_categorical(df_native: IntoFrameT) -> IntoFrameT:
... df_nw = nw.from_native(df_native).with_columns(
... b=nw.col("b").cast(nw.Categorical())
... )
... return df_nw.select(ncs.categorical()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_select_categorical:
>>> agnostic_select_categorical(df_pd)
b
0 x
1 y
>>> agnostic_select_categorical(df_pl)
shape: (2, 1)
┌─────┐
│ b │
│ --- │
│ cat │
╞═════╡
│ x │
│ y │
└─────┘
>>> agnostic_select_categorical(df_pa)
pyarrow.Table
b: dictionary<values=string, indices=uint32, ordered=0>
----
b: [ -- dictionary:
["x","y"] -- indices:
[0,1]]
datetime(time_unit: TimeUnit | Iterable[TimeUnit] | None = None, time_zone: str | timezone | Iterable[str | timezone | None] | None = ('*', None)) -> Selector
Select all datetime columns, optionally filtering by time unit/zone.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
time_unit
|
TimeUnit | Iterable[TimeUnit] | None
|
One (or more) of the allowed time unit precision strings: "ms", "us", "ns" and "s". Omit to select columns with any valid time unit. |
None
|
time_zone
|
str | timezone | Iterable[str | timezone | None] | None
|
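Specify which timezone(s) to select: one or more timezone strings or timezone objects to select datetime columns with those timezones; None to select datetime columns that do not have a timezone; "*" to select datetime columns that have any timezone.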
|
('*', None)
|
Returns:
Type | Description |
---|---|
Selector
|
A new expression. |
Examples:
>>> from datetime import datetime, timezone
>>> import pyarrow as pa
>>> import narwhals as nw
>>> import narwhals.selectors as ncs
>>>
>>> utc_tz = timezone.utc
>>> data = {
... "tstamp_utc": [
... datetime(2023, 4, 10, 12, 14, 16, 999000, tzinfo=utc_tz),
... datetime(2025, 8, 25, 14, 18, 22, 666000, tzinfo=utc_tz),
... ],
... "tstamp": [
... datetime(2000, 11, 20, 18, 12, 16, 600000),
... datetime(2020, 10, 30, 10, 20, 25, 123000),
... ],
... "numeric": [3.14, 6.28],
... }
>>> df_native = pa.table(data)
>>> df_nw = nw.from_native(df_native)
>>> df_nw.select(ncs.datetime()).to_native()
pyarrow.Table
tstamp_utc: timestamp[us, tz=UTC]
tstamp: timestamp[us]
----
tstamp_utc: [[2023-04-10 12:14:16.999000Z,2025-08-25 14:18:22.666000Z]]
tstamp: [[2000-11-20 18:12:16.600000,2020-10-30 10:20:25.123000]]
Select only datetime columns that have any time_zone specification:
>>> df_nw.select(ncs.datetime(time_zone="*")).to_native()
pyarrow.Table
tstamp_utc: timestamp[us, tz=UTC]
----
tstamp_utc: [[2023-04-10 12:14:16.999000Z,2025-08-25 14:18:22.666000Z]]
matches(pattern: str) -> Selector
Select all columns that match the given regex pattern.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pattern
|
str
|
A valid regular expression pattern. |
required |
Returns:
Type | Description |
---|---|
Selector
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> import narwhals.selectors as ncs
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {
... "foo": ["x", "y"],
... "bar": [123, 456],
... "baz": [2.0, 5.5],
... "zap": [0, 1],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function to select column names containing an 'a', preceded by a character that is not 'z':
>>> def agnostic_select_match(df_native: IntoFrameT) -> IntoFrameT:
... df_nw = nw.from_native(df_native)
... return df_nw.select(ncs.matches("[^z]a")).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_select_match:
>>> agnostic_select_match(df_pd)
bar baz
0 123 2.0
1 456 5.5
>>> agnostic_select_match(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ bar ┆ baz │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 123 ┆ 2.0 │
│ 456 ┆ 5.5 │
└─────┴─────┘
>>> agnostic_select_match(df_pa)
pyarrow.Table
bar: int64
baz: double
----
bar: [[123,456]]
baz: [[2,5.5]]
numeric() -> Selector
Select numeric columns.
Returns:
Type | Description |
---|---|
Selector
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> import narwhals.selectors as ncs
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [4.1, 2.3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function to select numeric dtypes and multiply each value by 2:
>>> def agnostic_select_numeric(df_native: IntoFrameT) -> IntoFrameT:
... df_nw = nw.from_native(df_native)
... return df_nw.select(ncs.numeric() * 2).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_select_numeric:
>>> agnostic_select_numeric(df_pd)
a c
0 2 8.2
1 4 4.6
>>> agnostic_select_numeric(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a ┆ c │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2 ┆ 8.2 │
│ 4 ┆ 4.6 │
└─────┴─────┘
>>> agnostic_select_numeric(df_pa)
pyarrow.Table
a: int64
c: double
----
a: [[2,4]]
c: [[8.2,4.6]]
string() -> Selector
Select string columns.
Returns:
Type | Description |
---|---|
Selector
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> import narwhals.selectors as ncs
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": ["x", "y"], "c": [False, True]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function to select string dtypes:
>>> def agnostic_select_string(df_native: IntoFrameT) -> IntoFrameT:
... df_nw = nw.from_native(df_native)
... return df_nw.select(ncs.string()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_select_string:
>>> agnostic_select_string(df_pd)
b
0 x
1 y
>>> agnostic_select_string(df_pl)
shape: (2, 1)
┌─────┐
│ b │
│ --- │
│ str │
╞═════╡
│ x │
│ y │
└─────┘
>>> agnostic_select_string(df_pa)
pyarrow.Table
b: string
----
b: [["x","y"]]