Skip to content

narwhals.Expr

abs()

Return absolute value of each element.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, -2], "b": [-3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_abs(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").abs()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_abs:

>>> agnostic_abs(df_pd)
   a  b
0  1  3
1  2  4
>>> agnostic_abs(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 4   │
└─────┴─────┘
>>> agnostic_abs(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2]]
b: [[3,4]]

alias(name)

Rename the expression.

Parameters:

Name Type Description Default
name str

The new name.

required

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": [4, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_alias(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select((nw.col("b") + 10).alias("c")).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_alias:

>>> agnostic_alias(df_pd)
    c
0  14
1  15
>>> agnostic_alias(df_pl)
shape: (2, 1)
┌─────┐
│ c   │
│ --- │
│ i64 │
╞═════╡
│ 14  │
│ 15  │
└─────┘
>>> agnostic_alias(df_pa)
pyarrow.Table
c: int64
----
c: [[14,15]]

all()

Return whether all values in the column are True.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [True, False], "b": [True, True]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").all()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_all:

>>> agnostic_all(df_pd)
       a     b
0  False  True
>>> agnostic_all(df_pl)
shape: (1, 2)
┌───────┬──────┐
│ a     ┆ b    │
│ ---   ┆ ---  │
│ bool  ┆ bool │
╞═══════╪══════╡
│ false ┆ true │
└───────┴──────┘
>>> agnostic_all(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false]]
b: [[true]]

any()

Return whether any of the values in the column are True.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [True, False], "b": [True, True]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a dataframe-agnostic function:

>>> def agnostic_any(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").any()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_any:

>>> agnostic_any(df_pd)
      a     b
0  True  True
>>> agnostic_any(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ bool ┆ bool │
╞══════╪══════╡
│ true ┆ true │
└──────┴──────┘
>>> agnostic_any(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true]]
b: [[true]]

arg_max()

Returns the index of the maximum value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [10, 20], "b": [150, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").arg_max().name.suffix("_arg_max")
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_arg_max:

>>> agnostic_arg_max(df_pd)
   a_arg_max  b_arg_max
0          1          0
>>> agnostic_arg_max(df_pl)
shape: (1, 2)
┌───────────┬───────────┐
│ a_arg_max ┆ b_arg_max │
│ ---       ┆ ---       │
│ u32       ┆ u32       │
╞═══════════╪═══════════╡
│ 1         ┆ 0         │
└───────────┴───────────┘
>>> agnostic_arg_max(df_pa)
pyarrow.Table
a_arg_max: int64
b_arg_max: int64
----
a_arg_max: [[1]]
b_arg_max: [[0]]

arg_min()

Returns the index of the minimum value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [10, 20], "b": [150, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").arg_min().name.suffix("_arg_min")
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_arg_min:

>>> agnostic_arg_min(df_pd)
   a_arg_min  b_arg_min
0          0          1
>>> agnostic_arg_min(df_pl)
shape: (1, 2)
┌───────────┬───────────┐
│ a_arg_min ┆ b_arg_min │
│ ---       ┆ ---       │
│ u32       ┆ u32       │
╞═══════════╪═══════════╡
│ 0         ┆ 1         │
└───────────┴───────────┘
>>> agnostic_arg_min(df_pa)
pyarrow.Table
a_arg_min: int64
b_arg_min: int64
----
a_arg_min: [[0]]
b_arg_min: [[1]]

arg_true()

Find elements where boolean expression is True.

Returns:

Type Description
Self

A new expression.

cast(dtype)

Redefine an object's data type.

Parameters:

Name Type Description Default
dtype DType | type[DType]

Data type that the object will be cast into.

required

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_cast(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_cast:

>>> agnostic_cast(df_pd)
   foo  bar
0  1.0    6
1  2.0    7
2  3.0    8
>>> agnostic_cast(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ f32 ┆ u8  │
╞═════╪═════╡
│ 1.0 ┆ 6   │
│ 2.0 ┆ 7   │
│ 3.0 ┆ 8   │
└─────┴─────┘
>>> agnostic_cast(df_pa)
pyarrow.Table
foo: float
bar: uint8
----
foo: [[1,2,3]]
bar: [[6,7,8]]

count()

Returns the number of non-null elements in the column.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3], "b": [None, 4, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_count(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().count()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_count:

>>> agnostic_count(df_pd)
   a  b
0  3  2
>>> agnostic_count(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 3   ┆ 2   │
└─────┴─────┘
>>> agnostic_count(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[3]]
b: [[2]]

cum_count(*, reverse=False)

Return the cumulative count of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Reverse the operation.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": ["x", "k", None, "d"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_cum_count(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_count().alias("cum_count"),
...         nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"),
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_cum_count:

>>> agnostic_cum_count(df_pd)
      a  cum_count  cum_count_reverse
0     x          1                  3
1     k          2                  2
2  None          2                  1
3     d          3                  1
>>> agnostic_cum_count(df_pl)
shape: (4, 3)
┌──────┬───────────┬───────────────────┐
│ a    ┆ cum_count ┆ cum_count_reverse │
│ ---  ┆ ---       ┆ ---               │
│ str  ┆ u32       ┆ u32               │
╞══════╪═══════════╪═══════════════════╡
│ x    ┆ 1         ┆ 3                 │
│ k    ┆ 2         ┆ 2                 │
│ null ┆ 2         ┆ 1                 │
│ d    ┆ 3         ┆ 1                 │
└──────┴───────────┴───────────────────┘
>>> agnostic_cum_count(df_pa)
pyarrow.Table
a: string
cum_count: uint32
cum_count_reverse: uint32
----
a: [["x","k",null,"d"]]
cum_count: [[1,2,2,3]]
cum_count_reverse: [[3,2,1,1]]

cum_max(*, reverse=False)

Return the cumulative max of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Reverse the operation.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 3, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_cum_max(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_max().alias("cum_max"),
...         nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_cum_max:

>>> agnostic_cum_max(df_pd)
     a  cum_max  cum_max_reverse
0  1.0      1.0              3.0
1  3.0      3.0              3.0
2  NaN      NaN              NaN
3  2.0      3.0              2.0
>>> agnostic_cum_max(df_pl)
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a    ┆ cum_max ┆ cum_max_reverse │
│ ---  ┆ ---     ┆ ---             │
│ i64  ┆ i64     ┆ i64             │
╞══════╪═════════╪═════════════════╡
│ 1    ┆ 1       ┆ 3               │
│ 3    ┆ 3       ┆ 3               │
│ null ┆ null    ┆ null            │
│ 2    ┆ 3       ┆ 2               │
└──────┴─────────┴─────────────────┘
>>> agnostic_cum_max(df_pa)
pyarrow.Table
a: int64
cum_max: int64
cum_max_reverse: int64
----
a: [[1,3,null,2]]
cum_max: [[1,3,null,3]]
cum_max_reverse: [[3,3,null,2]]

cum_min(*, reverse=False)

Return the cumulative min of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Reverse the operation.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [3, 1, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_cum_min(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_min().alias("cum_min"),
...         nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_cum_min:

>>> agnostic_cum_min(df_pd)
     a  cum_min  cum_min_reverse
0  3.0      3.0              1.0
1  1.0      1.0              1.0
2  NaN      NaN              NaN
3  2.0      1.0              2.0
>>> agnostic_cum_min(df_pl)
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a    ┆ cum_min ┆ cum_min_reverse │
│ ---  ┆ ---     ┆ ---             │
│ i64  ┆ i64     ┆ i64             │
╞══════╪═════════╪═════════════════╡
│ 3    ┆ 3       ┆ 1               │
│ 1    ┆ 1       ┆ 1               │
│ null ┆ null    ┆ null            │
│ 2    ┆ 1       ┆ 2               │
└──────┴─────────┴─────────────────┘
>>> agnostic_cum_min(df_pa)
pyarrow.Table
a: int64
cum_min: int64
cum_min_reverse: int64
----
a: [[3,1,null,2]]
cum_min: [[3,1,null,1]]
cum_min_reverse: [[1,1,null,2]]

cum_prod(*, reverse=False)

Return the cumulative product of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Reverse the operation.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 3, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_cum_prod(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_prod().alias("cum_prod"),
...         nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"),
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_cum_prod:

>>> agnostic_cum_prod(df_pd)
     a  cum_prod  cum_prod_reverse
0  1.0       1.0               6.0
1  3.0       3.0               6.0
2  NaN       NaN               NaN
3  2.0       6.0               2.0
>>> agnostic_cum_prod(df_pl)
shape: (4, 3)
┌──────┬──────────┬──────────────────┐
│ a    ┆ cum_prod ┆ cum_prod_reverse │
│ ---  ┆ ---      ┆ ---              │
│ i64  ┆ i64      ┆ i64              │
╞══════╪══════════╪══════════════════╡
│ 1    ┆ 1        ┆ 6                │
│ 3    ┆ 3        ┆ 6                │
│ null ┆ null     ┆ null             │
│ 2    ┆ 6        ┆ 2                │
└──────┴──────────┴──────────────────┘
>>> agnostic_cum_prod(df_pa)
pyarrow.Table
a: int64
cum_prod: int64
cum_prod_reverse: int64
----
a: [[1,3,null,2]]
cum_prod: [[1,3,null,6]]
cum_prod_reverse: [[6,6,null,2]]

cum_sum(*, reverse=False)

Return cumulative sum.

Parameters:

Name Type Description Default
reverse bool

Reverse the operation.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").cum_sum()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_cum_sum:

>>> agnostic_cum_sum(df_pd)
    a   b
0   1   2
1   2   6
2   5  10
3  10  16
4  15  22
>>> agnostic_cum_sum(df_pl)
shape: (5, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 2   │
│ 2   ┆ 6   │
│ 5   ┆ 10  │
│ 10  ┆ 16  │
│ 15  ┆ 22  │
└─────┴─────┘
>>> agnostic_cum_sum(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2,5,10,15]]
b: [[2,6,10,16,22]]

diff()

Returns the difference between each element and the previous one.

Returns:

Type Description
Self

A new expression.

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to calculate the diff and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").diff().fill_null(0).cast(nw.Int64)

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_diff(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(a_diff=nw.col("a").diff()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_diff:

>>> agnostic_diff(df_pd)
   a_diff
0     NaN
1     0.0
2     2.0
3     2.0
4     0.0
>>> agnostic_diff(df_pl)
shape: (5, 1)
┌────────┐
│ a_diff │
│ ---    │
│ i64    │
╞════════╡
│ null   │
│ 0      │
│ 2      │
│ 2      │
│ 0      │
└────────┘
>>> agnostic_diff(df_pa)
pyarrow.Table
a_diff: int64
----
a_diff: [[null,0,2,2,0]]

drop_nulls()

Drop null values.

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
>>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]})

Let's define a dataframe-agnostic function:

>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").drop_nulls()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_drop_nulls:

>>> agnostic_drop_nulls(df_pd)
     a
0  2.0
1  4.0
3  3.0
5  5.0
>>> agnostic_drop_nulls(df_pl)
shape: (4, 1)
┌─────┐
│ a   │
│ --- │
│ f64 │
╞═════╡
│ 2.0 │
│ 4.0 │
│ 3.0 │
│ 5.0 │
└─────┘
>>> agnostic_drop_nulls(df_pa)
pyarrow.Table
a: double
----
a: [[2,4,3,5]]

ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)

Compute exponentially-weighted moving average.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Parameters:

Name Type Description Default
com float | None

Specify decay in terms of center of mass, \(\gamma\), with
\(\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0\)

None
span float | None

Specify decay in terms of span, \(\theta\), with
\(\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1\)

None
half_life float | None

Specify decay in terms of half-life, \(\tau\), with
\(\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0\)

None
alpha float | None

Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\).

None
adjust bool

Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings

  • When adjust=True (the default) the EW function is calculated using weights \(w_i = (1 - \alpha)^i\)
  • When adjust=False the EW function is calculated recursively by $$ y_0=x_0 $$ $$ y_t = (1 - \alpha)y_{t - 1} + \alpha x_t $$
True
min_periods int

Minimum number of observations in the window required to have a value (otherwise the result is null).

1
ignore_nulls bool

Ignore missing values when calculating weights.

  • When ignore_nulls=False (default), weights are based on absolute positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, None, x_2]\) are \((1-\alpha)^2\) and \(1\) if adjust=True, and \((1-\alpha)^2\) and \(\alpha\) if adjust=False.
  • When ignore_nulls=True, weights are based on relative positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, None, x_2]\) are \(1-\alpha\) and \(1\) if adjust=True, and \(1-\alpha\) and \(\alpha\) if adjust=False.
False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

We define a library agnostic function:

>>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").ewm_mean(com=1, ignore_nulls=False)
...     ).to_native()

We can then pass either pandas or Polars to agnostic_ewm_mean:

>>> agnostic_ewm_mean(df_pd)
          a
0  1.000000
1  1.666667
2  2.428571
>>> agnostic_ewm_mean(df_pl)
shape: (3, 1)
┌──────────┐
│ a        │
│ ---      │
│ f64      │
╞══════════╡
│ 1.0      │
│ 1.666667 │
│ 2.428571 │
└──────────┘

fill_null(value=None, strategy=None, limit=None)

Fill null values with given value.

Parameters:

Name Type Description Default
value Any | None

Value used to fill null values.

None
strategy Literal['forward', 'backward'] | None

Strategy used to fill null values.

None
limit int | None

Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.

None

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> df_pd = pd.DataFrame(
...     {
...         "a": [2, 4, None, None, 3, 5],
...         "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
...     }
... )
>>> data = {
...     "a": [2, 4, None, None, 3, 5],
...     "b": [2.0, 4.0, None, None, 3.0, 5.0],
... }
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(nw.col("a", "b").fill_null(0)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_fill_null:

>>> agnostic_fill_null(df_pd)
     a    b
0  2.0  2.0
1  4.0  4.0
2  0.0  0.0
3  0.0  0.0
4  3.0  3.0
5  5.0  5.0
>>> agnostic_fill_null(df_pl)
shape: (6, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2   ┆ 2.0 │
│ 4   ┆ 4.0 │
│ 0   ┆ 0.0 │
│ 0   ┆ 0.0 │
│ 3   ┆ 3.0 │
│ 5   ┆ 5.0 │
└─────┴─────┘
>>> agnostic_fill_null(df_pa)
pyarrow.Table
a: int64
b: double
----
a: [[2,4,0,0,3,5]]
b: [[2,4,0,0,3,5]]

Using a strategy:

>>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a", "b")
...         .fill_null(strategy="forward", limit=1)
...         .name.suffix("_filled")
...     ).to_native()
>>> agnostic_fill_null_with_strategy(df_pd)
     a    b  a_filled  b_filled
0  2.0  2.0       2.0       2.0
1  4.0  4.0       4.0       4.0
2  NaN  NaN       4.0       4.0
3  NaN  NaN       NaN       NaN
4  3.0  3.0       3.0       3.0
5  5.0  5.0       5.0       5.0
>>> agnostic_fill_null_with_strategy(df_pl)
shape: (6, 4)
┌──────┬──────┬──────────┬──────────┐
│ a    ┆ b    ┆ a_filled ┆ b_filled │
│ ---  ┆ ---  ┆ ---      ┆ ---      │
│ i64  ┆ f64  ┆ i64      ┆ f64      │
╞══════╪══════╪══════════╪══════════╡
│ 2    ┆ 2.0  ┆ 2        ┆ 2.0      │
│ 4    ┆ 4.0  ┆ 4        ┆ 4.0      │
│ null ┆ null ┆ 4        ┆ 4.0      │
│ null ┆ null ┆ null     ┆ null     │
│ 3    ┆ 3.0  ┆ 3        ┆ 3.0      │
│ 5    ┆ 5.0  ┆ 5        ┆ 5.0      │
└──────┴──────┴──────────┴──────────┘
>>> agnostic_fill_null_with_strategy(df_pa)
pyarrow.Table
a: int64
b: double
a_filled: int64
b_filled: double
----
a: [[2,4,null,null,3,5]]
b: [[2,4,null,null,3,5]]
a_filled: [[2,4,4,null,3,5]]
b_filled: [[2,4,4,null,3,5]]

filter(*predicates)

Filters elements based on a condition, returning a new expression.

Parameters:

Name Type Description Default
predicates Any

Conditions to filter by (which get ANDed together).

()

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").filter(nw.col("a") > 4),
...         nw.col("b").filter(nw.col("b") < 13),
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_filter:

>>> agnostic_filter(df_pd)
   a   b
3  5  10
4  6  11
5  7  12
>>> agnostic_filter(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 5   ┆ 10  │
│ 6   ┆ 11  │
│ 7   ┆ 12  │
└─────┴─────┘
>>> agnostic_filter(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5,6,7]]
b: [[10,11,12]]

gather_every(n, offset=0)

Take every n-th value of the expression and return the result as a new expression.

Warning

Expr.gather_every is deprecated and will be removed in a future version. Hint: instead of df.select(nw.col('a').gather_every()), use df.select(nw.col('a')).gather_every(). Note: this will remain available in narwhals.stable.v1. See stable API for more information.

Parameters:

Name Type Description Default
n int

Gather every n-th row.

required
offset int

Starting index.

0

Returns:

Type Description
Self

A new expression.

head(n=10)

Get the first n rows.

Warning

Expr.head is deprecated and will be removed in a future version. Hint: instead of df.select(nw.col('a').head()), use df.select(nw.col('a')).head(). Note: this will remain available in narwhals.stable.v1. See stable API for more information.

Parameters:

Name Type Description Default
n int

Number of rows to return.

10

Returns:

Type Description
Self

A new expression.

clip(lower_bound=None, upper_bound=None)

Clip values in the expression to the given lower and/or upper bounds.

Parameters:

Name Type Description Default
lower_bound IntoExpr | Any | None

Lower bound value.

None
upper_bound IntoExpr | Any | None

Upper bound value.

None

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_clip_lower(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").clip(2)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_clip_lower:

>>> agnostic_clip_lower(df_pd)
   a
0  2
1  2
2  3
>>> agnostic_clip_lower(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 2   │
│ 3   │
└─────┘
>>> agnostic_clip_lower(df_pa)
pyarrow.Table
a: int64
----
a: [[2,2,3]]

We define another library agnostic function:

>>> def agnostic_clip_upper(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").clip(upper_bound=2)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_clip_upper:

>>> agnostic_clip_upper(df_pd)
   a
0  1
1  2
2  2
>>> agnostic_clip_upper(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 1   │
│ 2   │
│ 2   │
└─────┘
>>> agnostic_clip_upper(df_pa)
pyarrow.Table
a: int64
----
a: [[1,2,2]]

We can have both at the same time:

>>> data = {"a": [-1, 1, -3, 3, -5, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_clip(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").clip(-1, 3)).to_native()

We can pass any supported library such as pandas, Polars, or PyArrow to agnostic_clip:

>>> agnostic_clip(df_pd)
   a
0 -1
1  1
2 -1
3  3
4 -1
5  3
>>> agnostic_clip(df_pl)
shape: (6, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ -1  │
│ 1   │
│ -1  │
│ 3   │
│ -1  │
│ 3   │
└─────┘
>>> agnostic_clip(df_pa)
pyarrow.Table
a: int64
----
a: [[-1,1,-1,3,-1,3]]

is_between(lower_bound, upper_bound, closed='both')

Check if this expression is between the given lower and upper bounds.

Parameters:

Name Type Description Default
lower_bound Any | IntoExpr

Lower bound value.

required
upper_bound Any | IntoExpr

Upper bound value.

required
closed Literal['left', 'right', 'none', 'both']

Define which sides of the interval are closed (inclusive).

'both'

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_is_between(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").is_between(2, 4, "right")).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_between:

>>> agnostic_is_between(df_pd)
       a
0  False
1  False
2   True
3   True
4  False
>>> agnostic_is_between(df_pl)
shape: (5, 1)
┌───────┐
│ a     │
│ ---   │
│ bool  │
╞═══════╡
│ false │
│ false │
│ true  │
│ true  │
│ false │
└───────┘
>>> agnostic_is_between(df_pa)
pyarrow.Table
a: bool
----
a: [[false,false,true,true,false]]

is_duplicated()

Return a boolean mask indicating duplicated values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_is_duplicated(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_duplicated()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_duplicated:

>>> agnostic_is_duplicated(df_pd)
       a      b
0   True   True
1  False   True
2  False  False
3   True  False
>>> agnostic_is_duplicated(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ true  ┆ true  │
│ false ┆ true  │
│ false ┆ false │
│ true  ┆ false │
└───────┴───────┘
>>> agnostic_is_duplicated(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,false,false,true]]
b: [[true,true,false,false]]

is_finite()

Returns boolean values indicating which original values are finite.

Warning

Different backends handle null values differently. is_finite returns False for NaN and null values in the Dask and pandas non-nullable backends, while for the Polars, PyArrow and pandas nullable backends null values are kept as null.

Returns:

Type Description
Self

Expression of Boolean data type.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [float("nan"), float("inf"), 2.0, None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library-agnostic function:

>>> def agnostic_is_finite(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").is_finite()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_finite:

>>> agnostic_is_finite(df_pd)
       a
0  False
1  False
2   True
3  False
>>> agnostic_is_finite(df_pl)
shape: (4, 1)
┌───────┐
│ a     │
│ ---   │
│ bool  │
╞═══════╡
│ false │
│ false │
│ true  │
│ null  │
└───────┘
>>> agnostic_is_finite(df_pa)
pyarrow.Table
a: bool
----
a: [[false,false,true,null]]

is_first_distinct()

Return a boolean mask indicating the first occurrence of each distinct value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_is_first_distinct(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_first_distinct()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_first_distinct:

>>> agnostic_is_first_distinct(df_pd)
       a      b
0   True   True
1   True  False
2   True   True
3  False   True
>>> agnostic_is_first_distinct(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ true  ┆ true  │
│ true  ┆ false │
│ true  ┆ true  │
│ false ┆ true  │
└───────┴───────┘
>>> agnostic_is_first_distinct(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,true,true,false]]
b: [[true,false,true,true]]

is_in(other)

Check if elements of this expression are present in the other iterable.

Parameters:

Name Type Description Default
other Any

Iterable of values to test membership against.

required

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 9, 10]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_is_in(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_in:

>>> agnostic_is_in(df_pd)
    a      b
0   1   True
1   2   True
2   9  False
3  10  False
>>> agnostic_is_in(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a   ┆ b     │
│ --- ┆ ---   │
│ i64 ┆ bool  │
╞═════╪═══════╡
│ 1   ┆ true  │
│ 2   ┆ true  │
│ 9   ┆ false │
│ 10  ┆ false │
└─────┴───────┘
>>> agnostic_is_in(df_pa)
pyarrow.Table
a: int64
b: bool
----
a: [[1,2,9,10]]
b: [[true,true,false,false]]

is_last_distinct()

Return a boolean mask indicating the last occurrence of each distinct value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_is_last_distinct(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_last_distinct()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_last_distinct:

>>> agnostic_is_last_distinct(df_pd)
       a      b
0  False  False
1   True   True
2   True   True
3   True   True
>>> agnostic_is_last_distinct(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ false ┆ false │
│ true  ┆ true  │
│ true  ┆ true  │
│ true  ┆ true  │
└───────┴───────┘
>>> agnostic_is_last_distinct(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,true]]
b: [[false,true,true,true]]

is_nan()

Indicate which values are NaN.

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"orig": [0.0, None, 2.0]}
>>> df_pd = pd.DataFrame(data).astype({"orig": "Float64"})
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_self_div_is_nan(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         divided=nw.col("orig") / nw.col("orig"),
...         divided_is_nan=(nw.col("orig") / nw.col("orig")).is_nan(),
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_self_div_is_nan:

>>> print(agnostic_self_div_is_nan(df_pd))
   orig  divided  divided_is_nan
0   0.0      NaN            True
1  <NA>     <NA>            <NA>
2   2.0      1.0           False
>>> print(agnostic_self_div_is_nan(df_pl))
shape: (3, 3)
┌──────┬─────────┬────────────────┐
│ orig ┆ divided ┆ divided_is_nan │
│ ---  ┆ ---     ┆ ---            │
│ f64  ┆ f64     ┆ bool           │
╞══════╪═════════╪════════════════╡
│ 0.0  ┆ NaN     ┆ true           │
│ null ┆ null    ┆ null           │
│ 2.0  ┆ 1.0     ┆ false          │
└──────┴─────────┴────────────────┘
>>> print(agnostic_self_div_is_nan(df_pa))
pyarrow.Table
orig: double
divided: double
divided_is_nan: bool
----
orig: [[0,null,2]]
divided: [[nan,null,1]]
divided_is_nan: [[true,null,false]]

is_null()

Returns a boolean expression indicating which values are null.

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> df_pd = pd.DataFrame(
...     {
...         "a": [2, 4, None, 3, 5],
...         "b": [2.0, 4.0, float("nan"), 3.0, 5.0],
...     }
... )
>>> data = {
...     "a": [2, 4, None, 3, 5],
...     "b": [2.0, 4.0, None, 3.0, 5.0],
... }
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_null:

>>> agnostic_is_null(df_pd)
     a    b  a_is_null  b_is_null
0  2.0  2.0      False      False
1  4.0  4.0      False      False
2  NaN  NaN       True       True
3  3.0  3.0      False      False
4  5.0  5.0      False      False
>>> agnostic_is_null(df_pl)
shape: (5, 4)
┌──────┬──────┬───────────┬───────────┐
│ a    ┆ b    ┆ a_is_null ┆ b_is_null │
│ ---  ┆ ---  ┆ ---       ┆ ---       │
│ i64  ┆ f64  ┆ bool      ┆ bool      │
╞══════╪══════╪═══════════╪═══════════╡
│ 2    ┆ 2.0  ┆ false     ┆ false     │
│ 4    ┆ 4.0  ┆ false     ┆ false     │
│ null ┆ null ┆ true      ┆ true      │
│ 3    ┆ 3.0  ┆ false     ┆ false     │
│ 5    ┆ 5.0  ┆ false     ┆ false     │
└──────┴──────┴───────────┴───────────┘
>>> agnostic_is_null(df_pa)
pyarrow.Table
a: int64
b: double
a_is_null: bool
b_is_null: bool
----
a: [[2,4,null,3,5]]
b: [[2,4,null,3,5]]
a_is_null: [[false,false,true,false,false]]
b_is_null: [[false,false,true,false,false]]

is_unique()

Return a boolean mask indicating unique values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_is_unique(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_unique()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_is_unique:

>>> agnostic_is_unique(df_pd)
       a      b
0  False  False
1   True  False
2   True   True
3  False   True
>>> agnostic_is_unique(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ false ┆ false │
│ true  ┆ false │
│ true  ┆ true  │
│ false ┆ true  │
└───────┴───────┘
>>> agnostic_is_unique(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,false]]
b: [[false,false,true,true]]

len()

Return the number of elements in the column.

Null values count towards the total.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function that computes the length of column "a" for different values of column "b":

>>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
...         nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_len:

>>> agnostic_len(df_pd)
   a1  a2
0   2   1
>>> agnostic_len(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a1  ┆ a2  │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 2   ┆ 1   │
└─────┴─────┘
>>> agnostic_len(df_pa)
pyarrow.Table
a1: int64
a2: int64
----
a1: [[2]]
a2: [[1]]

map_batches(function, return_dtype=None)

Apply a custom Python function to a whole Series or sequence of Series.

The output of this custom function is presumed to be either a Series, or a NumPy array (in which case it will be automatically converted into a Series).

Parameters:

Name Type Description Default
function Callable[[Any], Self]

Function to apply to Series.

required
return_dtype DType | None

Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function.

None

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_map_batches(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").map_batches(
...             lambda s: s.to_numpy() + 1, return_dtype=nw.Float64
...         )
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_map_batches:

>>> agnostic_map_batches(df_pd)
     a    b
0  2.0  5.0
1  3.0  6.0
2  4.0  7.0
>>> agnostic_map_batches(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 2.0 ┆ 5.0 │
│ 3.0 ┆ 6.0 │
│ 4.0 ┆ 7.0 │
└─────┴─────┘
>>> agnostic_map_batches(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[2,3,4]]
b: [[5,6,7]]

max()

Returns the maximum value(s) from the column(s).

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [10, 20], "b": [50, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.max("a", "b")).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_max:

>>> agnostic_max(df_pd)
    a    b
0  20  100
>>> agnostic_max(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 20  ┆ 100 │
└─────┴─────┘
>>> agnostic_max(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[20]]
b: [[100]]

mean()

Get mean value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [-1, 0, 1], "b": [2, 4, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").mean()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_mean:

>>> agnostic_mean(df_pd)
     a    b
0  0.0  4.0
>>> agnostic_mean(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 0.0 ┆ 4.0 │
└─────┴─────┘
>>> agnostic_mean(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[0]]
b: [[4]]

median()

Get median value.

Returns:

Type Description
Self

A new expression.

Notes

Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 8, 3], "b": [4, 5, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").median()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_median:

>>> agnostic_median(df_pd)
     a    b
0  3.0  4.0
>>> agnostic_median(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 3.0 ┆ 4.0 │
└─────┴─────┘
>>> agnostic_median(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[3]]
b: [[4]]

min()

Returns the minimum value(s) from the column(s).

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": [4, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.min("a", "b")).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_min:

>>> agnostic_min(df_pd)
   a  b
0  1  3
>>> agnostic_min(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
└─────┴─────┘
>>> agnostic_min(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[3]]

mode()

Compute the most frequently occurring value(s).

Can return multiple values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {
...     "a": [1, 1, 2, 3],
...     "b": [1, 1, 2, 2],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_mode(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").mode()).sort("a").to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_mode:

>>> agnostic_mode(df_pd)
   a
0  1
>>> agnostic_mode(df_pl)
shape: (1, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 1   │
└─────┘
>>> agnostic_mode(df_pa)
pyarrow.Table
a: int64
----
a: [[1]]

null_count()

Count null values.

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().null_count()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_null_count:

>>> agnostic_null_count(df_pd)
   a  b
0  1  2
>>> agnostic_null_count(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 1   ┆ 2   │
└─────┴─────┘
>>> agnostic_null_count(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[2]]

n_unique()

Returns count of unique values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_n_unique(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").n_unique()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_n_unique:

>>> agnostic_n_unique(df_pd)
   a  b
0  5  3
>>> agnostic_n_unique(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 5   ┆ 3   │
└─────┴─────┘
>>> agnostic_n_unique(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5]]
b: [[3]]

over(*keys)

Compute expressions over the given groups.

Parameters:

Name Type Description Default
keys str | Iterable[str]

Names of columns to compute window expression over. Must be names of columns, as opposed to expressions - so, this is a bit less flexible than Polars' Expr.over.

()

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_min_over_b(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         a_min_per_group=nw.col("a").min().over("b")
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_min_over_b:

>>> agnostic_min_over_b(df_pd)
   a  b  a_min_per_group
0  1  1                1
1  2  1                1
2  3  2                3
>>> agnostic_min_over_b(df_pl)
shape: (3, 3)
┌─────┬─────┬─────────────────┐
│ a   ┆ b   ┆ a_min_per_group │
│ --- ┆ --- ┆ ---             │
│ i64 ┆ i64 ┆ i64             │
╞═════╪═════╪═════════════════╡
│ 1   ┆ 1   ┆ 1               │
│ 2   ┆ 1   ┆ 1               │
│ 3   ┆ 2   ┆ 3               │
└─────┴─────┴─────────────────┘
>>> agnostic_min_over_b(df_pa)
pyarrow.Table
a: int64
b: int64
a_min_per_group: int64
----
a: [[1,2,3]]
b: [[1,1,2]]
a_min_per_group: [[1,1,3]]

Cumulative operations are also supported, but (currently) only for pandas and Polars:

>>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(c=nw.col("a").cum_sum().over("b")).to_native()
>>> agnostic_cum_sum(df_pd)
   a  b  c
0  1  1  1
1  2  1  3
2  3  2  3
>>> agnostic_cum_sum(df_pl)
shape: (3, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 1   ┆ 1   │
│ 2   ┆ 1   ┆ 3   │
│ 3   ┆ 2   ┆ 3   │
└─────┴─────┴─────┘

pipe(function, *args, **kwargs)

Pipe function call.

Parameters:

Name Type Description Default
function Callable[[Any], Self]

Function to apply.

required
args Any

Positional arguments to pass to function.

()
kwargs Any

Keyword arguments to pass to function.

{}

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a library-agnostic function:

>>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_pipe:

>>> agnostic_pipe(df_pd)
   a
0  2
1  3
2  4
3  5
>>> agnostic_pipe(df_pl)
shape: (4, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 3   │
│ 4   │
│ 5   │
└─────┘
>>> agnostic_pipe(df_pa)
pyarrow.Table
a: int64
----
a: [[2,3,4,5]]

quantile(quantile, interpolation)

Get quantile value.

Parameters:

Name Type Description Default
quantile float

Quantile between 0.0 and 1.0.

required
interpolation Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']

Interpolation method.

required

Returns:

Type Description
Self

A new expression.

Note
  • pandas and Polars may have implementation differences for a given interpolation method.
  • Dask has its own method for approximating quantiles and does not implement the 'nearest', 'higher', 'lower', or 'midpoint' interpolation methods; use 'linear', which is closest to Dask's native method.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_quantile(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").quantile(0.5, interpolation="linear")
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_quantile:

>>> agnostic_quantile(df_pd)
      a     b
0  24.5  74.5
>>> agnostic_quantile(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 24.5 ┆ 74.5 │
└──────┴──────┘
>>> agnostic_quantile(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[24.5]]
b: [[74.5]]

rank(method='average', *, descending=False)

Assign ranks to data, dealing with ties appropriately.

Notes

The resulting dtype may differ between backends.

Parameters:

Name Type Description Default
method Literal['average', 'min', 'max', 'dense', 'ordinal']

The method used to assign ranks to tied elements. The following methods are available (default is 'average'):

  • 'average' : The average of the ranks that would have been assigned to all the tied values is assigned to each value.
  • 'min' : The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.)
  • 'max' : The maximum of the ranks that would have been assigned to all the tied values is assigned to each value.
  • 'dense' : Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements.
  • 'ordinal' : All values are given a distinct rank, corresponding to the order that the values occur in the Series.
'average'
descending bool

Rank in descending order.

False

Returns:

Type Description
Self

A new expression with rank data.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [3, 6, 1, 1, 6]}

We define a dataframe-agnostic function that computes the dense rank for the data:

>>> def agnostic_dense_rank(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     result = df.with_columns(rnk=nw.col("a").rank(method="dense"))
...     return result.to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_dense_rank:

>>> agnostic_dense_rank(pd.DataFrame(data))
   a  rnk
0  3  2.0
1  6  3.0
2  1  1.0
3  1  1.0
4  6  3.0
>>> agnostic_dense_rank(pl.DataFrame(data))
shape: (5, 2)
┌─────┬─────┐
│ a   ┆ rnk │
│ --- ┆ --- │
│ i64 ┆ u32 │
╞═════╪═════╡
│ 3   ┆ 2   │
│ 6   ┆ 3   │
│ 1   ┆ 1   │
│ 1   ┆ 1   │
│ 6   ┆ 3   │
└─────┴─────┘
>>> agnostic_dense_rank(pa.table(data))
pyarrow.Table
a: int64
rnk: uint64
----
a: [[3,6,1,1,6]]
rnk: [[2,3,1,1,3]]

replace_strict(old, new=None, *, return_dtype=None)

Replace all values by different values.

This function must replace all non-null input values (else it raises an error).

Parameters:

Name Type Description Default
old Sequence[Any] | Mapping[Any, Any]

Sequence of values to replace. It also accepts a mapping of values to their replacement as syntactic sugar for replace_strict(old=list(mapping.keys()), new=list(mapping.values())).

required
new Sequence[Any] | None

Sequence of values to replace by. Length must match the length of old.

None
return_dtype DType | type[DType] | None

The data type of the resulting expression. If set to None (default), the data type is determined automatically based on the other inputs.

None

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [3, 0, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_replace_strict(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").replace_strict(
...             [0, 1, 2, 3],
...             ["zero", "one", "two", "three"],
...             return_dtype=nw.String,
...         )
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_replace_strict:

>>> agnostic_replace_strict(df_pd)
   a      b
0  3  three
1  0   zero
2  1    one
3  2    two
>>> agnostic_replace_strict(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a   ┆ b     │
│ --- ┆ ---   │
│ i64 ┆ str   │
╞═════╪═══════╡
│ 3   ┆ three │
│ 0   ┆ zero  │
│ 1   ┆ one   │
│ 2   ┆ two   │
└─────┴───────┘
>>> agnostic_replace_strict(df_pa)
pyarrow.Table
a: int64
b: string
----
a: [[3,0,1,2]]
b: [["three","zero","one","two"]]

rolling_mean(window_size, *, min_periods=None, center=False)

Apply a rolling mean (moving mean) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their mean.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_mean(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_rolling_mean:

>>> agnostic_rolling_mean(df_pd)
     a    b
0  1.0  1.0
1  2.0  1.5
2  NaN  1.5
3  4.0  3.0
>>> agnostic_rolling_mean(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a    ┆ b   │
│ ---  ┆ --- │
│ f64  ┆ f64 │
╞══════╪═════╡
│ 1.0  ┆ 1.0 │
│ 2.0  ┆ 1.5 │
│ null ┆ 1.5 │
│ 4.0  ┆ 3.0 │
└──────┴─────┘
>>> agnostic_rolling_mean(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,1.5,1.5,3]]

rolling_std(window_size, *, min_periods=None, center=False, ddof=1)

Apply a rolling standard deviation (moving standard deviation) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their standard deviation.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False
ddof int

Delta Degrees of Freedom; the divisor for a length N window is N - ddof.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library-agnostic function:

>>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_std(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_rolling_std:

>>> agnostic_rolling_std(df_pd)
     a         b
0  1.0       NaN
1  2.0  0.707107
2  NaN  0.707107
3  4.0  1.414214
>>> agnostic_rolling_std(df_pl)
shape: (4, 2)
┌──────┬──────────┐
│ a    ┆ b        │
│ ---  ┆ ---      │
│ f64  ┆ f64      │
╞══════╪══════════╡
│ 1.0  ┆ null     │
│ 2.0  ┆ 0.707107 │
│ null ┆ 0.707107 │
│ 4.0  ┆ 1.414214 │
└──────┴──────────┘
>>> agnostic_rolling_std(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]]

rolling_sum(window_size, *, min_periods=None, center=False)

Apply a rolling sum (moving sum) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their sum.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library-agnostic function:

>>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_sum(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_rolling_sum:

>>> agnostic_rolling_sum(df_pd)
     a    b
0  1.0  1.0
1  2.0  3.0
2  NaN  3.0
3  4.0  6.0
>>> agnostic_rolling_sum(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a    ┆ b   │
│ ---  ┆ --- │
│ f64  ┆ f64 │
╞══════╪═════╡
│ 1.0  ┆ 1.0 │
│ 2.0  ┆ 3.0 │
│ null ┆ 3.0 │
│ 4.0  ┆ 6.0 │
└──────┴─────┘
>>> agnostic_rolling_sum(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,3,3,6]]

rolling_var(window_size, *, min_periods=None, center=False, ddof=1)

Apply a rolling variance (moving variance) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their variance.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False
ddof int

Delta Degrees of Freedom; the divisor for a length N window is N - ddof.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library-agnostic function:

>>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_var(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_rolling_var:

>>> agnostic_rolling_var(df_pd)
     a    b
0  1.0  NaN
1  2.0  0.5
2  NaN  0.5
3  4.0  2.0
>>> agnostic_rolling_var(df_pl)
shape: (4, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 1.0  ┆ null │
│ 2.0  ┆ 0.5  │
│ null ┆ 0.5  │
│ 4.0  ┆ 2.0  │
└──────┴──────┘
>>> agnostic_rolling_var(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[nan,0.5,0.5,2]]

round(decimals=0)

Round underlying floating point data by decimals digits.

Parameters:

Name Type Description Default
decimals int

Number of decimals to round by.

0

Returns:

Type Description
Self

A new expression.

Notes

For values exactly halfway between rounded decimal values, pandas behaves differently from Polars and Arrow.

pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 round to 4.0, etc.).

Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.12345, 2.56789, 3.901234]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function that rounds to the first decimal:

>>> def agnostic_round(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").round(1)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_round:

>>> agnostic_round(df_pd)
     a
0  1.1
1  2.6
2  3.9
>>> agnostic_round(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ f64 │
╞═════╡
│ 1.1 │
│ 2.6 │
│ 3.9 │
└─────┘
>>> agnostic_round(df_pa)
pyarrow.Table
a: double
----
a: [[1.1,2.6,3.9]]

sample(n=None, *, fraction=None, with_replacement=False, seed=None)

Sample randomly from this expression.

Warning

Expr.sample is deprecated and will be removed in a future version. Hint: instead of df.select(nw.col('a').sample()), use df.select(nw.col('a')).sample(). Note: this will remain available in narwhals.stable.v1. See the stable API documentation for more information.

Parameters:

Name Type Description Default
n int | None

Number of items to return. Cannot be used with fraction.

None
fraction float | None

Fraction of items to return. Cannot be used with n.

None
with_replacement bool

Allow values to be sampled more than once.

False
seed int | None

Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation.

None

Returns:

Type Description
Self

A new expression.

shift(n)

Shift values by n positions.

Parameters:

Name Type Description Default
n int

Number of positions to shift values by.

required

Returns:

Type Description
Self

A new expression.

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to shift and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").shift(1).fill_null(0).cast(nw.Int64)

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_shift(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(a_shift=nw.col("a").shift(n=1)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_shift:

>>> agnostic_shift(df_pd)
   a_shift
0      NaN
1      1.0
2      1.0
3      3.0
4      5.0
>>> agnostic_shift(df_pl)
shape: (5, 1)
┌─────────┐
│ a_shift │
│ ---     │
│ i64     │
╞═════════╡
│ null    │
│ 1       │
│ 1       │
│ 3       │
│ 5       │
└─────────┘
>>> agnostic_shift(df_pa)
pyarrow.Table
a_shift: int64
----
a_shift: [[null,1,1,3,5]]

sort(*, descending=False, nulls_last=False)

Sort this column. Place null values first.

Warning

Expr.sort is deprecated and will be removed in a future version. Hint: instead of df.select(nw.col('a').sort()), use df.select(nw.col('a')).sort(). Note: this will remain available in narwhals.stable.v1. See the stable API documentation for more information.

Parameters:

Name Type Description Default
descending bool

Sort in descending order.

False
nulls_last bool

Place null values last instead of first.

False

Returns:

Type Description
Self

A new expression.

skew()

Calculate the sample skewness of a column.

Returns:

Type Description
Self

An expression representing the sample skewness of the column.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_skew(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").skew()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_skew:

>>> agnostic_skew(df_pd)
     a         b
0  0.0  1.472427
>>> agnostic_skew(df_pl)
shape: (1, 2)
┌─────┬──────────┐
│ a   ┆ b        │
│ --- ┆ ---      │
│ f64 ┆ f64      │
╞═════╪══════════╡
│ 0.0 ┆ 1.472427 │
└─────┴──────────┘
>>> agnostic_skew(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[0]]
b: [[1.4724267269058975]]

std(*, ddof=1)

Get standard deviation.

Parameters:

Name Type Description Default
ddof int

"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_std(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").std(ddof=0)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_std:

>>> agnostic_std(df_pd)
          a         b
0  17.79513  1.265789
>>> agnostic_std(df_pl)
shape: (1, 2)
┌──────────┬──────────┐
│ a        ┆ b        │
│ ---      ┆ ---      │
│ f64      ┆ f64      │
╞══════════╪══════════╡
│ 17.79513 ┆ 1.265789 │
└──────────┴──────────┘
>>> agnostic_std(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[17.795130420052185]]
b: [[1.2657891697365016]]

sum()

Return the sum value.

Returns:

Type Description
Expr

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [5, 10], "b": [50, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").sum()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_sum:

>>> agnostic_sum(df_pd)
    a    b
0  15  150
>>> agnostic_sum(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 15  ┆ 150 │
└─────┴─────┘
>>> agnostic_sum(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[15]]
b: [[150]]

tail(n=10)

Get the last n rows.

Warning

Expr.tail is deprecated and will be removed in a future version. Hint: instead of df.select(nw.col('a').tail()), use df.select(nw.col('a')).tail(). Note: this will remain available in narwhals.stable.v1. See the stable API documentation for more information.

Parameters:

Name Type Description Default
n int

Number of rows to return.

10

Returns:

Type Description
Self

A new expression.

unique()

Return unique values of this expression.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").unique().sum()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_unique:

>>> agnostic_unique(df_pd)
   a   b
0  9  12
>>> agnostic_unique(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 9   ┆ 12  │
└─────┴─────┘
>>> agnostic_unique(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[9]]
b: [[12]]

var(*, ddof=1)

Get variance.

Parameters:

Name Type Description Default
ddof int

"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_var(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").var(ddof=0)).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to agnostic_var:

>>> agnostic_var(df_pd)
            a         b
0  316.666667  1.602222
>>> agnostic_var(df_pl)
shape: (1, 2)
┌────────────┬──────────┐
│ a          ┆ b        │
│ ---        ┆ ---      │
│ f64        ┆ f64      │
╞════════════╪══════════╡
│ 316.666667 ┆ 1.602222 │
└────────────┴──────────┘
>>> agnostic_var(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[316.6666666666667]]
b: [[1.6022222222222222]]