Skip to content

narwhals.Expr

abs()

Return absolute value of each element.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, -2], "b": [-3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").abs()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a  b
0  1  3
1  2  4
>>> my_library_agnostic_function(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 4   │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2]]
b: [[3,4]]

alias(name)

Rename the expression.

Parameters:

Name Type Description Default
name str

The new name.

required

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df_pa = pa.table({"a": [1, 2], "b": [4, 5]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select((nw.col("b") + 10).alias("c")).to_native()

We can pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
    c
0  14
1  15
>>> my_library_agnostic_function(df_pl)
shape: (2, 1)
┌─────┐
│ c   │
│ --- │
│ i64 │
╞═════╡
│ 14  │
│ 15  │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
c: int64
----
c: [[14,15]]

all()

Return whether all values in the column are True.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").all()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
       a     b
0  False  True
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌───────┬──────┐
│ a     ┆ b    │
│ ---   ┆ ---  │
│ bool  ┆ bool │
╞═══════╪══════╡
│ false ┆ true │
└───────┴──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false]]
b: [[true]]

any()

Return whether any of the values in the column are True.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})

We define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").any()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
      a     b
0  True  True
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ bool ┆ bool │
╞══════╪══════╡
│ true ┆ true │
└──────┴──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true]]
b: [[true]]

arg_max()

Returns the index of the maximum value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [150, 100]})
>>> df_pa = pa.table({"a": [10, 20], "b": [150, 100]})

Let's define a dataframe-agnostic function:

>>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").arg_max().name.suffix("_arg_max")
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow:

>>> agnostic_arg_max(df_pd)
   a_arg_max  b_arg_max
0          1          0
>>> agnostic_arg_max(df_pl)
shape: (1, 2)
┌───────────┬───────────┐
│ a_arg_max ┆ b_arg_max │
│ ---       ┆ ---       │
│ u32       ┆ u32       │
╞═══════════╪═══════════╡
│ 1         ┆ 0         │
└───────────┴───────────┘
>>> agnostic_arg_max(df_pa)
pyarrow.Table
a_arg_max: int64
b_arg_max: int64
----
a_arg_max: [[1]]
b_arg_max: [[0]]

arg_min()

Returns the index of the minimum value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [150, 100]})
>>> df_pa = pa.table({"a": [10, 20], "b": [150, 100]})

Let's define a dataframe-agnostic function:

>>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").arg_min().name.suffix("_arg_min")
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow:

>>> agnostic_arg_min(df_pd)
   a_arg_min  b_arg_min
0          0          1
>>> agnostic_arg_min(df_pl)
shape: (1, 2)
┌───────────┬───────────┐
│ a_arg_min ┆ b_arg_min │
│ ---       ┆ ---       │
│ u32       ┆ u32       │
╞═══════════╪═══════════╡
│ 0         ┆ 1         │
└───────────┴───────────┘
>>> agnostic_arg_min(df_pa)
pyarrow.Table
a_arg_min: int64
b_arg_min: int64
----
a_arg_min: [[0]]
b_arg_min: [[1]]

arg_true()

Find the indices of the elements where the boolean expression is True.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, None, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").is_null().arg_true()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a
1  1
2  2
>>> my_library_agnostic_function(df_pl)
shape: (2, 1)
┌─────┐
│ a   │
│ --- │
│ u32 │
╞═════╡
│ 1   │
│ 2   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[1,2]]

cast(dtype)

Redefine an object's data type.

Parameters:

Name Type Description Default
dtype DType | type[DType]

Data type that the object will be cast into.

required

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> from datetime import date
>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df_pa = pa.table({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   foo  bar
0  1.0    6
1  2.0    7
2  3.0    8
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ f32 ┆ u8  │
╞═════╪═════╡
│ 1.0 ┆ 6   │
│ 2.0 ┆ 7   │
│ 3.0 ┆ 8   │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
foo: float
bar: uint8
----
foo: [[1,2,3]]
bar: [[6,7,8]]

count()

Returns the number of non-null elements in the column.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df_pa = pa.table({"a": [1, 2, 3], "b": [None, 4, 4]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().count()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a  b
0  3  2
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 3   ┆ 2   │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[3]]
b: [[2]]

cum_count(*, reverse=False)

Return the cumulative count of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Whether to reverse the operation, i.e. accumulate from the last element to the first.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": ["x", "k", None, "d"]}

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_count().alias("cum_count"),
...         nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"),
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(pd.DataFrame(data))
      a  cum_count  cum_count_reverse
0     x          1                  3
1     k          2                  2
2  None          2                  1
3     d          3                  1
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬───────────┬───────────────────┐
│ a    ┆ cum_count ┆ cum_count_reverse │
│ ---  ┆ ---       ┆ ---               │
│ str  ┆ u32       ┆ u32               │
╞══════╪═══════════╪═══════════════════╡
│ x    ┆ 1         ┆ 3                 │
│ k    ┆ 2         ┆ 2                 │
│ null ┆ 2         ┆ 1                 │
│ d    ┆ 3         ┆ 1                 │
└──────┴───────────┴───────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: string
cum_count: uint32
cum_count_reverse: uint32
----
a: [["x","k",null,"d"]]
cum_count: [[1,2,2,3]]
cum_count_reverse: [[3,2,1,1]]

cum_max(*, reverse=False)

Return the cumulative max of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Whether to reverse the operation, i.e. accumulate from the last element to the first.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 3, None, 2]}

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_max().alias("cum_max"),
...         nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(pd.DataFrame(data))
     a  cum_max  cum_max_reverse
0  1.0      1.0              3.0
1  3.0      3.0              3.0
2  NaN      NaN              NaN
3  2.0      3.0              2.0
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a    ┆ cum_max ┆ cum_max_reverse │
│ ---  ┆ ---     ┆ ---             │
│ i64  ┆ i64     ┆ i64             │
╞══════╪═════════╪═════════════════╡
│ 1    ┆ 1       ┆ 3               │
│ 3    ┆ 3       ┆ 3               │
│ null ┆ null    ┆ null            │
│ 2    ┆ 3       ┆ 2               │
└──────┴─────────┴─────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: int64
cum_max: int64
cum_max_reverse: int64
----
a: [[1,3,null,2]]
cum_max: [[1,3,null,3]]
cum_max_reverse: [[3,3,null,2]]

cum_min(*, reverse=False)

Return the cumulative min of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Whether to reverse the operation, i.e. accumulate from the last element to the first.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [3, 1, None, 2]}

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_min().alias("cum_min"),
...         nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(pd.DataFrame(data))
     a  cum_min  cum_min_reverse
0  3.0      3.0              1.0
1  1.0      1.0              1.0
2  NaN      NaN              NaN
3  2.0      1.0              2.0
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a    ┆ cum_min ┆ cum_min_reverse │
│ ---  ┆ ---     ┆ ---             │
│ i64  ┆ i64     ┆ i64             │
╞══════╪═════════╪═════════════════╡
│ 3    ┆ 3       ┆ 1               │
│ 1    ┆ 1       ┆ 1               │
│ null ┆ null    ┆ null            │
│ 2    ┆ 1       ┆ 2               │
└──────┴─────────┴─────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: int64
cum_min: int64
cum_min_reverse: int64
----
a: [[3,1,null,2]]
cum_min: [[3,1,null,1]]
cum_min_reverse: [[1,1,null,2]]

cum_prod(*, reverse=False)

Return the cumulative product of the non-null values in the column.

Parameters:

Name Type Description Default
reverse bool

Whether to reverse the operation, i.e. accumulate from the last element to the first.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 3, None, 2]}

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a").cum_prod().alias("cum_prod"),
...         nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"),
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(pd.DataFrame(data))
     a  cum_prod  cum_prod_reverse
0  1.0       1.0               6.0
1  3.0       3.0               6.0
2  NaN       NaN               NaN
3  2.0       6.0               2.0
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬──────────┬──────────────────┐
│ a    ┆ cum_prod ┆ cum_prod_reverse │
│ ---  ┆ ---      ┆ ---              │
│ i64  ┆ i64      ┆ i64              │
╞══════╪══════════╪══════════════════╡
│ 1    ┆ 1        ┆ 6                │
│ 3    ┆ 3        ┆ 6                │
│ null ┆ null     ┆ null             │
│ 2    ┆ 6        ┆ 2                │
└──────┴──────────┴──────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: int64
cum_prod: int64
cum_prod_reverse: int64
----
a: [[1,3,null,2]]
cum_prod: [[1,3,null,6]]
cum_prod_reverse: [[6,6,null,2]]

cum_sum(*, reverse=False)

Return cumulative sum.

Parameters:

Name Type Description Default
reverse bool

Whether to reverse the operation, i.e. accumulate from the last element to the first.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").cum_sum()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
    a   b
0   1   2
1   2   6
2   5  10
3  10  16
4  15  22
>>> my_library_agnostic_function(df_pl)
shape: (5, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 2   │
│ 2   ┆ 6   │
│ 5   ┆ 10  │
│ 10  ┆ 16  │
│ 15  ┆ 22  │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2,5,10,15]]
b: [[2,6,10,16,22]]

diff()

Returns the difference between each element and the previous one.

Returns:

Type Description
Self

A new expression.

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to calculate the diff and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").diff().fill_null(0).cast(nw.Int64)

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(a_diff=nw.col("a").diff()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a_diff
0     NaN
1     0.0
2     2.0
3     2.0
4     0.0
>>> my_library_agnostic_function(df_pl)
shape: (5, 1)
┌────────┐
│ a_diff │
│ ---    │
│ i64    │
╞════════╡
│ null   │
│ 0      │
│ 2      │
│ 2      │
│ 0      │
└────────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a_diff: int64
----
a_diff: [[null,0,2,2,0]]

drop_nulls()

Drop null values.

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
>>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]})

Let's define a dataframe-agnostic function:

>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").drop_nulls()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to agnostic_drop_nulls:

>>> agnostic_drop_nulls(df_pd)
     a
0  2.0
1  4.0
3  3.0
5  5.0
>>> agnostic_drop_nulls(df_pl)
shape: (4, 1)
┌─────┐
│ a   │
│ --- │
│ f64 │
╞═════╡
│ 2.0 │
│ 4.0 │
│ 3.0 │
│ 5.0 │
└─────┘
>>> agnostic_drop_nulls(df_pa)
pyarrow.Table
a: double
----
a: [[2,4,3,5]]

ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)

Compute exponentially-weighted moving average.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Parameters:

Name Type Description Default
com float | None

Specify decay in terms of center of mass, \(\gamma\), with
\(\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0\)

None
span float | None

Specify decay in terms of span, \(\theta\), with
\(\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1\)

None
half_life float | None

Specify decay in terms of half-life, \(\tau\), with
\(\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0\)

None
alpha float | None

Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\).

None
adjust bool

Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings

  • When adjust=True (the default) the EW function is calculated using weights \(w_i = (1 - \alpha)^i\)
  • When adjust=False the EW function is calculated recursively by \(y_0 = x_0\) and \(y_t = (1 - \alpha) y_{t - 1} + \alpha x_t\)
True
min_periods int

Minimum number of observations in the window required to have a value (otherwise the result is null).

1
ignore_nulls bool

Ignore missing values when calculating weights.

  • When ignore_nulls=False (default), weights are based on absolute positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, None, x_2]\) are \((1-\alpha)^2\) and \(1\) if adjust=True, and \((1-\alpha)^2\) and \(\alpha\) if adjust=False.
  • When ignore_nulls=True, weights are based on relative positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, None, x_2]\) are \(1-\alpha\) and \(1\) if adjust=True, and \(1-\alpha\) and \(\alpha\) if adjust=False.
False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").ewm_mean(com=1, ignore_nulls=False)
...     ).to_native()

We can then pass either pandas or Polars to func:

>>> my_library_agnostic_function(df_pd)
          a
0  1.000000
1  1.666667
2  2.428571
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌──────────┐
│ a        │
│ ---      │
│ f64      │
╞══════════╡
│ 1.0      │
│ 1.666667 │
│ 2.428571 │
└──────────┘

fill_null(value=None, strategy=None, limit=None)

Fill null values with given value.

Parameters:

Name Type Description Default
value Any | None

Value used to fill null values.

None
strategy Literal['forward', 'backward'] | None

Strategy used to fill null values.

None
limit int | None

Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.

None

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame(
...     {
...         "a": [2, 4, None, None, 3, 5],
...         "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
...     }
... )
>>> df_pl = pl.DataFrame(
...     {
...         "a": [2, 4, None, None, 3, 5],
...         "b": [2.0, 4.0, None, None, 3.0, 5.0],
...     }
... )
>>> df_pa = pa.table(
...     {
...         "a": [2, 4, None, None, 3, 5],
...         "b": [2.0, 4.0, None, None, 3.0, 5.0],
...     }
... )

Let's define a dataframe-agnostic function:

>>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(nw.col("a", "b").fill_null(0)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to agnostic_fill_null:

>>> agnostic_fill_null(df_pd)
     a    b
0  2.0  2.0
1  4.0  4.0
2  0.0  0.0
3  0.0  0.0
4  3.0  3.0
5  5.0  5.0
>>> agnostic_fill_null(df_pl)
shape: (6, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2   ┆ 2.0 │
│ 4   ┆ 4.0 │
│ 0   ┆ 0.0 │
│ 0   ┆ 0.0 │
│ 3   ┆ 3.0 │
│ 5   ┆ 5.0 │
└─────┴─────┘
>>> agnostic_fill_null(df_pa)
pyarrow.Table
a: int64
b: double
----
a: [[2,4,0,0,3,5]]
b: [[2,4,0,0,3,5]]

Using a strategy:

>>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         nw.col("a", "b")
...         .fill_null(strategy="forward", limit=1)
...         .name.suffix("_filled")
...     ).to_native()
>>> agnostic_fill_null_with_strategy(df_pd)
     a    b  a_filled  b_filled
0  2.0  2.0       2.0       2.0
1  4.0  4.0       4.0       4.0
2  NaN  NaN       4.0       4.0
3  NaN  NaN       NaN       NaN
4  3.0  3.0       3.0       3.0
5  5.0  5.0       5.0       5.0
>>> agnostic_fill_null_with_strategy(df_pl)
shape: (6, 4)
┌──────┬──────┬──────────┬──────────┐
│ a    ┆ b    ┆ a_filled ┆ b_filled │
│ ---  ┆ ---  ┆ ---      ┆ ---      │
│ i64  ┆ f64  ┆ i64      ┆ f64      │
╞══════╪══════╪══════════╪══════════╡
│ 2    ┆ 2.0  ┆ 2        ┆ 2.0      │
│ 4    ┆ 4.0  ┆ 4        ┆ 4.0      │
│ null ┆ null ┆ 4        ┆ 4.0      │
│ null ┆ null ┆ null     ┆ null     │
│ 3    ┆ 3.0  ┆ 3        ┆ 3.0      │
│ 5    ┆ 5.0  ┆ 5        ┆ 5.0      │
└──────┴──────┴──────────┴──────────┘
>>> agnostic_fill_null_with_strategy(df_pa)
pyarrow.Table
a: int64
b: double
a_filled: int64
b_filled: double
----
a: [[2,4,null,null,3,5]]
b: [[2,4,null,null,3,5]]
a_filled: [[2,4,4,null,3,5]]
b_filled: [[2,4,4,null,3,5]]

filter(*predicates)

Filters elements based on a condition, returning a new expression.

Parameters:

Name Type Description Default
predicates Any

Conditions to filter by (which get ANDed together).

()

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
>>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
>>> df_pa = pa.table({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").filter(nw.col("a") > 4),
...         nw.col("b").filter(nw.col("b") < 13),
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a   b
3  5  10
4  6  11
5  7  12
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 5   ┆ 10  │
│ 6   ┆ 11  │
│ 7   ┆ 12  │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5,6,7]]
b: [[10,11,12]]

gather_every(n, offset=0)

Take every nth value in the expression and return it as a new expression.

Parameters:

Name Type Description Default
n int

Gather every n-th row.

required
offset int

Starting index.

0

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function in which we gather every 2nd row, starting from an offset of 1:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a
1  2
3  4
>>> my_library_agnostic_function(df_pl)
shape: (2, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 4   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[2,4]]

head(n=10)

Get the first n rows.

Parameters:

Name Type Description Default
n int

Number of rows to return.

10

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function that returns the first 3 rows:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").head(3)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a
0  0
1  1
2  2
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 0   │
│ 1   │
│ 2   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[0,1,2]]

clip(lower_bound=None, upper_bound=None)

Clip the values of the expression so that they lie within the given lower and/or upper bound.

Parameters:

Name Type Description Default
lower_bound Any | None

Lower bound value.

None
upper_bound Any | None

Upper bound value.

None

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> s = [1, 2, 3]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})
>>> df_pa = pa.table({"s": s})

We define a library agnostic function:

>>> def func_lower(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("s").clip(2)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func_lower:

>>> func_lower(df_pd)
   s
0  2
1  2
2  3
>>> func_lower(df_pl)
shape: (3, 1)
┌─────┐
│ s   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 2   │
│ 3   │
└─────┘
>>> func_lower(df_pa)
pyarrow.Table
s: int64
----
s: [[2,2,3]]

We define another library agnostic function:

>>> def func_upper(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("s").clip(upper_bound=2)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func_upper:

>>> func_upper(df_pd)
   s
0  1
1  2
2  2
>>> func_upper(df_pl)
shape: (3, 1)
┌─────┐
│ s   │
│ --- │
│ i64 │
╞═════╡
│ 1   │
│ 2   │
│ 2   │
└─────┘
>>> func_upper(df_pa)
pyarrow.Table
s: int64
----
s: [[1,2,2]]

We can also specify both a lower and an upper bound at the same time:

>>> s = [-1, 1, -3, 3, -5, 5]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})
>>> df_pa = pa.table({"s": s})

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("s").clip(-1, 3)).to_native()

We can pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   s
0 -1
1  1
2 -1
3  3
4 -1
5  3
>>> my_library_agnostic_function(df_pl)
shape: (6, 1)
┌─────┐
│ s   │
│ --- │
│ i64 │
╞═════╡
│ -1  │
│ 1   │
│ -1  │
│ 3   │
│ -1  │
│ 3   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
s: int64
----
s: [[-1,1,-1,3,-1,3]]

is_between(lower_bound, upper_bound, closed='both')

Check if this expression is between the given lower and upper bounds.

Parameters:

Name Type Description Default
lower_bound Any

Lower bound value.

required
upper_bound Any

Upper bound value.

required
closed str

Define which sides of the interval are closed (inclusive).

'both'

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").is_between(2, 4, "right")).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
       a
0  False
1  False
2   True
3   True
4  False
>>> my_library_agnostic_function(df_pl)
shape: (5, 1)
┌───────┐
│ a     │
│ ---   │
│ bool  │
╞═══════╡
│ false │
│ false │
│ true  │
│ true  │
│ false │
└───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
----
a: [[false,false,true,true,false]]

is_duplicated()

Return a boolean mask indicating duplicated values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_duplicated()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
       a      b
0   True   True
1  False   True
2  False  False
3   True  False
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ true  ┆ true  │
│ false ┆ true  │
│ false ┆ false │
│ true  ┆ false │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,false,false,true]]
b: [[true,true,false,false]]

is_finite()

Returns boolean values indicating which original values are finite.

Warning

Different backends handle null values differently. is_finite will return False for NaN and null values in the Dask and pandas non-nullable backends, while for the Polars, PyArrow and pandas nullable backends null values are kept as such.

Returns:

Type Description
Self

Expression of Boolean data type.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [float("nan"), float("inf"), 2.0, None]}

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").is_finite()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(pd.DataFrame(data))
       a
0  False
1  False
2   True
3  False
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 1)
┌───────┐
│ a     │
│ ---   │
│ bool  │
╞═══════╡
│ false │
│ false │
│ true  │
│ null  │
└───────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: bool
----
a: [[false,false,true,null]]

is_first_distinct()

Return a boolean mask indicating the first occurrence of each distinct value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_first_distinct()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
       a      b
0   True   True
1   True  False
2   True   True
3  False   True
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ true  ┆ true  │
│ true  ┆ false │
│ true  ┆ true  │
│ false ┆ true  │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,true,true,false]]
b: [[true,false,true,true]]

is_in(other)

Check if elements of this expression are present in the other iterable.

Parameters:

Name Type Description Default
other Any

iterable

required

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]})
>>> df_pa = pa.table({"a": [1, 2, 9, 10]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
    a      b
0   1   True
1   2   True
2   9  False
3  10  False
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a   ┆ b     │
│ --- ┆ ---   │
│ i64 ┆ bool  │
╞═════╪═══════╡
│ 1   ┆ true  │
│ 2   ┆ true  │
│ 9   ┆ false │
│ 10  ┆ false │
└─────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: bool
----
a: [[1,2,9,10]]
b: [[true,true,false,false]]

is_last_distinct()

Return a boolean mask indicating the last occurrence of each distinct value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_last_distinct()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
       a      b
0  False  False
1   True   True
2   True   True
3   True   True
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ false ┆ false │
│ true  ┆ true  │
│ true  ┆ true  │
│ true  ┆ true  │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,true]]
b: [[false,true,true,true]]

is_null()

Returns a boolean expression indicating which values are null.

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame(
...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
>>> df_pl = pl.DataFrame(
...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, None, 3.0, 5.0]}
... )
>>> df_pa = pa.table({"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, None, 3.0, 5.0]})

Let's define a dataframe-agnostic function:

>>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to agnostic_is_null:

>>> agnostic_is_null(df_pd)
     a    b  a_is_null  b_is_null
0  2.0  2.0      False      False
1  4.0  4.0      False      False
2  NaN  NaN       True       True
3  3.0  3.0      False      False
4  5.0  5.0      False      False
>>> agnostic_is_null(df_pl)
shape: (5, 4)
┌──────┬──────┬───────────┬───────────┐
│ a    ┆ b    ┆ a_is_null ┆ b_is_null │
│ ---  ┆ ---  ┆ ---       ┆ ---       │
│ i64  ┆ f64  ┆ bool      ┆ bool      │
╞══════╪══════╪═══════════╪═══════════╡
│ 2    ┆ 2.0  ┆ false     ┆ false     │
│ 4    ┆ 4.0  ┆ false     ┆ false     │
│ null ┆ null ┆ true      ┆ true      │
│ 3    ┆ 3.0  ┆ false     ┆ false     │
│ 5    ┆ 5.0  ┆ false     ┆ false     │
└──────┴──────┴───────────┴───────────┘
>>> agnostic_is_null(df_pa)
pyarrow.Table
a: int64
b: double
a_is_null: bool
b_is_null: bool
----
a: [[2,4,null,3,5]]
b: [[2,4,null,3,5]]
a_is_null: [[false,false,true,false,false]]
b_is_null: [[false,false,true,false,false]]

is_unique()

Return a boolean mask indicating unique values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().is_unique()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
       a      b
0  False  False
1   True  False
2   True   True
3  False   True
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ false ┆ false │
│ true  ┆ false │
│ true  ┆ true  │
│ false ┆ true  │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,false]]
b: [[false,false,true,true]]

len()

Return the number of elements in the column.

Null values count towards the total.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function that computes the len over different values of the "b" column:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
...         nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a1  a2
0   2   1
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a1  ┆ a2  │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 2   ┆ 1   │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a1: int64
a2: int64
----
a1: [[2]]
a2: [[1]]

map_batches(function, return_dtype=None)

Apply a custom Python function to a whole Series or sequence of Series.

The output of this custom function is presumed to be either a Series, or a NumPy array (in which case it will be automatically converted into a Series).

Parameters:

Name Type Description Default
function Callable[[Any], Self]

Function to apply to Series.

required
return_dtype DType | None

Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function.

None

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").map_batches(
...             lambda s: s.to_numpy() + 1, return_dtype=nw.Float64
...         )
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
     a    b
0  2.0  5.0
1  3.0  6.0
2  4.0  7.0
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 2.0 ┆ 5.0 │
│ 3.0 ┆ 6.0 │
│ 4.0 ┆ 7.0 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[2,3,4]]
b: [[5,6,7]]

max()

Returns the maximum value of each selected column.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df_pa = pa.table({"a": [10, 20], "b": [50, 100]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.max("a", "b")).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
    a    b
0  20  100
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 20  ┆ 100 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[20]]
b: [[100]]

mean()

Get mean value.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df_pa = pa.table({"a": [-1, 0, 1], "b": [2, 4, 6]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").mean()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
     a    b
0  0.0  4.0
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 0.0 ┆ 4.0 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[0]]
b: [[4]]

median()

Get median value.

Returns:

Type Description
Self

A new expression.

Notes

Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df_pl = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df_pa = pa.table({"a": [1, 8, 3], "b": [4, 5, 2]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").median()).to_native()

We can then pass any supported library such as pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
     a    b
0  3.0  4.0
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 3.0 ┆ 4.0 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[3]]
b: [[4]]

min()

Returns the minimum value of each selected column.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df_pa = pa.table({"a": [1, 2], "b": [4, 3]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.min("a", "b")).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a  b
0  1  3
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[3]]

mode()

Compute the most occurring value(s).

Can return multiple values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {
...     "a": [1, 1, 2, 3],
...     "b": [1, 1, 2, 2],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").mode()).sort("a").to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a
0  1
>>> my_library_agnostic_function(df_pl)
shape: (1, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 1   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[1]]

null_count()

Count null values.

Returns:

Type Description
Self

A new expression.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.all().null_count()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to agnostic_null_count:

>>> agnostic_null_count(df_pd)
   a  b
0  1  2
>>> agnostic_null_count(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 1   ┆ 2   │
└─────┴─────┘
>>> agnostic_null_count(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[2]]

n_unique()

Returns count of unique values.

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").n_unique()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a  b
0  5  3
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 5   ┆ 3   │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5]]
b: [[3]]

over(*keys)

Compute expressions over the given groups.

Parameters:

Name Type Description Default
keys str | Iterable[str]

Names of columns to compute window expression over. Must be names of columns, as opposed to expressions - so, this is a bit less flexible than Polars' Expr.over.

()

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def agnostic_min_over_b(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         a_min_per_group=nw.col("a").min().over("b")
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to agnostic_min_over_b:

>>> agnostic_min_over_b(df_pd)
   a  b  a_min_per_group
0  1  1                1
1  2  1                1
2  3  2                3
>>> agnostic_min_over_b(df_pl)
shape: (3, 3)
┌─────┬─────┬─────────────────┐
│ a   ┆ b   ┆ a_min_per_group │
│ --- ┆ --- ┆ ---             │
│ i64 ┆ i64 ┆ i64             │
╞═════╪═════╪═════════════════╡
│ 1   ┆ 1   ┆ 1               │
│ 2   ┆ 1   ┆ 1               │
│ 3   ┆ 2   ┆ 3               │
└─────┴─────┴─────────────────┘
>>> agnostic_min_over_b(df_pa)
pyarrow.Table
a: int64
b: int64
a_min_per_group: int64
----
a: [[1,2,3]]
b: [[1,1,2]]
a_min_per_group: [[1,1,3]]

Cumulative operations are also supported, but (currently) only for pandas and Polars:

>>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(c=nw.col("a").cum_sum().over("b")).to_native()
>>> agnostic_cum_sum(df_pd)
   a  b  c
0  1  1  1
1  2  1  3
2  3  2  3
>>> agnostic_cum_sum(df_pl)
shape: (3, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1   ┆ 1   ┆ 1   │
│ 2   ┆ 1   ┆ 3   │
│ 3   ┆ 2   ┆ 3   │
└─────┴─────┴─────┘

pipe(function, *args, **kwargs)

Pipe function call.

Parameters:

Name Type Description Default
function Callable[[Any], Self]

Function to apply.

required
args Any

Positional arguments to pass to function.

()
kwargs Any

Keyword arguments to pass to function.

{}

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a library-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native()

We can pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a
0  2
1  3
2  4
3  5
>>> my_library_agnostic_function(df_pl)
shape: (4, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 3   │
│ 4   │
│ 5   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[2,3,4,5]]

quantile(quantile, interpolation)

Get quantile value.

Parameters:

Name Type Description Default
quantile float

Quantile between 0.0 and 1.0.

required
interpolation Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']

Interpolation method.

required

Returns:

Type Description
Self

A new expression.

Note
  • pandas and Polars may have implementation differences for a given interpolation method.
  • dask has its own method to approximate quantiles and does not implement the 'nearest', 'higher', 'lower' or 'midpoint' interpolation methods; use 'linear', which is the closest to dask's native method.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a", "b").quantile(0.5, interpolation="linear")
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
      a     b
0  24.5  74.5
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 24.5 ┆ 74.5 │
└──────┴──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[24.5]]
b: [[74.5]]

replace_strict(old, new=None, *, return_dtype=None)

Replace all values by different values.

This function must replace all non-null input values (else it raises an error).

Parameters:

Name Type Description Default
old Sequence[Any] | Mapping[Any, Any]

Sequence of values to replace. It also accepts a mapping of values to their replacement as syntactic sugar for replace_strict(old=list(mapping.keys()), new=list(mapping.values())).

required
new Sequence[Any] | None

Sequence of values to replace by. Length must match the length of old.

None
return_dtype DType | type[DType] | None

The data type of the resulting expression. If set to None (default), the data type is determined automatically based on the other inputs.

None

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [3, 0, 1, 2]})
>>> df_pl = pl.DataFrame({"a": [3, 0, 1, 2]})
>>> df_pa = pa.table({"a": [3, 0, 1, 2]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").replace_strict(
...             [0, 1, 2, 3],
...             ["zero", "one", "two", "three"],
...             return_dtype=nw.String,
...         )
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a      b
0  3  three
1  0   zero
2  1    one
3  2    two
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a   ┆ b     │
│ --- ┆ ---   │
│ i64 ┆ str   │
╞═════╪═══════╡
│ 3   ┆ three │
│ 0   ┆ zero  │
│ 1   ┆ one   │
│ 2   ┆ two   │
└─────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: string
----
a: [[3,0,1,2]]
b: [["three","zero","one","two"]]

rolling_mean(window_size, *, min_periods=None, center=False)

Apply a rolling mean (moving mean) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their mean.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_mean(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to agnostic_rolling_mean:

>>> agnostic_rolling_mean(df_pd)
     a    b
0  1.0  1.0
1  2.0  1.5
2  NaN  1.5
3  4.0  3.0
>>> agnostic_rolling_mean(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a    ┆ b   │
│ ---  ┆ --- │
│ f64  ┆ f64 │
╞══════╪═════╡
│ 1.0  ┆ 1.0 │
│ 2.0  ┆ 1.5 │
│ null ┆ 1.5 │
│ 4.0  ┆ 3.0 │
└──────┴─────┘
>>> agnostic_rolling_mean(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,1.5,1.5,3]]

rolling_std(window_size, *, min_periods=None, center=False, ddof=1)

Apply a rolling standard deviation (moving standard deviation) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their standard deviation.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False
ddof int

Delta Degrees of Freedom; the divisor for a length N window is N - ddof.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_std(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> agnostic_rolling_std(df_pd)
     a         b
0  1.0       NaN
1  2.0  0.707107
2  NaN  0.707107
3  4.0  1.414214
>>> agnostic_rolling_std(df_pl)
shape: (4, 2)
┌──────┬──────────┐
│ a    ┆ b        │
│ ---  ┆ ---      │
│ f64  ┆ f64      │
╞══════╪══════════╡
│ 1.0  ┆ null     │
│ 2.0  ┆ 0.707107 │
│ null ┆ 0.707107 │
│ 4.0  ┆ 1.414214 │
└──────┴──────────┘
>>> agnostic_rolling_std(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]]

rolling_sum(window_size, *, min_periods=None, center=False)

Apply a rolling sum (moving sum) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their sum.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_sum(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> agnostic_rolling_sum(df_pd)
     a    b
0  1.0  1.0
1  2.0  3.0
2  NaN  3.0
3  4.0  6.0
>>> agnostic_rolling_sum(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a    ┆ b   │
│ ---  ┆ --- │
│ f64  ┆ f64 │
╞══════╪═════╡
│ 1.0  ┆ 1.0 │
│ 2.0  ┆ 3.0 │
│ null ┆ 3.0 │
│ 4.0  ┆ 6.0 │
└──────┴─────┘
>>> agnostic_rolling_sum(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,3,3,6]]

rolling_var(window_size, *, min_periods=None, center=False, ddof=1)

Apply a rolling variance (moving variance) over the values.

Warning

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

A window of length window_size will traverse the values. The resulting values will be aggregated to their variance.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Parameters:

Name Type Description Default
window_size int

The length of the window in number of elements. It must be a strictly positive integer.

required
min_periods int | None

The number of values in the window that should be non-null before computing a result. If set to None (default), it will be set equal to window_size. If provided, it must be a strictly positive integer, and less than or equal to window_size.

None
center bool

Set the labels at the center of the window.

False
ddof int

Delta Degrees of Freedom; the divisor for a length N window is N - ddof.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

We define a library agnostic function:

>>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.with_columns(
...         b=nw.col("a").rolling_var(window_size=3, min_periods=1)
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> agnostic_rolling_var(df_pd)
     a    b
0  1.0  NaN
1  2.0  0.5
2  NaN  0.5
3  4.0  2.0
>>> agnostic_rolling_var(df_pl)
shape: (4, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 1.0  ┆ null │
│ 2.0  ┆ 0.5  │
│ null ┆ 0.5  │
│ 4.0  ┆ 2.0  │
└──────┴──────┘
>>> agnostic_rolling_var(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[nan,0.5,0.5,2]]

round(decimals=0)

Round underlying floating point data by decimals digits.

Parameters:

Name Type Description Default
decimals int

Number of decimals to round by.

0

Returns:

Type Description
Self

A new expression.

Notes

For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.

pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 to 4.0, etc.).

Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.12345, 2.56789, 3.901234]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function that rounds to the first decimal:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").round(1)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
     a
0  1.1
1  2.6
2  3.9
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ f64 │
╞═════╡
│ 1.1 │
│ 2.6 │
│ 3.9 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
----
a: [[1.1,2.6,3.9]]

sample(n=None, *, fraction=None, with_replacement=False, seed=None)

Sample randomly from this expression.

Parameters:

Name Type Description Default
n int | None

Number of items to return. Cannot be used with fraction.

None
fraction float | None

Fraction of items to return. Cannot be used with n.

None
with_replacement bool

Allow values to be sampled more than once.

False
seed int | None

Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation.

None

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [1, 2, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3]})
>>> df_pa = pa.table({"a": [1, 2, 3]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").sample(fraction=1.0, with_replacement=True)
...     ).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a
2  3
0  1
2  3
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 3   │
│ 3   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[1,3,3]]

shift(n)

Shift values by n positions.

Parameters:

Name Type Description Default
n int

Number of positions to shift values by.

required

Returns:

Type Description
Self

A new expression.

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to shift and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").shift(1).fill_null(0).cast(nw.Int64)

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(a_shift=nw.col("a").shift(n=1)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a_shift
0      NaN
1      1.0
2      1.0
3      3.0
4      5.0
>>> my_library_agnostic_function(df_pl)
shape: (5, 1)
┌─────────┐
│ a_shift │
│ ---     │
│ i64     │
╞═════════╡
│ null    │
│ 1       │
│ 1       │
│ 3       │
│ 5       │
└─────────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a_shift: int64
----
a_shift: [[null,1,1,3,5]]

sort(*, descending=False, nulls_last=False)

Sort this column. Place null values first.

Parameters:

Name Type Description Default
descending bool

Sort in descending order.

False
nulls_last bool

Place null values last instead of first.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]})
>>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]})
>>> df_pa = pa.table({"a": [5, None, 1, 2]})

Let's define dataframe-agnostic functions:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").sort()).to_native()
>>> def func_descend(df):
...     df = nw.from_native(df)
...     df = df.select(nw.col("a").sort(descending=True))
...     return nw.to_native(df)

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
     a
1  NaN
2  1.0
3  2.0
0  5.0
>>> my_library_agnostic_function(df_pl)
shape: (4, 1)
┌──────┐
│ a    │
│ ---  │
│ i64  │
╞══════╡
│ null │
│ 1    │
│ 2    │
│ 5    │
└──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[null,1,2,5]]
>>> func_descend(df_pd)
     a
1  NaN
0  5.0
3  2.0
2  1.0
>>> func_descend(df_pl)
shape: (4, 1)
┌──────┐
│ a    │
│ ---  │
│ i64  │
╞══════╡
│ null │
│ 5    │
│ 2    │
│ 1    │
└──────┘
>>> func_descend(df_pa)
pyarrow.Table
a: int64
----
a: [[null,5,2,1]]

skew()

Calculate the sample skewness of a column.

Returns:

Type Description
Self

An expression representing the sample skewness of the column.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
>>> df_pa = pa.Table.from_pandas(df_pd)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").skew())

We can then pass pandas, Polars, or PyArrow to func:

>>> func(df_pd)
     a         b
0  0.0  1.472427
>>> func(df_pl)
shape: (1, 2)
┌─────┬──────────┐
│ a   ┆ b        │
│ --- ┆ ---      │
│ f64 ┆ f64      │
╞═════╪══════════╡
│ 0.0 ┆ 1.472427 │
└─────┴──────────┘
>>> func(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[0]]
b: [[1.4724267269058975]]

std(*, ddof=1)

Get standard deviation.

Parameters:

Name Type Description Default
ddof int

"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pa = pa.table({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").std(ddof=0)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
          a         b
0  17.79513  1.265789
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌──────────┬──────────┐
│ a        ┆ b        │
│ ---      ┆ ---      │
│ f64      ┆ f64      │
╞══════════╪══════════╡
│ 17.79513 ┆ 1.265789 │
└──────────┴──────────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[17.795130420052185]]
b: [[1.2657891697365016]]

sum()

Return the sum value.

Returns:

Type Description
Expr

A new expression.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]})
>>> df_pa = pa.table({"a": [5, 10], "b": [50, 100]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").sum()).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
    a    b
0  15  150
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 15  ┆ 150 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[15]]
b: [[150]]

tail(n=10)

Get the last n rows.

Parameters:

Name Type Description Default
n int

Number of rows to return.

10

Returns:

Type Description
Self

A new expression.

Examples:

>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)

Let's define a dataframe-agnostic function that returns the last 3 rows:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a").tail(3)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a
7  7
8  8
9  9
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 7   │
│ 8   │
│ 9   │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[7,8,9]]

unique(*, maintain_order=False)

Return unique values of this expression.

Parameters:

Name Type Description Default
maintain_order bool

Keep the same order as the original expression. This may be more expensive to compute. Setting this to True blocks the possibility to run on the streaming engine for Polars.

False

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})

Let's define a dataframe-agnostic function:

>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").unique(maintain_order=True)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> my_library_agnostic_function(df_pd)
   a  b
0  1  2
1  3  4
2  5  6
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 2   │
│ 3   ┆ 4   │
│ 5   ┆ 6   │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,3,5]]
b: [[2,4,6]]

var(*, ddof=1)

Get variance.

Parameters:

Name Type Description Default
ddof int

"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

1

Returns:

Type Description
Self

A new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pa = pa.table({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})

Let's define a dataframe-agnostic function:

>>> def agnostic_var(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(nw.col("a", "b").var(ddof=0)).to_native()

We can then pass any supported library such as Pandas, Polars, or PyArrow to func:

>>> agnostic_var(df_pd)
            a         b
0  316.666667  1.602222
>>> agnostic_var(df_pl)
shape: (1, 2)
┌────────────┬──────────┐
│ a          ┆ b        │
│ ---        ┆ ---      │
│ f64        ┆ f64      │
╞════════════╪══════════╡
│ 316.666667 ┆ 1.602222 │
└────────────┴──────────┘
>>> agnostic_var(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[316.6666666666667]]
b: [[1.6022222222222222]]