narwhals.Expr
abs()
Return absolute value of each element.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, -2], "b": [-3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_abs(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").abs()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_abs`:
>>> agnostic_abs(df_pd)
a b
0 1 3
1 2 4
>>> agnostic_abs(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 3 │
│ 2 ┆ 4 │
└─────┴─────┘
>>> agnostic_abs(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2]]
b: [[3,4]]
alias(name)
Rename the expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name
|
str
|
The new name. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": [4, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_alias(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select((nw.col("b") + 10).alias("c")).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_alias`:
>>> agnostic_alias(df_pd)
c
0 14
1 15
>>> agnostic_alias(df_pl)
shape: (2, 1)
┌─────┐
│ c │
│ --- │
│ i64 │
╞═════╡
│ 14 │
│ 15 │
└─────┘
>>> agnostic_alias(df_pa)
pyarrow.Table
c: int64
----
c: [[14,15]]
all()
Return whether all values in the column are `True`.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [True, False], "b": [True, True]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").all()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_all`:
>>> agnostic_all(df_pd)
a b
0 False True
>>> agnostic_all(df_pl)
shape: (1, 2)
┌───────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪══════╡
│ false ┆ true │
└───────┴──────┘
>>> agnostic_all(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false]]
b: [[true]]
any()
Return whether any of the values in the column are `True`.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [True, False], "b": [True, True]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a dataframe-agnostic function:
>>> def agnostic_any(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").any()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_any`:
>>> agnostic_any(df_pd)
a b
0 True True
>>> agnostic_any(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞══════╪══════╡
│ true ┆ true │
└──────┴──────┘
>>> agnostic_any(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true]]
b: [[true]]
arg_max()
Returns the index of the maximum value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [10, 20], "b": [150, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a", "b").arg_max().name.suffix("_arg_max")
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_arg_max`:
>>> agnostic_arg_max(df_pd)
a_arg_max b_arg_max
0 1 0
>>> agnostic_arg_max(df_pl)
shape: (1, 2)
┌───────────┬───────────┐
│ a_arg_max ┆ b_arg_max │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═══════════╪═══════════╡
│ 1 ┆ 0 │
└───────────┴───────────┘
>>> agnostic_arg_max(df_pa)
pyarrow.Table
a_arg_max: int64
b_arg_max: int64
----
a_arg_max: [[1]]
b_arg_max: [[0]]
arg_min()
Returns the index of the minimum value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [10, 20], "b": [150, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a", "b").arg_min().name.suffix("_arg_min")
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_arg_min`:
>>> agnostic_arg_min(df_pd)
a_arg_min b_arg_min
0 0 1
>>> agnostic_arg_min(df_pl)
shape: (1, 2)
┌───────────┬───────────┐
│ a_arg_min ┆ b_arg_min │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═══════════╪═══════════╡
│ 0 ┆ 1 │
└───────────┴───────────┘
>>> agnostic_arg_min(df_pa)
pyarrow.Table
a_arg_min: int64
b_arg_min: int64
----
a_arg_min: [[0]]
b_arg_min: [[1]]
arg_true()
Find elements where boolean expression is True.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
cast(dtype)
Redefine an object's data type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dtype
|
DType | type[DType]
|
Data type that the object will be cast into. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_cast(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_cast`:
>>> agnostic_cast(df_pd)
foo bar
0 1.0 6
1 2.0 7
2 3.0 8
>>> agnostic_cast(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ f32 ┆ u8 │
╞═════╪═════╡
│ 1.0 ┆ 6 │
│ 2.0 ┆ 7 │
│ 3.0 ┆ 8 │
└─────┴─────┘
>>> agnostic_cast(df_pa)
pyarrow.Table
foo: float
bar: uint8
----
foo: [[1,2,3]]
bar: [[6,7,8]]
count()
Returns the number of non-null elements in the column.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3], "b": [None, 4, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_count(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().count()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_count`:
>>> agnostic_count(df_pd)
a b
0 3 2
>>> agnostic_count(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 3 ┆ 2 │
└─────┴─────┘
>>> agnostic_count(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[3]]
b: [[2]]
cum_count(*, reverse=False)
Return the cumulative count of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": ["x", "k", None, "d"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_cum_count(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_count().alias("cum_count"),
... nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_cum_count`:
>>> agnostic_cum_count(df_pd)
a cum_count cum_count_reverse
0 x 1 3
1 k 2 2
2 None 2 1
3 d 3 1
>>> agnostic_cum_count(df_pl)
shape: (4, 3)
┌──────┬───────────┬───────────────────┐
│ a ┆ cum_count ┆ cum_count_reverse │
│ --- ┆ --- ┆ --- │
│ str ┆ u32 ┆ u32 │
╞══════╪═══════════╪═══════════════════╡
│ x ┆ 1 ┆ 3 │
│ k ┆ 2 ┆ 2 │
│ null ┆ 2 ┆ 1 │
│ d ┆ 3 ┆ 1 │
└──────┴───────────┴───────────────────┘
>>> agnostic_cum_count(df_pa)
pyarrow.Table
a: string
cum_count: uint32
cum_count_reverse: uint32
----
a: [["x","k",null,"d"]]
cum_count: [[1,2,2,3]]
cum_count_reverse: [[3,2,1,1]]
cum_max(*, reverse=False)
Return the cumulative max of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 3, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_cum_max(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_max().alias("cum_max"),
... nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_cum_max`:
>>> agnostic_cum_max(df_pd)
a cum_max cum_max_reverse
0 1.0 1.0 3.0
1 3.0 3.0 3.0
2 NaN NaN NaN
3 2.0 3.0 2.0
>>> agnostic_cum_max(df_pl)
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a ┆ cum_max ┆ cum_max_reverse │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞══════╪═════════╪═════════════════╡
│ 1 ┆ 1 ┆ 3 │
│ 3 ┆ 3 ┆ 3 │
│ null ┆ null ┆ null │
│ 2 ┆ 3 ┆ 2 │
└──────┴─────────┴─────────────────┘
>>> agnostic_cum_max(df_pa)
pyarrow.Table
a: int64
cum_max: int64
cum_max_reverse: int64
----
a: [[1,3,null,2]]
cum_max: [[1,3,null,3]]
cum_max_reverse: [[3,3,null,2]]
cum_min(*, reverse=False)
Return the cumulative min of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [3, 1, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_cum_min(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_min().alias("cum_min"),
... nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_cum_min`:
>>> agnostic_cum_min(df_pd)
a cum_min cum_min_reverse
0 3.0 3.0 1.0
1 1.0 1.0 1.0
2 NaN NaN NaN
3 2.0 1.0 2.0
>>> agnostic_cum_min(df_pl)
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a ┆ cum_min ┆ cum_min_reverse │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞══════╪═════════╪═════════════════╡
│ 3 ┆ 3 ┆ 1 │
│ 1 ┆ 1 ┆ 1 │
│ null ┆ null ┆ null │
│ 2 ┆ 1 ┆ 2 │
└──────┴─────────┴─────────────────┘
>>> agnostic_cum_min(df_pa)
pyarrow.Table
a: int64
cum_min: int64
cum_min_reverse: int64
----
a: [[3,1,null,2]]
cum_min: [[3,1,null,1]]
cum_min_reverse: [[1,1,null,2]]
cum_prod(*, reverse=False)
Return the cumulative product of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 3, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_cum_prod(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_prod().alias("cum_prod"),
... nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_cum_prod`:
>>> agnostic_cum_prod(df_pd)
a cum_prod cum_prod_reverse
0 1.0 1.0 6.0
1 3.0 3.0 6.0
2 NaN NaN NaN
3 2.0 6.0 2.0
>>> agnostic_cum_prod(df_pl)
shape: (4, 3)
┌──────┬──────────┬──────────────────┐
│ a ┆ cum_prod ┆ cum_prod_reverse │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞══════╪══════════╪══════════════════╡
│ 1 ┆ 1 ┆ 6 │
│ 3 ┆ 3 ┆ 6 │
│ null ┆ null ┆ null │
│ 2 ┆ 6 ┆ 2 │
└──────┴──────────┴──────────────────┘
>>> agnostic_cum_prod(df_pa)
pyarrow.Table
a: int64
cum_prod: int64
cum_prod_reverse: int64
----
a: [[1,3,null,2]]
cum_prod: [[1,3,null,6]]
cum_prod_reverse: [[6,6,null,2]]
cum_sum(*, reverse=False)
Return cumulative sum.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").cum_sum()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_cum_sum`:
>>> agnostic_cum_sum(df_pd)
a b
0 1 2
1 2 6
2 5 10
3 10 16
4 15 22
>>> agnostic_cum_sum(df_pl)
shape: (5, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 2 │
│ 2 ┆ 6 │
│ 5 ┆ 10 │
│ 10 ┆ 16 │
│ 15 ┆ 22 │
└─────┴─────┘
>>> agnostic_cum_sum(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2,5,10,15]]
b: [[2,6,10,16,22]]
diff()
Returns the difference between each element and the previous one.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use `fill_null` and `cast`. For example, to calculate
the diff and fill missing values with `0` in an Int64 column, you could
do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_diff(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(a_diff=nw.col("a").diff()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_diff`:
>>> agnostic_diff(df_pd)
a_diff
0 NaN
1 0.0
2 2.0
3 2.0
4 0.0
>>> agnostic_diff(df_pl)
shape: (5, 1)
┌────────┐
│ a_diff │
│ --- │
│ i64 │
╞════════╡
│ null │
│ 0 │
│ 2 │
│ 2 │
│ 0 │
└────────┘
>>> agnostic_diff(df_pa)
pyarrow.Table
a_diff: int64
----
a_diff: [[null,0,2,2,0]]
drop_nulls()
Drop null values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
>>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]})
Let's define a dataframe-agnostic function:
>>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").drop_nulls()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_drop_nulls`:
>>> agnostic_drop_nulls(df_pd)
a
0 2.0
1 4.0
3 3.0
5 5.0
>>> agnostic_drop_nulls(df_pl)
shape: (4, 1)
┌─────┐
│ a │
│ --- │
│ f64 │
╞═════╡
│ 2.0 │
│ 4.0 │
│ 3.0 │
│ 5.0 │
└─────┘
>>> agnostic_drop_nulls(df_pa)
pyarrow.Table
a: double
----
a: [[2,4,3,5]]
ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)
Compute exponentially-weighted moving average.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
com
|
float | None
|
Specify decay in terms of center of mass, \(\gamma\), with \(\alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0\) |
None
|
span
|
float | None
|
Specify decay in terms of span, \(\theta\), with \(\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1\) |
None
|
half_life
|
float | None
|
Specify decay in terms of half-life, \(\tau\), with \(\alpha = 1 - \exp \left\{ \frac{-\ln(2)}{\tau} \right\} \; \forall \; \tau > 0\) |
None
|
alpha
|
float | None
|
Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). |
None
|
adjust
|
bool
|
Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings
|
True
|
min_periods
|
int
|
Minimum number of observations in window required to have a value, (otherwise result is null). |
1
|
ignore_nulls
|
bool
|
Ignore missing values when calculating weights.
|
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
We define a library agnostic function:
>>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
... ).to_native()
We can then pass either pandas or Polars to `agnostic_ewm_mean`:
>>> agnostic_ewm_mean(df_pd)
a
0 1.000000
1 1.666667
2 2.428571
>>> agnostic_ewm_mean(df_pl)
shape: (3, 1)
┌──────────┐
│ a │
│ --- │
│ f64 │
╞══════════╡
│ 1.0 │
│ 1.666667 │
│ 2.428571 │
└──────────┘
fill_null(value=None, strategy=None, limit=None)
Fill null values with given value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value
|
Any | None
|
Value used to fill null values. |
None
|
strategy
|
Literal['forward', 'backward'] | None
|
Strategy used to fill null values. |
None
|
limit
|
int | None
|
Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> df_pd = pd.DataFrame(
... {
... "a": [2, 4, None, None, 3, 5],
... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
... }
... )
>>> data = {
... "a": [2, 4, None, None, 3, 5],
... "b": [2.0, 4.0, None, None, 3.0, 5.0],
... }
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(nw.col("a", "b").fill_null(0)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_fill_null`:
>>> agnostic_fill_null(df_pd)
a b
0 2.0 2.0
1 4.0 4.0
2 0.0 0.0
3 0.0 0.0
4 3.0 3.0
5 5.0 5.0
>>> agnostic_fill_null(df_pl)
shape: (6, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2 ┆ 2.0 │
│ 4 ┆ 4.0 │
│ 0 ┆ 0.0 │
│ 0 ┆ 0.0 │
│ 3 ┆ 3.0 │
│ 5 ┆ 5.0 │
└─────┴─────┘
>>> agnostic_fill_null(df_pa)
pyarrow.Table
a: int64
b: double
----
a: [[2,4,0,0,3,5]]
b: [[2,4,0,0,3,5]]
Using a strategy:
>>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a", "b")
... .fill_null(strategy="forward", limit=1)
... .name.suffix("_filled")
... ).to_native()
>>> agnostic_fill_null_with_strategy(df_pd)
a b a_filled b_filled
0 2.0 2.0 2.0 2.0
1 4.0 4.0 4.0 4.0
2 NaN NaN 4.0 4.0
3 NaN NaN NaN NaN
4 3.0 3.0 3.0 3.0
5 5.0 5.0 5.0 5.0
>>> agnostic_fill_null_with_strategy(df_pl)
shape: (6, 4)
┌──────┬──────┬──────────┬──────────┐
│ a ┆ b ┆ a_filled ┆ b_filled │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ i64 ┆ f64 │
╞══════╪══════╪══════════╪══════════╡
│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │
│ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │
│ null ┆ null ┆ 4 ┆ 4.0 │
│ null ┆ null ┆ null ┆ null │
│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │
│ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │
└──────┴──────┴──────────┴──────────┘
>>> agnostic_fill_null_with_strategy(df_pa)
pyarrow.Table
a: int64
b: double
a_filled: int64
b_filled: double
----
a: [[2,4,null,null,3,5]]
b: [[2,4,null,null,3,5]]
a_filled: [[2,4,4,null,3,5]]
b_filled: [[2,4,4,null,3,5]]
filter(*predicates)
Filters elements based on a condition, returning a new expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
predicates
|
Any
|
Conditions to filter by (which get ANDed together). |
()
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").filter(nw.col("a") > 4),
... nw.col("b").filter(nw.col("b") < 13),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_filter`:
>>> agnostic_filter(df_pd)
a b
3 5 10
4 6 11
5 7 12
>>> agnostic_filter(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 5 ┆ 10 │
│ 6 ┆ 11 │
│ 7 ┆ 12 │
└─────┴─────┘
>>> agnostic_filter(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5,6,7]]
b: [[10,11,12]]
gather_every(n, offset=0)
Take every nth value in the Series and return as new Series.
Warning
`Expr.gather_every` is deprecated and will be removed in a future version.
Hint: instead of `df.select(nw.col('a').gather_every())`, use
`df.select(nw.col('a')).gather_every()` instead.
Note: this will remain available in `narwhals.stable.v1`.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Gather every n-th row. |
required |
offset
|
int
|
Starting index. |
0
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
head(n=10)
Get the first `n` rows.
Warning
`Expr.head` is deprecated and will be removed in a future version.
Hint: instead of `df.select(nw.col('a').head())`, use
`df.select(nw.col('a')).head()` instead.
Note: this will remain available in `narwhals.stable.v1`.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of rows to return. |
10
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
clip(lower_bound=None, upper_bound=None)
Clip values in the Series.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound
|
IntoExpr | Any | None
|
Lower bound value. |
None
|
upper_bound
|
IntoExpr | Any | None
|
Upper bound value. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_clip_lower(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").clip(2)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_clip_lower`:
>>> agnostic_clip_lower(df_pd)
a
0 2
1 2
2 3
>>> agnostic_clip_lower(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 2 │
│ 3 │
└─────┘
>>> agnostic_clip_lower(df_pa)
pyarrow.Table
a: int64
----
a: [[2,2,3]]
We define another library agnostic function:
>>> def agnostic_clip_upper(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").clip(upper_bound=2)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_clip_upper`:
>>> agnostic_clip_upper(df_pd)
a
0 1
1 2
2 2
>>> agnostic_clip_upper(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 1 │
│ 2 │
│ 2 │
└─────┘
>>> agnostic_clip_upper(df_pa)
pyarrow.Table
a: int64
----
a: [[1,2,2]]
We can have both at the same time
>>> data = {"a": [-1, 1, -3, 3, -5, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_clip(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").clip(-1, 3)).to_native()
We can pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_clip`:
>>> agnostic_clip(df_pd)
a
0 -1
1 1
2 -1
3 3
4 -1
5 3
>>> agnostic_clip(df_pl)
shape: (6, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ -1 │
│ 1 │
│ -1 │
│ 3 │
│ -1 │
│ 3 │
└─────┘
>>> agnostic_clip(df_pa)
pyarrow.Table
a: int64
----
a: [[-1,1,-1,3,-1,3]]
is_between(lower_bound, upper_bound, closed='both')
Check if this expression is between the given lower and upper bounds.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound
|
Any | IntoExpr
|
Lower bound value. |
required |
upper_bound
|
Any | IntoExpr
|
Upper bound value. |
required |
closed
|
Literal['left', 'right', 'none', 'both']
|
Define which sides of the interval are closed (inclusive). |
'both'
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_is_between(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").is_between(2, 4, "right")).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_is_between`:
>>> agnostic_is_between(df_pd)
a
0 False
1 False
2 True
3 True
4 False
>>> agnostic_is_between(df_pl)
shape: (5, 1)
┌───────┐
│ a │
│ --- │
│ bool │
╞═══════╡
│ false │
│ false │
│ true │
│ true │
│ false │
└───────┘
>>> agnostic_is_between(df_pa)
pyarrow.Table
a: bool
----
a: [[false,false,true,true,false]]
is_duplicated()
Return a boolean mask indicating duplicated values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_is_duplicated(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_duplicated()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_is_duplicated`:
>>> agnostic_is_duplicated(df_pd)
a b
0 True True
1 False True
2 False False
3 True False
>>> agnostic_is_duplicated(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ true ┆ true │
│ false ┆ true │
│ false ┆ false │
│ true ┆ false │
└───────┴───────┘
>>> agnostic_is_duplicated(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,false,false,true]]
b: [[true,true,false,false]]
is_finite()
Returns boolean values indicating which original values are finite.
Warning
Different backends handle null values differently. `is_finite` will return
False for NaN and null values in the Dask and pandas non-nullable backends, while
for Polars, PyArrow and pandas nullable backends null values are kept as such.
Returns:
Type | Description |
---|---|
Self
|
Expression of `Boolean` data type. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [float("nan"), float("inf"), 2.0, None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_is_finite(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").is_finite()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_is_finite`:
>>> agnostic_is_finite(df_pd)
a
0 False
1 False
2 True
3 False
>>> agnostic_is_finite(df_pl)
shape: (4, 1)
┌───────┐
│ a │
│ --- │
│ bool │
╞═══════╡
│ false │
│ false │
│ true │
│ null │
└───────┘
>>> agnostic_is_finite(df_pa)
pyarrow.Table
a: bool
----
a: [[false,false,true,null]]
is_first_distinct()
Return a boolean mask indicating the first occurrence of each distinct value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_is_first_distinct(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_first_distinct()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to `agnostic_is_first_distinct`:
>>> agnostic_is_first_distinct(df_pd)
a b
0 True True
1 True False
2 True True
3 False True
>>> agnostic_is_first_distinct(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ true ┆ true │
│ true ┆ false │
│ true ┆ true │
│ false ┆ true │
└───────┴───────┘
>>> agnostic_is_first_distinct(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,true,true,false]]
b: [[true,false,true,true]]
is_in(other)
Check if elements of this expression are present in the other iterable.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other
|
Any
|
iterable |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 9, 10]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_is_in(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_is_in
:
>>> agnostic_is_in(df_pd)
a b
0 1 True
1 2 True
2 9 False
3 10 False
>>> agnostic_is_in(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ bool │
╞═════╪═══════╡
│ 1 ┆ true │
│ 2 ┆ true │
│ 9 ┆ false │
│ 10 ┆ false │
└─────┴───────┘
>>> agnostic_is_in(df_pa)
pyarrow.Table
a: int64
b: bool
----
a: [[1,2,9,10]]
b: [[true,true,false,false]]
is_last_distinct()
Return a boolean mask indicating the last occurrence of each distinct value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_is_last_distinct(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_last_distinct()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_is_last_distinct
:
>>> agnostic_is_last_distinct(df_pd)
a b
0 False False
1 True True
2 True True
3 True True
>>> agnostic_is_last_distinct(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ false ┆ false │
│ true ┆ true │
│ true ┆ true │
│ true ┆ true │
└───────┴───────┘
>>> agnostic_is_last_distinct(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,true]]
b: [[false,true,true,true]]
is_nan()
Indicate which values are NaN.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"orig": [0.0, None, 2.0]}
>>> df_pd = pd.DataFrame(data).astype({"orig": "Float64"})
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_self_div_is_nan(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... divided=nw.col("orig") / nw.col("orig"),
... divided_is_nan=(nw.col("orig") / nw.col("orig")).is_nan(),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_self_div_is_nan
:
>>> print(agnostic_self_div_is_nan(df_pd))
orig divided divided_is_nan
0 0.0 NaN True
1 <NA> <NA> <NA>
2 2.0 1.0 False
>>> print(agnostic_self_div_is_nan(df_pl))
shape: (3, 3)
┌──────┬─────────┬────────────────┐
│ orig ┆ divided ┆ divided_is_nan │
│ --- ┆ --- ┆ --- │
│ f64 ┆ f64 ┆ bool │
╞══════╪═════════╪════════════════╡
│ 0.0 ┆ NaN ┆ true │
│ null ┆ null ┆ null │
│ 2.0 ┆ 1.0 ┆ false │
└──────┴─────────┴────────────────┘
>>> print(agnostic_self_div_is_nan(df_pa))
pyarrow.Table
orig: double
divided: double
divided_is_nan: bool
----
orig: [[0,null,2]]
divided: [[nan,null,1]]
divided_is_nan: [[true,null,false]]
is_null()
Returns a boolean Series indicating which values are null.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> df_pd = pd.DataFrame(
... {
... "a": [2, 4, None, 3, 5],
... "b": [2.0, 4.0, float("nan"), 3.0, 5.0],
... }
... )
>>> data = {
... "a": [2, 4, None, 3, 5],
... "b": [2.0, 4.0, None, 3.0, 5.0],
... }
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_is_null
:
>>> agnostic_is_null(df_pd)
a b a_is_null b_is_null
0 2.0 2.0 False False
1 4.0 4.0 False False
2 NaN NaN True True
3 3.0 3.0 False False
4 5.0 5.0 False False
>>> agnostic_is_null(df_pl)
shape: (5, 4)
┌──────┬──────┬───────────┬───────────┐
│ a ┆ b ┆ a_is_null ┆ b_is_null │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ bool ┆ bool │
╞══════╪══════╪═══════════╪═══════════╡
│ 2 ┆ 2.0 ┆ false ┆ false │
│ 4 ┆ 4.0 ┆ false ┆ false │
│ null ┆ null ┆ true ┆ true │
│ 3 ┆ 3.0 ┆ false ┆ false │
│ 5 ┆ 5.0 ┆ false ┆ false │
└──────┴──────┴───────────┴───────────┘
>>> agnostic_is_null(df_pa)
pyarrow.Table
a: int64
b: double
a_is_null: bool
b_is_null: bool
----
a: [[2,4,null,3,5]]
b: [[2,4,null,3,5]]
a_is_null: [[false,false,true,false,false]]
b_is_null: [[false,false,true,false,false]]
is_unique()
Return a boolean mask indicating unique values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_is_unique(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_unique()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_is_unique
:
>>> agnostic_is_unique(df_pd)
a b
0 False False
1 True False
2 True True
3 False True
>>> agnostic_is_unique(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ false ┆ false │
│ true ┆ false │
│ true ┆ true │
│ false ┆ true │
└───────┴───────┘
>>> agnostic_is_unique(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,false]]
b: [[false,false,true,true]]
len()
Return the number of elements in the column.
Null values count towards the total.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function that computes the len over different values of the "b" column:
>>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_len
:
>>> agnostic_len(df_pd)
a1 a2
0 2 1
>>> agnostic_len(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a1 ┆ a2 │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 2 ┆ 1 │
└─────┴─────┘
>>> agnostic_len(df_pa)
pyarrow.Table
a1: int64
a2: int64
----
a1: [[2]]
a2: [[1]]
map_batches(function, return_dtype=None)
Apply a custom Python function to a whole Series or sequence of Series.
The output of this custom function is presumed to be either a Series, or a NumPy array (in which case it will be automatically converted into a Series).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
function
|
Callable[[Any], Self]
|
Function to apply to Series. |
required |
return_dtype
|
DType | None
|
Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_map_batches(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a", "b").map_batches(
... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64
... )
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_map_batches
:
>>> agnostic_map_batches(df_pd)
a b
0 2.0 5.0
1 3.0 6.0
2 4.0 7.0
>>> agnostic_map_batches(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 2.0 ┆ 5.0 │
│ 3.0 ┆ 6.0 │
│ 4.0 ┆ 7.0 │
└─────┴─────┘
>>> agnostic_map_batches(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[2,3,4]]
b: [[5,6,7]]
max()
Returns the maximum value(s) from a column(s).
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [10, 20], "b": [50, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.max("a", "b")).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_max
:
>>> agnostic_max(df_pd)
a b
0 20 100
>>> agnostic_max(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 20 ┆ 100 │
└─────┴─────┘
>>> agnostic_max(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[20]]
b: [[100]]
mean()
Get mean value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [-1, 0, 1], "b": [2, 4, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").mean()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_mean
:
>>> agnostic_mean(df_pd)
a b
0 0.0 4.0
>>> agnostic_mean(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 0.0 ┆ 4.0 │
└─────┴─────┘
>>> agnostic_mean(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[0]]
b: [[4]]
median()
Get median value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 8, 3], "b": [4, 5, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").median()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_median
:
>>> agnostic_median(df_pd)
a b
0 3.0 4.0
>>> agnostic_median(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 3.0 ┆ 4.0 │
└─────┴─────┘
>>> agnostic_median(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[3]]
b: [[4]]
min()
Returns the minimum value(s) from a column(s).
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2], "b": [4, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.min("a", "b")).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_min
:
>>> agnostic_min(df_pd)
a b
0 1 3
>>> agnostic_min(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 3 │
└─────┴─────┘
>>> agnostic_min(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[3]]
mode()
Compute the most occurring value(s).
Can return multiple values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {
... "a": [1, 1, 2, 3],
... "b": [1, 1, 2, 2],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_mode(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").mode()).sort("a").to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_mode
:
>>> agnostic_mode(df_pd)
a
0 1
>>> agnostic_mode(df_pl)
shape: (1, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 1 │
└─────┘
>>> agnostic_mode(df_pa)
pyarrow.Table
a: int64
----
a: [[1]]
null_count()
Count null values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().null_count()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_null_count
:
>>> agnostic_null_count(df_pd)
a b
0 1 2
>>> agnostic_null_count(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 1 ┆ 2 │
└─────┴─────┘
>>> agnostic_null_count(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[2]]
n_unique()
Returns count of unique values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_n_unique(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").n_unique()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_n_unique
:
>>> agnostic_n_unique(df_pd)
a b
0 5 3
>>> agnostic_n_unique(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 5 ┆ 3 │
└─────┴─────┘
>>> agnostic_n_unique(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5]]
b: [[3]]
over(*keys)
Compute expressions over the given groups.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
keys
|
str | Iterable[str]
|
Names of columns to compute window expression over.
Must be names of columns, as opposed to expressions -
so, this is a bit less flexible than Polars' Expr.over. |
()
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_min_over_b(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... a_min_per_group=nw.col("a").min().over("b")
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_min_over_b
:
>>> agnostic_min_over_b(df_pd)
a b a_min_per_group
0 1 1 1
1 2 1 1
2 3 2 3
>>> agnostic_min_over_b(df_pl)
shape: (3, 3)
┌─────┬─────┬─────────────────┐
│ a ┆ b ┆ a_min_per_group │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════════════════╡
│ 1 ┆ 1 ┆ 1 │
│ 2 ┆ 1 ┆ 1 │
│ 3 ┆ 2 ┆ 3 │
└─────┴─────┴─────────────────┘
>>> agnostic_min_over_b(df_pa)
pyarrow.Table
a: int64
b: int64
a_min_per_group: int64
----
a: [[1,2,3]]
b: [[1,1,2]]
a_min_per_group: [[1,1,3]]
Cumulative operations are also supported, but (currently) only for pandas and Polars:
>>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(c=nw.col("a").cum_sum().over("b")).to_native()
>>> agnostic_cum_sum(df_pd)
a b c
0 1 1 1
1 2 1 3
2 3 2 3
>>> agnostic_cum_sum(df_pl)
shape: (3, 3)
┌─────┬─────┬─────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 1 ┆ 1 ┆ 1 │
│ 2 ┆ 1 ┆ 3 │
│ 3 ┆ 2 ┆ 3 │
└─────┴─────┴─────┘
pipe(function, *args, **kwargs)
Pipe function call.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
function
|
Callable[[Any], Self]
|
Function to apply. |
required |
args
|
Any
|
Positional arguments to pass to function. |
()
|
kwargs
|
Any
|
Keyword arguments to pass to function. |
{}
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a library-agnostic function:
>>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_pipe
:
>>> agnostic_pipe(df_pd)
a
0 2
1 3
2 4
3 5
>>> agnostic_pipe(df_pl)
shape: (4, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 3 │
│ 4 │
│ 5 │
└─────┘
>>> agnostic_pipe(df_pa)
pyarrow.Table
a: int64
----
a: [[2,3,4,5]]
quantile(quantile, interpolation)
Get quantile value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
quantile
|
float
|
Quantile between 0.0 and 1.0. |
required |
interpolation
|
Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
|
Interpolation method. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Note
- pandas and Polars may have implementation differences for a given interpolation method.
- dask has its own method to approximate quantiles and does not implement 'nearest', 'higher', 'lower' or 'midpoint' as interpolation methods - use 'linear', which is the closest to the native dask method.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_quantile(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a", "b").quantile(0.5, interpolation="linear")
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_quantile
:
>>> agnostic_quantile(df_pd)
a b
0 24.5 74.5
>>> agnostic_quantile(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪══════╡
│ 24.5 ┆ 74.5 │
└──────┴──────┘
>>> agnostic_quantile(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[24.5]]
b: [[74.5]]
rank(method='average', *, descending=False)
Assign ranks to data, dealing with ties appropriately.
Notes
The resulting dtype may differ between backends.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
method
|
Literal['average', 'min', 'max', 'dense', 'ordinal']
|
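The method used to assign ranks to tied elements. The following methods are available (default is 'average'):
- 'average': The average of the ranks that would have been assigned to all the tied values is assigned to each value.
- 'min': The lowest of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.)
- 'max': The highest of the ranks that would have been assigned to all the tied values is assigned to each value.
- 'dense': Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements.
- 'ordinal': All values are given a distinct rank, corresponding to the order in which the values occur.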
|
'average'
|
descending
|
bool
|
Rank in descending order. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression with rank data. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [3, 6, 1, 1, 6]}
We define a dataframe-agnostic function that computes the dense rank for the data:
>>> def agnostic_dense_rank(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... result = df.with_columns(rnk=nw.col("a").rank(method="dense"))
... return result.to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_dense_rank
:
>>> agnostic_dense_rank(pd.DataFrame(data))
a rnk
0 3 2.0
1 6 3.0
2 1 1.0
3 1 1.0
4 6 3.0
>>> agnostic_dense_rank(pl.DataFrame(data))
shape: (5, 2)
┌─────┬─────┐
│ a ┆ rnk │
│ --- ┆ --- │
│ i64 ┆ u32 │
╞═════╪═════╡
│ 3 ┆ 2 │
│ 6 ┆ 3 │
│ 1 ┆ 1 │
│ 1 ┆ 1 │
│ 6 ┆ 3 │
└─────┴─────┘
>>> agnostic_dense_rank(pa.table(data))
pyarrow.Table
a: int64
rnk: uint64
----
a: [[3,6,1,1,6]]
rnk: [[2,3,1,1,3]]
replace_strict(old, new=None, *, return_dtype=None)
Replace all values by different values.
This function must replace all non-null input values (else it raises an error).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
old
|
Sequence[Any] | Mapping[Any, Any]
|
Sequence of values to replace. It also accepts a mapping of values to
their replacement, as syntactic sugar for passing old=list(mapping.keys()) and new=list(mapping.values()).
|
required |
new
|
Sequence[Any] | None
|
Sequence of values to replace by. Length must match the length of old. |
None
|
return_dtype
|
DType | type[DType] | None
|
The data type of the resulting expression. If set to None (default), the data type is determined automatically based on the other inputs. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [3, 0, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define dataframe-agnostic functions:
>>> def agnostic_replace_strict(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").replace_strict(
... [0, 1, 2, 3],
... ["zero", "one", "two", "three"],
... return_dtype=nw.String,
... )
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_replace_strict
:
>>> agnostic_replace_strict(df_pd)
a b
0 3 three
1 0 zero
2 1 one
3 2 two
>>> agnostic_replace_strict(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪═══════╡
│ 3 ┆ three │
│ 0 ┆ zero │
│ 1 ┆ one │
│ 2 ┆ two │
└─────┴───────┘
>>> agnostic_replace_strict(df_pa)
pyarrow.Table
a: int64
b: string
----
a: [[3,0,1,2]]
b: [["three","zero","one","two"]]
rolling_mean(window_size, *, min_periods=None, center=False)
Apply a rolling mean (moving mean) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length window_size
will traverse the values. The resulting values
will be aggregated to their mean.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_periods
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").rolling_mean(window_size=3, min_periods=1)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_rolling_mean
:
>>> agnostic_rolling_mean(df_pd)
a b
0 1.0 1.0
1 2.0 1.5
2 NaN 1.5
3 4.0 3.0
>>> agnostic_rolling_mean(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪═════╡
│ 1.0 ┆ 1.0 │
│ 2.0 ┆ 1.5 │
│ null ┆ 1.5 │
│ 4.0 ┆ 3.0 │
└──────┴─────┘
>>> agnostic_rolling_mean(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,1.5,1.5,3]]
rolling_std(window_size, *, min_periods=None, center=False, ddof=1)
Apply a rolling standard deviation (moving standard deviation) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length window_size
will traverse the values. The resulting values
will be aggregated to their standard deviation.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_periods
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
ddof
|
int
|
Delta Degrees of Freedom; the divisor for a length N window is N - ddof. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").rolling_std(window_size=3, min_periods=1)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_rolling_std
:
>>> agnostic_rolling_std(df_pd)
a b
0 1.0 NaN
1 2.0 0.707107
2 NaN 0.707107
3 4.0 1.414214
>>> agnostic_rolling_std(df_pl)
shape: (4, 2)
┌──────┬──────────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪══════════╡
│ 1.0 ┆ null │
│ 2.0 ┆ 0.707107 │
│ null ┆ 0.707107 │
│ 4.0 ┆ 1.414214 │
└──────┴──────────┘
>>> agnostic_rolling_std(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]]
rolling_sum(window_size, *, min_periods=None, center=False)
Apply a rolling sum (moving sum) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length window_size
will traverse the values. The resulting values
will be aggregated to their sum.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_periods
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").rolling_sum(window_size=3, min_periods=1)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_rolling_sum
:
>>> agnostic_rolling_sum(df_pd)
a b
0 1.0 1.0
1 2.0 3.0
2 NaN 3.0
3 4.0 6.0
>>> agnostic_rolling_sum(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪═════╡
│ 1.0 ┆ 1.0 │
│ 2.0 ┆ 3.0 │
│ null ┆ 3.0 │
│ 4.0 ┆ 6.0 │
└──────┴─────┘
>>> agnostic_rolling_sum(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,3,3,6]]
rolling_var(window_size, *, min_periods=None, center=False, ddof=1)
Apply a rolling variance (moving variance) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length window_size
will traverse the values. The resulting values
will be aggregated to their variance.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_periods
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
ddof
|
int
|
Delta Degrees of Freedom; the divisor for a length N window is N - ddof. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library-agnostic function:
>>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").rolling_var(window_size=3, min_periods=1)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_rolling_var
:
>>> agnostic_rolling_var(df_pd)
a b
0 1.0 NaN
1 2.0 0.5
2 NaN 0.5
3 4.0 2.0
>>> agnostic_rolling_var(df_pl)
shape: (4, 2)
┌──────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪══════╡
│ 1.0 ┆ null │
│ 2.0 ┆ 0.5 │
│ null ┆ 0.5 │
│ 4.0 ┆ 2.0 │
└──────┴──────┘
>>> agnostic_rolling_var(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[nan,0.5,0.5,2]]
round(decimals=0)
Round underlying floating point data by decimals
digits.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
decimals
|
int
|
Number of decimals to round by. |
0
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
For values exactly halfway between rounded decimal values, pandas behaves differently than Polars and Arrow.
pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 to 4.0, etc.).
Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1.12345, 2.56789, 3.901234]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function that rounds to the first decimal:
>>> def agnostic_round(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").round(1)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_round
:
>>> agnostic_round(df_pd)
a
0 1.1
1 2.6
2 3.9
>>> agnostic_round(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ f64 │
╞═════╡
│ 1.1 │
│ 2.6 │
│ 3.9 │
└─────┘
>>> agnostic_round(df_pa)
pyarrow.Table
a: double
----
a: [[1.1,2.6,3.9]]
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
Sample randomly from this expression.
Warning
Expr.sample is deprecated and will be removed in a future version.
Hint: instead of df.select(nw.col('a').sample()), use df.select(nw.col('a')).sample().
Note: this will remain available in narwhals.stable.v1.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int | None
|
Number of items to return. Cannot be used with fraction. |
None
|
fraction
|
float | None
|
Fraction of items to return. Cannot be used with n. |
None
|
with_replacement
|
bool
|
Allow values to be sampled more than once. |
False
|
seed
|
int | None
|
Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
shift(n)
Shift values by n
positions.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of positions to shift values by. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use fill_null and cast. For example, to shift
and fill missing values with 0 in an Int64 column, you could do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_shift(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(a_shift=nw.col("a").shift(n=1)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_shift
:
>>> agnostic_shift(df_pd)
a_shift
0 NaN
1 1.0
2 1.0
3 3.0
4 5.0
>>> agnostic_shift(df_pl)
shape: (5, 1)
┌─────────┐
│ a_shift │
│ --- │
│ i64 │
╞═════════╡
│ null │
│ 1 │
│ 1 │
│ 3 │
│ 5 │
└─────────┘
>>> agnostic_shift(df_pa)
pyarrow.Table
a_shift: int64
----
a_shift: [[null,1,1,3,5]]
sort(*, descending=False, nulls_last=False)
Sort this column. Place null values first.
Warning
Expr.sort is deprecated and will be removed in a future version.
Hint: instead of df.select(nw.col('a').sort()), use df.select(nw.col('a')).sort().
Note: this will remain available in narwhals.stable.v1.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
descending
|
bool
|
Sort in descending order. |
False
|
nulls_last
|
bool
|
Place null values last instead of first. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
skew()
Calculate the sample skewness of a column.
Returns:
Type | Description |
---|---|
Self
|
An expression representing the sample skewness of the column. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_skew(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").skew()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_skew
:
>>> agnostic_skew(df_pd)
a b
0 0.0 1.472427
>>> agnostic_skew(df_pl)
shape: (1, 2)
┌─────┬──────────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪══════════╡
│ 0.0 ┆ 1.472427 │
└─────┴──────────┘
>>> agnostic_skew(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[0]]
b: [[1.4724267269058975]]
std(*, ddof=1)
Get standard deviation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
ddof
|
int
|
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_std(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").std(ddof=0)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_std
:
>>> agnostic_std(df_pd)
a b
0 17.79513 1.265789
>>> agnostic_std(df_pl)
shape: (1, 2)
┌──────────┬──────────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════════╪══════════╡
│ 17.79513 ┆ 1.265789 │
└──────────┴──────────┘
>>> agnostic_std(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[17.795130420052185]]
b: [[1.2657891697365016]]
sum()
Return the sum value.
Returns:
Type | Description |
---|---|
Expr
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [5, 10], "b": [50, 100]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").sum()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_sum
:
>>> agnostic_sum(df_pd)
a b
0 15 150
>>> agnostic_sum(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 15 ┆ 150 │
└─────┴─────┘
>>> agnostic_sum(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[15]]
b: [[150]]
tail(n=10)
Get the last n
rows.
Warning
Expr.tail is deprecated and will be removed in a future version.
Hint: instead of df.select(nw.col('a').tail()), use df.select(nw.col('a')).tail().
Note: this will remain available in narwhals.stable.v1.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of rows to return. |
10
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
unique()
Return unique values of this expression.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").unique().sum()).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_unique
:
>>> agnostic_unique(df_pd)
a b
0 9 12
>>> agnostic_unique(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 9 ┆ 12 │
└─────┴─────┘
>>> agnostic_unique(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[9]]
b: [[12]]
var(*, ddof=1)
Get variance.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
ddof
|
int
|
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def agnostic_var(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").var(ddof=0)).to_native()
We can then pass any supported library such as pandas, Polars, or
PyArrow to agnostic_var
:
>>> agnostic_var(df_pd)
a b
0 316.666667 1.602222
>>> agnostic_var(df_pl)
shape: (1, 2)
┌────────────┬──────────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞════════════╪══════════╡
│ 316.666667 ┆ 1.602222 │
└────────────┴──────────┘
>>> agnostic_var(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[316.6666666666667]]
b: [[1.6022222222222222]]