narwhals.Expr
abs()
Return absolute value of each element.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, -2], "b": [-3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").abs()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 1 3
1 2 4
>>> my_library_agnostic_function(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 3 │
│ 2 ┆ 4 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2]]
b: [[3,4]]
alias(name)
Rename the expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name
|
str
|
The new name. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df_pa = pa.table({"a": [1, 2], "b": [4, 5]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select((nw.col("b") + 10).alias("c")).to_native()
We can pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
c
0 14
1 15
>>> my_library_agnostic_function(df_pl)
shape: (2, 1)
┌─────┐
│ c │
│ --- │
│ i64 │
╞═════╡
│ 14 │
│ 15 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
c: int64
----
c: [[14,15]]
all()
Return whether all values in the column are `True`.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").all()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 False True
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌───────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪══════╡
│ false ┆ true │
└───────┴──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false]]
b: [[true]]
any()
Return whether any of the values in the column are `True`.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pa = pa.table({"a": [True, False], "b": [True, True]})
We define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").any()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 True True
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞══════╪══════╡
│ true ┆ true │
└──────┴──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true]]
b: [[true]]
arg_true()
Find elements where boolean expression is True.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, None, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").is_null().arg_true()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
1 1
2 2
>>> my_library_agnostic_function(df_pl)
shape: (2, 1)
┌─────┐
│ a │
│ --- │
│ u32 │
╞═════╡
│ 1 │
│ 2 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[1,2]]
cast(dtype)
Redefine an object's data type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dtype
|
DType | type[DType]
|
Data type that the object will be cast into. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> from datetime import date
>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df_pa = pa.table({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
foo bar
0 1.0 6
1 2.0 7
2 3.0 8
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ f32 ┆ u8 │
╞═════╪═════╡
│ 1.0 ┆ 6 │
│ 2.0 ┆ 7 │
│ 3.0 ┆ 8 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
foo: float
bar: uint8
----
foo: [[1,2,3]]
bar: [[6,7,8]]
count()
Returns the number of non-null elements in the column.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df_pa = pa.table({"a": [1, 2, 3], "b": [None, 4, 4]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().count()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 3 2
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 3 ┆ 2 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[3]]
b: [[2]]
cum_count(*, reverse=False)
Return the cumulative count of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": ["x", "k", None, "d"]}
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_count().alias("cum_count"),
... nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(pd.DataFrame(data))
a cum_count cum_count_reverse
0 x 1 3
1 k 2 2
2 None 2 1
3 d 3 1
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬───────────┬───────────────────┐
│ a ┆ cum_count ┆ cum_count_reverse │
│ --- ┆ --- ┆ --- │
│ str ┆ u32 ┆ u32 │
╞══════╪═══════════╪═══════════════════╡
│ x ┆ 1 ┆ 3 │
│ k ┆ 2 ┆ 2 │
│ null ┆ 2 ┆ 1 │
│ d ┆ 3 ┆ 1 │
└──────┴───────────┴───────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: string
cum_count: uint32
cum_count_reverse: uint32
----
a: [["x","k",null,"d"]]
cum_count: [[1,2,2,3]]
cum_count_reverse: [[3,2,1,1]]
cum_max(*, reverse=False)
Return the cumulative max of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 3, None, 2]}
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_max().alias("cum_max"),
... nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(pd.DataFrame(data))
a cum_max cum_max_reverse
0 1.0 1.0 3.0
1 3.0 3.0 3.0
2 NaN NaN NaN
3 2.0 3.0 2.0
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a ┆ cum_max ┆ cum_max_reverse │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞══════╪═════════╪═════════════════╡
│ 1 ┆ 1 ┆ 3 │
│ 3 ┆ 3 ┆ 3 │
│ null ┆ null ┆ null │
│ 2 ┆ 3 ┆ 2 │
└──────┴─────────┴─────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: int64
cum_max: int64
cum_max_reverse: int64
----
a: [[1,3,null,2]]
cum_max: [[1,3,null,3]]
cum_max_reverse: [[3,3,null,2]]
cum_min(*, reverse=False)
Return the cumulative min of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [3, 1, None, 2]}
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_min().alias("cum_min"),
... nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(pd.DataFrame(data))
a cum_min cum_min_reverse
0 3.0 3.0 1.0
1 1.0 1.0 1.0
2 NaN NaN NaN
3 2.0 1.0 2.0
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬─────────┬─────────────────┐
│ a ┆ cum_min ┆ cum_min_reverse │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞══════╪═════════╪═════════════════╡
│ 3 ┆ 3 ┆ 1 │
│ 1 ┆ 1 ┆ 1 │
│ null ┆ null ┆ null │
│ 2 ┆ 1 ┆ 2 │
└──────┴─────────┴─────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: int64
cum_min: int64
cum_min_reverse: int64
----
a: [[3,1,null,2]]
cum_min: [[3,1,null,1]]
cum_min_reverse: [[1,1,null,2]]
cum_prod(*, reverse=False)
Return the cumulative product of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 3, None, 2]}
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a").cum_prod().alias("cum_prod"),
... nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(pd.DataFrame(data))
a cum_prod cum_prod_reverse
0 1.0 1.0 6.0
1 3.0 3.0 6.0
2 NaN NaN NaN
3 2.0 6.0 2.0
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 3)
┌──────┬──────────┬──────────────────┐
│ a ┆ cum_prod ┆ cum_prod_reverse │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞══════╪══════════╪══════════════════╡
│ 1 ┆ 1 ┆ 6 │
│ 3 ┆ 3 ┆ 6 │
│ null ┆ null ┆ null │
│ 2 ┆ 6 ┆ 2 │
└──────┴──────────┴──────────────────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: int64
cum_prod: int64
cum_prod_reverse: int64
----
a: [[1,3,null,2]]
cum_prod: [[1,3,null,6]]
cum_prod_reverse: [[6,6,null,2]]
cum_sum(*, reverse=False)
Return cumulative sum.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").cum_sum()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 1 2
1 2 6
2 5 10
3 10 16
4 15 22
>>> my_library_agnostic_function(df_pl)
shape: (5, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 2 │
│ 2 ┆ 6 │
│ 5 ┆ 10 │
│ 10 ┆ 16 │
│ 15 ┆ 22 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,2,5,10,15]]
b: [[2,6,10,16,22]]
diff()
Returns the difference between each element and the previous one.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use `fill_null` and `cast`. For example, to calculate the diff and fill missing values with `0` in an Int64 column, you could do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(a_diff=nw.col("a").diff()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a_diff
0 NaN
1 0.0
2 2.0
3 2.0
4 0.0
>>> my_library_agnostic_function(df_pl)
shape: (5, 1)
┌────────┐
│ a_diff │
│ --- │
│ i64 │
╞════════╡
│ null │
│ 0 │
│ 2 │
│ 2 │
│ 0 │
└────────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a_diff: int64
----
a_diff: [[null,0,2,2,0]]
drop_nulls()
Remove missing values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df_pa = pa.table({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").drop_nulls()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
0 2.0
1 4.0
3 3.0
5 5.0
>>> my_library_agnostic_function(df_pl) # nan != null for polars
shape: (5, 1)
┌─────┐
│ a │
│ --- │
│ f64 │
╞═════╡
│ 2.0 │
│ 4.0 │
│ NaN │
│ 3.0 │
│ 5.0 │
└─────┘
>>> my_library_agnostic_function(df_pa) # nan != null for pyarrow
pyarrow.Table
a: double
----
a: [[2,4,nan,3,5]]
ewm_mean(*, com=None, span=None, half_life=None, alpha=None, adjust=True, min_periods=1, ignore_nulls=False)
Compute exponentially-weighted moving average.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
com
|
float | None
|
Specify decay in terms of center of mass, \(\gamma\), with \(\alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0\). |
None
|
span
|
float | None
|
Specify decay in terms of span, \(\theta\), with \(\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1\). |
None
|
half_life
|
float | None
|
Specify decay in terms of half-life, \(\tau\), with \(\alpha = 1 - \exp \left\{ \frac{-\ln(2)}{\tau} \right\} \; \forall \; \tau > 0\). |
None
|
alpha
|
float | None
|
Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). |
None
|
adjust
|
bool
|
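Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings. When `adjust=True` (the default), the EW function is calculated using weights \(w_i = (1 - \alpha)^i\). When `adjust=False`, the EW function is calculated recursively by \(y_0 = x_0\) and \(y_t = (1 - \alpha) y_{t-1} + \alpha x_t\).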
|
True
|
min_periods
|
int
|
Minimum number of observations in window required to have a value, (otherwise result is null). |
1
|
ignore_nulls
|
bool
|
Ignore missing values when calculating weights.
|
False
|
Returns:
Type | Description |
---|---|
Self
|
Expr |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
... ).to_native()
We can then pass either pandas or Polars to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
0 1.000000
1 1.666667
2 2.428571
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌──────────┐
│ a │
│ --- │
│ f64 │
╞══════════╡
│ 1.0 │
│ 1.666667 │
│ 2.428571 │
└──────────┘
fill_null(value=None, strategy=None, limit=None)
Fill null values with given value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value
|
Any | None
|
Value used to fill null values. |
None
|
strategy
|
Literal['forward', 'backward'] | None
|
Strategy used to fill null values. |
None
|
limit
|
int | None
|
Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame(
... {
... "a": [2, 4, None, None, 3, 5],
... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
... }
... )
>>> df_pl = pl.DataFrame(
... {
... "a": [2, 4, None, None, 3, 5],
... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
... }
... )
>>> df_pa = pa.table(
... {
... "a": [2, 4, None, None, 3, 5],
... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0],
... }
... )
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(nw.col("a", "b").fill_null(0)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 2.0 2.0
1 4.0 4.0
2 0.0 0.0
3 0.0 0.0
4 3.0 3.0
5 5.0 5.0
>>> my_library_agnostic_function(df_pl) # nan != null for polars
shape: (6, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2 ┆ 2.0 │
│ 4 ┆ 4.0 │
│ 0 ┆ NaN │
│ 0 ┆ NaN │
│ 3 ┆ 3.0 │
│ 5 ┆ 5.0 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa) # nan != null for pyarrow
pyarrow.Table
a: int64
b: double
----
a: [[2,4,0,0,3,5]]
b: [[2,4,nan,nan,3,5]]
Using a strategy:
>>> def func_strategies(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... nw.col("a", "b")
... .fill_null(strategy="forward", limit=1)
... .name.suffix("_filled")
... ).to_native()
>>> func_strategies(df_pd)
a b a_filled b_filled
0 2.0 2.0 2.0 2.0
1 4.0 4.0 4.0 4.0
2 NaN NaN 4.0 4.0
3 NaN NaN NaN NaN
4 3.0 3.0 3.0 3.0
5 5.0 5.0 5.0 5.0
>>> func_strategies(df_pl) # nan != null for polars
shape: (6, 4)
┌──────┬─────┬──────────┬──────────┐
│ a ┆ b ┆ a_filled ┆ b_filled │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ i64 ┆ f64 │
╞══════╪═════╪══════════╪══════════╡
│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │
│ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │
│ null ┆ NaN ┆ 4 ┆ NaN │
│ null ┆ NaN ┆ null ┆ NaN │
│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │
│ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │
└──────┴─────┴──────────┴──────────┘
>>> func_strategies(df_pa) # nan != null for pyarrow
pyarrow.Table
a: int64
b: double
a_filled: int64
b_filled: double
----
a: [[2,4,null,null,3,5]]
b: [[2,4,nan,nan,3,5]]
a_filled: [[2,4,4,null,3,5]]
b_filled: [[2,4,nan,nan,3,5]]
filter(*predicates)
Filters elements based on a condition, returning a new expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
predicates
|
Any
|
Conditions to filter by (which get ANDed together). |
()
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
>>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
>>> df_pa = pa.table({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").filter(nw.col("a") > 4),
... nw.col("b").filter(nw.col("b") < 13),
... ).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
3 5 10
4 6 11
5 7 12
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 5 ┆ 10 │
│ 6 ┆ 11 │
│ 7 ┆ 12 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5,6,7]]
b: [[10,11,12]]
gather_every(n, offset=0)
Take every nth value in the Series and return as new Series.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Gather every n-th row. |
required |
offset
|
int
|
Starting index. |
0
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function in which we gather every 2 rows, starting from an offset of 1:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").gather_every(n=2, offset=1)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
1 2
3 4
>>> my_library_agnostic_function(df_pl)
shape: (2, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 4 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[2,4]]
head(n=10)
Get the first `n` rows.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of rows to return. |
10
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function that returns the first 3 rows:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").head(3)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
0 0
1 1
2 2
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 0 │
│ 1 │
│ 2 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[0,1,2]]
clip(lower_bound=None, upper_bound=None)
Clip values in the Series.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound
|
Any | None
|
Lower bound value. |
None
|
upper_bound
|
Any | None
|
Upper bound value. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> s = [1, 2, 3]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})
>>> df_pa = pa.table({"s": s})
We define a library agnostic function:
>>> def func_lower(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("s").clip(2)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `func_lower`:
>>> func_lower(df_pd)
s
0 2
1 2
2 3
>>> func_lower(df_pl)
shape: (3, 1)
┌─────┐
│ s │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 2 │
│ 3 │
└─────┘
>>> func_lower(df_pa)
pyarrow.Table
s: int64
----
s: [[2,2,3]]
We define another library agnostic function:
>>> def func_upper(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("s").clip(upper_bound=2)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `func_upper`:
>>> func_upper(df_pd)
s
0 1
1 2
2 2
>>> func_upper(df_pl)
shape: (3, 1)
┌─────┐
│ s │
│ --- │
│ i64 │
╞═════╡
│ 1 │
│ 2 │
│ 2 │
└─────┘
>>> func_upper(df_pa)
pyarrow.Table
s: int64
----
s: [[1,2,2]]
We can also set both bounds at the same time:
>>> s = [-1, 1, -3, 3, -5, 5]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})
>>> df_pa = pa.table({"s": s})
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("s").clip(-1, 3)).to_native()
We can pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
s
0 -1
1 1
2 -1
3 3
4 -1
5 3
>>> my_library_agnostic_function(df_pl)
shape: (6, 1)
┌─────┐
│ s │
│ --- │
│ i64 │
╞═════╡
│ -1 │
│ 1 │
│ -1 │
│ 3 │
│ -1 │
│ 3 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
s: int64
----
s: [[-1,1,-1,3,-1,3]]
is_between(lower_bound, upper_bound, closed='both')
Check if this expression is between the given lower and upper bounds.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound
|
Any
|
Lower bound value. |
required |
upper_bound
|
Any
|
Upper bound value. |
required |
closed
|
str
|
Define which sides of the interval are closed (inclusive). |
'both'
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").is_between(2, 4, "right")).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
0 False
1 False
2 True
3 True
4 False
>>> my_library_agnostic_function(df_pl)
shape: (5, 1)
┌───────┐
│ a │
│ --- │
│ bool │
╞═══════╡
│ false │
│ false │
│ true │
│ true │
│ false │
└───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
----
a: [[false,false,true,true,false]]
is_duplicated()
Return a boolean mask indicating duplicated values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_duplicated()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 True True
1 False True
2 False False
3 True False
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ true ┆ true │
│ false ┆ true │
│ false ┆ false │
│ true ┆ false │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,false,false,true]]
b: [[true,true,false,false]]
is_finite()
Returns boolean values indicating which original values are finite.
Warning
Different backends handle null values differently. `is_finite` will return False for NaN and null values in the Dask and pandas non-nullable backends, while for the Polars, PyArrow and pandas nullable backends null values are kept as such.
Returns:
Type | Description |
---|---|
Self
|
Expression of `Boolean` data type. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [float("nan"), float("inf"), 2.0, None]}
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").is_finite()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(pd.DataFrame(data))
a
0 False
1 False
2 True
3 False
>>> my_library_agnostic_function(pl.DataFrame(data))
shape: (4, 1)
┌───────┐
│ a │
│ --- │
│ bool │
╞═══════╡
│ false │
│ false │
│ true │
│ null │
└───────┘
>>> my_library_agnostic_function(pa.table(data))
pyarrow.Table
a: bool
----
a: [[false,false,true,null]]
is_first_distinct()
Return a boolean mask indicating the first occurrence of each distinct value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_first_distinct()).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 True True
1 True False
2 True True
3 False True
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ true ┆ true │
│ true ┆ false │
│ true ┆ true │
│ false ┆ true │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[true,true,true,false]]
b: [[true,false,true,true]]
is_in(other)
Check if elements of this expression are present in the other iterable.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other
|
Any
|
iterable |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]})
>>> df_pa = pa.table({"a": [1, 2, 9, 10]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 1 True
1 2 True
2 9 False
3 10 False
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ bool │
╞═════╪═══════╡
│ 1 ┆ true │
│ 2 ┆ true │
│ 9 ┆ false │
│ 10 ┆ false │
└─────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: bool
----
a: [[1,2,9,10]]
b: [[true,true,false,false]]
is_last_distinct()
Return a boolean mask indicating the last occurrence of each distinct value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_last_distinct()).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 False False
1 True True
2 True True
3 True True
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ false ┆ false │
│ true ┆ true │
│ true ┆ true │
│ true ┆ true │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,true]]
b: [[false,true,true,true]]
is_null()
Returns a boolean Series indicating which values are null.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas, Polars and PyArrow handle null values differently. Polars and PyArrow distinguish between NaN and Null, whereas pandas doesn't.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame(
... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
>>> df_pl = pl.DataFrame(
... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
>>> df_pa = pa.table(
... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b a_is_null b_is_null
0 2.0 2.0 False False
1 4.0 4.0 False False
2 NaN NaN True True
3 3.0 3.0 False False
4 5.0 5.0 False False
>>> my_library_agnostic_function(df_pl) # nan != null for polars
shape: (5, 4)
┌──────┬─────┬───────────┬───────────┐
│ a ┆ b ┆ a_is_null ┆ b_is_null │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ bool ┆ bool │
╞══════╪═════╪═══════════╪═══════════╡
│ 2 ┆ 2.0 ┆ false ┆ false │
│ 4 ┆ 4.0 ┆ false ┆ false │
│ null ┆ NaN ┆ true ┆ false │
│ 3 ┆ 3.0 ┆ false ┆ false │
│ 5 ┆ 5.0 ┆ false ┆ false │
└──────┴─────┴───────────┴───────────┘
>>> my_library_agnostic_function(df_pa) # nan != null for pyarrow
pyarrow.Table
a: int64
b: double
a_is_null: bool
b_is_null: bool
----
a: [[2,4,null,3,5]]
b: [[2,4,nan,3,5]]
a_is_null: [[false,false,true,false,false]]
b_is_null: [[false,false,false,false,false]]
is_unique()
Return a boolean mask indicating unique values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().is_unique()).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 False False
1 True False
2 True True
3 False True
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ false ┆ false │
│ true ┆ false │
│ true ┆ true │
│ false ┆ true │
└───────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: bool
b: bool
----
a: [[false,true,true,false]]
b: [[false,false,true,true]]
len()
Return the number of elements in the column.
Null values count towards the total.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function that computes the length of column "a" for different values of column "b":
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a1 a2
0 2 1
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a1 ┆ a2 │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 2 ┆ 1 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a1: int64
a2: int64
----
a1: [[2]]
a2: [[1]]
map_batches(function, return_dtype=None)
Apply a custom python function to a whole Series or sequence of Series.
The output of this custom function is presumed to be either a Series, or a NumPy array (in which case it will be automatically converted into a Series).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
function
|
Callable[[Any], Self]
|
Function to apply to Series. |
required |
return_dtype
|
DType | None
|
Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3], "b": [4, 5, 6]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a", "b").map_batches(
... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64
... )
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 2.0 5.0
1 3.0 6.0
2 4.0 7.0
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 2.0 ┆ 5.0 │
│ 3.0 ┆ 6.0 │
│ 4.0 ┆ 7.0 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[2,3,4]]
b: [[5,6,7]]
max()
Returns the maximum value(s) from a column(s).
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df_pa = pa.table({"a": [10, 20], "b": [50, 100]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.max("a", "b")).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 20 100
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 20 ┆ 100 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[20]]
b: [[100]]
mean()
Get mean value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df_pa = pa.table({"a": [-1, 0, 1], "b": [2, 4, 6]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").mean()).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 0.0 4.0
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 0.0 ┆ 4.0 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[0]]
b: [[4]]
median()
Get median value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df_pl = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df_pa = pa.table({"a": [1, 8, 3], "b": [4, 5, 2]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").median()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 3.0 4.0
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 3.0 ┆ 4.0 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[3]]
b: [[4]]
min()
Returns the minimum value(s) from a column(s).
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df_pa = pa.table({"a": [1, 2], "b": [4, 3]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.min("a", "b")).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 1 3
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 3 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[3]]
mode()
Compute the most occurring value(s).
Can return multiple values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {
... "a": [1, 1, 2, 3],
... "b": [1, 1, 2, 2],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").mode()).sort("a").to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
0 1
>>> my_library_agnostic_function(df_pl)
shape: (1, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 1 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[1]]
null_count()
Count null values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.all().null_count()).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 1 2
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 1 ┆ 2 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1]]
b: [[2]]
n_unique()
Returns count of unique values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df_pa = pa.table({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").n_unique()).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 5 3
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 5 ┆ 3 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[5]]
b: [[3]]
over(*keys)
Compute expressions over the given groups.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
keys
|
str | Iterable[str]
|
Names of columns to compute window expression over.
Must be names of columns, as opposed to expressions -
so, this is a bit less flexible than Polars' `Expr.over`. |
()
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... a_min_per_group=nw.col("a").min().over("b")
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b a_min_per_group
0 1 1 1
1 2 1 1
2 3 2 3
>>> my_library_agnostic_function(df_pl)
shape: (3, 3)
┌─────┬─────┬─────────────────┐
│ a ┆ b ┆ a_min_per_group │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════════════════╡
│ 1 ┆ 1 ┆ 1 │
│ 2 ┆ 1 ┆ 1 │
│ 3 ┆ 2 ┆ 3 │
└─────┴─────┴─────────────────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
a_min_per_group: int64
----
a: [[1,2,3]]
b: [[1,1,2]]
a_min_per_group: [[1,1,3]]
pipe(function, *args, **kwargs)
Pipe function call.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
function
|
Callable[[Any], Self]
|
Function to apply. |
required |
args
|
Any
|
Positional arguments to pass to function. |
()
|
kwargs
|
Any
|
Keyword arguments to pass to function. |
{}
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> data = {"a": [1, 2, 3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a library-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native()
We can pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
0 2
1 3
2 4
3 5
>>> my_library_agnostic_function(df_pl)
shape: (4, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 3 │
│ 4 │
│ 5 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[2,3,4,5]]
quantile(quantile, interpolation)
Get quantile value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
quantile
|
float
|
Quantile between 0.0 and 1.0. |
required |
interpolation
|
Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
|
Interpolation method. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Note
- pandas and Polars may have implementation differences for a given interpolation method.
- Dask has its own method to approximate quantiles and does not implement 'nearest', 'higher', 'lower' or 'midpoint' as interpolation methods - use 'linear', which is closest to the native Dask method.
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a", "b").quantile(0.5, interpolation="linear")
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 24.5 74.5
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪══════╡
│ 24.5 ┆ 74.5 │
└──────┴──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[24.5]]
b: [[74.5]]
replace_strict(old, new=None, *, return_dtype=None)
Replace all values by different values.
This function must replace all non-null input values (else it raises an error).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
old
|
Sequence[Any] | Mapping[Any, Any]
|
Sequence of values to replace. It also accepts a mapping of values to
their replacement as syntactic sugar for `replace_strict(old=list(mapping.keys()), new=list(mapping.values()))`.
|
required |
new
|
Sequence[Any] | None
|
Sequence of values to replace by. Length must match the length of `old`. |
None
|
return_dtype
|
DType | type[DType] | None
|
The data type of the resulting expression. If set to `None` (default), the data type is determined automatically based on the other inputs. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [3, 0, 1, 2]})
>>> df_pl = pl.DataFrame({"a": [3, 0, 1, 2]})
>>> df_pa = pa.table({"a": [3, 0, 1, 2]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").replace_strict(
... [0, 1, 2, 3],
... ["zero", "one", "two", "three"],
... return_dtype=nw.String,
... )
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 3 three
1 0 zero
2 1 one
3 2 two
>>> my_library_agnostic_function(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ str │
╞═════╪═══════╡
│ 3 ┆ three │
│ 0 ┆ zero │
│ 1 ┆ one │
│ 2 ┆ two │
└─────┴───────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: string
----
a: [[3,0,1,2]]
b: [["three","zero","one","two"]]
rolling_mean(window_size, *, min_periods=None, center=False)
Apply a rolling mean (moving mean) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length `window_size` will traverse the values. The resulting values
will be aggregated to their mean.
The window at a given row will include the row itself and the `window_size - 1`
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_periods
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to `None` (default), it will be set equal to `window_size`. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").rolling_mean(window_size=3, min_periods=1)
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_rolling_mean`:
>>> agnostic_rolling_mean(df_pd)
a b
0 1.0 1.0
1 2.0 1.5
2 NaN 1.5
3 4.0 3.0
>>> agnostic_rolling_mean(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪═════╡
│ 1.0 ┆ 1.0 │
│ 2.0 ┆ 1.5 │
│ null ┆ 1.5 │
│ 4.0 ┆ 3.0 │
└──────┴─────┘
>>> agnostic_rolling_mean(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,1.5,1.5,3]]
rolling_sum(window_size, *, min_periods=None, center=False)
Apply a rolling sum (moving sum) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length `window_size` will traverse the values. The resulting values
will be aggregated to their sum.
The window at a given row will include the row itself and the `window_size - 1`
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_periods
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to `None` (default), it will be set equal to `window_size`. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.0, 2.0, None, 4.0]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
We define a library agnostic function:
>>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.with_columns(
... b=nw.col("a").rolling_sum(window_size=3, min_periods=1)
... ).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_rolling_sum`:
>>> agnostic_rolling_sum(df_pd)
a b
0 1.0 1.0
1 2.0 3.0
2 NaN 3.0
3 4.0 6.0
>>> agnostic_rolling_sum(df_pl)
shape: (4, 2)
┌──────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪═════╡
│ 1.0 ┆ 1.0 │
│ 2.0 ┆ 3.0 │
│ null ┆ 3.0 │
│ 4.0 ┆ 6.0 │
└──────┴─────┘
>>> agnostic_rolling_sum(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[1,2,null,4]]
b: [[1,3,3,6]]
round(decimals=0)
Round underlying floating point data by `decimals` digits.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
decimals
|
int
|
Number of decimals to round by. |
0
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 to 4.0, etc.).
Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": [1.12345, 2.56789, 3.901234]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function that rounds to the first decimal:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").round(1)).to_native()
We can then pass any supported library such as Pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
0 1.1
1 2.6
2 3.9
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ f64 │
╞═════╡
│ 1.1 │
│ 2.6 │
│ 3.9 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
----
a: [[1.1,2.6,3.9]]
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
Sample randomly from this expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int | None
|
Number of items to return. Cannot be used with fraction. |
None
|
fraction
|
float | None
|
Fraction of items to return. Cannot be used with n. |
None
|
with_replacement
|
bool
|
Allow values to be sampled more than once. |
False
|
seed
|
int | None
|
Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [1, 2, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3]})
>>> df_pa = pa.table({"a": [1, 2, 3]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").sample(fraction=1.0, with_replacement=True)
... ).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
2 3
0 1
2 3
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 3 │
│ 3 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[1,3,3]]
shift(n)
Shift values by `n` positions.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of positions to shift values by. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use `fill_null` and `cast`. For example, to shift
and fill missing values with `0` in an Int64 column, you could do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(a_shift=nw.col("a").shift(n=1)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a_shift
0 NaN
1 1.0
2 1.0
3 3.0
4 5.0
>>> my_library_agnostic_function(df_pl)
shape: (5, 1)
┌─────────┐
│ a_shift │
│ --- │
│ i64 │
╞═════════╡
│ null │
│ 1 │
│ 1 │
│ 3 │
│ 5 │
└─────────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a_shift: int64
----
a_shift: [[null,1,1,3,5]]
sort(*, descending=False, nulls_last=False)
Sort this column. Null values are placed first by default.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
descending
|
bool
|
Sort in descending order. |
False
|
nulls_last
|
bool
|
Place null values last instead of first. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]})
>>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]})
>>> df_pa = pa.table({"a": [5, None, 1, 2]})
Let's define dataframe-agnostic functions:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").sort()).to_native()
>>> def func_descend(df):
... df = nw.from_native(df)
... df = df.select(nw.col("a").sort(descending=True))
... return nw.to_native(df)
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function` and `func_descend`:
>>> my_library_agnostic_function(df_pd)
a
1 NaN
2 1.0
3 2.0
0 5.0
>>> my_library_agnostic_function(df_pl)
shape: (4, 1)
┌──────┐
│ a │
│ --- │
│ i64 │
╞══════╡
│ null │
│ 1 │
│ 2 │
│ 5 │
└──────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[null,1,2,5]]
>>> func_descend(df_pd)
a
1 NaN
0 5.0
3 2.0
2 1.0
>>> func_descend(df_pl)
shape: (4, 1)
┌──────┐
│ a │
│ --- │
│ i64 │
╞══════╡
│ null │
│ 5 │
│ 2 │
│ 1 │
└──────┘
>>> func_descend(df_pa)
pyarrow.Table
a: int64
----
a: [[null,5,2,1]]
std(*, ddof=1)
Get standard deviation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
ddof
|
int
|
“Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pa = pa.table({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").std(ddof=0)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 17.79513 1.265789
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌──────────┬──────────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════════╪══════════╡
│ 17.79513 ┆ 1.265789 │
└──────────┴──────────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: double
b: double
----
a: [[17.795130420052185]]
b: [[1.2657891697365016]]
sum()
Return the sum value.
Returns:
Type | Description |
---|---|
Expr
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]})
>>> df_pa = pa.table({"a": [5, 10], "b": [50, 100]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").sum()).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 15 150
>>> my_library_agnostic_function(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 15 ┆ 150 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[15]]
b: [[150]]
tail(n=10)
Get the last `n` rows.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of rows to return. |
10
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> import pandas as pd
>>> import polars as pl
>>> import pyarrow as pa
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
>>> df_pa = pa.table(data)
Let's define a dataframe-agnostic function that returns the last 3 rows:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a").tail(3)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a
7 7
8 8
9 9
>>> my_library_agnostic_function(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 7 │
│ 8 │
│ 9 │
└─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
----
a: [[7,8,9]]
unique(*, maintain_order=False)
Return unique values of this expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
maintain_order
|
bool
|
Keep the same order as the original expression. This may be more
expensive to compute. Setting this to `True` blocks the possibility to run on the streaming engine for Polars. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pa = pa.table({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
Let's define a dataframe-agnostic function:
>>> def my_library_agnostic_function(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(nw.col("a", "b").unique(maintain_order=True)).to_native()
We can then pass any supported library such as pandas, Polars, or PyArrow to `my_library_agnostic_function`:
>>> my_library_agnostic_function(df_pd)
a b
0 1 2
1 3 4
2 5 6
>>> my_library_agnostic_function(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 2 │
│ 3 ┆ 4 │
│ 5 ┆ 6 │
└─────┴─────┘
>>> my_library_agnostic_function(df_pa)
pyarrow.Table
a: int64
b: int64
----
a: [[1,3,5]]
b: [[2,4,6]]