Skip to content

narwhals.Expr

abs()

Return absolute value of each element.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> data = {"a": [1, -2], "b": [-3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").abs())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a  b
0  1  3
1  2  4
>>> func(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
│ 2   ┆ 4   │
└─────┴─────┘

alias(name)

Rename the expression.

Parameters:

Name Type Description Default
name str

The new name.

required

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select((nw.col("b") + 10).alias("c"))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
    c
0  14
1  15
>>> func(df_pl)
shape: (2, 1)
┌─────┐
│ c   │
│ --- │
│ i64 │
╞═════╡
│ 14  │
│ 15  │
└─────┘

all()

Return whether all values in the column are True.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").all())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
       a     b
0  False  True
>>> func(df_pl)
shape: (1, 2)
┌───────┬──────┐
│ a     ┆ b    │
│ ---   ┆ ---  │
│ bool  ┆ bool │
╞═══════╪══════╡
│ false ┆ true │
└───────┴──────┘

any()

Return whether any of the values in the column are True.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})

We define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").any())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
      a     b
0  True  True
>>> func(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ bool ┆ bool │
╞══════╪══════╡
│ true ┆ true │
└──────┴──────┘

arg_true()

Find elements where boolean expression is True.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> data = {"a": [1, None, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

We define a library agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").is_null().arg_true())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a
1  1
2  2
>>> func(df_pl)
shape: (2, 1)
┌─────┐
│ a   │
│ --- │
│ u32 │
╞═════╡
│ 1   │
│ 2   │
└─────┘

cast(dtype)

Redefine an object's data type.

Parameters:

Name Type Description Default
dtype Any

Data type that the object will be cast into.

required

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from datetime import date
>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(
...         nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
...     )

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   foo  bar
0  1.0    6
1  2.0    7
2  3.0    8
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ f32 ┆ u8  │
╞═════╪═════╡
│ 1.0 ┆ 6   │
│ 2.0 ┆ 7   │
│ 3.0 ┆ 8   │
└─────┴─────┘

count()

Returns the number of non-null elements in the column.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.all().count())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a  b
0  3  2
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 3   ┆ 2   │
└─────┴─────┘

cum_sum()

Return cumulative sum.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").cum_sum())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
    a   b
0   1   2
1   2   6
2   5  10
3  10  16
4  15  22
>>> func(df_pl)
shape: (5, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 2   │
│ 2   ┆ 6   │
│ 5   ┆ 10  │
│ 10  ┆ 16  │
│ 15  ┆ 22  │
└─────┴─────┘

diff()

Returns the difference between each element and the previous one.

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to calculate the diff and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").diff().fill_null(0).cast(nw.Int64)

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(a_diff=nw.col("a").diff())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a_diff
0     NaN
1     0.0
2     2.0
3     2.0
4     0.0
>>> func(df_pl)
shape: (5, 1)
┌────────┐
│ a_diff │
│ ---    │
│ i64    │
╞════════╡
│ null   │
│ 0      │
│ 2      │
│ 2      │
│ 0      │
└────────┘

drop_nulls()

Remove missing values.

Notes

pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").drop_nulls())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
     a
0  2.0
1  4.0
3  3.0
5  5.0
>>> func(df_pl)  # nan != null for polars
shape: (5, 1)
┌─────┐
│ a   │
│ --- │
│ f64 │
╞═════╡
│ 2.0 │
│ 4.0 │
│ NaN │
│ 3.0 │
│ 5.0 │
└─────┘

fill_null(value)

Fill null values with given value.

Notes

pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame(
...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
>>> df_pl = pl.DataFrame(
...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.with_columns(nw.col("a", "b").fill_null(0))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
     a    b
0  2.0  2.0
1  4.0  4.0
2  0.0  0.0
3  3.0  3.0
4  5.0  5.0
>>> func(df_pl)  # nan != null for polars
shape: (5, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2   ┆ 2.0 │
│ 4   ┆ 4.0 │
│ 0   ┆ NaN │
│ 3   ┆ 3.0 │
│ 5   ┆ 5.0 │
└─────┴─────┘

filter(*predicates)

Filters elements based on a condition, returning a new expression.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
>>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(
...         nw.col("a").filter(nw.col("a") > 4),
...         nw.col("b").filter(nw.col("b") < 13),
...     )

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a   b
3  5  10
4  6  11
5  7  12
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 5   ┆ 10  │
│ 6   ┆ 11  │
│ 7   ┆ 12  │
└─────┴─────┘

gather_every(n, offset=0)

Take every nth value in the Series and return it as a new Series.

Parameters:

Name Type Description Default
n int

Gather every n-th row.

required
offset int

Starting index.

0

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function in which we gather every 2 rows, starting from an offset of 1:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").gather_every(n=2, offset=1))
>>> func(df_pd)
   a
1  2
3  4
>>> func(df_pl)
shape: (2, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 4   │
└─────┘

head(n=10)

Get the first n rows.

Parameters:

Name Type Description Default
n int

Number of rows to return.

10

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function that returns the first 3 rows:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").head(3))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a
0  0
1  1
2  2
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 0   │
│ 1   │
│ 2   │
└─────┘

clip(lower_bound=None, upper_bound=None)

Clip values in the Series.

Parameters:

Name Type Description Default
lower_bound Any | None

Lower bound value.

None
upper_bound Any | None

Upper bound value.

None

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> s = [1, 2, 3]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})

We define a library agnostic function:

>>> @nw.narwhalify
... def func_lower(df):
...     return df.select(nw.col("s").clip(2))

We can then pass either pandas or Polars to func_lower:

>>> func_lower(df_pd)
   s
0  2
1  2
2  3
>>> func_lower(df_pl)
shape: (3, 1)
┌─────┐
│ s   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 2   │
│ 3   │
└─────┘

We define another library agnostic function:

>>> @nw.narwhalify
... def func_upper(df):
...     return df.select(nw.col("s").clip(upper_bound=2))

We can then pass either pandas or Polars to func_upper:

>>> func_upper(df_pd)
   s
0  1
1  2
2  2
>>> func_upper(df_pl)
shape: (3, 1)
┌─────┐
│ s   │
│ --- │
│ i64 │
╞═════╡
│ 1   │
│ 2   │
│ 2   │
└─────┘

We can have both at the same time:

>>> s = [-1, 1, -3, 3, -5, 5]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})

We define a library agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("s").clip(-1, 3))

We can pass either pandas or Polars to func:

>>> func(df_pd)
   s
0 -1
1  1
2 -1
3  3
4 -1
5  3
>>> func(df_pl)
shape: (6, 1)
┌─────┐
│ s   │
│ --- │
│ i64 │
╞═════╡
│ -1  │
│ 1   │
│ -1  │
│ 3   │
│ -1  │
│ 3   │
└─────┘

is_between(lower_bound, upper_bound, closed='both')

Check if this expression is between the given lower and upper bounds.

Parameters:

Name Type Description Default
lower_bound Any

Lower bound value.

required
upper_bound Any

Upper bound value.

required
closed str

Define which sides of the interval are closed (inclusive).

'both'

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").is_between(2, 4, "right"))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
       a
0  False
1  False
2   True
3   True
4  False
>>> func(df_pl)
shape: (5, 1)
┌───────┐
│ a     │
│ ---   │
│ bool  │
╞═══════╡
│ false │
│ false │
│ true  │
│ true  │
│ false │
└───────┘

is_duplicated()

Return a boolean mask indicating duplicated values.

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.all().is_duplicated())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
       a      b
0   True   True
1  False   True
2  False  False
3   True  False
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ true  ┆ true  │
│ false ┆ true  │
│ false ┆ false │
│ true  ┆ false │
└───────┴───────┘

is_first_distinct()

Return a boolean mask indicating the first occurrence of each distinct value.

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.all().is_first_distinct())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
       a      b
0   True   True
1   True  False
2   True   True
3  False   True
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ true  ┆ true  │
│ true  ┆ false │
│ true  ┆ true  │
│ false ┆ true  │
└───────┴───────┘

is_in(other)

Check if elements of this expression are present in the other iterable.

Parameters:

Name Type Description Default
other Any

Iterable of values to check the expression's elements against.

required

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.with_columns(b=nw.col("a").is_in([1, 2]))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
    a      b
0   1   True
1   2   True
2   9  False
3  10  False
>>> func(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a   ┆ b     │
│ --- ┆ ---   │
│ i64 ┆ bool  │
╞═════╪═══════╡
│ 1   ┆ true  │
│ 2   ┆ true  │
│ 9   ┆ false │
│ 10  ┆ false │
└─────┴───────┘

is_last_distinct()

Return a boolean mask indicating the last occurrence of each distinct value.

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.all().is_last_distinct())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
       a      b
0  False  False
1   True   True
2   True   True
3   True   True
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ false ┆ false │
│ true  ┆ true  │
│ true  ┆ true  │
│ true  ┆ true  │
└───────┴───────┘

is_null()

Returns a boolean Series indicating which values are null.

Notes

pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame(
...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
>>> df_pl = pl.DataFrame(
...     {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.with_columns(
...         a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
...     )

We can then pass either pandas or Polars to func:

>>> func(df_pd)
     a    b  a_is_null  b_is_null
0  2.0  2.0      False      False
1  4.0  4.0      False      False
2  NaN  NaN       True       True
3  3.0  3.0      False      False
4  5.0  5.0      False      False
>>> func(df_pl)  # nan != null for polars
shape: (5, 4)
┌──────┬─────┬───────────┬───────────┐
│ a    ┆ b   ┆ a_is_null ┆ b_is_null │
│ ---  ┆ --- ┆ ---       ┆ ---       │
│ i64  ┆ f64 ┆ bool      ┆ bool      │
╞══════╪═════╪═══════════╪═══════════╡
│ 2    ┆ 2.0 ┆ false     ┆ false     │
│ 4    ┆ 4.0 ┆ false     ┆ false     │
│ null ┆ NaN ┆ true      ┆ false     │
│ 3    ┆ 3.0 ┆ false     ┆ false     │
│ 5    ┆ 5.0 ┆ false     ┆ false     │
└──────┴─────┴───────────┴───────────┘

is_unique()

Return a boolean mask indicating unique values.

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.all().is_unique())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
       a      b
0  False  False
1   True  False
2   True   True
3  False   True
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a     ┆ b     │
│ ---   ┆ ---   │
│ bool  ┆ bool  │
╞═══════╪═══════╡
│ false ┆ false │
│ true  ┆ false │
│ true  ┆ true  │
│ false ┆ true  │
└───────┴───────┘

len()

Return the number of elements in the column.

Null values count towards the total.

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function that computes the length of column "a" for different values of column "b":

>>> @nw.narwhalify
... def func(df):
...     return df.select(
...         nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
...         nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
...     )

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a1  a2
0   2   1
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a1  ┆ a2  │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 2   ┆ 1   │
└─────┴─────┘

max()

Returns the maximum value(s) from the column(s).

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.max("a", "b"))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
    a    b
0  20  100
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 20  ┆ 100 │
└─────┴─────┘

mean()

Get mean value.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").mean())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
     a    b
0  0.0  4.0
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 0.0 ┆ 4.0 │
└─────┴─────┘

min()

Returns the minimum value(s) from the column(s).

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.min("a", "b"))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a  b
0  1  3
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 3   │
└─────┴─────┘

mode()

Compute the most occurring value(s).

Can return multiple values.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> data = {
...     "a": [1, 1, 2, 3],
...     "b": [1, 1, 2, 2],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

We define a library agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").mode()).sort("a")

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a
0  1
>>> func(df_pl)
shape: (1, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 1   │
└─────┘

null_count()

Count null values.

Notes

pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.all().null_count())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a  b
0  1  2
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 1   ┆ 2   │
└─────┴─────┘

n_unique()

Returns count of unique values.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").n_unique())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a  b
0  5  3
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 5   ┆ 3   │
└─────┴─────┘

over(*keys)

Compute expressions over the given groups.

Parameters:

Name Type Description Default
keys str | Iterable[str]

Names of columns to compute window expression over. Must be names of columns, as opposed to expressions - so, this is a bit less flexible than Polars' Expr.over.

()

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.with_columns(a_min_per_group=nw.col("a").min().over("b"))

We can then pass either pandas or Polars:

>>> func(df_pd)
   a  b  a_min_per_group
0  1  1                1
1  2  1                1
2  3  2                3
>>> func(df_pl)
shape: (3, 3)
┌─────┬─────┬─────────────────┐
│ a   ┆ b   ┆ a_min_per_group │
│ --- ┆ --- ┆ ---             │
│ i64 ┆ i64 ┆ i64             │
╞═════╪═════╪═════════════════╡
│ 1   ┆ 1   ┆ 1               │
│ 2   ┆ 1   ┆ 1               │
│ 3   ┆ 2   ┆ 3               │
└─────┴─────┴─────────────────┘

pipe(function, *args, **kwargs)

Pipe function call.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> data = {"a": [1, 2, 3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a library-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").pipe(lambda x: x + 1))

We can then pass any supported library:

>>> func(df_pd)
   a
0  2
1  3
2  4
3  5
>>> func(df_pl)
shape: (4, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 3   │
│ 4   │
│ 5   │
└─────┘

quantile(quantile, interpolation)

Get quantile value.

Note
  • pandas and Polars may have implementation differences for a given interpolation method.
  • dask has its own method to approximate quantiles and doesn't implement 'nearest', 'higher', 'lower', or 'midpoint' as interpolation methods - use 'linear', which is closest to the native dask method.

Parameters:

Name Type Description Default
quantile float

Quantile between 0.0 and 1.0.

required
interpolation {'nearest', 'higher', 'lower', 'midpoint', 'linear'}

Interpolation method.

required

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
      a     b
0  24.5  74.5
>>> func(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a    ┆ b    │
│ ---  ┆ ---  │
│ f64  ┆ f64  │
╞══════╪══════╡
│ 24.5 ┆ 74.5 │
└──────┴──────┘

round(decimals=0)

Round underlying floating point data by decimals digits.

Parameters:

Name Type Description Default
decimals int

Number of decimals to round by.

0

Notes

For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.

pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 to 4.0, etc..).

Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc..).

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1.12345, 2.56789, 3.901234]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function that rounds to the first decimal:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").round(1))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
     a
0  1.1
1  2.6
2  3.9
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ f64 │
╞═════╡
│ 1.1 │
│ 2.6 │
│ 3.9 │
└─────┘

sample(n=None, *, fraction=None, with_replacement=False, seed=None)

Sample randomly from this expression.

Parameters:

Name Type Description Default
n int | None

Number of items to return. Cannot be used with fraction.

None
fraction float | None

Fraction of items to return. Cannot be used with n.

None
with_replacement bool

Allow values to be sampled more than once.

False
seed int | None

Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation.

None

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> df_pd = pd.DataFrame({"a": [1, 2, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").sample(fraction=1.0, with_replacement=True))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a
2  3
0  1
2  3
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 2   │
│ 3   │
│ 3   │
└─────┘

shift(n)

Shift values by n positions.

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to shift and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").shift(1).fill_null(0).cast(nw.Int64)

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(a_shift=nw.col("a").shift(n=1))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a_shift
0      NaN
1      1.0
2      1.0
3      3.0
4      5.0
>>> func(df_pl)
shape: (5, 1)
┌─────────┐
│ a_shift │
│ ---     │
│ i64     │
╞═════════╡
│ null    │
│ 1       │
│ 1       │
│ 3       │
│ 5       │
└─────────┘

sort(*, descending=False, nulls_last=False)

Sort this column. Place null values first.

Parameters:

Name Type Description Default
descending bool

Sort in descending order.

False
nulls_last bool

Place null values last instead of first.

False

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]})
>>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]})

Let's define dataframe-agnostic functions:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").sort())
>>> def func_descend(df):
...     df = nw.from_native(df)
...     df = df.select(nw.col("a").sort(descending=True))
...     return nw.to_native(df)

We can then pass either pandas or Polars to func:

>>> func(df_pd)
     a
1  NaN
2  1.0
3  2.0
0  5.0
>>> func(df_pl)
shape: (4, 1)
┌──────┐
│ a    │
│ ---  │
│ i64  │
╞══════╡
│ null │
│ 1    │
│ 2    │
│ 5    │
└──────┘
>>> func_descend(df_pd)
     a
1  NaN
0  5.0
3  2.0
2  1.0
>>> func_descend(df_pl)
shape: (4, 1)
┌──────┐
│ a    │
│ ---  │
│ i64  │
╞══════╡
│ null │
│ 5    │
│ 2    │
│ 1    │
└──────┘

std(*, ddof=1)

Get standard deviation.

Parameters:

Name Type Description Default
ddof int

“Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

1

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").std(ddof=0))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
          a         b
0  17.79513  1.265789
>>> func(df_pl)
shape: (1, 2)
┌──────────┬──────────┐
│ a        ┆ b        │
│ ---      ┆ ---      │
│ f64      ┆ f64      │
╞══════════╪══════════╡
│ 17.79513 ┆ 1.265789 │
└──────────┴──────────┘

sum()

Return the sum value.

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").sum())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
    a    b
0  15  150
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 15  ┆ 150 │
└─────┴─────┘

tail(n=10)

Get the last n rows.

Parameters:

Name Type Description Default
n int

Number of rows to return.

10

Examples:

>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

Let's define a dataframe-agnostic function that returns the last 3 rows:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a").tail(3))

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a
7  7
8  8
9  9
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a   │
│ --- │
│ i64 │
╞═════╡
│ 7   │
│ 8   │
│ 9   │
└─────┘

unique()

Return unique values.

Examples:

>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})

Let's define a dataframe-agnostic function:

>>> @nw.narwhalify
... def func(df):
...     return df.select(nw.col("a", "b").unique())

We can then pass either pandas or Polars to func:

>>> func(df_pd)
   a  b
0  1  2
1  3  4
2  5  6
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a   ┆ b   │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1   ┆ 2   │
│ 3   ┆ 4   │
│ 5   ┆ 6   │
└─────┴─────┘