`narwhals.Expr`

abs

abs() -> Self

Return absolute value of each element.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, -2], "b": [-3, 4]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.col("a", "b").abs().name.suffix("_abs"))
┌─────────────────────┐
| Narwhals DataFrame  |
|---------------------|
|   a  b  a_abs  b_abs|
|0  1 -3      1      3|
|1 -2  4      2      4|
└─────────────────────┘

alias

alias(name: str) -> Self

Rename the expression.

Parameters:

Name	Type	Description	Default
`name`	`str`	The new name.	required

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df = nw.from_native(df_native)
>>> df.select((nw.col("b") + 10).alias("c"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|          c       |
|      0  14       |
|      1  15       |
└──────────────────┘

all

all() -> Self

Return whether all values in the column are True.

If there are no non-null elements, the result is True.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").all())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|         a     b  |
|  0  False  True  |
└──────────────────┘

any

any() -> Self

Return whether any of the values in the column are True.

If there are no non-null elements, the result is False.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").any())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a     b   |
|  0  True  True   |
└──────────────────┘

any_value

any_value(*, ignore_nulls: bool = False) -> Self

Get a random value from the column.

Warning

This functionality is considered unstable as it diverges from the polars API. It may be changed at any point without it being considered a breaking change.

Parameters:

Name	Type	Description	Default
`ignore_nulls`	`bool`	Whether to ignore null values or not. If `True` and there are no non-null elements, then `None` is returned.	`False`

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> data = {"a": [1, 1, 2, 2], "b": [None, "foo", "baz", None]}
>>> df_native = pa.table(data)
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().any_value(ignore_nulls=False))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|  pyarrow.Table   |
|  a: int64        |
|  b: null         |
|  ----            |
|  a: [[1]]        |
|  b: [1 nulls]    |
└──────────────────┘

>>> df.group_by("a").agg(nw.col("b").any_value(ignore_nulls=True))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|pyarrow.Table     |
|a: int64          |
|b: string         |
|----              |
|a: [[1,2]]        |
|b: [["foo","baz"]]|
└──────────────────┘

cast

cast(dtype: IntoDType) -> Self

Redefine an object's data type.

Parameters:

Name	Type	Description	Default
`dtype`	`IntoDType`	Data type that the object will be cast into.	required

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|      foo  bar    |
|   0  1.0    6    |
|   1  2.0    7    |
|   2  3.0    8    |
└──────────────────┘

ceil

ceil() -> Self

Compute the numerical ceiling.

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1.1, 4.3, -1.3]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(ceil=nw.col("values").ceil())
>>> result
┌────────────────────────┐
|   Narwhals DataFrame   |
|------------------------|
|pyarrow.Table           |
|values: double          |
|ceil: double            |
|----                    |
|values: [[1.1,4.3,-1.3]]|
|ceil: [[2,5,-1]]        |
└────────────────────────┘

clip

clip(
    lower_bound: (
        IntoExpr | NumericLiteral | TemporalLiteral | None
    ) = None,
    upper_bound: (
        IntoExpr | NumericLiteral | TemporalLiteral | None
    ) = None,
) -> Self

Clip values in the expression to the given lower and upper bounds.

Parameters:

Name	Type	Description	Default
`lower_bound`	`IntoExpr \| NumericLiteral \| TemporalLiteral \| None`	Lower bound value. String literals are treated as column names.	`None`
`upper_bound`	`IntoExpr \| NumericLiteral \| TemporalLiteral \| None`	Upper bound value. String literals are treated as column names.	`None`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_clipped=nw.col("a").clip(-1, 3))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|    a  a_clipped  |
| 0  1          1  |
| 1  2          2  |
| 2  3          3  |
└──────────────────┘

cos

cos() -> Self

Compute the element-wise value for the cosine.

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> from math import pi
>>> df_native = pa.table({"values": [0, pi / 2, pi]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(cos=nw.col("values").cos()).select(
...     nw.all().round(4)
... )
>>> result
┌───────────────────────────┐
|    Narwhals DataFrame     |
|---------------------------|
|pyarrow.Table              |
|values: double             |
|cos: double                |
|----                       |
|values: [[0,1.5708,3.1416]]|
|cos: [[1,0,-1]]            |
└───────────────────────────┘

count

count() -> Self

Returns the number of non-null elements in the column.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().count())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a  b      |
|     0  3  2      |
└──────────────────┘

cum_count

cum_count(*, reverse: bool = False) -> Self

Return the cumulative count of the non-null values in the column.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`reverse`	`bool`	reverse the operation	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": ["x", "k", None, "d"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.col("a").cum_count().alias("a_cum_count"),
...     nw.col("a").cum_count(reverse=True).alias("a_cum_count_reverse"),
... )
┌────────────────────────────────────────┐
|           Narwhals DataFrame           |
|----------------------------------------|
|     a  a_cum_count  a_cum_count_reverse|
|0    x            1                    3|
|1    k            2                    2|
|2  NaN            2                    1|
|3    d            3                    1|
└────────────────────────────────────────┘

cum_max

cum_max(*, reverse: bool = False) -> Self

Return the cumulative max of the non-null values in the column.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`reverse`	`bool`	reverse the operation	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.col("a").cum_max().alias("a_cum_max"),
...     nw.col("a").cum_max(reverse=True).alias("a_cum_max_reverse"),
... )
┌────────────────────────────────────┐
|         Narwhals DataFrame         |
|------------------------------------|
|     a  a_cum_max  a_cum_max_reverse|
|0  1.0        1.0                3.0|
|1  3.0        3.0                3.0|
|2  NaN        NaN                NaN|
|3  2.0        3.0                2.0|
└────────────────────────────────────┘

cum_min

cum_min(*, reverse: bool = False) -> Self

Return the cumulative min of the non-null values in the column.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`reverse`	`bool`	reverse the operation	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 1, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.col("a").cum_min().alias("a_cum_min"),
...     nw.col("a").cum_min(reverse=True).alias("a_cum_min_reverse"),
... )
┌────────────────────────────────────┐
|         Narwhals DataFrame         |
|------------------------------------|
|     a  a_cum_min  a_cum_min_reverse|
|0  3.0        3.0                1.0|
|1  1.0        1.0                1.0|
|2  NaN        NaN                NaN|
|3  2.0        1.0                2.0|
└────────────────────────────────────┘

cum_prod

cum_prod(*, reverse: bool = False) -> Self

Return the cumulative product of the non-null values in the column.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`reverse`	`bool`	reverse the operation	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.col("a").cum_prod().alias("a_cum_prod"),
...     nw.col("a").cum_prod(reverse=True).alias("a_cum_prod_reverse"),
... )
┌──────────────────────────────────────┐
|          Narwhals DataFrame          |
|--------------------------------------|
|     a  a_cum_prod  a_cum_prod_reverse|
|0  1.0         1.0                 6.0|
|1  3.0         3.0                 6.0|
|2  NaN         NaN                 NaN|
|3  2.0         6.0                 2.0|
└──────────────────────────────────────┘

cum_sum

cum_sum(*, reverse: bool = False) -> Self

Return cumulative sum.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`reverse`	`bool`	reverse the operation	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_cum_sum=nw.col("a").cum_sum())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|   a  b  a_cum_sum|
|0  1  2          1|
|1  1  4          2|
|2  3  4          5|
|3  5  6         10|
|4  5  6         15|
└──────────────────┘

diff

diff() -> Self

Returns the difference between each element and the previous one.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to calculate the diff and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").diff().fill_null(0).cast(nw.Int64)

Examples:

>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_diff=nw.col("a").diff())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| shape: (5, 2)    |
| ┌─────┬────────┐ |
| │ a   ┆ a_diff │ |
| │ --- ┆ ---    │ |
| │ i64 ┆ i64    │ |
| ╞═════╪════════╡ |
| │ 1   ┆ null   │ |
| │ 1   ┆ 0      │ |
| │ 3   ┆ 2      │ |
| │ 5   ┆ 2      │ |
| │ 5   ┆ 0      │ |
| └─────┴────────┘ |
└──────────────────┘

drop_nulls

drop_nulls() -> Self

Drop null values.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a").drop_nulls())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|  shape: (5, 1)   |
|  ┌─────┐         |
|  │ a   │         |
|  │ --- │         |
|  │ f64 │         |
|  ╞═════╡         |
|  │ 2.0 │         |
|  │ 4.0 │         |
|  │ NaN │         |
|  │ 3.0 │         |
|  │ 5.0 │         |
|  └─────┘         |
└──────────────────┘

ewm_mean

ewm_mean(
    *,
    com: float | None = None,
    span: float | None = None,
    half_life: float | None = None,
    alpha: float | None = None,
    adjust: bool = True,
    min_samples: int = 1,
    ignore_nulls: bool = False
) -> Self

Compute exponentially-weighted moving average.

Parameters:

Name	Type	Description	Default
`com`	`float \| None`	Specify decay in terms of center of mass, $\gamma$, with $\alpha = \frac{1}{1+\gamma}\forall\gamma\geq0$	`None`
`span`	`float \| None`	Specify decay in terms of span, $\theta$, with $\alpha = \frac{2}{\theta + 1} \forall \theta \geq 1$	`None`
`half_life`	`float \| None`	Specify decay in terms of half-life, $\tau$, with $\alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \forall \tau > 0$	`None`
`alpha`	`float \| None`	Specify smoothing factor alpha directly, $0 < \alpha \leq 1$.	`None`
`adjust`	`bool`	Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings. When `adjust=True` (the default), the EW function is calculated using weights $w_i = (1 - \alpha)^i$. When `adjust=False`, the EW function is calculated recursively by $$ y_0=x_0 $$ $$ y_t = (1 - \alpha)y_{t - 1} + \alpha x_t $$	`True`
`min_samples`	`int`	Minimum number of observations in window required to have a value (otherwise the result is null).	`1`
`ignore_nulls`	`bool`	Ignore missing values when calculating weights. When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted average of $[x_0, None, x_2]$ are $(1-\alpha)^2$ and $1$ if `adjust=True`, and $(1-\alpha)^2$ and $\alpha$ if `adjust=False`. When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of $x_0$ and $x_2$ used in calculating the final weighted average of $[x_0, None, x_2]$ are $1-\alpha$ and $1$ if `adjust=True`, and $1-\alpha$ and $\alpha$ if `adjust=False`.	`False`

Examples:

>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)

We define a library agnostic function:

>>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
...     df = nw.from_native(df_native)
...     return df.select(
...         nw.col("a").ewm_mean(com=1, ignore_nulls=False)
...     ).to_native()

We can then pass either pandas or Polars to agnostic_ewm_mean:

>>> agnostic_ewm_mean(df_pd)
          a
0  1.000000
1  1.666667
2  2.428571

>>> agnostic_ewm_mean(df_pl)
shape: (3, 1)
┌──────────┐
│ a        │
│ ---      │
│ f64      │
╞══════════╡
│ 1.0      │
│ 1.666667 │
│ 2.428571 │
└──────────┘

exp

exp() -> Self

Compute the exponential of each element.

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [-1, 0, 1]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(exp=nw.col("values").exp())
>>> result
┌────────────────────────────────────────────────┐
|               Narwhals DataFrame               |
|------------------------------------------------|
|pyarrow.Table                                   |
|values: int64                                   |
|exp: double                                     |
|----                                            |
|values: [[-1,0,1]]                              |
|exp: [[0.36787944117144233,1,2.718281828459045]]|
└────────────────────────────────────────────────┘

fill_nan

fill_nan(value: float | None) -> Self

Fill floating point NaN values with given value.

Parameters:

Name	Type	Description	Default
`value`	`float \| None`	Value used to fill NaN values.	required

Notes

This function only fills 'NaN' values, not null ones, except for pandas which doesn't distinguish between them. See null_handling for reference.

Examples:

>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
...     "SELECT * FROM VALUES (5.::DOUBLE, 50.::DOUBLE), ('NaN', null) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.col("a", "b").fill_nan(0).name.suffix("_nans_filled"))
┌───────────────────────────────────────────────────┐
|                Narwhals LazyFrame                 |
|---------------------------------------------------|
|┌────────┬────────┬───────────────┬───────────────┐|
|│   a    │   b    │ a_nans_filled │ b_nans_filled │|
|│ double │ double │    double     │    double     │|
|├────────┼────────┼───────────────┼───────────────┤|
|│    5.0 │   50.0 │           5.0 │          50.0 │|
|│    nan │   NULL │           0.0 │          NULL │|
|└────────┴────────┴───────────────┴───────────────┘|
└───────────────────────────────────────────────────┘

fill_null

fill_null(
    value: Expr | NonNestedLiteral = None,
    strategy: FillNullStrategy | None = None,
    limit: int | None = None,
) -> Self

Fill null values with given value.

Parameters:

Name	Type	Description	Default
`value`	`Expr \| NonNestedLiteral`	Value or expression used to fill null values.	`None`
`strategy`	`FillNullStrategy \| None`	Strategy used to fill null values.	`None`
`limit`	`int \| None`	Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.	`None`

Notes

pandas handles null values differently from other libraries. See null_handling for reference.
For pandas Series of object dtype, fill_null will not automatically change the Series' dtype as pandas used to do. Explicitly call cast if you want the dtype to change.

Examples:

>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame(
...     {
...         "a": [2, None, None, 3],
...         "b": [2.0, float("nan"), float("nan"), 3.0],
...         "c": [1, 2, 3, 4],
...     }
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.col("a", "b").fill_null(0).name.suffix("_filled"),
...     nw.col("a").fill_null(nw.col("c")).name.suffix("_filled_with_c"),
... )
┌────────────────────────────────────────────────────────────┐
|                     Narwhals DataFrame                     |
|------------------------------------------------------------|
|shape: (4, 6)                                               |
|┌──────┬─────┬─────┬──────────┬──────────┬─────────────────┐|
|│ a    ┆ b   ┆ c   ┆ a_filled ┆ b_filled ┆ a_filled_with_c │|
|│ ---  ┆ --- ┆ --- ┆ ---      ┆ ---      ┆ ---             │|
|│ i64  ┆ f64 ┆ i64 ┆ i64      ┆ f64      ┆ i64             │|
|╞══════╪═════╪═════╪══════════╪══════════╪═════════════════╡|
|│ 2    ┆ 2.0 ┆ 1   ┆ 2        ┆ 2.0      ┆ 2               │|
|│ null ┆ NaN ┆ 2   ┆ 0        ┆ NaN      ┆ 2               │|
|│ null ┆ NaN ┆ 3   ┆ 0        ┆ NaN      ┆ 3               │|
|│ 3    ┆ 3.0 ┆ 4   ┆ 3        ┆ 3.0      ┆ 3               │|
|└──────┴─────┴─────┴──────────┴──────────┴─────────────────┘|
└────────────────────────────────────────────────────────────┘

Using a strategy:

>>> df.select(
...     nw.col("a", "b"),
...     nw.col("a", "b")
...     .fill_null(strategy="forward", limit=1)
...     .name.suffix("_nulls_forward_filled"),
... )
┌────────────────────────────────────────────────────────────────┐
|                       Narwhals DataFrame                       |
|----------------------------------------------------------------|
|shape: (4, 4)                                                   |
|┌──────┬─────┬────────────────────────┬────────────────────────┐|
|│ a    ┆ b   ┆ a_nulls_forward_filled ┆ b_nulls_forward_filled │|
|│ ---  ┆ --- ┆ ---                    ┆ ---                    │|
|│ i64  ┆ f64 ┆ i64                    ┆ f64                    │|
|╞══════╪═════╪════════════════════════╪════════════════════════╡|
|│ 2    ┆ 2.0 ┆ 2                      ┆ 2.0                    │|
|│ null ┆ NaN ┆ 2                      ┆ NaN                    │|
|│ null ┆ NaN ┆ null                   ┆ NaN                    │|
|│ 3    ┆ 3.0 ┆ 3                      ┆ 3.0                    │|
|└──────┴─────┴────────────────────────┴────────────────────────┘|
└────────────────────────────────────────────────────────────────┘

filter

filter(*predicates: Any) -> Self

Filters elements based on a condition, returning a new expression.

Parameters:

Name	Type	Description	Default
`predicates`	`Any`	Conditions to filter by (which get AND-ed together).	`()`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
...     {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}
... )
>>> df = nw.from_native(df_native)
>>> df.select(
...     nw.col("a").filter(nw.col("a") > 4),
...     nw.col("b").filter(nw.col("b") < 13),
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a   b     |
|     3  5  10     |
|     4  6  11     |
|     5  7  12     |
└──────────────────┘

first

first(order_by: str | Iterable[str] | None = None) -> Self

Get the first value.

Notes

For lazy backends, this can only be used with over or with order_by.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> data = {"a": [1, 1, 2, 2], "b": ["foo", None, None, "baz"]}
>>> df_native = pd.DataFrame(data)
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().first())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|       a    b     |
|    0  1  foo     |
└──────────────────┘

>>> df.group_by("a").agg(nw.col("b").first())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|       a    b     |
|    0  1  foo     |
|    1  2  NaN     |
└──────────────────┘

floor

floor() -> Self

Compute the numerical floor.

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1.1, 4.3, -1.3]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(floor=nw.col("values").floor())
>>> result
┌────────────────────────┐
|   Narwhals DataFrame   |
|------------------------|
|pyarrow.Table           |
|values: double          |
|floor: double           |
|----                    |
|values: [[1.1,4.3,-1.3]]|
|floor: [[1,4,-2]]       |
└────────────────────────┘

is_between

is_between(
    lower_bound: Any | IntoExpr,
    upper_bound: Any | IntoExpr,
    closed: ClosedInterval = "both",
) -> Self

Check if this expression is between the given lower and upper bounds.

Parameters:

Name	Type	Description	Default
`lower_bound`	`Any \| IntoExpr`	Lower bound value. String literals are interpreted as column names.	required
`upper_bound`	`Any \| IntoExpr`	Upper bound value. String literals are interpreted as column names.	required
`closed`	`ClosedInterval`	Define which sides of the interval are closed (inclusive). Options are {"left", "right", "none", "both"}.	`'both'`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(b=nw.col("a").is_between(2, 4, "right"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|      a      b    |
|   0  1  False    |
|   1  2  False    |
|   2  3   True    |
|   3  4   True    |
|   4  5  False    |
└──────────────────┘

is_close

is_close(
    other: Expr | Series[Any] | NumericLiteral,
    *,
    abs_tol: float = 0.0,
    rel_tol: float = 1e-09,
    nans_equal: bool = False
) -> Self

Check if this expression is close, i.e. almost equal, to the other expression.

Two values a and b are considered close if the following condition holds:

\[ |a-b| \le \max \{ \text{rel\_tol} \cdot \max \{ |a|, |b| \}, \text{abs\_tol} \} \]

Parameters:

Name	Type	Description	Default
`other`	`Expr \| Series[Any] \| NumericLiteral`	Values to compare with.	required
`abs_tol`	`float`	Absolute tolerance. This is the maximum allowed absolute difference between two values. Must be non-negative.	`0.0`
`rel_tol`	`float`	Relative tolerance. This is the maximum allowed difference between two values, relative to the larger absolute value. Must be in the range [0, 1).	`1e-09`
`nans_equal`	`bool`	Whether NaN values should be considered equal.	`False`

Notes

The implementation of this method is symmetric and mirrors the behavior of math.isclose. Specifically note that this behavior is different to numpy.isclose.

Examples:

>>> import duckdb
>>> import pyarrow as pa
>>> import narwhals as nw
>>>
>>> data = {
...     "x": [1.0, float("inf"), 1.41, None, float("nan")],
...     "y": [1.2, float("inf"), 1.40, None, float("nan")],
... }
>>> _table = pa.table(data)
>>> df_native = duckdb.table("_table")
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     is_close=nw.col("x").is_close(
...         nw.col("y"), abs_tol=0.1, nans_equal=True
...     )
... )
┌──────────────────────────────┐
|      Narwhals LazyFrame      |
|------------------------------|
|┌────────┬────────┬──────────┐|
|│   x    │   y    │ is_close │|
|│ double │ double │ boolean  │|
|├────────┼────────┼──────────┤|
|│    1.0 │    1.2 │ false    │|
|│    inf │    inf │ true     │|
|│   1.41 │    1.4 │ true     │|
|│   NULL │   NULL │ NULL     │|
|│    nan │    nan │ true     │|
|└────────┴────────┴──────────┘|
└──────────────────────────────┘

is_duplicated

is_duplicated() -> Self

Return a boolean mask indicating duplicated values.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.all().is_duplicated().name.suffix("_is_duplicated"))
┌─────────────────────────────────────────┐
|           Narwhals DataFrame            |
|-----------------------------------------|
|   a  b  a_is_duplicated  b_is_duplicated|
|0  1  a             True             True|
|1  2  a            False             True|
|2  3  b            False            False|
|3  1  c             True            False|
└─────────────────────────────────────────┘

is_finite

is_finite() -> Self

Returns boolean values indicating which original values are finite.

Warning

pandas handles null values differently from Polars and PyArrow. See null_handling for reference. is_finite will return False for NaN and null values in the Dask and pandas non-nullable backends, while for Polars, PyArrow and pandas nullable backends null values are kept as such.

Examples:

>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [float("nan"), float("inf"), 2.0, None]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_is_finite=nw.col("a").is_finite())
┌──────────────────────┐
|  Narwhals DataFrame  |
|----------------------|
|shape: (4, 2)         |
|┌──────┬─────────────┐|
|│ a    ┆ a_is_finite │|
|│ ---  ┆ ---         │|
|│ f64  ┆ bool        │|
|╞══════╪═════════════╡|
|│ NaN  ┆ false       │|
|│ inf  ┆ false       │|
|│ 2.0  ┆ true        │|
|│ null ┆ null        │|
|└──────┴─────────────┘|
└──────────────────────┘

is_first_distinct

is_first_distinct() -> Self

Return a boolean mask indicating the first occurrence of each distinct value.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.all().is_first_distinct().name.suffix("_is_first_distinct")
... )
┌─────────────────────────────────────────────────┐
|               Narwhals DataFrame                |
|-------------------------------------------------|
|   a  b  a_is_first_distinct  b_is_first_distinct|
|0  1  a                 True                 True|
|1  2  a                 True                False|
|2  3  b                 True                 True|
|3  1  c                False                 True|
└─────────────────────────────────────────────────┘

is_in

is_in(other: Any) -> Self

Check if elements of this expression are present in the other iterable.

Parameters:

Name	Type	Description	Default
`other`	`Any`	Iterable of values to check membership against.	required

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 9, 10]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(b=nw.col("a").is_in([1, 2]))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|       a      b   |
|   0   1   True   |
|   1   2   True   |
|   2   9  False   |
|   3  10  False   |
└──────────────────┘

is_last_distinct

is_last_distinct() -> Self

Return a boolean mask indicating the last occurrence of each distinct value.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.all().is_last_distinct().name.suffix("_is_last_distinct")
... )
┌───────────────────────────────────────────────┐
|              Narwhals DataFrame               |
|-----------------------------------------------|
|   a  b  a_is_last_distinct  b_is_last_distinct|
|0  1  a               False               False|
|1  2  a                True                True|
|2  3  b                True                True|
|3  1  c                True                True|
└───────────────────────────────────────────────┘

is_nan

is_nan() -> Self

Indicate which values are NaN.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
...     "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan()
... )
┌────────────────────────────────────────┐
|           Narwhals LazyFrame           |
|----------------------------------------|
|┌───────┬────────┬──────────┬──────────┐|
|│   a   │   b    │ a_is_nan │ b_is_nan │|
|│ int32 │ double │ boolean  │ boolean  │|
|├───────┼────────┼──────────┼──────────┤|
|│  NULL │    nan │ NULL     │ true     │|
|│     2 │    2.0 │ false    │ false    │|
|└───────┴────────┴──────────┴──────────┘|
└────────────────────────────────────────┘

is_null

is_null() -> Self

Returns a boolean Series indicating which values are null.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
...     "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
... )
┌──────────────────────────────────────────┐
|            Narwhals LazyFrame            |
|------------------------------------------|
|┌───────┬────────┬───────────┬───────────┐|
|│   a   │   b    │ a_is_null │ b_is_null │|
|│ int32 │ double │  boolean  │  boolean  │|
|├───────┼────────┼───────────┼───────────┤|
|│  NULL │    nan │ true      │ false     │|
|│     2 │    2.0 │ false     │ false     │|
|└───────┴────────┴───────────┴───────────┘|
└──────────────────────────────────────────┘

is_unique

is_unique() -> Self

Return a boolean mask indicating unique values.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.all().is_unique().name.suffix("_is_unique"))
┌─────────────────────────────────┐
|       Narwhals DataFrame        |
|---------------------------------|
|   a  b  a_is_unique  b_is_unique|
|0  1  a        False        False|
|1  2  a         True        False|
|2  3  b         True         True|
|3  1  c        False         True|
└─────────────────────────────────┘

kurtosis

kurtosis() -> Self

Compute the kurtosis (Fisher's definition) without bias correction.

Kurtosis is the fourth central moment divided by the square of the variance. Fisher's definition is used, in which 3.0 is subtracted from the result so that a normal distribution has a kurtosis of 0.0.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").kurtosis())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|      a         b |
| 0 -1.3  0.210657 |
└──────────────────┘

last

last(order_by: str | Iterable[str] | None = None) -> Self

Get the last value.

Notes

For lazy backends, this can only be used with over or with order_by.

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> data = {"a": [1, 1, 2, 2], "b": ["foo", None, None, "baz"]}
>>> df_native = pa.table(data)
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().last())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|  pyarrow.Table   |
|  a: int64        |
|  b: string       |
|  ----            |
|  a: [[2]]        |
|  b: [["baz"]]    |
└──────────────────┘

>>> df.group_by("a").agg(nw.col("b").last())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|pyarrow.Table     |
|a: int64          |
|b: string         |
|----              |
|a: [[1,2]]        |
|b: [[null,"baz"]] |
└──────────────────┘

len

len() -> Self

Return the number of elements in the column.

Null values count towards the total.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": ["x", "y", "z"], "b": [1, 2, 1]})
>>> df = nw.from_native(df_native)
>>> df.select(
...     nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
...     nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|       a1  a2     |
|    0   2   1     |
└──────────────────┘

log

log(base: float = math.e) -> Self

Compute the logarithm to a given base.

Parameters:

Name	Type	Description	Default
`base`	`float`	Given base, defaults to `e`	`e`

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1, 2, 4]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(
...     log=nw.col("values").log(), log_2=nw.col("values").log(base=2)
... )
>>> result
┌────────────────────────────────────────────────┐
|               Narwhals DataFrame               |
|------------------------------------------------|
|pyarrow.Table                                   |
|values: int64                                   |
|log: double                                     |
|log_2: double                                   |
|----                                            |
|values: [[1,2,4]]                               |
|log: [[0,0.6931471805599453,1.3862943611198906]]|
|log_2: [[0,1,2]]                                |
└────────────────────────────────────────────────┘

map_batches

map_batches(
    function: Callable[[Any], CompliantExpr[Any, Any]],
    return_dtype: DType | None = None,
    *,
    returns_scalar: bool = False
) -> Self

Apply a custom Python function to a whole Series or sequence of Series.

The output of this custom function is presumed to be either a Series, or a NumPy array (in which case it will be automatically converted into a Series).

Parameters:

Name	Type	Description	Default
`function`	`Callable[[Any], CompliantExpr[Any, Any]]`	Function to apply to Series.	required
`return_dtype`	`DType \| None`	Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function.	`None`
`returns_scalar`	`bool`	If the function returns a scalar, by default it will be wrapped in a list in the output, since the assumption is that the function always returns something Series-like. If you want to keep the result as a scalar, set this argument to True.	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     nw.col("a", "b")
...     .map_batches(lambda s: s.to_numpy() + 1, return_dtype=nw.Float64)
...     .name.suffix("_mapped")
... )
┌───────────────────────────┐
|    Narwhals DataFrame     |
|---------------------------|
|   a  b  a_mapped  b_mapped|
|0  1  4       2.0       5.0|
|1  2  5       3.0       6.0|
|2  3  6       4.0       7.0|
└───────────────────────────┘

max

max() -> Self

Return the maximum value of each column.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.max("a", "b"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a    b    |
|    0  20  100    |
└──────────────────┘

mean

mean() -> Self

Get mean value.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").mean())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a    b    |
|   0  0.0  4.0    |
└──────────────────┘

median

median() -> Self

Get median value.

Notes

Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").median())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a    b    |
|   0  3.0  4.0    |
└──────────────────┘

min

min() -> Self

Return the minimum value of each column.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.min("a", "b"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a  b      |
|     0  1  3      |
└──────────────────┘

mode

mode(*, keep: ModeKeepStrategy = 'all') -> Self

Compute the most occurring value(s).

Can return multiple values.

Parameters:

Name	Type	Description	Default
`keep`	`ModeKeepStrategy`	Whether to keep all modes or any mode found. Note that `keep='any'` is not deterministic for multimodal values.	`'all'`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 2, 3], "b": [1, 1, 2, 2]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a").mode()).sort("a")
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|          a       |
|       0  1       |
└──────────────────┘

n_unique

n_unique() -> Self

Returns count of unique values.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").n_unique())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a  b      |
|     0  5  3      |
└──────────────────┘

null_count

null_count() -> Self

Count null values.

Notes

pandas handles null values differently from Polars and PyArrow. See null_handling for reference.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
...     {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
... )
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().null_count())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a  b      |
|     0  1  2      |
└──────────────────┘

over

over(
    *partition_by: str | Sequence[str],
    order_by: str | Sequence[str] | None = None
) -> Self

Compute expressions over the given groups (optionally with given order).

Parameters:

Name	Type	Description	Default
`partition_by`	`str \| Sequence[str]`	Names of columns to compute window expression over. Must be names of columns, as opposed to expressions - so, this is a bit less flexible than Polars' `Expr.over`. If not specified, the expression is computed over the entire frame (i.e., no grouping is applied).	`()`
`order_by`	`str \| Sequence[str] \| None`	Column(s) to order window functions by. For lazy backends, this argument is required when `over` is applied to order-dependent functions, see order-dependence. When `order_by` is specified, the expression is evaluated on the frame sorted by the given column(s), and, if applicable, the results are returned with the original row order preserved.	`None`

Note

At least one of partition_by or order_by must be provided.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 4], "b": ["x", "x", "y"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_min_per_group=nw.col("a").min().over("b"))
┌────────────────────────┐
|   Narwhals DataFrame   |
|------------------------|
|   a  b  a_min_per_group|
|0  1  x                1|
|1  2  x                1|
|2  4  y                4|
└────────────────────────┘

When partition_by is omitted, the expression is computed over the entire frame. This is useful with order_by for order-dependent operations without grouping:

>>> import duckdb
>>> import pyarrow as pa
>>> import narwhals as nw
>>>
>>> data = {"a": [3, 1, 2], "b": ["x", "y", "z"]}
>>> _table = pa.table(data)
>>> df = nw.from_native(duckdb.table("_table"))
>>> expr = nw.col("a").cum_sum().over(order_by="a")
>>> df.with_columns(a_cum_sum=expr).sort("a")
┌───────────────────────────────┐
|      Narwhals LazyFrame       |
|-------------------------------|
|┌───────┬─────────┬───────────┐|
|│   a   │    b    │ a_cum_sum │|
|│ int64 │ varchar │  int128   │|
|├───────┼─────────┼───────────┤|
|│     1 │ y       │         1 │|
|│     2 │ z       │         3 │|
|│     3 │ x       │         6 │|
|└───────┴─────────┴───────────┘|
└───────────────────────────────┘

pipe

pipe(
    function: Callable[Concatenate[Self, PS], R],
    *args: args,
    **kwargs: kwargs
) -> R

Pipe function call.

Parameters:

Name	Type	Description	Default
`function`	`Callable[Concatenate[Self, PS], R]`	Function to apply.	required
`args`	`args`	Positional arguments to pass to function.	`()`
`kwargs`	`kwargs`	Keyword arguments to pass to function.	`{}`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_piped=nw.col("a").pipe(lambda x: x + 1))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|     a  a_piped   |
|  0  1        2   |
|  1  2        3   |
|  2  3        4   |
|  3  4        5   |
└──────────────────┘

quantile

quantile(
    quantile: float,
    interpolation: RollingInterpolationMethod,
) -> Self

Get quantile value.

Parameters:

Name	Type	Description	Default
`quantile`	`float`	Quantile between 0.0 and 1.0.	required
`interpolation`	`RollingInterpolationMethod`	Interpolation method.	required

Note

pandas and Polars may have implementation differences for a given interpolation method.
dask has its own method for approximating quantiles and does not implement 'nearest', 'higher', 'lower', or 'midpoint' as interpolation methods - use 'linear', which is closest to dask's native method.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
...     {"a": list(range(50)), "b": list(range(50, 100))}
... )
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a     b   |
|  0  24.5  74.5   |
└──────────────────┘

rank

rank(
    method: RankMethod = "average",
    *,
    descending: bool = False
) -> Self

Assign ranks to data, dealing with ties appropriately.

Notes

The resulting dtype may differ between backends.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`method`	`RankMethod`	The method used to assign ranks to tied elements. The following methods are available (default is 'average'): "average": The average of the ranks that would have been assigned to all the tied values is assigned to each value. "min": The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) "max": The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. "dense": Like "min", but the next highest element is assigned the rank immediately after those assigned to the tied elements. "ordinal": All values are given a distinct rank, corresponding to the order that the values occur in the Series.	`'average'`
`descending`	`bool`	Rank in descending order.	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 6, 1, 1, 6]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(rank=nw.col("a").rank(method="dense"))
>>> result
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|       a  rank    |
|    0  3   2.0    |
|    1  6   3.0    |
|    2  1   1.0    |
|    3  1   1.0    |
|    4  6   3.0    |
└──────────────────┘

replace_strict

replace_strict(
    old: Sequence[Any] | Mapping[Any, Any],
    new: Sequence[Any] | None = None,
    *,
    default: Any | NoDefault = NO_DEFAULT,
    return_dtype: IntoDType | None = None
) -> Self

Replace all values by different values.

Parameters:

Name	Type	Description	Default
`old`	`Sequence[Any] \| Mapping[Any, Any]`	Sequence of values to replace. It also accepts a mapping of values to their replacement as syntactic sugar for `replace_strict(old=list(mapping.keys()), new=list(mapping.values()))`.	required
`new`	`Sequence[Any] \| None`	Sequence of values to replace by. Length must match the length of `old`.	`None`
`default`	`Any \| NoDefault`	Set values that were not replaced to this value. If no default is specified (the default), an error is raised if any values were not replaced. Accepts expression input. Non-expression inputs are parsed as literals.	`NO_DEFAULT`
`return_dtype`	`IntoDType \| None`	The data type of the resulting expression. If set to `None` (default), the data type is determined automatically based on the other inputs.	`None`

Raises:

Type	Description
`InvalidOperationError`	If any non-null values in the original column were not replaced, and no default was specified.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 0, 1, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     b=nw.col("a").replace_strict(
...         [0, 1, 2, 3],
...         ["zero", "one", "two", "three"],
...         return_dtype=nw.String,
...     )
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|      a      b    |
|   0  3  three    |
|   1  0   zero    |
|   2  1    one    |
|   3  2    two    |
└──────────────────┘

Replace values and set a default for values not in the mapping:

>>> data = {"a": [1, 2, 3, 4], "b": ["beluga", "narwhal", "orca", "vaquita"]}
>>> df = nw.from_native(pd.DataFrame(data))
>>> df.with_columns(
...     a_replaced=nw.col("a").replace_strict(
...         {1: "one", 2: "two"},
...         default=nw.concat_str(nw.lit("default_"), nw.col("b")),
...         return_dtype=nw.String,
...     )
... )
┌──────────────────────────────┐
|      Narwhals DataFrame      |
|------------------------------|
|   a        b       a_replaced|
|0  1   beluga              one|
|1  2  narwhal              two|
|2  3     orca     default_orca|
|3  4  vaquita  default_vaquita|
└──────────────────────────────┘

rolling_mean

rolling_mean(
    window_size: int,
    *,
    min_samples: int | None = None,
    center: bool = False
) -> Self

Apply a rolling mean (moving mean) over the values.

A window of length window_size will traverse the values. The resulting values will be aggregated to their mean.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`window_size`	`int`	The length of the window in number of elements. It must be a strictly positive integer.	required
`min_samples`	`int \| None`	The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. If provided, it must be a strictly positive integer, and less than or equal to `window_size`.	`None`
`center`	`bool`	Set the labels at the center of the window.	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     a_rolling_mean=nw.col("a").rolling_mean(window_size=3, min_samples=1)
... )
┌──────────────────────┐
|  Narwhals DataFrame  |
|----------------------|
|     a  a_rolling_mean|
|0  1.0             1.0|
|1  2.0             1.5|
|2  NaN             1.5|
|3  4.0             3.0|
└──────────────────────┘

rolling_std

rolling_std(
    window_size: int,
    *,
    min_samples: int | None = None,
    center: bool = False,
    ddof: int = 1
) -> Self

Apply a rolling standard deviation (moving standard deviation) over the values.

A window of length window_size will traverse the values. The resulting values will be aggregated to their standard deviation.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`window_size`	`int`	The length of the window in number of elements. It must be a strictly positive integer.	required
`min_samples`	`int \| None`	The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. If provided, it must be a strictly positive integer, and less than or equal to `window_size`.	`None`
`center`	`bool`	Set the labels at the center of the window.	`False`
`ddof`	`int`	Delta Degrees of Freedom; the divisor for a length N window is N - ddof.	`1`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     a_rolling_std=nw.col("a").rolling_std(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame  |
|---------------------|
|     a  a_rolling_std|
|0  1.0            NaN|
|1  2.0       0.707107|
|2  NaN       0.707107|
|3  4.0       1.414214|
└─────────────────────┘

rolling_sum

rolling_sum(
    window_size: int,
    *,
    min_samples: int | None = None,
    center: bool = False
) -> Self

Apply a rolling sum (moving sum) over the values.

A window of length window_size will traverse the values. The resulting values will be aggregated to their sum.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`window_size`	`int`	The length of the window in number of elements. It must be a strictly positive integer.	required
`min_samples`	`int \| None`	The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. If provided, it must be a strictly positive integer, and less than or equal to `window_size`.	`None`
`center`	`bool`	Set the labels at the center of the window.	`False`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     a_rolling_sum=nw.col("a").rolling_sum(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame  |
|---------------------|
|     a  a_rolling_sum|
|0  1.0            1.0|
|1  2.0            3.0|
|2  NaN            3.0|
|3  4.0            6.0|
└─────────────────────┘

rolling_var

rolling_var(
    window_size: int,
    *,
    min_samples: int | None = None,
    center: bool = False,
    ddof: int = 1
) -> Self

Apply a rolling variance (moving variance) over the values.

A window of length window_size will traverse the values. The resulting values will be aggregated to their variance.

The window at a given row will include the row itself and the window_size - 1 elements before it.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`window_size`	`int`	The length of the window in number of elements. It must be a strictly positive integer.	required
`min_samples`	`int \| None`	The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. If provided, it must be a strictly positive integer, and less than or equal to `window_size`.	`None`
`center`	`bool`	Set the labels at the center of the window.	`False`
`ddof`	`int`	Delta Degrees of Freedom; the divisor for a length N window is N - ddof.	`1`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
...     a_rolling_var=nw.col("a").rolling_var(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame  |
|---------------------|
|     a  a_rolling_var|
|0  1.0            NaN|
|1  2.0            0.5|
|2  NaN            0.5|
|3  4.0            2.0|
└─────────────────────┘

round

round(decimals: int = 0) -> Self

Round underlying floating point data by decimals digits.

Parameters:

Name	Type	Description	Default
`decimals`	`int`	Number of decimals to round by.	`0`

Notes

For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.

pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 round to 4.0, etc.).

Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.12345, 2.56789, 3.901234]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_rounded=nw.col("a").round(1))
┌──────────────────────┐
|  Narwhals DataFrame  |
|----------------------|
|          a  a_rounded|
|0  1.123450        1.1|
|1  2.567890        2.6|
|2  3.901234        3.9|
└──────────────────────┘

shift

shift(n: int) -> Self

Shift values by n positions.

Info

For lazy backends, this operation must be followed by Expr.over with order_by specified, see order-dependence.

Parameters:

Name	Type	Description	Default
`n`	`int`	Number of positions to shift values by.	required

Notes

pandas may change the dtype here, for example when introducing missing values in an integer column. To ensure that the dtype doesn't change, you may want to use fill_null and cast. For example, to shift and fill missing values with 0 in an Int64 column, you could do:

nw.col("a").shift(1).fill_null(0).cast(nw.Int64)

Examples:

>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_shift=nw.col("a").shift(n=1))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|shape: (5, 2)     |
|┌─────┬─────────┐ |
|│ a   ┆ a_shift │ |
|│ --- ┆ ---     │ |
|│ i64 ┆ i64     │ |
|╞═════╪═════════╡ |
|│ 1   ┆ null    │ |
|│ 1   ┆ 1       │ |
|│ 3   ┆ 1       │ |
|│ 5   ┆ 3       │ |
|│ 5   ┆ 5       │ |
|└─────┴─────────┘ |
└──────────────────┘

sin

sin() -> Self

Compute the element-wise value for the sine.

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> from math import pi
>>> df_native = pa.table({"values": [0, pi / 2, 3 * pi / 2]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(sin=nw.col("values").sin())
>>> result
┌─────────────────────────────────────────────────┐
|               Narwhals DataFrame                |
|-------------------------------------------------|
|pyarrow.Table                                    |
|values: double                                   |
|sin: double                                      |
|----                                             |
|values: [[0,1.5707963267948966,4.71238898038469]]|
|sin: [[0,1,-1]]                                  |
└─────────────────────────────────────────────────┘

skew

skew() -> Self

Calculate the sample skewness of a column.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").skew())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|      a         b |
| 0  0.0  1.472427 |
└──────────────────┘

sqrt

sqrt() -> Self

Compute the square root of the elements.

Examples:

>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1, 4, 9]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(sqrt=nw.col("values").sqrt())
>>> result
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|pyarrow.Table     |
|values: int64     |
|sqrt: double      |
|----              |
|values: [[1,4,9]] |
|sqrt: [[1,2,3]]   |
└──────────────────┘

std

std(*, ddof: int = 1) -> Self

Get standard deviation.

Parameters:

Name	Type	Description	Default
`ddof`	`int`	"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.	`1`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").std(ddof=0))
┌─────────────────────┐
| Narwhals DataFrame  |
|---------------------|
|          a         b|
|0  17.79513  1.265789|
└─────────────────────┘

sum

sum() -> Self

Return the sum value.

If there are no non-null elements, the result is zero.

Examples:

>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql("SELECT * FROM VALUES (5, 50), (10, 100) df(a, b)")
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").sum())
┌───────────────────┐
|Narwhals LazyFrame |
|-------------------|
|┌────────┬────────┐|
|│   a    │   b    │|
|│ int128 │ int128 │|
|├────────┼────────┤|
|│     15 │    150 │|
|└────────┴────────┘|
└───────────────────┘

unique

unique() -> Self

Return unique values of this expression.

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").unique().sum())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|        a   b     |
|     0  9  12     |
└──────────────────┘

var

var(*, ddof: int = 1) -> Self

Get variance.

Parameters:

Name	Type	Description	Default
`ddof`	`int`	"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.	`1`

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").var(ddof=0))
┌───────────────────────┐
|  Narwhals DataFrame   |
|-----------------------|
|            a         b|
|0  316.666667  1.602222|
└───────────────────────┘