narwhals.Expr
abs
abs() -> Self
Return absolute value of each element.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, -2], "b": [-3, 4]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.col("a", "b").abs().name.suffix("_abs"))
┌─────────────────────┐
| Narwhals DataFrame  |
|---------------------|
|   a  b  a_abs  b_abs|
|0  1 -3      1      3|
|1 -2  4      2      4|
└─────────────────────┘
alias
alias(name: str) -> Self
Rename the expression.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
name
|
str
|
The new name. |
required |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df = nw.from_native(df_native)
>>> df.select((nw.col("b") + 10).alias("c"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|          c       |
|      0  14       |
|      1  15       |
└──────────────────┘
all
all() -> Self
Return whether all values in the column are True.
If there are no non-null elements, the result is True.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").all())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 False True |
ββββββββββββββββββββ
any
any() -> Self
Return whether any of the values in the column are True.
If there are no non-null elements, the result is False.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").any())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 True True |
ββββββββββββββββββββ
any_value
any_value(*, ignore_nulls: bool = False) -> Self
Get a random value from the column.
Warning
This functionality is considered unstable as it diverges from the polars API. It may be changed at any point without it being considered a breaking change.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
ignore_nulls
|
bool
|
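Whether to ignore null values or not.
If True, null values are skipped, so a non-null value is returned whenever the column (or group) contains one. |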
False
|
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> data = {"a": [1, 1, 2, 2], "b": [None, "foo", "baz", None]}
>>> df_native = pa.table(data)
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().any_value(ignore_nulls=False))
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| pyarrow.Table |
| a: int64 |
| b: null |
| ---- |
| a: [[1]] |
| b: [1 nulls] |
ββββββββββββββββββββ
>>> df.group_by("a").agg(nw.col("b").any_value(ignore_nulls=True))
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
|pyarrow.Table |
|a: int64 |
|b: string |
|---- |
|a: [[1,2]] |
|b: [["foo","baz"]]|
ββββββββββββββββββββ
cast
cast(dtype: IntoDType) -> Self
Redefine an object's data type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
dtype
|
IntoDType
|
Data type that the object will be cast into. |
required |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8))
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| foo bar |
| 0 1.0 6 |
| 1 2.0 7 |
| 2 3.0 8 |
ββββββββββββββββββββ
ceil
ceil() -> Self
Compute the numerical ceiling.
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1.1, 4.3, -1.3]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(ceil=nw.col("values").ceil())
>>> result
ββββββββββββββββββββββββββ
| Narwhals DataFrame |
|------------------------|
|pyarrow.Table |
|values: double |
|ceil: double |
|---- |
|values: [[1.1,4.3,-1.3]]|
|ceil: [[2,5,-1]] |
ββββββββββββββββββββββββββ
clip
clip(
lower_bound: (
IntoExpr | NumericLiteral | TemporalLiteral | None
) = None,
upper_bound: (
IntoExpr | NumericLiteral | TemporalLiteral | None
) = None,
) -> Self
Clip values in the column to the given lower and/or upper bound.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
lower_bound
|
IntoExpr | NumericLiteral | TemporalLiteral | None
|
Lower bound value. String literals are treated as column names. |
None
|
upper_bound
|
IntoExpr | NumericLiteral | TemporalLiteral | None
|
Upper bound value. String literals are treated as column names. |
None
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_clipped=nw.col("a").clip(-1, 3))
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a a_clipped |
| 0 1 1 |
| 1 2 2 |
| 2 3 3 |
ββββββββββββββββββββ
cos
cos() -> Self
Compute the element-wise value for the cosine.
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from math import pi
>>> df_native = pa.table({"values": [0, pi / 2, pi]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(cos=nw.col("values").cos()).select(
... nw.all().round(4)
... )
>>> result
βββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|---------------------------|
|pyarrow.Table |
|values: double |
|cos: double |
|---- |
|values: [[0,1.5708,3.1416]]|
|cos: [[1,0,-1]] |
βββββββββββββββββββββββββββββ
count
count() -> Self
Returns the number of non-null elements in the column.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().count())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 3 2 |
ββββββββββββββββββββ
cum_count
cum_count(*, reverse: bool = False) -> Self
Return the cumulative count of the non-null values in the column.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": ["x", "k", None, "d"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_count().alias("a_cum_count"),
... nw.col("a").cum_count(reverse=True).alias("a_cum_count_reverse"),
... )
ββββββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|----------------------------------------|
| a a_cum_count a_cum_count_reverse|
|0 x 1 3|
|1 k 2 2|
|2 NaN 2 1|
|3 d 3 1|
ββββββββββββββββββββββββββββββββββββββββββ
cum_max
cum_max(*, reverse: bool = False) -> Self
Return the cumulative max of the non-null values in the column.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_max().alias("a_cum_max"),
... nw.col("a").cum_max(reverse=True).alias("a_cum_max_reverse"),
... )
ββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|------------------------------------|
| a a_cum_max a_cum_max_reverse|
|0 1.0 1.0 3.0|
|1 3.0 3.0 3.0|
|2 NaN NaN NaN|
|3 2.0 3.0 2.0|
ββββββββββββββββββββββββββββββββββββββ
cum_min
cum_min(*, reverse: bool = False) -> Self
Return the cumulative min of the non-null values in the column.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 1, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_min().alias("a_cum_min"),
... nw.col("a").cum_min(reverse=True).alias("a_cum_min_reverse"),
... )
ββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|------------------------------------|
| a a_cum_min a_cum_min_reverse|
|0 3.0 3.0 1.0|
|1 1.0 1.0 1.0|
|2 NaN NaN NaN|
|3 2.0 1.0 2.0|
ββββββββββββββββββββββββββββββββββββββ
cum_prod
cum_prod(*, reverse: bool = False) -> Self
Return the cumulative product of the non-null values in the column.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_prod().alias("a_cum_prod"),
... nw.col("a").cum_prod(reverse=True).alias("a_cum_prod_reverse"),
... )
ββββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|--------------------------------------|
| a a_cum_prod a_cum_prod_reverse|
|0 1.0 1.0 6.0|
|1 3.0 3.0 6.0|
|2 NaN NaN NaN|
|3 2.0 6.0 2.0|
ββββββββββββββββββββββββββββββββββββββββ
cum_sum
cum_sum(*, reverse: bool = False) -> Self
Return cumulative sum.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_cum_sum=nw.col("a").cum_sum())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b a_cum_sum|
|0 1 2 1|
|1 1 4 2|
|2 3 4 5|
|3 5 6 10|
|4 5 6 15|
ββββββββββββββββββββ
diff
diff() -> Self
Returns the difference between each element and the previous one.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use fill_null and cast. For example, to calculate
the diff and fill missing values with 0 in an Int64 column, you could
do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_diff=nw.col("a").diff())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| shape: (5, 2)    |
| ┌─────┬────────┐ |
| │ a   ┆ a_diff │ |
| │ --- ┆ ---    │ |
| │ i64 ┆ i64    │ |
| ╞═════╪════════╡ |
| │ 1   ┆ null   │ |
| │ 1   ┆ 0      │ |
| │ 3   ┆ 2      │ |
| │ 5   ┆ 2      │ |
| │ 5   ┆ 0      │ |
| └─────┴────────┘ |
└──────────────────┘
drop_nulls
drop_nulls() -> Self
Drop null values.
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a").drop_nulls())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|  shape: (5, 1)   |
|  ┌─────┐         |
|  │ a   │         |
|  │ --- │         |
|  │ f64 │         |
|  ╞═════╡         |
|  │ 2.0 │         |
|  │ 4.0 │         |
|  │ NaN │         |
|  │ 3.0 │         |
|  │ 5.0 │         |
|  └─────┘         |
└──────────────────┘
ewm_mean
ewm_mean(
*,
com: float | None = None,
span: float | None = None,
half_life: float | None = None,
alpha: float | None = None,
adjust: bool = True,
min_samples: int = 1,
ignore_nulls: bool = False
) -> Self
Compute exponentially-weighted moving average.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
com
|
float | None
|
Specify decay in terms of center of mass, \(\gamma\), with \(\alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0\) |
None
|
span
|
float | None
|
Specify decay in terms of span, \(\theta\), with \(\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1\) |
None
|
half_life
|
float | None
|
Specify decay in terms of half-life, \(\tau\), with \(\alpha = 1 - \exp\left\{ \frac{-\ln(2)}{\tau} \right\} \; \forall \; \tau > 0\) |
None
|
alpha
|
float | None
|
Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). |
None
|
adjust
|
bool
|
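Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings:
- When adjust=True (the default), the EW function is calculated using weights \(w_i = (1 - \alpha)^i\).
- When adjust=False, the EW function is calculated recursively by \(y_0 = x_0\), \(y_t = (1 - \alpha) y_{t-1} + \alpha x_t\).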
|
True
|
min_samples
|
int
|
Minimum number of observations in window required to have a value, (otherwise result is null). |
1
|
ignore_nulls
|
bool
|
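Ignore missing values when calculating weights:
- When ignore_nulls=False (the default), weights are based on absolute positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, \text{None}, x_2]\) are \((1 - \alpha)^2\) and \(1\) if adjust=True, and \((1 - \alpha)^2\) and \(\alpha\) if adjust=False.
- When ignore_nulls=True, weights are based on relative positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, \text{None}, x_2]\) are \(1 - \alpha\) and \(1\) if adjust=True, and \(1 - \alpha\) and \(\alpha\) if adjust=False.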
|
False
|
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
We define a library-agnostic function:
>>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
... ).to_native()
We can then pass either pandas or Polars to agnostic_ewm_mean:
>>> agnostic_ewm_mean(df_pd)
a
0 1.000000
1 1.666667
2 2.428571
>>> agnostic_ewm_mean(df_pl)
shape: (3, 1)
┌──────────┐
│ a        │
│ ---      │
│ f64      │
╞══════════╡
│ 1.0      │
│ 1.666667 │
│ 2.428571 │
└──────────┘
exp
exp() -> Self
Compute the exponential, i.e. e (Euler's number) raised to the power of each element.
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [-1, 0, 1]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(exp=nw.col("values").exp())
>>> result
ββββββββββββββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|------------------------------------------------|
|pyarrow.Table |
|values: int64 |
|exp: double |
|---- |
|values: [[-1,0,1]] |
|exp: [[0.36787944117144233,1,2.718281828459045]]|
ββββββββββββββββββββββββββββββββββββββββββββββββββ
fill_nan
fill_nan(value: float | None) -> Self
Fill floating point NaN values with given value.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
value
|
float | None
|
Value used to fill NaN values. |
required |
Notes
This function only fills 'NaN' values, not null ones, except for pandas
which doesn't distinguish between them.
See null_handling for reference.
Examples:
>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
... "SELECT * FROM VALUES (5.::DOUBLE, 50.::DOUBLE), ('NaN', null) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.col("a", "b").fill_nan(0).name.suffix("_nans_filled"))
┌───────────────────────────────────────────────────┐
|                Narwhals LazyFrame                 |
|---------------------------------------------------|
|┌────────┬────────┬───────────────┬───────────────┐|
|│   a    │   b    │ a_nans_filled │ b_nans_filled │|
|│ double │ double │    double     │    double     │|
|├────────┼────────┼───────────────┼───────────────┤|
|│    5.0 │   50.0 │           5.0 │          50.0 │|
|│    nan │   NULL │           0.0 │          NULL │|
|└────────┴────────┴───────────────┴───────────────┘|
└───────────────────────────────────────────────────┘
fill_null
fill_null(
value: Expr | NonNestedLiteral = None,
strategy: FillNullStrategy | None = None,
limit: int | None = None,
) -> Self
Fill null values with given value.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
value
|
Expr | NonNestedLiteral
|
Value or expression used to fill null values. |
None
|
strategy
|
FillNullStrategy | None
|
Strategy used to fill null values. |
None
|
limit
|
int | None
|
Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. |
None
|
Notes
- pandas handles null values differently from other libraries. See null_handling for reference.
- For pandas Series of `object` dtype, `fill_null` will not automatically change the Series' dtype as pandas used to do. Explicitly call `cast` if you want the dtype to change.
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame(
... {
... "a": [2, None, None, 3],
... "b": [2.0, float("nan"), float("nan"), 3.0],
... "c": [1, 2, 3, 4],
... }
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a", "b").fill_null(0).name.suffix("_filled"),
... nw.col("a").fill_null(nw.col("c")).name.suffix("_filled_with_c"),
... )
┌────────────────────────────────────────────────────────────┐
|                     Narwhals DataFrame                     |
|------------------------------------------------------------|
|shape: (4, 6)                                               |
|┌──────┬─────┬─────┬──────────┬──────────┬─────────────────┐|
|│ a    ┆ b   ┆ c   ┆ a_filled ┆ b_filled ┆ a_filled_with_c │|
|│ ---  ┆ --- ┆ --- ┆ ---      ┆ ---      ┆ ---             │|
|│ i64  ┆ f64 ┆ i64 ┆ i64      ┆ f64      ┆ i64             │|
|╞══════╪═════╪═════╪══════════╪══════════╪═════════════════╡|
|│ 2    ┆ 2.0 ┆ 1   ┆ 2        ┆ 2.0      ┆ 2               │|
|│ null ┆ NaN ┆ 2   ┆ 0        ┆ NaN      ┆ 2               │|
|│ null ┆ NaN ┆ 3   ┆ 0        ┆ NaN      ┆ 3               │|
|│ 3    ┆ 3.0 ┆ 4   ┆ 3        ┆ 3.0      ┆ 3               │|
|└──────┴─────┴─────┴──────────┴──────────┴─────────────────┘|
└────────────────────────────────────────────────────────────┘
Using a strategy:
>>> df.select(
... nw.col("a", "b"),
... nw.col("a", "b")
... .fill_null(strategy="forward", limit=1)
... .name.suffix("_nulls_forward_filled"),
... )
┌────────────────────────────────────────────────────────────────┐
|                       Narwhals DataFrame                       |
|----------------------------------------------------------------|
|shape: (4, 4)                                                   |
|┌──────┬─────┬────────────────────────┬────────────────────────┐|
|│ a    ┆ b   ┆ a_nulls_forward_filled ┆ b_nulls_forward_filled │|
|│ ---  ┆ --- ┆ ---                    ┆ ---                    │|
|│ i64  ┆ f64 ┆ i64                    ┆ f64                    │|
|╞══════╪═════╪════════════════════════╪════════════════════════╡|
|│ 2    ┆ 2.0 ┆ 2                      ┆ 2.0                    │|
|│ null ┆ NaN ┆ 2                      ┆ NaN                    │|
|│ null ┆ NaN ┆ null                   ┆ NaN                    │|
|│ 3    ┆ 3.0 ┆ 3                      ┆ 3.0                    │|
|└──────┴─────┴────────────────────────┴────────────────────────┘|
└────────────────────────────────────────────────────────────────┘
filter
filter(*predicates: Any) -> Self
Filters elements based on a condition, returning a new expression.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
predicates
|
Any
|
Conditions to filter by (which get AND-ed together). |
()
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
... {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}
... )
>>> df = nw.from_native(df_native)
>>> df.select(
... nw.col("a").filter(nw.col("a") > 4),
... nw.col("b").filter(nw.col("b") < 13),
... )
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 3 5 10 |
| 4 6 11 |
| 5 7 12 |
ββββββββββββββββββββ
first
first(order_by: str | Iterable[str] | None = None) -> Self
Get the first value.
Notes
For lazy backends, this can only be used with over or with order_by.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> data = {"a": [1, 1, 2, 2], "b": ["foo", None, None, "baz"]}
>>> df_native = pd.DataFrame(data)
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().first())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 foo |
ββββββββββββββββββββ
>>> df.group_by("a").agg(nw.col("b").first())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 foo |
| 1 2 NaN |
ββββββββββββββββββββ
floor
floor() -> Self
Compute the numerical floor.
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1.1, 4.3, -1.3]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(floor=nw.col("values").floor())
>>> result
ββββββββββββββββββββββββββ
| Narwhals DataFrame |
|------------------------|
|pyarrow.Table |
|values: double |
|floor: double |
|---- |
|values: [[1.1,4.3,-1.3]]|
|floor: [[1,4,-2]] |
ββββββββββββββββββββββββββ
is_between
is_between(
lower_bound: Any | IntoExpr,
upper_bound: Any | IntoExpr,
closed: ClosedInterval = "both",
) -> Self
Check if this expression is between the given lower and upper bounds.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
lower_bound
|
Any | IntoExpr
|
Lower bound value. String literals are interpreted as column names. |
required |
upper_bound
|
Any | IntoExpr
|
Upper bound value. String literals are interpreted as column names. |
required |
closed
|
ClosedInterval
|
Define which sides of the interval are closed (inclusive). Options are {"left", "right", "none", "both"}. |
'both'
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(b=nw.col("a").is_between(2, 4, "right"))
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 False |
| 1 2 False |
| 2 3 True |
| 3 4 True |
| 4 5 False |
ββββββββββββββββββββ
is_close
is_close(
other: Expr | Series[Any] | NumericLiteral,
*,
abs_tol: float = 0.0,
rel_tol: float = 1e-09,
nans_equal: bool = False
) -> Self
Check if this expression is close, i.e. almost equal, to the other expression.
Two values a and b are considered close if the following condition holds:
\(|a - b| \leq \max\{ \text{rel\_tol} \cdot \max\{ |a|, |b| \}, \text{abs\_tol} \}\)
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other
|
Expr | Series[Any] | NumericLiteral
|
Values to compare with. |
required |
abs_tol
|
float
|
Absolute tolerance. This is the maximum allowed absolute difference between two values. Must be non-negative. |
0.0
|
rel_tol
|
float
|
Relative tolerance. This is the maximum allowed difference between two values, relative to the larger absolute value. Must be in the range [0, 1). |
1e-09
|
nans_equal
|
bool
|
Whether NaN values should be considered equal. |
False
|
Notes
The implementation of this method is symmetric and mirrors the behavior of
math.isclose. Specifically note that this behavior is different to
numpy.isclose.
Examples:
>>> import duckdb
>>> import pyarrow as pa
>>> import narwhals as nw
>>>
>>> data = {
... "x": [1.0, float("inf"), 1.41, None, float("nan")],
... "y": [1.2, float("inf"), 1.40, None, float("nan")],
... }
>>> _table = pa.table(data)
>>> df_native = duckdb.table("_table")
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... is_close=nw.col("x").is_close(
... nw.col("y"), abs_tol=0.1, nans_equal=True
... )
... )
┌──────────────────────────────┐
|      Narwhals LazyFrame      |
|------------------------------|
|┌────────┬────────┬──────────┐|
|│   x    │   y    │ is_close │|
|│ double │ double │ boolean  │|
|├────────┼────────┼──────────┤|
|│    1.0 │    1.2 │ false    │|
|│    inf │    inf │ true     │|
|│   1.41 │    1.4 │ true     │|
|│   NULL │   NULL │ NULL     │|
|│    nan │    nan │ true     │|
|└────────┴────────┴──────────┘|
└──────────────────────────────┘
is_duplicated
is_duplicated() -> Self
Return a boolean mask indicating duplicated values.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.all().is_duplicated().name.suffix("_is_duplicated"))
βββββββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|-----------------------------------------|
| a b a_is_duplicated b_is_duplicated|
|0 1 a True True|
|1 2 a False True|
|2 3 b False False|
|3 1 c True False|
βββββββββββββββββββββββββββββββββββββββββββ
is_finite
is_finite() -> Self
Returns boolean values indicating which original values are finite.
Warning
pandas handles null values differently from Polars and PyArrow.
See null_handling for reference.
is_finite will return False for NaN and null values in the Dask and
pandas non-nullable backends, while for Polars, PyArrow and pandas
nullable backends null values are kept as such.
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [float("nan"), float("inf"), 2.0, None]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_is_finite=nw.col("a").is_finite())
┌──────────────────────┐
|  Narwhals DataFrame  |
|----------------------|
|shape: (4, 2)         |
|┌──────┬─────────────┐|
|│ a    ┆ a_is_finite │|
|│ ---  ┆ ---         │|
|│ f64  ┆ bool        │|
|╞══════╪═════════════╡|
|│ NaN  ┆ false       │|
|│ inf  ┆ false       │|
|│ 2.0  ┆ true        │|
|│ null ┆ null        │|
|└──────┴─────────────┘|
└──────────────────────┘
is_first_distinct
is_first_distinct() -> Self
Return a boolean mask indicating the first occurrence of each distinct value.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.all().is_first_distinct().name.suffix("_is_first_distinct")
... )
βββββββββββββββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|-------------------------------------------------|
| a b a_is_first_distinct b_is_first_distinct|
|0 1 a True True|
|1 2 a True False|
|2 3 b True True|
|3 1 c False True|
βββββββββββββββββββββββββββββββββββββββββββββββββββ
is_in
is_in(other: Any) -> Self
Check if elements of this expression are present in the other iterable.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other
|
Any
|
iterable |
required |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 9, 10]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(b=nw.col("a").is_in([1, 2]))
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 True |
| 1 2 True |
| 2 9 False |
| 3 10 False |
ββββββββββββββββββββ
is_last_distinct
is_last_distinct() -> Self
Return a boolean mask indicating the last occurrence of each distinct value.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.all().is_last_distinct().name.suffix("_is_last_distinct")
... )
βββββββββββββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|-----------------------------------------------|
| a b a_is_last_distinct b_is_last_distinct|
|0 1 a False False|
|1 2 a True True|
|2 3 b True True|
|3 1 c True True|
βββββββββββββββββββββββββββββββββββββββββββββββββ
is_nan
is_nan() -> Self
Indicate which values are NaN.
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan()
... )
┌────────────────────────────────────────┐
|           Narwhals LazyFrame           |
|----------------------------------------|
|┌───────┬────────┬──────────┬──────────┐|
|│   a   │   b    │ a_is_nan │ b_is_nan │|
|│ int32 │ double │ boolean  │ boolean  │|
|├───────┼────────┼──────────┼──────────┤|
|│  NULL │    nan │ NULL     │ true     │|
|│     2 │    2.0 │ false    │ false    │|
|└───────┴────────┴──────────┴──────────┘|
└────────────────────────────────────────┘
is_null
is_null() -> Self
Returns a boolean expression indicating which values are null.
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
... )
┌──────────────────────────────────────────┐
|            Narwhals LazyFrame            |
|------------------------------------------|
|┌───────┬────────┬───────────┬───────────┐|
|│   a   │   b    │ a_is_null │ b_is_null │|
|│ int32 │ double │  boolean  │  boolean  │|
|├───────┼────────┼───────────┼───────────┤|
|│  NULL │    nan │ true      │ false     │|
|│     2 │    2.0 │ false     │ false     │|
|└───────┴────────┴───────────┴───────────┘|
└──────────────────────────────────────────┘
is_unique
is_unique() -> Self
Return a boolean mask indicating unique values.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.all().is_unique().name.suffix("_is_unique"))
βββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|---------------------------------|
| a b a_is_unique b_is_unique|
|0 1 a False False|
|1 2 a True False|
|2 3 b True True|
|3 1 c False True|
βββββββββββββββββββββββββββββββββββ
kurtosis
kurtosis() -> Self
Compute the kurtosis (Fisher's definition) without bias correction.
Kurtosis is the fourth central moment divided by the square of the variance. Fisher's definition is used, in which 3.0 is subtracted from the result so that a normal distribution has a kurtosis of 0.0.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").kurtosis())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a b |
| 0 -1.3 0.210657 |
ββββββββββββββββββββ
last
last(order_by: str | Iterable[str] | None = None) -> Self
Get the last value.
Notes
For lazy backends, this can only be used with over or with order_by.
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> data = {"a": [1, 1, 2, 2], "b": ["foo", None, None, "baz"]}
>>> df_native = pa.table(data)
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().last())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| pyarrow.Table |
| a: int64 |
| b: string |
| ---- |
| a: [[2]] |
| b: [["baz"]] |
ββββββββββββββββββββ
>>> df.group_by("a").agg(nw.col("b").last())
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
|pyarrow.Table |
|a: int64 |
|b: string |
|---- |
|a: [[1,2]] |
|b: [[null,"baz"]] |
ββββββββββββββββββββ
len
len() -> Self
Return the number of elements in the column.
Null values count towards the total.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": ["x", "y", "z"], "b": [1, 2, 1]})
>>> df = nw.from_native(df_native)
>>> df.select(
... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
... )
ββββββββββββββββββββ
|Narwhals DataFrame|
|------------------|
| a1 a2 |
| 0 2 1 |
ββββββββββββββββββββ
log
log(base: float = math.e) -> Self
Compute the logarithm to a given base.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base
|
float
|
Given base, defaults to e (Euler's number). |
e
|
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1, 2, 4]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(
... log=nw.col("values").log(), log_2=nw.col("values").log(base=2)
... )
>>> result
ββββββββββββββββββββββββββββββββββββββββββββββββββ
| Narwhals DataFrame |
|------------------------------------------------|
|pyarrow.Table |
|values: int64 |
|log: double |
|log_2: double |
|---- |
|values: [[1,2,4]] |
|log: [[0,0.6931471805599453,1.3862943611198906]]|
|log_2: [[0,1,2]] |
ββββββββββββββββββββββββββββββββββββββββββββββββββ
map_batches
map_batches(
function: Callable[[Any], CompliantExpr[Any, Any]],
return_dtype: DType | None = None,
*,
returns_scalar: bool = False
) -> Self
Apply a custom Python function to a whole Series or sequence of Series.
The output of this custom function is presumed to be either a Series, or a NumPy array (in which case it will be automatically converted into a Series).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
function
|
Callable[[Any], CompliantExpr[Any, Any]]
|
Function to apply to Series. |
required |
return_dtype
|
DType | None
|
Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function. |
None
|
returns_scalar
|
bool
|
If the function returns a scalar, by default it will be wrapped in a list in the output, since the assumption is that the function always returns something Series-like. If you want to keep the result as a scalar, set this argument to True. |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a", "b")
... .map_batches(lambda s: s.to_numpy() + 1, return_dtype=nw.Float64)
... .name.suffix("_mapped")
... )
┌───────────────────────────┐
| Narwhals DataFrame |
|---------------------------|
| a b a_mapped b_mapped|
|0 1 4 2.0 5.0|
|1 2 5 3.0 6.0|
|2 3 6 4.0 7.0|
└───────────────────────────┘
max
max() -> Self
Returns the maximum value(s) from the column(s).
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.max("a", "b"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 20 100 |
└──────────────────┘
mean
mean() -> Self
Get mean value.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").mean())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 0.0 4.0 |
└──────────────────┘
median
median() -> Self
Get median value.
Notes
Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").median())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 3.0 4.0 |
└──────────────────┘
min
min() -> Self
Returns the minimum value(s) from the column(s).
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.min("a", "b"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 3 |
└──────────────────┘
mode
mode(*, keep: ModeKeepStrategy = 'all') -> Self
Compute the most frequently occurring value(s).
Can return multiple values.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
keep
|
ModeKeepStrategy
|
Whether to keep all modes or any mode found. Remark that `keep='any'` is not deterministic for multimodal values. |
'all'
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 2, 3], "b": [1, 1, 2, 2]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a").mode()).sort("a")
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a |
| 0 1 |
└──────────────────┘
n_unique
n_unique() -> Self
Returns count of unique values.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").n_unique())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 5 3 |
└──────────────────┘
null_count
null_count() -> Self
Count null values.
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
... {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
... )
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().null_count())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 2 |
└──────────────────┘
over
over(
*partition_by: str | Sequence[str],
order_by: str | Sequence[str] | None = None
) -> Self
Compute expressions over the given groups (optionally with given order).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
partition_by
|
str | Sequence[str]
|
Names of columns to compute window expression over.
Must be names of columns, as opposed to expressions -
so, this is a bit less flexible than Polars' `Expr.over`. |
()
|
order_by
|
str | Sequence[str] | None
|
Column(s) to order window functions by.
For lazy backends, this argument is required when `over` is applied to order-dependent functions (see order-dependence). |
None
|
Note
At least one of partition_by or order_by must be provided.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 4], "b": ["x", "x", "y"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_min_per_group=nw.col("a").min().over("b"))
┌────────────────────────┐
| Narwhals DataFrame |
|------------------------|
| a b a_min_per_group|
|0 1 x 1|
|1 2 x 1|
|2 4 y 4|
└────────────────────────┘
When partition_by is omitted, the expression is computed over the
entire frame. This is useful with order_by for order-dependent
operations without grouping:
>>> import duckdb
>>> import pyarrow as pa
>>> import narwhals as nw
>>>
>>> data = {"a": [3, 1, 2], "b": ["x", "y", "z"]}
>>> _table = pa.table(data)
>>> df = nw.from_native(duckdb.table("_table"))
>>> expr = nw.col("a").cum_sum().over(order_by="a")
>>> df.with_columns(a_cum_sum=expr).sort("a")
┌───────────────────────────────┐
| Narwhals LazyFrame |
|-------------------------------|
|┌───────┬─────────┬───────────┐|
|│ a │ b │ a_cum_sum │|
|│ int64 │ varchar │ int128 │|
|├───────┼─────────┼───────────┤|
|│ 1 │ y │ 1 │|
|│ 2 │ z │ 3 │|
|│ 3 │ x │ 6 │|
|└───────┴─────────┴───────────┘|
└───────────────────────────────┘
pipe
pipe(
function: Callable[Concatenate[Self, PS], R],
*args: args,
**kwargs: kwargs
) -> R
Pipe function call.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
function
|
Callable[Concatenate[Self, PS], R]
|
Function to apply. |
required |
args
|
args
|
Positional arguments to pass to function. |
()
|
kwargs
|
kwargs
|
Keyword arguments to pass to function. |
{}
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_piped=nw.col("a").pipe(lambda x: x + 1))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a a_piped |
| 0 1 2 |
| 1 2 3 |
| 2 3 4 |
| 3 4 5 |
└──────────────────┘
quantile
quantile(
quantile: float,
interpolation: RollingInterpolationMethod,
) -> Self
Get quantile value.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
quantile
|
float
|
Quantile between 0.0 and 1.0. |
required |
interpolation
|
RollingInterpolationMethod
|
Interpolation method. |
required |
Note
- pandas and Polars may have implementation differences for a given interpolation method.
- dask has its own method for approximating quantiles and does not implement the 'nearest', 'higher', 'lower', or 'midpoint' interpolation methods; use 'linear', which is closest to dask's native method.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
... {"a": list(range(50)), "b": list(range(50, 100))}
... )
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 24.5 74.5 |
└──────────────────┘
rank
rank(
method: RankMethod = "average",
*,
descending: bool = False
) -> Self
Assign ranks to data, dealing with ties appropriately.
Notes
The resulting dtype may differ between backends.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method
|
RankMethod
|
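The method used to assign ranks to tied elements. The following methods are available (default is 'average'):
- 'average': The average of the ranks that would have been assigned to all the tied values is assigned to each value.
- 'min': The lowest of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.)
- 'max': The highest of the ranks that would have been assigned to all the tied values is assigned to each value.
- 'dense': Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements.
- 'ordinal': All values are given a distinct rank, corresponding to the order in which the values occur in the Series.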
|
'average'
|
descending
|
bool
|
Rank in descending order. |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 6, 1, 1, 6]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(rank=nw.col("a").rank(method="dense"))
>>> result
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a rank |
| 0 3 2.0 |
| 1 6 3.0 |
| 2 1 1.0 |
| 3 1 1.0 |
| 4 6 3.0 |
└──────────────────┘
replace_strict
replace_strict(
old: Sequence[Any] | Mapping[Any, Any],
new: Sequence[Any] | None = None,
*,
default: Any | NoDefault = NO_DEFAULT,
return_dtype: IntoDType | None = None
) -> Self
Replace all values by different values.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
old
|
Sequence[Any] | Mapping[Any, Any]
|
Sequence of values to replace. It also accepts a mapping of values to
their replacement as syntactic sugar for
`replace_strict(old=list(mapping.keys()), new=list(mapping.values()))`.
|
required |
new
|
Sequence[Any] | None
|
Sequence of values to replace by. Length must match the length of `old`. |
None
|
default
|
Any | NoDefault
|
Set values that were not replaced to this value. If no default is specified (the default), an error is raised if any values were not replaced. Accepts expression input. Non-expression inputs are parsed as literals. |
NO_DEFAULT
|
return_dtype
|
IntoDType | None
|
The data type of the resulting expression. If set to `None` (default), the data type is determined automatically based on the other inputs. |
None
|
Raises:
| Type | Description |
|---|---|
InvalidOperationError
|
If any non-null values in the original column were not replaced, and no default was specified. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 0, 1, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... b=nw.col("a").replace_strict(
... [0, 1, 2, 3],
... ["zero", "one", "two", "three"],
... return_dtype=nw.String,
... )
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 3 three |
| 1 0 zero |
| 2 1 one |
| 3 2 two |
└──────────────────┘
Replace values and set a default for values not in the mapping:
>>> data = {"a": [1, 2, 3, 4], "b": ["beluga", "narwhal", "orca", "vaquita"]}
>>> df = nw.from_native(pd.DataFrame(data))
>>> df.with_columns(
... a_replaced=nw.col("a").replace_strict(
... {1: "one", 2: "two"},
... default=nw.concat_str(nw.lit("default_"), nw.col("b")),
... return_dtype=nw.String,
... )
... )
┌──────────────────────────────┐
| Narwhals DataFrame |
|------------------------------|
| a b a_replaced|
|0 1 beluga one|
|1 2 narwhal two|
|2 3 orca default_orca|
|3 4 vaquita default_vaquita|
└──────────────────────────────┘
rolling_mean
rolling_mean(
window_size: int,
*,
min_samples: int | None = None,
center: bool = False
) -> Self
Apply a rolling mean (moving mean) over the values.
A window of length window_size will traverse the values. The resulting values
will be aggregated to their mean.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to `None` (default), it will be set equal to `window_size`. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_mean=nw.col("a").rolling_mean(window_size=3, min_samples=1)
... )
┌──────────────────────┐
| Narwhals DataFrame |
|----------------------|
| a a_rolling_mean|
|0 1.0 1.0|
|1 2.0 1.5|
|2 NaN 1.5|
|3 4.0 3.0|
└──────────────────────┘
rolling_std
rolling_std(
window_size: int,
*,
min_samples: int | None = None,
center: bool = False,
ddof: int = 1
) -> Self
Apply a rolling standard deviation (moving standard deviation) over the values.
A window of length window_size will traverse the values. The resulting values
will be aggregated to their standard deviation.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to `None` (default), it will be set equal to `window_size`. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
ddof
|
int
|
Delta Degrees of Freedom; the divisor for a length N window is N - ddof. |
1
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_std=nw.col("a").rolling_std(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a a_rolling_std|
|0 1.0 NaN|
|1 2.0 0.707107|
|2 NaN 0.707107|
|3 4.0 1.414214|
└─────────────────────┘
rolling_sum
rolling_sum(
window_size: int,
*,
min_samples: int | None = None,
center: bool = False
) -> Self
Apply a rolling sum (moving sum) over the values.
A window of length window_size will traverse the values. The resulting values
will be aggregated to their sum.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to `None` (default), it will be set equal to `window_size`. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_sum=nw.col("a").rolling_sum(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a a_rolling_sum|
|0 1.0 1.0|
|1 2.0 3.0|
|2 NaN 3.0|
|3 4.0 6.0|
└─────────────────────┘
rolling_var
rolling_var(
window_size: int,
*,
min_samples: int | None = None,
center: bool = False,
ddof: int = 1
) -> Self
Apply a rolling variance (moving variance) over the values.
A window of length window_size will traverse the values. The resulting values
will be aggregated to their variance.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to `None` (default), it will be set equal to `window_size`. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
ddof
|
int
|
Delta Degrees of Freedom; the divisor for a length N window is N - ddof. |
1
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_var=nw.col("a").rolling_var(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a a_rolling_var|
|0 1.0 NaN|
|1 2.0 0.5|
|2 NaN 0.5|
|3 4.0 2.0|
└─────────────────────┘
round
round(decimals: int = 0) -> Self
Round underlying floating-point data to `decimals` digits.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
decimals
|
int
|
Number of decimals to round by. |
0
|
Notes
For values exactly halfway between rounded decimal values, pandas behaves differently from Polars and Arrow.
pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 to 4.0, etc.).
Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.12345, 2.56789, 3.901234]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_rounded=nw.col("a").round(1))
┌──────────────────────┐
| Narwhals DataFrame |
|----------------------|
| a a_rounded|
|0 1.123450 1.1|
|1 2.567890 2.6|
|2 3.901234 3.9|
└──────────────────────┘
shift
shift(n: int) -> Self
Shift values by n positions.
Info
For lazy backends, this operation must be followed by Expr.over with
order_by specified, see order-dependence.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
n
|
int
|
Number of positions to shift values by. |
required |
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use fill_null and cast. For example, to shift
and fill missing values with 0 in an Int64 column, you could
do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_shift=nw.col("a").shift(n=1))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|shape: (5, 2) |
|┌─────┬─────────┐ |
|│ a ┆ a_shift │ |
|│ --- ┆ --- │ |
|│ i64 ┆ i64 │ |
|╞═════╪═════════╡ |
|│ 1 ┆ null │ |
|│ 1 ┆ 1 │ |
|│ 3 ┆ 1 │ |
|│ 5 ┆ 3 │ |
|│ 5 ┆ 5 │ |
|└─────┴─────────┘ |
└──────────────────┘
sin
sin() -> Self
Compute the element-wise value for the sine.
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> from math import pi
>>> df_native = pa.table({"values": [0, pi / 2, 3 * pi / 2]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(sin=nw.col("values").sin())
>>> result
┌─────────────────────────────────────────────────┐
| Narwhals DataFrame |
|-------------------------------------------------|
|pyarrow.Table |
|values: double |
|sin: double |
|---- |
|values: [[0,1.5707963267948966,4.71238898038469]]|
|sin: [[0,1,-1]] |
└─────────────────────────────────────────────────┘
skew
skew() -> Self
Calculate the sample skewness of a column.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").skew())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 0.0 1.472427 |
└──────────────────┘
sqrt
sqrt() -> Self
Compute the square root of the elements.
Examples:
>>> import pyarrow as pa
>>> import narwhals as nw
>>> df_native = pa.table({"values": [1, 4, 9]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(sqrt=nw.col("values").sqrt())
>>> result
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|pyarrow.Table |
|values: int64 |
|sqrt: double |
|---- |
|values: [[1,4,9]] |
|sqrt: [[1,2,3]] |
└──────────────────┘
std
std(*, ddof: int = 1) -> Self
Get standard deviation.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
ddof
|
int
|
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").std(ddof=0))
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a b|
|0 17.79513 1.265789|
└─────────────────────┘
sum
sum() -> Self
Return the sum value.
If there are no non-null elements, the result is zero.
Examples:
>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql("SELECT * FROM VALUES (5, 50), (10, 100) df(a, b)")
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").sum())
┌───────────────────┐
|Narwhals LazyFrame |
|-------------------|
|┌────────┬────────┐|
|│ a │ b │|
|│ int128 │ int128 │|
|├────────┼────────┤|
|│ 15 │ 150 │|
|└────────┴────────┘|
└───────────────────┘
unique
unique() -> Self
Return unique values of this expression.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").unique().sum())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 9 12 |
└──────────────────┘
var
var(*, ddof: int = 1) -> Self
Get variance.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
ddof
|
int
|
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").var(ddof=0))
┌───────────────────────┐
| Narwhals DataFrame |
|-----------------------|
| a b|
|0 316.666667 1.602222|
└───────────────────────┘