narwhals.Expr
abs() -> Self
Return absolute value of each element.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, -2], "b": [-3, 4]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.col("a", "b").abs().name.suffix("_abs"))
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a b a_abs b_abs|
|0 1 -3 1 3|
|1 -2 4 2 4|
└─────────────────────┘
alias(name: str) -> Self
Rename the expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name
|
str
|
The new name. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df = nw.from_native(df_native)
>>> df.select((nw.col("b") + 10).alias("c"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| c |
| 0 14 |
| 1 15 |
└──────────────────┘
all() -> Self
Return whether all values in the column are `True`.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").all())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 False True |
└──────────────────┘
any() -> Self
Return whether any of the values in the column are `True`.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").any())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 True True |
└──────────────────┘
arg_max() -> Self
Returns the index of the maximum value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").arg_max().name.suffix("_arg_max"))
┌───────────────────────┐
| Narwhals DataFrame |
|-----------------------|
| a_arg_max b_arg_max|
|0 1 0|
└───────────────────────┘
arg_min() -> Self
Returns the index of the minimum value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").arg_min().name.suffix("_arg_min"))
┌───────────────────────┐
| Narwhals DataFrame |
|-----------------------|
| a_arg_min b_arg_min|
|0 0 1|
└───────────────────────┘
arg_true() -> Self
Find elements where boolean expression is True.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
cast(dtype: DType | type[DType]) -> Self
Redefine an object's data type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dtype
|
DType | type[DType]
|
Data type that the object will be cast into. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| foo bar |
| 0 1.0 6 |
| 1 2.0 7 |
| 2 3.0 8 |
└──────────────────┘
count() -> Self
Returns the number of non-null elements in the column.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().count())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 3 2 |
└──────────────────┘
cum_count(*, reverse: bool = False) -> Self
Return the cumulative count of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": ["x", "k", None, "d"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_count().alias("a_cum_count"),
... nw.col("a").cum_count(reverse=True).alias("a_cum_count_reverse"),
... )
┌─────────────────────────────────────────┐
| Narwhals DataFrame |
|-----------------------------------------|
| a a_cum_count a_cum_count_reverse|
|0 x 1 3|
|1 k 2 2|
|2 None 2 1|
|3 d 3 1|
└─────────────────────────────────────────┘
cum_max(*, reverse: bool = False) -> Self
Return the cumulative max of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_max().alias("a_cum_max"),
... nw.col("a").cum_max(reverse=True).alias("a_cum_max_reverse"),
... )
┌────────────────────────────────────┐
| Narwhals DataFrame |
|------------------------------------|
| a a_cum_max a_cum_max_reverse|
|0 1.0 1.0 3.0|
|1 3.0 3.0 3.0|
|2 NaN NaN NaN|
|3 2.0 3.0 2.0|
└────────────────────────────────────┘
cum_min(*, reverse: bool = False) -> Self
Return the cumulative min of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 1, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_min().alias("a_cum_min"),
... nw.col("a").cum_min(reverse=True).alias("a_cum_min_reverse"),
... )
┌────────────────────────────────────┐
| Narwhals DataFrame |
|------------------------------------|
| a a_cum_min a_cum_min_reverse|
|0 3.0 3.0 1.0|
|1 1.0 1.0 1.0|
|2 NaN NaN NaN|
|3 2.0 1.0 2.0|
└────────────────────────────────────┘
cum_prod(*, reverse: bool = False) -> Self
Return the cumulative product of the non-null values in the column.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 3, None, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a").cum_prod().alias("a_cum_prod"),
... nw.col("a").cum_prod(reverse=True).alias("a_cum_prod_reverse"),
... )
┌──────────────────────────────────────┐
| Narwhals DataFrame |
|--------------------------------------|
| a a_cum_prod a_cum_prod_reverse|
|0 1.0 1.0 6.0|
|1 3.0 3.0 6.0|
|2 NaN NaN NaN|
|3 2.0 6.0 2.0|
└──────────────────────────────────────┘
cum_sum(*, reverse: bool = False) -> Self
Return cumulative sum.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
reverse
|
bool
|
reverse the operation |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_cum_sum=nw.col("a").cum_sum())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b a_cum_sum|
|0 1 2 1|
|1 1 4 2|
|2 3 4 5|
|3 5 6 10|
|4 5 6 15|
└──────────────────┘
diff() -> Self
Returns the difference between each element and the previous one.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use `fill_null` and `cast`. For example, to calculate
the diff and fill missing values with `0` in an Int64 column, you could do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_diff=nw.col("a").diff())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| shape: (5, 2) |
| ┌─────┬────────┐ |
| │ a ┆ a_diff │ |
| │ --- ┆ --- │ |
| │ i64 ┆ i64 │ |
| ╞═════╪════════╡ |
| │ 1 ┆ null │ |
| │ 1 ┆ 0 │ |
| │ 3 ┆ 2 │ |
| │ 5 ┆ 2 │ |
| │ 5 ┆ 0 │ |
| └─────┴────────┘ |
└──────────────────┘
drop_nulls() -> Self
Drop null values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a").drop_nulls())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| shape: (5, 1) |
| ┌─────┐ |
| │ a │ |
| │ --- │ |
| │ f64 │ |
| ╞═════╡ |
| │ 2.0 │ |
| │ 4.0 │ |
| │ NaN │ |
| │ 3.0 │ |
| │ 5.0 │ |
| └─────┘ |
└──────────────────┘
ewm_mean(*, com: float | None = None, span: float | None = None, half_life: float | None = None, alpha: float | None = None, adjust: bool = True, min_samples: int = 1, ignore_nulls: bool = False) -> Self
Compute exponentially-weighted moving average.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
com
|
float | None
|
Specify decay in terms of center of mass, \(\gamma\), with \(\alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0\). |
None
|
span
|
float | None
|
Specify decay in terms of span, \(\theta\), with \(\alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1\). |
None
|
half_life
|
float | None
|
Specify decay in terms of half-life, \(\tau\), with \(\alpha = 1 - \exp \left\{ \frac{-\ln(2)}{\tau} \right\} \; \forall \; \tau > 0\). |
None
|
alpha
|
float | None
|
Specify smoothing factor alpha directly, \(0 < \alpha \leq 1\). |
None
|
adjust
|
bool
|
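Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings.
- When `adjust=True` (the default), the EW function is calculated using weights \(w_i = (1 - \alpha)^i\).
- When `adjust=False`, the EW function is calculated recursively by \(y_0 = x_0\) and \(y_t = (1 - \alpha) y_{t-1} + \alpha x_t\).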
|
True
|
min_samples
|
int
|
Minimum number of observations in window required to have a value, (otherwise result is null). |
1
|
ignore_nulls
|
bool
|
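Ignore missing values when calculating weights.
- When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, \text{None}, x_2]\) are \((1 - \alpha)^2\) and \(1\) if `adjust=True`, and \((1 - \alpha)^2\) and \(\alpha\) if `adjust=False`.
- When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of \(x_0\) and \(x_2\) used in calculating the final weighted average of \([x_0, \text{None}, x_2]\) are \(1 - \alpha\) and \(1\) if `adjust=True`, and \(1 - \alpha\) and \(\alpha\) if `adjust=False`.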
|
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.typing import IntoFrameT
>>>
>>> data = {"a": [1, 2, 3]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
We define a library agnostic function:
>>> def agnostic_ewm_mean(df_native: IntoFrameT) -> IntoFrameT:
... df = nw.from_native(df_native)
... return df.select(
... nw.col("a").ewm_mean(com=1, ignore_nulls=False)
... ).to_native()
We can then pass either pandas or Polars to `agnostic_ewm_mean`:
>>> agnostic_ewm_mean(df_pd)
a
0 1.000000
1 1.666667
2 2.428571
>>> agnostic_ewm_mean(df_pl)
shape: (3, 1)
┌──────────┐
│ a │
│ --- │
│ f64 │
╞══════════╡
│ 1.0 │
│ 1.666667 │
│ 2.428571 │
└──────────┘
fill_null(value: Any | None = None, strategy: Literal['forward', 'backward'] | None = None, limit: int | None = None) -> Self
Fill null values with given value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
value
|
Any | None
|
Value used to fill null values. |
None
|
strategy
|
Literal['forward', 'backward'] | None
|
Strategy used to fill null values. |
None
|
limit
|
int | None
|
Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame(
... {
... "a": [2, None, None, 3],
... "b": [2.0, float("nan"), float("nan"), 3.0],
... }
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a", "b").fill_null(0).name.suffix("_nulls_filled")
... )
┌────────────────────────────────────────────────┐
| Narwhals DataFrame |
|------------------------------------------------|
|shape: (4, 4) |
|┌──────┬─────┬────────────────┬────────────────┐|
|│ a ┆ b ┆ a_nulls_filled ┆ b_nulls_filled │|
|│ --- ┆ --- ┆ --- ┆ --- │|
|│ i64 ┆ f64 ┆ i64 ┆ f64 │|
|╞══════╪═════╪════════════════╪════════════════╡|
|│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │|
|│ null ┆ NaN ┆ 0 ┆ NaN │|
|│ null ┆ NaN ┆ 0 ┆ NaN │|
|│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │|
|└──────┴─────┴────────────────┴────────────────┘|
└────────────────────────────────────────────────┘
Using a strategy:
>>> df.with_columns(
... nw.col("a", "b")
... .fill_null(strategy="forward", limit=1)
... .name.suffix("_nulls_forward_filled")
... )
┌────────────────────────────────────────────────────────────────┐
| Narwhals DataFrame |
|----------------------------------------------------------------|
|shape: (4, 4) |
|┌──────┬─────┬────────────────────────┬────────────────────────┐|
|│ a ┆ b ┆ a_nulls_forward_filled ┆ b_nulls_forward_filled │|
|│ --- ┆ --- ┆ --- ┆ --- │|
|│ i64 ┆ f64 ┆ i64 ┆ f64 │|
|╞══════╪═════╪════════════════════════╪════════════════════════╡|
|│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │|
|│ null ┆ NaN ┆ 2 ┆ NaN │|
|│ null ┆ NaN ┆ null ┆ NaN │|
|│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │|
|└──────┴─────┴────────────────────────┴────────────────────────┘|
└────────────────────────────────────────────────────────────────┘
filter(*predicates: Any) -> Self
Filters elements based on a condition, returning a new expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
predicates
|
Any
|
Conditions to filter by (which get ANDed together). |
()
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
... {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]}
... )
>>> df = nw.from_native(df_native)
>>> df.select(
... nw.col("a").filter(nw.col("a") > 4),
... nw.col("b").filter(nw.col("b") < 13),
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 3 5 10 |
| 4 6 11 |
| 5 7 12 |
└──────────────────┘
gather_every(n: int, offset: int = 0) -> Self
Take every nth value in the Series and return as new Series.
Warning
`Expr.gather_every` is deprecated and will be removed in a future version.
Hint: instead of `df.select(nw.col('a').gather_every())`, use `df.select(nw.col('a')).gather_every()`.
Note: this will remain available in `narwhals.stable.v1`.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Gather every n-th row. |
required |
offset
|
int
|
Starting index. |
0
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
head(n: int = 10) -> Self
Get the first `n` rows.
Warning
`Expr.head` is deprecated and will be removed in a future version.
Hint: instead of `df.select(nw.col('a').head())`, use `df.select(nw.col('a')).head()`.
Note: this will remain available in `narwhals.stable.v1`.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of rows to return. |
10
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
clip(lower_bound: IntoExpr | Any | None = None, upper_bound: IntoExpr | Any | None = None) -> Self
Clip values in the Series.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound
|
IntoExpr | Any | None
|
Lower bound value. String literals are treated as column names. |
None
|
upper_bound
|
IntoExpr | Any | None
|
Upper bound value. String literals are treated as column names. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_clipped=nw.col("a").clip(-1, 3))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a a_clipped |
| 0 1 1 |
| 1 2 2 |
| 2 3 3 |
└──────────────────┘
is_between(lower_bound: Any | IntoExpr, upper_bound: Any | IntoExpr, closed: Literal['left', 'right', 'none', 'both'] = 'both') -> Self
Check if this expression is between the given lower and upper bounds.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound
|
Any | IntoExpr
|
Lower bound value. String literals are interpreted as column names. |
required |
upper_bound
|
Any | IntoExpr
|
Upper bound value. String literals are interpreted as column names. |
required |
closed
|
Literal['left', 'right', 'none', 'both']
|
Define which sides of the interval are closed (inclusive). |
'both'
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(b=nw.col("a").is_between(2, 4, "right"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 False |
| 1 2 False |
| 2 3 True |
| 3 4 True |
| 4 5 False |
└──────────────────┘
is_duplicated() -> Self
Return a boolean mask indicating duplicated values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.all().is_duplicated().name.suffix("_is_duplicated"))
┌─────────────────────────────────────────┐
| Narwhals DataFrame |
|-----------------------------------------|
| a b a_is_duplicated b_is_duplicated|
|0 1 a True True|
|1 2 a False True|
|2 3 b False False|
|3 1 c True False|
└─────────────────────────────────────────┘
is_finite() -> Self
Returns boolean values indicating which original values are finite.
Warning
pandas handles null values differently from Polars and PyArrow.
See null_handling for reference.
`is_finite` will return False for NaN and null values in the Dask and
pandas non-nullable backends, while for the Polars, PyArrow and pandas
nullable backends null values are kept as such.
Returns:
Type | Description |
---|---|
Self
|
Expression of `Boolean` data type. |
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [float("nan"), float("inf"), 2.0, None]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_is_finite=nw.col("a").is_finite())
┌──────────────────────┐
| Narwhals DataFrame |
|----------------------|
|shape: (4, 2) |
|┌──────┬─────────────┐|
|│ a ┆ a_is_finite │|
|│ --- ┆ --- │|
|│ f64 ┆ bool │|
|╞══════╪═════════════╡|
|│ NaN ┆ false │|
|│ inf ┆ false │|
|│ 2.0 ┆ true │|
|│ null ┆ null │|
|└──────┴─────────────┘|
└──────────────────────┘
is_first_distinct() -> Self
Return a boolean mask indicating the first occurrence of each distinct value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.all().is_first_distinct().name.suffix("_is_first_distinct")
... )
┌─────────────────────────────────────────────────┐
| Narwhals DataFrame |
|-------------------------------------------------|
| a b a_is_first_distinct b_is_first_distinct|
|0 1 a True True|
|1 2 a True False|
|2 3 b True True|
|3 1 c False True|
└─────────────────────────────────────────────────┘
is_in(other: Any) -> Self
Check if elements of this expression are present in the other iterable.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other
|
Any
|
iterable |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 9, 10]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(b=nw.col("a").is_in([1, 2]))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 True |
| 1 2 True |
| 2 9 False |
| 3 10 False |
└──────────────────┘
is_last_distinct() -> Self
Return a boolean mask indicating the last occurrence of each distinct value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.all().is_last_distinct().name.suffix("_is_last_distinct")
... )
┌───────────────────────────────────────────────┐
| Narwhals DataFrame |
|-----------------------------------------------|
| a b a_is_last_distinct b_is_last_distinct|
|0 1 a False False|
|1 2 a True True|
|2 3 b True True|
|3 1 c True True|
└───────────────────────────────────────────────┘
is_nan() -> Self
Indicate which values are NaN.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan()
... )
┌────────────────────────────────────────┐
| Narwhals LazyFrame |
|----------------------------------------|
|┌───────┬────────┬──────────┬──────────┐|
|│ a │ b │ a_is_nan │ b_is_nan │|
|│ int32 │ double │ boolean │ boolean │|
|├───────┼────────┼──────────┼──────────┤|
|│ NULL │ nan │ NULL │ true │|
|│ 2 │ 2.0 │ false │ false │|
|└───────┴────────┴──────────┴──────────┘|
└────────────────────────────────────────┘
is_null() -> Self
Returns a boolean Series indicating which values are null.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql(
... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)"
... )
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
... )
┌──────────────────────────────────────────┐
| Narwhals LazyFrame |
|------------------------------------------|
|┌───────┬────────┬───────────┬───────────┐|
|│ a │ b │ a_is_null │ b_is_null │|
|│ int32 │ double │ boolean │ boolean │|
|├───────┼────────┼───────────┼───────────┤|
|│ NULL │ nan │ true │ false │|
|│ 2 │ 2.0 │ false │ false │|
|└───────┴────────┴───────────┴───────────┘|
└──────────────────────────────────────────┘
is_unique() -> Self
Return a boolean mask indicating unique values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(nw.all().is_unique().name.suffix("_is_unique"))
┌─────────────────────────────────┐
| Narwhals DataFrame |
|---------------------------------|
| a b a_is_unique b_is_unique|
|0 1 a False False|
|1 2 a True False|
|2 3 b True True|
|3 1 c False True|
└─────────────────────────────────┘
len() -> Self
Return the number of elements in the column.
Null values count towards the total.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": ["x", "y", "z"], "b": [1, 2, 1]})
>>> df = nw.from_native(df_native)
>>> df.select(
... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a1 a2 |
| 0 2 1 |
└──────────────────┘
map_batches(function: Callable[[Any], Self], return_dtype: DType | None = None) -> Self
Apply a custom python function to a whole Series or sequence of Series.
The output of this custom function is presumed to be either a Series, or a NumPy array (in which case it will be automatically converted into a Series).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
function
|
Callable[[Any], Self]
|
Function to apply to Series. |
required |
return_dtype
|
DType | None
|
Dtype of the output Series. If not set, the dtype will be inferred based on the first non-null value that is returned by the function. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... nw.col("a", "b")
... .map_batches(lambda s: s.to_numpy() + 1, return_dtype=nw.Float64)
... .name.suffix("_mapped")
... )
┌───────────────────────────┐
| Narwhals DataFrame |
|---------------------------|
| a b a_mapped b_mapped|
|0 1 4 2.0 5.0|
|1 2 5 3.0 6.0|
|2 3 6 4.0 7.0|
└───────────────────────────┘
max() -> Self
Returns the maximum value(s) from a column(s).
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.max("a", "b"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 20 100 |
└──────────────────┘
mean() -> Self
Get mean value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").mean())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 0.0 4.0 |
└──────────────────┘
median() -> Self
Get median value.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
Results might slightly differ across backends due to differences in the underlying algorithms used to compute the median.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").median())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 3.0 4.0 |
└──────────────────┘
min() -> Self
Returns the minimum value(s) from a column(s).
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.min("a", "b"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 3 |
└──────────────────┘
mode() -> Self
Compute the most occurring value(s).
Can return multiple values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 2, 3], "b": [1, 1, 2, 2]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a").mode()).sort("a")
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a |
| 0 1 |
└──────────────────┘
null_count() -> Self
Count null values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas handles null values differently from Polars and PyArrow. See null_handling for reference.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
... {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
... )
>>> df = nw.from_native(df_native)
>>> df.select(nw.all().null_count())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 1 2 |
└──────────────────┘
n_unique() -> Self
Returns count of unique values.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").n_unique())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 5 3 |
└──────────────────┘
over(*keys: str | Iterable[str]) -> Self
Compute expressions over the given groups.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
keys
|
str | Iterable[str]
|
Names of columns to compute window expression over.
Must be names of columns, as opposed to expressions -
so, this is a bit less flexible than Polars' `Expr.over`. |
()
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 4], "b": ["x", "x", "y"]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_min_per_group=nw.col("a").min().over("b"))
┌────────────────────────┐
| Narwhals DataFrame |
|------------------------|
| a b a_min_per_group|
|0 1 x 1|
|1 2 x 1|
|2 4 y 4|
└────────────────────────┘
Cumulative operations are also supported, but (currently) only for pandas and Polars:
>>> df.with_columns(a_cum_sum_per_group=nw.col("a").cum_sum().over("b"))
┌────────────────────────────┐
| Narwhals DataFrame |
|----------------------------|
| a b a_cum_sum_per_group|
|0 1 x 1|
|1 2 x 3|
|2 4 y 4|
└────────────────────────────┘
pipe(function: Callable[Concatenate[Self, PS], R], *args: PS.args, **kwargs: PS.kwargs) -> R
Pipe function call.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
function
|
Callable[Concatenate[Self, PS], R]
|
Function to apply. |
required |
args
|
args
|
Positional arguments to pass to function. |
()
|
kwargs
|
kwargs
|
Keyword arguments to pass to function. |
{}
|
Returns:
Type | Description |
---|---|
R
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_piped=nw.col("a").pipe(lambda x: x + 1))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a a_piped |
| 0 1 2 |
| 1 2 3 |
| 2 3 4 |
| 3 4 5 |
└──────────────────┘
quantile(quantile: float, interpolation: Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']) -> Self
Get quantile value.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
quantile
|
float
|
Quantile between 0.0 and 1.0. |
required |
interpolation
|
Literal['nearest', 'higher', 'lower', 'midpoint', 'linear']
|
Interpolation method. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Note
- pandas and Polars may have implementation differences for a given interpolation method.
- dask has its own method to approximate quantiles and does not implement 'nearest', 'higher', 'lower' or 'midpoint' as interpolation methods - use 'linear', which is closest to the native dask method.
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame(
... {"a": list(range(50)), "b": list(range(50, 100))}
... )
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 24.5 74.5 |
└──────────────────┘
rank(method: Literal['average', 'min', 'max', 'dense', 'ordinal'] = 'average', *, descending: bool = False) -> Self
Assign ranks to data, dealing with ties appropriately.
Notes
The resulting dtype may differ between backends.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
method
|
Literal['average', 'min', 'max', 'dense', 'ordinal']
|
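The method used to assign ranks to tied elements. The following methods are available (default is 'average'):
- 'average': The average of the ranks that would have been assigned to all the tied values is assigned to each value.
- 'min': The minimum of the ranks that would have been assigned to all the tied values is assigned to each value (also referred to as "competition" ranking).
- 'max': The maximum of the ranks that would have been assigned to all the tied values is assigned to each value.
- 'dense': Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements.
- 'ordinal': All values are given a distinct rank, corresponding to the order in which the values occur.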
|
'average'
|
descending
|
bool
|
Rank in descending order. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression with rank data. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 6, 1, 1, 6]})
>>> df = nw.from_native(df_native)
>>> result = df.with_columns(rank=nw.col("a").rank(method="dense"))
>>> result
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a rank |
| 0 3 2.0 |
| 1 6 3.0 |
| 2 1 1.0 |
| 3 1 1.0 |
| 4 6 3.0 |
└──────────────────┘
replace_strict(old: Sequence[Any] | Mapping[Any, Any], new: Sequence[Any] | None = None, *, return_dtype: DType | type[DType] | None = None) -> Self
Replace all values by different values.
This function must replace all non-null input values (else it raises an error).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
old
|
Sequence[Any] | Mapping[Any, Any]
|
Sequence of values to replace. It also accepts a mapping of values to
their replacement as syntactic sugar for
`replace_strict(old=list(mapping.keys()), new=list(mapping.values()))`.
|
required |
new
|
Sequence[Any] | None
|
Sequence of values to replace by. Length must match the length of `old`. |
None
|
return_dtype
|
DType | type[DType] | None
|
The data type of the resulting expression. If set to `None` (default), the data type is determined automatically based on the other inputs. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [3, 0, 1, 2]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... b=nw.col("a").replace_strict(
... [0, 1, 2, 3],
... ["zero", "one", "two", "three"],
... return_dtype=nw.String,
... )
... )
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 3 three |
| 1 0 zero |
| 2 1 one |
| 3 2 two |
└──────────────────┘
rolling_mean(window_size: int, *, min_samples: int | None = None, center: bool = False) -> Self
Apply a rolling mean (moving mean) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length `window_size` will traverse the values. The resulting values
will be aggregated to their mean.
The window at a given row will include the row itself and the `window_size - 1`
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_mean=nw.col("a").rolling_mean(window_size=3, min_samples=1)
... )
┌──────────────────────┐
| Narwhals DataFrame |
|----------------------|
| a a_rolling_mean|
|0 1.0 1.0|
|1 2.0 1.5|
|2 NaN 1.5|
|3 4.0 3.0|
└──────────────────────┘
rolling_std(window_size: int, *, min_samples: int | None = None, center: bool = False, ddof: int = 1) -> Self
Apply a rolling standard deviation (moving standard deviation) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length window_size
will traverse the values. The resulting values
will be aggregated to their standard deviation.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
ddof
|
int
|
Delta Degrees of Freedom; the divisor for a length N window is N - ddof. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_std=nw.col("a").rolling_std(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a a_rolling_std|
|0 1.0 NaN|
|1 2.0 0.707107|
|2 NaN 0.707107|
|3 4.0 1.414214|
└─────────────────────┘
rolling_sum(window_size: int, *, min_samples: int | None = None, center: bool = False) -> Self
Apply a rolling sum (moving sum) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length window_size
will traverse the values. The resulting values
will be aggregated to their sum.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_sum=nw.col("a").rolling_sum(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a a_rolling_sum|
|0 1.0 1.0|
|1 2.0 3.0|
|2 NaN 3.0|
|3 4.0 6.0|
└─────────────────────┘
rolling_var(window_size: int, *, min_samples: int | None = None, center: bool = False, ddof: int = 1) -> Self
Apply a rolling variance (moving variance) over the values.
Warning
This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.
A window of length window_size
will traverse the values. The resulting values
will be aggregated to their variance.
The window at a given row will include the row itself and the window_size - 1
elements before it.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
window_size
|
int
|
The length of the window in number of elements. It must be a strictly positive integer. |
required |
min_samples
|
int | None
|
The number of values in the window that should be non-null before
computing a result. If set to None (default), it will be set equal to window_size. |
None
|
center
|
bool
|
Set the labels at the center of the window. |
False
|
ddof
|
int
|
Delta Degrees of Freedom; the divisor for a length N window is N - ddof. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(
... a_rolling_var=nw.col("a").rolling_var(window_size=3, min_samples=1)
... )
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a a_rolling_var|
|0 1.0 NaN|
|1 2.0 0.5|
|2 NaN 0.5|
|3 4.0 2.0|
└─────────────────────┘
round(decimals: int = 0) -> Self
Round underlying floating point data by decimals
digits.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
decimals
|
int
|
Number of decimals to round by. |
0
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
For values exactly halfway between rounded decimal values, pandas behaves differently than Polars and Arrow.
pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 to 4.0, etc.).
Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1.12345, 2.56789, 3.901234]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_rounded=nw.col("a").round(1))
┌──────────────────────┐
| Narwhals DataFrame |
|----------------------|
| a a_rounded|
|0 1.123450 1.1|
|1 2.567890 2.6|
|2 3.901234 3.9|
└──────────────────────┘
sample(n: int | None = None, *, fraction: float | None = None, with_replacement: bool = False, seed: int | None = None) -> Self
Sample randomly from this expression.
Warning
Expr.sample is deprecated and will be removed in a future version.
Hint: instead of df.select(nw.col('a').sample()), use df.select(nw.col('a')).sample() instead.
Note: this will remain available in narwhals.stable.v1.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int | None
|
Number of items to return. Cannot be used with fraction. |
None
|
fraction
|
float | None
|
Fraction of items to return. Cannot be used with n. |
None
|
with_replacement
|
bool
|
Allow values to be sampled more than once. |
False
|
seed
|
int | None
|
Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation. |
None
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
shift(n: int) -> Self
Shift values by n
positions.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of positions to shift values by. |
required |
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use fill_null and cast. For example, to shift
and fill missing values with 0 in an Int64 column, you could do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import narwhals as nw
>>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df = nw.from_native(df_native)
>>> df.with_columns(a_shift=nw.col("a").shift(n=1))
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|shape: (5, 2) |
|┌─────┬─────────┐ |
|│ a ┆ a_shift │ |
|│ --- ┆ --- │ |
|│ i64 ┆ i64 │ |
|╞═════╪═════════╡ |
|│ 1 ┆ null │ |
|│ 1 ┆ 1 │ |
|│ 3 ┆ 1 │ |
|│ 5 ┆ 3 │ |
|│ 5 ┆ 5 │ |
|└─────┴─────────┘ |
└──────────────────┘
sort(*, descending: bool = False, nulls_last: bool = False) -> Self
Sort this column. Place null values first.
Warning
Expr.sort is deprecated and will be removed in a future version.
Hint: instead of df.select(nw.col('a').sort()), use df.select(nw.col('a')).sort() instead.
Note: this will remain available in narwhals.stable.v1.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
descending
|
bool
|
Sort in descending order. |
False
|
nulls_last
|
bool
|
Place null values last instead of first. |
False
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
skew() -> Self
Calculate the sample skewness of a column.
Returns:
Type | Description |
---|---|
Self
|
An expression representing the sample skewness of the column. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").skew())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 0.0 1.472427 |
└──────────────────┘
std(*, ddof: int = 1) -> Self
Get standard deviation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
ddof
|
int
|
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").std(ddof=0))
┌─────────────────────┐
| Narwhals DataFrame |
|---------------------|
| a b|
|0 17.79513 1.265789|
└─────────────────────┘
sum() -> Expr
Return the sum value.
Returns:
Type | Description |
---|---|
Expr
|
A new expression. |
Examples:
>>> import duckdb
>>> import narwhals as nw
>>> df_native = duckdb.sql("SELECT * FROM VALUES (5, 50), (10, 100) df(a, b)")
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").sum())
┌───────────────────┐
|Narwhals LazyFrame |
|-------------------|
|┌────────┬────────┐|
|│ a │ b │|
|│ int128 │ int128 │|
|├────────┼────────┤|
|│ 15 │ 150 │|
|└────────┴────────┘|
└───────────────────┘
tail(n: int = 10) -> Self
Get the last n
rows.
Warning
Expr.tail is deprecated and will be removed in a future version.
Hint: instead of df.select(nw.col('a').tail()), use df.select(nw.col('a')).tail() instead.
Note: this will remain available in narwhals.stable.v1.
See stable api for more information.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n
|
int
|
Number of rows to return. |
10
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
unique() -> Self
Return unique values of this expression.
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").unique().sum())
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| a b |
| 0 9 12 |
└──────────────────┘
var(*, ddof: int = 1) -> Self
Get variance.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
ddof
|
int
|
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Returns:
Type | Description |
---|---|
Self
|
A new expression. |
Examples:
>>> import pandas as pd
>>> import narwhals as nw
>>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df = nw.from_native(df_native)
>>> df.select(nw.col("a", "b").var(ddof=0))
┌───────────────────────┐
| Narwhals DataFrame |
|-----------------------|
| a b|
|0 316.666667 1.602222|
└───────────────────────┘