narwhals.Expr
abs()
Return absolute value of each element.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> data = {"a": [1, -2], "b": [-3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").abs())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 1 3
1 2 4
>>> func(df_pl)
shape: (2, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 3 │
│ 2 ┆ 4 │
└─────┴─────┘
alias(name)
Rename the expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name |
str
|
The new name. |
required |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select((nw.col("b") + 10).alias("c"))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
c
0 14
1 15
>>> func(df_pl)
shape: (2, 1)
┌─────┐
│ c │
│ --- │
│ i64 │
╞═════╡
│ 14 │
│ 15 │
└─────┘
all()
Return whether all values in the column are `True`.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").all())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 False True
>>> func(df_pl)
shape: (1, 2)
┌───────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪══════╡
│ false ┆ true │
└───────┴──────┘
any()
Return whether any of the values in the column are True.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [True, False], "b": [True, True]})
>>> df_pl = pl.DataFrame({"a": [True, False], "b": [True, True]})
We define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").any())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 True True
>>> func(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞══════╪══════╡
│ true ┆ true │
└──────┴──────┘
arg_true()
Find elements where boolean expression is True.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> data = {"a": [1, None, None, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
We define a library-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").is_null().arg_true())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
1 1
2 2
>>> func(df_pl)
shape: (2, 1)
┌─────┐
│ a │
│ --- │
│ u32 │
╞═════╡
│ 1 │
│ 2 │
└─────┘
cast(dtype)
Redefine an object's data type.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
dtype |
Any
|
Data type that the object will be cast into. |
required |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> from datetime import date
>>> df_pd = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
>>> df_pl = pl.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(
... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)
... )
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
foo bar
0 1.0 6
1 2.0 7
2 3.0 8
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ f32 ┆ u8 │
╞═════╪═════╡
│ 1.0 ┆ 6 │
│ 2.0 ┆ 7 │
│ 3.0 ┆ 8 │
└─────┴─────┘
count()
Returns the number of non-null elements in the column.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.all().count())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 3 2
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 3 ┆ 2 │
└─────┴─────┘
cum_sum()
Return cumulative sum.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").cum_sum())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 1 2
1 2 6
2 5 10
3 10 16
4 15 22
>>> func(df_pl)
shape: (5, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 2 │
│ 2 ┆ 6 │
│ 5 ┆ 10 │
│ 10 ┆ 16 │
│ 15 ┆ 22 │
└─────┴─────┘
diff()
Returns the difference between each element and the previous one.
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use `fill_null` and `cast`. For example, to calculate
the diff and fill missing values with `0` in an Int64 column, you could
do:
nw.col("a").diff().fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(a_diff=nw.col("a").diff())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a_diff
0 NaN
1 0.0
2 2.0
3 2.0
4 0.0
>>> func(df_pl)
shape: (5, 1)
┌────────┐
│ a_diff │
│ --- │
│ i64 │
╞════════╡
│ null │
│ 0 │
│ 2 │
│ 2 │
│ 0 │
└────────┘
drop_nulls()
Remove missing values.
Notes
pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
>>> df_pl = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").drop_nulls())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
0 2.0
1 4.0
3 3.0
5 5.0
>>> func(df_pl) # nan != null for polars
shape: (5, 1)
┌─────┐
│ a │
│ --- │
│ f64 │
╞═════╡
│ 2.0 │
│ 4.0 │
│ NaN │
│ 3.0 │
│ 5.0 │
└─────┘
fill_null(value)
Fill null values with given value.
Notes
pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame(
... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
>>> df_pl = pl.DataFrame(
... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.with_columns(nw.col("a", "b").fill_null(0))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 2.0 2.0
1 4.0 4.0
2 0.0 0.0
3 3.0 3.0
4 5.0 5.0
>>> func(df_pl) # nan != null for polars
shape: (5, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪═════╡
│ 2 ┆ 2.0 │
│ 4 ┆ 4.0 │
│ 0 ┆ NaN │
│ 3 ┆ 3.0 │
│ 5 ┆ 5.0 │
└─────┴─────┘
filter(*predicates)
Filters elements based on a condition, returning a new expression.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
>>> df_pl = pl.DataFrame({"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(
... nw.col("a").filter(nw.col("a") > 4),
... nw.col("b").filter(nw.col("b") < 13),
... )
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
3 5 10
4 6 11
5 7 12
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 5 ┆ 10 │
│ 6 ┆ 11 │
│ 7 ┆ 12 │
└─────┴─────┘
gather_every(n, offset=0)
Take every nth value in the Series and return as new Series.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int
|
Gather every n-th row. |
required |
offset |
int
|
Starting index. |
0
|
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function in which we gather every 2 rows, starting from an offset of 1:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").gather_every(n=2, offset=1))
>>> func(df_pd)
a
1 2
3 4
>>> func(df_pl)
shape: (2, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 4 │
└─────┘
head(n=10)
Get the first `n` rows.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int
|
Number of rows to return. |
10
|
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function that returns the first 3 rows:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").head(3))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
0 0
1 1
2 2
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 0 │
│ 1 │
│ 2 │
└─────┘
clip(lower_bound=None, upper_bound=None)
Clip values in the Series.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound |
Any | None
|
Lower bound value. |
None
|
upper_bound |
Any | None
|
Upper bound value. |
None
|
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> s = [1, 2, 3]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})
We define a library-agnostic function:
>>> @nw.narwhalify
... def func_lower(df):
... return df.select(nw.col("s").clip(2))
We can then pass either pandas or Polars to `func_lower`:
>>> func_lower(df_pd)
s
0 2
1 2
2 3
>>> func_lower(df_pl)
shape: (3, 1)
┌─────┐
│ s │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 2 │
│ 3 │
└─────┘
We define another library-agnostic function:
>>> @nw.narwhalify
... def func_upper(df):
... return df.select(nw.col("s").clip(upper_bound=2))
We can then pass either pandas or Polars to `func_upper`:
>>> func_upper(df_pd)
s
0 1
1 2
2 2
>>> func_upper(df_pl)
shape: (3, 1)
┌─────┐
│ s │
│ --- │
│ i64 │
╞═════╡
│ 1 │
│ 2 │
│ 2 │
└─────┘
We can have both at the same time:
>>> s = [-1, 1, -3, 3, -5, 5]
>>> df_pd = pd.DataFrame({"s": s})
>>> df_pl = pl.DataFrame({"s": s})
We define a library-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("s").clip(-1, 3))
We can pass either pandas or Polars to `func`:
>>> func(df_pd)
s
0 -1
1 1
2 -1
3 3
4 -1
5 3
>>> func(df_pl)
shape: (6, 1)
┌─────┐
│ s │
│ --- │
│ i64 │
╞═════╡
│ -1 │
│ 1 │
│ -1 │
│ 3 │
│ -1 │
│ 3 │
└─────┘
is_between(lower_bound, upper_bound, closed='both')
Check if this expression is between the given lower and upper bounds.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
lower_bound |
Any
|
Lower bound value. |
required |
upper_bound |
Any
|
Upper bound value. |
required |
closed |
str
|
Define which sides of the interval are closed (inclusive). |
'both'
|
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").is_between(2, 4, "right"))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
0 False
1 False
2 True
3 True
4 False
>>> func(df_pl)
shape: (5, 1)
┌───────┐
│ a │
│ --- │
│ bool │
╞═══════╡
│ false │
│ false │
│ true │
│ true │
│ false │
└───────┘
is_duplicated()
Return a boolean mask indicating duplicated values.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.all().is_duplicated())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 True True
1 False True
2 False False
3 True False
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ true ┆ true │
│ false ┆ true │
│ false ┆ false │
│ true ┆ false │
└───────┴───────┘
is_first_distinct()
Return a boolean mask indicating the first occurrence of each distinct value.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.all().is_first_distinct())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 True True
1 True False
2 True True
3 False True
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ true ┆ true │
│ true ┆ false │
│ true ┆ true │
│ false ┆ true │
└───────┴───────┘
is_in(other)
Check if elements of this expression are present in the other iterable.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
other |
Any
|
iterable |
required |
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 9, 10]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 9, 10]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.with_columns(b=nw.col("a").is_in([1, 2]))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 1 True
1 2 True
2 9 False
3 10 False
>>> func(df_pl)
shape: (4, 2)
┌─────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ bool │
╞═════╪═══════╡
│ 1 ┆ true │
│ 2 ┆ true │
│ 9 ┆ false │
│ 10 ┆ false │
└─────┴───────┘
is_last_distinct()
Return a boolean mask indicating the last occurrence of each distinct value.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.all().is_last_distinct())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 False False
1 True True
2 True True
3 True True
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ false ┆ false │
│ true ┆ true │
│ true ┆ true │
│ true ┆ true │
└───────┴───────┘
is_null()
Returns a boolean Series indicating which values are null.
Notes
pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame(
... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
>>> df_pl = pl.DataFrame(
... {"a": [2, 4, None, 3, 5], "b": [2.0, 4.0, float("nan"), 3.0, 5.0]}
... )
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.with_columns(
... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null()
... )
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b a_is_null b_is_null
0 2.0 2.0 False False
1 4.0 4.0 False False
2 NaN NaN True True
3 3.0 3.0 False False
4 5.0 5.0 False False
>>> func(df_pl) # nan != null for polars
shape: (5, 4)
┌──────┬─────┬───────────┬───────────┐
│ a ┆ b ┆ a_is_null ┆ b_is_null │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ bool ┆ bool │
╞══════╪═════╪═══════════╪═══════════╡
│ 2 ┆ 2.0 ┆ false ┆ false │
│ 4 ┆ 4.0 ┆ false ┆ false │
│ null ┆ NaN ┆ true ┆ false │
│ 3 ┆ 3.0 ┆ false ┆ false │
│ 5 ┆ 5.0 ┆ false ┆ false │
└──────┴─────┴───────────┴───────────┘
is_unique()
Return a boolean mask indicating unique values.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.all().is_unique())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 False False
1 True False
2 True True
3 False True
>>> func(df_pl)
shape: (4, 2)
┌───────┬───────┐
│ a ┆ b │
│ --- ┆ --- │
│ bool ┆ bool │
╞═══════╪═══════╡
│ false ┆ false │
│ true ┆ false │
│ true ┆ true │
│ false ┆ true │
└───────┴───────┘
len()
Return the number of elements in the column.
Null values count towards the total.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function that computes the len over different values of the "b" column:
>>> @nw.narwhalify
... def func(df):
... return df.select(
... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"),
... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"),
... )
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a1 a2
0 2 1
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a1 ┆ a2 │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 2 ┆ 1 │
└─────┴─────┘
max()
Returns the maximum value(s) from the column(s).
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [10, 20], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [10, 20], "b": [50, 100]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.max("a", "b"))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 20 100
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 20 ┆ 100 │
└─────┴─────┘
mean()
Get mean value.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
>>> df_pl = pl.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").mean())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 0.0 4.0
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞═════╪═════╡
│ 0.0 ┆ 4.0 │
└─────┴─────┘
min()
Returns the minimum value(s) from the column(s).
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [4, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2], "b": [4, 3]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.min("a", "b"))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 1 3
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 3 │
└─────┴─────┘
mode()
Compute the most frequently occurring value(s).
Can return multiple values.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> data = {
... "a": [1, 1, 2, 3],
... "b": [1, 1, 2, 2],
... }
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
We define a library-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").mode()).sort("a")
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
0 1
>>> func(df_pl)
shape: (1, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 1 │
└─────┘
null_count()
Count null values.
Notes
pandas and Polars handle null values differently. Polars distinguishes between NaN and Null, whereas pandas doesn't.
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.all().null_count())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 1 2
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 1 ┆ 2 │
└─────┴─────┘
n_unique()
Returns the count of unique values.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").n_unique())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 5 3
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ u32 ┆ u32 │
╞═════╪═════╡
│ 5 ┆ 3 │
└─────┴─────┘
over(*keys)
Compute expressions over the given groups.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
keys |
str | Iterable[str]
|
Names of columns to compute window expression over.
Must be names of columns, as opposed to expressions -
so, this is a bit less flexible than Polars' `Expr.over`. |
()
|
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1, 2, 3], "b": [1, 1, 2]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.with_columns(a_min_per_group=nw.col("a").min().over("b"))
We can then pass either pandas or Polars:
>>> func(df_pd)
a b a_min_per_group
0 1 1 1
1 2 1 1
2 3 2 3
>>> func(df_pl)
shape: (3, 3)
┌─────┬─────┬─────────────────┐
│ a ┆ b ┆ a_min_per_group │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════════════════╡
│ 1 ┆ 1 ┆ 1 │
│ 2 ┆ 1 ┆ 1 │
│ 3 ┆ 2 ┆ 3 │
└─────┴─────┴─────────────────┘
pipe(function, *args, **kwargs)
Pipe function call.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> data = {"a": [1, 2, 3, 4]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a library-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").pipe(lambda x: x + 1))
We can then pass any supported library:
>>> func(df_pd)
a
0 2
1 3
2 4
3 5
>>> func(df_pl)
shape: (4, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 3 │
│ 4 │
│ 5 │
└─────┘
quantile(quantile, interpolation)
Get quantile value.
Note
- pandas and Polars may have implementation differences for a given interpolation method.
- Dask has its own method to approximate quantiles and doesn't implement 'nearest', 'higher', 'lower', or 'midpoint' as interpolation methods; use 'linear', which is closest to Dask's native method.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
quantile |
float
|
Quantile between 0.0 and 1.0. |
required |
interpolation |
{'nearest', 'higher', 'lower', 'midpoint', 'linear'}
|
Interpolation method. |
required |
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": list(range(50)), "b": list(range(50, 100))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").quantile(0.5, interpolation="linear"))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 24.5 74.5
>>> func(df_pl)
shape: (1, 2)
┌──────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════╪══════╡
│ 24.5 ┆ 74.5 │
└──────┴──────┘
round(decimals=0)
Round underlying floating point data by `decimals` digits.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
decimals |
int
|
Number of decimals to round by. |
0
|
Notes
For values exactly halfway between rounded decimal values pandas behaves differently than Polars and Arrow.
pandas rounds to the nearest even value (e.g. -0.5 and 0.5 round to 0.0, 1.5 and 2.5 round to 2.0, 3.5 and 4.5 to 4.0, etc.).
Polars and Arrow round away from 0 (e.g. -0.5 to -1.0, 0.5 to 1.0, 1.5 to 2.0, 2.5 to 3.0, etc.).
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": [1.12345, 2.56789, 3.901234]}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function that rounds to the first decimal:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").round(1))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
0 1.1
1 2.6
2 3.9
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ f64 │
╞═════╡
│ 1.1 │
│ 2.6 │
│ 3.9 │
└─────┘
sample(n=None, *, fraction=None, with_replacement=False, seed=None)
Sample randomly from this expression.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int | None
|
Number of items to return. Cannot be used with fraction. |
None
|
fraction |
float | None
|
Fraction of items to return. Cannot be used with n. |
None
|
with_replacement |
bool
|
Allow values to be sampled more than once. |
False
|
seed |
int | None
|
Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation. |
None
|
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> df_pd = pd.DataFrame({"a": [1, 2, 3]})
>>> df_pl = pl.DataFrame({"a": [1, 2, 3]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").sample(fraction=1.0, with_replacement=True))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
2 3
0 1
2 3
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 2 │
│ 3 │
│ 3 │
└─────┘
shift(n)
Shift values by `n` positions.
Notes
pandas may change the dtype here, for example when introducing missing
values in an integer column. To ensure that the dtype doesn't change,
you may want to use `fill_null` and `cast`. For example, to shift
and fill missing values with `0` in an Int64 column, you could
do:
nw.col("a").shift(1).fill_null(0).cast(nw.Int64)
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(a_shift=nw.col("a").shift(n=1))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a_shift
0 NaN
1 1.0
2 1.0
3 3.0
4 5.0
>>> func(df_pl)
shape: (5, 1)
┌─────────┐
│ a_shift │
│ --- │
│ i64 │
╞═════════╡
│ null │
│ 1 │
│ 1 │
│ 3 │
│ 5 │
└─────────┘
sort(*, descending=False, nulls_last=False)
Sort this column. Place null values first.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
descending |
bool
|
Sort in descending order. |
False
|
nulls_last |
bool
|
Place null values last instead of first. |
False
|
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> df_pd = pd.DataFrame({"a": [5, None, 1, 2]})
>>> df_pl = pl.DataFrame({"a": [5, None, 1, 2]})
Let's define dataframe-agnostic functions:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").sort())
>>> def func_descend(df):
... df = nw.from_native(df)
... df = df.select(nw.col("a").sort(descending=True))
... return nw.to_native(df)
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
1 NaN
2 1.0
3 2.0
0 5.0
>>> func(df_pl)
shape: (4, 1)
┌──────┐
│ a │
│ --- │
│ i64 │
╞══════╡
│ null │
│ 1 │
│ 2 │
│ 5 │
└──────┘
>>> func_descend(df_pd)
a
1 NaN
0 5.0
3 2.0
2 1.0
>>> func_descend(df_pl)
shape: (4, 1)
┌──────┐
│ a │
│ --- │
│ i64 │
╞══════╡
│ null │
│ 5 │
│ 2 │
│ 1 │
└──────┘
std(*, ddof=1)
Get standard deviation.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
ddof |
int
|
“Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1. |
1
|
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
>>> df_pl = pl.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").std(ddof=0))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 17.79513 1.265789
>>> func(df_pl)
shape: (1, 2)
┌──────────┬──────────┐
│ a ┆ b │
│ --- ┆ --- │
│ f64 ┆ f64 │
╞══════════╪══════════╡
│ 17.79513 ┆ 1.265789 │
└──────────┴──────────┘
sum()
Return the sum value.
Examples:
>>> import pandas as pd
>>> import polars as pl
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [5, 10], "b": [50, 100]})
>>> df_pl = pl.DataFrame({"a": [5, 10], "b": [50, 100]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").sum())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 15 150
>>> func(df_pl)
shape: (1, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 15 ┆ 150 │
└─────┴─────┘
tail(n=10)
Get the last `n` rows.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int
|
Number of rows to return. |
10
|
Examples:
>>> import narwhals as nw
>>> import pandas as pd
>>> import polars as pl
>>> data = {"a": list(range(10))}
>>> df_pd = pd.DataFrame(data)
>>> df_pl = pl.DataFrame(data)
Let's define a dataframe-agnostic function that returns the last 3 rows:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a").tail(3))
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a
7 7
8 8
9 9
>>> func(df_pl)
shape: (3, 1)
┌─────┐
│ a │
│ --- │
│ i64 │
╞═════╡
│ 7 │
│ 8 │
│ 9 │
└─────┘
unique()
Return unique values.
Examples:
>>> import polars as pl
>>> import pandas as pd
>>> import narwhals as nw
>>> df_pd = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
>>> df_pl = pl.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]})
Let's define a dataframe-agnostic function:
>>> @nw.narwhalify
... def func(df):
... return df.select(nw.col("a", "b").unique())
We can then pass either pandas or Polars to `func`:
>>> func(df_pd)
a b
0 1 2
1 3 4
2 5 6
>>> func(df_pl)
shape: (3, 2)
┌─────┬─────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 2 │
│ 3 ┆ 4 │
│ 5 ┆ 6 │
└─────┴─────┘