Null/NaN handling

pandas doesn't distinguish between Null and NaN values as Polars and PyArrow do.

Depending on the data type of the underlying data structure, np.nan, pd.NaT, None and pd.NA all encode missing data in pandas.

Polars and PyArrow, instead, treat NaN as a valid floating point value which is rare to encounter and more often produced as the result of a computation than explicitly set during data initialization; they treat null as the missing data indicator, regardless of the data type.

In Narwhals, then, is_null behaves differently across backends (and so do drop_nulls, fill_null and null_count):

import narwhals as nw
import numpy as np
from narwhals.typing import IntoFrameT

data = {"a": [1.4, float("nan"), np.nan, 4.2, None]}


def check_null_behavior(df: IntoFrameT) -> IntoFrameT:
    return nw.from_native(df).with_columns(a_is_null=nw.col("a").is_null()).to_native()

pandasPolars (eager)PyArrow

import pandas as pd

df = pd.DataFrame(data)
print(check_null_behavior(df))

     a  a_is_null
0  1.4      False
1  NaN       True
2  NaN       True
3  4.2      False
4  NaN       True

import polars as pl

df = pl.DataFrame(data)
print(check_null_behavior(df))

shape: (5, 2)
┌──────┬───────────┐
│ a    ┆ a_is_null │
│ ---  ┆ ---       │
│ f64  ┆ bool      │
╞══════╪═══════════╡
│ 1.4  ┆ false     │
│ NaN  ┆ false     │
│ NaN  ┆ false     │
│ 4.2  ┆ false     │
│ null ┆ true      │
└──────┴───────────┘

import pyarrow as pa

df = pa.table(data)
print(check_null_behavior(df))

pyarrow.Table
a: double
a_is_null: bool
----
a: [[1.4,nan,nan,4.2,null]]
a_is_null: [[false,false,false,false,true]]

Conversely, is_nan is consistent across backends. This consistency comes from Narwhals exploiting its native implementations in Polars and PyArrow, while ensuring that pandas only identifies the floating-point NaN values and not those encoding the missing value indicator.

import narwhals as nw
from narwhals.typing import IntoFrameT

data = {"a": [0.0, None, 2.0]}


def check_nan_behavior(df: IntoFrameT) -> IntoFrameT:
    return (
        nw.from_native(df)
        .with_columns(
            a_div_a=(nw.col("a") / nw.col("a")),
            a_div_a_is_nan=(nw.col("a") / nw.col("a")).is_nan(),
        )
        .to_native()
    )

pandasPolars (eager)PyArrow

import pandas as pd

df = pd.DataFrame(data).astype({"a": "Float64"})
print(check_nan_behavior(df))

      a  a_div_a  a_div_a_is_nan
0   0.0      NaN            True
1  <NA>     <NA>            <NA>
2   2.0      1.0           False

import polars as pl

df = pl.DataFrame(data)
print(check_nan_behavior(df))

shape: (3, 3)
┌──────┬─────────┬────────────────┐
│ a    ┆ a_div_a ┆ a_div_a_is_nan │
│ ---  ┆ ---     ┆ ---            │
│ f64  ┆ f64     ┆ bool           │
╞══════╪═════════╪════════════════╡
│ 0.0  ┆ NaN     ┆ true           │
│ null ┆ null    ┆ null           │
│ 2.0  ┆ 1.0     ┆ false          │
└──────┴─────────┴────────────────┘

import pyarrow as pa

df = pa.table(data)
print(check_nan_behavior(df))

pyarrow.Table
a: double
a_div_a: double
a_div_a_is_nan: bool
----
a: [[0,null,2]]
a_div_a: [[nan,null,1]]
a_div_a_is_nan: [[true,null,false]]