Skip to content

narwhals.testing

assert_frame_equal

assert_frame_equal(
    left: DataFrameT | LazyFrameT,
    right: DataFrameT | LazyFrameT,
    *,
    check_row_order: bool = True,
    check_column_order: bool = True,
    check_dtypes: bool = True,
    check_exact: bool = False,
    rel_tol: float = 1e-05,
    abs_tol: float = 1e-08,
    categorical_as_str: bool = False,
    backend: (
        IntoBackend[Polars | Pandas | Arrow] | None
    ) = None
) -> None

Assert that the left and right frames are equal.

Raises a detailed AssertionError if the frames differ. This function is intended for use in unit tests.

Warning
  1. In the case of backends that do not guarantee row order, such as DuckDB, Ibis, PySpark, and SQLFrame, the check_row_order argument is ignored and the comparands are always sorted by all the columns.
  2. In the case of lazy backends, a collect(...) operation is triggered.

Parameters:

Name Type Description Default
left DataFrameT | LazyFrameT

The first DataFrame or LazyFrame to compare.

required
right DataFrameT | LazyFrameT

The second DataFrame or LazyFrame to compare.

required
check_row_order bool

Requires row order to match.

This flag is ignored for backends that do not guarantee row order such as DuckDB, Ibis, PySpark, SQLFrame.

True
check_column_order bool

Requires column order to match.

True
check_dtypes bool

Requires data types to match.

True
check_exact bool

Requires float values to match exactly. If set to False, values are considered equal when within tolerance of each other (see rel_tol and abs_tol).

Only affects columns with a Float data type.

False
rel_tol float

Relative tolerance for inexact checking, given as a fraction of the values in right.

1e-05
abs_tol float

Absolute tolerance for inexact checking.

1e-08
categorical_as_str bool

Cast categorical columns to string before comparing. Enabling this helps compare columns that do not share the same string cache.

False
backend IntoBackend[Polars | Pandas | Arrow] | None

Specifies which eager backend to collect to. See narwhals.LazyFrame.collect for more information.

None

Examples:

>>> import polars as pl
>>> import narwhals as nw
>>> from narwhals.testing import assert_frame_equal
>>>
>>> left_native = pl.LazyFrame({"a": [1, 2, 3]})
>>> right_native = pl.LazyFrame({"a": [1, 5, 3]})
>>> left = nw.from_native(left_native)
>>> right = nw.from_native(right_native)
>>> assert_frame_equal(left, right)
Traceback (most recent call last):
    ...
AssertionError: DataFrames are different (value mismatch for column "a")
[left]:
┌─────────────────┐
| Narwhals Series |
|-----------------|
|shape: (3,)      |
|Series: 'a' [i64]|
|[                |
|        1        |
|        2        |
|        3        |
|]                |
└─────────────────┘
[right]:
┌─────────────────┐
| Narwhals Series |
|-----------------|
|shape: (3,)      |
|Series: 'a' [i64]|
|[                |
|        1        |
|        5        |
|        3        |
|]                |
└─────────────────┘
Source code in narwhals/testing/asserts/frame.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def assert_frame_equal(
    left: DataFrameT | LazyFrameT,
    right: DataFrameT | LazyFrameT,
    *,
    check_row_order: bool = True,
    check_column_order: bool = True,
    check_dtypes: bool = True,
    check_exact: bool = False,
    rel_tol: float = 1e-5,
    abs_tol: float = 1e-8,
    categorical_as_str: bool = False,
    backend: IntoBackend[Polars | Pandas | Arrow] | None = None,
) -> None:
    """Check that two Narwhals frames are equal.

    A detailed `AssertionError` is raised when the frames differ.
    Meant to be called from unit tests.

    Warning:
        1. Backends that do not guarantee row order (DuckDB, Ibis, PySpark and
            SQLFrame) ignore the `check_row_order` argument: the comparands are
            sorted by all the columns regardless.
        2. Lazy backends trigger a [`collect(...)`](lazyframe.md#narwhals.dataframe.LazyFrame.collect)
            operation.

    Arguments:
        left: The first DataFrame or LazyFrame to compare.
        right: The second DataFrame or LazyFrame to compare.
        check_row_order: Whether rows must appear in the same order.

            Ignored for backends that do not guarantee row order, such as
            DuckDB, Ibis, PySpark and SQLFrame.
        check_column_order: Whether columns must appear in the same order.
        check_dtypes: Whether data types must match.
        check_exact: Whether float values must match exactly. When `False`, values
            within tolerance of each other (see `rel_tol` and `abs_tol`) count as equal.

            Only affects columns with a Float data type.
        rel_tol: Relative tolerance for inexact checking, given as a fraction of
            the values in `right`.
        abs_tol: Absolute tolerance for inexact checking.
        categorical_as_str: Whether to cast categorical columns to string before
            comparing. Useful for columns that do not share the same string cache.
        backend: Eager backend to collect lazy inputs to.
            See [`narwhals.LazyFrame.collect`](lazyframe.md#narwhals.dataframe.LazyFrame.collect)
            for more information.

    Examples:
        >>> import polars as pl
        >>> import narwhals as nw
        >>> from narwhals.testing import assert_frame_equal
        >>>
        >>> left_native = pl.LazyFrame({"a": [1, 2, 3]})
        >>> right_native = pl.LazyFrame({"a": [1, 5, 3]})
        >>> left = nw.from_native(left_native)
        >>> right = nw.from_native(right_native)
        >>> assert_frame_equal(left, right)  # doctest: +ELLIPSIS
        Traceback (most recent call last):
            ...
        AssertionError: DataFrames are different (value mismatch for column "a")
        [left]:
        ┌─────────────────┐
        | Narwhals Series |
        |-----------------|
        |shape: (3,)      |
        |Series: 'a' [i64]|
        |[                |
        |        1        |
        |        2        |
        |        3        |
        |]                |
        └─────────────────┘
        [right]:
        ┌─────────────────┐
        | Narwhals Series |
        |-----------------|
        |shape: (3,)      |
        |Series: 'a' [i64]|
        |[                |
        |        1        |
        |        5        |
        |        3        |
        |]                |
        └─────────────────┘
    """
    # pytest convention: keep this helper's frame out of failure tracebacks.
    __tracebackhide__ = True

    # Both operands must already be wrapped by Narwhals; `left` is inspected first.
    both_are_frames = all(
        is_narwhals_dataframe(candidate) or is_narwhals_lazyframe(candidate)
        for candidate in (left, right)
    )
    if not both_are_frames:
        msg = (
            "Expected `narwhals.DataFrame` or `narwhals.LazyFrame` instance, found:\n"
            f"[left]: {qualified_type_name(type(left))}\n"
            f"[right]: {qualified_type_name(type(right))}\n\n"
            "Hint: Use `nw.from_native(obj, allow_series=False)` to convert each native "
            "object into a `narwhals.DataFrame` or `narwhals.LazyFrame` first."
        )
        raise TypeError(msg)

    # Frames coming from different backends are reported as a mismatch up front.
    impl = left.implementation
    other_impl = right.implementation
    if impl != other_impl:
        raise_frame_assertion_error("implementation mismatch", impl, other_impl)

    eager_left, eager_right = _check_correct_input_type(left, right, backend=backend)

    _assert_dataframe_equal(
        left=eager_left,
        right=eager_right,
        impl=impl,
        check_row_order=check_row_order,
        check_column_order=check_column_order,
        check_dtypes=check_dtypes,
        check_exact=check_exact,
        rel_tol=rel_tol,
        abs_tol=abs_tol,
        categorical_as_str=categorical_as_str,
    )

assert_series_equal

assert_series_equal(
    left: Series[IntoSeriesT],
    right: Series[IntoSeriesT],
    *,
    check_dtypes: bool = True,
    check_names: bool = True,
    check_order: bool = True,
    check_exact: bool = False,
    rel_tol: float = 1e-05,
    abs_tol: float = 1e-08,
    categorical_as_str: bool = False
) -> None

Assert that the left and right Series are equal.

Raises a detailed AssertionError if the Series differ. This function is intended for use in unit tests.

Parameters:

Name Type Description Default
left Series[IntoSeriesT]

The first Series to compare.

required
right Series[IntoSeriesT]

The second Series to compare.

required
check_dtypes bool

Requires data types to match.

True
check_names bool

Requires names to match.

True
check_order bool

Requires elements to appear in the same order.

True
check_exact bool

Requires float values to match exactly. If set to False, values are considered equal when within tolerance of each other (see rel_tol and abs_tol). Only affects columns with a Float data type.

False
rel_tol float

Relative tolerance for inexact checking, given as a fraction of the values in right.

1e-05
abs_tol float

Absolute tolerance for inexact checking.

1e-08
categorical_as_str bool

Cast categorical columns to string before comparing. Enabling this helps compare columns that do not share the same string cache.

False

Examples:

>>> import pandas as pd
>>> import narwhals as nw
>>> from narwhals.testing import assert_series_equal
>>> s1 = nw.from_native(pd.Series([1, 2, 3]), series_only=True)
>>> s2 = nw.from_native(pd.Series([1, 5, 3]), series_only=True)
>>> assert_series_equal(s1, s2)
Traceback (most recent call last):
...
AssertionError: Series are different (exact value mismatch)
[left]:
┌───────────────┐
|Narwhals Series|
|---------------|
| 0    1        |
| 1    2        |
| 2    3        |
| dtype: int64  |
└───────────────┘
[right]:
┌───────────────┐
|Narwhals Series|
|---------------|
| 0    1        |
| 1    5        |
| 2    3        |
| dtype: int64  |
└───────────────┘
Source code in narwhals/testing/asserts/series.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def assert_series_equal(
    left: Series[IntoSeriesT],
    right: Series[IntoSeriesT],
    *,
    check_dtypes: bool = True,
    check_names: bool = True,
    check_order: bool = True,
    check_exact: bool = False,
    rel_tol: float = 1e-05,
    abs_tol: float = 1e-08,
    categorical_as_str: bool = False,
) -> None:
    """Assert that the left and right Series are equal.

    Raises a detailed `AssertionError` if the Series differ.
    This function is intended for use in unit tests.

    Arguments:
        left: The first Series to compare.
        right: The second Series to compare.
        check_dtypes: Requires data types to match.
        check_names: Requires names to match.
        check_order: Requires elements to appear in the same order.
        check_exact: Requires float values to match exactly. If set to `False`, values are
            considered equal when within tolerance of each other (see `rel_tol` and
            `abs_tol`). Only affects columns with a Float data type.
        rel_tol: Relative tolerance for inexact checking, given as a fraction of the
            values in `right`.
        abs_tol: Absolute tolerance for inexact checking.
        categorical_as_str: Cast categorical columns to string before comparing.
            Enabling this helps compare columns that do not share the same string cache.

    Raises:
        TypeError: If `left` or `right` is not a `narwhals.Series`.
        NotImplementedError: If `check_order=False` is requested and `left` has a
            nested data type.
        AssertionError: If the Series differ.

    Examples:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>> from narwhals.testing import assert_series_equal
        >>> s1 = nw.from_native(pd.Series([1, 2, 3]), series_only=True)
        >>> s2 = nw.from_native(pd.Series([1, 5, 3]), series_only=True)
        >>> assert_series_equal(s1, s2)  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        AssertionError: Series are different (exact value mismatch)
        [left]:
        ┌───────────────┐
        |Narwhals Series|
        |---------------|
        | 0    1        |
        | 1    2        |
        | 2    3        |
        | dtype: int64  |
        └───────────────┘
        [right]:
        ┌───────────────┐
        |Narwhals Series|
        |---------------|
        | 0    1        |
        | 1    5        |
        | 2    3        |
        | dtype: int64  |
        └───────────────┘
    """
    # pytest convention: keep this helper's frame out of failure tracebacks.
    __tracebackhide__ = True

    # Reject anything that is not already a Narwhals Series, reporting both operand
    # types so the caller can see which side is wrong.
    if any(not is_narwhals_series(obj) for obj in (left, right)):
        msg = (
            "Expected `narwhals.Series` instance, found:\n"
            f"[left]: {qualified_type_name(type(left))}\n"
            f"[right]: {qualified_type_name(type(right))}\n\n"
            "Hint: Use `nw.from_native(obj, series_only=True)` to convert each native "
            "object into a `narwhals.Series` first."
        )
        raise TypeError(msg)

    _check_metadata(left, right, check_dtypes=check_dtypes, check_names=check_names)

    if not check_order:
        # Order-insensitive comparison is done by sorting both sides first, which is
        # not supported for nested dtypes.
        if left.dtype.is_nested():
            msg = "`check_order=False` is not supported (yet) with nested data type."
            raise NotImplementedError(msg)
        left, right = left.sort(), right.sort()

    left_vals, right_vals = _check_null_values(left, right)

    # The tolerance-based comparison applies only to float dtypes when `check_exact`
    # is off; every other case goes through `_check_exact_values`.
    if check_exact or not left.dtype.is_float():
        _check_exact_values(
            left_vals,
            right_vals,
            check_dtypes=check_dtypes,
            check_exact=check_exact,
            rel_tol=rel_tol,
            abs_tol=abs_tol,
            categorical_as_str=categorical_as_str,
        )
    else:
        _check_approximate_values(left_vals, right_vals, rel_tol=rel_tol, abs_tol=abs_tol)