Skip to content

Generating SQL

Suppose you want to write Polars syntax and translate it to SQL. For example, what's the SQL equivalent to:

import narwhals as nw
from narwhals.typing import IntoFrameT


def avg_monthly_price(df_native: IntoFrameT) -> IntoFrameT:
    return (
        nw.from_native(df_native)
        .group_by(nw.col("date").dt.truncate("1mo"))
        .agg(nw.col("price").mean())
        .sort("date")
        .to_native()
    )

?

There are several ways to find out.

Via DuckDB

You can also generate SQL directly from DuckDB.

import duckdb

conn = duckdb.connect()
conn.sql("""CREATE TABLE prices (date DATE, price DOUBLE);""")

df = nw.from_native(conn.table("prices"))
print(avg_monthly_price(df).sql_query())
SELECT * FROM (SELECT _date_tmp AS date, price FROM (SELECT _date_tmp, mean(price) AS price FROM (SELECT date, price, date_trunc('month', date) AS _date_tmp FROM main.prices) AS prices GROUP BY ALL) AS prices) AS prices ORDER BY date NULLS FIRST

To make it look a bit prettier, or to then transpile it to other SQL dialects, we can pass it to SQLGlot:

import sqlglot

print(sqlglot.transpile(avg_monthly_price(df).sql_query(), pretty=True)[0])
SELECT
  *
FROM (
  SELECT
    _date_tmp AS date,
    price
  FROM (
    SELECT
      _date_tmp,
      MEAN(price) AS price
    FROM (
      SELECT
        date,
        price,
        DATE_TRUNC('MONTH', date) AS _date_tmp
      FROM main.prices
    ) AS prices
    GROUP BY ALL
  ) AS prices
) AS prices
ORDER BY
  date

Via Ibis

We can also use Ibis to generate SQL:

import ibis

t = ibis.table({"date": "date", "price": "double"}, name="prices")
print(ibis.to_sql(avg_monthly_price(t)))
SELECT
  "t2"."_date_tmp" AS "date",
  "t2"."price"
FROM (
  SELECT
    "t1"."_date_tmp",
    AVG("t1"."price") AS "price"
  FROM (
    SELECT
      "t0"."date",
      "t0"."price",
      DATE_TRUNC('MONTH', "t0"."date") AS "_date_tmp"
    FROM "prices" AS "t0"
  ) AS "t1"
  GROUP BY
    1
) AS "t2"
ORDER BY
  "t2"."_date_tmp" ASC NULLS FIRST

Via SQLFrame

You can also use SQLFrame:

from sqlframe.standalone import StandaloneSession

session = StandaloneSession.builder.getOrCreate()
session.catalog.add_table("prices", column_mapping={"date": "date", "price": "float"})
df = nw.from_native(session.read.table("prices"))

print(avg_monthly_price(df).sql(dialect="duckdb"))
WITH "t20239543" AS (
  SELECT
    CAST(DATE_TRUNC('MONTH', CAST("prices"."date" AS TIMESTAMPTZ)) AS TIMESTAMPTZ) AS "_date_tmp",
    AVG("prices"."price") AS "price"
  FROM "prices" AS "prices"
  GROUP BY
    CAST(DATE_TRUNC('MONTH', CAST("prices"."date" AS TIMESTAMPTZ)) AS TIMESTAMPTZ)
)
SELECT
  "t20239543"."_date_tmp" AS "date",
  "t20239543"."price" AS "price"
FROM "t20239543" AS "t20239543"
ORDER BY
  "date" NULLS FIRST

Or, to print the SQL code in a different dialect (say, databricks):

print(avg_monthly_price(df).sql(dialect="databricks"))
WITH `t20239543` AS (
  SELECT
    CAST(DATE_TRUNC('MONTH', CAST(`prices`.`date` AS TIMESTAMP)) AS TIMESTAMP) AS `_date_tmp`,
    AVG(`prices`.`price`) AS `price`
  FROM `prices` AS `prices`
  GROUP BY
    CAST(DATE_TRUNC('MONTH', CAST(`prices`.`date` AS TIMESTAMP)) AS TIMESTAMP)
)
SELECT
  `t20239543`.`_date_tmp` AS `date`,
  `t20239543`.`price` AS `price`
FROM `t20239543` AS `t20239543`
ORDER BY
  `date`