Generating SQL
Suppose you want to write Polars syntax and translate it to SQL. For example, what's the SQL equivalent to:
import narwhals as nw
from narwhals.typing import IntoFrameT
def avg_monthly_price(df_native: IntoFrameT) -> IntoFrameT:
return (
nw.from_native(df_native)
.group_by(nw.col("date").dt.truncate("1mo"))
.agg(nw.col("price").mean())
.sort("date")
.to_native()
)
?
There are several ways to find out.
Via SQLFrame (most lightweight solution)
The most lightweight solution which does not require any heavy dependencies, nor any actual table or dataframe, is with SQLFrame.
from sqlframe.standalone import StandaloneSession
session = StandaloneSession.builder.getOrCreate()
session.catalog.add_table("prices", column_mapping={"date": "date", "price": "float"})
df = nw.from_native(session.read.table("prices"))
print(avg_monthly_price(df).sql(dialect="duckdb"))
WITH "t20239543" AS (
SELECT
CAST(DATE_TRUNC('MONTH', CAST("prices"."date" AS TIMESTAMPTZ)) AS TIMESTAMPTZ) AS "_date_tmp",
AVG("prices"."price") AS "price"
FROM "prices" AS "prices"
GROUP BY
CAST(DATE_TRUNC('MONTH', CAST("prices"."date" AS TIMESTAMPTZ)) AS TIMESTAMPTZ)
)
SELECT
"t20239543"."_date_tmp" AS "date",
"t20239543"."price" AS "price"
FROM "t20239543" AS "t20239543"
ORDER BY
"date" NULLS FIRST
Or, to print the SQL code in a different dialect (say, databricks):
print(avg_monthly_price(df).sql(dialect="databricks"))
WITH `t20239543` AS (
SELECT
CAST(DATE_TRUNC('MONTH', CAST(`prices`.`date` AS TIMESTAMP)) AS TIMESTAMP) AS `_date_tmp`,
AVG(`prices`.`price`) AS `price`
FROM `prices` AS `prices`
GROUP BY
CAST(DATE_TRUNC('MONTH', CAST(`prices`.`date` AS TIMESTAMP)) AS TIMESTAMP)
)
SELECT
`t20239543`.`_date_tmp` AS `date`,
`t20239543`.`price` AS `price`
FROM `t20239543` AS `t20239543`
ORDER BY
`date`
Via DuckDB
You can also generate SQL directly from DuckDB.
import duckdb
conn = duckdb.connect()
conn.sql("""CREATE TABLE prices (date DATE, price DOUBLE);""")
df = nw.from_native(conn.table("prices"))
print(avg_monthly_price(df).sql_query())
SELECT * FROM (SELECT _date_tmp AS date, price FROM (SELECT _date_tmp, mean(price) AS price FROM (SELECT date, price, time_bucket('1 month', date, '1970-01-01 00:00:00'::TIMESTAMP) AS _date_tmp FROM main.prices) AS prices GROUP BY ALL) AS prices) AS prices ORDER BY date NULLS FIRST
To make it look a bit prettier, we can pass it to SQLGlot:
import sqlglot
print(sqlglot.transpile(avg_monthly_price(df).sql_query(), pretty=True)[0])
SELECT
*
FROM (
SELECT
_date_tmp AS date,
price
FROM (
SELECT
_date_tmp,
MEAN(price) AS price
FROM (
SELECT
date,
price,
TIME_BUCKET('1 month', date, CAST('1970-01-01 00:00:00' AS TIMESTAMP)) AS _date_tmp
FROM main.prices
) AS prices
GROUP BY ALL
) AS prices
) AS prices
ORDER BY
date
Via Ibis
We can also use Ibis to generate SQL:
import ibis
t = ibis.table({"date": "date", "price": "double"}, name="prices")
print(ibis.to_sql(avg_monthly_price(t)))
SELECT
"t2"."_date_tmp" AS "date",
"t2"."price"
FROM (
SELECT
"t1"."_date_tmp",
AVG("t1"."price") AS "price"
FROM (
SELECT
"t0"."date",
"t0"."price",
DATE_TRUNC('MONTH', "t0"."date") AS "_date_tmp"
FROM "prices" AS "t0"
) AS "t1"
GROUP BY
1
) AS "t2"
ORDER BY
"t2"."_date_tmp" ASC NULLS FIRST