Generating SQL
Suppose you want to write Polars syntax and translate it to SQL. For example, what's the SQL equivalent to:
import narwhals as nw
from narwhals.typing import IntoFrameT
def avg_monthly_price(df_native: IntoFrameT) -> IntoFrameT:
return (
nw.from_native(df_native)
.group_by(nw.col("date").dt.truncate("1mo"))
.agg(nw.col("price").mean())
.sort("date")
.to_native()
)
?
There are several ways to find out.
Via DuckDB
You can also generate SQL directly from DuckDB.
import duckdb
conn = duckdb.connect()
conn.sql("""CREATE TABLE prices (date DATE, price DOUBLE);""")
df = nw.from_native(conn.table("prices"))
print(avg_monthly_price(df).sql_query())
SELECT * FROM (SELECT _date_tmp AS date, price FROM (SELECT _date_tmp, mean(price) AS price FROM (SELECT date, price, date_trunc('month', date) AS _date_tmp FROM main.prices) AS prices GROUP BY ALL) AS prices) AS prices ORDER BY date NULLS FIRST
To make it look a bit prettier, or to then transpile it to other SQL dialects, we can pass it to SQLGlot:
import sqlglot
print(sqlglot.transpile(avg_monthly_price(df).sql_query(), pretty=True)[0])
SELECT
*
FROM (
SELECT
_date_tmp AS date,
price
FROM (
SELECT
_date_tmp,
MEAN(price) AS price
FROM (
SELECT
date,
price,
DATE_TRUNC('MONTH', date) AS _date_tmp
FROM main.prices
) AS prices
GROUP BY ALL
) AS prices
) AS prices
ORDER BY
date
Via Ibis
We can also use Ibis to generate SQL:
import ibis
t = ibis.table({"date": "date", "price": "double"}, name="prices")
print(ibis.to_sql(avg_monthly_price(t)))
SELECT
"t2"."_date_tmp" AS "date",
"t2"."price"
FROM (
SELECT
"t1"."_date_tmp",
AVG("t1"."price") AS "price"
FROM (
SELECT
"t0"."date",
"t0"."price",
DATE_TRUNC('MONTH', "t0"."date") AS "_date_tmp"
FROM "prices" AS "t0"
) AS "t1"
GROUP BY
1
) AS "t2"
ORDER BY
"t2"."_date_tmp" ASC NULLS FIRST
Via SQLFrame
You can also use SQLFrame:
from sqlframe.standalone import StandaloneSession
session = StandaloneSession.builder.getOrCreate()
session.catalog.add_table("prices", column_mapping={"date": "date", "price": "float"})
df = nw.from_native(session.read.table("prices"))
print(avg_monthly_price(df).sql(dialect="duckdb"))
WITH "t20239543" AS (
SELECT
CAST(DATE_TRUNC('MONTH', CAST("prices"."date" AS TIMESTAMPTZ)) AS TIMESTAMPTZ) AS "_date_tmp",
AVG("prices"."price") AS "price"
FROM "prices" AS "prices"
GROUP BY
CAST(DATE_TRUNC('MONTH', CAST("prices"."date" AS TIMESTAMPTZ)) AS TIMESTAMPTZ)
)
SELECT
"t20239543"."_date_tmp" AS "date",
"t20239543"."price" AS "price"
FROM "t20239543" AS "t20239543"
ORDER BY
"date" NULLS FIRST
Or, to print the SQL code in a different dialect (say, databricks):
print(avg_monthly_price(df).sql(dialect="databricks"))
WITH `t20239543` AS (
SELECT
CAST(DATE_TRUNC('MONTH', CAST(`prices`.`date` AS TIMESTAMP)) AS TIMESTAMP) AS `_date_tmp`,
AVG(`prices`.`price`) AS `price`
FROM `prices` AS `prices`
GROUP BY
CAST(DATE_TRUNC('MONTH', CAST(`prices`.`date` AS TIMESTAMP)) AS TIMESTAMP)
)
SELECT
`t20239543`.`_date_tmp` AS `date`,
`t20239543`.`price` AS `price`
FROM `t20239543` AS `t20239543`
ORDER BY
`date`