05 Data Warehouse Using Google Big Query
05 Data Warehouse Using Google Big Query
Google BigQuery is a cloud-based data warehouse. It is serverless, cost-effective,
and multi-cloud.
* Cost-effective
* Serverless
* Multicloud
You can also create a table using the BigQuery editor in the Google Cloud Console.
```sql
-- Native BigQuery table holding one row per order.
-- The project id contains dashes, so the full path is backtick-quoted.
CREATE TABLE `tidy-fort-361710.retail.orders` (
    order_id INTEGER,
    order_date DATE,
    -- Spelling fixed (was order_cusotmer_id) so it matches the external-table definition.
    order_customer_id INTEGER,
    order_status STRING
);
```
```
order_item_id:INTEGER
order_item_order_id:INTEGER
order_item_product_id:INTEGER
order_item_quantity:INTEGER
order_item_subtotal:FLOAT
order_item_product_price:FLOAT
```
```
product_id:INTEGER
product_category_id:INTEGER
product_name:STRING
product_description:STRING
product_price:FLOAT
product_image:STRING
```
You can go through the notebook and make required changes to see if it works as
expected or not. You can also validate by running queries against the table using
Google BigQuery UI.
You can also use the `CREATE EXTERNAL TABLE` command to create a table from files stored in
GCS. Here is the [link](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_external_table_statement)
for the syntax to create external tables in Google BigQuery.
```sql
-- External table: the data stays in GCS and is read as CSV from every object
-- matching the wildcard URI below.
CREATE EXTERNAL TABLE retail.orders (
    order_id          INTEGER,
    order_date        TIMESTAMP,
    order_customer_id INTEGER,
    order_status      STRING
)
OPTIONS (
    uris = ['gs://airetail/retail_db/orders/*'],
    format = 'CSV'
);
```
```
order_item_id:INTEGER
order_item_order_id:INTEGER
order_item_product_id:INTEGER
order_item_quantity:INTEGER
order_item_subtotal:FLOAT
order_item_product_price:FLOAT
```
```
product_id:INTEGER
product_category_id:INTEGER
product_name:STRING
product_description:STRING
product_price:FLOAT
product_image:STRING
```
```sql
-- Revenue per order date and product, restricted to COMPLETE and CLOSED orders.
-- Output is sorted by date, then by revenue (highest first) within each date.
SELECT
    o.order_date,
    oi.order_item_product_id,
    ROUND(SUM(oi.order_item_subtotal), 2) AS revenue
FROM retail.orders AS o
INNER JOIN retail.order_items AS oi
    ON oi.order_item_order_id = o.order_id
WHERE o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY
    o.order_date,
    oi.order_item_product_id
ORDER BY
    o.order_date,
    revenue DESC
```sql
-- Daily revenue together with a running total that restarts every calendar month.
WITH daily_revenue AS (
    -- One row per order date: summed item subtotals of COMPLETE and CLOSED orders.
    SELECT
        o.order_date,
        ROUND(SUM(oi.order_item_subtotal), 2) AS revenue
    FROM retail.orders AS o
    INNER JOIN retail.order_items AS oi
        ON oi.order_item_order_id = o.order_id
    WHERE o.order_status IN ('COMPLETE', 'CLOSED')
    GROUP BY o.order_date
)
SELECT
    FORMAT_DATE('%Y%m', order_date) AS order_month,
    order_date,
    revenue,
    -- Partitioning by the yyyymm string resets the accumulation at each month boundary.
    ROUND(
        SUM(revenue) OVER (
            PARTITION BY FORMAT_DATE('%Y%m', order_date)
            ORDER BY order_date
        ),
        2
    ) AS revenue_cum
FROM daily_revenue
ORDER BY order_date;
```
```sql
-- Top three products by revenue for every order date (ties share a rank).
WITH daily_product_revenue AS (
    -- Revenue per order date and product for COMPLETE and CLOSED orders.
    SELECT
        o.order_date,
        oi.order_item_product_id,
        ROUND(SUM(oi.order_item_subtotal), 2) AS revenue
    FROM retail.orders AS o
    INNER JOIN retail.order_items AS oi
        ON oi.order_item_order_id = o.order_id
    WHERE o.order_status IN ('COMPLETE', 'CLOSED')
    GROUP BY
        o.order_date,
        oi.order_item_product_id
),
ranked_daily_product_revenue AS (
    -- Rank products within each date, highest revenue first.
    SELECT
        FORMAT_DATE('%Y%m', order_date) AS order_month,
        order_date,
        order_item_product_id,
        revenue,
        DENSE_RANK() OVER (
            PARTITION BY order_date
            ORDER BY revenue DESC
        ) AS drank
    FROM daily_product_revenue
)
SELECT
    order_month,
    order_date,
    order_item_product_id,
    revenue,
    drank
FROM ranked_daily_product_revenue
WHERE drank <= 3
ORDER BY
    order_date,
    revenue DESC
```python
# Run an aggregate query against BigQuery and load the result into a pandas DataFrame.
import pandas as pd
# SQL text: number of orders per order_status, largest count first.
query = '''
SELECT order_status, count(*) AS order_count
FROM `itversity-rnd.retail.orders`
GROUP BY 1
ORDER BY 2 DESC
'''
# Project passed to read_gbq; same project id as in the table path of the query above.
project_id = 'itversity-rnd'
# NOTE(review): pandas.read_gbq is deprecated as of pandas 2.2 in favour of
# pandas_gbq.read_gbq -- confirm against the installed pandas version.
df = pd.read_gbq(query, project_id=project_id)
# Bare expression: shows the DataFrame when this is the last line of a notebook cell.
df
```
```sql
-- Federated query: the second argument is sent as-is to the database behind the
-- `tidy-fort-361710.us.retailpgexternal` connection, and its result is returned to BigQuery.
-- Here it lists the tables of the `public` schema in that external database.
SELECT *
FROM EXTERNAL_QUERY(
"tidy-fort-361710.us.retailpgexternal",
"SELECT * FROM information_schema.tables WHERE table_schema = 'public'"
);
-- Order count per order date, computed in the external database and sorted in BigQuery.
-- Fixes:
--   * The inner SQL used a double-quoted literal split across two lines, which is not a
--     valid BigQuery string; a triple-quoted literal may span lines.
--   * EXTERNAL_QUERY does not preserve the ordering produced by the external query, so
--     the ORDER BY is applied on the BigQuery side instead.
SELECT
    order_date,
    order_count
FROM EXTERNAL_QUERY(
    "tidy-fort-361710.us.retailpgexternal",
    """
    SELECT order_date, count(*) AS order_count
    FROM orders
    GROUP BY order_date
    """
)
ORDER BY order_count DESC;
-- Federated query: fetch every row and column of the `products` table from the database
-- behind the `tidy-fort-361710.us.retailpgexternal` connection; the result is aliased as p.
SELECT *
FROM EXTERNAL_QUERY(
"tidy-fort-361710.us.retailpgexternal",
"SELECT * FROM products"
) AS p;
-- Revenue per order date and product, with the product name taken from the `products`
-- table of the external database (federated query) and joined to the BigQuery tables.
-- ROUND is called without a scale argument here, so revenue is a whole number.
SELECT
    o.order_date,
    oi.order_item_product_id,
    p.product_name,
    ROUND(SUM(oi.order_item_subtotal)) AS revenue
FROM EXTERNAL_QUERY(
    "tidy-fort-361710.us.retailpgexternal",
    "SELECT * FROM products"
) AS p
INNER JOIN retail.order_items AS oi
    ON oi.order_item_product_id = p.product_id
INNER JOIN retail.orders AS o
    ON o.order_id = oi.order_item_order_id
GROUP BY
    o.order_date,
    oi.order_item_product_id,
    p.product_name
ORDER BY
    o.order_date,
    revenue DESC;
```