Skip to content

Commit 7442619

Browse files
committed
Initial project structure with models and seeds from jaffle_shop
1 parent 983433a commit 7442619

22 files changed

+725
-1
lines changed

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,9 @@ cython_debug/
158158
# and can be added to the global gitignore or merged into this file. For a more nuclear
159159
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160160
#.idea/
161+
162+
# dbt
163+
target/
164+
dbt_packages/
165+
logs/
166+
.user.yml

README.md

+21-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,22 @@
11
# dbt-data-quality
2-
Data quality checks in your dbt flow
2+
Data quality checks in your dbt flow.
3+
4+
Modified from dbt-labs [jaffle_shop](https://github.com/dbt-labs/jaffle_shop).
5+
6+
## Environment setup
7+
```
8+
conda create -n dbtdq python=3.9
9+
conda activate dbtdq
10+
pip install -r requirements.txt
11+
```
12+
13+
`.env` file structure:
14+
```
15+
POSTGRES_HOST=<postgres_host>
16+
POSTGRES_USER=<postgres_user>
17+
POSTGRES_PASSWORD=<postgres_password>
18+
POSTGRES_DATABASE=<postgres_database>
19+
```
20+
21+
## Tools
22+
Postgres database: [supabase](https://supabase.com/).

analyses/.gitkeep

Whitespace-only changes.

dbt_project.yml

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
2+
# Name your project! Project names should contain only lowercase characters
3+
# and underscores. A good package name should reflect your organization's
4+
# name or the intended use of these models
5+
name: 'dqg'
6+
version: '1.0.0'
7+
config-version: 2
8+
9+
# This setting configures which "profile" dbt uses for this project.
10+
profile: 'dqg'
11+
12+
# These configurations specify where dbt should look for different types of files.
13+
# The `model-paths` config, for example, states that models in this project can be
14+
# found in the "models/" directory. You probably won't need to change these!
15+
model-paths: ["models"]
16+
analysis-paths: ["analyses"]
17+
test-paths: ["tests"]
18+
seed-paths: ["seeds"]
19+
macro-paths: ["macros"]
20+
snapshot-paths: ["snapshots"]
21+
22+
clean-targets: # directories to be removed by `dbt clean`
23+
- "target"
24+
- "dbt_packages"
25+
26+
27+
# Configuring models
28+
# Full documentation: https://docs.getdbt.com/docs/configuring-models
29+
30+
# In this config, we tell dbt to build all models in this project as tables,
31+
# except those in the staging/ directory, which are built as views. These settings
32+
# can be overridden in the individual model files using the `{{ config(...) }}` macro.
33+
models:
34+
dqg:
35+
materialized: table
36+
staging:
37+
materialized: view

macros/.gitkeep

Whitespace-only changes.

models/customers.sql

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
-- Customer summary model: every row of stg_customers, enriched with
-- order-history facts and the total of all payments made on that
-- customer's orders.

with

-- Import CTEs: the staging models this model builds on.
customers as (
    select * from {{ ref('stg_customers') }}
),

orders as (
    select * from {{ ref('stg_orders') }}
),

payments as (
    select * from {{ ref('stg_payments') }}
),

-- Earliest order date, latest order date and order count per customer_id.
order_stats as (
    select
        orders.customer_id,
        min(orders.order_date) as first_order,
        max(orders.order_date) as most_recent_order,
        count(orders.order_id) as number_of_orders
    from orders
    group by orders.customer_id
),

-- Sum of payment amounts per customer_id. The customer is resolved through
-- the order each payment points at; the left join keeps payments whose
-- order_id has no match in orders (they fall into the null customer_id group).
payment_totals as (
    select
        orders.customer_id,
        sum(payments.amount) as total_amount
    from payments
    left join orders
        on payments.order_id = orders.order_id
    group by orders.customer_id
),

-- Left joins keep every customer row; the order and payment columns are
-- null for customers without matching rows.
final as (
    select
        customers.customer_id,
        customers.first_name,
        customers.last_name,
        order_stats.first_order,
        order_stats.most_recent_order,
        order_stats.number_of_orders,
        payment_totals.total_amount as customer_lifetime_value
    from customers
    left join order_stats
        on customers.customer_id = order_stats.customer_id
    left join payment_totals
        on customers.customer_id = payment_totals.customer_id
)

select * from final

models/docs.md

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{% docs orders_status %}
2+
3+
Orders can be one of the following statuses:
4+
5+
| status | description |
6+
|----------------|------------------------------------------------------------------------------------------------------------------------|
7+
| placed | The order has been placed but has not yet left the warehouse |
8+
| shipped | The order has been shipped to the customer and is currently in transit |
9+
| completed | The order has been received by the customer |
10+
| return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse |
11+
| returned | The order has been returned by the customer and received at the warehouse |
12+
13+
14+
{% enddocs %}

models/orders.sql

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
{% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %}

-- Order model: every row of stg_orders plus the total paid on the order,
-- with one <method>_amount column generated per entry in payment_methods.

with

-- Import CTEs: the staging models this model builds on.
orders as (
    select * from {{ ref('stg_orders') }}
),

payments as (
    select * from {{ ref('stg_payments') }}
),

-- Pivot payments to one row per order_id: a per-method sum (0 when the
-- order has payments but none by that method) and the overall sum.
payment_pivot as (
    select
        payments.order_id,
        {% for method in payment_methods -%}
        sum(
            case
                when payments.payment_method = '{{ method }}' then payments.amount
                else 0
            end
        ) as {{ method }}_amount,
        {% endfor -%}
        sum(payments.amount) as total_amount
    from payments
    group by payments.order_id
),

-- The left join keeps orders that have no payment rows; their amount
-- columns are null.
final as (
    select
        orders.order_id,
        orders.customer_id,
        orders.order_date,
        orders.status,
        {% for method in payment_methods -%}
        payment_pivot.{{ method }}_amount,
        {% endfor -%}
        payment_pivot.total_amount as amount
    from orders
    left join payment_pivot
        on orders.order_id = payment_pivot.order_id
)

select * from final

models/overview.md

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{% docs __overview__ %}
2+
3+
## Data Documentation for Jaffle Shop
4+
5+
`jaffle_shop` is a fictional ecommerce store.
6+
7+
This [dbt](https://www.getdbt.com/) project is for testing out code.
8+
9+
The source code can be found [here](https://github.com/clrcrl/jaffle_shop).
10+
11+
{% enddocs %}

models/schema.yml

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
version: 2
2+
3+
models:
4+
- name: customers
5+
description: This table has basic information about a customer, as well as some derived facts based on a customer's orders
6+
7+
columns:
8+
- name: customer_id
9+
description: This is a unique identifier for a customer
10+
tests:
11+
- unique
12+
- not_null
13+
14+
- name: first_name
15+
description: Customer's first name. PII.
16+
17+
- name: last_name
18+
description: Customer's last name. PII.
19+
20+
- name: first_order
21+
description: Date (UTC) of a customer's first order
22+
23+
- name: most_recent_order
24+
description: Date (UTC) of a customer's most recent order
25+
26+
- name: number_of_orders
27+
description: Count of the number of orders a customer has placed
28+
29+
- name: customer_lifetime_value
30+
description: Total value (AUD) of a customer's orders
31+
32+
- name: orders
33+
description: This table has basic information about orders, as well as some derived facts based on payments
34+
35+
columns:
36+
- name: order_id
37+
tests:
38+
- unique
39+
- not_null
40+
description: This is a unique identifier for an order
41+
42+
- name: customer_id
43+
description: Foreign key to the customers table
44+
tests:
45+
- not_null
46+
- relationships:
47+
to: ref('customers')
48+
field: customer_id
49+
50+
- name: order_date
51+
description: Date (UTC) that the order was placed
52+
53+
- name: status
54+
description: '{{ doc("orders_status") }}'
55+
tests:
56+
- accepted_values:
57+
values: ['placed', 'shipped', 'completed', 'return_pending', 'returned']
58+
59+
- name: amount
60+
description: Total amount (AUD) of the order
61+
tests:
62+
- not_null
63+
64+
- name: credit_card_amount
65+
description: Amount of the order (AUD) paid for by credit card
66+
tests:
67+
- not_null
68+
69+
- name: coupon_amount
70+
description: Amount of the order (AUD) paid for by coupon
71+
tests:
72+
- not_null
73+
74+
- name: bank_transfer_amount
75+
description: Amount of the order (AUD) paid for by bank transfer
76+
tests:
77+
- not_null
78+
79+
- name: gift_card_amount
80+
description: Amount of the order (AUD) paid for by gift card
81+
tests:
82+
- not_null

models/staging/schema.yml

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
version: 2
2+
3+
models:
4+
- name: stg_customers
5+
columns:
6+
- name: customer_id
7+
tests:
8+
- unique
9+
- not_null
10+
11+
- name: stg_orders
12+
columns:
13+
- name: order_id
14+
tests:
15+
- unique
16+
- not_null
17+
- name: status
18+
tests:
19+
- accepted_values:
20+
values: ['placed', 'shipped', 'completed', 'return_pending', 'returned']
21+
22+
- name: stg_payments
23+
columns:
24+
- name: payment_id
25+
tests:
26+
- unique
27+
- not_null
28+
- name: payment_method
29+
tests:
30+
- accepted_values:
31+
values: ['credit_card', 'coupon', 'bank_transfer', 'gift_card']

models/staging/stg_customers.sql

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{#
    Staging model for customers: exposes raw_customers with `id` renamed to
    `customer_id`. A project would normally read from a source table here;
    this one loads its data from seeds instead.
#}

with

seed_rows as (
    select * from {{ ref('raw_customers') }}
),

standardized as (
    select
        seed_rows.id as customer_id,
        seed_rows.first_name,
        seed_rows.last_name
    from seed_rows
)

select * from standardized

models/staging/stg_orders.sql

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{#
    Staging model for orders: exposes raw_orders with `id` renamed to
    `order_id` and `user_id` renamed to `customer_id`. A project would
    normally read from a source table here; this one loads its data from
    seeds instead.
#}

with

seed_rows as (
    select * from {{ ref('raw_orders') }}
),

standardized as (
    select
        seed_rows.id as order_id,
        seed_rows.user_id as customer_id,
        seed_rows.order_date,
        seed_rows.status
    from seed_rows
)

select * from standardized

0 commit comments

Comments
 (0)