Compare commits

...

9 Commits

Author SHA1 Message Date
Sung Won Chung
602556f48c clearer logs 2022-10-11 10:17:19 -07:00
Sung Won Chung
bba5811f34 remove private nodes from depends_on 2022-10-11 10:11:50 -07:00
Sung Won Chung
2bd4c9f2da working code 2022-10-10 16:08:51 -07:00
Sung Won Chung
3b4527d0b9 example manifest 2022-10-10 11:31:45 -07:00
Sung Won Chung
0bdfe14b54 comment over code to touch 2022-10-10 10:57:59 -07:00
Sung Won Chung
772bb79c52 Merge branch 'main' of https://github.com/dbt-labs/dbt into feature/monorepo-public-private-models 2022-10-10 10:45:03 -07:00
Sung Won Chung
6882572da5 remove quotes 2022-10-06 11:50:20 -07:00
Sung Won Chung
3542fe96b6 draft examples 2022-10-06 11:44:58 -07:00
Sung Won Chung
b1366a0fe7 placeholder test file 2022-10-05 10:58:26 -07:00
7 changed files with 23102 additions and 2 deletions

View File

@@ -944,6 +944,30 @@ def raise_ambiguous_alias(node_1, node_2, duped_name=None):
)
# Compilation Error
# dbt found two resources with the database representation "ANALYTICS.dbt_sung.my_first_model".
# dbt cannot create two resources with identical database representations. To fix this,
# change the configuration of one of these resources:
# - model.tpch.my_first_model (models/demo_examples/my_first_model.sql)
# - model.tpch.my_second_model (models/demo_examples/my_second_model.sql)
# TODO: raise a private alias error
# def raise_private_resource(node_list):
# if duped_name is None:
# duped_name = f"{node_1.database}.{node_1.schema}.{node_1.alias}"
# raise_compiler_error(
# 'dbt found private resources with the database representation "{}".\ndbt '
# "cannot create private subfolder models. "
# "To fix this,\nchange the ref_permissions configuration of these resources:"
# "\n- {} ({})\n- {} ({})".format(
# duped_name,
# node_list.unique_id,
# node_list.original_file_path,
# )
# )
def raise_ambiguous_catalog_match(unique_id, match_1, match_2):
def get_match_string(match):
return "{}.{}".format(

View File

@@ -218,9 +218,29 @@ class ManifestLoader:
manifest = loader.load()
_check_manifest(manifest, config)
private_nodes = _check_manifest(manifest, config)
private_nodes_keys = private_nodes.keys()
print(f"private_nodes_keys: {private_nodes_keys}")
manifest.build_flat_graph()
# for each node in the manifest, remove the node from depends_on if it's in the private nodes dictionary above
for node in manifest.nodes.values():
print("-------------")
print(f"node.unique_id: {node.unique_id}")
# get the node.dependencies before
print("depends_on.nodes: BEFORE REMOVING PRIVATE NODES")
print(node.depends_on.nodes)
# remove the private nodes from the node.dependencies
node.depends_on.nodes = [
x for x in node.depends_on.nodes if x not in private_nodes_keys
]
# get the node.dependencies after removing the private nodes
print("depends_on.nodes: AFTER REMOVING PRIVATE NODES")
print(node.depends_on.nodes)
print("-------------")
# This needs to happen after loading from a partial parse,
# so that the adapter has the query headers from the macro_hook.
loader.save_macros_to_adapter(adapter)
@@ -1042,10 +1062,47 @@ def _check_resource_uniqueness(
if existing_alias is not None:
dbt.exceptions.raise_ambiguous_alias(existing_alias, node, full_node_name)
# TODO: check for private ref_permissions and raise an error if private
# private_alias = alias_resources.get(full_node_name)
# if existing_alias is not None:
# dbt.exceptions.raise_private_alias(existing_alias, node, full_node_name)
names_resources[name] = node
alias_resources[full_node_name] = node
def _check_resource_ref_permissions(
    manifest: Manifest,
    config: RuntimeConfig,
) -> dict:
    """Collect the relational nodes configured with ``ref_permissions: private``.

    Every node in ``manifest.nodes`` is inspected; non-relational nodes are
    skipped. A node counts as private when the ``ref_permissions`` entry in
    its config's extra settings equals the string ``"private"``.

    Args:
        manifest: The loaded manifest whose nodes are inspected.
        config: The runtime configuration. Currently unused; kept so the
            signature parallels the other manifest checks.

    Returns:
        A dict mapping each private node's ``unique_id`` to the directory
        part of its ``original_file_path``. Keys are unique ids (for example
        ``model.tpch.dim_customers``), not database relation names.
        Illustrative shape::

            {
                "model.tpch.stg_tpch_orders": "models/staging/tpch",
                "model.tpch.dim_customers": "models/marts/core",
            }
    """
    private_nodes = {}
    for node in manifest.nodes.values():
        if not node.is_relational:
            continue
        if node.config._extra.get("ref_permissions") != "private":
            continue
        # Only the containing folder is recorded; the file name is irrelevant.
        private_nodes[node.unique_id] = os.path.dirname(node.original_file_path)
    return private_nodes
def _warn_for_unused_resource_config_paths(manifest: Manifest, config: RuntimeConfig) -> None:
resource_fqns: Mapping[str, PathSet] = manifest.get_resource_fqns()
disabled_fqns: PathSet = frozenset(
@@ -1054,9 +1111,11 @@ def _warn_for_unused_resource_config_paths(manifest: Manifest, config: RuntimeCo
config.warn_for_unused_resource_config_paths(resource_fqns, disabled_fqns)
def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> None:
def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> dict:
    """Run the manifest checks in order and hand back the private nodes.

    The uniqueness check runs first, then the unused-config-path warning,
    and finally the ref-permissions scan.

    Returns:
        Whatever ``_check_resource_ref_permissions`` returns for this
        manifest and config.
    """
    _check_resource_uniqueness(manifest, config)
    _warn_for_unused_resource_config_paths(manifest, config)
    return _check_resource_ref_permissions(manifest, config)
def _get_node_column(node, column_name):

View File

@@ -0,0 +1 @@
# placeholder file

View File

@@ -0,0 +1,77 @@
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'tpch'
version: '1.0.0'
config-version: 2
# This setting configures which "profile" dbt uses for this project.
profile: 'tpch'
# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]
target-path: "target" # directory which will store compiled SQL files
clean-targets: # directories to be removed by `dbt clean`
- "target"
- "dbt_modules"
on-run-start:
- "alter warehouse transforming set warehouse_size=small;"
- '{{create_udfs()}}' # comment / uncomment this line to build UDFs called in the create_udfs macro
on-run-end:
- "{% if target.name == 'prod' %}{{ dbt_artifacts.upload_results(results) }}{% endif %}"
- "alter warehouse transforming set warehouse_size=xsmall;"
- "{{ grant_all_on_schemas(schemas, 'transformer') }}"
vars:
sharded_database: 'sharded_database'
sharded_schema: 'sharded_database'
sharded_table_name: 'sharded_table_name'
sharded_table_ref: "{{ var('sharded_database') }}.{{ var('sharded_schema') }}.{{ var('sharded_table_name') }}"
# dbt_metrics_calendar_model: ref('metric_calendar_custom')
load_type: 'I'
start_date: '1999-01-01'
test: 'false' # to trigger runs for unit testing - override in a CLI param in testing job
fct_order_items: 'mock_source__fct_order_items' # this is a map for unit testing
dbt_artifacts_schema: dbt_artifacts_sung # optional, default is 'dbt_artifacts'
# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models
models:
dbt_artifacts:
+docs:
show: false
+schema: dbt_artifacts_sung
staging:
+schema: dbt_artifacts_sung
+ref_permissions: private
tpch:
staging:
database: '{{ env_var("DBT_DYNAMIC_DATABASE","analytics") }}'
+materialized: view
+docs:
node_color: "#FB6962"
marts:
intermediate:
+docs:
node_color: "#3AAAE0"
core:
+tags: "owner:sung"
materialized: table
+docs:
node_color: "#F6AE3E"
+ref_permissions: private # defaults it all to private

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,197 @@
version: 2
models:
- name: dim_customers
ref_permissions: private # overrides dbt_project.yml settings
description: Customer dimensions table
columns:
- name: customer_key
description: Primary key on the customers table
tests:
- unique
- not_null
- name: region
description: region name
tests:
- accepted_values:
values: ['AFRICA','MIDDLE EAST','ASIA','EUROPE','AMERICA']
severity: warn
- dbt_expectations.expect_column_distinct_count_to_be_greater_than:
value: 4
row_condition: "region is not null" # (Optional)
- name: name
description: customer id
- name: address
description: address of the customer
- name: nation
description: nation name
- name: phone_number
description: phone number of the customer
- name: account_balance
description: '{{ doc("account_balance") }}'
- name: market_segment
description: market segment of the customer
tests:
- not_null
meta:
fal:
scripts:
- scripts/print_hello.py
- name: dim_parts
description: Parts dimensions table
columns:
- name: part_key
description: primary key of the model
tests:
- unique
- not_null
- name: manufacturer
description: manufacturer of the part
- name: name
description: name of the part
- name: brand
description: brand of the part
- name: type
description: type of part including material
- name: size
description: size of the part
- name: container
description: container of the part
- name: retail_price
description: '{{ doc("retail_price") }}'
- name: dim_suppliers
description: Suppliers dimensions table
columns:
- name: supplier_key
description: primary key of the model
tests:
- unique
- not_null
- name: supplier_name
description: '{{ doc("supplier_name") }}'
- name: supplier_address
description: '{{ doc("supplier_address") }}'
- name: nation
description: nation name
- name: region
description: region name
- name: phone_number
description: '{{ doc("phone_number") }}'
- name: account_balance
description: '{{ doc("account_balance") }}'
- name: fct_order_items
description: order items fact table
columns:
- name: order_item_key
description: '{{ doc("order_item_key") }}'
tests:
- unique
- not_null
- name: order_key
description: foreign key for orders
- name: order_date
description: date of the order
- name: customer_key
description: foreign key for customers
- name: part_key
description: foreign key for part
- name: supplier_key
description: foreign key for suppliers
- name: order_item_status_code
description: status of the order item
- name: return_flag
description: '{{ doc("return_flag") }}'
- name: line_number
description: '{{ doc("line_number") }}'
- name: ship_date
description: '{{ doc("ship_date") }}'
- name: commit_date
description: '{{ doc("commit_date") }}'
- name: receipt_date
description: '{{ doc("receipt_date") }}'
- name: ship_mode
description: '{{ doc("ship_mode") }}'
- name: supplier_cost
description: '{{ doc("cost") }}'
- name: base_price
description: '{{ doc("base_price") }}'
- name: discount_percentage
description: '{{ doc("discount_percentage") }}'
- name: discounted_price
description: '{{ doc("discounted_price") }}'
- name: tax_rate
description: '{{ doc("tax_rate") }}'
- name: order_item_count
description: count of order items
- name: quantity
description: total units
- name: gross_item_sales_amount
description: '{{ doc("gross_item_sales_amount") }}'
- name: discounted_item_sales_amount
description: '{{ doc("discounted_item_sales_amount") }}'
- name: item_discount_amount
description: '{{ doc("item_discount_amount") }}'
- name: item_tax_amount
description: '{{ doc("item_tax_amount") }}'
- name: net_item_sales_amount
description: '{{ doc("net_item_sales_amount") }}'
- name: fct_orders
docs:
node_color: "#E9CBFF"
description: orders fact table
columns:
- name: order_key
description: primary key of the model
tests:
- unique
- not_null
- name: customer_key
description: foreign key for customers
tests:
- relationships:
to: ref('dim_customers')
field: customer_key
severity: error
- name: order_date
description: date of the order
- name: status_code
description: status of the order
- name: priority_code
description: code associated with the order
- name: clerk_name
description: id of the clerk
- name: ship_priority
description: numeric representation of the shipping priority, zero being the default
- name: order_count
description: count of order
- name: gross_item_sales_amount
description: '{{ doc("gross_item_sales_amount") }}'
- name: item_discount_amount
description: '{{ doc("item_discount_amount") }}'
- name: item_tax_amount
description: '{{ doc("item_tax_amount") }}'
- name: net_item_sales_amount
description: '{{ doc("net_item_sales_amount") }}'
# metrics:
# - name: orders_over_time
# label: Orders over time
# model: ref('fct_orders')
# description: "The sum of orders over time"
# type: sum
# sql: order_count # superfluous here, but shown as an example
# timestamp: order_date
# time_grains: [day, week, month]
# dimensions:
# - status_code
# - ship_priority
# meta: {team: Finance}

View File

@@ -0,0 +1,27 @@
/*
    Welcome to your first dbt model!
    Did you know that you can also configure models directly within SQL files?
    This will override configurations stated in dbt_project.yml
    Try changing "view" to "table" below
*/

{# Overrides schema.yml and dbt_project.yml settings. Written as a Jinja
   comment so it is not emitted into the compiled SQL; a trailing `#` is not
   a comment marker on every warehouse. #}
{{ config(materialized='view', ref_permissions='public') }}

with source_data as (
    select 1 as id
    union all
    select 2 as id
)

select *
from source_data

/*
    Uncomment the line below to remove records with null `id` values
*/

-- where id is not null