Compare commits

...

9 Commits

Author SHA1 Message Date
MichelleArk
21574bb867 improve test 2025-12-15 17:47:39 -05:00
MichelleArk
4df6b3ffa4 changelog entry 2025-12-15 17:43:13 -05:00
MichelleArk
4fb7ec0b88 update to use EMPTY_SEED_SIZE approach 2025-12-15 17:22:27 -05:00
MichelleArk
728e5ba9b0 Merge branch 'main' into empty-seed 2025-12-15 16:23:59 -05:00
Michelle Ark
3f297cb4e3 fix test_config_with_meta_key (#12284) 2025-12-15 16:13:14 -05:00
MichelleArk
3420afdd93 Merge branch 'main' into empty-seed 2025-12-15 16:08:06 -05:00
MichelleArk
e34b881a5f safer access of empty flag 2025-12-11 12:26:08 -05:00
MichelleArk
1a8f190124 Merge branch 'main' into empty-seed 2025-12-11 12:23:14 -05:00
MichelleArk
446d2671e0 Support --empty flag for seeds 2025-12-09 16:02:53 -08:00
8 changed files with 77 additions and 14 deletions

View File

@@ -0,0 +1,6 @@
kind: Features
body: Support --empty flag for dbt seed
time: 2025-12-15T17:43:08.807815-05:00
custom:
Author: michelleark
Issue: "8981"

View File

@@ -0,0 +1,6 @@
kind: Under the Hood
body: Bump lower bound for dbt-common to 1.37.2
time: 2025-12-15T15:50:46.857793-05:00
custom:
Author: michelleark
Issue: "12284"

View File

@@ -691,6 +691,7 @@ def run_operation(ctx, **kwargs):
@cli.command("seed") @cli.command("seed")
@click.pass_context @click.pass_context
@global_flags @global_flags
@p.empty
@p.exclude @p.exclude
@p.full_refresh @p.full_refresh
@p.profiles_dir @p.profiles_dir

View File

@@ -6,6 +6,9 @@ SECRET_PLACEHOLDER = "$$$DBT_SECRET_START$$${}$$$DBT_SECRET_END$$$"
MAXIMUM_SEED_SIZE = 1 * 1024 * 1024 MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
MAXIMUM_SEED_SIZE_NAME = "1MB" MAXIMUM_SEED_SIZE_NAME = "1MB"
# Number of rows loaded into the agate table when --empty is set: just enough to infer column types for creating the empty seed table.
# The seed materialization itself does not load any of this data into the warehouse.
EMPTY_SEED_SIZE = 5
PIN_PACKAGE_URL = ( PIN_PACKAGE_URL = (
"https://docs.getdbt.com/docs/package-management#section-specifying-package-versions" "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions"

View File

@@ -43,7 +43,7 @@ from dbt.clients.jinja import (
) )
from dbt.clients.jinja_static import statically_parse_unrendered_config from dbt.clients.jinja_static import statically_parse_unrendered_config
from dbt.config import IsFQNResource, Project, RuntimeConfig from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.constants import DEFAULT_ENV_PLACEHOLDER from dbt.constants import DEFAULT_ENV_PLACEHOLDER, EMPTY_SEED_SIZE
from dbt.context.base import Var, contextmember, contextproperty from dbt.context.base import Var, contextmember, contextproperty
from dbt.context.configured import FQNLookup from dbt.context.configured import FQNLookup
from dbt.context.context_config import ContextConfig from dbt.context.context_config import ContextConfig
@@ -1269,6 +1269,8 @@ class ProviderContext(ManifestContext):
delimiter = self.model.config.delimiter delimiter = self.model.config.delimiter
try: try:
table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter) table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter)
if getattr(self.config.args, "EMPTY", False):
table = table.limit(EMPTY_SEED_SIZE) # type: ignore
except ValueError as e: except ValueError as e:
raise LoadAgateTableValueError(e, node=self.model) raise LoadAgateTableValueError(e, node=self.model)
# this is used by some adapters # this is used by some adapters

View File

@@ -54,7 +54,7 @@ dependencies = [
"dbt-extractor>=0.5.0,<=0.6", "dbt-extractor>=0.5.0,<=0.6",
"dbt-semantic-interfaces>=0.9.0,<0.10", "dbt-semantic-interfaces>=0.9.0,<0.10",
# Minor versions for these are expected to be backwards-compatible # Minor versions for these are expected to be backwards-compatible
"dbt-common>=1.37.0,<2.0", "dbt-common>=1.37.2,<2.0",
"dbt-adapters>=1.15.5,<2.0", "dbt-adapters>=1.15.5,<2.0",
"dbt-protos>=1.0.405,<2.0", "dbt-protos>=1.0.405,<2.0",
"pydantic<3", "pydantic<3",

View File

@@ -33,7 +33,7 @@ select {{ config.require('meta_key') }} as col_value
meta_model_meta_require_sql = """ meta_model_meta_require_sql = """
-- models/meta_model.sql -- models/meta_model.sql
select {{ config.require('meta_key') }} as col_value select {{ config.meta_require('meta_key') }} as col_value
""" """
@@ -66,11 +66,11 @@ class TestConfigGetMeta:
self, self,
project, project,
): ):
# This test runs a model with a config.get(key, default) # This test runs a model with a config.get(key, default) -> default value returned
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
assert len(results) == 1 assert len(results) == 1
assert str(results[0].status) == "error" assert str(results[0].status) == "error"
assert 'column "my_meta_value" does not exist' in results[0].message assert 'column "meta_default_value" does not exist' in results[0].message
write_file(meta_model_meta_get_sql, "models", "meta_model.sql") write_file(meta_model_meta_get_sql, "models", "meta_model.sql")
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
@@ -95,10 +95,10 @@ class TestConfigGetMetaRequire:
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
assert len(results) == 1 assert len(results) == 1
assert str(results[0].status) == "error" assert str(results[0].status) == "error"
assert 'column "my_meta_value" does not exist' in results[0].message assert "does not define a required config parameter 'meta_key'" in results[0].message
write_file(meta_model_meta_require_sql, "models", "meta_model.sql") write_file(meta_model_meta_require_sql, "models", "meta_model.sql")
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
assert len(results) == 1 assert len(results) == 1
assert str(results[0].status) == "error" assert str(results[0].status) == "error"
assert 'column "my_meta_value" does not exist' in results[0].message assert 'column "none" does not exist' in results[0].message

View File

@@ -15,6 +15,10 @@ raw_source_csv = """id
3 3
""" """
raw_seed_csv = """a,b,c,d,e,f
3.2,3,US,2025-01-01,none,false
4.5,4,UK,2025-01-02,2,true
"""
model_sql = """ model_sql = """
select * select *
@@ -67,8 +71,29 @@ unit_tests:
2 2
""" """
unit_tests_seed_yml = """
unit_tests:
- name: test_my_seed
model: model
given:
- input: ref('raw_seed')
expect:
format: csv
rows: |
a,b,c,d,e,f
3.2,3,US,2025-01-01,none,false
4.5,4,UK,2025-01-02,2,true
"""
class TestEmptyFlag:
class BaseTestEmptyFlag:
def assert_row_count(self, project, relation_name: str, expected_row_count: int):
relation = relation_from_name(project.adapter, relation_name)
result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one")
assert result[0] == expected_row_count
class TestEmptyFlag(BaseTestEmptyFlag):
@pytest.fixture(scope="class") @pytest.fixture(scope="class")
def seeds(self): def seeds(self):
return { return {
@@ -86,14 +111,10 @@ class TestEmptyFlag:
"unit_tests.yml": unit_tests_yml, "unit_tests.yml": unit_tests_yml,
} }
def assert_row_count(self, project, relation_name: str, expected_row_count: int):
relation = relation_from_name(project.adapter, relation_name)
result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one")
assert result[0] == expected_row_count
def test_run_with_empty(self, project): def test_run_with_empty(self, project):
# create source from seed # Create source from seed for run and build command testing
run_dbt(["seed"]) run_dbt(["seed"])
self.assert_row_count(project, "raw_source", 1)
# run without empty - 3 expected rows in output - 1 from each input # run without empty - 3 expected rows in output - 1 from each input
run_dbt(["run"]) run_dbt(["run"])
@@ -113,3 +134,27 @@ class TestEmptyFlag:
# ensure dbt compile supports --empty flag # ensure dbt compile supports --empty flag
run_dbt(["compile", "--empty"]) run_dbt(["compile", "--empty"])
class TestEmptyFlagSeed(BaseTestEmptyFlag):
@pytest.fixture(scope="class")
def seeds(self):
return {
"raw_seed.csv": raw_seed_csv,
}
@pytest.fixture(scope="class")
def models(self):
return {
"model.sql": "select * from {{ ref('raw_seed') }}",
"unit_tests.yml": unit_tests_seed_yml,
}
def test_run_with_empty(self, project):
run_dbt(["seed", "--empty"])
self.assert_row_count(project, "raw_seed", 0)
results = run_dbt(["build", "--empty"])
self.assert_row_count(project, "raw_seed", 0)
assert len(results) == 3