Compare commits

..

9 Commits

Author SHA1 Message Date
MichelleArk
21574bb867 improve test 2025-12-15 17:47:39 -05:00
MichelleArk
4df6b3ffa4 changelog entry 2025-12-15 17:43:13 -05:00
MichelleArk
4fb7ec0b88 update to use EMPTY_SEED_SIZE approach 2025-12-15 17:22:27 -05:00
MichelleArk
728e5ba9b0 Merge branch 'main' into empty-seed 2025-12-15 16:23:59 -05:00
Michelle Ark
3f297cb4e3 fix test_config_with_meta_key (#12284) 2025-12-15 16:13:14 -05:00
MichelleArk
3420afdd93 Merge branch 'main' into empty-seed 2025-12-15 16:08:06 -05:00
MichelleArk
e34b881a5f safer access of empty flag 2025-12-11 12:26:08 -05:00
MichelleArk
1a8f190124 Merge branch 'main' into empty-seed 2025-12-11 12:23:14 -05:00
MichelleArk
446d2671e0 Support --empty flag for seeds 2025-12-09 16:02:53 -08:00
9 changed files with 131 additions and 68 deletions

View File

@@ -0,0 +1,6 @@
kind: Features
body: Support --empty flag for dbt seed
time: 2025-12-15T17:43:08.807815-05:00
custom:
Author: michelleark
Issue: "8981"

View File

@@ -0,0 +1,6 @@
kind: Under the Hood
body: Bump lower bound for dbt-common to 1.37.2
time: 2025-12-15T15:50:46.857793-05:00
custom:
Author: michelleark
Issue: "12284"

View File

@@ -108,62 +108,62 @@ jobs:
echo "dbt-postgres-ref=${{ steps.core-ref.outputs.ref }}" echo "dbt-postgres-ref=${{ steps.core-ref.outputs.ref }}"
echo "dbt-core-ref=${{ steps.common-ref.outputs.ref }}" echo "dbt-core-ref=${{ steps.common-ref.outputs.ref }}"
integration-tests-postgres: # integration-tests-postgres:
name: "dbt-postgres integration tests" # name: "dbt-postgres integration tests"
needs: [job-prep] # needs: [job-prep]
runs-on: ubuntu-latest # runs-on: ubuntu-latest
defaults: # defaults:
run: # run:
working-directory: "./dbt-postgres" # working-directory: "./dbt-postgres"
environment: # environment:
name: "dbt-postgres" # name: "dbt-postgres"
env: # env:
POSTGRES_TEST_HOST: ${{ vars.POSTGRES_TEST_HOST }} # POSTGRES_TEST_HOST: ${{ vars.POSTGRES_TEST_HOST }}
POSTGRES_TEST_PORT: ${{ vars.POSTGRES_TEST_PORT }} # POSTGRES_TEST_PORT: ${{ vars.POSTGRES_TEST_PORT }}
POSTGRES_TEST_USER: ${{ vars.POSTGRES_TEST_USER }} # POSTGRES_TEST_USER: ${{ vars.POSTGRES_TEST_USER }}
POSTGRES_TEST_PASS: ${{ secrets.POSTGRES_TEST_PASS }} # POSTGRES_TEST_PASS: ${{ secrets.POSTGRES_TEST_PASS }}
POSTGRES_TEST_DATABASE: ${{ vars.POSTGRES_TEST_DATABASE }} # POSTGRES_TEST_DATABASE: ${{ vars.POSTGRES_TEST_DATABASE }}
POSTGRES_TEST_THREADS: ${{ vars.POSTGRES_TEST_THREADS }} # POSTGRES_TEST_THREADS: ${{ vars.POSTGRES_TEST_THREADS }}
services: # services:
postgres: # postgres:
image: postgres # image: postgres
env: # env:
POSTGRES_PASSWORD: postgres # POSTGRES_PASSWORD: postgres
options: >- # options: >-
--health-cmd pg_isready # --health-cmd pg_isready
--health-interval 10s # --health-interval 10s
--health-timeout 5s # --health-timeout 5s
--health-retries 5 # --health-retries 5
ports: # ports:
- ${{ vars.POSTGRES_TEST_PORT }}:5432 # - ${{ vars.POSTGRES_TEST_PORT }}:5432
steps: # steps:
- name: "Check out dbt-adapters@${{ needs.job-prep.outputs.dbt-postgres-ref }}" # - name: "Check out dbt-adapters@${{ needs.job-prep.outputs.dbt-postgres-ref }}"
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4 # uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # actions/checkout@v4
with: # with:
repository: dbt-labs/dbt-adapters # repository: dbt-labs/dbt-adapters
ref: ${{ needs.job-prep.outputs.dbt-postgres-ref }} # ref: ${{ needs.job-prep.outputs.dbt-postgres-ref }}
- name: "Set up Python" # - name: "Set up Python"
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # actions/setup-python@v5 # uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # actions/setup-python@v5
with: # with:
python-version: ${{ inputs.python-version }} # python-version: ${{ inputs.python-version }}
- name: "Set environment variables" # - name: "Set environment variables"
run: | # run: |
echo "HATCH_PYTHON=${{ inputs.python-version }}" >> $GITHUB_ENV # echo "HATCH_PYTHON=${{ inputs.python-version }}" >> $GITHUB_ENV
echo "PIP_ONLY_BINARY=psycopg2-binary" >> $GITHUB_ENV # echo "PIP_ONLY_BINARY=psycopg2-binary" >> $GITHUB_ENV
- name: "Setup test database" # - name: "Setup test database"
run: psql -f ./scripts/setup_test_database.sql # run: psql -f ./scripts/setup_test_database.sql
env: # env:
PGHOST: ${{ vars.POSTGRES_TEST_HOST }} # PGHOST: ${{ vars.POSTGRES_TEST_HOST }}
PGPORT: ${{ vars.POSTGRES_TEST_PORT }} # PGPORT: ${{ vars.POSTGRES_TEST_PORT }}
PGUSER: postgres # PGUSER: postgres
PGPASSWORD: postgres # PGPASSWORD: postgres
PGDATABASE: postgres # PGDATABASE: postgres
- name: "Install hatch" # - name: "Install hatch"
uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc # pypa/hatch@install # uses: pypa/hatch@257e27e51a6a5616ed08a39a408a21c35c9931bc # pypa/hatch@install
- name: "Run integration tests" # - name: "Run integration tests"
run: hatch run ${{ inputs.hatch-env }}:integration-tests # run: hatch run ${{ inputs.hatch-env }}:integration-tests

View File

@@ -691,6 +691,7 @@ def run_operation(ctx, **kwargs):
@cli.command("seed") @cli.command("seed")
@click.pass_context @click.pass_context
@global_flags @global_flags
@p.empty
@p.exclude @p.exclude
@p.full_refresh @p.full_refresh
@p.profiles_dir @p.profiles_dir

View File

@@ -6,6 +6,9 @@ SECRET_PLACEHOLDER = "$$$DBT_SECRET_START$$${}$$$DBT_SECRET_END$$$"
MAXIMUM_SEED_SIZE = 1 * 1024 * 1024 MAXIMUM_SEED_SIZE = 1 * 1024 * 1024
MAXIMUM_SEED_SIZE_NAME = "1MB" MAXIMUM_SEED_SIZE_NAME = "1MB"
# Number of rows to load as agate table to obtain column types for empty seed table creation
# Seed materializations themselves avoid loading the data to the warehouse
EMPTY_SEED_SIZE = 5
PIN_PACKAGE_URL = ( PIN_PACKAGE_URL = (
"https://docs.getdbt.com/docs/package-management#section-specifying-package-versions" "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions"

View File

@@ -43,7 +43,7 @@ from dbt.clients.jinja import (
) )
from dbt.clients.jinja_static import statically_parse_unrendered_config from dbt.clients.jinja_static import statically_parse_unrendered_config
from dbt.config import IsFQNResource, Project, RuntimeConfig from dbt.config import IsFQNResource, Project, RuntimeConfig
from dbt.constants import DEFAULT_ENV_PLACEHOLDER from dbt.constants import DEFAULT_ENV_PLACEHOLDER, EMPTY_SEED_SIZE
from dbt.context.base import Var, contextmember, contextproperty from dbt.context.base import Var, contextmember, contextproperty
from dbt.context.configured import FQNLookup from dbt.context.configured import FQNLookup
from dbt.context.context_config import ContextConfig from dbt.context.context_config import ContextConfig
@@ -1269,6 +1269,8 @@ class ProviderContext(ManifestContext):
delimiter = self.model.config.delimiter delimiter = self.model.config.delimiter
try: try:
table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter) table = agate_helper.from_csv(path, text_columns=column_types, delimiter=delimiter)
if getattr(self.config.args, "EMPTY", False):
table = table.limit(EMPTY_SEED_SIZE) # type: ignore
except ValueError as e: except ValueError as e:
raise LoadAgateTableValueError(e, node=self.model) raise LoadAgateTableValueError(e, node=self.model)
# this is used by some adapters # this is used by some adapters

View File

@@ -54,7 +54,7 @@ dependencies = [
"dbt-extractor>=0.5.0,<=0.6", "dbt-extractor>=0.5.0,<=0.6",
"dbt-semantic-interfaces>=0.9.0,<0.10", "dbt-semantic-interfaces>=0.9.0,<0.10",
# Minor versions for these are expected to be backwards-compatible # Minor versions for these are expected to be backwards-compatible
"dbt-common>=1.37.0,<2.0", "dbt-common>=1.37.2,<2.0",
"dbt-adapters>=1.15.5,<2.0", "dbt-adapters>=1.15.5,<2.0",
"dbt-protos>=1.0.405,<2.0", "dbt-protos>=1.0.405,<2.0",
"pydantic<3", "pydantic<3",

View File

@@ -33,7 +33,7 @@ select {{ config.require('meta_key') }} as col_value
meta_model_meta_require_sql = """ meta_model_meta_require_sql = """
-- models/meta_model.sql -- models/meta_model.sql
select {{ config.require('meta_key') }} as col_value select {{ config.meta_require('meta_key') }} as col_value
""" """
@@ -66,11 +66,11 @@ class TestConfigGetMeta:
self, self,
project, project,
): ):
# This test runs a model with a config.get(key, default) # This test runs a model with a config.get(key, default) -> default value returned
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
assert len(results) == 1 assert len(results) == 1
assert str(results[0].status) == "error" assert str(results[0].status) == "error"
assert 'column "my_meta_value" does not exist' in results[0].message assert 'column "meta_default_value" does not exist' in results[0].message
write_file(meta_model_meta_get_sql, "models", "meta_model.sql") write_file(meta_model_meta_get_sql, "models", "meta_model.sql")
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
@@ -95,10 +95,10 @@ class TestConfigGetMetaRequire:
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
assert len(results) == 1 assert len(results) == 1
assert str(results[0].status) == "error" assert str(results[0].status) == "error"
assert 'column "my_meta_value" does not exist' in results[0].message assert "does not define a required config parameter 'meta_key'" in results[0].message
write_file(meta_model_meta_require_sql, "models", "meta_model.sql") write_file(meta_model_meta_require_sql, "models", "meta_model.sql")
results = run_dbt(["run"], expect_pass=False) results = run_dbt(["run"], expect_pass=False)
assert len(results) == 1 assert len(results) == 1
assert str(results[0].status) == "error" assert str(results[0].status) == "error"
assert 'column "my_meta_value" does not exist' in results[0].message assert 'column "none" does not exist' in results[0].message

View File

@@ -15,6 +15,10 @@ raw_source_csv = """id
3 3
""" """
raw_seed_csv = """a,b,c,d,e,f
3.2,3,US,2025-01-01,none,false
4.5,4,UK,2025-01-02,2,true
"""
model_sql = """ model_sql = """
select * select *
@@ -67,8 +71,29 @@ unit_tests:
2 2
""" """
unit_tests_seed_yml = """
unit_tests:
- name: test_my_seed
model: model
given:
- input: ref('raw_seed')
expect:
format: csv
rows: |
a,b,c,d,e,f
3.2,3,US,2025-01-01,none,false
4.5,4,UK,2025-01-02,2,true
"""
class TestEmptyFlag:
class BaseTestEmptyFlag:
def assert_row_count(self, project, relation_name: str, expected_row_count: int):
relation = relation_from_name(project.adapter, relation_name)
result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one")
assert result[0] == expected_row_count
class TestEmptyFlag(BaseTestEmptyFlag):
@pytest.fixture(scope="class") @pytest.fixture(scope="class")
def seeds(self): def seeds(self):
return { return {
@@ -86,14 +111,10 @@ class TestEmptyFlag:
"unit_tests.yml": unit_tests_yml, "unit_tests.yml": unit_tests_yml,
} }
def assert_row_count(self, project, relation_name: str, expected_row_count: int):
relation = relation_from_name(project.adapter, relation_name)
result = project.run_sql(f"select count(*) as num_rows from {relation}", fetch="one")
assert result[0] == expected_row_count
def test_run_with_empty(self, project): def test_run_with_empty(self, project):
# create source from seed # Create source from seed for run and build command testing
run_dbt(["seed"]) run_dbt(["seed"])
self.assert_row_count(project, "raw_source", 1)
# run without empty - 3 expected rows in output - 1 from each input # run without empty - 3 expected rows in output - 1 from each input
run_dbt(["run"]) run_dbt(["run"])
@@ -113,3 +134,27 @@ class TestEmptyFlag:
# ensure dbt compile supports --empty flag # ensure dbt compile supports --empty flag
run_dbt(["compile", "--empty"]) run_dbt(["compile", "--empty"])
class TestEmptyFlagSeed(BaseTestEmptyFlag):
@pytest.fixture(scope="class")
def seeds(self):
return {
"raw_seed.csv": raw_seed_csv,
}
@pytest.fixture(scope="class")
def models(self):
return {
"model.sql": "select * from {{ ref('raw_seed') }}",
"unit_tests.yml": unit_tests_seed_yml,
}
def test_run_with_empty(self, project):
run_dbt(["seed", "--empty"])
self.assert_row_count(project, "raw_seed", 0)
results = run_dbt(["build", "--empty"])
self.assert_row_count(project, "raw_seed", 0)
assert len(results) == 3