Compare commits

...

1472 Commits

Author SHA1 Message Date
Connor McArthur
c4892d9f33 Merge pull request #1745 from fishtown-analytics/dev/0.14.2
dbt 0.14.2
2019-09-13 12:55:12 -04:00
Connor McArthur
6e5456d14c update date in CHANGELOG 2019-09-13 09:49:23 -04:00
Connor McArthur
5e706816e4 undo accidental werkzeug bumpversion, fix RELEASE instructions 2019-09-13 09:48:51 -04:00
Connor McArthur
7151ac1f81 Bump version: 0.14.1 → 0.14.2 2019-09-12 15:58:43 -04:00
Drew Banin
ee921cc0ca Update CHANGELOG.md 2019-09-12 15:01:05 -04:00
Jacob Beck
3e09319f8c Merge pull request #1734 from fishtown-analytics/fix/0.14.2-disable-cache-logging
Fix/0.14.2 disable cache logging (#1725)
2019-09-10 12:31:50 -06:00
Jacob Beck
2d52eda730 disable dbt cache logging 2019-09-10 11:15:43 -06:00
Drew Banin
491ad692d7 Merge pull request #1729 from fishtown-analytics/fix/0141-regression-snapshot-freshness
Fix/0141 regression snapshot freshness
2019-09-10 13:03:49 -04:00
Drew Banin
26f573fdc8 pr feedback 2019-09-10 11:34:52 -04:00
Drew Banin
689a0ea4cc (#1728) exclude ephemeral models from freshness task 2019-09-10 11:15:58 -04:00
Drew Banin
23bfc67d62 Merge pull request #1726 from fishtown-analytics/fix/0.14.2-docs-updates
(#1724) bump docs site for 0.14.2
2019-09-05 16:04:05 -04:00
Drew Banin
e0f725363c (#1724) bump docs site for 0.14.2 2019-09-05 14:36:39 -04:00
Connor McArthur
141bdd6f96 Bump version: 0.14.1rc2 → 0.14.1 2019-09-03 19:52:54 -04:00
Drew Banin
54f548eec8 Update CHANGELOG.md 2019-08-29 17:01:23 -04:00
Connor McArthur
786791670e Bump version: 0.14.1rc1 → 0.14.1rc2 2019-08-28 18:08:14 -04:00
Drew Banin
ce58da82c5 Merge pull request #1709 from fishtown-analytics/fix/simplify-snapshot-check-logic
make snapshot check strategy simpler and still correct
2019-08-28 15:39:50 -04:00
Drew Banin
3437b0f2b9 Update CHANGELOG.md 2019-08-28 14:43:21 -04:00
Drew Banin
0970285956 make snapshot check strategy simpler and still correct 2019-08-28 11:30:27 -04:00
Drew Banin
c0028587dd Merge pull request #1701 from fishtown-analytics/feature/expose-config-in-generation-macros
add configs to ParsedNodes before generating a schema/alias
2019-08-27 14:33:35 -04:00
Drew Banin
7fafa2adeb Update CHANGELOG.md 2019-08-27 13:10:35 -04:00
Drew Banin
d1cc5deaac add tests for accessing config properties in generate macros 2019-08-26 22:54:17 -04:00
Drew Banin
952b1fc61b add configs to ParsedNodes before generating a schema/alias 2019-08-26 22:10:51 -04:00
Drew Banin
1b03db6ab5 Update CHANGELOG.md 2019-08-26 09:55:45 -04:00
Connor McArthur
8a19ba4862 Bump version: 0.14.1a1 → 0.14.1rc1 2019-08-22 12:02:16 -04:00
Drew Banin
f73d561557 Merge pull request #1678 from fishtown-analytics/0.14.1-changelog
0.14.1 changelog
2019-08-21 15:54:03 -04:00
Drew Banin
83003a7d8f Update CHANGELOG.md 2019-08-21 15:50:48 -04:00
Drew Banin
84a991520f Merge pull request #1694 from fishtown-analytics/fix/connection-name-snowflake
Fix for errant method call in error handler
2019-08-21 09:40:33 -04:00
Drew Banin
89c4dbcdba Fix for errant method call in error handler 2019-08-20 20:35:28 -04:00
Drew Banin
e7a24a2062 Merge pull request #1689 from bastienboutonnet/feature/snowflake_clustering
Implement Clustering for Snowflake
2019-08-20 12:27:22 -04:00
Drew Banin
0e897f7751 Merge pull request #1688 from mikaelene/project_cols_data_test
Updated injected_node.wrapped_sql for data tests
2019-08-20 12:26:14 -04:00
Drew Banin
ea898e25ad Merge pull request #1690 from fishtown-analytics/fix/no-col-expansion-on-snowflake
(#1687) no-op column expansion on Snowflake + BQ
2019-08-20 11:54:40 -04:00
Mikael Ene
6d38226a1c last try before I start over 2019-08-20 16:55:51 +02:00
Mikael Ene
b415e0eed3 last try before I start over 2019-08-20 16:54:02 +02:00
Bastien Boutonnet
90e8e75716 Apply suggestions from code review
Co-Authored-By: Drew Banin <drew@fishtownanalytics.com>
2019-08-20 16:27:06 +02:00
Drew Banin
5a74918e1e pep8 2019-08-20 10:26:14 -04:00
Drew Banin
d8551a6c9c use atomic column type alter statement on snowflake 2019-08-20 09:40:29 -04:00
Mikael Ene
51b546f50f try again 2019-08-20 15:25:16 +02:00
Mikael Ene
4bc4c65795 fixed indentation for flake and passed the local test. Cmon 2019-08-20 06:42:46 +02:00
Bastien Boutonnet
a0493b000e add a check before clustering that cluster keys were provided 2019-08-19 21:21:24 +02:00
Bastien Boutonnet
7071c1d200 fix god-awful logical flow 2019-08-19 21:13:23 +02:00
Drew Banin
64ee763c99 Merge pull request #1686 from fishtown-analytics/fix/panoply
Fix for run and docs generate on Panoply Redshift
2019-08-19 13:04:01 -04:00
Drew Banin
e78d979d4c Merge pull request #1650 from boxysean/add_postgres_ctas_adapter
Add Postgres CTAS adapter macro with support for unlogged parameter
2019-08-19 09:23:16 -04:00
mikael.ene
845529cc9d made line shorter according to flake8 2019-08-19 12:01:38 +02:00
mikael.ene
c71b2dc312 made line shorter according to flake8 2019-08-19 11:52:03 +02:00
Sean McIntyre
d8775d1054 Add unlogged as a Postgres AdapterSpecificConfigs 2019-08-18 12:07:51 -04:00
Bastien Boutonnet
db2e564e7a remove cluster key list join from if statement block 2019-08-17 21:21:04 +02:00
Bastien Boutonnet
7fc71272fd apply patch from PR 1591 2019-08-17 15:46:29 +02:00
Mikael Ene
8011bc7b7a changed the sql for data-tests for supporting sql server 2019-08-17 07:59:11 +02:00
Drew Banin
171fdf792e (#1687) no-op column expansion on Snowflake + BQ 2019-08-16 15:18:00 -04:00
Drew Banin
7a01ba7b49 (#1479) fix for run and docs generate on Panoply Redshift 2019-08-16 10:51:52 -04:00
Drew Banin
2aee9eedad Merge pull request #1670 from fishtown-analytics/fix/snowflake-unpickleable-datetime-timezone
fix for unpickleable datetime tzs set by snowflake
2019-08-13 13:17:04 -04:00
Connor McArthur
db203371fe Bump version: 0.14.0 → 0.14.1a1 2019-08-12 20:18:30 -04:00
Drew Banin
50fa1baf96 fix for unpickleable datetime tzs set by snowflake 2019-08-12 13:46:46 -04:00
Drew Banin
da7c9501d2 Merge pull request #1673 from sjwhitworth/bq-priority
Support job priority in BigQuery
2019-08-09 19:27:56 -04:00
Stephen
be53b67e68 Add unit test for priority 2019-08-09 23:22:15 +01:00
Drew Banin
ffcaac5b4c Merge pull request #1672 from sjwhitworth/fix-iternal-execption
Fix typo: "IternalException" -> "InternalException"
2019-08-09 14:46:10 -04:00
Stephen
65c3bf6d0f Support job priority in BigQuery 2019-08-08 21:50:29 +01:00
Stephen
58976b46f9 Fix typo: "IternalException" -> "InternalException" 2019-08-08 20:25:14 +01:00
Drew Banin
3e3c69eaf9 Merge pull request #1663 from edmundyan/ey_create_schemas_lowered
Do case-insensitive schema comparisons to test for schema existence
2019-08-06 13:45:26 -04:00
Edmund Yan
e867cfa4a2 Remove profile schema as we no longer run 'use schema' 2019-08-05 23:28:48 -04:00
Drew Banin
7901413a97 Merge pull request #1666 from fishtown-analytics/fix/snapshot-support-old-pg-versions
(#1665) Fix for casting error on old versions of postgres in snapshots
2019-08-05 22:50:49 -04:00
Drew Banin
b7e8670b43 (#1665) Fix for casting error on old versions of postgres in snapshot queries 2019-08-05 22:16:16 -04:00
Drew Banin
e2531edb02 Merge pull request #1662 from fishtown-analytics/template/bug-and-feature-templates
GitHub issue / PR touchups
2019-08-05 16:41:47 -04:00
Drew Banin
96913732e4 Merge pull request #1658 from fishtown-analytics/docs/0.14.1
Update CHANGELOG.md
2019-08-05 16:41:25 -04:00
Drew Banin
06a89446ab Merge pull request #1664 from vitorbaptista/patch-1
Fix typo "paramteter" -> "parameter"
2019-08-05 16:31:21 -04:00
Vitor Baptista
ac128da45a Fix typo "paramteter" -> "parameter" 2019-08-05 20:56:04 +01:00
Edmund Yan
265f6d3ce5 Do case-insensitive schema comparison before trying to create a schema 2019-08-05 14:12:36 -04:00
Drew Banin
388fd0bd00 Update bug report and feature request templates 2019-08-05 12:11:57 -04:00
Drew Banin
1c6945cb75 Update CHANGELOG.md 2019-08-05 11:23:58 -04:00
Drew Banin
772cb0d326 Update CHANGELOG.md 2019-08-05 10:50:28 -04:00
Drew Banin
b2f2e69377 Merge pull request #1540 from elexisvenator/fix/postgres-catalog
Change postgres `get_catalog` to not use `information_schema`
2019-08-04 19:29:30 -04:00
Drew Banin
377d5b7f58 Merge pull request #1657 from fishtown-analytics/bump/0.14.1-docs-site
bump docs site to 0.14.1
2019-08-04 19:25:22 -04:00
Drew Banin
b4da0686c8 bump docs site to 0.14.1 2019-08-04 19:20:38 -04:00
Drew Banin
f3baa69aad Merge pull request #1656 from fishtown-analytics/fix/bad-profile-dir
(#1645) fix for errant cookie generation
2019-08-04 19:13:04 -04:00
Drew Banin
9527626ffe Merge pull request #1609 from aminamos/dev/0.14.1
updated dbt.exceptions reference to exceptions in .sql files
2019-08-04 18:06:22 -04:00
Drew Banin
e13568117b Merge pull request #1654 from fishtown-analytics/fix/summarize-warns
(#1597) summarize warnings at end of test invocations
2019-08-04 17:47:48 -04:00
Drew Banin
57c6b11d47 (#1645) fix for errant cookie generation 2019-08-03 14:14:04 -04:00
Drew Banin
800355ec2f (#1597) summarize warnings at end of test invocations 2019-08-03 12:49:28 -04:00
Drew Banin
4478a89f28 Merge pull request #1647 from levimalott/fix/recover-from-failed-cleanup-rollbacks
Log, but allow, failures during cleanup rollbacks.
2019-08-02 16:25:08 -04:00
Drew Banin
9e07912e1c Merge pull request #1644 from fishtown-analytics/fix/seed-nonascii-chars
Fix for unicode chars in seed files
2019-08-02 13:31:41 -04:00
Jacob Beck
8fd768e46b for agate, use the "Urb" mode on python 2, handle BOM fiddling 2019-08-01 12:41:51 -06:00
boxysean
dad3dcacfe Add Postgres-specific CTAS adapter macro 2019-07-31 18:20:19 -04:00
Levi Malott
0927093303 Slim down the rollback failure log to appease flake8. 2019-07-31 16:22:29 -05:00
Levi Malott
3099119815 Log failed rollbacks to debug logs rather than to stdout. 2019-07-31 14:59:14 -05:00
Levi Malott
7a026c7e10 Log, but allow, failures during cleanup rollbacks.
In Postgres, rollbacks can fail if the transaction was killed
by the database. One common scenario is that the
`idle_in_transaction_session_timeout` is enabled.

If the
transaction was cancelled, the connection is left open
in `dbt`. `dbt` attempts to close that connection after issuing
a `ROLLBACK`. But it fails since the transaction was severed.
Since the cleanup is carried out in a `finally` statement, the
`psycopg2.InternalDatabaseError` is thrown and prevents the
test case results from ever being shown.

Changes here wrap the `ROLLBACK` in a try-catch such that
if there is an exception thrown, it is logged appropriately,
but ultimately proceeds.
2019-07-31 10:18:12 -05:00
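
The cleanup change described above boils down to guarding the final ROLLBACK so a transaction the server already killed cannot mask the test results. A minimal sketch of that pattern (not the actual dbt code; the connection handling and logger name are assumptions):

```python
import logging

import psycopg2

logger = logging.getLogger("dbt-cleanup-sketch")


def safe_rollback(connection):
    """Roll back an open transaction, logging (but not raising) failures.

    If the server severed the transaction (for example via
    idle_in_transaction_session_timeout), ROLLBACK raises; swallowing the
    error here lets the surrounding `finally` block finish its cleanup and
    the test results still get reported.
    """
    try:
        connection.rollback()
    except psycopg2.Error as exc:
        logger.debug("rollback failed during cleanup: %s", exc)
```
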
Drew Banin
7177a6543b Merge pull request #1638 from fishtown-analytics/fix/bq-repeated-record-execute
(#1626) fix for RPC error with BQ nested fields
2019-07-30 16:27:08 -04:00
Drew Banin
b2b0f78587 Merge pull request #1642 from fishtown-analytics/fix/ls-ephemeral-selection
exclude ephemeral addins in dbt ls command
2019-07-30 14:57:47 -04:00
Drew Banin
7001afbcbe (#1632) fix for unicode chars in seed files 2019-07-30 14:33:32 -04:00
Drew Banin
3eb28198bd remove unnecessary reordering of table cols 2019-07-30 12:47:02 -04:00
Drew Banin
81deb8d828 exclude ephemeral addins in dbt ls command 2019-07-30 12:39:11 -04:00
Drew Banin
e30ba80d6a Merge pull request #1636 from fishtown-analytics/feature/test-freshness-disable
add test for nulled out freshness spec
2019-07-30 09:45:42 -04:00
Drew Banin
22d13ba881 serialize as json 2019-07-29 23:31:15 -04:00
Drew Banin
6cfbcf1ac8 (#1626) fix for RPC error with BQ nested fields 2019-07-29 22:35:16 -04:00
Drew Banin
21daca9faf add test for nulled out freshness spec 2019-07-29 15:27:44 -04:00
Amin
142edcff38 Update core/dbt/exceptions.py
Co-Authored-By: Drew Banin <drew@fishtownanalytics.com>
2019-07-29 11:32:26 -07:00
aminamos
70d82ed48e removed CODE from NotImplementedException class 2019-07-26 13:52:15 -07:00
Jacob Beck
52c9234621 Merge pull request #1629 from fishtown-analytics/feature/compile-tricks-flag
Add environment variables for macro debugging flags (#1628)
2019-07-26 09:44:02 -06:00
Drew Banin
78d309551f Merge pull request #1633 from fishtown-analytics/fix/source-column-description-interpolation
Fix for unrendered source column descriptions
2019-07-26 11:29:17 -04:00
Drew Banin
f91109570c pep8 2019-07-26 09:19:31 -04:00
Drew Banin
291ef56bc7 (#1619) fix for unrendered source column descriptions 2019-07-25 22:25:04 -04:00
Drew Banin
b12484bb6f Merge pull request #1614 from fishtown-analytics/fix/snapshot-check-cols-cycle
possible fix for re-used check cols on BQ
2019-07-24 12:56:38 -04:00
Jacob Beck
709ee2a0e8 Add environment variables for macro debugging flags 2019-07-23 17:26:53 -06:00
Drew Banin
8d4f2bd126 Merge pull request #1623 from fishtown-analytics/feature/set-snowflake-application-name
Set application name in snowflake connections
2019-07-22 13:21:59 -04:00
Drew Banin
b6e7351431 snapshot surrogate key whitespace control 2019-07-22 11:14:03 -04:00
Drew Banin
329145c13f Merge branch 'dev/0.14.1' into fix/snapshot-check-cols-cycle 2019-07-21 15:26:44 -04:00
Drew Banin
35d1a7a1b5 add tests 2019-07-21 15:26:40 -04:00
Drew Banin
a2e801c2de pep8 2019-07-21 13:40:43 -04:00
Drew Banin
e86c11e5de set application name in snowflake connections 2019-07-20 15:59:36 -04:00
Jacob Beck
e46800f5b4 Merge pull request #1615 from fishtown-analytics/fix/linear-time-selection
Make node selection O(n)
2019-07-17 14:50:22 -06:00
Jacob Beck
0648737fc1 add the flat graph back in with caching, PR feedback 2019-07-17 15:30:19 -04:00
Jacob Beck
1a4daaba10 That was the last reference to to_flat_graph, so goodbye to all that 2019-07-17 12:52:35 -04:00
Jacob Beck
6be4ac044c remove repeated to_flat_graph() call 2019-07-17 12:46:44 -04:00
Jacob Beck
c0aabc7d0b linear time wooo 2019-07-17 12:34:46 -04:00
Drew Banin
4df0bbd814 touchup var name and sql formatting 2019-07-16 23:43:03 -04:00
Drew Banin
5e6e746951 possible fix for re-used check cols on BQ 2019-07-16 23:24:19 -04:00
aminamos
a55a27acf6 removed extra line 2019-07-16 16:13:58 -07:00
aminamos
8046992e08 added string interpolation to raise_not_implemented 2019-07-16 14:28:31 -07:00
aminamos
de56e88a00 updated raise_not_implemented, commented on profile.py 2019-07-16 14:18:52 -07:00
aminamos
fa6fb1b53d updated dbt.exceptions reference to exceptions in .sql files 2019-07-15 19:14:22 -07:00
Drew Banin
5a1f0bdda5 Merge pull request #1608 from fishtown-analytics/fix/is-incremental-check-materialization
Make is_incremental live up to its name
2019-07-15 15:17:04 -04:00
Drew Banin
405748c744 make is_incremental live up to its name 2019-07-15 14:15:20 -04:00
Drew Banin
cc90b048af Merge pull request #1605 from fishtown-analytics/dev/0.14.1-merge-wilt
Merge 0.14.latest into dev/0.14.1
2019-07-12 16:39:48 -04:00
Drew Banin
6886228992 Merge branch '0.14.latest' into dev/0.14.1-merge-wilt 2019-07-12 09:14:43 -04:00
Drew Banin
4569c905a5 Merge pull request #1596 from fishtown-analytics/fix/contrib-typos
Update CONTRIBUTING.md
2019-07-09 21:49:47 -04:00
Connor McArthur
453e81e895 Bump version: 0.14.0rc1 → 0.14.0 2019-07-09 21:44:06 -04:00
Drew Banin
3af8696761 Update CONTRIBUTING.md 2019-07-09 21:43:59 -04:00
Connor McArthur
399b33822a update changelog date 2019-07-09 21:42:18 -04:00
Drew Banin
913a296cc4 Merge pull request #1583 from fishtown-analytics/update-contributing-guide
Update CONTRIBUTING.md
2019-07-09 17:59:45 -04:00
Drew Banin
bd55569703 Merge branch 'dev/wilt-chamberlain' into update-contributing-guide 2019-07-09 17:59:05 -04:00
Drew Banin
0e2d3f833d Merge pull request #1593 from fishtown-analytics/fix/docs-links
Fix immediately-obvious links that will break
2019-07-09 17:49:57 -04:00
Drew Banin
3646969779 update changelog links 2019-07-09 15:39:07 -04:00
Claire Carroll
d5bfb9f6aa Update docs link 2019-07-09 14:15:26 -04:00
Drew Banin
56a2d9dc0a Merge pull request #1590 from fishtown-analytics/fix/snapshot-check-cols-dupe-scd-id
[Wilt Chamberlain] fix for dupe check_cols values in snapshot strategy
2019-07-08 11:00:42 -04:00
Claire Carroll
e90c05c8f8 Fix immediately-obvious links that will break 2019-07-07 16:05:51 -04:00
Drew Banin
08d79cc324 (#1588) fix for dupe check_cols values in snapshot strategy 2019-07-05 21:14:54 -04:00
Drew Banin
990b0c93a5 Merge pull request #1587 from fishtown-analytics/fix/list-description
Fix the list subparser description
2019-07-03 17:04:45 -04:00
Claire Carroll
14d638c588 Fix the list subparser description 2019-07-03 16:34:47 -04:00
Ben Edwards
2645667257 Fix table type
v = view
r, f, p = all different forms of table
2019-07-03 07:32:54 +10:00
Ben Edwards
38c2d82c88 Cleaned up filtering to be in line with information_schema logic 2019-07-02 09:09:33 +10:00
Connor McArthur
986f5b7b4e Bump version: 0.14.0a2 → 0.14.0rc1 2019-07-01 15:29:32 -04:00
Drew Banin
ef76c04ae8 Merge pull request #1585 from fishtown-analytics/fix/persist-docs-validation
add validation for extend dict fields that receive non-dicts
2019-07-01 13:18:52 -04:00
Drew Banin
d4d5393faa add validation for extend dict fields that receive non-dicts 2019-07-01 11:45:46 -04:00
Drew Banin
9ffc4bf928 Update CONTRIBUTING.md 2019-06-28 12:26:26 -04:00
Jacob Beck
51b6fd6f86 Merge pull request #1582 from fishtown-analytics/fix/set-io-encoding
set PYTHONIOENCODING on the dockerfile to suppress encoding errors
2019-06-27 23:15:14 -04:00
Jacob Beck
be765dc4e8 set PYTHONIOENCODING on the dockerfile to suppress encoding errors 2019-06-27 20:40:02 -06:00
Drew Banin
7febd9328d Merge pull request #1542 from fishtown-analytics/changelog/0.14.0
Update CHANGELOG.md
2019-06-27 22:25:28 -04:00
Drew Banin
33a80fca5a Update CHANGELOG.md 2019-06-27 22:24:51 -04:00
Jacob Beck
f368820b7e Merge pull request #1578 from heisencoder/fix/test-cleanup
Fix/test cleanup
2019-06-27 12:06:26 -04:00
Matt Ball
2f1dbc2dae change assert{Not}Equals to assert{Not}Equal in integration tests
This is to remove deprecation warnings during testing.

Linux commands (with globstar shell option enable):

$ cd test
$ sed -i 's/self.assertEquals/self.assertEqual/g' **/*.py
$ sed -i 's/self.assertNotEquals/self.assertNotEqual/g' **/*.py
2019-06-27 09:10:57 -06:00
Matt Ball
3ad30217c4 remove DeprecationWarnings from test/unit files
We're getting a bunch of deprecation warnings when running the unit
tests due to using statements like assertEquals instead of assertEqual.
This change removes these warnings.

Change completed via these Linux commands:

$ cd test/unit
$ sed -i 's/self.assertEquals/self.assertEqual/g' test_*.py
$ sed -i 's/self.assertNotEquals/self.assertNotEqual/g' test_*.py
2019-06-27 08:36:11 -06:00
Matt Ball
4a10f2cb37 update test_context.py to use a local import of mock_adapter
This is consistent with the way that unit tests import utils.py and
also fixes an import issue with our test environment.
2019-06-27 08:28:02 -06:00
Drew Banin
f3948295e9 Update CHANGELOG.md 2019-06-26 19:35:05 -04:00
Drew Banin
42fb12027c Merge pull request #1575 from fishtown-analytics/tracking/adapter-type-and-rpc-requests
(#1574) track adapter type and rpc requests
2019-06-26 19:24:28 -04:00
Drew Banin
91124d2d4f (#1574) track adapter type and rpc requests 2019-06-26 17:55:37 -04:00
Drew Banin
2e2ce9a57a Update CHANGELOG.md 2019-06-26 17:52:59 -04:00
Drew Banin
fdcb395739 Merge pull request #1571 from josegalarza/dev/wilt-chamberlain
Add ExternalTable relation type, update Snowflake adapter (issue #1505)
2019-06-26 11:24:32 -04:00
Drew Banin
7d1fed2eb9 Merge pull request #1572 from cclauss/patch-1
Undefined name: import shutil for line 60
2019-06-26 09:47:04 -04:00
cclauss
462a1516d2 Undefined name: import shutil for line 60 2019-06-26 13:24:12 +02:00
Jose
654f70d901 Add ExternalTable relation type, update Snowflake adapter (issue #1505) 2019-06-26 12:11:41 +10:00
Connor McArthur
31f20348c9 Bump version: 0.14.0a1 → 0.14.0a2 2019-06-25 09:45:09 -04:00
Drew Banin
bd0876e2e6 Update CHANGELOG.md 2019-06-24 19:48:57 -04:00
Jacob Beck
a47c09e5d2 Merge pull request #1565 from fishtown-analytics/feature/new-dockerfile
Pull in new dockerfile
2019-06-21 15:47:49 -04:00
Drew Banin
164468f990 Merge pull request #1562 from fishtown-analytics/feature/statement-sugar
add sugar over statements (run_query)
2019-06-21 13:40:19 -04:00
Jacob Beck
c56b631700 Pull in new dockerfile 2019-06-21 12:48:18 -04:00
Jacob Beck
2636969807 Merge pull request #1558 from fishtown-analytics/fix/created-relations-casing
Fix casing comparisons on dbt-created relations (#1555)
2019-06-21 10:10:56 -04:00
Jacob Beck
1e17303b97 Merge pull request #1561 from fishtown-analytics/fix/revert-strict-undefined
Remove strict undefined + tests
2019-06-20 15:22:07 -06:00
Drew Banin
7fa8d891ef Merge pull request #1560 from fishtown-analytics/fix/support-error-on-nondeterministic-merge-snowflake
add strategy switch for incremental merge behavior (snowflake)
2019-06-20 16:36:39 -04:00
Drew Banin
c029dfe3fa add sugar over statements (run_query) 2019-06-20 16:33:31 -04:00
Drew Banin
c4ef120b74 Merge branch 'dev/wilt-chamberlain' into fix/support-error-on-nondeterministic-merge-snowflake 2019-06-20 14:56:13 -04:00
Drew Banin
8644ce1cb8 rm strategy alias; rename overwrite strategy 2019-06-20 14:52:32 -04:00
Jacob Beck
b0b3cdc21f Remove strict undefined + tests 2019-06-20 14:48:07 -04:00
Drew Banin
e3d30d8a35 add strategy switch for incremental merge behavior (snowflake) 2019-06-20 14:04:24 -04:00
Jacob Beck
0ef9c189c0 Merge pull request #1550 from fishtown-analytics/fix/block-parser-ignore-more
clean up block parsing to make it dumber and more effective (#1547)
2019-06-20 06:33:48 -06:00
Jacob Beck
18953536f1 PR feedback 2019-06-19 14:36:07 -06:00
Jacob Beck
dd02f33482 new test 2019-06-19 11:33:57 -06:00
Jacob Beck
3576839199 oops, fix bq/sf too 2019-06-19 11:33:49 -06:00
Drew Banin
4ed668ef93 Update CHANGELOG.md 2019-06-19 13:30:06 -04:00
Drew Banin
f8344469e1 Merge pull request #1549 from heisencoder/feature/add-project-dir-flag
add --project-dir flag to allow specifying project directory
2019-06-19 13:28:24 -04:00
Jacob Beck
1d94fb67da Fix casing comparisons on dbt-created relations
When dbt creates a relation in the db, add a special flag
When checking a node name match:
 - if that flag is present and quoting is disabled, do a lowercase compare
 - otherwise remain case sensitive
2019-06-19 09:44:37 -06:00
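
A rough illustration of the comparison rule spelled out in this commit, with hypothetical names standing in for dbt's internals:

```python
def relation_names_match(search_name, cached_name, created_by_dbt, quoting_enabled):
    """Compare a requested relation name against one dbt has in its cache.

    Relations dbt itself created carry a flag; when quoting is disabled for
    them the database will have folded the case, so compare case-insensitively.
    Otherwise the comparison stays case-sensitive.
    """
    if created_by_dbt and not quoting_enabled:
        return search_name.lower() == cached_name.lower()
    return search_name == cached_name
```
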
Drew Banin
9ad85127e4 Merge pull request #1553 from fishtown-analytics/fix/include-stuff
Add macro subdirectories to redshift/postgres (#1552)
2019-06-18 19:13:46 -04:00
Drew Banin
3845abeff8 Update CHANGELOG.md 2019-06-18 18:34:52 -04:00
Jacob Beck
f95c712f95 Merge pull request #1551 from fishtown-analytics/fix/package-without-version
require versions in package defs (#1546)
2019-06-18 13:16:45 -06:00
Matt Ball
f5c3300304 remove tab character in base.py 2019-06-18 13:08:53 -06:00
Matt Ball
84fa83b4dd add unit test for new --project-dir flag
See https://github.com/fishtown-analytics/dbt/issues/1544
2019-06-18 12:51:02 -06:00
Jacob Beck
927c37470a a different message 2019-06-18 12:41:57 -06:00
Jacob Beck
b80fa53b2a include macros/**/*.sql as well 2019-06-18 12:19:48 -06:00
Jacob Beck
cce5ae01f8 require versions 2019-06-18 11:21:06 -06:00
Jacob Beck
92ef783948 clean up block parsing to make it dumber and more effective 2019-06-18 10:45:21 -06:00
Matt Ball
2e7c1fd2cc add --project-dir flag to allow specifying project directory
In particular, this is the directory that contains the dbt_project.yml
file and all the related project files and directories.

Without this flag, it is necessary to cd to the project directory before
running dbt.

See https://github.com/fishtown-analytics/dbt/issues/1544
2019-06-17 16:22:45 -06:00
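
A hedged sketch of how such a flag might be wired up, assuming a simple argparse front end and a chdir into the given directory (names here are illustrative, not dbt's actual CLI code):

```python
import argparse
import os

# Parse a --project-dir style flag and run from that directory, so
# dbt_project.yml is found without a manual `cd` beforehand.
parser = argparse.ArgumentParser(prog="dbt-sketch")
parser.add_argument("--project-dir", default=None,
                    help="directory containing dbt_project.yml")

args = parser.parse_args(["--project-dir", "."])
if args.project_dir:
    os.chdir(os.path.abspath(args.project_dir))
print(os.getcwd())
```
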
Jacob Beck
85164b616e Merge pull request #1543 from fishtown-analytics/fix/unquoted-strings-errors
Handle quotes in jinja block bodies (#1533)
2019-06-14 12:03:18 -06:00
Jacob Beck
c67a1ac9f7 link to issue in test case comment 2019-06-14 11:37:24 -06:00
Jacob Beck
a2cae7df29 Block parsing now uses recursive descent... 2019-06-14 10:50:32 -06:00
Drew Banin
f44e3bc9d8 Update CHANGELOG.md 2019-06-14 09:44:24 -07:00
Ben Edwards
03bc58116c Fix incorrectly named column table_type 2019-06-14 17:21:08 +10:00
Ben Edwards
9b88eb67a1 Change postgres get_catalog to not use information_schema
- `information_schema` in Postgres is not very performant due to the complex views used to create it
 - use underlying `pg_catalog` tables/views instead
 - returns the same rows/columns as the `information_schema` version
 - order of rows is different, this is because there was only a partial sort on the `information_schema` version
 - `column_type` will return different values to before
   - some arrays were `ARRAY`, will now be `type[]`
   - user-defined types were previously `USER_DEFINED`, now will be the name of the user-defined type <-- main point of this PR
 - performance is 2-5x faster, depending on query caching
2019-06-14 13:51:10 +10:00
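
A simplified query in the same spirit as the change above, going straight to the pg_catalog tables rather than the information_schema views (this is not dbt's actual get_catalog SQL, and the connection string is a placeholder):

```python
import psycopg2

CATALOG_SQL = """
    select
        n.nspname as table_schema,
        c.relname as table_name,
        c.relkind as relkind  -- 'r' table, 'v' view, etc.
    from pg_catalog.pg_class c
    join pg_catalog.pg_namespace n on n.oid = c.relnamespace
    where n.nspname not in ('pg_catalog', 'information_schema')
"""

with psycopg2.connect("dbname=analytics") as conn:
    with conn.cursor() as cur:
        cur.execute(CATALOG_SQL)
        for schema, name, kind in cur.fetchall():
            print(schema, name, kind)
```
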
Connor McArthur
6e5fa7de3c add changelog section for wilt 2019-06-13 17:16:21 -04:00
Jacob Beck
d327394057 Merge pull request #1537 from fishtown-analytics/fix/bigquery-fix-schema-404s
List 10k datasets instead of all (#1536)
2019-06-13 14:32:24 -06:00
Drew Banin
b7c06941e3 Merge pull request #1539 from fishtown-analytics/docs/0.14.0-updates
(#1474) incorporate docs site updates
2019-06-13 10:42:44 -07:00
Drew Banin
57adfc8683 (#1474) incorporate docs site updates 2019-06-13 10:22:00 -07:00
Jacob Beck
e13d805197 list a ton of datasets instead of all to avoid pagination 2019-06-13 10:42:20 -06:00
Jacob Beck
f0635a0df4 Merge pull request #1514 from fishtown-analytics/fix/dbt-ls-selectors
Fix dbt ls selectors
2019-06-13 10:40:57 -06:00
Jacob Beck
24adb74498 Merge pull request #1526 from fishtown-analytics/fix/refs-in-run-operations
Add ref and source support to run-operation macros (#1507)
2019-06-13 10:18:33 -06:00
Jacob Beck
16519b11aa Merge pull request #1534 from fishtown-analytics/fix/deprecate-python-2
Emit a big deprecation message on python 2.x at the end of the run (#1531)
2019-06-13 10:14:10 -06:00
Jacob Beck
0a666caa13 Emit a big deprecation warning on python 2.x at the end of the run 2019-06-13 08:44:40 -06:00
Jacob Beck
5833acbc8c Print fqns, not unique ids
FQNs for sources
Allow package names as an optional prefix for source: selectors
make "dbt ls" output more intuitive by including source:* by default
Fix the tests
2019-06-12 19:46:27 -06:00
Jacob Beck
e57c7b651d Merge pull request #1519 from fishtown-analytics/feature/render-severity-configs
render severity configurations (#1512) (#1511)
2019-06-12 19:40:31 -06:00
Jacob Beck
9a40395cdb Add ref and source support to run-operation macros
Removed the "runtime" provider's generate_macro function
Added an "operation" provider that does similar but:
  ref() does not check for itself in depends_on
  source() does not try to append to sources
Macros assume that the rendered form of the ref exists in the database
Also changed `--macro` to a required positional argument
  you can run macro_name with 'dbt run-operation macro_name --args ...'
Make providers objects, unify some ref processing stuff
Remove an empty method call on operations, add some comments
Raise a compiler error if you try to ref() an ephemeral model in your run-operation macro
2019-06-12 19:36:16 -06:00
Jacob Beck
03aecc8d0c Merge pull request #1481 from fishtown-analytics/feature/archive-migration-script
archive table migration script
2019-06-12 19:01:59 -06:00
Jacob Beck
a554b383a2 Merge pull request #1520 from fishtown-analytics/feature/rename-models-to-select
for snapshots, make the argument "--select", not "--models" (#1517)
2019-06-12 16:14:04 -06:00
Jacob Beck
a4be1e1dcb when warning about undefined, handle the case where the node is none 2019-06-12 16:13:25 -06:00
Jacob Beck
7b498f4179 archive table migration script
Also migrate archive configs
PR feedback
archives are now snapshots
Make the script a subcommand
2019-06-12 15:45:43 -06:00
Jacob Beck
4a10c8dce7 fix bigquery test 2019-06-12 15:39:17 -06:00
Jacob Beck
0cc99c50a7 for snapshots, make the argument "--select", not "--models" 2019-06-12 15:39:12 -06:00
Jacob Beck
92fdf45f0c Support lowercase test severity 2019-06-12 15:38:05 -06:00
Jacob Beck
f3cafae030 render severity configurations 2019-06-12 15:37:35 -06:00
Jacob Beck
12e5bf6036 Merge pull request #1522 from fishtown-analytics/fix/check-cols-null
handle null check-cols (#1516)
2019-06-12 15:32:40 -06:00
Jacob Beck
f0ab957edb Merge pull request #1521 from fishtown-analytics/fix/quote-columns-in-snapshots
Snapshots: Quote columns, fix print output, fix tests (#1510)
2019-06-12 15:32:11 -06:00
Jacob Beck
4308b28aa6 Merge pull request #1527 from fishtown-analytics/feature/parallel-integration-tests
parallel tests
2019-06-12 15:07:40 -06:00
Jacob Beck
f79619ec0a parallelize the tests
Set -n4 in ci, as -n auto chose 32 and caused problems
python 2.x and flaky/pytest-xdist do not get along, so py3 only
Mark some rpc tests as flaky, especially with parallelism
 - socket contention is a problem
Remove coverage stuff
Collate and upload the log files we generate
Added special logging for a particular rare CI error case
Set an "initial root" at import time
  chdir into it at setUp time as well as tearDown time

I think py2 might work now?
2019-06-12 14:37:54 -06:00
Jacob Beck
5dd147123a HACK to get docs tests happy, need to settle on rules for what path emits wrt symlinks 2019-06-12 14:37:34 -06:00
Jacob Beck
63d6ab2006 make integration tests run in their own directories
Make tempdir, write profile + project there, use it as the profile directory
fixed 015 tests to turn that behavior off and do custom things
Migrate all the tests
Make run_dbt_and_check more like run_dbt
Fix windows ls tests
2019-06-12 14:37:07 -06:00
Jacob Beck
1951e0f5e8 Merge pull request #1532 from fishtown-analytics/build/pr-fixup-1285
merged and updated PR for #1031
2019-06-12 14:29:13 -06:00
Jacob Beck
fb69b89ef9 Merge pull request #1509 from heisencoder/feature/upgrade-to-networkx-2
Feature/upgrade to networkx 2
2019-06-12 12:07:25 -06:00
Jacob Beck
2d84dd4fbd Merge branch 'dev/wilt-chamberlain' into build/pr-fixup-1285 2019-06-12 11:43:08 -06:00
Jacob Beck
d760229abc Merge pull request #1476 from buremba/fix/pg-schema
Pass schema in credentials to Postgresql
2019-06-12 11:38:35 -06:00
Jacob Beck
ffb38a21e3 redshift tests 2019-06-12 10:45:03 -06:00
Matt Ball
85a2f48c80 update test_linker.py to handle a temp file using try/finally
This is based on the recommendation from @beckjake:
b8d5a341f4 (discussion_r292032021)
2019-06-12 10:33:34 -06:00
Jacob Beck
9f208f711e Merge pull request #1515 from tbescherer/project-inner-loop
move target_model vars inside loop to avoid reuse on subsequent refs
2019-06-10 08:29:15 -06:00
Jacob Beck
ea2637395c Merge pull request #1525 from fishtown-analytics/fix/increase-tracking-timeout
increase tracking timeout 2s -> 5s (#1499)
2019-06-10 08:07:27 -06:00
Jacob Beck
0ca6026124 increase tracking timeout 2s -> 5s 2019-06-10 07:13:46 -06:00
Matt Ball
b8d5a341f4 update test_graph.py to handle different sequence type in networkx 2.x 2019-06-07 14:56:08 -06:00
Matt Ball
014a8f9222 Merge branch 'dev/wilt-chamberlain' of https://github.com/fishtown-analytics/dbt into feature/upgrade-to-networkx-2 2019-06-07 14:05:21 -06:00
Jacob Beck
9599b3f584 handle null check-cols 2019-06-07 14:00:14 -06:00
tom.bescherer
ca31b79cc0 add a newline for readability 2019-06-07 11:57:19 -04:00
Jacob Beck
675e858050 Quote columns, fix print output, fix tests 2019-06-07 09:19:36 -06:00
Jacob Beck
c04517bf04 Merge pull request #1506 from fishtown-analytics/feature/rename-archives-to-snapshots
Rename archives to snapshots (#1497)
2019-06-07 07:17:47 -06:00
tom.bescherer
eb12ef1dcd move target_model vars inside loop to avoid reuse on subsequent refs 2019-06-06 16:40:32 -04:00
Jacob Beck
03f50f560b PR feedback, fix "usage" line 2019-06-06 14:35:22 -06:00
Jacob Beck
c1387c5692 Merge pull request #1513 from fishtown-analytics/fix/only-parse-docs-blocks
Only parse docs blocks contents when reading md files (#988)
2019-06-06 13:53:15 -06:00
Jacob Beck
a4a9221d95 add a missing test file 2019-06-06 12:04:53 -06:00
Matt Ball
7ed0036af0 update networkx dependency to allow version 2.x
https://github.com/fishtown-analytics/dbt/issues/1496
2019-06-06 11:03:14 -06:00
Jacob Beck
1489393489 documentation -> docs 2019-06-06 10:59:29 -06:00
Matt Ball
c4939368ae update linker.py to be compatible with networkx-2.x
Also, update test_linker.py to run under Python 2 unit tests and add
test coverage for the modified functions.

linker.py should now be compatible with both networkx-1 and -2.

This addresses https://github.com/fishtown-analytics/dbt/issues/1496

This change was tested via unit tests, but was not tested via
integration tests.

As a follow-up change, core/setup.py should have its networkx dependency
updated to allow version 2.x.
2019-06-06 10:31:36 -06:00
Jacob Beck
ab59ebe4f2 Fix docs blocks parsing issues
Rename documentation node type to docs so we can filter on it (is this breaking?)
Fix block extractor bug with macros/docs that contain quotes
Fix block extractor bug with expressions
2019-06-06 09:32:50 -06:00
Jacob Beck
f3701ab837 archives -> snapshots, except legacy stuff 2019-06-05 09:43:11 -06:00
Jacob Beck
f48f78fc58 rename a couple things, this will not work atm 2019-06-05 07:45:55 -06:00
Jacob Beck
ddb1785698 Merge pull request #1473 from fishtown-analytics/fix/windows-rpc
Fix windows rpc
2019-06-04 15:50:46 -06:00
Jacob Beck
963b0e23ee remove drop_existing flag + its deprecation warning, make flags update themselves, reset flags from args on windows processes 2019-06-04 12:01:07 -06:00
Jacob Beck
2a9ae83270 Re-enable windows tests
Disable kill tests
Don't expect logs on timed-out windows tests (windows is slow!)
2019-06-04 11:59:33 -06:00
Jacob Beck
788507e046 Get windows up and running for RPC
Refactor process bootstrapping to a function
 - avoid trying to pickle "RequestTaskHandler"s on windows
Move user config consequences out of main
 - We need this for RPC stuff too
Reset profile values and plugins on windows process start
Disable "kill" command on Windows
2019-06-04 11:59:33 -06:00
Jacob Beck
60001ad6b4 Merge pull request #1483 from fishtown-analytics/fix/swap-alias-generator-args
flip around generate_alias_name args, add node to generate_schema_name args
2019-06-04 09:32:20 -06:00
Jacob Beck
6c9d5c7370 Merge pull request #1500 from fishtown-analytics/feature/remove-archive-blocks
remove archive configs
2019-06-04 09:32:08 -06:00
Jacob Beck
1c3a02b2c8 PR feedback 2019-06-04 07:20:55 -06:00
Jacob Beck
28dc10ed98 PR feedback 2019-06-03 16:32:20 -06:00
Jacob Beck
0d49295b94 tests, my ancient nemesis 2019-06-03 16:32:14 -06:00
Jacob Beck
679784735e Swap aliases ordering and add node parameter to generate_schema_name
Fix many tests
Support single-arg generate_schema_name macros
Add repeat flag to warn_or_error to suppress duplicate warnings
Add a warning if a user's macro does not take a second argument
2019-06-03 16:32:14 -06:00
Drew Banin
99f62e850f fix tests 2019-06-03 16:32:14 -06:00
Drew Banin
001b9abce9 flip around generate_alias_name args 2019-06-03 16:32:14 -06:00
Jacob Beck
248ca3ff76 fix more tests 2019-06-03 16:22:50 -06:00
Jacob Beck
704ee58846 fix the exit code tests too 2019-06-03 16:17:21 -06:00
Jacob Beck
f26948dde2 remove archive blocks 2019-06-03 15:57:32 -06:00
Drew Banin
3cac2d3ab7 Merge pull request #1478 from fishtown-analytics/feature/archive-blocks-as-regex-materialization
Use merge pattern for Archival queries
2019-06-03 11:50:18 -04:00
Drew Banin
82793a02d3 fix for tests in different logical databases 2019-06-01 11:40:14 -04:00
Jacob Beck
00cbe3ec3b Merge pull request #1494 from fishtown-analytics/fix/render-descriptions-properly
Render source_description fields in sources (#1484)
2019-05-30 12:58:48 -06:00
Drew Banin
8ecdab817a fix for pg tests 2019-05-30 13:14:27 -04:00
Drew Banin
94ae9fd4a7 fix test 2019-05-30 13:08:27 -04:00
Jacob Beck
75c8f32186 Render source_description fields in sources, fix tests to make sure we actually do that... 2019-05-30 10:54:47 -06:00
Drew Banin
69621fe6f9 cleanup tests 2019-05-30 12:21:42 -04:00
Drew Banin
81f4c1bd7c cleanup merge 2019-05-30 12:18:58 -04:00
Drew Banin
bb7cfb7dc2 Merge branch 'dev/wilt-chamberlain' into feature/archive-blocks-as-regex-materialization 2019-05-30 12:17:24 -04:00
Drew Banin
b98ea32add code review 2019-05-30 12:11:08 -04:00
Jacob Beck
17157f2973 Merge pull request #1489 from fishtown-analytics/fix/redshift-concurrent-tests
fix concurrent transactions test
2019-05-30 09:58:01 -06:00
Drew Banin
478b17a4dc fixups 2019-05-30 11:12:22 -04:00
Jacob Beck
f14225f7e4 Merge pull request #1491 from fishtown-analytics/feature/hub-site-retries
add a retry + sleep loop to registry calls (#1451)
2019-05-30 07:31:40 -06:00
Drew Banin
8a8f7a9929 Merge branch 'dev/wilt-chamberlain' into build/pr-fixup-1285 2019-05-30 09:07:11 -04:00
Drew Banin
7d490d4886 Implement archival using a merge abstraction 2019-05-29 21:41:10 -04:00
Jacob Beck
af13b2c745 add a retry + sleep loop to registry calls 2019-05-29 15:29:53 -06:00
Jacob Beck
a164d83dad in the concurrent transactions test, use a completely separate adapter for our goofy sql running 2019-05-29 13:08:33 -06:00
Jacob Beck
d10e340823 Merge pull request #1482 from fishtown-analytics/fix/archives-no-overwrite
detect duplicate archive and node names (#1480)
2019-05-29 06:15:42 -06:00
Jacob Beck
e7bb9d14b2 Merge pull request #1475 from fishtown-analytics/fix/faster-snowflake-tests
Try to make snowflake tests a bit more performant (#1433)
2019-05-29 06:13:26 -06:00
Burak Emre Kabakcı
4e7c096c34 Fix Code Style 2019-05-28 12:59:58 +03:00
Burak Emre Kabakcı
0f5ce12dad Update .gitignore 2019-05-27 15:09:41 +03:00
Burak Emre Kabakcı
2f4e92a728 Fix linting issues 2019-05-27 15:08:14 +03:00
Jacob Beck
b047ed82b6 detect duplicate archive and node names 2019-05-24 12:47:51 -06:00
Burak Emre Kabakcı
82f165625f Use get method for extracting search_path 2019-05-24 11:55:39 +03:00
Burak Emre Kabakcı
2f5aa3bd0e Remove redundant init 2019-05-22 14:24:03 +03:00
Burak Emre Kabakcı
63ef8e3f17 Rename config to search_path 2019-05-22 14:14:05 +03:00
Burak Emre Kabakcı
f1eaeb4ed2 Fix typo in Redshift tests 2019-05-21 19:48:32 +03:00
Burak Emre Kabakcı
a2ffe8e938 Add tests & fix failing tests 2019-05-21 19:36:29 +03:00
Burak Emre Kabakcı
355d2ad6fc Pass schema in credentials to Postgresql 2019-05-21 19:09:06 +03:00
Jacob Beck
f089b4b077 try to make snowflake assertManyTablesEqual faster
use "show columns" instead of selecting from the information schema
calculate numbers of rows while we calculate mismatched rows
use "show objects" instead of selecting from the information schema for get_models_in_schema
2019-05-21 09:30:32 -06:00
Jacob Beck
73607b85b7 Merge pull request #1465 from fishtown-analytics/dev/merge-in-0.13.1
merge 0.13.1
2019-05-16 07:23:55 -06:00
Drew Banin
7b022f3afa Merge pull request #1459 from fishtown-analytics/feature/printer-width-tests
Make printer width configurable
2019-05-15 17:49:50 -04:00
Drew Banin
0a2e4f761b Merge pull request #1409 from bastienboutonnet/snowflake_create_or_replace
Snowflake create or replace
2019-05-15 17:48:25 -04:00
Jacob Beck
ddd73cd73b Merge branch '0.13.latest' into dev/wilt-chamberlain 2019-05-15 12:44:44 -06:00
Kriselle Sanchez
efdb837a50 Make printer width configurable 2019-05-13 14:52:20 -04:00
Drew Banin
90abc2d2f3 (closes #1455) Qualify Snowflake temp tables with a database and schema 2019-05-13 14:12:30 -04:00
Connor McArthur
f60938aab0 Bump version: 0.13.1a2 → 0.13.1 2019-05-13 11:58:03 -04:00
Connor McArthur
af7c752fc6 set release date 2019-05-13 11:57:29 -04:00
Connor McArthur
a80989952a Merge pull request #1392 from fishtown-analytics/dev/0.13.1
dbt 0.13.1
2019-05-13 11:56:37 -04:00
Drew Banin
8d74550609 fix tests 2019-05-10 19:12:38 -04:00
Jacob Beck
d5774b3da4 Merge pull request #1452 from fishtown-analytics/feature/strip-seed-bom
Handle seeds with utf-8 BOM (#1177)
2019-05-10 10:40:53 -06:00
Jacob Beck
7f7002f36c Merge pull request #1454 from fishtown-analytics/feature/increment-version
Bump version: 0.13.0 → 0.14.0a1
2019-05-10 10:40:41 -06:00
Drew Banin
b62ba4a985 py2 compat 2019-05-10 10:52:57 -04:00
Drew Banin
2b3370887e add missing import 2019-05-10 10:19:27 -04:00
Jacob Beck
26427d2af0 PR feedback 2019-05-10 07:50:46 -06:00
Jacob Beck
d502b33ef4 Merge pull request #1453 from fishtown-analytics/feature/warn-on-unpinned-packages
Warn on unpinned git packages (#1446)
2019-05-10 07:01:28 -06:00
Jacob Beck
210cf43574 Merge pull request #1440 from fishtown-analytics/feature/output-run-hooks
Output run hooks (#696)
2019-05-09 21:06:37 -06:00
Jacob Beck
d74e37d4ea Always specify encodings to open()
Fixes some neat windows behavior where the default code page can be cp1252
Add BOM unit test
2019-05-09 21:04:52 -06:00
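
The fix above is the standard "never rely on the platform default encoding" rule: on Windows the default code page can be cp1252, so reading a UTF-8 file without an explicit encoding corrupts non-ASCII characters. A minimal, self-contained illustration (file name is a placeholder):

```python
# Write and read back a UTF-8 file with an explicit encoding, so the result
# does not depend on the platform default (which can be cp1252 on Windows).
with open("seed.csv", "w", encoding="utf-8") as fh:
    fh.write("id,name\n1,Škoda\n")

with open("seed.csv", encoding="utf-8") as fh:
    rows = fh.read().splitlines()

print(rows)  # ['id,name', '1,Škoda']
```
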
Jacob Beck
ea8825996d PR feedback 2019-05-09 20:57:56 -06:00
Jacob Beck
336368195e Bump version: 0.13.0 → 0.14.0a1 2019-05-09 20:21:24 -06:00
Jacob Beck
70206b1635 warn on unpinned dependencies, unless warn-unpinned: False is set in packages.yml 2019-05-09 20:09:20 -06:00
Jacob Beck
191ae61b02 python 2 2019-05-09 19:48:26 -06:00
Jacob Beck
f6bf8d912a PR feedback
Order hooks in a deterministic way:
 - in the root project, hooks in index order
 - for each dependency project, in alphabetical order, hooks in index order
Since these hooks always have a valid "index", no need to devise a default
Add some tests
2019-05-09 19:16:53 -06:00
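
A sketch of the ordering rule described above, with hooks reduced to (project, index, sql) tuples for illustration: root-project hooks come first in index order, then each dependency's hooks, dependencies taken alphabetically.

```python
def order_hooks(hooks, root_project):
    """Sort hooks: root project first (by index), then the remaining
    projects alphabetically, each project's hooks in index order."""
    def key(hook):
        project, index, _ = hook
        return (project != root_project, project, index)

    return sorted(hooks, key=key)


hooks = [("dep_b", 1, "..."), ("my_project", 2, "..."),
         ("dep_a", 1, "..."), ("my_project", 1, "...")]
print(order_hooks(hooks, root_project="my_project"))
# my_project hooks (index 1, 2) first, then dep_a's, then dep_b's
```
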
Jacob Beck
30b7407597 if the first byte of a csv seed is a BOM, strip it before passing it to agate 2019-05-09 17:56:05 -06:00
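
One straightforward way to get the behavior described in that commit, assuming the goal is simply to drop a leading UTF-8 BOM from the raw bytes before the CSV reaches agate:

```python
import codecs


def strip_bom(raw: bytes) -> bytes:
    """Drop a leading UTF-8 BOM, if present, before parsing the seed."""
    if raw.startswith(codecs.BOM_UTF8):
        return raw[len(codecs.BOM_UTF8):]
    return raw


assert strip_bom(b"\xef\xbb\xbfid,name\n") == b"id,name\n"
```
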
Jacob Beck
58bf73d4bf Update core/dbt/task/run.py
Co-Authored-By: Drew Banin <drew@fishtownanalytics.com>
2019-05-09 14:30:30 -06:00
Drew Banin
ec61073560 update plugin readmes 2019-05-09 15:58:44 -04:00
Drew Banin
b0f81edf96 0.13.1a2 bump, include README.md in pypi description 2019-05-09 15:14:49 -04:00
Jacob Beck
cc8ef47747 Merge pull request #1436 from fishtown-analytics/feature/dbt-ls
dbt ls command (#467)
2019-05-09 12:55:06 -06:00
Jacob Beck
5d05bf0aba PR feedback 2019-05-09 12:54:38 -06:00
Drew Banin
1d18a54b5e Update CHANGELOG.md 2019-05-09 14:52:17 -04:00
Drew Banin
a4605ec844 Merge pull request #1450 from fishtown-analytics/0-13-1-docs
bump docs for 0.13.1
2019-05-09 14:46:03 -04:00
Drew Banin
9f58400ba8 bump docs for 0.13.1 2019-05-09 14:44:42 -04:00
Jacob Beck
ec8277b0e4 add color to hooks, give hooks names 2019-05-09 10:53:46 -06:00
Jacob Beck
8c7763acf6 Merge pull request #1445 from fishtown-analytics/fix/anonymous-tracking-no-hangs
Improve tracking failure handling (#1063)
2019-05-09 10:47:06 -06:00
Jacob Beck
8e426e60c9 remove completed TODO 2019-05-09 10:40:46 -06:00
Jacob Beck
3a7f931a3a Merge pull request #1428 from fishtown-analytics/fix/strip-url-trailing-dotgit
On mostly-duplicate git urls, pick whichever came first (#1084)
2019-05-08 19:33:07 -06:00
Drew Banin
314ca6c361 Update core/dbt/tracking.py
Co-Authored-By: beckjake <beckjake@users.noreply.github.com>
2019-05-08 12:40:33 -06:00
Jacob Beck
12f0887d28 PR feedback
remove incorrect comment
fix an issue where we looked deps up by the wrong key
add a test
2019-05-08 10:24:24 -06:00
Jacob Beck
a986ae247d Improve tracking failure handling
Add a 1s timeout to tracking http calls
Add an on-failure handler that disables tracking after the first failure
2019-05-08 09:43:32 -06:00
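
The shape of the fix above: bound each tracking call with a short timeout and flip a switch after the first failure so later calls are skipped entirely. A generic sketch using `requests` (the endpoint and payload are placeholders, not dbt's actual tracker):

```python
import requests

TRACKING_ENABLED = True
TRACKING_TIMEOUT = 1  # seconds


def track(event):
    """Send one tracking event; disable tracking after the first failure."""
    global TRACKING_ENABLED
    if not TRACKING_ENABLED:
        return
    try:
        requests.post("https://tracking.example.com/events",
                      json=event, timeout=TRACKING_TIMEOUT)
    except requests.RequestException:
        # One failure is enough: stop trying so runs never hang on tracking.
        TRACKING_ENABLED = False
```
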
Jacob Beck
9507669b42 PR feedback
Truncate long run-hooks, replacing newlines with spaces
Include run-hook status/timing in output
Clean up run-hook execution a bit
2019-05-07 09:53:37 -06:00
Jacob Beck
715155a1e9 Merge pull request #1441 from fishtown-analytics/feature/dynamic-target-paths
Add modules and tracking info to the configuration parsing context (#1320)
2019-05-07 08:00:11 -06:00
Jacob Beck
32c5679039 PR Feedback
Fixed error logging to display errors in dbt ls
Add models flag
Make all of models, select, exclude have a metavar of 'SELECTOR' for -h
2019-05-07 07:57:06 -06:00
Jacob Beck
89d211b061 Merge pull request #1442 from fishtown-analytics/fix/no-source-links
Only link schemas with executable things in them
2019-05-06 14:27:18 -06:00
Jacob Beck
f938fd4540 fix a test bug
dbt doing two things in the same second incorrectly failed the test
2019-05-06 12:24:40 -06:00
Jacob Beck
61e4fbf152 add a test 2019-05-06 11:27:16 -06:00
Jacob Beck
fda38e7cbb Only link schemas with executable things in them 2019-05-06 10:46:10 -06:00
Jacob Beck
c0a3b02fb4 Add modules and tracking info to the config parsing context
Add a test to make sure setting the target path works properly
2019-05-03 10:45:09 -06:00
Jacob Beck
80482aae34 add on-run-start/on-run-end hook logging 2019-05-03 08:00:08 -06:00
Jacob Beck
c19085862a suppress some cache logger noise 2019-05-03 07:59:33 -06:00
Connor McArthur
9672d55c1e Bump version: 0.13.0 → 0.13.1a1 2019-05-03 09:22:37 -04:00
Jacob Beck
e043643a54 Add a new ListTask, and 'dbt list'/'dbt ls'
All tasks now have a 'pre_init_hook' classmethod, called by main
 - runs after args are parsed, before anything else
2019-05-02 08:53:11 -06:00
Connor McArthur
ade108f01c changelog 2019-05-02 10:32:03 -04:00
Jacob Beck
6b08fd5e8d Merge pull request #1432 from convoyinc/dev/0.13.1
Add MaterializedView relation type, update Snowflake adapter
2019-05-02 07:27:11 -06:00
Jacob Beck
3c8bbddb5f Merge pull request #1413 from fishtown-analytics/feature/stub-adapter-in-parsing
Stub out methods at parse time
2019-05-01 10:39:49 -06:00
Jacob Beck
4c02b4a6c3 Make an stderr handler available as well and provide a way to swap between them 2019-05-01 07:11:29 -06:00
Adrian Kreuziger
786726e626 Add MaterializedView relation type, update Snowflake adapter 2019-04-30 09:54:37 -07:00
Drew Banin
1f97fe463e Merge branch 'dev/wilt-chamberlain' into snowflake_create_or_replace 2019-04-30 12:27:25 -04:00
Jacob Beck
5a3e3ba90f Merge pull request #1402 from fishtown-analytics/fix/quote-databases-properly
Quote databases properly (#1396)
2019-04-30 10:15:37 -06:00
Jacob Beck
154aae5093 Merge pull request #1410 from fishtown-analytics/feature/test-severity
Add a "severity" for tests (#1005)
2019-04-30 10:15:15 -06:00
Jacob Beck
3af88b0699 Update PackageListing to handle git packages having or not having .git
First write wins, on the assumption that all matching URLs are valid
2019-04-30 10:12:50 -06:00
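
The dedup rule described above can be pictured as keying packages on a URL normalized by stripping a trailing `.git`, keeping whichever entry was seen first. A small sketch (function names are illustrative):

```python
def normalize_git_url(url: str) -> str:
    """Treat 'https://host/org/repo' and '.../repo.git' as the same package."""
    return url[:-len(".git")] if url.endswith(".git") else url


def dedupe_packages(urls):
    """First write wins: later near-duplicate URLs are dropped."""
    seen = {}
    for url in urls:
        seen.setdefault(normalize_git_url(url), url)
    return list(seen.values())


print(dedupe_packages([
    "https://github.com/fishtown-analytics/dbt-utils.git",
    "https://github.com/fishtown-analytics/dbt-utils",
]))  # only the first entry survives
```
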
Jacob Beck
0fb620c697 Merge pull request #1416 from fishtown-analytics/feature/strict-undefined
make undefined strict when not capturing macros (#1389)
2019-04-30 09:39:29 -06:00
Jacob Beck
7d66965d0b Merge branch 'dev/wilt-chamberlain' into feature/stub-adapter-in-parsing 2019-04-30 08:40:29 -06:00
Jacob Beck
acca6a7161 Merge pull request #1420 from fishtown-analytics/fix/postgres-text-types
Fix postgres text handling (#781)
2019-04-30 08:35:07 -06:00
Jacob Beck
ad2f228048 Merge pull request #1429 from fishtown-analytics/fix/vars-in-disabled-models
Fix: missing vars in disabled models fail compilation (#434)
2019-04-30 08:34:28 -06:00
Drew Banin
3a7dcd9736 Merge branch 'dev/wilt-chamberlain' into snowflake_create_or_replace 2019-04-30 09:44:43 -04:00
Drew Banin
ca15b44d0f Update core/dbt/clients/jinja.py
Co-Authored-By: beckjake <beckjake@users.noreply.github.com>
2019-04-30 07:35:32 -06:00
Jacob Beck
bf9c466855 Handle warnings vs errors
raise errors in strict mode
log warnings on warning states
Add tests
2019-04-29 19:58:05 -06:00
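
The warning/error split described here follows a common pattern: the same condition is fatal under strict mode and a logged warning otherwise. A hedged sketch, not dbt's exact helper:

```python
import logging

logger = logging.getLogger("dbt-sketch")


def warn_or_error(message, strict_mode):
    """Raise in strict mode, otherwise log the message as a warning."""
    if strict_mode:
        raise RuntimeError(message)
    logger.warning(message)
```
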
Jacob Beck
abcbacaf69 Implement test failure severity levels
A small refactor to make test parsing easier to modify
add concept of test modifier kwargs, pass them through to config
plug the severity setting into test result handling
Update existing tests
Add integration tests
severity settings for data tests, too
2019-04-29 19:57:07 -06:00
Jacob Beck
ffceff7498 Merge branch 'dev/wilt-chamberlain' into feature/stub-adapter-in-parsing 2019-04-29 19:51:37 -06:00
Jacob Beck
25ac1db646 make undefined strict when not capturing
Fix a number of tests and some table/view materialization issues
2019-04-29 19:46:14 -06:00
Jacob Beck
c6d6dae352 PR feedback: Make a RedshiftColumn, make the RedshiftAdapter use it 2019-04-29 19:33:26 -06:00
Jacob Beck
aa4f771df2 Merge pull request #1419 from fishtown-analytics/feature/destroy-non-destructive
remove non-destructive mode (#1415)
2019-04-29 19:28:00 -06:00
Jacob Beck
4715ad9009 Merge pull request #1426 from fishtown-analytics/feature/allow-null-vars
Allow vars to be set to null and differentiate them from unset vars (#608)
2019-04-29 19:22:24 -06:00
Jacob Beck
a4e5a5ac78 Merge pull request #1425 from fishtown-analytics/fix/propagate-undefined-to-calls
Propagate ParserMacroCapture undefineds into the calls (#1424)
2019-04-29 18:53:27 -06:00
Jacob Beck
f587efde60 Merge pull request #1427 from fishtown-analytics/feature/better-schema-test-errors
Improve invalid test warnings/errors (#1325)
2019-04-29 18:33:54 -06:00
Jacob Beck
d57f4c54d8 at parse time, return None for any missing vars 2019-04-29 14:39:08 -06:00
Jacob Beck
b9c74e0b07 Improve invalid test warnings/errors 2019-04-29 14:08:39 -06:00
Jacob Beck
aebefe09b5 Allow vars to be set to null and differentiate them from unset vars 2019-04-29 13:32:42 -06:00
Jacob Beck
78c13d252e Propagate ParserMacroCapture undefineds into the calls
Fix an issue when users attempt to use the results of missing macro calls
Add a test
2019-04-29 08:33:10 -06:00
Jacob Beck
8270c85ffd Merge pull request #1421 from fishtown-analytics/fix/dbt-debug-connect
fix dbt debug connections (#1422)
2019-04-29 07:10:13 -06:00
Bastien Boutonnet
7a2279e433 move unique key workaround to snowflake macro 2019-04-28 09:59:17 -07:00
Bastien Boutonnet
3ef519d139 todo and comments clean up 2019-04-28 07:43:16 -07:00
Bastien Boutonnet
85eac05a38 cleaner select 2019-04-28 07:33:49 -07:00
Bastien Boutonnet
8af79841f7 remove non-destructive logic 2019-04-28 07:31:14 -07:00
Bastien Boutonnet
afe236d9ac cleaning up some commented out stuff 2019-04-27 10:25:26 -07:00
Bastien Boutonnet
90f8e0b70e Revert "Revert "Merge branch 'dev/wilt-chamberlain' into snowflake_create_or_replace""
This reverts commit 4f62978de5.
2019-04-27 10:18:30 -07:00
Bastien Boutonnet
0432c1d7e3 conflict resolve 2019-04-27 10:11:59 -07:00
Bastien Boutonnet
08820a2061 fixing my jetlag-introduced bugs 2019-04-27 08:15:23 -07:00
Bastien Boutonnet
4f62978de5 Revert "Merge branch 'dev/wilt-chamberlain' into snowflake_create_or_replace"
This reverts commit 3ab8238cfb, reversing
changes made to 43a9db55b1.
2019-04-27 06:46:41 -07:00
Bastien Boutonnet
3ab8238cfb Merge branch 'dev/wilt-chamberlain' into snowflake_create_or_replace 2019-04-27 15:09:36 +02:00
Bastien Boutonnet
43a9db55b1 quick todo marker 2019-04-27 14:35:10 +02:00
Jacob Beck
08fdcad282 prevent getting this helper method collected as a test 2019-04-26 13:50:13 -06:00
Jacob Beck
7df6e0dc68 Fix postgres text handling
Fix incorrect maximum text field length on redshift
On postgres, pass through text without size information to fix archival of long text fields
Add a test that makes sure postgres archives work with very long text entries
Fix tests
Add a redshift test
2019-04-26 13:50:03 -06:00
Bastien Boutonnet
5c1c5880b6 more explicit comments and quick formatting 2019-04-26 20:10:12 +02:00
Bastien Boutonnet
f99efbf72e remove references to temp and backup relations 2019-04-26 20:04:21 +02:00
Jacob Beck
e90b60eecd fix dbt debug connections 2019-04-26 11:14:24 -06:00
Jacob Beck
1205e15be2 remove non-destructive mode 2019-04-26 10:14:27 -06:00
Jacob Beck
32f39f35f6 Merge pull request #1417 from fishtown-analytics/feature/use-pytest
nosetest -> pytest
2019-04-26 08:15:21 -06:00
Bastien Boutonnet
9591b86430 (PR fdbk) rm extra macro 2019-04-26 12:43:11 +02:00
Jacob Beck
b54c6023eb on windows, run via "python -m pytest" 2019-04-25 22:13:12 -06:00
Jacob Beck
00ba5d36b9 nosetest -> pytest 2019-04-25 21:51:43 -06:00
Jacob Beck
89eeaf1390 Merge branch 'dev/wilt-chamberlain' into feature/stub-adapter-in-parsing 2019-04-25 21:48:02 -06:00
Jacob Beck
3f18b93980 Merge pull request #1398 from fishtown-analytics/feature/archive-no-create-existing-schema
archives: do not create existing schema (#758)
2019-04-25 21:42:58 -06:00
Jacob Beck
96cb056ec9 Merge pull request #1361 from fishtown-analytics/feature/archive-blocks-as-regex
Feature/archive blocks
2019-04-25 21:42:20 -06:00
Jacob Beck
1042f1ac8b fix some merged-in flake8 failures 2019-04-25 14:58:29 -06:00
Jacob Beck
dd232594a5 Merge branch 'dev/wilt-chamberlain' into feature/archive-blocks-as-regex 2019-04-25 14:48:49 -06:00
Jacob Beck
5762e5fdfb Merge pull request #1406 from fishtown-analytics/fix/run-operation-return-codes
Fix run operation return codes (#1377)
2019-04-25 08:19:11 -06:00
Jacob Beck
0f1c154a1a create a decorator for stubbing out methods at parse time
Includes some unit tests
Update integration tests to handle the fact that sometimes we now fail at runtime
2019-04-24 14:36:16 -06:00
Jacob Beck
ad1fcbe8b2 Merge pull request #1412 from fishtown-analytics/fix/insensitive-list-schemas
use ilike instead of = for database when listing schemas (#1411)
2019-04-24 12:02:04 -06:00
Jacob Beck
877440b1e6 use ilike instead of = for database when listing schemas 2019-04-24 10:50:14 -06:00
Jacob Beck
ca02a58519 PR feedback: Add a clear_transaction call 2019-04-23 20:25:18 -06:00
Jacob Beck
2834f2d8b6 update test.env.sample 2019-04-23 19:39:44 -06:00
Jacob Beck
cc4f285765 if the schema exists, do not try to create it 2019-04-23 14:59:50 -06:00
Jacob Beck
2efae5a9c3 give bigquery list_schemas/check_schema_exist macros 2019-04-23 14:59:50 -06:00
Jacob Beck
416cc72498 Implement check_cols
Contracts: some anyOf shenanigans to add support for check_cols
Macros: split apart archive selection, probably too much copy+paste
Legacy: Archive configs now include a "timestamp" strategy when parsed from dbt_project.yml
Add integration tests
fix aliases test
Unquote columns in archives
handle null columns
attr -> use_profile
2019-04-23 14:34:24 -06:00
Jacob Beck
d66584f35c Update jsonschema and go from Draft 4 to Draft 7 2019-04-23 14:34:24 -06:00
Jacob Beck
2b80d7ad8d move column-related things into adapters where they belong 2019-04-23 14:34:24 -06:00
Jacob Beck
be3445b78a get archive blocks working
tests
fix event tracking test
Fix print statements
make archives not inherit configs from models
archive now uses the name/alias properly for everything instead of target_table
skip non-archive blocks in archive parsing instead of raising
make archives ref-able
 - test for archive ref, test for archive selects
raise a more useful message on incorrect archive targets
add "--models" and "--exclude" arguments to archives
 - pass them through to selection
 - change get_fqn to take a full node object, have archives use that so selection behaves well
 - added tests

Improve error handling on invalid archive configs

Added a special archive-only node that has extra config restrictions
add tests for invalid archive config
2019-04-23 14:33:44 -06:00
Jacob Beck
ab63042dfa archive-paths support, wire up the block parser
raise on non-archive during parsing
break archive materialization
2019-04-23 14:33:12 -06:00
Jacob Beck
af8622e8ff flake8, pep8, unit tests 2019-04-23 14:32:23 -06:00
Jacob Beck
53d083ec58 fix this for real in a way that will make me not break it again 2019-04-23 14:30:59 -06:00
Jacob Beck
32f74b60ef Merge pull request #1408 from fishtown-analytics/fix/remove-sql_where-from-subquery
Remove sql_where
2019-04-23 07:59:35 -06:00
Jacob Beck
0885be1dc0 Remove sql_where, removing an unnecessary subquery in the process 2019-04-22 11:39:54 -06:00
Jacob Beck
8b58b208ca add quote policy to Relation.create calls 2019-04-22 11:29:07 -06:00
Jacob Beck
3188aeaac4 More tests
Add an already_exists call to a test that requires an alternative database
Make the alternative database on snowflake be one that must be quoted
2019-04-22 11:29:07 -06:00
Bastien Boutonnet
e83edd30de fixme/todo regarding non destructive flag 2019-04-22 16:02:03 +02:00
Bastien Boutonnet
04333699a0 remove testing logging messages 2019-04-22 15:49:59 +02:00
Bastien Boutonnet
95c9f76e32 remove snowflake__ direct call in incremental 2019-04-22 11:10:02 +02:00
Bastien Boutonnet
2830b6a899 make default create or replace macro to allow the snowflake adapter to pick it up 2019-04-22 11:06:58 +02:00
Bastien Boutonnet
54c02ef4b4 some logging and temp call workaround 2019-04-22 01:32:07 +02:00
Bastien Boutonnet
dacce7c864 fix insert cols call and temp workaround call of snowflake 2019-04-22 01:27:25 +02:00
Jacob Beck
08c5f9aed8 no automatic transactions 2019-04-21 16:43:52 -06:00
Bastien Boutonnet
fb26ce5c24 Merge branch 'snowflake_create_or_replace' of github.com:bastienboutonnet/dbt into snowflake_create_or_replace 2019-04-20 19:32:40 +01:00
Bastien Boutonnet
91d869e61a revert test 2019-04-20 19:29:01 +01:00
Bastien Boutonnet
d168bdd0c8 Merge branch 'snowflake_create_or_replace' of github.com:bastienboutonnet/dbt into snowflake_create_or_replace 2019-04-20 19:25:41 +01:00
Bastien Boutonnet
6a104c1938 test 2019-04-20 19:25:16 +01:00
Bastien Boutonnet
2d5525e887 add some logging 2019-04-20 19:09:46 +01:00
Bastien Boutonnet
a35ad186e3 implement insert when no unique key and full refresh solution 2019-04-20 18:13:37 +01:00
Jacob Beck
dd469adf29 Merge pull request #1407 from fishtown-analytics/feature/wrap-models-in-jinja-tags
add a flag to wrap models/tests in jinja blocks
2019-04-18 11:48:22 -06:00
Jacob Beck
4ffc5cbe6a PR feedback 2019-04-18 11:17:26 -06:00
Jacob Beck
f3449dcfcb add a flag to wrap models/tests in jinja blocks, to see what happens 2019-04-18 11:17:26 -06:00
Jacob Beck
4e8c7b9216 add a test and a comment about an odd edge case in do-block parsing 2019-04-18 09:27:52 -06:00
Jacob Beck
473078986c PR feedback: run_unsafe -> _run_unsafe 2019-04-18 09:05:23 -06:00
Jacob Beck
5b74c58a43 add tests 2019-04-17 11:26:33 -06:00
Jacob Beck
a72a4e1fcb Acquire a connection before executing the macro, and commit after 2019-04-17 11:26:33 -06:00
Jacob Beck
13dd72029f run-operation fixes
Make dbt run-operation actually function at all with the RPC changes
On exceptions that occur outside of actual macro execution, catch them and return failure appropriately
2019-04-17 11:26:32 -06:00
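The run-operation fixes above make `dbt run-operation` usable again with the RPC-era task plumbing. A minimal sketch of the kind of project macro it is meant to invoke, assuming hypothetical macro, schema, and role names; the connection/commit handling it relies on is what the commits above restore.

```sql
-- macros/grant_select.sql (names hypothetical)
{% macro grant_select(schema_name, to_role) %}
    {% call statement('grant_select', fetch_result=False) %}
        grant usage on schema {{ schema_name }} to {{ to_role }};
        grant select on all tables in schema {{ schema_name }} to {{ to_role }};
    {% endcall %}
{% endmacro %}
```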
Jacob Beck
fc4fc5762b add a pretty janky data test to expose quoting issues 2019-04-16 13:51:34 -06:00
Jacob Beck
d515903172 make PostgresAdapter.verify_database handle names that resolve to the same thing 2019-04-16 13:51:33 -06:00
Jacob Beck
97a6a51bec Quote databases when we list them
Fix a copy+paste error that broke database quoting configuration
2019-04-16 13:51:33 -06:00
Bastien Boutonnet
9222c79043 implement create or replace in table materialization 2019-04-16 17:53:52 +02:00
Bastien Boutonnet
38254a8695 make create or replace snowflake macro 2019-04-16 17:53:31 +02:00
Jacob Beck
9b1aede911 Fix a number of bugs
After we find the start of a comment block, advance immediately
 - this is so we do not mistake "{#}" as both open and close of comment
support do/set statements (no {% enddo %}/{% endset %})
fix some edge-case bugs around quoting
fix a bug around materialization parsing
2019-04-16 09:52:27 -06:00
Jacob Beck
ac71888236 add tests
test for comment blocks where the first character of the comment is "}"
do/set tests
an even trickier test case
2019-04-16 09:45:28 -06:00
Jacob Beck
3f31b52daf Merge pull request #1388 from fishtown-analytics/fix/bigquery-missing-model-names
pass the model name along in get_relations
2019-04-11 07:28:22 -06:00
Connor McArthur
e3230aad55 Merge pull request #1391 from fishtown-analytics/add-logging-to-dbt-clean
Add logging to dbt clean
2019-04-08 09:31:20 -05:00
emilielimaburke
ac40aa9b02 fix merge conflict 2019-04-08 09:27:08 -05:00
Emilie Lima Schario
fa480e61a1 update docs with steps 2019-04-08 09:25:14 -05:00
Jacob Beck
c19644882b add unit tests 2019-04-05 10:22:54 -06:00
Jacob Beck
e29eccd741 pass the model name along in get_relations 2019-04-04 17:50:42 -06:00
Jacob Beck
4dd80567e1 Merge pull request #1380 from fishtown-analytics/feature/rpc-ps
RPC server task management
2019-04-03 11:46:37 -06:00
Jacob Beck
2654c79548 PR feedback 2019-04-03 10:01:36 -06:00
Jacob Beck
3b357340fd skip the timing assert on python 2.x 2019-04-02 15:15:43 -06:00
Jacob Beck
6c8e74bac9 tests, fight with (test-only?) deadlocks
when adding more threads stops helping, add more sleeps
2019-04-02 15:15:37 -06:00
Jacob Beck
182714b6b8 handle ctrl+c during parsing, etc 2019-04-02 15:14:28 -06:00
Jacob Beck
8410be848f fix the methods list 2019-04-02 15:14:28 -06:00
Jacob Beck
3f9b9962c3 add "ps" and "kill" commands, and track tasks in flight
proper cancel support
Refactor rpc server logic a bit
fix an issue in query cancelation where we would cancel ourselves
fix exception handling misbehavior
2019-04-02 15:14:28 -06:00
Jacob Beck
ec1f4bc33d fix bad add_query calls
also fix unit tests
2019-04-02 15:14:28 -06:00
Jacob Beck
f2a0d36b34 when a dbt RuntimeException is raised inside the exception handler, re-raise it instead of wrapping it 2019-04-02 15:14:28 -06:00
Jacob Beck
fc2b86df4f rearrange some path handling in the rpc server task 2019-04-02 15:14:28 -06:00
Jacob Beck
0cd0792b65 Merge pull request #1378 from fishtown-analytics/ci/add-build-tag
build on all dev branches, build on any branch named starting with "pr/"
2019-04-02 14:27:27 -06:00
Jacob Beck
122ee5ab7d Merge pull request #1381 from fishtown-analytics/fix/report-compiled-node-on-error
Fix/report compiled node on error
2019-04-02 14:24:32 -06:00
Darren Haken
8270ef71b2 Merge branch 'dev/wilt-chamberlain' of github.com:fishtown-analytics/dbt into dev/stephen-girard 2019-04-01 14:31:33 +01:00
Darren Haken
d59a13079f Merge remote-tracking branch 'upstream/dev/stephen-girard' into dev/stephen-girard 2019-04-01 14:30:59 +01:00
Jacob Beck
ed59bd22f3 actually override macros...
fix macro overrides
fix tests that masked the broken macro overrides
2019-03-29 09:03:54 -06:00
Jacob Beck
8d32c870fc attach nodes to exceptions and pass compiled sql into the error messages 2019-03-29 08:19:30 -06:00
Jacob Beck
bea2d4fb34 Merge pull request #1373 from fishtown-analytics/fix/rpc-ephemeral-clean
Support ephemeral nodes (#1368)
2019-03-27 13:02:29 -06:00
Jacob Beck
fcb97bf78a Merge pull request #1363 from roverdotcom/feature/alias-macro
Adding a generate_alias_name macro
2019-03-27 08:19:34 -06:00
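PR #1363 above adds a `generate_alias_name` macro that projects can override to control how model aliases resolve. A hedged sketch of such an override, mirroring the shape of dbt's default implementation; the exact argument order in the release that merged this PR may differ.

```sql
-- macros/generate_alias_name.sql (argument order as in later dbt releases)
{% macro generate_alias_name(custom_alias_name=none, node=none) -%}
    {%- if custom_alias_name is none -%}
        {{ node.name }}
    {%- else -%}
        {{ custom_alias_name | trim }}
    {%- endif -%}
{%- endmacro %}
```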
Jacob Beck
161a78dc23 build on all dev branches or branches that start with "pr/" 2019-03-26 15:52:12 -06:00
Brandyn Lee
4b7bddb481 generate_macro update 2019-03-26 13:32:56 -07:00
Brandyn Lee
0879b1b38b A more verbose function 2019-03-26 13:29:08 -07:00
Brandyn Lee
8bf81a581a Fix integration tests 2019-03-26 13:29:08 -07:00
Brandyn Lee
759da58648 Fix unit test, node.name 2019-03-26 13:29:08 -07:00
Brandyn Lee
ec4a4fe7df Add second integration test and pass parsed_node through macro 2019-03-26 13:29:08 -07:00
Brandyn Lee
4dedd62aea Minimal testing 2019-03-26 13:29:08 -07:00
Brandyn Lee
cf4030ed94 Get unit testing working 2019-03-26 13:29:08 -07:00
Brandyn Lee
35df887307 Get get_customer_alias working. 2019-03-26 13:29:08 -07:00
Brandyn Lee
d41adaa51b Copying all the customer schema code 2019-03-26 13:29:08 -07:00
Jacob Beck
9373a45870 add integration test 2019-03-26 07:28:37 -06:00
Jacob Beck
a2db88c9c3 attach nodes to results during processing, but not for serialization 2019-03-26 07:25:35 -06:00
Jacob Beck
a26d7bf9e8 pass along the compiled manifest instead of pitching it away 2019-03-26 07:25:34 -06:00
Jacob Beck
4225047b06 Merge pull request #1375 from fishtown-analytics/feature/inline-macros-models
Support macros in execute/compile tasks (#1372)
2019-03-26 07:16:51 -06:00
Jacob Beck
0a9ed9977b include top-level raw/comment blocks 2019-03-25 17:30:49 -06:00
Jacob Beck
fc1b4ce88e add all non-macro toplevel blocks to the sql so things like "if" work 2019-03-25 15:30:21 -06:00
Jacob Beck
6295c96852 get macros from the body of the sql data instead of a separate parameter 2019-03-25 15:30:21 -06:00
Jacob Beck
73418b5c16 make the block lexer include raw toplevel data 2019-03-25 15:30:21 -06:00
Jacob Beck
98d530f0b1 Jinja block parsing/lexing implemented 2019-03-25 14:20:24 -06:00
Drew Banin
e45ed0ed8c Merge pull request #1366 from emilieschario/fix_profile_link_issue_1344
Update broken profiles.yml link
2019-03-24 12:07:58 -04:00
emilielimaburke
fc04e2db89 updates link 2019-03-24 11:59:20 -04:00
Drew Banin
cfaacc5a76 Merge pull request #1364 from fishtown-analytics/fix/redshift-test-env-password
fix sample test env password
2019-03-23 14:43:24 -04:00
Drew Banin
b91c3edb16 fix sample test env password 2019-03-21 19:29:03 -04:00
Connor McArthur
0a503a0bed try to resolve merge 2019-03-21 15:13:11 -04:00
Connor McArthur
015e4d66b2 resolve connection naming conflict 2019-03-21 15:12:00 -04:00
Connor McArthur
da4c135e23 Merge pull request #1359 from fishtown-analytics/dev/stephen-girard
0.13.0 (Stephen Girard)
2019-03-21 13:26:58 -04:00
Drew Banin
588851ac1c Update CHANGELOG.md 2019-03-21 11:16:29 -04:00
Drew Banin
2b7d7061f9 Update CHANGELOG.md 2019-03-21 11:15:03 -04:00
Connor McArthur
24bc3b6d76 Bump version: 0.13.0rc1 → 0.13.0 2019-03-21 11:05:28 -04:00
Connor McArthur
cd52a152f6 update RELEASE instructions, add script to build all the source distributions 2019-03-21 11:04:51 -04:00
Connor McArthur
3ecf8be873 add RC bugfixes 2019-03-21 10:50:03 -04:00
Connor McArthur
f38466db11 Merge pull request #1360 from fishtown-analytics/0.13.0-changelog
0.13.0 changelog
2019-03-21 10:40:34 -04:00
Drew Banin
3b8d5c0609 Merge pull request #1356 from fishtown-analytics/fix/concurrent-transaction-fresness
fix for errored-out transactions on redshift
2019-03-20 16:49:14 -04:00
Drew Banin
60539aaa56 fix for errored out transactions on redshift 2019-03-20 14:55:54 -04:00
Drew Banin
6d53e67670 Update parsed.py
pep8
2019-03-19 15:37:13 -04:00
Drew Banin
9771e63247 Merge branch 'dev/stephen-girard' into dev/stephen-girard 2019-03-19 15:15:02 -04:00
Jacob Beck
02c9bcabe0 Merge pull request #1355 from fishtown-analytics/fix/out-of-order-execution-on-model-select
Fix out of order execution on model select (#1354)
2019-03-19 07:34:52 -06:00
Jacob Beck
69c8a09d43 Use the transitive closure to calculate the graph
Don't maintain the links manually while removing nodes
Just take the transitive closure and remove all nodes
2019-03-18 16:49:46 -06:00
Jacob Beck
9ae229a0d5 ugh, forgot to remove this I guess 2019-03-18 15:51:26 -06:00
Jacob Beck
38921fad17 tests 2019-03-18 15:48:22 -06:00
Jacob Beck
633858a218 When building the graph underlying the graph queue, preserve transitive dependencies while removing the skipped nodes 2019-03-18 15:08:51 -06:00
Drew Banin
70262b38f8 Merge pull request #1353 from fishtown-analytics/fix/issue-1352
possibly reckless fix for #1352
2019-03-18 09:30:46 -04:00
Jacob Beck
f96dedf3a9 redshift can just change this on you apparently 2019-03-16 12:56:10 -04:00
Drew Banin
1ce0493488 possibly reckless fix for #1352 2019-03-15 15:08:48 -04:00
Jacob Beck
027a0d2ee6 Merge pull request #1341 from fishtown-analytics/feature/rpc-improve-dbt-exceptions
RPC: Error handling improvements
2019-03-12 16:17:50 -06:00
Jacob Beck
9c8e08811b redshift can just change this on you apparently 2019-03-12 15:09:07 -06:00
Jacob Beck
c1c09f3342 Merge pull request #1348 from fishtown-analytics/feature/rpc-with-macros
RPC: macros
2019-03-12 14:00:30 -06:00
Drew Banin
05b82a22bc Merge pull request #1349 from fishtown-analytics/fix/new-logo
Use new logo
2019-03-11 21:33:45 -04:00
Drew Banin
067aa2ced0 replace logo 2019-03-11 21:32:40 -04:00
Drew Banin
f18733fd09 Add files via upload 2019-03-11 21:31:41 -04:00
Drew Banin
a981f657ec Merge pull request #1347 from fishtown-analytics/fix/warn-error-flag
re-add --warn-error flag
2019-03-11 20:48:15 -04:00
Jacob Beck
81426ae800 add optional "macros" parameter to dbt rpc calls 2019-03-11 18:25:17 -06:00
Drew Banin
4771452590 Merge pull request #1346 from fishtown-analytics/fix/on-run-hooks-in-tests
[Stephen Girard] fix for on-run- hooks running in tests
2019-03-11 20:07:46 -04:00
Drew Banin
10bfaf0e4b Update CHANGELOG.md 2019-03-11 19:56:57 -04:00
Drew Banin
9e25ec2f07 Update CHANGELOG.md 2019-03-11 19:55:57 -04:00
Drew Banin
90fb908376 re-add --warn-error flag 2019-03-11 19:42:14 -04:00
Drew Banin
a08c0753e7 fix for on-run- hooks running in tests 2019-03-11 13:01:22 -04:00
Jacob Beck
fc22cb2bf0 when encoding json, handle dates and times like datetimes 2019-03-08 13:28:44 -07:00
Jacob Beck
fbaae2e493 fix Python 2.7 2019-03-08 12:02:51 -07:00
Jacob Beck
c86390e139 use notice logging for "Found x models, ...", change a couple other levels 2019-03-08 10:54:50 -07:00
Jacob Beck
d890642c28 add NOTICE level logging, make log messages richer types 2019-03-08 10:54:10 -07:00
Jacob Beck
6620a3cd90 wrap all context-raised exceptions in node info
Fixes "called by <Unknown>"
2019-03-08 10:15:16 -07:00
Jacob Beck
7e181280b3 PR feedback: QueueMessageType class, remove extra assignments 2019-03-08 08:19:50 -07:00
Jacob Beck
53499e6b14 Merge branch 'dev/stephen-girard' into dev/wilt-chamberlain 2019-03-08 08:06:18 -07:00
Jacob Beck
3f948ae501 Error handling improvements
All dbt errors now have proper error codes/messages
The raised message at runtime ends up in result.error.data.message
The raised message type at runtime ends up in result.error.data.typename
result.error.message is a plaintext name for result.error.code
dbt.exceptions.Exception.data() becomes result.error.data
Collect dbt logs and make them available to requests/responses
2019-03-08 07:25:21 -07:00
Jacob Beck
2090887a07 Merge pull request #1336 from fishtown-analytics/feature/one-connection-per-thread
per-thread connections
2019-03-08 06:21:53 -07:00
Jacob Beck
a7bfae061c Merge pull request #1342 from fishtown-analytics/fix/temp-relations-from-modelname
Prefix temp relations with model name instead of alias (#1321)
2019-03-08 06:17:52 -07:00
Jacob Beck
fb1926a571 use model name instead of alias for temp tables to ensure uniqueness 2019-03-07 17:08:40 -07:00
Jacob Beck
c215158d67 PR feedback, clean up associated connection holding 2019-03-06 19:47:12 -07:00
Connor McArthur
74152562fe Bump version: 0.13.0a2 → 0.13.0rc1 2019-03-06 17:44:57 -05:00
Jacob Beck
e2af871a5a per-thread connections
parsing now always opens a connection, instead of waiting to need it
remove model_name/available_raw/etc
2019-03-06 12:10:22 -07:00
Jacob Beck
2ad116649a Merge branch 'dev/stephen-girard' into dev/wilt-chamberlain 2019-03-06 07:18:06 -07:00
Jacob Beck
03aa086e0b Merge pull request #1338 from fishtown-analytics/fix/jeb-snowflake-source-quoting
Fix snowflake source quoting / information_schema uses
2019-03-06 06:59:13 -07:00
Jacob Beck
a335857695 PR feedback 2019-03-05 21:47:37 -07:00
Jacob Beck
95a88b9d5d PR feedback 2019-03-05 18:12:19 -07:00
Jacob Beck
2501783d62 fix macro kwargs 2019-03-05 18:02:13 -07:00
Jacob Beck
7367f0ffbd Merge remote-tracking branch 'origin/snowflake-unquote-db-by-default-2' into fix/jeb-snowflake-source-quoting 2019-03-05 17:17:46 -07:00
Jacob Beck
088442e9c1 test fixes 2019-03-05 17:12:02 -07:00
Jacob Beck
ec14c6b2dc initial work, unit tests 2019-03-05 17:11:29 -07:00
Drew Banin
7eb033e71a fix incorrect schema filtering logic 2019-03-05 17:54:29 -05:00
Drew Banin
1a700c1212 Merge pull request #1330 from fishtown-analytics/fix/handle-unexpected-snapshot-values
[Stephen Girard] handle unexpected max_loaded_at types
2019-03-05 17:22:16 -05:00
Drew Banin
78fbde0e1f Merge branch 'dev/stephen-girard' of github.com:fishtown-analytics/dbt into fix/handle-unexpected-snapshot-values 2019-03-05 14:43:50 -05:00
Darren Haken
a30cc5e41e Add persist_docs to dbt's contract 2019-03-05 15:15:30 +00:00
Darren Haken
804a495d82 Fix BQ integration tests by adding persist_docs: {} 2019-03-05 14:51:00 +00:00
Jacob Beck
0a4eea4388 Merge pull request #1301 from fishtown-analytics/feature/rpc-tasks
RPC server (#1274)
2019-03-05 06:35:03 -07:00
Jacob Beck
8471ce8d46 Merge branch 'dev/wilt-chamberlain' into feature/rpc-tasks 2019-03-04 21:07:56 -07:00
Jacob Beck
f9b1cf6c1c Merge pull request #1314 from fishtown-analytics/azure-pipelines-2
Set up CI with Azure Pipelines
2019-03-04 21:06:04 -07:00
Drew Banin
22a2887df2 add missing import 2019-03-04 22:50:23 -05:00
Jacob Beck
02e88a31df Merge branch 'dev/wilt-chamberlain' into feature/rpc-tasks 2019-03-04 19:55:51 -07:00
Jacob Beck
98d5bc1285 PR feedback 2019-03-04 19:50:24 -07:00
Jacob Beck
436815f313 Merge branch 'dev/stephen-girard' into azure-pipelines-2 2019-03-04 19:50:16 -07:00
Drew Banin
328ce82bae fix unit tests 2019-03-04 20:43:37 -05:00
Drew Banin
d39a048e6e pr feedback 2019-03-04 20:39:31 -05:00
Drew Banin
67b56488d3 0.13.0 fixes around database quoting and rendering
- do not quote snowflake database identifiers by default
- do not find relations in source schemas in list_relations
- do not render database names in stdout if a custom database is not specified
2019-03-04 09:09:29 -05:00
Drew Banin
07397edd47 handle unexpected loaded_at field types 2019-03-03 17:06:34 -05:00
Darren Haken
1e3bdc9c06 Fix unit tests by adding persist_docs: {} 2019-03-02 17:36:28 +00:00
Darren Haken
12bfeaa0d3 Merge remote-tracking branch 'upstream/dev/stephen-girard' into dev/stephen-girard 2019-03-02 17:19:36 +00:00
Blake Blackwell
56801f9095 Adding changes based on Drew's recommendations 2019-03-02 05:28:09 -06:00
Drew Banin
54b0b38900 Merge pull request #1328 from fishtown-analytics/feature/operations-actually
Feature/operations actually
2019-03-01 11:27:42 -05:00
Drew Banin
e4660969cf Merge pull request #1324 from fishtown-analytics/fix/snowflake-rename
Fix for alter table ... rename statements on Snowflake
2019-02-28 14:59:57 -05:00
Drew Banin
1090a1612a add run-operation subtask 2019-02-28 11:16:14 -05:00
Drew Banin
f4baba8cc1 Merge pull request #1327 from fishtown-analytics/fix/relocate-event-json-schemas
relocate events dir back to root of repo
2019-02-28 10:12:44 -05:00
Darren Haken
28fa237f87 Add tests for the BQ description in the relations 2019-02-28 14:18:44 +00:00
Drew Banin
f19f0e8193 (fixes #1319) relocate events dir back to root of repo 2019-02-27 22:46:01 -05:00
Drew Banin
72d6ee2446 fix tests 2019-02-27 13:24:32 -05:00
Drew Banin
f8dfe48653 (fixes #1313) Show sources in resource count list during compilation 2019-02-26 13:18:30 -05:00
Drew Banin
9c9c0d991a (fixes #1316) Fix for alter table rename on Snowflake tables, added tests 2019-02-26 13:08:28 -05:00
Jacob Beck
8d2cb5fdf1 more newlines 2019-02-22 15:03:22 -07:00
Jacob Beck
1486796973 on windows, host on "localhost" instead of "database" 2019-02-22 14:07:59 -07:00
Jacob Beck
1300f8f49f Set up CI with Azure Pipelines
Add yml file
chunk up windows tests in tox.ini
add missing mock.patch calls
fix path issues in docs tests
better error messaging on mystery exceptions
ENOEXEC handling on Windows
Handle pipelines insisting on cloning with crlf line endings
set up postgres service for the postgres tests
remove appveyor
2019-02-22 13:44:37 -07:00
Jacob Beck
29e9c63e94 the RPC tests fail on windows, just skip them 2019-02-21 16:59:05 -07:00
Jacob Beck
4bda6769c5 fix an error handling bug I introduced 2019-02-21 15:29:10 -07:00
Jacob Beck
dc5c59b40b PR feedback
argument parsing fixes
change the table to a list of columns + list of rows
2019-02-20 15:06:05 -07:00
Jacob Beck
a90ef2504e PR feedback
Fix python 2.7
remove TODO
remove useless file I added by accident
close Pipe members
Give RPC its own logger, include remote addrs
2019-02-20 07:45:52 -07:00
Jacob Beck
0f3967e87d add tests, put them in sources to re-use all the source work 2019-02-19 14:41:48 -07:00
Jacob Beck
1a0df174c9 Implement the RPC server
- make tasks all have a "from_args" that handles initializing correct config type, etc
- make it possible to process a single node's refs at a time
- Make remote run/compile tasks + rpc server task, wire them up
- add ref() and source() support, and vestigial doc() support
- refactor results a bit to support new result behavior
- don't write to the filesystem on requests
- handle uniqueness issues
2019-02-19 10:43:27 -07:00
Darren Haken
a1b5375e50 Add AttributeError to except block 2019-02-19 15:59:12 +00:00
Darren Haken
101fd139c7 Add try catch for the updating config value.
This resolves the issue that `{{ config(persist_docs=true) }}`
would not raise a useful exception.
2019-02-19 15:41:51 +00:00
Darren Haken
25d5fb1655 Add persist_docs to project level settings.
Change `table_options` to have better error handling.
2019-02-19 12:18:09 +00:00
Darren Haken
e672042306 Merge remote-tracking branch 'upstream/dev/stephen-girard' into dev/stephen-girard 2019-02-19 11:46:04 +00:00
Connor McArthur
42ec3f9f06 add base setup.py to .bumpversion.cfg, bump the version 2019-02-18 16:04:24 -05:00
Connor McArthur
a4fd148a80 Bump version: 0.13.0a1 → 0.13.0a2 2019-02-18 16:02:37 -05:00
Drew Banin
e9927fb09c Merge pull request #1303 from fishtown-analytics/fix/0.13.0-tweaks
Fix/0.13.0 tweaks
2019-02-18 15:57:45 -05:00
Drew Banin
da31c9a708 pep8 2019-02-18 12:37:16 -05:00
Drew Banin
1be8cb8e91 add logging for registry requests 2019-02-18 12:34:20 -05:00
Drew Banin
a6ae79faf4 show db name in model result output 2019-02-18 12:30:19 -05:00
Drew Banin
61c345955e Merge pull request #1302 from fishtown-analytics/fix/source-timestamps
fix for invalid timestamps returned by source freshness cmd
2019-02-18 12:27:01 -05:00
Drew Banin
ebce6da788 fix tests 2019-02-18 10:00:15 -05:00
Blake Blackwell
9772c1caeb Adding incremental logic to Snowflake plugins 2019-02-17 19:28:46 -06:00
Darren Haken
5661855dcc Merge remote-tracking branch 'upstream/dev/stephen-girard' into dev/stephen-girard 2019-02-16 15:22:02 +00:00
Darren Haken
11319171be Add description for BQ tables and view relations.
Also make changes to table_options macro based on testing against a
real project.
2019-02-16 15:21:43 +00:00
Drew Banin
3134b59637 fix for invalid timestamps returned by source freshness cmd 2019-02-15 20:23:07 -05:00
Connor McArthur
dfb87dce38 fix long_description field in all the setup.pys 2019-02-14 15:46:23 -05:00
Connor McArthur
ce105d2350 Merge pull request #1299 from fishtown-analytics/rm-presto
remove presto adapter
2019-02-14 15:22:19 -05:00
Connor McArthur
e039397ab1 add setup.py for dbt 2019-02-14 14:34:43 -05:00
Connor McArthur
b54aadf968 rm presto adapter unit test 2019-02-14 14:30:55 -05:00
Connor McArthur
e9cf074b45 move presto docker stuff 2019-02-14 14:21:41 -05:00
Connor McArthur
c417c2011b remove presto from requirements.txt 2019-02-14 14:19:58 -05:00
Connor McArthur
2c3c3c9a84 remove presto adapter 2019-02-14 14:18:30 -05:00
Drew Banin
f546390221 Update CHANGELOG.md 2019-02-14 10:00:16 -05:00
Drew Banin
40034e056f Merge pull request #1298 from fishtown-analytics/feature/sources-in-docs-site
bump docs site index.html to use sources
2019-02-14 09:38:36 -05:00
Drew Banin
47e9896d54 bump docs site index.html to use sources 2019-02-14 09:38:03 -05:00
Jacob Beck
cda365f22a Merge pull request #1297 from fishtown-analytics/fix/widen-boto3-restrictions
fix: Widen botocore/boto3 requirements (#1234)
2019-02-14 07:18:59 -07:00
Jacob Beck
36e1252824 Widen botocore/boto3 requirements
Remove botocore/boto3 restrictions on the snowflake plugin
Increase minimum snowflake-connector-python to 1.6.12
 - 1.6.12 is the most recent boto3/botocore requirements change
Increase the upper bounds on boto3/botocore to match the redshift plugin
 - they match the upper bounds of snowflake-connector-python 1.6.12
2019-02-13 15:38:21 -07:00
Drew Banin
cf873d0fc5 Merge pull request #1293 from fishtown-analytics/fix/incremental-from-view
Fix for models which transition from views to incremental
2019-02-13 17:09:55 -05:00
Jacob Beck
026c50deb3 Merge pull request #1272 from fishtown-analytics/feature/source-freshness
Feature: source freshness (#1240)
2019-02-13 14:40:34 -07:00
Jacob Beck
c5138eb30f Merge pull request #1295 from fishtown-analytics/fix/dbt-seed-show
fix dbt seed --show (#1288)
2019-02-13 13:03:00 -07:00
Jacob Beck
0bd59998c0 Merge branch 'dev/stephen-girard' into feature/source-freshness 2019-02-13 12:14:57 -07:00
Jacob Beck
7cd336081f Merge branch 'dev/stephen-girard' into feature/source-freshness 2019-02-13 11:21:31 -07:00
Jacob Beck
47cc931b0f Merge pull request #1291 from fishtown-analytics/feature/enhanced-model-selection
Enhanced model selection syntax (#1156)
2019-02-13 11:08:19 -07:00
Jacob Beck
5462216bb3 fix dbt seed --show
Fix the invalid dict-style lookup on the result of dbt seed --show
Add a --show to a test to make sure it doesn't crash dbt
2019-02-13 10:49:54 -07:00
Drew Banin
fe86615625 Merge pull request #1286 from fishtown-analytics/fix/error-url-v1-schema
fix error message url
2019-02-13 11:42:41 -05:00
Drew Banin
9a74abf4cc Merge pull request #1252 from fishtown-analytics/feature/snowflake-transient-tables
Support transient tables on Snowflake
2019-02-13 11:42:18 -05:00
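The transient-tables PR merged above exposes Snowflake's transient table type through a model config. A minimal sketch of enabling it on a single model, assuming a hypothetical model name and upstream ref; the same key can also be set for groups of models in dbt_project.yml.

```sql
-- models/my_snowflake_table.sql (names hypothetical)
{{ config(materialized='table', transient=true) }}

select * from {{ ref('stg_events') }}
```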
Drew Banin
2847f690f1 Merge branch 'dev/stephen-girard' into fix/incremental-from-view 2019-02-13 11:27:25 -05:00
Drew Banin
2c94e9e650 (fixes #1292) Check for relation type in is_incremental()
If the target relation is a non-table (probably a view) then dbt
should return False from the is_incremental() macro. The
materialization will drop this relation before running the model
code as a `create table as` statement, so the incremental filter
would likely be invalid.
2019-02-13 11:26:40 -05:00
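The fix above makes `is_incremental()` return False when the existing relation is not a table, so the incremental filter is skipped on the run that rebuilds it. A typical incremental model relying on that behavior, with hypothetical table and column names:

```sql
-- models/events_incremental.sql (names hypothetical)
{{ config(materialized='incremental', unique_key='event_id') }}

select *
from {{ ref('stg_events') }}

{% if is_incremental() %}
  -- only applied when the target already exists as a table
  where event_time > (select max(event_time) from {{ this }})
{% endif %}
```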
Jacob Beck
95ab2ab443 fix Snowflake tests 2019-02-13 09:05:07 -07:00
Jacob Beck
3dcfa2c475 Merge branch 'dev/stephen-girard' into feature/enhanced-model-selection 2019-02-13 08:59:03 -07:00
Jacob Beck
7bab31543e Merge pull request #1280 from fishtown-analytics/feature/werror
Add flag for raising errors on warnings (#1243)
2019-02-13 08:55:14 -07:00
Jacob Beck
08c4c2a8b5 PR feedback
print source name on pass/warn/error lines
distinguish 'error' vs 'error stale'
2019-02-13 08:44:23 -07:00
Jacob Beck
9fcad69bf4 tests 2019-02-13 08:17:19 -07:00
Jacob Beck
b406a536a9 initial selector work 2019-02-13 08:17:13 -07:00
Jacob Beck
5e8ab9ce4a Remove RunManager
Move some of RunManager into tasks
Move compile_node work into compilation
Move manifest work into the GraphLoader
Move the rest into the runners

Implement freshness calculations for sources

command: 'dbt source snapshot-freshness'
support for 4 adapters (no presto)
Integration tests
break up main.py's argument parsing
Pass the manifest along to freshness calculation

Results support for freshness

New freshness result contracts
Fix source result printing
Result contract cleanup
safe_run supports alternate result types
Fix tests to support changes in results

PR feedback:

- snowflake macro changed to always return utc
- no cte in collect_freshness
- remove extra optional arg
- fix the has_freshness check to examine if there is anything in freshness
- support error_after without warn_after and vice-versa
- snowflake: convert_timestamp -> convert_timezone

Update sources to be Relations

 - update contracts
 - add create_from_source
 - add create_from_source calls
 - fix tests

PR feedback

create_from_source forces quotes
default source schema/table from source/table names
snowflake quoting nonsense
also fix output: pass -> PASS
make seeding test 017 take 1m instead of 3m by using csv instead of sql

- source tweaks for the docs site
2019-02-13 10:07:37 -05:00
Darren Haken
562d47f12a Make changes as per review from @drewbanin 2019-02-12 15:21:29 +00:00
Drew Banin
ab6d4d7de5 use frozenset for adapter specific configs, add bq configs 2019-02-11 13:04:58 -05:00
Drew Banin
4b9ad21e9e fix error message url 2019-02-10 10:34:50 -05:00
Darren Haken
f4c233aeba #1031 Add macro to add table description from schema.yml for BQ 2019-02-10 13:38:40 +00:00
Jacob Beck
90497b1e47 give deprecation its own integration test 2019-02-08 08:22:12 -07:00
Jacob Beck
9b9319cbd0 fix bigquery
Remove unnecessary is_incremental() from test
Fix strict mode type check
2019-02-08 08:00:16 -07:00
Jacob Beck
fe948d6805 Add flag for raising errors on warnings
fix strict mode
fix some strict mode asserts
add internal WARN_ERROR support and a warn_on_error function
make everything that says "warning" go through warn_or_error
add --warn-error flag to main.py
remove sql_where from tests
replace drop-existing with full-refresh in tests
(re-)add integration tests for malformed schema/source tests and strict mode
2019-02-08 08:00:09 -07:00
Jacob Beck
b17d70679f Merge pull request #1277 from fishtown-analytics/fix/backwards-compatible-adapter-functions
0.13.0 backwards compatibility (#1273)
2019-02-08 06:54:36 -07:00
Jacob Beck
5290451a65 force upgrade pip on appveyor to see if that helps, idk 2019-02-07 19:51:20 -07:00
Jacob Beck
faadb34aff backwards compatibility work
create deprecation warning decorator for deprecated available methods
make already_exists just take schema/name again and direct users to get_relation
remove test that cannot fail (the deprecation does not exist!)
add a little deprecation warning check to test_simple_copy
2019-02-05 09:49:10 -07:00
Connor McArthur
314b4530d8 Merge pull request #1242 from fishtown-analytics/add-node-level-timing-info
add node level timing info to run_results.json and to tracking
2019-02-01 16:45:51 -05:00
Connor McArthur
843d342137 fix event tracking tests 2019-02-01 14:34:28 -05:00
Connor McArthur
f0981964f3 upgrade run_model iglu schema to 1-0-1 2019-02-01 13:59:53 -05:00
Jacob Beck
da409549d4 Merge pull request #1262 from fishtown-analytics/feature/source-table-selection
Add source selector support (#1256)
2019-01-30 08:22:25 -07:00
Connor McArthur
dc7ad2afc7 Merge pull request #1264 from fishtown-analytics/limit-exception-context
limit exception context scope
2019-01-29 13:33:00 -05:00
Connor McArthur
7e8ea51c1a remove timezone from datetime properties 2019-01-29 11:30:08 -05:00
Connor McArthur
343afc2374 typo: switch datetime in for pytz 2019-01-29 11:18:04 -05:00
Jacob Beck
c2a0a2092a source: now allows full-source selection, corresponding tests 2019-01-29 08:52:01 -07:00
Connor McArthur
f74a252b95 whitelist things from pytz, datetime 2019-01-29 10:50:18 -05:00
Connor McArthur
f5cfadae67 fix context exports for dbt.exceptions 2019-01-29 10:39:58 -05:00
Connor McArthur
7c1ecaf2b8 limit exception context scope 2019-01-29 09:44:58 -05:00
Jacob Beck
ea5edf20ba Add source selector support 2019-01-28 16:53:42 -07:00
Jacob Beck
c5f8cc7816 Merge pull request #1258 from fishtown-analytics/fix/dont-create-unused-schemas
Only create schemas for selected nodes (#1239)
2019-01-28 08:46:03 -07:00
Jacob Beck
60d75d26f0 Merge pull request #1260 from fishtown-analytics/fix/dbt-deps-foreign-langauges
Fix dbt deps on non-english shells (#1222)
2019-01-28 08:16:20 -07:00
Jacob Beck
f6402d3390 Merge pull request #1254 from fishtown-analytics/feature/source-tables
Add 'sources' to dbt (#814)
2019-01-28 08:05:22 -07:00
Jacob Beck
7b23a1b9a8 Make loader actually optional
Default to the empty string in ParsedSourceDefinitions
2019-01-25 07:37:09 -07:00
Jacob Beck
7714d12f7c fix "git clone" on windows
env replaces rather than updates, and Windows really needs its env
2019-01-24 16:56:38 -07:00
Jacob Beck
fc813e40eb fix an old merge conflict 2019-01-24 15:36:50 -07:00
Jacob Beck
96578c3d2f add support for "env" parameter to system.run_cmd, make every git command that parses output use it 2019-01-24 15:36:50 -07:00
Jacob Beck
f47be0808f PR feedback
Add a test for model -> source
Cleaned up extra TODOs
Fixed missing variable in source_target_not_found
Changed a ref_target_not_found -> source_target_not_found
Better error messaging on invalid schema.yml files
Made loader optional
Make get_model_name_or_none accept just about anything
2019-01-24 15:25:35 -07:00
Drew Banin
b6d1e15a9f Merge pull request #1251 from fishtown-analytics/fix/redshift-iam-auth-0.13.0
fix for optional adapter configs with aliases
2019-01-24 10:44:47 -05:00
Jacob Beck
7d332aaa35 fix snowflake check_schema_exists macro 2019-01-24 06:20:26 -07:00
Jacob Beck
9ff8705cd7 fix an old merge conflict 2019-01-23 16:35:03 -07:00
Jacob Beck
76669995f6 Only create schemas for selected models (and the default schema)
- pass selected uids along to the before_run method for schema creation
 - only schemas used by selected nodes are created
 - fix some big issues with check_schema_exists (such as: it does not work)
 - integration test changes to test this
2019-01-23 16:35:03 -07:00
Jacob Beck
1079e9bfaf test enhancements, make failure to find sources clearer 2019-01-23 14:11:03 -07:00
Jacob Beck
67d85316ac make the test a bit sillier 2019-01-23 13:47:34 -07:00
Jacob Beck
7d41f4e22c contracts test, squash a nodes iteration bug 2019-01-23 13:37:32 -07:00
Jacob Beck
cdeb0d1423 Merge pull request #1246 from fishtown-analytics/feature/presto-notimplemented-error
presto notimplemented errors (#1228)
2019-01-23 11:18:36 -07:00
Jacob Beck
477699a102 Merge pull request #1245 from fishtown-analytics/feature/presto-adapter
Presto Adapter (#1106, #1229, #1230)
2019-01-23 11:18:17 -07:00
Jacob Beck
63047d01ab add a very basic integration test for sources 2019-01-23 09:48:01 -07:00
Jacob Beck
5e53e64df2 Add source configuration feature
- newly supported 'sources' section in schema.yml
   - like models - define tests, columns, column tests, descriptions
   - reference them via "source()" function (complements "ref()")
   - new 'sources' field on nodes (like 'refs')
 - many many contract and test updates to support it
 - fixed a long-standing bug with referencing tests by namespace
 - fix linecache on python 2.7
 - rendering now forces strings into their native format to avoid "u" prefixes in tests
 - fixup anything that iterates over nodes in the graph to accept ParsedSourceDefinitions
2019-01-23 09:48:01 -07:00
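The commit above introduces sources: tables declared in schema.yml and referenced from models with the new `source()` function instead of hard-coded names. A minimal sketch of the model side of that contract, with hypothetical source and column names:

```sql
-- models/stg_orders.sql (names hypothetical)
select
    id as order_id,
    user_id,
    status
-- resolves to the database/schema/table declared for the 'shop' source in schema.yml
from {{ source('shop', 'orders') }}
```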
Connor McArthur
89207155fd update RELEASE process 2019-01-21 13:59:37 -05:00
Drew Banin
b7d9eecf86 Merge pull request #1232 from alexyer/feature/snowflake-ssh-login
Add support for Snowflake Key Pair Authentication
2019-01-21 11:29:18 -05:00
Drew Banin
8212994018 push adapter-specific configs to plugins, add transient config 2019-01-19 23:37:03 -05:00
Drew Banin
c283cb0ff4 fix for optional configs with aliases 2019-01-19 15:30:59 -05:00
Drew Banin
a34ab9a268 Merge pull request #1250 from fishtown-analytics/s-sinter-dbt-cloud-g
Update README.md
2019-01-19 15:08:06 -05:00
Drew Banin
2a2a2b26ef Update README.md 2019-01-19 15:07:24 -05:00
Jacob Beck
49fe2c3bb3 add NotImplemented archive implementation 2019-01-17 09:57:18 -07:00
Jacob Beck
170942c8be add presto tests for archives + incremental failing 2019-01-17 09:57:06 -07:00
Alexander Yermakov
438b3529ae Add support for Snowflake Key Pair Authentication
This PR adds support for Snowflake Key Pair Authentication.

Unit tests verify everything's getting passed through correctly.
2019-01-17 11:08:11 +02:00
Connor McArthur
2bff901860 pep8 2019-01-16 21:29:25 -05:00
Connor McArthur
d45fff3c5a move comment with relevant code 2019-01-16 21:24:12 -05:00
Connor McArthur
8843a22854 undo changes to tox.ini 2019-01-16 21:22:54 -05:00
Connor McArthur
d9ba73af44 Merge branch 'dev/stephen-girard' of github.com:fishtown-analytics/dbt into add-node-level-timing-info 2019-01-16 21:21:10 -05:00
Jacob Beck
491d5935cf make cancellation work, pretty ugly and no clue how to test it 2019-01-16 16:47:48 -07:00
Jacob Beck
85389afb3e Presto testo 2019-01-16 16:47:48 -07:00
Jacob Beck
16d75249c5 Implement the presto adapter
- seeds, views, tables, and ephemeral models all implemented
 - ConnectionManager methods and credentials
 - Adapter.date_function
 - macros
 - give presto a small chunk size for seeds
 - manually cascade drop_schema
 - stub out some stuff that does not exist in presto
 - stub out incremental materializations for now (delete is not useful)
 - stub out query cancellation for now
2019-01-16 16:47:48 -07:00
Jacob Beck
27842f4cff ran the create script 2019-01-16 16:47:48 -07:00
Jacob Beck
016afd4b2c more work on the create script 2019-01-16 16:47:48 -07:00
Jacob Beck
cdb0cbdca7 Some dbt-core fixes/changes to support the new adapter
- fix sql docstring, which is just wrong.
 - extract the default seed materialization to one that takes a chunk size param
 - add NUMBERS constant for all number types dbt should support inserting
 - update Column definition to allow for "varchar"
2019-01-16 16:47:43 -07:00
Jacob Beck
da2d7ea8c0 Create a docker-compose container system for presto testing 2019-01-16 16:46:56 -07:00
Jacob Beck
f6278d590a initial work on an adapter building script 2019-01-16 16:46:56 -07:00
Jacob Beck
3b0d14bd5d Merge pull request #1202 from fishtown-analytics/feature/dynamic-database-config
dynamic database config (#1183, #1204)
2019-01-16 16:46:33 -07:00
Connor McArthur
66d1f2099b Merge branch 'dev/stephen-girard' of github.com:fishtown-analytics/dbt into add-node-level-timing-info 2019-01-16 16:45:31 -05:00
Jacob Beck
1ae32c12ab Merge pull request #1226 from fishtown-analytics/fix/catch-everything
On unexpected errors in safe_run, do not raise (#1223)
2019-01-16 14:44:22 -07:00
Connor McArthur
c626de76ff fix test_events 2019-01-16 16:38:54 -05:00
Jacob Beck
c80792d713 Merge pull request #1207 from fishtown-analytics/feature/macros-for-everything
Convert all embedded adapter SQL into macros (#1204)
2019-01-16 14:33:02 -07:00
Connor McArthur
a16958e35d fix test_docs_generate integration test 2019-01-16 15:47:49 -05:00
Connor McArthur
3e4c75e41b add timing as a namedproperty of RunModelResult 2019-01-16 15:22:15 -05:00
Jacob Beck
1596174a36 give root grant, split up test 2019-01-16 12:49:39 -07:00
Jacob Beck
f4084f069a PR feedback from #1202
Postgres/Redshift permissions issues:
 - use pg_views/pg_tables to list relations
 - use pg_namespace to list schemas and check schema existence
Catalog:
 - Redshift: fix error, add database check
 - Snowflake: Use the information_schema_name macro
Snowflake/default: Quote the database properly in information_schema_name
2019-01-16 12:36:06 -07:00
Connor McArthur
aceee680c8 add thread id to run results 2019-01-16 12:40:08 -05:00
Connor McArthur
48c47bf11e clean up, fix tests 2019-01-16 12:21:38 -05:00
Connor McArthur
8783c013e5 add node level timing info to run_results.json and to tracking 2019-01-15 20:41:08 -05:00
Jacob Beck
70069f53b1 Move SQL previously embedded into adapters into macros
Adapters now store an internal manifest that only has the dbt internal projects
Adapters use that manifest if none is provided to execute_manifest
The internal manifest is lazy-loaded to avoid recursion issues
Moved declared plugin paths down one level
Connection management changes to accommodate calling macro -> adapter -> macro
Split up precision and scale when describing number columns so agate doesn't eat commas
Manifest building now happens in the RunManager instead of the compiler

Now macros:
  create/drop schema
  get_columns_in_relation
  alter column type
  rename/drop/truncate
  list_schemas/check_schema_exists
  list_relations_without_caching
2019-01-15 08:08:45 -07:00
Jacob Beck
bf665e1c14 Merge branch 'dev/stephen-girard' into feature/dynamic-database-config 2019-01-15 07:58:35 -07:00
Connor McArthur
e359a69b18 Merge pull request #1224 from convoyinc/adriank/add_snowflake_sso_support
Add support for Snowflake SSO authentication round 2
2019-01-09 15:53:49 -05:00
Adrian Kreuziger
c01caefac9 Add support for Snowflake SSO authentication 2019-01-08 16:40:30 -08:00
Jacob Beck
2653201fe1 do not re-raise 2019-01-08 14:53:48 -07:00
Jacob Beck
dadab35aee PR feedback
bigquery: naming/parameter sanity cleanup
postgres: never allow databases that aren't the default
postgres: simplify cache building since we know we'll only ever have one database
everything: parameter name change for execute_macro
everything: cache related bugfixes to casing
internal only: cross db/cross schema rename support in the cache
  - none of the adapters support it, but unit tests expose the behavior
tests: much more comprehensive cache tests
2019-01-08 11:36:00 -07:00
Jacob Beck
c218af8512 Point at the 0.13 branch of dbt-utils 2019-01-08 11:36:00 -07:00
Jacob Beck
874ead9751 Make config() accept database, add adapter-specific aliasing
Add concept of aliasing for credentials/relations

All databases use database, schema, and identifier internally now:
 - Postgres/Redshift have 'dbname' aliased to database and 'pass' aliased to password
 - Bigquery has 'project' aliased to database and 'dataset' aliased to
    schema
 - Set default database include policy to True everywhere

config() calls accept aliases instead of canonical names

Remove unused functions and change others to accept Relations (see core/CHANGELOG.md)

Add catalog, etc support for multiple databases
2019-01-08 11:36:00 -07:00
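The commit above makes database a first-class part of relations and lets config() and credentials accept the adapter-specific aliases it lists. A hedged sketch of what that looks like for a BigQuery model, using the 'dataset' alias described in the commit (model and dataset names hypothetical):

```sql
-- models/marketing_summary.sql (BigQuery; names hypothetical)
-- 'dataset' is the BigQuery alias for 'schema' added in the commit above
{{ config(materialized='table', dataset='marketing') }}

select * from {{ ref('stg_campaigns') }}
```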
Jacob Beck
1e5308db31 Merge remote-tracking branch 'origin/0.12.latest' into dev/stephen-girard 2019-01-08 11:32:20 -07:00
Connor McArthur
83c8381c19 Bump version: 0.12.2rc1 → 0.12.2 2019-01-07 14:14:41 -05:00
Connor McArthur
b5f5117555 Merge pull request #1220 from fishtown-analytics/dev/grace-kelly
Release: Grace Kelly (0.12.2)
2019-01-07 14:13:50 -05:00
Drew Banin
b0a4f5c981 Merge pull request #1216 from mikekaminsky/docs/dev-installation
Updates contributing doc with new local installation instructions
2019-01-07 13:25:14 -05:00
Drew Banin
1da50abe2f Merge pull request #1200 from fishtown-analytics/changelog/grace-kelly
Update CHANGELOG.md for Grace Kelly
2019-01-07 11:06:02 -05:00
Drew Banin
6d69ff0bda Update CHANGELOG.md
add links to docs
2019-01-07 11:04:39 -05:00
Jacob Beck
7179d135fa Merge pull request #1218 from fishtown-analytics/fix/lets-not-hang-on-errors
move cleanup into the executing thread (#1214)
2019-01-07 08:38:41 -07:00
Jacob Beck
3e4523e1ef On errors inside the thread, set a raise_next_tick flag
Threads that raise exceptions bypass the callback, which makes dbt hang.
Now threads don't raise during the callback, instead they set a flag.
The RunManager will check the flag during queue processing and raise if set.
Fix compilation failures so they raise properly.
2019-01-07 07:58:23 -07:00
Michael Kaminsky
69a65dd63f Updates contributing doc with new local installation instructions 2019-01-05 20:45:42 -06:00
Drew Banin
3c25a9b40d Merge pull request #1210 from fishtown-analytics/fix/dbt-init-command
do not try to remove remote after dbt init
2019-01-04 14:56:28 -05:00
Drew Banin
f43d9b5e88 pr feedback 2019-01-04 13:14:11 -05:00
Drew Banin
5826bc80bc (fixes #1209) do not try to remove remote after dbt init 2019-01-03 22:27:51 -05:00
Drew Banin
6563b03299 Merge pull request #1208 from fishtown-analytics/fix/repo-file-placement
move markdown files back into place
2019-01-03 18:13:53 -05:00
Drew Banin
406ff55c7d move markdown files back into place 2019-01-03 18:03:46 -05:00
Adrian
8882bbe617 Merge pull request #1 from fishtown-analytics/dev/stephen-girard
Merge upstream
2019-01-03 13:09:05 -08:00
Drew Banin
769a886b93 Merge pull request #1206 from fishtown-analytics/fix/windows-unicode-output-for-dbt-debug
Replace unicode characters with ascii strings
2019-01-03 13:27:38 -05:00
Jacob Beck
f2fc002f5c Merge pull request #1145 from fishtown-analytics/dbt-core
dbt core/plugin architecture (#1070, #1069)
2019-01-02 16:59:29 -07:00
Jacob Beck
836998c9e9 Merge branch 'dev/stephen-girard' into dbt-core 2019-01-02 11:30:31 -07:00
Drew Banin
f90a5b14ad (fixes #1201) Replace unicode characters with ascii strings 2019-01-02 10:51:50 -05:00
Drew Banin
6004bdf012 Merge pull request #1191 from brianhartsock/archive_character_columns
Ensure character columns are treated as string types.
2018-12-21 13:22:26 -05:00
Drew Banin
d7610a7c55 Merge pull request #1192 from patrickgoss/fix/postgres-dependency-query
Postgresql dependency query speedup
2018-12-21 13:21:01 -05:00
Drew Banin
2ecc1e06cf Merge pull request #1198 from vijaykiran/dev/grace-kelly
Print dbt version before every task is run
2018-12-21 13:16:08 -05:00
Drew Banin
dcc017d681 Merge pull request #1199 from vijaykiran/update-contributing-doc
Update environment setup in CONTRIBUTING.md
2018-12-21 13:14:36 -05:00
Drew Banin
ea401f6556 Update CHANGELOG.md 2018-12-21 13:07:19 -05:00
Vijay Kiran
09fbe288d8 Update environment setup in CONTRIBUTING.md
- Document command to activate the virtualenv
- Fix minor typo
2018-12-21 19:02:57 +01:00
Vijay Kiran
7786175d32 Print dbt version before every task is run
resolves fishtown-analytics/dbt#1134
2018-12-21 17:58:35 +01:00
Jacob Beck
f558516f40 remove extra script 2018-12-20 08:41:04 -07:00
Jacob Beck
d8c46d94df fix the issue on bigquery where tests executed with a connection name of "master" 2018-12-20 08:40:13 -07:00
pgoss
f64e335735 split the relationship CTE in order to speed the dependency query on large dbs 2018-12-19 12:32:32 -05:00
Brian Hartsock
b263ba7df2 Ensure character columns are treated as string types. 2018-12-19 10:31:11 -05:00
Jacob Beck
d2a68d92a3 Merge pull request #1176 from fishtown-analytics/fix/remove-operations
Remove operations [#1117]
2018-12-18 12:23:46 -07:00
Jacob Beck
4cbff8e1a1 update changelog 2018-12-18 08:28:08 -07:00
Jacob Beck
33ffafc7d6 run_operation -> execute_macro 2018-12-18 08:01:59 -07:00
Jacob Beck
4780c4bb18 Split dbt into core and plugins 2018-12-14 11:17:41 -07:00
Jacob Beck
c61561aab2 Merge pull request #1178 from fishtown-analytics/feature/package-dbt-version-compatibility
package dbt version compatibility (#581)
2018-12-14 10:15:19 -08:00
Jacob Beck
9466862560 add require-dbt-version config item and --no-version-check flag, make dependency errors fail the run 2018-12-14 10:33:05 -07:00
Jacob Beck
931dd4e301 Merge branch 'dev/grace-kelly' into dev/stephen-girard 2018-12-14 10:16:40 -07:00
Jacob Beck
e52475cac7 Merge pull request #1186 from fishtown-analytics/fix/tracking
make tracking use the profile directory, and suppress errors (#1180)
2018-12-14 08:18:35 -08:00
Claire Carroll
afa9fc051e Merge pull request #1189 from fishtown-analytics/add-m-flag-for-tests
Add -m argument for dbt test
2018-12-14 11:15:51 -05:00
Claire Carroll
3cbf49cba7 Add m flag for tests 2018-12-13 16:46:06 -05:00
Jacob Beck
5deb7e8c2d everything about tracking is hard 2018-12-13 14:04:53 -07:00
Jacob Beck
2003222691 Merge branch 'dev/grace-kelly' into dev/stephen-girard 2018-12-13 12:58:44 -07:00
Jacob Beck
08913bf96b Merge pull request #1188 from fishtown-analytics/ipdb-dev-requirement
Add ipdb to dev requirements so it shows up in test docker containers
2018-12-13 11:48:09 -08:00
Jacob Beck
15c047077a Add ipdb to dev requirements so it shows up in test docker containers for debugging 2018-12-13 10:28:39 -07:00
Jacob Beck
260bcfd532 Merge branch 'dev/grace-kelly' into dev/stephen-girard 2018-12-13 09:07:16 -07:00
Claire Carroll
3deb295d29 Merge pull request #1147 from fishtown-analytics/cleanup-repo
Cleanup repo and update readme
2018-12-13 09:50:04 -05:00
Claire Carroll
f56ac542bc Merge branch 'dev/grace-kelly' into cleanup-repo 2018-12-13 09:46:25 -05:00
Claire Carroll
ebd6d3ef19 Update readme and cleanup repo 2018-12-13 09:43:38 -05:00
Jacob Beck
808ed75858 make tracking use the profile directory, and suppress errors 2018-12-12 16:01:42 -07:00
Connor McArthur
d2c704884e Bump version: 0.12.2a1 → 0.12.2rc1 2018-12-07 14:10:57 -05:00
Jacob Beck
91a5b1ce52 Merge branch 'dev/grace-kelly' into dev/stephen-girard 2018-12-05 12:14:54 -07:00
Jacob Beck
bec30efec5 Merge pull request #1174 from fishtown-analytics/feature/latest-version-from-pypi
use pypi for the latest version instead of git [#1122]
2018-12-05 11:56:44 -07:00
Drew Banin
2cd24cfa9e Merge pull request #1129 from fishtown-analytics/feature/better-incremental-models
(fixes #744) deprecate sql_where and provide an alternative
2018-12-05 12:52:45 -05:00
Jacob Beck
963fb84cb7 Merge pull request #1171 from fishtown-analytics/feature/make-debug-great
Improve dbt debug [#1061]
2018-12-05 10:52:03 -07:00
Connor McArthur
65729c4acc Bump version: 0.12.1 → 0.12.2a1 2018-12-05 12:15:50 -05:00
Jacob Beck
9b8e8ff17a more pr feedback 2018-12-05 10:12:57 -07:00
Jacob Beck
8ea9c68be0 PR feedback 2018-12-05 10:00:30 -07:00
Jacob Beck
282774cbdf Merge branch 'dev/grace-kelly' into dev/stephen-girard 2018-12-05 09:49:07 -07:00
Jacob Beck
eb504ae866 use pypi for the latest version instead of git 2018-12-05 09:29:30 -07:00
Jacob Beck
0f1520c392 re-add call to path_info 2018-12-05 09:23:11 -07:00
Drew Banin
009eaa3a59 pep8 2018-12-05 11:06:04 -05:00
Drew Banin
80232ff8e8 (fixes #744) deprecate sql_where and provide an alternative 2018-12-05 10:44:20 -05:00
Jacob Beck
9938af1580 pr feedback 2018-12-05 08:33:16 -07:00
Jacob Beck
6935a4a2e4 pr feedback: add pretty colors 2018-12-05 08:16:41 -07:00
Jacob Beck
eef5024354 Merge branch 'dev/grace-kelly' into feature/make-debug-great 2018-12-05 08:12:31 -07:00
Jacob Beck
bb6b469768 Merge pull request #1157 from fishtown-analytics/feature/graph-iteration
Handle concurrent job ordering via a graph iterator (#813)
2018-12-05 08:10:11 -07:00
Jacob Beck
d4c2dfedb2 dbt debug 2018-12-05 07:34:55 -07:00
Jacob Beck
3434ad9ca0 Bypass project loading in init, we do it anyway 2018-12-05 07:34:55 -07:00
Jacob Beck
937219dd91 add connection_info method to credentials for dbt debug 2018-12-05 07:34:55 -07:00
Jacob Beck
5bdd1ebdbc Merge pull request #1159 from fishtown-analytics/fix/use-root-generate-schema-name
get the default schema name generator from the root package [#801]
2018-12-05 07:32:52 -07:00
Jacob Beck
17f3f24652 Merge pull request #1168 from fishtown-analytics/fix/snowflake-drops-wrong-backup-type
drop the correct relation type on snowflake when building tables [#1103]
2018-12-05 07:32:25 -07:00
Jacob Beck
d80b37854a Merge pull request #1169 from fishtown-analytics/feature/log-warnings-on-missing-test-refs
log warnings on missing test refs [#968]
2018-12-05 07:32:02 -07:00
Jacob Beck
cbfa21ce45 PR feedback 2018-12-04 15:04:10 -07:00
Jacob Beck
3665e65986 reorganize some things to make us better detect disabled vs missing nodes 2018-12-04 11:16:46 -07:00
Jacob Beck
0daca0276b log about missing test ref targets at warning instead of debug level, include filepath 2018-12-04 09:33:24 -07:00
Jacob Beck
8769118471 drop the correct relation type on snowflake when building tables 2018-12-04 09:09:32 -07:00
Jacob Beck
863dbd2f4d Merge branch 'dev/grace-kelly' into fix/use-root-generate-schema-name 2018-12-03 16:36:46 -07:00
Jacob Beck
eb00b1a1b9 Merge pull request #1166 from fishtown-analytics/fix/trim-schema-whitespace
Trim schema whitespace (#1074)
2018-12-03 16:35:27 -07:00
Jacob Beck
953ba9b8eb Merge pull request #1163 from fishtown-analytics/fix/cdecimal-json-encoding
handle cdecimal.Decimal during serialization [#1155]
2018-12-03 16:29:53 -07:00
Jacob Beck
aa9d43a3fc Merge pull request #1164 from fishtown-analytics/fix/analysis-parsing-inside-raw
fix analysis to only compile sql once like model nodes [#1152]
2018-12-03 16:29:31 -07:00
Jacob Beck
9d5cbf7e51 strip the schema when there is extra whitespace 2018-12-03 14:35:32 -07:00
Jacob Beck
1744f21084 Refactor nodes into objects, make generate_schema_name use its own context
Allows us to cache the get_schema behavior without re-building it for every node
Allows us to special-case the generate_schema_name macro
2018-12-03 14:10:59 -07:00
Jacob Beck
adf05bd11d get the default schema name generator from the root package 2018-12-03 14:09:57 -07:00
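The two commits above route custom schema resolution through the root project's `generate_schema_name` macro. A sketch of the override a project might define, mirroring the commonly documented default implementation (the argument order shown follows later releases and may differ slightly here):

```sql
-- macros/generate_schema_name.sql
{% macro generate_schema_name(custom_schema_name, node) -%}
    {%- set default_schema = target.schema -%}
    {%- if custom_schema_name is none -%}
        {{ default_schema }}
    {%- else -%}
        {{ default_schema }}_{{ custom_schema_name | trim }}
    {%- endif -%}
{%- endmacro %}
```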
Jacob Beck
3d205c3597 fix analysis to only compile sql once like model nodes, fix test 2018-12-03 13:06:52 -07:00
Jacob Beck
9f9b861769 handle cdecimal.Decimal during serialization as well as decimal.Decimal 2018-12-03 07:48:51 -07:00
Jacob Beck
6025d3d843 Merge pull request #1161 from fishtown-analytics/feature/m-is-for-models
add "-m" shorthand for models (#1160)
2018-11-30 15:33:12 -07:00
Jacob Beck
3cf03f3018 add "-m" shorthand for models 2018-11-30 15:25:29 -07:00
Jacob Beck
1c0caf9a81 Merge branch 'dev/grace-kelly' into dev/stephen-girard 2018-11-29 14:24:04 -07:00
Jacob Beck
4dc79f655f pr feedback, add many docstrings 2018-11-29 08:04:23 -07:00
Jacob Beck
5a06d57d7e use the manifest for node information and block/timeout arguments to get()
Assorted cleanup/test fixes
2018-11-28 15:14:45 -07:00
Jacob Beck
84ba7f57d0 fix many bugs, move RunBuilder back into the RunManager 2018-11-28 14:14:00 -07:00
Connor McArthur
8af30611f3 Merge pull request #1133 from fishtown-analytics/bigquery-smarter-delete-dataset
Bigquery: smarter delete_dataset
2018-11-28 09:40:14 -05:00
Jacob Beck
6e27476faa Merge pull request #1105 from fishtown-analytics/fix/hub-packaging
Fix the hub packaging so that it at least mostly works
2018-11-28 07:39:19 -07:00
Jacob Beck
acddb3b939 RunManager now operates on queues instead of lists 2018-11-27 15:26:16 -07:00
Jacob Beck
b6193be1ef add initial work on a graph-based priority queue 2018-11-27 15:26:16 -07:00
Jacob Beck
e7b1a093a3 Refactor the RunManager to build its runners as it iterates over the nodes instead of in advance 2018-11-27 15:26:16 -07:00
Jacob Beck
5be8c7f85f Merge pull request #1150 from fishtown-analytics/feature/config-muliple-calls
allow multiple config() calls (#558)
2018-11-26 12:16:06 -07:00
Jacob Beck
b751ed6c6a Merge pull request #1146 from fishtown-analytics/feature/hook-config-aliases
add pre_hook/post_hook aliases to config (#1124)
2018-11-26 11:55:22 -07:00
Jacob Beck
d16ca86782 Merge pull request #1151 from fishtown-analytics/feature/bq-profile-location
Add 'location' to google bigquery profile (#969)
2018-11-26 11:54:20 -07:00
Jacob Beck
b92d6692ce test that location gets passed along 2018-11-23 13:09:12 -07:00
Jacob Beck
dab2ff402f add a location field to the bigquery profile and pass it along to the google library if it is provided 2018-11-23 11:04:46 -07:00
Jacob Beck
51252b06b9 Add a test for overwrite/append 2018-11-23 10:12:24 -07:00
Jacob Beck
1fd84ad9d5 add unit tests 2018-11-23 10:12:24 -07:00
Jacob Beck
c4d6b2ed0f Delete dead code, move SourceConfig into the parser, allow multiple calls 2018-11-23 10:12:24 -07:00
Jacob Beck
71a239825a Merge branch 'dev/grace-kelly' into feature/hook-config-aliases 2018-11-23 08:11:31 -07:00
Jacob Beck
f72e0a8ddf Merge pull request #1148 from fishtown-analytics/feature/jinja-do-statement
Jinja expression statements (#1113)
2018-11-23 06:38:18 -07:00
Jacob Beck
069bc3a905 add do-syntax 2018-11-21 14:53:13 -07:00
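The commit above enables Jinja's expression-statement ("do") extension, letting templates call methods for their side effects without printing anything. A small sketch with hypothetical model and column names:

```sql
-- models/payment_amounts.sql (names hypothetical)
{% set methods = ['bank_transfer', 'credit_card'] %}
{% do methods.append('gift_card') %}  -- mutates the list without emitting output

select
    order_id,
    {% for m in methods %}
    sum(case when payment_method = '{{ m }}' then amount else 0 end) as {{ m }}_amount{% if not loop.last %},{% endif %}
    {% endfor %}
from {{ ref('stg_payments') }}
group by 1
```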
Jacob Beck
0307d78236 PR feedback, tests around it 2018-11-21 12:29:19 -07:00
Jacob Beck
e543dc4278 add pre_hook/post_hook kwargs to config, add tests 2018-11-21 11:54:45 -07:00
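The commit above adds `pre_hook`/`post_hook` as config() keyword aliases for the hyphenated `pre-hook`/`post-hook` keys, which are not valid Python keyword arguments. A minimal sketch, with a hypothetical model and grant target:

```sql
-- models/orders.sql (names hypothetical)
{{
    config(
        materialized='table',
        post_hook='grant select on {{ this }} to reporter'
    )
}}

select * from {{ ref('stg_orders') }}
```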
Jacob Beck
029ef1795f Merge pull request #1020 from fishtown-analytics/profiler
dbt builtin profiler
2018-11-20 07:57:44 -07:00
Jacob Beck
12433fdba4 Merge branch 'dev/grace-kelly' into profiler 2018-11-20 07:08:49 -07:00
Jacob Beck
0a66adf707 Merge pull request #1142 from fishtown-analytics/feature/profiler-with-single-threaded
single-threaded mode
2018-11-19 19:06:30 -07:00
Jacob Beck
b5aab26c38 Merge pull request #1144 from fishtown-analytics/fix/run-repeatability-issues
Fix run repeatability/caching issues (#1138, #1139, #1140)
2018-11-19 19:04:44 -07:00
Drew Banin
416173a03c Merge pull request #1128 from fishtown-analytics/cache-macro-only-manifest
cache the macro manifest
2018-11-19 20:47:53 -05:00
Jacob Beck
e82361c893 lowercase the cache's view of schema/identifier, fix drop 2018-11-19 09:51:27 -07:00
Jacob Beck
7d3bf03404 fix casing expectations in unit tests 2018-11-19 09:51:27 -07:00
Jacob Beck
eb50b8319b Add integration tests with interesting model/case/quoting settings 2018-11-19 09:51:27 -07:00
Jacob Beck
cfd2d60575 stop dropping renames... 2018-11-19 09:51:27 -07:00
Jacob Beck
d4c3fb8261 fix schema cache casing bugs 2018-11-19 09:51:27 -07:00
Jacob Beck
7940b71ffe on dbt invocation, reset the adapters (to reset the cache) 2018-11-19 09:51:27 -07:00
Jacob Beck
6dd04b1a43 set cache logger propagation during invocation time instead of at log init time 2018-11-19 09:51:25 -07:00
Jacob Beck
399a6854c5 add a special flag to make dbt use map instead of the pool 2018-11-19 09:21:59 -07:00
Drew Banin
8eded7081c PR feedback (move comment) 2018-11-19 10:38:12 -05:00
Drew Banin
3bdebba18d Merge pull request #1123 from fishtown-analytics/fix/local-packages
fix for deps that depend on other local deps
2018-11-19 10:17:56 -05:00
Jacob Beck
8aab340a2a Merge branch 'dev/grace-kelly' into profiler 2018-11-19 08:13:34 -07:00
Drew Banin
0138228309 Merge branch '0.12.latest' into dev/grace-kelly 2018-11-15 16:05:58 -05:00
Connor McArthur
3912028318 bigquery: use delete_contents option in delete_dataset, remove unused drop_tables_in_schema 2018-11-15 10:45:04 -05:00
Connor McArthur
59cea11ef5 Bump version: 0.12.0 → 0.12.1 2018-11-15 09:28:22 -05:00
Connor McArthur
d9c12abd2d Merge pull request #1131 from fishtown-analytics/dev/0.12.1
dbt 0.12.1
2018-11-15 09:27:34 -05:00
Drew Banin
4b981caa53 first cut at caching macro manifest 2018-11-14 21:33:16 -05:00
Jacob Beck
735ff8831d Merge pull request #1110 from fishtown-analytics/fix/dbt-deps-issues
Fix dbt deps issues (#778 #994 #895)
2018-11-14 13:31:28 -07:00
Drew Banin
0264386c18 Update CHANGELOG.md 2018-11-14 15:09:06 -05:00
Jacob Beck
6529c3edd3 Merge pull request #1109 from fishtown-analytics/feature/adapter-refactor
Feature/adapter refactor (#962, #963, #965, #1035, #1090)
2018-11-14 11:32:05 -07:00
Jacob Beck
8840996a30 user feedback: log download directory in dbt deps 2018-11-14 11:30:17 -07:00
Jacob Beck
d35e549dbf Handle cross-drive windows permissions issues by undoing git's readonly settings 2018-11-14 10:37:23 -07:00
Drew Banin
7195f07b3d fix for local deps that depend on other local deps 2018-11-13 21:55:25 -05:00
Drew Banin
9398ccd820 Merge pull request #1120 from fishtown-analytics/fix/do-not-cache-irrelevant-schemas
(fixes #1119) Ignore dependencies into dbt schemas from external schemas
2018-11-13 16:18:58 -05:00
Drew Banin
cfd4aad49c lowercase schema names in check 2018-11-13 14:58:13 -05:00
Drew Banin
a97620f8f0 (fixes #1119) Ignore dependencies into dbt schemas from external schemas 2018-11-13 14:47:01 -05:00
Jacob Beck
351542257e Merge branch 'dev/grace-kelly' into fix/dbt-deps-issues 2018-11-13 11:32:42 -07:00
Jacob Beck
8927aa8e02 clean up TODOs 2018-11-13 09:54:10 -07:00
Jacob Beck
717d1ed995 Merge pull request #1107 from fishtown-analytics/no-postgres-for-non-postgres
On non-postgres tests, don't require the postgres container (#841)
2018-11-13 08:02:28 -07:00
Jacob Beck
3773843094 in deps, when "git" is not in the path, link users to help docs 2018-11-13 08:02:08 -07:00
Jacob Beck
9bee0190d2 Lazy load adapters
Move adapter package loading to runtime, after configs have been mostly parsed
Parse/validate credentials after determining what the type is
profile/config contracts ignore credentials
2018-11-12 11:30:28 -07:00
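The "Lazy load adapters" commit above defers adapter imports until the profile has revealed which adapter type is needed. A minimal sketch of that pattern in Python, assuming hypothetical module paths and registry names rather than dbt's actual code:

```python
from importlib import import_module

# Hypothetical module paths and registry; a sketch of the idea, not dbt's code.
_ADAPTER_MODULES = {
    "postgres": "dbt.adapters.postgres",
    "snowflake": "dbt.adapters.snowflake",
    "bigquery": "dbt.adapters.bigquery",
}
_loaded = {}

def get_adapter_module(adapter_type):
    # Import the adapter package only on first use, after the profile has
    # told us which type is actually needed, instead of at startup.
    if adapter_type not in _loaded:
        _loaded[adapter_type] = import_module(_ADAPTER_MODULES[adapter_type])
    return _loaded[adapter_type]
```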
Jacob Beck
60c4619862 Make adapters optional 2018-11-12 11:30:28 -07:00
Jacob Beck
9ffbb3ad02 Split out connection managers 2018-11-12 11:30:28 -07:00
Jacob Beck
350b81db99 Class hierarchy, deprecate and remove deprecated methods, abstract method definitions 2018-11-12 11:30:28 -07:00
Jacob Beck
412b165dc9 fix issue where we incorrectly logged stack traces 2018-11-12 11:29:45 -07:00
Jacob Beck
531d7c687e use environment variables or per-run temp directories to assign the temporary downloads directory 2018-11-12 11:29:45 -07:00
Jacob Beck
e866caa900 add support for cross-drive moves 2018-11-12 11:29:45 -07:00
Jacob Beck
ec466067b2 Tests, fixup ci 2018-11-12 11:29:45 -07:00
Jacob Beck
59b6f78c71 Raise an exception on rc!=0 in run_cmd, raise more specific exceptions about what went wrong on error 2018-11-12 11:29:45 -07:00
Jacob Beck
7757c85d4f add some helpful exceptions about command running 2018-11-12 11:29:45 -07:00
Jacob Beck
3077eecb97 Avoid connecting to postgres on each run 2018-11-12 11:29:11 -07:00
Jacob Beck
e6fc0f6724 make 2.7 exceptions have the same output as 3.x 2018-11-12 11:27:43 -07:00
Jacob Beck
42f817abf5 contracts and very basic tests for hub packages 2018-11-12 11:27:43 -07:00
Connor McArthur
e716db7983 Merge pull request #1116 from fishtown-analytics/dev/guion-bluford
dbt 0.12 (guion bluford)
2018-11-12 13:12:31 -05:00
Connor McArthur
51f68e3aab Bump version: 0.12.0rc1 → 0.12.0 2018-11-12 10:36:38 -05:00
Connor McArthur
3dc2c9126d Bump version: 0.12.0a2 → 0.12.0rc1 2018-11-02 15:52:56 -04:00
Drew Banin
3f409a7183 Merge pull request #1092 from fishtown-analytics/0-12-0-changelog
Update CHANGELOG.md for v0.12.0
2018-11-02 11:38:45 -04:00
Drew Banin
b3133f7cdf Merge pull request #1102 from fishtown-analytics/tags-in-docs-site
add tags to docs site
2018-11-01 15:45:29 -04:00
Drew Banin
ac8ed74f28 Update CHANGELOG.md 2018-11-01 14:55:54 -04:00
Drew Banin
b6af6994cb add tags to docs site 2018-10-30 11:00:25 -04:00
Jacob Beck
371e8b438a Merge pull request #1099 from fishtown-analytics/fix/allow-string-hooks
allow strings in hook configuration (#1097)
2018-10-29 13:54:05 -06:00
Jacob Beck
91efd7fc78 compatibility fix for 2.7 + socketserver 2018-10-29 10:25:13 -06:00
Jacob Beck
133c857c8b allow strings in hook configuration, have the preprocessor sort it all out 2018-10-29 10:16:17 -06:00
Connor McArthur
3b0c9f8b48 merged dev/gb 2018-10-29 10:55:33 -04:00
Jacob Beck
37a888deac Merge pull request #1094 from fishtown-analytics/fix/only-error-skip-on-ephemeral-failures
Fix error skip criteria (#1093)
2018-10-29 08:17:31 -06:00
Jacob Beck
46dc4eaa87 fix causality 2018-10-25 13:13:22 -06:00
Jacob Beck
20132085b5 PR feedback 2018-10-25 12:02:49 -06:00
Jacob Beck
20417a02a4 Only log ERROR SKIPs when the skip was caused by an ephemeral model failure 2018-10-25 11:05:48 -06:00
Drew Banin
aa16f3dcb3 Update CHANGELOG.md for v0.12.0 2018-10-24 22:53:59 -04:00
Jacob Beck
23dfcdd396 Merge pull request #1089 from fishtown-analytics/fix/set-default-target
Add the default target back in (#1088)
2018-10-24 12:45:01 -06:00
Jacob Beck
bf00abf71e pep8 2018-10-24 11:34:05 -06:00
Jacob Beck
8ff06d8b82 Add the default target back in, fix tests 2018-10-24 11:31:18 -06:00
Drew Banin
61af974a83 Merge pull request #1079 from joshtemple/hotfix/bq-load-errormsg
Display BigQuery error stream when a load fails during dbt seed.
2018-10-24 11:42:12 -04:00
Connor McArthur
e3cc9f0f92 Bump version: 0.12.0a1 → 0.12.0a2 2018-10-24 11:19:09 -04:00
Jacob Beck
92670cbce0 Merge pull request #1086 from fishtown-analytics/fix/undefined-error-cleanup
Handle undefined variables in jinja better (#935)
2018-10-23 14:55:53 -06:00
Jacob Beck
8a921360d7 improve error message on undefined 2018-10-23 13:47:14 -06:00
Jacob Beck
045783e296 implement getitem so jinja cleanly propagates undefined values during parsing 2018-10-23 11:44:30 -06:00
Jacob Beck
15ae37a70c Handle undefined variables in jinja without accidentally raising ParsedMacroCapture objects 2018-10-23 11:44:30 -06:00
Jacob Beck
a31745683b Merge pull request #1085 from fishtown-analytics/fix/more-helpful-invalid-refs
Fix/more helpful invalid refs (#1080)
2018-10-23 09:52:54 -06:00
Josh Temple
1f5d0fb72c Raise native dbt exception on BigQuery load error instead of Google Cloud API exception. 2018-10-23 10:26:25 -04:00
Jacob Beck
6702d5e35c PR feedback 2018-10-23 08:19:02 -06:00
Jacob Beck
9afc06c3c7 make undefined refs more user-friendly 2018-10-23 07:57:02 -06:00
Jacob Beck
5ef8df2fae add a lot of handling for edge cases around captured undefined values 2018-10-23 07:38:51 -06:00
Jacob Beck
fe4389509a Merge pull request #1081 from fishtown-analytics/fix-none-models
Fix validation issues (#1078)
2018-10-23 06:53:40 -06:00
Jacob Beck
a0856c1785 log helpful information about profiles on profile errors instead of on project errors 2018-10-22 17:14:13 -06:00
Jacob Beck
f3441662b5 add recursion handling 2018-10-22 16:29:35 -06:00
Jacob Beck
bd40ff337f package unit tests 2018-10-22 15:56:08 -06:00
Jacob Beck
f312583627 allow git package contracts to have arrays of strings to handle version resolution 2018-10-22 13:38:58 -06:00
Jacob Beck
eb066ec337 Allow and handle nulls in some project configuration fields 2018-10-22 13:29:44 -06:00
Jacob Beck
a20d98aeaf remove memoization as it interferes with some uses of deep_map 2018-10-22 13:28:24 -06:00
Josh Temple
9f5701680f Display BigQuery error stream when a load fails during dbt seed. 2018-10-22 15:09:06 -04:00
Connor McArthur
29578858c6 Bump version: 0.11.1 → 0.12.0a1 2018-10-19 12:03:18 -04:00
Jacob Beck
31bd22f753 Merge pull request #1057 from fishtown-analytics/macro-parsing
Macro parsing
2018-10-19 09:29:06 -06:00
Jacob Beck
74ad1ca1d1 fix docs tests 2018-10-18 14:52:17 -06:00
Jacob Beck
5c6e464f98 handle subpackage path collisions and make the original_file_path on macros correct 2018-10-18 14:06:23 -06:00
Jacob Beck
2451b78cdf handle multiple macros in the same template 2018-10-18 13:13:44 -06:00
Jacob Beck
c8fc558099 between integration tests, clear the template cache 2018-10-18 10:23:33 -06:00
Jacob Beck
1f4c35f3d8 Merge branch 'dev/stephen-girard' into macro-parsing 2018-10-18 09:41:34 -06:00
Drew Banin
0b135772d2 Merge pull request #1073 from fishtown-analytics/fix/merge-dev-guion-bluford
fix the build
2018-10-18 10:46:32 -04:00
Drew Banin
014672637a fix the build 2018-10-17 16:55:09 -04:00
Drew Banin
84588a366c Merge pull request #1014 from fishtown-analytics/feature/tags
Add custom tags
2018-10-17 15:30:44 -04:00
Drew Banin
ab14380eb8 PR Feedback 2018-10-17 14:53:04 -04:00
Drew Banin
6d66ab06d1 Merge pull request #1049 from mikekaminsky/feature/profile-env-var
Allows the profile directory to be set with an environment var
2018-10-17 14:15:38 -04:00
Drew Banin
a829da5d48 Merge pull request #1067 from fishtown-analytics/fix/test-fail-count-casted-to-bool
Re-order Agate type inference ordering
2018-10-17 14:13:26 -04:00
Drew Banin
72996a2250 Merge pull request #1050 from fishtown-analytics/fix/bq-clusterby-multiple
Fix clustering on multiple columns (BigQuery) (#1013)
2018-10-17 14:12:29 -04:00
Jacob Beck
00fb0c4965 Merge pull request #1072 from fishtown-analytics/fix/move-cache-new-relations
move cache_new_relation call into the materialization caller itself
2018-10-17 12:06:53 -06:00
Jacob Beck
e15a04ade9 move cache_new_relation call into the materialization caller itself 2018-10-17 11:13:52 -06:00
Jeremy Cohen
9d1b25d424 local integration tests :) 2018-10-17 10:26:20 -04:00
Drew Banin
791fa2ba16 (fixes #1040) Re-order Agate type inference ordering
By putting Booleans after Numbers, Agate will correctly infer values
like "0" and "1" as Numbers. The boolean type is followed only by
"Text", so values like "True" and "False" should still be inferred
as booleans appropriately.
2018-10-17 09:48:37 -04:00
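The commit above reorders agate's candidate types so numbers are tried before booleans. A rough illustration using agate's public `TypeTester` API; the exact ordering shown is a plausible reading of the commit message, and the seed path is hypothetical:

```python
import agate

# Type inference in agate follows the order of the candidate types passed to
# TypeTester: Number before Boolean means "0"/"1" parse as numbers, while
# "True"/"False" still fall through to Boolean before the Text catch-all.
tester = agate.TypeTester(types=[
    agate.Number(),   # tried first: "0" and "1" become numbers
    agate.Boolean(),  # tried next: "True"/"False" become booleans
    agate.Date(),
    agate.DateTime(),
    agate.Text(),     # catch-all
])

# Hypothetical seed file, used only for illustration.
table = agate.Table.from_csv("seeds/flags.csv", column_types=tester)
print(table.column_types)
```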
Drew Banin
e13b31d99b Merge branch 'dev/guion-bluford' into feature/tags 2018-10-16 23:17:38 -04:00
Jacob Beck
4ec391b9fe Merge pull request #1027 from fishtown-analytics/pr/clrcrl/check-model-configs
Check model configs
2018-10-16 11:57:17 -06:00
Connor McArthur
84c0c355d5 Merge pull request #1056 from fishtown-analytics/snowplow-tracker-speedup
upgrade to minimal-snowplow-tracker 0.0.2
2018-10-16 11:40:21 -04:00
Jacob Beck
e07d1aa621 keep track of disabled nodes 2018-10-16 08:10:03 -06:00
Jacob Beck
34b98e06a3 Merge branch 'dev/guion-bluford' into pr/clrcrl/check-model-configs 2018-10-15 15:30:10 -06:00
Jacob Beck
c6dba6a83a pep8 2018-10-15 15:30:07 -06:00
Jeremy Cohen
e6b105837d update expected test counts 2018-10-15 16:11:47 -04:00
Drew Banin
1cf12ff6ae Merge pull request #1060 from mikekaminsky/docs/better-help
Improves dbt --help documentation
2018-10-15 10:40:36 -07:00
Jacob Beck
56523a5d82 Merge pull request #1062 from fishtown-analytics/fix/ambiguous-models-flag
Fix ambiguous models flag (#1023)
2018-10-15 11:37:05 -06:00
Jacob Beck
0ca86a5cf4 Merge pull request #1028 from fishtown-analytics/feature/schemas-on-on-run-end
make schemas available to on-run-end hooks (#908)
2018-10-15 10:39:40 -06:00
Jacob Beck
4c4bd0cd8f Merge branch 'dev/guion-bluford' into feature/schemas-on-on-run-end 2018-10-15 09:29:27 -06:00
Jacob Beck
8da3438ae7 fix tests 2018-10-15 09:22:43 -06:00
Jacob Beck
dd25750f7d Merge pull request #1053 from fishtown-analytics/fix/ephemeral-compile-errors
On ephemeral compile errors that lead to skips, generate real errors (#1037)
2018-10-15 09:13:58 -06:00
Jacob Beck
8210b1b86f Fix incorrect handling of package selections
When searching the possible ways to match an fqn to a qualified name,
dbt no longer short-circuits the search when a selector's first
field is a package but otherwise is not a match
2018-10-15 09:04:40 -06:00
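The package-selection fix above is about not abandoning the search after one failed interpretation of a selector. A toy sketch of that idea, with made-up helper names and a deliberately naive matching rule:

```python
def fqn_matches(selector_parts, fqn):
    # Sketch only: try the selector both as "package.rest" and as a bare
    # path, rather than giving up when the first field equals the package
    # name but the remainder does not match.
    if selector_parts[0] == fqn[0] and _tail_matches(selector_parts[1:], fqn[1:]):
        return True
    return _tail_matches(selector_parts, fqn[1:])

def _tail_matches(parts, fqn_tail):
    # Deliberately naive suffix comparison, purely for illustration.
    if not parts:
        return True
    return len(parts) <= len(fqn_tail) and list(parts) == list(fqn_tail[-len(parts):])

# e.g. fqn_matches(["my_package", "my_model"],
#                  ["my_package", "staging", "my_model"])  -> True
```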
Jacob Beck
d644d05245 add a test 2018-10-15 08:48:47 -06:00
Jacob Beck
33b14fd7d8 remove unnecessary function calls 2018-10-15 08:32:43 -06:00
Michael Kaminsky
b8c0602a37 Improves dbt --help documentation 2018-10-15 08:50:04 -05:00
Michael Kaminsky
e0de86ec88 Fix line length. Thanks Pep8! 2018-10-13 08:45:09 -05:00
Connor McArthur
8d418604f0 merged dev/gb 2018-10-13 09:20:23 -04:00
Connor McArthur
77b19b834b add and wire up TemplateCache 2018-10-13 09:14:40 -04:00
Connor McArthur
2f4b3c0e26 no need to disable_contracts now 2018-10-13 09:04:32 -04:00
Connor McArthur
b87d24de91 macro parsing improvements 2018-10-13 08:59:41 -04:00
Jeremy Cohen
7b30e0e406 attempt add integration tests 2018-10-12 17:49:14 -04:00
Jacob Beck
618dee031d Merge pull request #1025 from fishtown-analytics/feature/cache-relations
Feature/cache relations (#911)
2018-10-12 15:24:39 -06:00
Jacob Beck
c233caf50b Merge pull request #1048 from fishtown-analytics/fix/refs-in-vars-in-projects-not-rendered
Fix refs in vars in projects being rendered (#1047)
2018-10-12 14:40:33 -06:00
Michael Kaminsky
a54b5e39ab Shorter (and more pythonic) global variable setting 2018-10-12 14:57:57 -05:00
Jacob Beck
c25919d87e PR feedback 2018-10-12 13:24:57 -06:00
Jacob Beck
fb0da4578c handle ephemeral model failures better
On an ephemeral model failure mark dependencies as being skipped due to errors, and log them.
2018-10-12 10:35:57 -06:00
Jacob Beck
7e328b5c4c add an integration test 2018-10-12 10:33:29 -06:00
Jeremy Cohen
e6f044c516 make jinja respect whitespace 2018-10-10 18:23:43 -04:00
Jeremy Cohen
9ecd5a7a3c add space 2018-10-10 16:53:13 -04:00
Jeremy Cohen
a96dba2dd0 remove parens around bq clusterby 2018-10-10 13:46:31 -04:00
Michael Kaminsky
45bbaf2af1 Allows the profile directory to be set with an environment var
Passing the CLI flag (as in `dbt run --profiles-dir ~/path/to/profile`)
should still work. Similarly, if no DBT_PROFILES_DIR environment
variable is set, DBT will look for profiles in the default location.
2018-10-09 18:06:24 -04:00
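The commit above establishes a precedence order: the `--profiles-dir` flag, then `DBT_PROFILES_DIR`, then the default location. A small sketch of that lookup, assuming `~/.dbt` as the default and an illustrative function name:

```python
import os

DEFAULT_PROFILES_DIR = os.path.expanduser("~/.dbt")  # dbt's default location

def resolve_profiles_dir(cli_value=None):
    # An explicit --profiles-dir flag wins, then the DBT_PROFILES_DIR
    # environment variable, then the default. Sketch only.
    if cli_value:
        return cli_value
    return os.environ.get("DBT_PROFILES_DIR", DEFAULT_PROFILES_DIR)

# resolve_profiles_dir()          -> env var if set, else ~/.dbt
# resolve_profiles_dir("/tmp/p")  -> the CLI flag always wins
```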
Jacob Beck
0c7ef07cf3 Merge branch 'dev/guion-bluford' into feature/cache-relations 2018-10-09 13:05:58 -06:00
Jacob Beck
069b8ebf4d fixed the message generation expression 2018-10-09 09:47:03 -06:00
Jacob Beck
f1d01877dc Merge branch 'dev/guion-bluford' into pr/clrcrl/check-model-configs 2018-10-09 09:37:40 -06:00
Jacob Beck
c6acf94914 Merge pull request #1046 from fishtown-analytics/feature/parse-csv-seeds-only
Only parse CSV files during "dbt seed" (#867)
2018-10-09 09:34:05 -06:00
Jacob Beck
c85cb43c4d if a referenced schema is not in the cache, do not try to add a link to it 2018-10-09 08:42:53 -06:00
Jacob Beck
4c928c6157 add failing external references test 2018-10-09 08:42:27 -06:00
Jacob Beck
ce660cb826 fix __copy__/__deepcopy__ 2018-10-09 07:23:51 -06:00
Jacob Beck
8377522f61 move warning filtering into compatibility module 2018-10-09 07:23:51 -06:00
Jacob Beck
a03ca11ab9 handle a subtle iteration issue in the cache with extra locking 2018-10-09 07:23:51 -06:00
Jacob Beck
780512c279 PR feedback 2018-10-09 07:23:51 -06:00
Jacob Beck
57d814fda2 tests 2018-10-09 07:23:51 -06:00
Jacob Beck
3883ad351d docstrings 2018-10-09 07:23:51 -06:00
Jacob Beck
7be91155e1 make jinja debugging unconditional 2018-10-09 07:23:51 -06:00
Jacob Beck
a1cc37c6d9 comment/todos/errors cleanup 2018-10-09 07:23:51 -06:00
Jacob Beck
2cbae63649 go through list_relations for get_relation again, sadly 2018-10-09 07:23:51 -06:00
Jacob Beck
f8a78c39a0 remove "list_relations" calls in materializations 2018-10-09 07:23:51 -06:00
Jacob Beck
f4afd495ad Add _is_cached function, optimize get_relation for cache bypassing, add "in" operator support to the cache 2018-10-09 07:23:51 -06:00
Jacob Beck
2a7cebcf30 expose get_relation 2018-10-09 07:23:51 -06:00
Jacob Beck
4d0abe0961 add --bypass-cache flag that ignores using the cache 2018-10-09 07:23:51 -06:00
Jacob Beck
9b3df57588 on failure, do not clear the cache 2018-10-09 07:23:51 -06:00
Jacob Beck
fc146be08a add missing import 2018-10-09 07:23:51 -06:00
Jacob Beck
d359d0574b make cache logging a toggle 2018-10-09 07:23:51 -06:00
Jacob Beck
69cbb609ba remove verify-relation-cache as it is a bad idea due to concurrent transactions 2018-10-09 07:23:51 -06:00
Jacob Beck
d61b28e767 add exceptions, remove "kind" field, make the inner Relation reference mandatory 2018-10-09 07:23:51 -06:00
Jacob Beck
6f43c8fe50 turn off verification-by-default as threading makes it hard 2018-10-09 07:23:51 -06:00
Jacob Beck
6e30cd87aa slightly better error 2018-10-09 07:23:51 -06:00
Jacob Beck
5163529b69 add cache clearing 2018-10-09 07:23:51 -06:00
Jacob Beck
418f4adc6a pep8, skip text generation in optimized mode 2018-10-09 07:23:51 -06:00
Jacob Beck
b7b03c7064 run most tests in verify mode, at least for now 2018-10-09 07:23:51 -06:00
Jacob Beck
2e4bc56b73 More test tweaks 2018-10-09 07:23:51 -06:00
Jacob Beck
9f5040d8cc make jinja templates kinda debuggable by injecting ourselves into the linecache 2018-10-09 07:23:51 -06:00
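The linecache trick referenced above can be sketched as follows; the function name and fake filename are illustrative, not dbt's implementation:

```python
import linecache

def register_template_source(fake_filename, source):
    # Register the rendered template's source under a fake filename so that
    # tracebacks through compiled Jinja code can show the real lines.
    lines = [line + "\n" for line in source.splitlines()]
    # linecache.cache maps filename -> (size, mtime, lines, fullname)
    linecache.cache[fake_filename] = (len(source), None, lines, fake_filename)

register_template_source("<template: my_model.sql>", "select 1 as id")
print(linecache.getline("<template: my_model.sql>", 1))  # -> "select 1 as id"
```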
Jacob Beck
cf77a9a744 tons of logging 2018-10-09 07:23:51 -06:00
Jacob Beck
32765ed706 add cache verification flag 2018-10-09 07:23:51 -06:00
Jacob Beck
ccee039c76 First pass on caching 2018-10-09 07:23:51 -06:00
Jacob Beck
29584e3c51 Fix a bug where vars were rendered under models/seeds in the config 2018-10-09 07:17:20 -06:00
Jacob Beck
4ccab99765 added test 2018-10-09 07:07:00 -06:00
Jacob Beck
b523590f9e Merge branch 'dev/guion-bluford' into feature/parse-csv-seeds-only 2018-10-08 12:21:46 -06:00
Jacob Beck
2f72c0e496 Merge pull request #1033 from fishtown-analytics/feature/render-more-config-fields
Render more config fields (#960)
2018-10-08 12:18:09 -06:00
Jacob Beck
45ddd3d7f1 pr feedback: more tests 2018-10-08 10:03:19 -06:00
Jacob Beck
deab38a4e1 remove unused staticmethods 2018-10-08 09:41:53 -06:00
Jacob Beck
30b6868d95 make schemas available to on-run-end hooks 2018-10-05 15:01:31 -06:00
Jacob Beck
4b8d19c75c add test to ensure "dbt run" skips seed parsing 2018-10-05 14:59:29 -06:00
Jacob Beck
a1b44201d4 make seed parsing conditional on being a dbt seed invocation 2018-10-05 14:12:27 -06:00
Jacob Beck
52c1d5ace2 attach args to the config directly instead of just the cli parameters 2018-10-05 14:12:27 -06:00
Jacob Beck
e5e93f5f43 Merge branch 'dev/guion-bluford' into feature/render-more-config-fields 2018-10-05 08:03:29 -06:00
Jacob Beck
389c4af010 Merge pull request #1039 from fishtown-analytics/fix/no-backtrace-on-dbt-error
Remove backtraces on dbt errors (#1015)
2018-10-04 14:14:14 -06:00
Jacob Beck
bc8d523a4e PR feedback 2018-10-02 08:20:50 -06:00
Jacob Beck
42d6c9ff43 Merge pull request #1038 from fishtown-analytics/fix/clean-without-profiles
make clean not require a profile (#1022)
2018-10-01 13:28:42 -06:00
Drew Banin
4cc5e6d648 fix bq tests 2018-09-30 12:01:59 -04:00
Drew Banin
e4ca350391 (fixes #311) Configure tags, and select them with --models 2018-09-30 11:39:32 -04:00
Jacob Beck
36dcca2f1f avoid logging stack traces to the console on dbt-created errors 2018-09-28 14:30:16 -06:00
Jacob Beck
ba4cc78a75 properly move config errors into the dbt exception hierarchy 2018-09-28 14:22:39 -06:00
Jacob Beck
af44abf7a6 make clean not require a profile, make bare dbt -d fail with better errors 2018-09-28 14:09:27 -06:00
Jacob Beck
db71b1a43d Merge branch 'dev/guion-bluford' into feature/render-more-config-fields 2018-09-28 10:49:08 -06:00
Jacob Beck
a9487e89bf more unit tests 2018-09-28 10:29:21 -06:00
Jacob Beck
addcb1460b Add rendering to projects as well, fix up existing unit tests 2018-09-28 07:36:47 -06:00
Jacob Beck
95f3064aef Merge pull request #970 from borisuvarov/client_session_keep_alive_snowflake
Add client_session_keep_alive parameter for Snowflake connection
2018-09-28 07:25:46 -06:00
Boris Uvarov
4da156f392 Add unit tests 2018-09-28 12:20:18 +03:00
Jacob Beck
5e5916ce08 on profiles, render env vars and cli vars 2018-09-27 11:37:43 -06:00
Jacob Beck
16e055a740 add a deep_map utility function 2018-09-27 11:16:43 -06:00
Boris Uvarov
c2bc1c5361 Add client_session_keep_alive option for Snowflake adapter in order to prevent session timeout after 4 hours of inactivity 2018-09-27 17:08:20 +03:00
Jacob Beck
2cb7394583 move get_resource_fqns method into the manifest, add tests, add a missing import 2018-09-26 13:05:28 -06:00
Jacob Beck
cbb256adde Merge branch 'dev/guion-bluford' into pr/clrcrl/check-model-configs 2018-09-21 10:54:11 -06:00
Connor McArthur
0b51d18b3a Merge pull request #1012 from fishtown-analytics/update-release
update release documentation
2018-09-21 11:59:45 -04:00
Jacob Beck
6454a81593 Merge pull request #1010 from fishtown-analytics/feature/adapters-as-objects
adapters as objects (#961)
2018-09-20 09:01:31 -06:00
Jacob Beck
08150b09fc Merge branch 'dev/guion-bluford' into feature/adapters-as-objects 2018-09-20 08:11:02 -06:00
Connor McArthur
738304f438 dbt builtin profiler 2018-09-20 08:44:42 -04:00
Drew Banin
5b94bc3259 Merge pull request #1019 from elexisvenator/comment-guard
Add newline around SQL in incremental materialization to guard against line comments.
2018-09-20 08:23:00 -04:00
Connor McArthur
ad02e8cd05 post-deploy 2018-09-20 08:03:02 -04:00
Ben Edwards
665264723d Add newline around SQL in incremental materialization to guard against line comments. 2018-09-20 14:14:36 +10:00
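A tiny illustration of the bug the commit above guards against, using an assumed wrapper shape rather than dbt's real incremental materialization:

```python
# If a model ends with a `--` line comment and the materialization wraps it
# on one line, the closing paren is swallowed by the comment.
model_sql = "select 1 as id -- trailing comment"

broken = "create temporary table tmp as ( {} )".format(model_sql)
# -> the `)` lands on the commented line and the statement never closes

fixed = "create temporary table tmp as (\n{}\n)".format(model_sql)
# -> the newline terminates the line comment, so the wrapper parses correctly

print(broken)
print(fixed)
```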
Connor McArthur
f588876461 Merge branch 'master' of github.com:fishtown-analytics/dbt into dev/guion-bluford 2018-09-19 10:29:41 -04:00
Connor McArthur
cce9e3a100 update release documentation 2018-09-18 19:47:50 -04:00
Jacob Beck
b7134385b2 Merge remote-tracking branch 'clrcrl/check-model-configs' into pr/clrcrl/check-model-configs 2018-09-18 14:43:03 -06:00
Jacob Beck
63793b74f2 combine profile and context functions 2018-09-18 14:29:26 -06:00
Jacob Beck
0b0e9e02e7 add a way for tests to reset the adapters known to dbt between runs 2018-09-18 14:29:26 -06:00
Jacob Beck
5c60f18146 move quoting all the way into the project config, make tests reset adapters 2018-09-18 14:29:26 -06:00
Jacob Beck
c0ce5cb3e3 fix some tests and shut up some warnings 2018-09-18 14:29:26 -06:00
Jacob Beck
8e84f53c65 make adapters into objects, fix unit tests 2018-09-18 14:29:26 -06:00
Connor McArthur
ea974fde74 Bump version: 0.11.1rc1 → 0.11.1 2018-09-18 16:23:03 -04:00
Connor McArthur
a5ee60c56a Merge pull request #1011 from fishtown-analytics/dev/lucretia-mott
dbt 0.11.1: lucretia mott
2018-09-18 16:22:16 -04:00
Jacob Beck
2a8f0b8d0b Merge pull request #1006 from fishtown-analytics/fix/bigquery-underscores
Include datasets with underscores when listing BigQuery datasets
2018-09-18 14:21:48 -06:00
Connor McArthur
4f2f5bb700 0.11.1 changelog 2018-09-18 15:37:26 -04:00
Jacob Beck
5fc97bc7f3 Merge pull request #1000 from fishtown-analytics/fix/exception-handling-errors
Fix exception handling masking on cleanup (#997)
2018-09-18 11:25:18 -06:00
Jacob Beck
c11cd92b83 add a test that already passed anyway 2018-09-18 10:13:06 -06:00
Jacob Beck
9a91aa2584 remove never-called method 2018-09-18 10:12:56 -06:00
Jacob Beck
15b13054d1 Include datasets with underscores when listing BigQuery datasets
Co-authored-by: kf Fellows <k@therinefello.ws>
2018-09-18 10:10:34 -06:00
Jacob Beck
22d4a1d73d PR feedback 2018-09-18 09:23:42 -06:00
Jacob Beck
fb970192cd handle python 2 quirks 2018-09-18 09:23:42 -06:00
Jacob Beck
c58daa1dc9 fix an error where a non-string is passed to RuntimeException for a message on python 2.7 2018-09-18 09:23:42 -06:00
Jacob Beck
28ef796d47 add extra logic around connection release in the finally block to avoid raising during exception handling 2018-09-18 09:23:38 -06:00
Connor McArthur
8135948e0d Bump version: 0.11.1a1 → 0.11.1rc1 2018-09-17 12:26:46 -04:00
Jacob Beck
1c67d19b37 Merge pull request #973 from fishtown-analytics/project-profile-combined
Project profile combined
2018-09-17 10:25:56 -06:00
Connor McArthur
bb8883b7ef Merge pull request #1007 from fishtown-analytics/docs/fix-copy-to-clipboard
Docs/fix copy to clipboard
2018-09-17 12:25:04 -04:00
Drew Banin
361eee66ca Merge branch 'dev/lucretia-mott' of github.com:fishtown-analytics/dbt into docs/fix-copy-to-clipboard 2018-09-17 12:19:30 -04:00
Jacob Beck
228524fa46 Merge pull request #870 from clrcrl/jinja-upgrade
Upgrade version of Jinja2
2018-09-17 08:40:38 -06:00
Drew Banin
acbda732a8 fix copy to clipboard button in docs ui 2018-09-14 13:59:29 -04:00
Jacob Beck
18a5e44dbc fix unit tests 2018-09-13 18:27:07 -06:00
Jacob Beck
6652eced95 warn on unspecified test name 2018-09-13 18:27:07 -06:00
Jacob Beck
b4772bc3b6 Missed a project->config 2018-09-13 18:27:07 -06:00
Jacob Beck
273af5368f add more explicit dbt.config imports for readability 2018-09-13 18:27:07 -06:00
Jacob Beck
582f9f9143 make __eq__ type checks symmetrical 2018-09-13 18:27:07 -06:00
Jacob Beck
1620a17eca Create a somewhat sane, if huge, configuration object 2018-09-13 18:27:07 -06:00
Jacob Beck
16fa082e47 Merge pull request #991 from fishtown-analytics/fix/drop-table-quoting
Fix table/schema quoting on drop, truncate, and rename (#983)
2018-09-12 21:23:17 -06:00
Jacob Beck
f473eae8c3 Merge pull request #998 from fishtown-analytics/fix/snowflake-quoted-identifiers-case-docs
Fix QUOTED_IDENTIFIERS_IGNORE_CASE errors (#982)
2018-09-12 21:21:50 -06:00
Jacob Beck
40f009f017 add a test that sets QUOTED_IDENTIFIERS_IGNORE_CASE 2018-09-12 15:23:07 -06:00
Jacob Beck
97536e53a8 on snowflake, force-lowercase the columns of the catalog query results 2018-09-12 12:33:15 -06:00
Jacob Beck
7cbec9ee8f PR feedback 2018-09-12 08:38:49 -06:00
Jacob Beck
18f3849678 Merge branch 'dev/lucretia-mott' into fix/drop-table-quoting 2018-09-12 07:26:30 -06:00
Jacob Beck
f2d153779c fix typo 2018-09-12 07:26:13 -06:00
Jacob Beck
d66f3a8bf4 Merge pull request #995 from fishtown-analytics/fix/agate-empty-string-nulls
let the empty string mean null (#993)
2018-09-12 07:24:30 -06:00
Jacob Beck
197c05106e let the empty string mean null 2018-09-11 14:57:49 -06:00
Jacob Beck
8e9a44ee3f Merge branch 'dev/lucretia-mott' into fix/drop-table-quoting 2018-09-11 14:44:31 -06:00
Jacob Beck
5c2fa708e7 Merge pull request #992 from fishtown-analytics/fix/errors-on-null-schema
Fix errors on null schema (#980)
2018-09-11 14:42:44 -06:00
Jacob Beck
6e620589b5 Merge pull request #978 from fishtown-analytics/bigquery-clustering-columns
Bigquery clustering columns (#918)
2018-09-11 14:41:01 -06:00
Jacob Beck
97d836eb1d PR feedback 2018-09-11 13:12:30 -06:00
Jacob Beck
13ebfdf831 handle null schemas when filtering out tables for the catalog 2018-09-11 12:07:03 -06:00
Jacob Beck
130cac96d3 add unit test 2018-09-11 11:57:47 -06:00
Connor McArthur
dd8307268e changelog 2018-09-11 13:13:12 -04:00
Jacob Beck
99a04e9512 add tests for quoting 2018-09-11 11:12:29 -06:00
Jacob Beck
0cf38bcd19 set quote policy on all cls.Relation.create() invocations 2018-09-11 11:12:29 -06:00
Connor McArthur
19e4e0cbeb Bump version: 0.11.0 → 0.11.1a1 2018-09-11 13:08:26 -04:00
Connor McArthur
59ad09cfbd Merge pull request #990 from fishtown-analytics/docs/0.11.1-updates
Fixes for the 0.11.1rc1 release
2018-09-11 12:28:45 -04:00
Drew Banin
c1c9fc1ed4 Fixes for the 0.11.1rc1 release
Fix for nonexistent column defined in schema.yml https://github.com/fishtown-analytics/dbt-docs/pull/3
Fix for schema test join when columns are upcased https://github.com/fishtown-analytics/dbt-docs/pull/2
2018-09-11 11:42:04 -04:00
Jacob Beck
df570f6889 Merge branch 'dev/lucretia-mott' into bigquery-clustering-columns 2018-09-11 07:36:43 -06:00
Jacob Beck
669a29bded Merge pull request #987 from fishtown-analytics/feature/dbt-docs-serve-port
add --port parameter to "dbt docs serve" (#955)
2018-09-11 07:34:01 -06:00
Jacob Beck
ff047a27c9 Merge branch 'development' into bigquery-clustering-columns 2018-09-07 14:05:13 -06:00
Jacob Beck
957115e467 add --port parameter to "dbt docs serve". 2018-09-07 14:00:35 -06:00
Jacob Beck
be541237e8 Merge pull request #985 from fishtown-analytics/fix/hooks-in-configs-not-running
Fix hooks in configs not running (#984)
2018-09-07 13:50:55 -06:00
Jacob Beck
b801518195 actually add model-configured hooks to the parsed nodes 2018-09-07 11:37:47 -06:00
Jacob Beck
32615f48b6 add failing tests 2018-09-07 11:37:47 -06:00
Drew Banin
6a62ec43e9 Merge pull request #977 from fishtown-analytics/fix/redshift-catalog-test
fix redshift catalog test
2018-09-07 12:03:17 -04:00
Jacob Beck
74b00f42df pep8 2018-09-06 14:04:31 -06:00
Jacob Beck
7954ff688f add clustering by, add it to the stats we collect, add tests 2018-09-06 12:31:39 -06:00
Jacob Beck
05777ebf38 add a test that we at least try to partition_by/cluster_by and don't get errors 2018-09-06 10:09:02 -06:00
Jacob Beck
26a1fb06a0 add cluster_by 2018-09-06 09:40:45 -06:00
Jacob Beck
b370852272 remove unused set 2018-09-06 09:40:40 -06:00
Drew Banin
f143cfe213 fix test 2018-09-06 10:24:46 -04:00
Drew Banin
0d074c36e5 Merge pull request #974 from fishtown-analytics/0.11.0-changelog
0.11.0 release
2018-09-06 09:11:53 -04:00
Drew Banin
c358fc0c04 Merge branch '0.11.0-changelog' of github.com:fishtown-analytics/dbt into 0.11.0-changelog 2018-09-06 08:51:40 -04:00
Drew Banin
f316b07a2b quick fix for invalid byte in Redshift stats response 2018-09-06 08:51:30 -04:00
Drew Banin
025a87222f Merge branch 'development' into 0.11.0-changelog 2018-09-05 19:47:56 -04:00
Drew Banin
492305e965 Update CHANGELOG.md 2018-09-05 19:46:42 -04:00
Drew Banin
ac16fefbf8 Update CHANGELOG.md 2018-09-05 19:26:36 -04:00
Drew Banin
386abbee66 bump to 0.11.0 2018-09-05 19:03:56 -04:00
Drew Banin
3a8ffa7e0f use initial dbt docs release 2018-09-05 19:03:03 -04:00
Connor McArthur
75d6413f49 Bump version: 0.11.0rc1 → 0.11.0rc2 2018-09-04 20:02:46 -04:00
Connor McArthur
6ef1ef6a3d pin botocore to the associated versions from boto3 2018-09-04 20:01:50 -04:00
Connor McArthur
d820d68a8c Bump version: 0.11.0a2 → 0.11.0rc1 2018-09-04 19:23:44 -04:00
Jacob Beck
0f164c9204 Merge pull request #952 from fishtown-analytics/schema-v1-v2-converter
Schema v1 v2 converter (#942)
2018-09-04 17:04:28 -06:00
Drew Banin
7145fa5528 Update CHANGELOG.md 2018-09-04 17:55:49 -04:00
Jacob Beck
4f4810c327 Merge branch 'development' into schema-v1-v2-converter 2018-09-04 13:21:58 -06:00
Jacob Beck
d24cf0ad27 this does not belong 2018-09-04 13:21:26 -06:00
Jacob Beck
36cd3331a7 if tests are not dicts, make them dicts 2018-09-04 13:18:39 -06:00
Jacob Beck
e0d87eee71 get pyyaml to dump things in a more pleasing order 2018-09-04 13:04:25 -06:00
Drew Banin
5d4c770b6c Merge pull request #972 from fishtown-analytics/fix/show-injected-sql
render injected (instead of compiled) sql in docs site
2018-09-04 13:24:13 -04:00
Drew Banin
fe2e22f5b8 render injected (instead of compiled) sql in docs site 2018-09-04 13:21:33 -04:00
Drew Banin
78ca9d3ab8 Merge pull request #967 from fishtown-analytics/bump/0.11.0a2
bump to 0.11.0a2
2018-08-30 08:42:33 -04:00
Drew Banin
5c5f471731 bump to 0.11.0a2 2018-08-30 08:41:13 -04:00
Drew Banin
4d27585b34 Merge pull request #964 from fishtown-analytics/fix/snowflake-boto-version-mismatch
set upper bound on boto3 version
2018-08-30 08:35:06 -04:00
Jacob Beck
0fddcfef32 turn off "flow style" in output 2018-08-29 13:42:58 -06:00
Drew Banin
d56800f638 set upper bound on boto3 version 2018-08-29 14:37:10 -04:00
Jacob Beck
c38e34fe1b Merge branch 'development' into schema-v1-v2-converter 2018-08-28 14:38:20 -06:00
Jacob Beck
68047d6fa7 Merge pull request #957 from fishtown-analytics/handle-models-named-version
add handling for v1 schema.yml with a model named "version", and tests (#950)
2018-08-28 14:37:34 -06:00
Jacob Beck
94c2b05a60 clean up the incorrect version message a bit 2018-08-28 12:11:49 -06:00
Jacob Beck
3046eee5dc Merge branch 'development' into handle-models-named-version 2018-08-28 11:18:15 -06:00
Jacob Beck
6d6cb201ca Merge pull request #958 from fishtown-analytics/var-validate-model-name
Fix error with validation code (#956)
2018-08-28 11:16:14 -06:00
Claire Carroll
8587bd4435 Add seeds to default_project_cfg 2018-08-28 17:50:20 +02:00
Jacob Beck
50efa65c12 Merge branch 'development' into var-validate-model-name 2018-08-28 09:41:45 -06:00
Claire Carroll
aa06a8a606 Handle case when configs supplied but no resource of that type in project 2018-08-28 17:41:01 +02:00
Jacob Beck
606014642d Merge pull request #949 from fishtown-analytics/selectively-download-bumpversion
Remove unconditional calls to get_latest_version (#907)
2018-08-28 09:26:45 -06:00
Jacob Beck
dd406a8cdb add unit test 2018-08-28 09:17:45 -06:00
Jacob Beck
9b15377cee we set model_name in __init__, so just use that 2018-08-28 08:51:19 -06:00
Jacob Beck
ed4b8f0c8f add handling for v1 schema.yml with a model named "version", and tests 2018-08-28 08:42:35 -06:00
Jacob Beck
d946ac2c99 pep8 2018-08-28 08:12:56 -06:00
Jacob Beck
383ea3542a Convert the full project, not just single files 2018-08-28 08:11:53 -06:00
Claire Carroll
c367d5bc75 Check for unused configs in project file 2018-08-28 10:22:52 +02:00
Jacob Beck
4ab0ec96a8 make the case of an inaccessible raw.githubusercontent.com a bit more user-friendly 2018-08-27 14:46:07 -06:00
Jacob Beck
9955070085 make version special the same way argparse does, but with lazy evaluation of the version value. 2018-08-27 14:35:30 -06:00
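A sketch of the lazy `--version` pattern the commit above describes, using a custom `argparse.Action`; the class and lookup-function names are invented for illustration:

```python
import argparse

def fetch_version_lazily():
    # Hypothetical stand-in for an expensive lookup (e.g. a network call).
    return "installed: 0.14.2"

class LazyVersion(argparse.Action):
    # Behaves like argparse's built-in "version" action, but defers computing
    # the version string until --version is actually passed.
    def __init__(self, option_strings, dest, **kwargs):
        super().__init__(option_strings, dest, nargs=0, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        parser.exit(message=fetch_version_lazily() + "\n")

parser = argparse.ArgumentParser()
parser.add_argument("--version", action=LazyVersion, help="show version and exit")
# parser.parse_args(["--version"]) would print the version and exit.
```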
Jacob Beck
56957d4940 Merge branch 'development' into selectively-download-bumpversion 2018-08-27 13:11:12 -06:00
Jacob Beck
4bb01e5fe8 Merge branch 'development' into schema-v1-v2-converter 2018-08-27 08:03:52 -06:00
Jacob Beck
01212cb19d pep8/test sanity 2018-08-27 07:49:44 -06:00
Jacob Beck
15ca05d7e1 add custom mappings arguments 2018-08-27 07:46:53 -06:00
Jacob Beck
1876257610 add handling for unknown tests 2018-08-27 07:05:38 -06:00
Jacob Beck
00fdf6a1c1 control flow cleanup 2018-08-27 06:57:43 -06:00
Jacob Beck
b99b4d5ef3 format that log message 2018-08-24 15:08:41 -06:00
Jacob Beck
c1c88a2ca7 make the script run... 2018-08-24 14:59:51 -06:00
Jacob Beck
9214e98c78 logging 2018-08-24 14:47:05 -06:00
Jacob Beck
3b12b93e09 add handling for simple custom tests 2018-08-24 14:23:30 -06:00
Jacob Beck
96172da83f initial work on a converter, only built-in tests handled 2018-08-24 14:03:14 -06:00
Connor McArthur
8240542d3e Bump version: 0.10.2 → 0.11.0a1 2018-08-24 15:38:51 -04:00
Drew Banin
088553d308 Merge pull request #951 from fishtown-analytics/docs-site-0-11-a1-update
Bump docs site index.html for column docs, custom overview
2018-08-24 15:31:18 -04:00
Drew Banin
39cab15994 Bump docs site index.html for column docs, custom overview 2018-08-24 13:55:39 -04:00
Jacob Beck
045fccc8c0 remove a sneaky, hidden unconditional call to get_latest_version 2018-08-24 08:42:39 -06:00
Jacob Beck
0e5a8f158e Remove unconditional calls to get_latest_version 2018-08-24 08:16:28 -06:00
Drew Banin
72c39bcfc8 Merge pull request #916 from lewish/fix/merge_name_clashes
Change SOURCE and DEST aliases in BQ merge to be less generic.
2018-08-23 23:34:48 -04:00
Drew Banin
017e08747d Merge branch 'development' into fix/merge_name_clashes 2018-08-23 21:45:14 -04:00
Drew Banin
b1e186a132 Merge pull request #940 from fishtown-analytics/fix/snowflake-view-transactions
Fix/snowflake view transactions
2018-08-23 21:35:24 -04:00
Drew Banin
78fd05ab73 Merge branch 'development' into fix/snowflake-view-transactions 2018-08-23 19:47:44 -04:00
Drew Banin
435f1b4781 Merge pull request #943 from fishtown-analytics/override-default-docs-for-website
Create default overview block for docs website
2018-08-23 19:47:30 -04:00
Drew Banin
287c4cf89f Merge branch 'development' into override-default-docs-for-website 2018-08-23 14:39:22 -04:00
Drew Banin
3601f1c9ee Merge branch 'development' into fix/snowflake-view-transactions 2018-08-23 14:13:35 -04:00
Jacob Beck
ca42b63bc2 Merge pull request #939 from fishtown-analytics/detailed-catalog
Pull detailed table stats into the catalog
2018-08-23 11:55:57 -06:00
Connor McArthur
06725c5a51 Merge branch 'development' into override-default-docs-for-website 2018-08-23 13:06:56 -04:00
Jacob Beck
bc23db08fb Merge branch 'development' into detailed-catalog 2018-08-23 09:06:52 -06:00
Jacob Beck
6802237479 Merge pull request #948 from fishtown-analytics/compile-on-docs-generate
Compile on docs generate (#932)
2018-08-23 09:01:26 -06:00
Drew Banin
dbb32e99c7 Merge branch 'development' into fix/snowflake-view-transactions 2018-08-23 10:57:04 -04:00
Drew Banin
9593a6f720 fix tests 2018-08-23 10:55:42 -04:00
Drew Banin
2d91be0329 (fixes #941) Create default overview block for docs website 2018-08-23 10:55:42 -04:00
Drew Banin
796130066d pr feedback 2018-08-22 16:39:46 -04:00
Jacob Beck
554ecb0e33 Only include table-only fields on bigquery if the relationship is a table 2018-08-22 10:21:09 -06:00
Jacob Beck
c9b1cade48 make schema filtering case-insensitive 2018-08-22 09:13:40 -06:00
Jacob Beck
8570c632b9 fix unit tests 2018-08-22 09:13:40 -06:00
Jacob Beck
eb9e4f7133 snowflake tests 2018-08-22 09:13:40 -06:00
Jacob Beck
5fe95db75e fix redshift tests 2018-08-22 09:13:39 -06:00
Jacob Beck
88f3430f1e move has_stats to a python-computed value [skip ci] 2018-08-22 09:13:39 -06:00
Jacob Beck
0822af4e68 snowflake stats 2018-08-22 09:13:39 -06:00
Jacob Beck
d4bdc50b57 bigquery fixes 2018-08-22 09:13:39 -06:00
Jacob Beck
c027a12654 add a description field for has_stats, for consistency 2018-08-22 09:13:39 -06:00
Jacob Beck
4b5417ead3 build out some bigquery stats 2018-08-22 09:13:39 -06:00
Jacob Beck
2a6277cc19 don't special-case has_stats, return a dict of dicts instead of list of dicts 2018-08-22 09:13:39 -06:00
Jacob Beck
cd0263c2e4 make postgres return something more similar to redshift 2018-08-22 09:13:39 -06:00
Jacob Beck
f7c0b6f59c make a canonical layout in integration tests 2018-08-22 09:13:39 -06:00
Jacob Beck
090172ca66 pep8 + unit tests 2018-08-22 09:13:39 -06:00
Drew Banin
66fc74ac18 detailed catalog info 2018-08-22 09:13:39 -06:00
Jacob Beck
9a395facfd Merge branch 'development' into compile-on-docs-generate 2018-08-22 09:01:24 -06:00
Jacob Beck
1a1d45c9a0 Merge pull request #944 from fishtown-analytics/snowflake-default-unquoted
Snowflake default unquoted (#824)
2018-08-22 09:00:42 -06:00
Jacob Beck
3868f70b18 more test cleanup post-merge 2018-08-22 07:21:33 -06:00
Jacob Beck
90ea0e601b make this test actually run on snowflake, add a clamp because snowflake does not care about varchar sizes below 16777216 2018-08-22 07:14:21 -06:00
Jacob Beck
7846a2ecba snowflake is now unquoted, of course 2018-08-21 19:18:44 -06:00
Jacob Beck
1edfb50000 another bigquery branch issue 2018-08-21 16:02:17 -06:00
Jacob Beck
6435c0f5f7 numerous casing-related things on snowflake 2018-08-21 15:43:32 -06:00
Jacob Beck
35333c5fe2 make create/drop schema sql methods choose default quoting from the adapter class 2018-08-21 15:43:31 -06:00
Jacob Beck
ebdc11b380 Fix all the tests I broke 2018-08-21 15:43:31 -06:00
Jacob Beck
b15c5a7278 remove warning logging since we no longer do that 2018-08-21 15:43:31 -06:00
Jacob Beck
48810996b3 invert make_match_kwargs override, flip default snowflake quoting 2018-08-21 15:43:31 -06:00
Jacob Beck
19ccbf2d47 namespacing is hard 2018-08-21 14:37:14 -06:00
Jacob Beck
02fc867ef4 Revert "Merge pull request #945 from fishtown-analytics/revert-936-compile-on-docs-generate"
This reverts commit bb5d211c94, reversing
changes made to d050b3268a.
2018-08-21 14:06:44 -06:00
Jacob Beck
bb5d211c94 Merge pull request #945 from fishtown-analytics/revert-936-compile-on-docs-generate
Revert "call compile when generating docs (#932)"
2018-08-21 13:59:53 -06:00
Jacob Beck
afba7f7294 Revert "call compile when generating docs (#932)" 2018-08-21 13:58:30 -06:00
Jacob Beck
d050b3268a Merge pull request #936 from fishtown-analytics/compile-on-docs-generate
call compile when generating docs (#932)
2018-08-21 12:32:46 -06:00
Jacob Beck
e7ef99bae9 Merge pull request #926 from fishtown-analytics/link-docs-on-parsing-failure
Link docs on parsing failure (#886)
2018-08-21 07:09:16 -06:00
Jacob Beck
f14ad85402 pr feedback 2018-08-21 06:31:56 -06:00
Drew Banin
457df4de2c handle table --> view switch on snowflake 2018-08-20 16:57:13 -04:00
Drew Banin
b33dbf0717 fix tests 2018-08-17 16:33:03 -04:00
Drew Banin
dad9970ca6 account for view --> table switch 2018-08-17 15:04:21 -04:00
Drew Banin
afe1489a73 Fix snowflake view swaps... and a lot of other things too
- obviate difference between this.name and this.table
- use `create or replace view` on snowflake
- avoid doing costly alter tables and drops for snowflake views
2018-08-17 15:04:19 -04:00
Jacob Beck
7cd544d33e Merge pull request #937 from fishtown-analytics/fix/non-destructive-and-i-mean-it
When a user sets --non-destructive, don't drop the view (#931)
2018-08-17 11:18:10 -06:00
Jacob Beck
13cb504c38 Do not drop the view if we are not going to create a new one 2018-08-17 10:00:00 -06:00
Jacob Beck
d528d25f8c add a test to ensure that --non-destructive doesn't break everything on bigquery 2018-08-17 08:32:20 -06:00
Jacob Beck
5c062d6700 On compile failure, return the compile results to generate better output 2018-08-16 07:55:34 -06:00
Jacob Beck
e2e26141d9 Merge pull request #921 from fishtown-analytics/relationships-tests-with-nulls
Relationships tests with nulls (#887)
2018-08-15 15:37:28 -06:00
Jacob Beck
0e580ca6a6 dbt docs generate now implicitly calls dbt compile, so all docs requirements get generated 2018-08-15 14:56:06 -06:00
Jacob Beck
70299041b0 Merge branch 'nicer-validation-errors' into link-docs-on-parsing-failure 2018-08-15 10:12:01 -06:00
Jacob Beck
fb25258a62 make our error/warning strings nicer for schema validation issues that users can trigger 2018-08-15 09:21:52 -06:00
Jacob Beck
0a21938fe3 remove config info from schema errors 2018-08-15 09:05:07 -06:00
Jacob Beck
768abdeea3 pr feedback 2018-08-15 08:41:33 -06:00
Jacob Beck
62228291ab PR feedback 2018-08-15 08:40:39 -06:00
Jacob Beck
f4cbf85e2e add some bigquery tests with ephemeral models 2018-08-15 08:40:39 -06:00
Jacob Beck
81e31e7be2 Convert relationships test to use left join instead of "not in"
Co-authored-by: Michael Dunn <rsmichaeldunn@users.noreply.github.com>
Co-authored-by: Martin Lue <martinlue@users.noreply.github.com>
2018-08-15 08:38:49 -06:00
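The relationships-test commit above swaps `not in` for a left join, avoiding the SQL pitfall where a NULL in the subquery makes `not in` evaluate to unknown for every row. A hedged sketch of the resulting query shape, with placeholder table and column names (the real test is templated in Jinja):

```python
# Placeholder names (child, parent, parent_id, id); not the exact macro dbt ships.
RELATIONSHIPS_TEST_SQL = """
select count(*) as validation_errors
from child
left join parent
    on child.parent_id = parent.id
where child.parent_id is not null
  and parent.id is null
"""
# Counts child rows whose non-null foreign key has no matching parent row.
```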
Jacob Beck
7baf983574 fix tests, and make the table summary have a different name from its source so we fail next time this gets messed up 2018-08-15 08:38:49 -06:00
Jacob Beck
8122c1b692 I mixed up from and field pretty badly there 2018-08-15 08:38:49 -06:00
Jacob Beck
e752345cc5 Add tests 2018-08-15 08:38:48 -06:00
Jacob Beck
e57c497f7d Add missing where clause 2018-08-15 08:38:48 -06:00
Jacob Beck
cbc675e584 pr feedback 2018-08-15 08:10:22 -06:00
Jacob Beck
a698486c2d Merge pull request #929 from fishtown-analytics/fix-cte-ordering
Fix cte ordering (#924)
2018-08-15 08:07:27 -06:00
Jacob Beck
5a4bdd44e4 Merge pull request #925 from fishtown-analytics/quote-schema-in-temp-tables
Quote schema in temp tables (#859)
2018-08-14 13:31:23 -06:00
Jacob Beck
1d8b2370d3 Merge pull request #923 from fishtown-analytics/upgrade-google-cloud-bigquery
Upgrade google-cloud-bigquery to 1.5.0 (#806)
2018-08-14 13:25:57 -06:00
Jacob Beck
b71ff3799c fix tests to expect lists of key/value dicts instead of dicts, add test 2018-08-14 11:36:25 -06:00
Jacob Beck
dd4f4c4e8c convert ctes to be stored in lists instead of ordered dicts 2018-08-14 11:31:16 -06:00
Jacob Beck
cb285f55d1 enforce ordering on parent/child maps for test consistency 2018-08-14 11:24:19 -06:00
Jacob Beck
8428740098 I don't know why, but this seems to fix the test? 2018-08-14 11:17:05 -06:00
Jacob Beck
7b70efe4cd add tests for column names in schemas 2018-08-14 09:58:09 -06:00
Jacob Beck
af59fd8514 Pass column names through during schema parsing 2018-08-14 09:54:15 -06:00
Jacob Beck
d1afb27fe9 add a unit test 2018-08-14 08:22:39 -06:00
Jacob Beck
545cf0b0c5 Add version/type sanity checks in schema parsing 2018-08-14 07:07:18 -06:00
Jacob Beck
a57a487513 Add a new test for explicitly lowercased snowflake schemas 2018-08-14 06:16:10 -06:00
Jacob Beck
4b2332ae39 Quote schemas in snowflake__create_table_as 2018-08-14 06:16:10 -06:00
Jacob Beck
f739bd3927 Set google-cloud-bigquery dependency to >=1, <2 2018-08-13 14:40:11 -06:00
Drew Banin
825df517db Merge pull request #920 from fishtown-analytics/bump-docs-site
bump docs site code to account for manifest + catalog changes
2018-08-13 12:33:12 -04:00
Drew Banin
afb46586ab bump docs site code to account for manifest + catalog changes 2018-08-13 12:12:11 -04:00
Drew Banin
9bcd4e3061 Merge pull request #917 from fishtown-analytics/feature/catalog-as-unique-id-dict
Feature/catalog as unique id dict
2018-08-13 10:02:48 -04:00
Jacob Beck
ee1f385ed2 Merge pull request #919 from fishtown-analytics/include-project-metadata
Include project metadata in the manifest (#906)
2018-08-13 08:00:02 -06:00
Drew Banin
1f2d9ca2ea Merge pull request #851 from fishtown-analytics/tests/snowflake-integration-test-speed
cache snowflake column defs in tests
2018-08-13 09:31:56 -04:00
Drew Banin
eaea480060 handle column casing for snowflake 2018-08-12 19:21:23 -04:00
Drew Banin
9cda84f855 cache snowflake column defs in tests 2018-08-11 14:26:52 -04:00
Drew Banin
6e82e31c77 fix for bigquery test 2018-08-10 12:11:27 -04:00
Jacob Beck
f58751b356 integration tests 2018-08-10 08:26:44 -06:00
Jacob Beck
34c113ad98 update unit tests 2018-08-10 08:26:44 -06:00
Jacob Beck
679e57cfa9 Pass the full project object along to GraphLoader.load_all instead of its config dict 2018-08-10 08:26:44 -06:00
Jacob Beck
49373e54d1 Insert project metadata into the manifest 2018-08-10 08:26:44 -06:00
Drew Banin
c4262a7734 add tests for columns stored as dicts 2018-08-10 10:10:20 -04:00
Drew Banin
e07a017984 (fixes #896) use dict of unique ids for catalog nodes 2018-08-10 09:18:11 -04:00
Drew Banin
6d5802c788 (fixes #897) use dicts for columns in catalog and manifest 2018-08-10 09:17:43 -04:00
Jacob Beck
02799e5297 Merge pull request #904 from fishtown-analytics/write-run-results
Write run results to disk (closes #829)
2018-08-10 07:10:12 -06:00
Jacob Beck
b89018eb84 status can also be boolean now 2018-08-09 12:23:26 -06:00
Jacob Beck
b6f5283dd1 Merge branch 'development' into write-run-results 2018-08-09 09:53:48 -06:00
Jacob Beck
4459c0d04c Merge pull request #905 from fishtown-analytics/agate-less-aggressive-inference
Agate less aggressive inference (#861)
2018-08-09 09:35:05 -06:00
Jacob Beck
5ccaf5b7e2 make write_json just use write_file(...json.dumps()) 2018-08-09 09:32:29 -06:00
Jacob Beck
92566fdbb1 Update changelog, I forgot as usual 2018-08-09 09:32:29 -06:00
Jacob Beck
9c9baf98a3 Integration tests 2018-08-09 09:32:29 -06:00
Jacob Beck
4b43b6d2b3 Add a JSONEncoder that encodes decimals to floats 2018-08-09 09:17:52 -06:00
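A minimal sketch of the decimal-to-float encoder the commit above mentions; the class name is illustrative:

```python
import json
from decimal import Decimal

class DecimalToFloatEncoder(json.JSONEncoder):
    # Database/agate results often come back as Decimal, which the stdlib
    # encoder rejects, so convert them to float when serializing.
    def default(self, obj):
        if isinstance(obj, Decimal):
            return float(obj)
        return super().default(obj)

print(json.dumps({"rows_affected": Decimal("12")}, cls=DecimalToFloatEncoder))
# -> {"rows_affected": 12.0}
```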
Jacob Beck
b801f9d762 Make RunModelResult an APIObject with a contract, add write_json 2018-08-09 09:17:52 -06:00
Connor McArthur
57eaa0cfa4 merged development 2018-08-09 09:29:25 -04:00
Connor McArthur
77eb04a5c3 updated index.html 2018-08-09 09:28:14 -04:00
Connor McArthur
c815004860 Merge pull request #883 from fishtown-analytics/incorporate-catalog-unique-ids
incorporate unique ids into catalog
2018-08-09 09:19:22 -04:00
Connor McArthur
3cb27fbd50 Merge pull request #910 from fishtown-analytics/prep-sinter-docs
Prep sinter docs
2018-08-08 20:28:06 -04:00
Lewis Hemens
8256706f40 Change SOURCE and DEST aliases in BQ merge macro to DBT_INTERNAL_DEST/SOURCE to avoid conflicts with similarly named fields in user models. 2018-08-08 15:58:52 -07:00
Connor McArthur
ceb51dfdcf fix index.html path 2018-08-08 16:45:14 -04:00
Connor McArthur
e79904fbd7 copy index file in generate task 2018-08-08 16:34:53 -04:00
Connor McArthur
ba6dca6e3b fix one test for redshift 2018-08-08 11:07:22 -04:00
Connor McArthur
ccd16a2b8a fix integration tests 2018-08-08 10:40:12 -04:00
Connor McArthur
903612dc56 merged dev/isaac-asimov 2018-08-08 09:27:30 -04:00
Jacob Beck
7c286fc8b9 add a test 2018-08-07 07:50:21 -06:00
Jacob Beck
30f03692ef Make the agate table type tester more restrictive on what counts as null/true/false 2018-08-07 07:10:36 -06:00
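The commit above tightens which strings agate treats as null/true/false. A plausible sketch using agate's per-type value lists; the exact value sets here are assumptions, not dbt's:

```python
import agate

# Pass explicit value lists so only a narrow set of strings is coerced,
# instead of agate's permissive defaults. Value sets are illustrative.
restrictive_tester = agate.TypeTester(types=[
    agate.Number(null_values=("", "null")),
    agate.Boolean(true_values=("true",), false_values=("false",),
                  null_values=("", "null")),
    agate.Text(null_values=("", "null")),
])
```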
Jacob Beck
bd79dd1aec Merge pull request #899 from fishtown-analytics/goodbye-flat-graph
Remove most flat_graph uses, replace with manifest use
2018-08-06 14:37:35 -06:00
Drew Banin
b4480cb88f Merge pull request #900 from fishtown-analytics/0-10-2-changelog
0.10.2 changelog
2018-08-03 14:13:54 -04:00
Drew Banin
10ceaa256f 0.10.2 changelog 2018-08-03 14:11:29 -04:00
Jacob Beck
74b33f483b re-add check for extra ctes already being injected 2018-08-03 11:18:42 -06:00
Jacob Beck
5fef7529c2 This has to inherit from CompiledNode to get access to cte-related stuff 2018-08-03 11:03:35 -06:00
Jacob Beck
1209212f45 contracts.graph.any -> contracts.graph.manifest 2018-08-03 10:56:18 -06:00
Jacob Beck
d9001f8765 PR feedback 2018-08-03 10:56:17 -06:00
Jacob Beck
1df26d5ac7 This is already a string... 2018-08-03 10:07:37 -06:00
Jacob Beck
b8aeb40ca5 pep8 2018-08-03 09:51:16 -06:00
Jacob Beck
2a11069380 Make context generation accept ParsedMacros 2018-08-03 09:51:16 -06:00
Jacob Beck
cad3f9a5ac unit tests pass again 2018-08-03 09:51:16 -06:00
Jacob Beck
b5c2ce3521 Made context take a ParsedNode instead of a dict. Added intentional shallow copying 2018-08-03 09:51:16 -06:00
Jacob Beck
6d969817d0 pass the config through to context generator instead of a magic key in a dict 2018-08-03 09:51:16 -06:00
Jacob Beck
2690b50986 More flat_graph -> manifest conversion, fix issues caused by rebase 2018-08-03 09:51:16 -06:00
Jacob Beck
5957195aaf Added a new contract set, moved manifest into that so we can allow both Compiled and Parsed nodes. 2018-08-03 09:38:45 -06:00
Jacob Beck
76f526d167 Convert a lot from flat_graph -> manifest 2018-08-03 09:38:45 -06:00
Connor McArthur
4136e96ce3 Bump version: 0.10.2rc1 → 0.10.2 2018-08-03 11:38:44 -04:00
Connor McArthur
1c2f6b6284 Merge branch 'development' of github.com:fishtown-analytics/dbt 2018-08-03 11:37:59 -04:00
Connor McArthur
37cd3e10ed merged dev/isaac-asimov 2018-08-02 15:24:45 -04:00
Jacob Beck
4a9e3ee937 Merge pull request #888 from fishtown-analytics/docs-blocks
Docs blocks (#810)
2018-08-02 13:11:47 -06:00
Connor McArthur
1e98c5467d updated index.html 2018-08-02 14:43:48 -04:00
Jacob Beck
9767d11162 Make a docs-paths, if unset it defaults to source-paths 2018-08-02 08:26:15 -06:00
Connor McArthur
c60187f78a Merge pull request #882 from fishtown-analytics/docs-serve
first pass at a working "dbt docs serve" command!
2018-08-02 07:23:47 -04:00
Connor McArthur
73febed2dc Bump version: 0.10.2a4 → 0.10.2rc1 2018-08-01 17:24:40 -04:00
Drew Banin
832f4286bb Merge pull request #893 from fishtown-analytics/fix/invalid-profile-errror-message
Show more helpful error for misconfigured profile
2018-08-01 17:21:43 -04:00
Drew Banin
befe9c2e52 (fixes #890) Show more helpful error for misconfigured profile
dbt would previously fail hard with a key error and a cryptic message
2018-08-01 11:30:49 -04:00
Drew Banin
c8d329ebf4 Merge pull request #892 from fishtown-analytics/fix/relaton-type-switch-errors
handle view <--> table materialization changes
2018-08-01 10:19:14 -04:00
Jacob Beck
777510edec PR fixes, changelog update 2018-08-01 08:10:09 -06:00
Drew Banin
4a15f5e1f5 (fixes #891) handle view <--> table materialization changes 2018-07-31 20:25:04 -04:00
Jacob Beck
4e57b17c0b Integration test -> many bug fixes 2018-07-31 13:57:03 -06:00
Jacob Beck
f44a5121f4 Add a test for late-binding views that has been hanging out on my filesystem 2018-07-31 13:57:03 -06:00
Jacob Beck
56b7aacb8a Add tests, wire up more things 2018-07-31 13:57:03 -06:00
Jacob Beck
1123f7e16f attach the full docs block to ParsedDocumentation instead of the template 2018-07-31 13:55:43 -06:00
Jacob Beck
a4b6048fea More work on docs blocks, added some rudimentary unit tests around parsing 2018-07-31 13:49:49 -06:00
Jacob Beck
c67924f0e2 initial docs extension work 2018-07-31 13:49:49 -06:00
Jacob Beck
94046075c1 Merge branch 'development' into dev/isaac-asimov 2018-07-31 13:49:33 -06:00
Jacob Beck
ec83c0256f Merge pull request #881 from fishtown-analytics/issue-template
Add an issue template for dbt (#865)
2018-07-31 07:38:20 -06:00
Jacob Beck
3b3a486966 Merge pull request #880 from fishtown-analytics/new-schema-yaml-syntax
Support new schema.yml syntax (#790)
2018-07-31 07:36:52 -06:00
Jacob Beck
2e1aaac1ed PR feedback 2018-07-31 06:07:47 -06:00
Connor McArthur
89001e15b8 Bump version: 0.10.2a3 → 0.10.2a4 2018-07-30 11:27:30 -04:00
Drew Banin
5d0624becc Merge pull request #884 from fishtown-analytics/fix/bq-incremental-merge-bugs
Fix issues for incremental models with unique keys, add hooks
2018-07-27 13:30:21 -04:00
Drew Banin
fd3f9efdd0 fix for unicode error on py27 2018-07-26 14:23:28 -04:00
Connor McArthur
44e06eecee add a contributing guide (#858) 2018-07-26 14:06:31 -04:00
Connor McArthur
12d5c58e3c test existence of unique_id 2018-07-26 12:20:37 -04:00
Jacob Beck
37c4279629 More PR feedback, added a test around disabled models and tests 2018-07-26 08:45:31 -06:00
Drew Banin
4cc8de920c Fix issues for incremental models with unique keys, add hooks
We had tests for this case, but they didn't run because the test
case used an incorrect profile. The test has been updated accordingly.
2018-07-26 10:32:11 -04:00
Connor McArthur
9ead2663c2 finishing up 2018-07-26 10:11:00 -04:00
Connor McArthur
af42a20f4f warnings and errors 2018-07-26 09:56:33 -04:00
Jacob Beck
e6550b464d More PR feedback 2018-07-26 07:07:17 -06:00
Connor McArthur
fdbf030723 incorporate unique ids into catalog 2018-07-26 08:16:32 -04:00
Jacob Beck
42e611af67 PR feedback 2018-07-25 21:30:34 -06:00
Jacob Beck
1e513f3f47 pr feedback 2018-07-25 13:37:48 -06:00
Drew Banin
a2f8f48e48 Merge pull request #879 from fishtown-analytics/fix/readd-this-to-operation-context
put "this" var back into operation context
2018-07-25 14:38:54 -04:00
Connor McArthur
ad7800695e advise devs to not send usage stats 2018-07-25 14:01:17 -04:00
Connor McArthur
87d04cee9e Merge branch 'development' of github.com:fishtown-analytics/dbt into contributing-guide 2018-07-25 14:00:16 -04:00
Connor McArthur
6f01836f10 remove some unused imports 2018-07-25 13:52:25 -04:00
Connor McArthur
975df131da first pass at a working "dbt docs serve" command! 2018-07-25 13:47:50 -04:00
Jacob Beck
2e1d7c7668 Add an issue template for dbt 2018-07-25 11:17:51 -06:00
Jacob Beck
27d62b87d5 Make column tests optional 2018-07-25 10:16:28 -06:00
Jacob Beck
d6ee3ad160 Add changelog entries (including retrospectively) 2018-07-25 08:37:07 -06:00
Drew Banin
e6b21796c1 fix for catalog generation 2018-07-25 10:34:42 -04:00
Jacob Beck
a7ef822636 Add schema.yml manifest changes to integration tests 2018-07-25 08:24:07 -06:00
Jacob Beck
b995cbbbee Add/fix unit tests 2018-07-25 08:23:53 -06:00
Jacob Beck
ed22de1847 Macros in the v2 schema now take column_name/model_name explicitly instead of 'field' or 'arg' depending upon the test 2018-07-25 08:20:36 -06:00
Jacob Beck
cea11a3165 implement schema v2 support 2018-07-25 08:20:36 -06:00
Drew Banin
38dc9fa23d put "this" var back into operation context 2018-07-25 10:07:52 -04:00
Jacob Beck
c7c3d09355 Merge pull request #877 from fishtown-analytics/add-generated-dates
Add generated dates (#864)
2018-07-25 08:04:53 -06:00
Jacob Beck
3cb174feb2 Handle weird windows clocks 2018-07-25 06:51:23 -06:00
Jacob Beck
5d049b0ede PR feedback 2018-07-24 14:20:37 -06:00
Connor McArthur
012a98949c Bump version: 0.10.2a2 → 0.10.2a3 2018-07-24 15:47:12 -04:00
Drew Banin
736c5aed2f Merge pull request #876 from fishtown-analytics/fix/archive-update-redshift
Fix for archival on Redshift (add tests) and maxLength for schema tests
2018-07-24 15:33:19 -04:00
Jacob Beck
adca49cc9d Make the generated_at field mandatory 2018-07-24 13:24:44 -06:00
Jacob Beck
2330e67499 Somehow I re-broke this test that I fixed a while ago 2018-07-24 13:12:57 -06:00
Jacob Beck
9ad1dd10bf No freezing time for us 2018-07-24 13:03:55 -06:00
Jacob Beck
c5c09d077f Add generated_at field to catalog and manifest (#864) 2018-07-24 13:02:44 -06:00
Jacob Beck
b28b23c7df Merge branch 'development' into dev/isaac-asimov 2018-07-24 13:00:48 -06:00
Jacob Beck
a57c0b2428 Merge pull request #866 from fishtown-analytics/redshift-get-catalog (#831) 2018-07-24 13:00:26 -06:00
Drew Banin
be8b9c0b0b Fix for archival on Redshift (add tests) and maxLength for schema tests
Fixes bugs found in the testing of 0.10.2-a2
2018-07-24 13:40:51 -04:00
Jacob Beck
9655869416 tests lost in the rebase :( 2018-07-24 10:05:54 -06:00
Jacob Beck
526a449e5d Cleanup 2018-07-24 09:33:02 -06:00
Jacob Beck
bf67477cac Fix docs generate tests 2018-07-24 08:37:55 -06:00
Jacob Beck
eed4b5c388 When generating the catalog, explicitly clear the transaction we create after completion 2018-07-24 08:21:20 -06:00
Jacob Beck
9afe77a0bb Add a partially functioning test for late binding views 2018-07-24 08:21:20 -06:00
Jacob Beck
98295558a6 view -> LATE BINDING VIEW 2018-07-24 08:21:20 -06:00
Jacob Beck
6bf9c326f9 Add some probably not-functional redshift support 2018-07-24 08:21:19 -06:00
Claire Carroll
ff64f8166a Upgrade version of Jinja 2018-07-22 16:05:33 +10:00
Connor McArthur
fa7f5070c4 Bump version: 0.10.2a1 → 0.10.2a2 2018-07-20 08:49:14 -04:00
Connor McArthur
9bcaf2b059 fix semver code for current version 2018-07-20 08:47:50 -04:00
Connor McArthur
743fef66d8 fix version identifier for semver code 2018-07-20 08:38:44 -04:00
Connor McArthur
ece36751f0 Bump version: 0.10.1 → 0.10.2a1 2018-07-20 08:19:31 -04:00
Connor McArthur
610ae5d9e4 fixed bumpversion 2018-07-20 08:18:29 -04:00
Connor McArthur
c79c41298b fix ordering 2018-07-19 15:24:37 -04:00
Connor McArthur
ce0e31c1d9 gitignore .pythonversion 2018-07-19 15:23:25 -04:00
Connor McArthur
95fa78ac80 update RELEASE doc, bumpversion cfg 2018-07-19 15:22:55 -04:00
Connor McArthur
5c56653c1f Update CHANGELOG.md 2018-07-19 14:02:38 -04:00
Connor McArthur
f3854a7164 Update changelog for Betsy Ross (#869) 2018-07-19 13:53:50 -04:00
Connor McArthur
26af941e5d Merge branch 'betsy-ross/changelog' of github.com:fishtown-analytics/dbt into betsy-ross/changelog 2018-07-19 13:52:49 -04:00
Connor McArthur
3a6f6d4fa5 add overview & contributors 2018-07-19 13:52:42 -04:00
Connor McArthur
f1824469cd Update CHANGELOG.md 2018-07-19 09:44:02 -04:00
Connor McArthur
ccef3c4697 Update CHANGELOG.md 2018-07-19 09:43:38 -04:00
Connor McArthur
05f8c28ed1 forgot an important comma 2018-07-19 09:41:20 -04:00
Connor McArthur
03d9e0b24d updated changelog 2018-07-19 09:39:56 -04:00
Connor McArthur
5e877c055b changelog 2018-07-19 09:36:08 -04:00
Connor McArthur
c3c824330a Merge pull request #848 from fishtown-analytics/minimal-snowplow-tracker
switch to minimal-snowplow-tracker
2018-07-19 09:15:59 -04:00
Connor McArthur
5579a30392 changelog 2018-07-19 09:15:44 -04:00
Connor McArthur
3794336920 pin requests <3 2018-07-19 09:11:18 -04:00
Drew Banin
574d859bed Feature/bq incremental and archive (#856)
* Implement archive and incremental models for bigquery
2018-07-18 22:26:34 -04:00
Jacob Beck
e5bc9c08bc Bigquery catalog generation (#830) (#857) 2018-07-18 14:36:08 -06:00
Connor McArthur
7e1f6eef66 Merge pull request #853 from fishtown-analytics/tests/snowplow-tracking
Tests for snowplow tracking
2018-07-18 15:13:50 -04:00
Connor McArthur
9d8275c7d6 update lots of deps 2018-07-18 15:11:51 -04:00
Connor McArthur
4b02265c1b add vars, make sure they are not tracked 2018-07-18 10:25:17 -04:00
Connor McArthur
c13f16ca6d fix some language 2018-07-17 10:54:06 -04:00
Connor McArthur
7f9be89b8d add CONTRIBUTING.md 2018-07-17 10:51:23 -04:00
Jacob Beck
568c82e25c Merge branch 'development' into dev/isaac-asimov 2018-07-17 07:41:00 -06:00
Jacob Beck
111142ba4f Ensure tests run (#842) (#854) 2018-07-17 07:40:36 -06:00
Jacob Beck
b5a5003921 Snowflake catalog/manifest support (#832) (#849) 2018-07-16 20:30:47 -06:00
Connor McArthur
577609b392 ?? 2018-07-16 15:16:59 -04:00
Connor McArthur
3c767e015e fix noaccess test 2018-07-16 11:33:15 -04:00
Connor McArthur
c68bf7a7d8 reset flags 2018-07-16 11:11:30 -04:00
Connor McArthur
1b947ec180 Merge branch 'development' of github.com:fishtown-analytics/dbt into tests/snowplow-tracking 2018-07-16 11:07:15 -04:00
Connor McArthur
0b05355522 reset flags between tests 2018-07-16 10:49:46 -04:00
Connor McArthur
3effb00c80 refactor tests. add archive and docs generate tests. 2018-07-16 09:48:28 -04:00
Jacob Beck
e86bbe0816 Add bigquery hooks (#779) (#836) 2018-07-16 07:13:15 -06:00
Jacob Beck
8548b6d340 Improve circleci flow (#850) 2018-07-13 13:04:25 -04:00
Drew Banin
587d525d98 wip 2018-07-13 11:53:40 -04:00
Jacob Beck
4d70120b1d Switch dbt to use Circle 2.0 (#788) (#843) 2018-07-13 09:15:21 -04:00
Connor McArthur
376709c944 Bigquery fix concurrent relation loads (#835) 2018-07-12 21:26:51 -04:00
Connor McArthur
6bf8028b1f switch to minimal-snowplow-tracker 2018-07-12 20:15:21 -04:00
Connor McArthur
a2ef3741ad merge again? fix changelog 2018-07-12 19:24:08 -04:00
Connor McArthur
cdfdb24f92 merge develop, use new BigQueryRelation.External 2018-07-12 19:22:59 -04:00
Connor McArthur
3ca2d5c6b8 merge develop, use new BigQueryRelation.External 2018-07-12 19:22:28 -04:00
Connor McArthur
5f6bda072a changelog 2018-07-12 19:19:53 -04:00
Jacob Beck
40afdf4a76 Handle external bigquery relations (#791) (#828) 2018-07-11 16:20:39 -04:00
Connor McArthur
fa4a2f9eeb implement get_relation that bypasses list_relations 2018-07-11 15:50:34 -04:00
Connor McArthur
c402207f74 Merge branch 'development' of github.com:fishtown-analytics/dbt into bigquery-fix-concurrent-relation-loads 2018-07-11 15:07:41 -04:00
Jacob Beck
56e30286ba add tox environments that have the user specify what tests should be run (#837) 2018-07-11 11:09:38 -04:00
Jacob Beck
19be61ac97 Set TCP keepalive on Redshift (#782) (#826) 2018-07-11 09:41:59 -04:00
Jacob Beck
c49970bcf0 Add missing "logging.handlers" import 2018-07-10 16:22:41 -04:00
Connor McArthur
1889b5b7b4 pep8 2018-07-09 18:12:54 -04:00
Connor McArthur
9d09931903 add significant explanatory comment 2018-07-09 14:39:50 -04:00
Connor McArthur
d05f9959f3 weird missing import 2018-07-09 14:16:34 -04:00
Connor McArthur
07903368d4 first attempt: do not paginate list_relations 2018-07-09 14:15:16 -04:00
Drew Banin
684bde9039 Fix for "table dropped by concurrent query" on Redshift (#825)
* (Fixes #653) Update table and view materializations to be good transaction citizens
2018-07-09 09:54:02 -04:00
Connor McArthur
3aab1a558f Merge pull request #817 from fishtown-analytics/remove-error-tracking
remove run_error, result from tracking code
2018-07-06 11:00:32 -04:00
Connor McArthur
2ff8a25192 update changelog 2018-07-06 09:50:33 -04:00
Connor McArthur
285f1da847 fix invocations by stringifying version 2018-07-06 09:25:22 -04:00
Connor McArthur
5b2ba18bfe revert to 1.0.0 schemas, null out fields instead 2018-07-05 14:06:12 -04:00
Jacob Beck
8aba382350 Fix profile yml error handling (#816) (#820) 2018-07-05 09:33:00 -06:00
Drew Banin
540631f696 Feature/alias (#800)
Implement model aliasing

Co-authored-by: Brian Abelson <abelsonlive@users.noreply.github.com>
Co-authored-by: Jonathan Kaczynski <jon-rtr@users.noreply.github.com>
2018-07-04 12:35:59 -04:00
Drew Banin
145a82b738 Merge pull request #818 from fishtown-analytics/feature/redshift-iam-auth
Feature/redshift iam auth
2018-07-03 20:50:05 -04:00
Drew Banin
5d9b8c5995 Refactor: split out parsers (#809)
Split parsers out into their own classes
2018-07-03 19:35:07 -04:00
Jacob Beck
b20fa52bcd Fix thread values to override maximum in the default adapter (#648) (#819) 2018-07-03 14:12:33 -06:00
Drew Banin
89e45fb738 fix catalog generation (#808)
Improve catalog generation code
2018-07-03 16:07:09 -04:00
Connor McArthur
0f37c9811e remove result, as well 2018-07-03 15:54:03 -04:00
Drew Banin
e7abe27bfa pep8 2018-07-03 14:02:26 -04:00
Drew Banin
bf7608550d add unit tests, refactor 2018-07-02 22:23:12 -04:00
Drew Banin
a894ca9e65 add boto3 dep to setup.py 2018-07-02 17:03:47 -04:00
Drew Banin
58c184a1f4 cleanup; update json schemas 2018-07-02 16:58:40 -04:00
Connor McArthur
1c4ead3572 remove run_error from tracking code 2018-07-02 15:53:15 -04:00
Drew Banin
e20ac4193b Merge branch 'development' into feature/redshift-iam-auth 2018-07-02 13:28:52 -04:00
Conrad
d13d85681f fix __all__ setting for submodules (#777) (#780) 2018-06-29 14:00:06 -04:00
Drew Banin
a382def2d0 Bump/psycopg2 2.7.5 (#807)
* use bin psycopg2 dep, bump to 2.7.5

* use psycopg-2 source package, bump to 275
2018-06-28 13:58:35 -04:00
Drew Banin
d28407d735 Allow for more complex version comparison (redux) (#797)
* Allow for more complex version comparison

Allow for letters in version names
Add tests for version info

* Fix integration tests

* Combine code from @mturzanska's PR with development

Based on: https://github.com/fishtown-analytics/dbt/pull/577
Fixes: https://github.com/fishtown-analytics/dbt/issues/557
2018-06-27 12:24:41 -04:00
Drew Banin
457db9d09e Merge betsy-ross release code into development (#798)
Merge dev/betsy-ross branch into development
2018-06-18 14:43:52 -04:00
Mjumbe Poe
13691adc63 Ensure that numeric precision is included only if not None (#796)
* Ensure that numeric precision is included only if not None

* Add unit test for data_type of field with empty numeric precision

* Add numeric fields to the schema_tests integration tests
2018-06-16 18:42:39 -04:00
Drew Banin
fa43d9d117 use a subselect instead of a CTE when building incremental models (#787)
(much faster on postgresql due to the CTE optimization fence)
2018-06-08 18:32:45 -04:00
Drew Banin
5f0c645d5a Revert "use a subselect instead of a CTE when building incremental models (#785)" (#786)
This reverts commit a52b30aa2a.
2018-06-08 18:30:02 -04:00
Joe Van Dyk
a52b30aa2a use a subselect instead of a CTE when building incremental models (#785)
(much faster on postgresql due to the CTE optimization fence)
2018-06-08 18:29:12 -04:00
Drew Banin
c19a42625a Bump version: 0.10.1rc2 → 0.10.1 (#776) 2018-05-18 12:45:24 -04:00
Drew Banin
f6fcbaffad Update CHANGELOG.md 2018-05-18 12:41:04 -04:00
Drew Banin
d1d7dcb9cc fix for bug which continued to use master conn for on-run-end hooks (#764) 2018-05-16 13:36:54 -04:00
Drew Banin
5b76eb1161 fix for bad column order in statement results (#770) 2018-05-16 13:33:51 -04:00
Daniel Chalef
f65b3d677a move get_cluster_credentials into separate fn. Make method optional 2018-05-10 15:32:44 -07:00
Daniel Chalef
688fa467b2 back out SQL uppercasing 2018-05-10 14:24:02 -07:00
Daniel Chalef
bc432f9584 added iam authentication method for redshift adapter 2018-05-10 13:00:52 -07:00
Drew Banin
4133656bea rm long description, doesnt work 2018-05-01 12:43:28 -04:00
Drew Banin
0413b6c841 update setup.py file (#754)
* update setup.py file

* missing import

* bump to rc2
2018-05-01 12:30:36 -04:00
Drew Banin
8c874176f4 release candidate (#753) 2018-04-30 16:26:18 -04:00
Drew Banin
3d0c026835 Update CHANGELOG.md (#752)
* Update CHANGELOG.md

* Update CHANGELOG.md

* Update CHANGELOG.md
2018-04-30 16:19:32 -04:00
Drew Banin
d4966b6bca fix for hooks on seed that use this var (#750) 2018-04-30 14:29:34 -04:00
Drew Banin
7be0fb0d56 Fix/seed bq (#751)
* fix for seed on bq

* add test to run seed on bq twice
2018-04-30 14:27:56 -04:00
Drew Banin
f5f7bea3db add test for hooks on seeds 2018-04-29 16:07:29 -04:00
Connor McArthur
5344f54c3c Implement relations api (#727) 2018-04-26 17:38:44 -04:00
Connor McArthur
7d7b557142 add .sublime-* files to .gitignore (#747) 2018-04-26 16:31:27 -04:00
Drew Banin
0a797af081 fix var precedence from cli, add tests (#739)
* fix var precedence from cli, add tests

* updated from PR
2018-04-26 12:02:49 -04:00
Drew Banin
70464529a6 Handle circular refs (#628)
* Fixes #627 but maybe breaks other stuff too

* cleanup

* pep8, fix tests

* cleanup; fix cycle test

* add comment for cycle detection
2018-04-26 12:01:56 -04:00
Drew Banin
5fefbbd214 use a materialization to load csv files (#741) 2018-04-23 21:07:25 -04:00
Drew Banin
3567e205a9 override seed types (#708)
* override seed types

* s/_columns/column_types/g

* pep8

* fix unit tests, add integration test

* add bq, snowflake tests for seed type overrides
2018-04-22 20:03:17 -04:00
Drew Banin
e20796e828 check for node name uniqueness across refable resource types (#737)
* check for node name uniqueness across refable resource types

* change test for new error msg

* consistent error message for dupes

* small refactor, improve error msg

* fix tests
2018-04-20 13:23:12 -07:00
Drew Banin
c81417dc1a fix bq seed (#735)
* fix bq seed

* revert makefile
2018-04-20 10:30:51 -07:00
Drew Banin
d966ec28aa add requests dep to fix broken install (#732) 2018-04-10 14:59:16 -04:00
Drew Banin
64a6ec552c Fix/close all connections (#722) 2018-04-10 14:58:31 -04:00
Connor McArthur
e680e46671 if the connection is closed, do not create an unhandled exception. just exit. (#705) 2018-04-10 13:22:31 -04:00
Drew Banin
edbc7ca885 Fix/snowflake unbound local var (#721)
* Use snowflake's query parser to split statements into queries

fixes: https://github.com/fishtown-analytics/dbt/issues/719
fixes: https://github.com/fishtown-analytics/dbt/issues/711

* nicer error if no query is provided

* make stringio buf work on py3
2018-04-09 15:41:24 -04:00
Connor McArthur
4131c06e12 fix statements in on run start hooks (#693)
* fix statements in on run start hooks

* do nothing on empty hook

* pr feedback
2018-04-09 15:33:35 -04:00
Connor McArthur
848ff6a3f9 fix snowflake seed types, add chunking for seed files with more than 16k rows (#694) 2018-04-09 15:33:19 -04:00
Drew Banin
68634a2e87 use create table as syntax on BigQuery (#717)
(wip) Fixes: https://github.com/fishtown-analytics/dbt/issues/712
Fixes: https://github.com/fishtown-analytics/dbt/issues/716
2018-04-06 18:33:37 -04:00
Drew Banin
2d441f8ebd get_columns_in_table parity (#709)
* support numeric types, slight refactor (fixes: https://github.com/fishtown-analytics/dbt/issues/701)

* working on bigquery

* Support cross-database column lookups

Fixes: https://github.com/fishtown-analytics/dbt/issues/679

* pep8 fixes, plus fix syntax error

Previous commits in this PR will fix https://github.com/fishtown-analytics/dbt/issues/579

* test for bq get_columns_in_table

* pep8

* fix test, catch 404 for get_columns_in_table

* fix bq tests

* add tests, unnest columns with macro
2018-04-06 18:31:36 -04:00
Drew Banin
79c60f68c5 rm freezegun deps - this is only needed in the dev reqs (#695) 2018-04-06 13:16:18 -04:00
Drew Banin
5664fe4a2e Fixes https://github.com/fishtown-analytics/dbt/issues/718 (#720) 2018-04-06 11:21:00 -04:00
Drew Banin
69616bb3c9 Integration test project updates (#698)
* update for new integration project model names

* more model name updates
2018-03-28 20:01:04 -04:00
732 changed files with 47868 additions and 12619 deletions

.bumpversion.cfg

@@ -1,9 +1,36 @@
[bumpversion]
current_version = 0.10.0
commit = True
tag = True
current_version = 0.14.2
parse = (?P<major>\d+)
\.(?P<minor>\d+)
\.(?P<patch>\d+)
((?P<prerelease>[a-z]+)(?P<num>\d+))?
serialize =
{major}.{minor}.{patch}{prerelease}{num}
{major}.{minor}.{patch}
commit = False
tag = False
[bumpversion:part:prerelease]
first_value = a
values =
a
b
rc
[bumpversion:part:num]
first_value = 1
[bumpversion:file:setup.py]
[bumpversion:file:dbt/version.py]
[bumpversion:file:core/setup.py]
[bumpversion:file:core/dbt/version.py]
[bumpversion:file:plugins/postgres/setup.py]
[bumpversion:file:plugins/redshift/setup.py]
[bumpversion:file:plugins/snowflake/setup.py]
[bumpversion:file:plugins/bigquery/setup.py]

.circleci/config.yml

@@ -0,0 +1,129 @@
version: 2
jobs:
unit:
docker: &test_and_postgres
- image: fishtownjacob/test-container
- image: postgres
name: database
environment: &pgenv
POSTGRES_USER: "root"
POSTGRES_PASSWORD: "password"
POSTGRES_DB: "dbt"
steps:
- checkout
- run: &setupdb
name: Setup postgres
command: bash test/setup_db.sh
environment:
PGHOST: database
PGUSER: root
PGPASSWORD: password
PGDATABASE: postgres
- run: tox -e flake8,unit-py27,unit-py36
integration-postgres-py36:
docker: *test_and_postgres
steps:
- checkout
- run: *setupdb
- run:
name: Run tests
command: tox -e integration-postgres-py36
- store_artifacts:
path: ./logs
integration-snowflake-py36:
docker: &test_only
- image: fishtownjacob/test-container
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-snowflake-py36
no_output_timeout: 1h
- store_artifacts:
path: ./logs
integration-redshift-py36:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-redshift-py36
- store_artifacts:
path: ./logs
integration-bigquery-py36:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-bigquery-py36
- store_artifacts:
path: ./logs
integration-postgres-py27:
docker: *test_and_postgres
steps:
- checkout
- run: *setupdb
- run:
name: Run tests
command: tox -e integration-postgres-py27
- store_artifacts:
path: ./logs
integration-snowflake-py27:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-snowflake-py27
no_output_timeout: 1h
- store_artifacts:
path: ./logs
integration-redshift-py27:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-redshift-py27
- store_artifacts:
path: ./logs
integration-bigquery-py27:
docker: *test_only
steps:
- checkout
- run:
name: Run tests
command: tox -e integration-bigquery-py27
- store_artifacts:
path: ./logs
workflows:
version: 2
test-everything:
jobs:
- unit
- integration-postgres-py36:
requires:
- unit
- integration-postgres-py27:
requires:
- unit
- integration-redshift-py27:
requires:
- integration-postgres-py27
- integration-bigquery-py27:
requires:
- integration-postgres-py27
- integration-snowflake-py27:
requires:
- integration-postgres-py27
- integration-redshift-py36:
requires:
- integration-postgres-py36
- integration-bigquery-py36:
requires:
- integration-postgres-py36
- integration-snowflake-py36:
requires:
- integration-postgres-py36

.coveragerc

@@ -1,3 +0,0 @@
[report]
include =
dbt/*

.dockerignore

@@ -0,0 +1 @@
*

.github/ISSUE_TEMPLATE/bug_report.md

@@ -0,0 +1,41 @@
---
name: Bug report
about: Report a bug or an issue you've found with dbt
title: ''
labels: bug, triage
assignees: ''
---
### Describe the bug
A clear and concise description of what the bug is. What command did you run? What happened?
### Steps To Reproduce
In as much detail as possible, please provide steps to reproduce the issue. Sample data that triggers the issue, example model code, etc is all very helpful here.
### Expected behavior
A clear and concise description of what you expected to happen.
### Screenshots and log output
If applicable, add screenshots or log output to help explain your problem.
### System information
**Which database are you using dbt with?**
- [ ] postgres
- [ ] redshift
- [ ] bigquery
- [ ] snowflake
- [ ] other (specify: ____________)
**The output of `dbt --version`:**
```
<output goes here>
```
**The operating system you're using:**
**The output of `python --version`:**
### Additional context
Add any other context about the problem here.

.github/ISSUE_TEMPLATE/feature_request.md

@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for dbt
title: ''
labels: enhancement, triage
assignees: ''
---
### Describe the feature
A clear and concise description of what you want to happen.
### Describe alternatives you've considered
A clear and concise description of any alternative solutions or features you've considered.
### Additional context
Is this feature database-specific? Which database(s) is/are relevant? Please include any other relevant context here.
### Who will this benefit?
What kind of use case will this feature be useful for? Please be specific and provide examples, this will help us prioritize properly.

.gitignore

@@ -65,3 +65,14 @@ target/
#Emacs
*~
# Sublime Text
*.sublime-*
# Vim
*.sw*
.python-version
# Vim
*.sw*

CHANGELOG.md

@@ -1,3 +1,519 @@
## dbt 0.14.2 (September 13, 2019)
### Overview
This is a bugfix release.
### Fixes:
- Fix for dbt hanging at the end of execution in `dbt source snapshot-freshness` tasks ([#1728](https://github.com/fishtown-analytics/dbt/issues/1728), [#1729](https://github.com/fishtown-analytics/dbt/pull/1729))
- Fix for broken "packages" and "tags" selector dropdowns in the dbt Documentation website ([docs#47](https://github.com/fishtown-analytics/dbt-docs/issues/47), [#1726](https://github.com/fishtown-analytics/dbt/pull/1726))
## dbt 0.14.1 (September 3, 2019)
### Overview
This is primarily a bugfix release that also contains a few minor improvements. Note: this release includes an important change in how the `check` snapshot strategy works. See [#1614](https://github.com/fishtown-analytics/dbt/pull/1614) for more information. If you are using snapshots with the `check` strategy on dbt v0.14.0, it is strongly recommended that you upgrade to 0.14.1 at your earliest convenience.
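For readers unfamiliar with the strategy referenced above, here is a minimal sketch of a snapshot that uses the `check` strategy; the schema, table, and column names are illustrative only, not taken from any real project.
```
{% snapshot orders_snapshot %}

    {{
        config(
            target_schema='snapshots',
            unique_key='id',
            strategy='check',
            check_cols=['status', 'amount']
        )
    }}

    -- dbt compares the check_cols between runs and records a new row when they change
    select * from analytics.orders

{% endsnapshot %}
```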
### Breaking changes
- The undocumented `macros` attribute was removed from the `graph` context variable ([#1615](https://github.com/fishtown-analytics/dbt/pull/1615))
### Features:
- Summarize warnings at the end of dbt runs ([#1597](https://github.com/fishtown-analytics/dbt/issues/1597), [#1654](https://github.com/fishtown-analytics/dbt/pull/1654))
- Speed up catalog generation on postgres by avoiding use of the `information_schema` ([#1540](https://github.com/fishtown-analytics/dbt/pull/1540))
- Docs site updates ([#1621](https://github.com/fishtown-analytics/dbt/issues/1621)):
  - Fix for incorrect node selection logic in DAG view ([docs#38](https://github.com/fishtown-analytics/dbt-docs/pull/38))
  - Update page title, meta tags, and favicon ([docs#39](https://github.com/fishtown-analytics/dbt-docs/pull/39))
  - Bump the version of `dbt-styleguide`, changing file tree colors from orange to black :)
- Add environment variables for macro debugging flags ([#1628](https://github.com/fishtown-analytics/dbt/issues/1628), [#1629](https://github.com/fishtown-analytics/dbt/pull/1629))
- Speed up node selection by making it linear, rather than quadratic, in complexity ([#1611](https://github.com/fishtown-analytics/dbt/issues/1611), [#1615](https://github.com/fishtown-analytics/dbt/pull/1615))
- Specify the `application` field in Snowflake connections ([#1622](https://github.com/fishtown-analytics/dbt/issues/1622), [#1623](https://github.com/fishtown-analytics/dbt/pull/1623))
- Add support for clustering on Snowflake ([#634](https://github.com/fishtown-analytics/dbt/issues/634), [#1591](https://github.com/fishtown-analytics/dbt/pull/1591), [#1689](https://github.com/fishtown-analytics/dbt/pull/1689)) ([docs](https://docs.getdbt.com/docs/snowflake-configs#section-configuring-table-clustering))
- Add support for job priority on BigQuery ([#1456](https://github.com/fishtown-analytics/dbt/issues/1456), [#1673](https://github.com/fishtown-analytics/dbt/pull/1673)) ([docs](https://docs.getdbt.com/docs/profile-bigquery#section-priority))
- Add `node.config` and `node.tags` to the `generate_schema_name` and `generate_alias_name` macro context ([#1700](https://github.com/fishtown-analytics/dbt/issues/1700), [#1701](https://github.com/fishtown-analytics/dbt/pull/1701))
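As a hedged illustration of the last item above, a custom `generate_schema_name` macro can now branch on the node's tags or config; the tag and schema names below are hypothetical.
```
{% macro generate_schema_name(custom_schema_name, node) -%}
    {#- hypothetical routing: models tagged "pii" land in a restricted schema; node.config is also available here -#}
    {%- if 'pii' in node.tags -%}
        restricted
    {%- elif custom_schema_name is none -%}
        {{ target.schema }}
    {%- else -%}
        {{ target.schema }}_{{ custom_schema_name | trim }}
    {%- endif -%}
{%- endmacro %}
```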
### Fixes:
- Fix for reused `check_cols` values in snapshots ([#1614](https://github.com/fishtown-analytics/dbt/pull/1614), [#1709](https://github.com/fishtown-analytics/dbt/pull/1709))
- Fix for rendering column descriptions in sources ([#1619](https://github.com/fishtown-analytics/dbt/issues/1619), [#1633](https://github.com/fishtown-analytics/dbt/pull/1633))
- Fix for `is_incremental()` returning True for models that are not materialized as incremental models ([#1249](https://github.com/fishtown-analytics/dbt/issues/1249), [#1608](https://github.com/fishtown-analytics/dbt/pull/1608))
- Fix for serialization of BigQuery results which contain nested or repeated records ([#1626](https://github.com/fishtown-analytics/dbt/issues/1626), [#1638](https://github.com/fishtown-analytics/dbt/pull/1638))
- Fix for loading seed files which contain non-ascii characters ([#1632](https://github.com/fishtown-analytics/dbt/issues/1632), [#1644](https://github.com/fishtown-analytics/dbt/pull/1644))
- Fix for creation of user cookies in incorrect directories when `--profile-dir` or `$DBT_PROFILES_DIR` is provided ([#1645](https://github.com/fishtown-analytics/dbt/issues/1645), [#1656](https://github.com/fishtown-analytics/dbt/pull/1656))
- Fix for error handling when transactions are being rolled back ([#1647](https://github.com/fishtown-analytics/dbt/pull/1647))
- Fix for incorrect references to `dbt.exceptions` in jinja code ([#1569](https://github.com/fishtown-analytics/dbt/issues/1569), [#1609](https://github.com/fishtown-analytics/dbt/pull/1609))
- Fix for duplicated schema creation due to case-sensitive comparison ([#1651](https://github.com/fishtown-analytics/dbt/issues/1651), [#1663](https://github.com/fishtown-analytics/dbt/pull/1663))
- Fix for "schema stub" created automatically by dbt ([#913](https://github.com/fishtown-analytics/dbt/issues/913), [#1663](https://github.com/fishtown-analytics/dbt/pull/1663))
- Fix for incremental merge query on old versions of postgres (<=9.6) ([#1665](https://github.com/fishtown-analytics/dbt/issues/1665), [#1666](https://github.com/fishtown-analytics/dbt/pull/1666))
- Fix for serializing results of queries which return `TIMESTAMP_TZ` columns on Snowflake in the RPC server ([#1670](https://github.com/fishtown-analytics/dbt/pull/1670))
- Fix typo in InternalException ([#1640](https://github.com/fishtown-analytics/dbt/issues/1640), [#1672](https://github.com/fishtown-analytics/dbt/pull/1672))
- Fix typo in CLI help for snapshot migration subcommand ([#1664](https://github.com/fishtown-analytics/dbt/pull/1664))
- Fix for error handling logic when empty queries are submitted on Snowflake ([#1693](https://github.com/fishtown-analytics/dbt/issues/1693), [#1694](https://github.com/fishtown-analytics/dbt/pull/1694))
- Fix for non-atomic column expansion logic in Snowflake incremental models and snapshots ([#1687](https://github.com/fishtown-analytics/dbt/issues/1687), [#1690](https://github.com/fishtown-analytics/dbt/pull/1690))
- Fix for unprojected `count(*)` expression injected by custom data tests ([#1688](https://github.com/fishtown-analytics/dbt/pull/1688)) (a sketch of a custom data test follows this list)
- Fix for `dbt run` and `dbt docs generate` commands when running against Panoply Redshift ([#1479](https://github.com/fishtown-analytics/dbt/issues/1479), [#1686](https://github.com/fishtown-analytics/dbt/pull/1686))
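For context on the custom data test fix noted above, a data test is simply a SQL file in the project's test paths that should return zero rows; the file, model, and column names below are invented for the example.
```
-- tests/assert_no_negative_amounts.sql (illustrative)
-- dbt wraps this query and counts the rows it returns; any returned rows fail the test
select
    id,
    amount
from {{ ref('payments') }}
where amount < 0
```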
### Contributors:
Thanks for your contributions to dbt!
- [@levimalott](https://github.com/levimalott) ([#1647](https://github.com/fishtown-analytics/dbt/pull/1647))
- [@aminamos](https://github.com/aminamos) ([#1609](https://github.com/fishtown-analytics/dbt/pull/1609))
- [@elexisvenator](https://github.com/elexisvenator) ([#1540](https://github.com/fishtown-analytics/dbt/pull/1540))
- [@edmundyan](https://github.com/edmundyan) ([#1663](https://github.com/fishtown-analytics/dbt/pull/1663))
- [@vitorbaptista](https://github.com/vitorbaptista) ([#1664](https://github.com/fishtown-analytics/dbt/pull/1664))
- [@sjwhitworth](https://github.com/sjwhitworth) ([#1672](https://github.com/fishtown-analytics/dbt/pull/1672), [#1673](https://github.com/fishtown-analytics/dbt/pull/1673))
- [@mikaelene](https://github.com/mikaelene) ([#1688](https://github.com/fishtown-analytics/dbt/pull/1688), [#1709](https://github.com/fishtown-analytics/dbt/pull/1709))
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#1591](https://github.com/fishtown-analytics/dbt/pull/1591), [#1689](https://github.com/fishtown-analytics/dbt/pull/1689))
## dbt 0.14.0 - Wilt Chamberlain (July 10, 2019)
### Overview
- Replace Archives with Snapshots ([docs](https://docs.getdbt.com/v0.14/docs/snapshots), [migration guide](https://docs.getdbt.com/v0.14/docs/upgrading-to-014))
- Add three new top-level commands:
  - `dbt ls` ([docs](https://docs.getdbt.com/v0.14/docs/list))
  - `dbt run-operation` ([docs](https://docs.getdbt.com/v0.14/docs/run-operation))
  - `dbt rpc` ([docs](https://docs.getdbt.com/v0.14/docs/rpc))
- Support the specification of severity levels for schema and data tests ([docs](https://docs.getdbt.com/v0.14/docs/testing#section-test-severity))
- Many new quality of life improvements and bugfixes
### Breaking changes
- Stub out adapter methods at parse-time to speed up parsing ([#1413](https://github.com/fishtown-analytics/dbt/pull/1413))
- Removed support for the `--non-destructive` flag ([#1419](https://github.com/fishtown-analytics/dbt/pull/1419), [#1415](https://github.com/fishtown-analytics/dbt/issues/1415))
- Removed support for the `sql_where` config to incremental models ([#1408](https://github.com/fishtown-analytics/dbt/pull/1408), [#1351](https://github.com/fishtown-analytics/dbt/issues/1351))
- Changed `expand_target_column_types` to take a Relation instead of a string ([#1478](https://github.com/fishtown-analytics/dbt/pull/1478))
- Replaced Archives with Snapshots
- Normalized meta-column names in Snapshot tables ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#251](https://github.com/fishtown-analytics/dbt/issues/251))
### Features
- Add `run-operation` command which invokes macros directly from the CLI ([#1328](https://github.com/fishtown-analytics/dbt/pull/1328)) ([docs](https://docs.getdbt.com/v0.14/docs/run-operation))
- Add a `dbt ls` command which lists resources in your project ([#1436](https://github.com/fishtown-analytics/dbt/pull/1436), [#467](https://github.com/fishtown-analytics/dbt/issues/467)) ([docs](https://docs.getdbt.com/v0.14/docs/list))
- Add Snapshots, an improvement over Archives ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#1175](https://github.com/fishtown-analytics/dbt/issues/1175)) ([docs](https://docs.getdbt.com/v0.14/docs/snapshots))
- Add the 'check' snapshot strategy ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#706](https://github.com/fishtown-analytics/dbt/issues/706))
- Support Snapshots across logical databases ([#1455](https://github.com/fishtown-analytics/dbt/issues/1455))
- Implement Snapshots using a merge statement where supported ([#1478](https://github.com/fishtown-analytics/dbt/pull/1478))
- Support Snapshot selection using `--select` ([#1520](https://github.com/fishtown-analytics/dbt/pull/1520), [#1512](https://github.com/fishtown-analytics/dbt/issues/1512))
- Add an RPC server via `dbt rpc` ([#1301](https://github.com/fishtown-analytics/dbt/pull/1301), [#1274](https://github.com/fishtown-analytics/dbt/issues/1274)) ([docs](https://docs.getdbt.com/v0.14/docs/rpc))
- Add `ps` and `kill` commands to the rpc server ([#1380](https://github.com/fishtown-analytics/dbt/pull/1380/), [#1369](https://github.com/fishtown-analytics/dbt/issues/1369), [#1370](https://github.com/fishtown-analytics/dbt/issues/1370))
- Add support for ephemeral nodes to the rpc server ([#1373](https://github.com/fishtown-analytics/dbt/pull/1373), [#1368](https://github.com/fishtown-analytics/dbt/issues/1368))
- Add support for inline macros to the rpc server ([#1375](https://github.com/fishtown-analytics/dbt/pull/1375), [#1372](https://github.com/fishtown-analytics/dbt/issues/1372), [#1348](https://github.com/fishtown-analytics/dbt/pull/1348))
- Improve error handling in the rpc server ([#1341](https://github.com/fishtown-analytics/dbt/pull/1341), [#1309](https://github.com/fishtown-analytics/dbt/issues/1309), [#1310](https://github.com/fishtown-analytics/dbt/issues/1310))
- Made printer width configurable ([#1026](https://github.com/fishtown-analytics/dbt/issues/1026), [#1247](https://github.com/fishtown-analytics/dbt/pull/1247)) ([docs](https://docs.getdbt.com/v0.14/docs/configure-your-profile#section-additional-profile-configurations))
- Retry package downloads from the hub.getdbt.com ([#1451](https://github.com/fishtown-analytics/dbt/issues/1451), [#1491](https://github.com/fishtown-analytics/dbt/pull/1491))
- Add a test "severity" level, presented as a keyword argument to schema tests ([#1410](https://github.com/fishtown-analytics/dbt/pull/1410), [#1005](https://github.com/fishtown-analytics/dbt/issues/1005)) ([docs](https://docs.getdbt.com/v0.14/docs/testing#section-test-severity))
- Add a `generate_alias_name` macro to configure alias names dynamically ([#1363](https://github.com/fishtown-analytics/dbt/pull/1363)) ([docs](https://docs.getdbt.com/v0.14/docs/using-custom-aliases#section-generate_alias_name))
- Add a `node` argument to `generate_schema_name` to configure schema names dynamically ([#1483](https://github.com/fishtown-analytics/dbt/pull/1483), [#1463](https://github.com/fishtown-analytics/dbt/issues/1463)) ([docs](https://docs.getdbt.com/v0.14/docs/using-custom-schemas#section-generate_schema_name-arguments))
- Use `create or replace` on Snowflake to rebuild tables and views atomically ([#1101](https://github.com/fishtown-analytics/dbt/issues/1101), [#1409](https://github.com/fishtown-analytics/dbt/pull/1409))
- Use `merge` statement for incremental models on Snowflake ([#1414](https://github.com/fishtown-analytics/dbt/issues/1414), [#1307](https://github.com/fishtown-analytics/dbt/pull/1307), [#1409](https://github.com/fishtown-analytics/dbt/pull/1409)) ([docs](https://docs.getdbt.com/v0.14/docs/snowflake-configs#section-merge-behavior-incremental-models-)) (a sketch of an incremental model follows this list)
- Add support for seed CSV files that start with a UTF-8 Byte Order Mark (BOM) ([#1452](https://github.com/fishtown-analytics/dbt/pull/1452), [#1177](https://github.com/fishtown-analytics/dbt/issues/1177))
- Add a warning when git packages are not pinned to a version ([#1453](https://github.com/fishtown-analytics/dbt/pull/1453), [#1446](https://github.com/fishtown-analytics/dbt/issues/1446))
- Add logging for `on-run-start` and `on-run-end` hooks to console output ([#1440](https://github.com/fishtown-analytics/dbt/pull/1440), [#696](https://github.com/fishtown-analytics/dbt/issues/696))
- Add modules and tracking information to the rendering context for configuration files ([#1441](https://github.com/fishtown-analytics/dbt/pull/1441), [#1320](https://github.com/fishtown-analytics/dbt/issues/1320))
- Add support for `null` vars, and distinguish `null` vars from unset vars ([#1426](https://github.com/fishtown-analytics/dbt/pull/1426), [#608](https://github.com/fishtown-analytics/dbt/issues/608))
- Add support for the `search_path` configuration in Postgres/Redshift profiles ([#1477](https://github.com/fishtown-analytics/dbt/issues/1477), [#1476](https://github.com/fishtown-analytics/dbt/pull/1476)) ([docs (postgres)](https://docs.getdbt.com/v0.14/docs/profile-postgres), [docs (redshift)](https://docs.getdbt.com/v0.14/docs/profile-redshift))
- Add support for persisting documentation as `descriptions` for tables and views on BigQuery ([#1031](https://github.com/fishtown-analytics/dbt/issues/1031), [#1285](https://github.com/fishtown-analytics/dbt/pull/1285)) ([docs](https://docs.getdbt.com/v0.14/docs/bigquery-configs#section-persisting-model-descriptions))
- Add a `--project-dir` path which will invoke dbt in the specified directory ([#1549](https://github.com/fishtown-analytics/dbt/pull/1549), [#1544](https://github.com/fishtown-analytics/dbt/issues/1544))
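To make the `merge` item above concrete, here is a hedged sketch of an incremental model whose `unique_key` allows dbt to build it with a merge statement on Snowflake; the table and column names are invented.
```
{{ config(materialized='incremental', unique_key='event_id') }}

select
    event_id,
    user_id,
    event_time
from raw.app.events

{% if is_incremental() %}
  -- on incremental runs, only pull rows newer than what is already in the target table
  where event_time > (select max(event_time) from {{ this }})
{% endif %}
```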
### dbt docs Changes
- Add searching by tag name ([#32](https://github.com/fishtown-analytics/dbt-docs/pull/32))
- Add context menu link to export graph viz as a PNG ([#34](https://github.com/fishtown-analytics/dbt-docs/pull/34))
- Fix for clicking models in left-nav while search results are open ([#31](https://github.com/fishtown-analytics/dbt-docs/pull/31))
### Fixes
- Fix for unduly long timeouts when anonymous event tracking is blocked ([#1445](https://github.com/fishtown-analytics/dbt/pull/1445), [#1063](https://github.com/fishtown-analytics/dbt/issues/1063))
- Fix for error with mostly-duplicate git urls in packages, picking the one that came first. ([#1428](https://github.com/fishtown-analytics/dbt/pull/1428), [#1084](https://github.com/fishtown-analytics/dbt/issues/1084))
- Fix for unrendered `description` field as jinja in top-level Source specification ([#1484](https://github.com/fishtown-analytics/dbt/issues/1484), [#1494](https://github.com/fishtown-analytics/dbt/issues/1494))
- Fix for API error when very large temp tables are created in BigQuery ([#1423](https://github.com/fishtown-analytics/dbt/issues/1423), [#1478](https://github.com/fishtown-analytics/dbt/pull/1478))
- Fix for compiler errors that occurred if jinja code was present outside of a docs blocks in .md files ([#1513](https://github.com/fishtown-analytics/dbt/pull/1513), [#988](https://github.com/fishtown-analytics/dbt/issues/988))
- Fix `TEXT` handling on postgres and redshift ([#1420](https://github.com/fishtown-analytics/dbt/pull/1420), [#781](https://github.com/fishtown-analytics/dbt/issues/781))
- Fix for compiler error when vars are undefined but only used in disabled models ([#1429](https://github.com/fishtown-analytics/dbt/pull/1429), [#434](https://github.com/fishtown-analytics/dbt/issues/434))
- Improved the error message when iterating over the results of a macro that doesn't exist ([#1425](https://github.com/fishtown-analytics/dbt/pull/1425), [#1424](https://github.com/fishtown-analytics/dbt/issues/1424))
- Improved the error message when tests have invalid parameter definitions ([#1427](https://github.com/fishtown-analytics/dbt/pull/1427), [#1325](https://github.com/fishtown-analytics/dbt/issues/1325))
- Improved the error message when a user tries to archive a non-existent table ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#1066](https://github.com/fishtown-analytics/dbt/issues/1066))
- Fix for archive logic which tried to create already-existing destination schemas ([#1398](https://github.com/fishtown-analytics/dbt/pull/1398), [#758](https://github.com/fishtown-analytics/dbt/issues/758))
- Fix for incorrect error codes when Operations exit with an error ([#1406](https://github.com/fishtown-analytics/dbt/pull/1406), [#1377](https://github.com/fishtown-analytics/dbt/issues/1377))
- Fix for missing compiled SQL when the rpc server encounters a database error ([#1381](https://github.com/fishtown-analytics/dbt/pull/1381), [#1371](https://github.com/fishtown-analytics/dbt/issues/1371))
- Fix for broken link in the profile.yml generated by `dbt init` ([#1366](https://github.com/fishtown-analytics/dbt/pull/1366), [#1344](https://github.com/fishtown-analytics/dbt/issues/1344))
- Fix the sample test.env file's redshift password field ([#1364](https://github.com/fishtown-analytics/dbt/pull/1364))
- Fix collisions on models running concurrently that have duplicate names but have distinguishing aliases ([#1342](https://github.com/fishtown-analytics/dbt/pull/1342), [#1321](https://github.com/fishtown-analytics/dbt/issues/1321))
- Fix for a bad error message when a `version` is missing from a package spec in `packages.yml` ([#1551](https://github.com/fishtown-analytics/dbt/pull/1551), [#1546](https://github.com/fishtown-analytics/dbt/issues/1546))
- Fix for wrong package scope when the two-arg method of `ref` is used ([#1515](https://github.com/fishtown-analytics/dbt/pull/1515), [#1504](https://github.com/fishtown-analytics/dbt/issues/1504))
- Fix missing import in test suite ([#1572](https://github.com/fishtown-analytics/dbt/pull/1572))
- Fix for a Snowflake error when an external table exists in a schema that dbt operates on ([#1571](https://github.com/fishtown-analytics/dbt/pull/1571), [#1505](https://github.com/fishtown-analytics/dbt/issues/1505))
### Under the hood
- Use pytest for tests ([#1417](https://github.com/fishtown-analytics/dbt/pull/1417))
- Use flake8 for linting ([#1361](https://github.com/fishtown-analytics/dbt/pull/1361), [#1333](https://github.com/fishtown-analytics/dbt/issues/1333))
- Added a flag for wrapping models and tests in jinja blocks ([#1407](https://github.com/fishtown-analytics/dbt/pull/1407), [#1400](https://github.com/fishtown-analytics/dbt/issues/1400))
- Connection management: Bind connections to threads rather than to names ([#1336](https://github.com/fishtown-analytics/dbt/pull/1336), [#1312](https://github.com/fishtown-analytics/dbt/issues/1312))
- Add deprecation warning for dbt users on Python2 ([#1534](https://github.com/fishtown-analytics/dbt/pull/1534), [#1531](https://github.com/fishtown-analytics/dbt/issues/1531))
- Upgrade networkx to v2.x ([#1509](https://github.com/fishtown-analytics/dbt/pull/1509), [#1496](https://github.com/fishtown-analytics/dbt/issues/1496))
- Anonymously track adapter type and rpc requests when tracking is enabled ([#1575](https://github.com/fishtown-analytics/dbt/pull/1575), [#1574](https://github.com/fishtown-analytics/dbt/issues/1574))
- Fix for test warnings and general test suite cleanup ([#1578](https://github.com/fishtown-analytics/dbt/pull/1578))
### Contributors:
Over a dozen contributors wrote code for this release of dbt! Thanks for taking the time, and nice work y'all! :)
- [@nydnarb](https://github.com/nydnarb) ([#1363](https://github.com/fishtown-analytics/dbt/issues/1363))
- [@emilieschario](https://github.com/emilieschario) ([#1366](https://github.com/fishtown-analytics/dbt/pull/1366))
- [@bastienboutonnet](https://github.com/bastienboutonnet) ([#1409](https://github.com/fishtown-analytics/dbt/pull/1409))
- [@kasanchez](https://github.com/kasanchez) ([#1247](https://github.com/fishtown-analytics/dbt/pull/1247))
- [@Blakewell](https://github.com/Blakewell) ([#1307](https://github.com/fishtown-analytics/dbt/pull/1307))
- [@buremba](https://github.com/buremba) ([#1476](https://github.com/fishtown-analytics/dbt/pull/1476))
- [@darrenhaken](https://github.com/darrenhaken) ([#1285](https://github.com/fishtown-analytics/dbt/pull/1285))
- [@tbescherer](https://github.com/tbescherer) ([#1504](https://github.com/fishtown-analytics/dbt/issues/1504))
- [@heisencoder](https://github.com/heisencoder) ([#1509](https://github.com/fishtown-analytics/dbt/pull/1509), [#1549](https://github.com/fishtown-analytics/dbt/pull/1549), [#1578](https://github.com/fishtown-analytics/dbt/pull/1578))
- [@cclauss](https://github.com/cclauss) ([#1572](https://github.com/fishtown-analytics/dbt/pull/1572))
- [@josegalarza](https://github.com/josegalarza) ([#1571](https://github.com/fishtown-analytics/dbt/pull/1571))
- [@rmgpinto](https://github.com/rmgpinto) ([docs#31](https://github.com/fishtown-analytics/dbt-docs/pull/31), [docs#32](https://github.com/fishtown-analytics/dbt-docs/pull/32))
- [@groodt](https://github.com/groodt) ([docs#34](https://github.com/fishtown-analytics/dbt-docs/pull/34))
## dbt 0.13.1 (May 13, 2019)
### Overview
This is a bugfix release.
### Bugfixes
- Add "MaterializedView" relation type to the Snowflake adapter ([#1430](https://github.com/fishtown-analytics/dbt/issues/1430), [#1432](https://github.com/fishtown-analytics/dbt/pull/1432)) ([@adriank-convoy](https://github.com/adriank-convoy))
- Quote databases properly ([#1396](https://github.com/fishtown-analytics/dbt/issues/1396), [#1402](https://github.com/fishtown-analytics/dbt/pull/1402))
- Use "ilike" instead of "=" for database equality when listing schemas ([#1411](https://github.com/fishtown-analytics/dbt/issues/1411), [#1412](https://github.com/fishtown-analytics/dbt/pull/1412))
- Pass the model name along in get_relations ([#1384](https://github.com/fishtown-analytics/dbt/issues/1384), [#1388](https://github.com/fishtown-analytics/dbt/pull/1388))
- Add logging to dbt clean ([#1261](https://github.com/fishtown-analytics/dbt/issues/1261), [#1383](https://github.com/fishtown-analytics/dbt/pull/1383), [#1391](https://github.com/fishtown-analytics/dbt/pull/1391)) ([@emilieschario](https://github.com/emilieschario))
### dbt Docs
- Search by columns ([dbt-docs#23](https://github.com/fishtown-analytics/dbt-docs/pull/23)) ([rmgpinto](https://github.com/rmgpinto))
- Support @ selector ([dbt-docs#27](https://github.com/fishtown-analytics/dbt-docs/pull/27))
- Fix number formatting on Snowflake and BQ in table stats ([dbt-docs#28](https://github.com/fishtown-analytics/dbt-docs/pull/28))
### Contributors:
Thanks for your contributions to dbt!
- [@emilieschario](https://github.com/emilieschario)
- [@adriank-convoy](https://github.com/adriank-convoy)
- [@rmgpinto](https://github.com/rmgpinto)
## dbt 0.13.0 - Stephen Girard (March 21, 2019)
### Overview
This release provides [a stable API for building new adapters](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter) and reimplements dbt's adapters as "plugins". Additionally, a new adapter for [Presto](https://github.com/fishtown-analytics/dbt-presto) was added using this architecture. Beyond adapters, this release of dbt also includes [Sources](https://docs.getdbt.com/v0.13/docs/using-sources) which can be used to document and test source data tables. See the full list of features added in 0.13.0 below.
### Breaking Changes
- version 1 schema.yml specs are no longer implemented. Please use the version 2 spec instead ([migration guide](https://docs.getdbt.com/docs/upgrading-from-0-10-to-0-11#section-schema-yml-v2-syntax))
- `{{this}}` is no longer implemented for `on-run-start` and `on-run-end` hooks. Use `{{ target }}` or an [`on-run-end` context variable](https://docs.getdbt.com/docs/on-run-end-context#section-schemas) instead ([#1176](https://github.com/fishtown-analytics/dbt/pull/1176), implementing [#878](https://github.com/fishtown-analytics/dbt/issues/878))
- A number of materialization-specific adapter methods have changed in breaking ways. If you use these adapter methods in your macros or materializations, you may need to update your code accordingly.
  - query_for_existing - **removed**, use [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) instead.
  - [get_missing_columns](https://docs.getdbt.com/v0.13/reference#adapter-get-missing-columns) - changed to take `Relation`s instead of schemas and identifiers
  - [expand_target_column_types](https://docs.getdbt.com/v0.13/reference#adapter-expand-target-column-types) - changed to take a `Relation` instead of schema, identifier
  - [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) - added a `database` argument
  - [create_schema](https://docs.getdbt.com/v0.13/reference#adapter-create-schema) - added a `database` argument
  - [drop_schema](https://docs.getdbt.com/v0.13/reference#adapter-drop-schema) - added a `database` argument
### Deprecations
- The following adapter methods are now deprecated, and will be removed in a future release:
  - get_columns_in_table - deprecated in favor of [get_columns_in_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-columns-in-relation)
  - already_exists - deprecated in favor of [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation)
### Features
- Add `source`s to dbt, use them to calculate source data freshness ([docs](https://docs.getdbt.com/v0.13/docs/using-sources)) ([#814](https://github.com/fishtown-analytics/dbt/issues/814), [#1240](https://github.com/fishtown-analytics/dbt/issues/1240)) (see the sketch after this list)
- Add support for Presto ([docs](https://docs.getdbt.com/v0.13/docs/profile-presto), [repo](https://github.com/fishtown-analytics/dbt-presto)) ([#1106](https://github.com/fishtown-analytics/dbt/issues/1106))
- Add `require-dbt-version` option to `dbt_project.yml` to state the supported versions of dbt for packages ([docs](https://docs.getdbt.com/v0.13/docs/requiring-dbt-versions)) ([#581](https://github.com/fishtown-analytics/dbt/issues/581))
- Add an output line indicating the installed version of dbt to every run ([#1134](https://github.com/fishtown-analytics/dbt/issues/1134))
- Add a new model selector (`@`) which builds models, their children, and their children's parents ([docs](https://docs.getdbt.com/v0.13/reference#section-the-at-operator)) ([#1156](https://github.com/fishtown-analytics/dbt/issues/1156))
- Add support for Snowflake Key Pair Authentication ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-key-pair-authentication)) ([#1232](https://github.com/fishtown-analytics/dbt/pull/1232))
- Support SSO Authentication for Snowflake ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-sso-authentication)) ([#1172](https://github.com/fishtown-analytics/dbt/issues/1172))
- Add support for Snowflake's transient tables ([docs](https://docs.getdbt.com/v0.13/docs/snowflake-configs#section-transient-tables)) ([#946](https://github.com/fishtown-analytics/dbt/issues/946))
- Capture build timing data in `run_results.json` to visualize project performance ([#1179](https://github.com/fishtown-analytics/dbt/issues/1179))
- Add CLI flag to toggle warnings as errors ([docs](https://docs.getdbt.com/v0.13/reference#section-treat-warnings-as-errors)) ([#1243](https://github.com/fishtown-analytics/dbt/issues/1243))
- Add tab completion script for Bash ([docs](https://github.com/fishtown-analytics/dbt-completion.bash)) ([#1197](https://github.com/fishtown-analytics/dbt/issues/1197))
- Added docs on how to build a new adapter ([docs](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter)) ([#560](https://github.com/fishtown-analytics/dbt/issues/560))
- Use new logo ([#1349](https://github.com/fishtown-analytics/dbt/pull/1349))
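A minimal sketch of how the new sources above are referenced from a model, assuming a source named `app` with a table `orders` has already been declared in a schema.yml file:
```
-- models/stg_orders.sql (illustrative)
select
    id as order_id,
    user_id,
    status
from {{ source('app', 'orders') }}
```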
### Fixes
- Fix for Postgres character columns treated as string types ([#1194](https://github.com/fishtown-analytics/dbt/issues/1194))
- Fix for hard to reach edge case in which dbt could hang ([#1223](https://github.com/fishtown-analytics/dbt/issues/1223))
- Fix for `dbt deps` in non-English shells ([#1222](https://github.com/fishtown-analytics/dbt/issues/1222))
- Fix for over-eager schema creation when models are run with `--models` ([#1239](https://github.com/fishtown-analytics/dbt/issues/1239))
- Fix for `dbt seed --show` ([#1288](https://github.com/fishtown-analytics/dbt/issues/1288))
- Fix for `is_incremental()` which should only return `True` if the target relation is a `table` ([#1292](https://github.com/fishtown-analytics/dbt/issues/1292))
- Fix for error in Snowflake table materializations with custom schemas ([#1316](https://github.com/fishtown-analytics/dbt/issues/1316))
- Fix errored out concurrent transactions on Redshift and Postgres ([#1356](https://github.com/fishtown-analytics/dbt/pull/1356))
- Fix out of order execution on model select ([#1354](https://github.com/fishtown-analytics/dbt/issues/1354), [#1355](https://github.com/fishtown-analytics/dbt/pull/1355))
- Fix adapter macro namespace issue ([#1352](https://github.com/fishtown-analytics/dbt/issues/1352), [#1353](https://github.com/fishtown-analytics/dbt/pull/1353))
- Re-add CLI flag to toggle warnings as errors ([#1347](https://github.com/fishtown-analytics/dbt/pull/1347))
- Fix release candidate regression that runs run hooks on test invocations ([#1346](https://github.com/fishtown-analytics/dbt/pull/1346))
- Fix Snowflake source quoting ([#1338](https://github.com/fishtown-analytics/dbt/pull/1338), [#1317](https://github.com/fishtown-analytics/dbt/issues/1317), [#1332](https://github.com/fishtown-analytics/dbt/issues/1332))
- Handle unexpected max_loaded_at types ([#1330](https://github.com/fishtown-analytics/dbt/pull/1330))
### Under the hood
- Replace all SQL in Python code with Jinja in macros ([#1204](https://github.com/fishtown-analytics/dbt/issues/1204))
- Loosen restrictions of boto3 dependency ([#1234](https://github.com/fishtown-analytics/dbt/issues/1234))
- Rewrote Postgres introspective queries to be faster on large databases ([#1192](https://github.com/fishtown-analytics/dbt/issues/1192))
### Contributors:
Thanks for your contributions to dbt!
- [@patrickgoss](https://github.com/patrickgoss) [#1193](https://github.com/fishtown-analytics/dbt/issues/1193)
- [@brianhartsock](https://github.com/brianhartsock) [#1191](https://github.com/fishtown-analytics/dbt/pull/1191)
- [@alexyer](https://github.com/alexyer) [#1232](https://github.com/fishtown-analytics/dbt/pull/1232)
- [@adriank-convoy](https://github.com/adriank-convoy) [#1224](https://github.com/fishtown-analytics/dbt/pull/1224)
- [@mikekaminsky](https://github.com/mikekaminsky) [#1216](https://github.com/fishtown-analytics/dbt/pull/1216)
- [@vijaykiran](https://github.com/vijaykiran) [#1198](https://github.com/fishtown-analytics/dbt/pull/1198), [#1199](https://github.com/fishtown-analytics/dbt/pull/1199)
## dbt 0.12.2 - Grace Kelly (January 8, 2019)
### Overview
This release reduces the runtime of dbt projects by improving dbt's approach to model running. Additionally, a number of workflow improvements have been added.
### Deprecations
- Deprecate `sql_where` ([#744](https://github.com/fishtown-analytics/dbt/issues/744)) ([docs](https://docs.getdbt.com/v0.12/docs/configuring-incremental-models))
### Features
- More intelligently order and execute nodes in the graph. This _significantly_ speeds up the runtime of most dbt projects ([#813](https://github.com/fishtown-analytics/dbt/issues/813))
- Add `-m` flag as an alias for `--models` ([#1160](https://github.com/fishtown-analytics/dbt/issues/1160))
- Add `post_hook` and `pre_hook` as aliases for `post-hook` and `pre-hook`, respectively ([#1124](https://github.com/fishtown-analytics/dbt/issues/1124)) ([docs](https://docs.getdbt.com/v0.12/docs/using-hooks))
- Better handling of git errors in `dbt deps` + full support for Windows ([#994](https://github.com/fishtown-analytics/dbt/issues/994), [#778](https://github.com/fishtown-analytics/dbt/issues/778), [#895](https://github.com/fishtown-analytics/dbt/issues/895))
- Add support for specifying a `location` in BigQuery datasets ([#969](https://github.com/fishtown-analytics/dbt/issues/969)) ([docs](https://docs.getdbt.com/v0.12/docs/supported-databases#section-dataset-locations))
- Add support for Jinja expressions using the `{% do ... %}` block ([#1113](https://github.com/fishtown-analytics/dbt/issues/1113)) (illustrated after this list)
- The `dbt debug` command is actually useful now ([#1061](https://github.com/fishtown-analytics/dbt/issues/1061))
- The `config` function can now be called multiple times in a model ([#558](https://github.com/fishtown-analytics/dbt/issues/558))
- Source the latest version of dbt from PyPi instead of GitHub ([#1122](https://github.com/fishtown-analytics/dbt/issues/1122))
- Add a performance profiling mechanism to dbt ([#1001](https://github.com/fishtown-analytics/dbt/issues/1001))
- Add caching for dbt's macros-only manifest to speed up parsing ([#1098](https://github.com/fishtown-analytics/dbt/issues/1098))
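A small sketch of the `{% do ... %}` support mentioned above, using an invented list of payment methods to pivot a column:
```
{% set payment_methods = ['bank_transfer', 'credit_card'] %}
{% do payment_methods.append('gift_card') %}  {# side effect only, renders nothing #}

select
    order_id,
    {% for method in payment_methods %}
    sum(case when payment_method = '{{ method }}' then amount end) as {{ method }}_amount
    {%- if not loop.last %},{% endif %}
    {% endfor %}
from {{ ref('payments') }}
group by 1
```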
### Fixes
- Fix for custom schemas used alongside the `generate_schema_name` macro ([#801](https://github.com/fishtown-analytics/dbt/issues/801))
- Fix for silent failure of tests that reference nonexistent models ([#968](https://github.com/fishtown-analytics/dbt/issues/968))
- Fix for `generate_schema_name` macros that return whitespace-padded schema names ([#1074](https://github.com/fishtown-analytics/dbt/issues/1074))
- Fix for incorrect relation type for backup tables on Snowflake ([#1103](https://github.com/fishtown-analytics/dbt/issues/1103))
- Fix for incorrectly cased values in the relation cache ([#1140](https://github.com/fishtown-analytics/dbt/issues/1140))
- Fix for JSON decoding error on Python2 installed with Anaconda ([#1155](https://github.com/fishtown-analytics/dbt/issues/1155))
- Fix for unhandled exceptions that occur in anonymous event tracking ([#1180](https://github.com/fishtown-analytics/dbt/issues/1180))
- Fix for analysis files that contain `raw` tags ([#1152](https://github.com/fishtown-analytics/dbt/issues/1152))
- Fix for packages which reference the [hub site](https://hub.getdbt.com) ([#1095](https://github.com/fishtown-analytics/dbt/issues/1095))
## dbt 0.12.1 - (November 15, 2018)
### Overview
This is a bugfix release.
### Fixes
- Fix for relation caching when views outside of a dbt schema depend on relations inside of a dbt schema ([#1119](https://github.com/fishtown-analytics/dbt/issues/1119))
## dbt 0.12.0 - Guion Bluford (November 12, 2018)
### Overview
This release adds caching for some introspective queries on all adapters. Additionally, custom tags can be supplied for models, along with many other minor improvements and bugfixes.
### Breaking Changes
- Support for the `repositories:` block in `dbt_project.yml` (deprecated in 0.10.0) was removed.
### tl;dr
- Make runs faster by caching introspective queries
- Support [model tags](https://docs.getdbt.com/v0.12/docs/tags)
- Add a list of [schemas](https://docs.getdbt.com/v0.12/reference#schemas) to the `on-run-end` context
- Set your [profiles directory](https://docs.getdbt.com/v0.12/docs/configure-your-profile#section-using-the-dbt_profiles_dir-environment-variable) with an environment variable
### Features
- Cache the existence of relations to speed up dbt runs ([#1025](https://github.com/fishtown-analytics/dbt/pull/1025))
- Add support for tag configuration and selection ([#1014](https://github.com/fishtown-analytics/dbt/pull/1014)) (an example follows this list)
- Add tags to the model and graph views in the docs UI ([#7](https://github.com/fishtown-analytics/dbt-docs/pull/7))
- Add the set of schemas that dbt built models into in the `on-run-end` hook context ([#908](https://github.com/fishtown-analytics/dbt/issues/908))
- Warn for unused resource config paths in dbt_project.yml ([#725](https://github.com/fishtown-analytics/dbt/pull/725))
- Add more information to the `dbt --help` output ([#1058](https://github.com/fishtown-analytics/dbt/issues/1058))
- Add support for configuring the profiles directory with an env var ([#1055](https://github.com/fishtown-analytics/dbt/issues/1055))
- Add support for cli and env vars in most `dbt_project.yml` and `profiles.yml` fields ([#1033](https://github.com/fishtown-analytics/dbt/pull/1033))
- Provide a better error message when seed file loading fails on BigQuery ([#1079](https://github.com/fishtown-analytics/dbt/pull/1079))
- Improved error handling and messaging on Redshift ([#997](https://github.com/fishtown-analytics/dbt/issues/997))
- Include datasets with underscores when listing BigQuery datasets ([#954](https://github.com/fishtown-analytics/dbt/pull/954))
- Forgo validating the user's profile for `dbt deps` and `dbt clean` commands ([#947](https://github.com/fishtown-analytics/dbt/issues/947), [#1022](https://github.com/fishtown-analytics/dbt/issues/1022))
- Don't read/parse CSV files outside of the `dbt seed` command ([#1046](https://github.com/fishtown-analytics/dbt/pull/1046))
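As a hedged sketch of the tag support above, a model can declare tags in its config block; the tag and model names are arbitrary.
```
-- models/nightly_rollup.sql (illustrative)
{{ config(materialized='table', tags=['nightly']) }}

select * from {{ ref('events') }}
```
Tagged models can then be selected with something like `dbt run --models tag:nightly`.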
### Fixes
- Fix for incorrect model selection with the `--models` CLI flag when projects and directories share the same name ([#1023](https://github.com/fishtown-analytics/dbt/issues/1023))
- Fix for table clustering configuration with multiple columns on BigQuery ([#1013](https://github.com/fishtown-analytics/dbt/issues/1013))
- Fix for incorrect output when a single row fails validation in `dbt test` ([#1040](https://github.com/fishtown-analytics/dbt/issues/1040))
- Fix for unwieldy Jinja errors regarding undefined variables at parse time ([#1086](https://github.com/fishtown-analytics/dbt/pull/1086), [#1080](https://github.com/fishtown-analytics/dbt/issues/1080), [#935](https://github.com/fishtown-analytics/dbt/issues/935))
- Fix for incremental models that have a line comment on the last line of the file ([#1018](https://github.com/fishtown-analytics/dbt/issues/1018))
- Fix for error messages when ephemeral models fail to compile ([#1053](https://github.com/fishtown-analytics/dbt/pull/1053))
### Under the hood
- Create adapters as singleton objects instead of classes ([#961](https://github.com/fishtown-analytics/dbt/issues/961))
- Combine project and profile into a single, coherent object ([#973](https://github.com/fishtown-analytics/dbt/pull/973))
- Investigate approaches for providing more complete compilation output ([#588](https://github.com/fishtown-analytics/dbt/issues/588))
### Contributors
Thanks for contributing!
- [@mikekaminsky](https://github.com/mikekaminsky) ([#1049](https://github.com/fishtown-analytics/dbt/pull/1049), [#1060](https://github.com/fishtown-analytics/dbt/pull/1060))
- [@joshtemple](https://github.com/joshtemple) ([#1079](https://github.com/fishtown-analytics/dbt/pull/1079))
- [@k4y3ff](https://github.com/k4y3ff) ([#954](https://github.com/fishtown-analytics/dbt/pull/954))
- [@elexisvenator](https://github.com/elexisvenator) ([#1019](https://github.com/fishtown-analytics/dbt/pull/1019))
- [@clrcrl](https://github.com/clrcrl) ([#725](https://github.com/fishtown-analytics/dbt/pull/725))
## dbt 0.11.1 - Lucretia Mott (September 18, 2018)
### Overview
This is a patch release containing a few bugfixes and one quality of life change for dbt docs.
### Features
- dbt
- Add `--port` parameter to dbt docs serve ([#987](https://github.com/fishtown-analytics/dbt/pull/987))
### Fixes
- dbt
- Fix hooks in model configs not running ([#985](https://github.com/fishtown-analytics/dbt/pull/985))
- Fix integration test on redshift catalog generation ([#977](https://github.com/fishtown-analytics/dbt/pull/977))
- Snowflake: Fix docs generation errors when QUOTED_IDENTIFIER_IGNORE_CASE is set ([#998](https://github.com/fishtown-analytics/dbt/pull/998))
- Translate empty strings to null in seeds ([#995](https://github.com/fishtown-analytics/dbt/pull/995))
- Filter out null schemas during catalog generation ([#992](https://github.com/fishtown-analytics/dbt/pull/992))
- Fix quoting on drop, truncate, and rename ([#991](https://github.com/fishtown-analytics/dbt/pull/991))
- dbt-docs
- Fix for non-existent column in schema.yml ([#3](https://github.com/fishtown-analytics/dbt-docs/pull/3))
- Fixes for missing tests in docs UI when columns are upcased ([#2](https://github.com/fishtown-analytics/dbt-docs/pull/2))
- Fix "copy to clipboard" ([#4](https://github.com/fishtown-analytics/dbt-docs/issues/4))
## dbt 0.11.0 - Isaac Asimov (September 6, 2018)
### Overview
This release adds support for auto-generated dbt documentation, adds a new syntax for `schema.yml` files, and fixes a number of minor bugs. With the exception of planned changes to Snowflake's default quoting strategy, this release should not contain any breaking changes. Check out the [blog post](https://blog.fishtownanalytics.com/using-dbt-docs-fae6137da3c3) for more information about this release.
### Breaking Changes
- Change default Snowflake quoting strategy to "unquoted" ([docs](https://docs.getdbt.com/v0.11/docs/configuring-quoting)) ([#824](https://github.com/fishtown-analytics/dbt/issues/824))
### Features
- Add autogenerated dbt project documentation ([docs](https://docs.getdbt.com/v0.11/docs/testing-and-documentation)) ([#375](https://github.com/fishtown-analytics/dbt/issues/375), [#863](https://github.com/fishtown-analytics/dbt/issues/863), [#941](https://github.com/fishtown-analytics/dbt/issues/941), [#815](https://github.com/fishtown-analytics/dbt/issues/815))
- Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest ([docs](https://docs.getdbt.com/v0.11/docs/schemayml-files)) ([#880](https://github.com/fishtown-analytics/dbt/pull/880))
- Extend catalog and manifest to also support Snowflake, BigQuery, and Redshift, in addition to existing Postgres support ([#866](https://github.com/fishtown-analytics/dbt/pull/866), [#857](https://github.com/fishtown-analytics/dbt/pull/857), [#849](https://github.com/fishtown-analytics/dbt/pull/849))
- Add a 'generated_at' field to both the manifest and the catalog. ([#887](https://github.com/fishtown-analytics/dbt/pull/877))
- Add `docs` blocks that users can put into `.md` files and `doc()` value for schema v2 description fields ([#888](https://github.com/fishtown-analytics/dbt/pull/888))
- Write out a 'run_results.json' after dbt invocations. ([#904](https://github.com/fishtown-analytics/dbt/pull/904))
- Type inference for interpreting CSV data is now less aggressive ([#905](https://github.com/fishtown-analytics/dbt/pull/905))
- Remove distinction between `this.table` and `this.schema` by refactoring materialization SQL ([#940](https://github.com/fishtown-analytics/dbt/pull/940))
### Fixes
- Fix for identifier clashes in BigQuery merge statements ([#914](https://github.com/fishtown-analytics/dbt/issues/914))
- Fix for unnecessary downloads of `bumpversion.cfg`, handle failures gracefully ([#907](https://github.com/fishtown-analytics/dbt/issues/907))
- Fix for incompatible `boto3` requirements ([#959](https://github.com/fishtown-analytics/dbt/issues/959))
- Fix for invalid `relationships` test when the parent column contains null values ([#921](https://github.com/fishtown-analytics/dbt/pull/921))
### Contributors
Thanks for contributing!
- [@rsmichaeldunn](https://github.com/rsmichaeldunn) ([#799](https://github.com/fishtown-analytics/dbt/pull/799))
- [@lewish](https://github.com/lewish) ([#915](https://github.com/fishtown-analytics/dbt/pull/915))
- [@MartinLue](https://github.com/MartinLue) ([#872](https://github.com/fishtown-analytics/dbt/pull/872))
## dbt 0.10.2 - Betsy Ross (August 3, 2018)
### Overview
This release makes it possible to alias relation names, rounds out support for BigQuery with incremental, archival, and hook support, adds the IAM Auth method for Redshift, and builds the foundation for autogenerated dbt project documentation, to come in the next release.
Additionally, a number of bugs have been fixed including intermittent BigQuery 404 errors, Redshift "table dropped by concurrent query" errors, and a probable fix for Redshift connection timeout issues.
### Contributors
We want to extend a big thank you to our outside contributors for this release! You all are amazing.
- [@danielchalef](https://github.com/danielchalef) ([#818](https://github.com/fishtown-analytics/dbt/pull/818))
- [@mjumbewu](https://github.com/mjumbewu) ([#796](https://github.com/fishtown-analytics/dbt/pull/796))
- [@abelsonlive](https://github.com/abelsonlive) ([#800](https://github.com/fishtown-analytics/dbt/pull/800))
- [@jon-rtr](https://github.com/jon-rtr) ([#800](https://github.com/fishtown-analytics/dbt/pull/800))
- [@mturzanska](https://github.com/mturzanska) ([#797](https://github.com/fishtown-analytics/dbt/pull/797))
- [@cpdean](https://github.com/cpdean) ([#780](https://github.com/fishtown-analytics/dbt/pull/780))
### Features
- BigQuery
- Support incremental models ([#856](https://github.com/fishtown-analytics/dbt/pull/856)) ([docs](https://docs.getdbt.com/docs/configuring-models#section-configuring-incremental-models))
- Support archival ([#856](https://github.com/fishtown-analytics/dbt/pull/856)) ([docs](https://docs.getdbt.com/docs/archival))
- Add pre/post hook support ([#836](https://github.com/fishtown-analytics/dbt/pull/836)) ([docs](https://docs.getdbt.com/docs/using-hooks))
- Redshift: IAM Auth ([#818](https://github.com/fishtown-analytics/dbt/pull/818)) ([docs](https://docs.getdbt.com/docs/supported-databases#section-iam-authentication))
- Model aliases ([#800](https://github.com/fishtown-analytics/dbt/pull/800)) ([docs](https://docs.getdbt.com/docs/using-custom-aliases))
- Write JSON manifest file to disk during compilation ([#761](https://github.com/fishtown-analytics/dbt/pull/761))
- Add forward and backward graph edges to the JSON manifest file ([#762](https://github.com/fishtown-analytics/dbt/pull/762))
- Add a 'dbt docs generate' command to generate a JSON catalog file ([#774](https://github.com/fishtown-analytics/dbt/pull/774), [#808](https://github.com/fishtown-analytics/dbt/pull/808))
### Bugfixes
- BigQuery: fix concurrent relation loads ([#835](https://github.com/fishtown-analytics/dbt/pull/835))
- BigQuery: support external relations ([#828](https://github.com/fishtown-analytics/dbt/pull/828))
- Redshift: set TCP keepalive on connections ([#826](https://github.com/fishtown-analytics/dbt/pull/826))
- Redshift: fix "table dropped by concurrent query" ([#825](https://github.com/fishtown-analytics/dbt/pull/825))
- Fix the error handling for profiles.yml validation ([#820](https://github.com/fishtown-analytics/dbt/pull/820))
- Make the `--threads` parameter actually change the number of threads used ([#819](https://github.com/fishtown-analytics/dbt/pull/819))
- Ensure that numeric precision of a column is not `None` ([#796](https://github.com/fishtown-analytics/dbt/pull/796))
- Allow for more complex version comparison ([#797](https://github.com/fishtown-analytics/dbt/pull/797))
### Changes
- Use a subselect instead of CTE when building incremental models ([#787](https://github.com/fishtown-analytics/dbt/pull/787))
- Internals
- Improved dependency selection, rip out some unused dependencies ([#848](https://github.com/fishtown-analytics/dbt/pull/848))
- Stop tracking `run_error` in tracking code ([#817](https://github.com/fishtown-analytics/dbt/pull/817))
- Use Mapping instead of dict as the base class for APIObject ([#756](https://github.com/fishtown-analytics/dbt/pull/756))
- Split out parsers ([#809](https://github.com/fishtown-analytics/dbt/pull/809))
- Fix `__all__` parameter in submodules ([#780](https://github.com/fishtown-analytics/dbt/pull/780))
- Switch to CircleCI 2.0 ([#843](https://github.com/fishtown-analytics/dbt/pull/843), [#850](https://github.com/fishtown-analytics/dbt/pull/850))
- Added tox environments that have the user specify what tests should be run ([#837](https://github.com/fishtown-analytics/dbt/pull/837))
## dbt 0.10.1 (May 18, 2018)
This release focuses on achieving functional parity between all of dbt's adapters. With this release, most dbt functionality should work on every adapter except where noted [here](https://docs.getdbt.com/v0.10/docs/supported-databases#section-caveats).
### tl;dr
- Configure model schema and name quoting in your `dbt_project.yml` file ([Docs](https://docs.getdbt.com/v0.10/docs/configuring-quoting))
- Add a `Relation` object to the context to simplify model quoting ([Docs](https://docs.getdbt.com/v0.10/reference#relation))
- Implement BigQuery materializations using new `create table as (...)` syntax, support `partition by` clause ([Docs](https://docs.getdbt.com/v0.10/docs/warehouse-specific-configurations#section-partition-clause))
- Override seed column types ([Docs](https://docs.getdbt.com/v0.10/reference#section-override-column-types))
- Add `get_columns_in_table` context function for BigQuery ([Docs](https://docs.getdbt.com/v0.10/reference#get_columns_in_table))
### Changes
- Consistent schema and identifier quoting ([#727](https://github.com/fishtown-analytics/dbt/pull/727))
- Configure quoting settings in the `dbt_project.yml` file ([#742](https://github.com/fishtown-analytics/dbt/pull/742))
- Add a `Relation` object to the context to make quoting consistent and simple ([#742](https://github.com/fishtown-analytics/dbt/pull/742))
- Use the new `create table as (...)` syntax on BigQuery ([#717](https://github.com/fishtown-analytics/dbt/pull/717))
- Support `partition by` clause
- CSV Updates:
- Use floating point as default seed column type to avoid issues with type inference ([#694](https://github.com/fishtown-analytics/dbt/pull/694))
- Provide a mechanism for overriding seed column types in the `dbt_project.yml` file ([#708](https://github.com/fishtown-analytics/dbt/pull/708))
- Fix seeding for files with more than 16k rows on Snowflake ([#694](https://github.com/fishtown-analytics/dbt/pull/694))
- Implement seeds using a materialization
- Improve `get_columns_in_table` context function ([#709](https://github.com/fishtown-analytics/dbt/pull/709))
- Support numeric types on Redshift, Postgres
- Support BigQuery (including nested columns in `struct` types)
- Support cross-database `information_schema` queries for Snowflake
- Retain column ordinal positions
### Bugfixes
- Fix for incorrect var precedence when using `--vars` on the CLI ([#739](https://github.com/fishtown-analytics/dbt/pull/739))
- Fix for closed connections in `on-run-end` hooks for long-running dbt invocations ([#693](https://github.com/fishtown-analytics/dbt/pull/693))
- Fix: don't try to run empty hooks ([#620](https://github.com/fishtown-analytics/dbt/issues/620), [#693](https://github.com/fishtown-analytics/dbt/pull/693))
- Fix: Prevent seed data from being serialized into `graph.gpickle` file ([#720](https://github.com/fishtown-analytics/dbt/pull/720))
- Fix: Disallow seed and model files with the same name ([#737](https://github.com/fishtown-analytics/dbt/pull/737))
## dbt 0.10.0 (March 8, 2018)
This release overhauls dbt's package management functionality, makes seeding csv files work across all adapters, and adds date partitioning support for BigQuery.
### Breaking Changes
- `adapter` functions must be namespaced to the `adapter` context variable. Referencing one directly will produce an error like:
```
Compilation Error in model {your_model} (models/path/to/your_model.sql)
  'already_exists' is undefined
```
To fix this error, use `adapter.already_exists` instead of just `already_exists`, or similar for other [adapter functions](https://docs.getdbt.com/docs/adapter).
### Bugfixes
- Handle lingering `__dbt_tmp` relations ([#511](https://github.com/fishtown-analytics/dbt/pull/511))

CONTRIBUTING.md (new file)
# Getting started with dbt
## About this document
This document is a guide intended for folks interested in contributing to dbt. It is not intended as a guide for end users of dbt (though if it is helpful, that's great!), and it assumes a certain level of familiarity with Python concepts such as virtualenvs, `pip`, Python modules, filesystems, and so on. It also assumes you are using macOS or Linux and are comfortable with the command line. If you get stuck while reading this guide, drop us a line in the #development channel on [Slack](https://slack.getdbt.com).
## Getting the code
### Installing git
You will need `git` in order to download and modify the dbt source code. On macOS, the best way to download git is to just install Xcode.
### External contributors
If you are not a member of the `fishtown-analytics` GitHub organization, you can contribute to dbt by forking the dbt repository. For a detailed overview on forking, check out the [GitHub docs on forking](https://help.github.com/en/articles/fork-a-repo). In short, you will need to:
1. fork the dbt repository
2. clone your fork
3. check out a new branch for your proposed changes
4. push changes to your fork
5. open a pull request against `fishtown-analytics/dbt` from your forked repository
### Core contributors
If you are a member of the `fishtown-analytics` GitHub organization, you will have push access to the dbt repo. Rather than
forking dbt to make your changes, just clone the repository and push directly to a branch.
## Setting up an environment
To begin developing code in dbt, you should set up the following:
### virtualenv
We strongly recommend using virtual environments when developing code in dbt. We recommend creating this virtualenv
in the root of the dbt repository. To create a new virtualenv, run:
```
python3 -m venv env
source env/bin/activate
```
This will create and activate a new Python virtual environment.
### docker and docker-compose
Docker and docker-compose are both used in testing. For macOS, the easiest thing to do is to [download docker for mac](https://store.docker.com/editions/community/docker-ce-desktop-mac). You'll need to make an account. On Linux, you can use one of the packages [here](https://docs.docker.com/install/#server). We recommend installing from docker.com instead of from your package manager. On Linux you also have to install docker-compose separately, follow [these instructions](https://docs.docker.com/compose/install/#install-compose).
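If you want a quick sanity check that both tools are installed (exact versions will vary by machine):
```
docker --version
docker-compose --version
```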
### Installing postgres locally (optional)
For testing, and later in the examples in this document, you may want to have `psql` available so you can poke around in the database and see what happened. We recommend that you use [homebrew](https://brew.sh/) for that on macOS, and your package manager on Linux. You can install any version of the postgres client that you'd like. On macOS, with homebrew setup, you can run:
```
brew install postgresql
```
## Running dbt in development
### Installation
First, make sure that you set up your `virtualenv` as described in the section _Setting up an environment_. Next, install dbt (and its dependencies) with:
```
pip install -r requirements.txt
```
When dbt is installed from source in this way, any changes you make to the dbt source code will be reflected immediately in your next `dbt` run.
### Running dbt
With your virtualenv activated, the `dbt` script should point back to the source code you've cloned on your machine. You can verify this by running `which dbt`. This command should show you a path to an executable in your virtualenv.
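For example (the path shown is illustrative and depends on where you cloned the repository and created your virtualenv):
```
which dbt
# expected: a path inside your checkout's virtualenv, e.g. <repo>/env/bin/dbt
```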
Configure your [profile](https://docs.getdbt.com/docs/configure-your-profile) as necessary to connect to your target databases. It may be a good idea to add a new profile pointing to a local postgres instance, or a specific test sandbox within your data warehouse if appropriate.
## Testing
Getting the dbt integration tests set up in your local environment will be very helpful as you start to make changes to your local version of dbt. The section that follows outlines some helpful tips for setting up the test environment.
### Tools
A short list of tools used in dbt testing that will be helpful to your understanding:
- [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage dependencies
- [tox](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions
- [pytest](https://docs.pytest.org/en/latest/) to discover/run tests
- [make](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple
- [flake8](https://gitlab.com/pycqa/flake8) for code linting
- [CircleCI](https://circleci.com/product/) and [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/)
A deep understanding of these tools is not required to contribute effectively to dbt, but we recommend checking out the linked documentation if you're interested in learning more about them.
### Running tests via Docker
dbt's unit and integration tests run in Docker. Because dbt works with a number of different databases, you will need to supply credentials for one or more of these databases in your test environment. Most organizations don't have access to all of BigQuery, Redshift, Snowflake, and Postgres, so it's likely that you will be unable to run every integration test locally. Fortunately, Fishtown Analytics provides a CI environment with access to sandboxed Redshift, Snowflake, BigQuery, and Postgres databases. See the section on _Submitting a Pull Request_ below for more information on this CI setup.
#### Specifying your test credentials
dbt uses test credentials specified in a `test.env` file in the root of the repository. This `test.env` file is git-ignored, but please be _extra_ careful to never check in credentials or other sensitive information when developing against dbt. To create your `test.env` file, copy the provided sample file, then supply your relevant credentials:
```
cp test.env.sample test.env
atom test.env # supply your credentials
```
We recommend starting with dbt's Postgres tests. These tests cover most of the functionality in dbt, are the fastest to run, and are the easiest to set up. dbt's test suite runs Postgres in a Docker container, so no setup should be required to run these tests. If you additionally want to test Snowflake, BigQuery, or Redshift locally, you'll need to get credentials and add them to the `test.env` file.
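For illustration only, a `test.env` might look like the sketch below. The variable names are taken from the CI configuration shown later in this document and may be incomplete; treat `test.env.sample` as the canonical reference.
```
# Supply credentials only for the warehouses you want to test against.
SNOWFLAKE_TEST_ACCOUNT=...
SNOWFLAKE_TEST_USER=...
SNOWFLAKE_TEST_PASSWORD=...
SNOWFLAKE_TEST_WAREHOUSE=...
BIGQUERY_SERVICE_ACCOUNT_JSON=...
REDSHIFT_TEST_HOST=...
REDSHIFT_TEST_PORT=...
REDSHIFT_TEST_USER=...
REDSHIFT_TEST_PASS=...
REDSHIFT_TEST_DBNAME=...
```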
#### Running tests
dbt's unit tests and Python linter can be run with:
```
make test-unit
```
To run the Postgres + Python 3.6 integration tests, you'll have to do one extra step of setting up the test database:
```
docker-compose up -d database
PGHOST=localhost PGUSER=root PGPASSWORD=password PGDATABASE=postgres bash test/setup_db.sh
```
To run a quick test for Python3 integration tests on Postgres, you can run:
```
make test-quick
```
To run tests for a specific database, invoke `tox` directly with the required flags:
```
# Run Postgres py36 tests
docker-compose run test tox -e integration-postgres-py36 -- -x
# Run Snowflake py36 tests
docker-compose run test tox -e integration-snowflake-py36 -- -x
# Run BigQuery py36 tests
docker-compose run test tox -e integration-bigquery-py36 -- -x
# Run Redshift py36 tests
docker-compose run test tox -e integration-redshift-py36 -- -x
```
See the `Makefile` contents for some other examples of ways to run `tox`.
### Submitting a Pull Request
Fishtown Analytics provides a sandboxed Redshift, Snowflake, and BigQuery database for use in a CI environment.
When pull requests are submitted to the `fishtown-analytics/dbt` repo, GitHub will trigger automated tests in CircleCI and Azure Pipelines. If the PR submitter is a member of the `fishtown-analytics` GitHub organization, then the credentials for these databases will be automatically supplied as environment variables in the CI test suite.
**If the PR submitter is not a member of the `fishtown-analytics` organization, then these environment variables will not be automatically supplied in the CI environment**. Once a core maintainer has taken a look at the Pull Request, they will kick off the test suite with the required credentials.
Once your tests are passing and your PR has been reviewed, a dbt maintainer will merge your changes into the active development branch! And that's it! Happy developing :tada:


@@ -1,14 +1,53 @@
FROM python:3.6
FROM ubuntu:18.04
RUN apt-get update
ENV DEBIAN_FRONTEND noninteractive
RUN apt-get install -y python-pip netcat
RUN apt-get install -y python-dev python3-dev
RUN apt-get update && \
apt-get install -y --no-install-recommends \
netcat postgresql make build-essential libssl-dev zlib1g-dev \
libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev \
xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev git ca-certificates \
curl git ssh && \
apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
RUN pip install pip --upgrade
RUN pip install virtualenv
RUN pip install virtualenvwrapper
RUN pip install tox
RUN useradd -mU dbt_test_user
RUN mkdir /usr/app && chown dbt_test_user /usr/app
RUN mkdir /home/tox && chown dbt_test_user /home/tox
USER dbt_test_user
WORKDIR /usr/src/app
RUN cd /usr/src/app
WORKDIR /usr/app
VOLUME /usr/app
RUN curl -L https://github.com/pyenv/pyenv-installer/raw/master/bin/pyenv-installer | bash
ENV PYENV_ROOT="/home/dbt_test_user/.pyenv" \
PATH="/home/dbt_test_user/.pyenv/bin:/home/dbt_test_user/.pyenv/shims:$PATH"
RUN pyenv update && \
echo "2.7.16 3.6.8 3.7.3" | xargs -P 4 -n 1 pyenv install && \
pyenv global $(pyenv versions --bare)
RUN pyenv virtualenv 3.6.8 dbt36 && \
pyenv virtualenv 3.7.3 dbt37 && \
pyenv virtualenv 2.7.16 dbt27
RUN cd /usr/app && \
python -m pip install -U pip && \
python -m pip install tox && \
pyenv local dbt37 && \
python -m pip install -U pip && \
python -m pip install tox && \
pyenv local --unset && \
pyenv local dbt36 && \
python -m pip install -U pip && \
python -m pip install tox && \
pyenv local --unset && \
pyenv local dbt27 && \
python -m pip install -U pip && \
python -m pip install tox && \
pyenv local --unset && \
pyenv rehash
RUN pyenv local dbt36 dbt37 dbt27
ENV PYTHONIOENCODING=utf-8


@@ -1 +0,0 @@
recursive-include dbt/include *.py *.sql *.yml


@@ -11,14 +11,26 @@ test:
test-unit:
@echo "Unit test run starting..."
@time docker-compose run test tox -e unit-py27,unit-py36,pep8
@time docker-compose run test tox -e unit-py27,unit-py36,flake8
test-integration:
@echo "Integration test run starting..."
@time docker-compose run test tox -e integration-postgres-py27,integration-postgres-py36,integration-snowflake-py27,integration-snowflake-py36,integration-bigquery-py27,integration-bigquery-py36
@time docker-compose run test tox -e integration-postgres-py27,integration-postgres-py36,integration-redshift-py27,integration-redshift-py36,integration-snowflake-py27,integration-snowflake-py36,integration-bigquery-py27,integration-bigquery-py36
test-new:
@echo "Test run starting..."
@echo "Changed test files:"
@echo "${changed_tests}"
@docker-compose run test /usr/src/app/test/runner.sh ${changed_tests}
test-quick:
@echo "Integration test run starting..."
@time docker-compose run test tox -e integration-postgres-py36 -- -x
clean:
rm -f .coverage
rm -rf .eggs/
rm -rf .tox/
rm -rf build/
rm -rf dbt.egg-info/
rm -f dbt_project.yml
rm -rf dist/
rm -f htmlcov/*.{css,html,js,json,png}
rm -rf logs/
rm -rf target/
find . -type f -name '*.pyc' -delete
find . -type d -name '__pycache__' -depth -delete


@@ -1,49 +1,58 @@
# dbt
<p align="center">
<img src="https://github.com/fishtown-analytics/dbt/blob/master/etc/dbt-horizontal.png?raw=true" alt="dbt logo"/>
</p>
<p align="center">
<a href="https://codeclimate.com/github/fishtown-analytics/dbt">
<img src="https://codeclimate.com/github/fishtown-analytics/dbt/badges/gpa.svg" alt="Code Climate"/>
</a>
<a href="https://circleci.com/gh/fishtown-analytics/dbt/tree/master">
<img src="https://circleci.com/gh/fishtown-analytics/dbt/tree/master.svg?style=svg" alt="CircleCI" />
</a>
<a href="https://ci.appveyor.com/project/DrewBanin/dbt/branch/development">
<img src="https://ci.appveyor.com/api/projects/status/v01rwd3q91jnwp9m/branch/development?svg=true" alt="AppVeyor" />
</a>
<a href="https://slack.getdbt.com">
<img src="https://slack.getdbt.com/badge.svg" alt="Slack" />
</a>
</p>
dbt (data build tool) helps analysts write reliable, modular code using a workflow that closely mirrors software development.
**[dbt](https://www.getdbt.com/)** (data build tool) enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.
A dbt project primarily consists of "models". These models are SQL `select` statements that filter, aggregate, and otherwise transform data to facilitate analytics. Analysts use dbt to [aggregate pageviews into sessions](https://github.com/fishtown-analytics/snowplow), calculate [ad spend ROI](https://github.com/fishtown-analytics/facebook-ads), or report on [email campaign performance](https://github.com/fishtown-analytics/mailchimp).
dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis.
These models frequently build on top of one another. Fortunately, dbt makes it easy to [manage relationships](https://docs.getdbt.com/reference#ref) between models, [test](https://docs.getdbt.com/docs/testing) your assumptions, and [visualize](https://graph.sinterdata.com/) your projects.
![dbt architecture](https://github.com/fishtown-analytics/dbt/blob/master/etc/dbt-arch.png?raw=true)
Still reading? Check out the [docs](https://docs.getdbt.com/docs/overview) for more information.
dbt can be used to [aggregate pageviews into sessions](https://github.com/fishtown-analytics/snowplow), calculate [ad spend ROI](https://github.com/fishtown-analytics/facebook-ads), or report on [email campaign performance](https://github.com/fishtown-analytics/mailchimp).
![dbt dag](/etc/dag.png?raw=true)
## Understanding dbt
---
### Getting Started
Analysts using dbt can transform their data by simply writing select statements, while dbt handles turning these statements into tables and views in a data warehouse.
- [What is dbt]?
- Read the [dbt viewpoint]
- [Installation]
- Join the [chat][slack-url] on Slack for live questions and support.
These select statements, or "models", form a dbt project. Models frequently build on top of one another; dbt makes it easy to [manage relationships](https://docs.getdbt.com/docs/ref) between models, and [visualize these relationships](https://docs.getdbt.com/docs/documentation), as well as assure the quality of your transformations through [testing](https://docs.getdbt.com/docs/testing).
---
### The dbt ecosystem
- Visualize your dbt graph [here](https://graph.sinterdata.com/)
- Run your dbt projects on a schedule [here](http://sinterdata.com/)
![dbt dag](https://github.com/fishtown-analytics/dbt/blob/master/etc/dbt-dag.png?raw=true)
---
## Getting started
[![Code Climate](https://codeclimate.com/github/fishtown-analytics/dbt/badges/gpa.svg)](https://codeclimate.com/github/fishtown-analytics/dbt) [![Slack](https://slack.getdbt.com/badge.svg)](https://slack.getdbt.com)
- [Install dbt](https://docs.getdbt.com/docs/installation)
- Read the [documentation](https://docs.getdbt.com/).
- Productionize your dbt project with [dbt Cloud](https://www.getdbt.com)
### Testing
## Find out more
| service | development | master |
| --- | --- | --- |
| CircleCI| [![CircleCI](https://circleci.com/gh/fishtown-analytics/dbt/tree/development.svg?style=svg)](https://circleci.com/gh/fishtown-analytics/dbt/tree/development) | [![CircleCI](https://circleci.com/gh/fishtown-analytics/dbt/tree/master.svg?style=svg)](https://circleci.com/gh/fishtown-analytics/dbt/tree/master) |
| AppVeyor | [![AppVeyor](https://ci.appveyor.com/api/projects/status/v01rwd3q91jnwp9m/branch/development?svg=true)](https://ci.appveyor.com/project/DrewBanin/dbt/branch/development) | [![AppVeyor](https://ci.appveyor.com/api/projects/status/v01rwd3q91jnwp9m/branch/master?svg=true)](https://ci.appveyor.com/project/DrewBanin/dbt/branch/master) |
- Check out the [Introduction to dbt](https://dbt.readme.io/docs/introduction).
- Read the [dbt Viewpoint](https://dbt.readme.io/docs/viewpoint).
[Coverage](https://circleci.com/api/v1/project/fishtown-analytics/dbt/latest/artifacts/0/$CIRCLE_ARTIFACTS/htmlcov/index.html?branch=development)
## Join thousands of analysts in the dbt community
- Join the [chat](http://slack.getdbt.com/) on Slack.
- Find community posts on [dbt Discourse](https://discourse.getdbt.com).
## Reporting bugs and contributing code
- Want to report a bug or request a feature? Let us know on [Slack](http://slack.getdbt.com/), or open [an issue](https://github.com/fishtown-analytics/dbt/issues/new).
- Want to help us build dbt? Check out the [Contributing Getting Started Guide](/CONTRIBUTING.md)
## Code of Conduct
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct].
[PyPA Code of Conduct]: https://www.pypa.io/en/latest/code-of-conduct/
[slack-url]: https://slack.getdbt.com/
[Installation]: https://docs.getdbt.com/docs/installation
[What is dbt]: https://docs.getdbt.com/docs/overview
[dbt viewpoint]: https://docs.getdbt.com/docs/viewpoint
Everyone interacting in the dbt project's codebases, issue trackers, chat rooms, and mailing lists is expected to follow the [PyPA Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/).


@@ -1,63 +1,77 @@
### Release Procedure :shipit:
1. Update changelog
1. Bumpversion
1. Merge to master
- (on master) git pull origin development
1. Deploy to pypi
- python setup.py sdist upload -r pypi
1. Deploy to homebrew
- Make a pull request against homebrew-core
1. Deploy to conda-forge
- Make a pull request against dbt-feedstock
1. Git release notes (points to changelog)
1. Post to slack (point to changelog)
#### Branching Strategy
dbt has three types of branches:
- **Trunks** track the latest release of a minor version of dbt. Historically, we used the `master` branch as the trunk. Each minor version release has a corresponding trunk. For example, the `0.11.x` series of releases has a branch called `0.11.latest`. This allows us to release new patch versions under `0.11` without necessarily needing to pull them into the latest version of dbt.
- **Release Branches** track a specific, not yet complete release of dbt. These releases are codenamed since we don't always know what their semantic version will be. Example: `dev/lucretia-mott` became `0.11.1`.
- **Feature Branches** track individual features and fixes. On completion they should be merged into a release branch (see the sketch below).
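For example, following the branch types above, a feature branch is typically cut from the relevant release branch (all names here are placeholders):
```
git checkout dev/<release-codename>
git pull origin dev/<release-codename>
git checkout -b feature/<short-description>
```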
#### Git & PyPI
1. Update CHANGELOG.md with the most recent changes
2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in Github to merge it into the appropriate trunk (`X.X.latest`)
3. Bump the version using `bumpversion`:
- Dry run first by running `bumpversion --new-version <desired-version> <part>` and checking the diff. If it looks correct, clean up the changes and move on:
- Alpha releases: `bumpversion --commit --no-tag --new-version 0.10.2a1 num`
- Patch releases: `bumpversion --commit --no-tag --new-version 0.10.2 patch`
- Minor releases: `bumpversion --commit --no-tag --new-version 0.11.0 minor`
- Major releases: `bumpversion --commit --no-tag --new-version 1.0.0 major`
4. (If this is a not a release candidate) Merge to `x.x.latest` and (optionally) `master`.
5. Update the default branch to the next dev release branch.
6. Build source distributions for all packages by running `./scripts/build-sdists.sh`. Note that this will clean out your `dist/` folder, so if you have important stuff in there, don't run it!!!
7. Deploy to pypi
- `twine upload dist/*`
8. Deploy to homebrew (see below)
9. Deploy to conda-forge (see below)
10. Git release notes (points to changelog)
11. Post to slack (point to changelog)
After releasing a new version, it's important to merge the changes back into the other outstanding release branches. This avoids merge conflicts moving forward.
In some cases, where the branches have diverged wildly, it's ok to skip this step. But this means that the changes you just released won't be included in future releases.
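A sketch of that merge-back, with placeholder branch names (substitute whichever trunks and release branches are actually outstanding):
```
git checkout dev/<outstanding-release-codename>
git pull origin dev/<outstanding-release-codename>
git merge X.X.latest   # the trunk that just received the release
git push origin dev/<outstanding-release-codename>
```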
#### Homebrew Release Process
1. fork homebrew and add a remote:
1. Clone the `homebrew-dbt` repository:
```
cd $(brew --repo homebrew/core)
git remote add origin <your-github-username> <your-fork-url>
git clone git@github.com:fishtown-analytics/homebrew-dbt.git
```
2. edit the formula.
2. For ALL releases (prereleases and version releases), copy the relevant formula. To copy from the latest version release of dbt, do:
```bash
brew update
mkvirtualenv --python="$(which python3)" brew
pip install homebrew-pypi-poet dbt
diff "$(brew --repo homebrew/core)"/Formula/dbt.rb <(poet -f dbt)
cp Formula/dbt.rb Formula/dbt@{NEW-VERSION}.rb
```
find any differences in resource stanzas, and incorporate them into the formula
To copy from a different version, simply copy the corresponding file.
3. Open the file, and edit the following:
- the name of the ruby class: this is important, homebrew won't function properly if the class name is wrong. Check historical versions to figure out the right name.
- under the `bottle` section, remove all of the hashes (lines starting with `sha256`)
4. Create a **Python 3.7** virtualenv, activate it, and then install two packages: `homebrew-pypi-poet`, and the version of dbt you are preparing. I use:
```
brew edit dbt
...
diff "$(brew --repo homebrew/core)"/Formula/dbt.rb <(poet -f dbt)
pyenv virtualenv 3.7.0 homebrew-dbt-{VERSION}
pyenv activate homebrew-dbt-{VERSION}
pip install dbt=={VERSION} homebrew-pypi-poet
```
3. reinstall, test, and audit dbt. if the test or audit fails, fix the formula with step 1.
homebrew-pypi-poet is a program that generates a valid homebrew formula for an installed pip package. You want to use it to generate a diff against the existing formula. Then you want to apply the diff for the dependency packages only -- e.g. it will tell you that `google-api-core` has been updated and that you need to use the latest version.
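One way to apply that workflow, assuming you copied the formula in step 2 above:
```
poet -f dbt > /tmp/poet-dbt.rb                 # formula generated from the installed package
diff Formula/dbt@{NEW-VERSION}.rb /tmp/poet-dbt.rb
# carry over only the changed `resource` stanzas (dependency updates)
```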
5. reinstall, test, and audit dbt. if the test or audit fails, fix the formula with step 1.
```bash
brew uninstall --force dbt
brew install --build-from-source dbt
brew uninstall --force Formula/{YOUR-FILE}.rb
brew install Formula/{YOUR-FILE}.rb
brew test dbt
brew audit --strict dbt
```
4. make a pull request for the change.
```bash
cd $(brew --repo homebrew/core)
git pull origin master
git checkout -b dbt-<version> origin/master
git add . -p
git commit -m 'dbt <version>'
git push -u <your-github-username> dbt-<version>
```
6. Ask Connor to bottle the change (only his laptop can do it!)
#### Conda Forge Release Process


@@ -1,63 +0,0 @@
version: 1.0.{build}-{branch}
environment:
# SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
# /E:ON and /V:ON options are not enabled in the batch script intepreter
# See: http://stackoverflow.com/a/13751649/163740
CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd"
TOX_ENV: "pywin"
matrix:
- PYTHON: "C:\\Python35"
PYTHON_VERSION: "3.5.2"
PYTHON_ARCH: "32"
#- PYTHON: "C:\\Python35"
# PYTHON_VERSION: "3.5.2"
# PYTHON_ARCH: "32"
PGUSER: postgres
PGPASSWORD: Password12!
services:
- postgresql94
hosts:
database: 127.0.0.1
init:
- PATH=C:\Program Files\PostgreSQL\9.4\bin\;%PATH%
- ps: Set-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all ::1/128 trust"
- ps: Add-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all 127.0.0.1/32 trust"
install:
# Download setup scripts and unzip
- ps: "wget https://github.com/cloudify-cosmo/appveyor-utils/archive/master.zip -OutFile ./master.zip"
- "7z e master.zip */appveyor/* -oappveyor"
# Install Python (from the official .msi of http://python.org) and pip when
# not already installed.
- "powershell ./appveyor/install.ps1"
# Prepend newly installed Python to the PATH of this build (this cannot be
# done from inside the powershell script as it would require to restart
# the parent CMD process).
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
# Check that we have the expected version and architecture for Python
- "python --version"
- "python -c \"import struct; print(struct.calcsize('P') * 8)\""
build: false # Not a C# project, build stuff at the test step instead.
before_test:
- "%CMD_IN_ENV% pip install psycopg2==2.6.2"
- "%CMD_IN_ENV% pip install tox"
test_script:
- "bash test/setup_db.sh"
# this is generally a bad idea TODO
- git config --system http.sslverify false
- "%CMD_IN_ENV% tox -e %TOX_ENV%"

azure-pipelines.yml (new file)
# Python package
# Create and test a Python package on multiple Python versions.
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
# https://docs.microsoft.com/azure/devops/pipelines/languages/python
trigger:
branches:
include:
- master
- dev/*
- pr/*
jobs:
- job: UnitTest
pool:
vmImage: 'vs2017-win2016'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.5'
architecture: 'x64'
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-unit
displayName: Run unit tests
- job: PostgresIntegrationTest
pool:
vmImage: 'vs2017-win2016'
dependsOn: UnitTest
steps:
- pwsh: |
choco install postgresql --params '/Password:password' --params-global --version 10.6
Set-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all ::1/128 trust"
Add-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all 127.0.0.1/32 trust"
# the service name is "postgresql-x64-10", conveniently it's both the display name and the actual name
Restart-Service postgresql-x64-10
& "C:\program files\postgresql\10\bin\createdb.exe" -U postgres dbt
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE root WITH PASSWORD 'password';"
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE root WITH LOGIN;"
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root WITH GRANT OPTION;"
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE noaccess WITH PASSWORD 'password' NOSUPERUSER;"
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE noaccess WITH LOGIN;"
& "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CONNECT ON DATABASE dbt TO noaccess;"
displayName: Install postgresql and set up database
- task: UsePythonVersion@0
inputs:
versionSpec: '3.5'
architecture: 'x64'
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-postgres
displayName: Run integration tests
# These three are all similar except secure environment variables, which MUST be passed along to their tasks,
# but there's probably a better way to do this!
- job: SnowflakeIntegrationTest
pool:
vmImage: 'vs2017-win2016'
dependsOn: PostgresIntegrationTest
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.5'
architecture: 'x64'
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-snowflake
env:
SNOWFLAKE_TEST_ACCOUNT: $(SNOWFLAKE_TEST_ACCOUNT)
SNOWFLAKE_TEST_PASSWORD: $(SNOWFLAKE_TEST_PASSWORD)
SNOWFLAKE_TEST_USER: $(SNOWFLAKE_TEST_USER)
SNOWFLAKE_TEST_WAREHOUSE: $(SNOWFLAKE_TEST_WAREHOUSE)
displayName: Run integration tests
- job: BigQueryIntegrationTest
pool:
vmImage: 'vs2017-win2016'
dependsOn: PostgresIntegrationTest
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.5'
architecture: 'x64'
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-bigquery
env:
BIGQUERY_SERVICE_ACCOUNT_JSON: $(BIGQUERY_SERVICE_ACCOUNT_JSON)
displayName: Run integration tests
- job: RedshiftIntegrationTest
pool:
vmImage: 'vs2017-win2016'
dependsOn: PostgresIntegrationTest
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.5'
architecture: 'x64'
- script: python -m pip install --upgrade pip && pip install tox
displayName: 'Install dependencies'
- script: python -m tox -e pywin-redshift
env:
REDSHIFT_TEST_DBNAME: $(REDSHIFT_TEST_DBNAME)
REDSHIFT_TEST_PASS: $(REDSHIFT_TEST_PASS)
REDSHIFT_TEST_USER: $(REDSHIFT_TEST_USER)
REDSHIFT_TEST_PORT: $(REDSHIFT_TEST_PORT)
REDSHIFT_TEST_HOST: $(REDSHIFT_TEST_HOST)
displayName: Run integration tests


@@ -1,25 +0,0 @@
machine:
python:
version: 3.6.0
post:
- pyenv global 2.7.12 3.6.0
hosts:
database: 127.0.0.1
database:
override:
- bash test/setup_db.sh
dependencies:
pre:
- pip install --upgrade pip setuptools || true
- pip install --upgrade tox tox-pyenv
override:
- pyenv local 2.7.12 3.6.0
test:
override:
- sudo chown -R ubuntu:ubuntu /root/
- /bin/bash -c 'cd /home/ubuntu/dbt && tox'
post:
- mv htmlcov $CIRCLE_ARTIFACTS/

core/MANIFEST.in (new file)
recursive-include dbt/include *.py *.sql *.yml *.html *.md

core/dbt/__init__.py (new file)
__path__ = __import__('pkgutil').extend_path(__path__, __name__)


@@ -0,0 +1 @@
__path__ = __import__('pkgutil').extend_path(__path__, __name__)


@@ -0,0 +1,8 @@
# these are all just exports, #noqa them so flake8 will be happy
from dbt.adapters.base.meta import available # noqa
from dbt.adapters.base.relation import BaseRelation # noqa
from dbt.adapters.base.relation import Column # noqa
from dbt.adapters.base.connections import BaseConnectionManager # noqa
from dbt.adapters.base.connections import Credentials # noqa
from dbt.adapters.base.impl import BaseAdapter # noqa
from dbt.adapters.base.plugin import AdapterPlugin # noqa


@@ -0,0 +1,329 @@
import abc
import multiprocessing
import os
import six
import dbt.exceptions
import dbt.flags
from dbt.api import APIObject
from dbt.compat import abstractclassmethod, get_ident
from dbt.contracts.connection import Connection
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import translate_aliases
class Credentials(APIObject):
"""Common base class for credentials. This is not valid to instantiate"""
SCHEMA = NotImplemented
# map credential aliases to their canonical names.
ALIASES = {}
def __init__(self, **kwargs):
renamed = self.translate_aliases(kwargs)
super(Credentials, self).__init__(**renamed)
@property
def type(self):
raise NotImplementedError(
'type not implemented for base credentials class'
)
def connection_info(self):
"""Return an ordered iterator of key/value pairs for pretty-printing.
"""
for key in self._connection_keys():
if key in self._contents:
yield key, self._contents[key]
def _connection_keys(self):
"""The credential object keys that should be printed to users in
'dbt debug' output. This is specific to each adapter.
"""
raise NotImplementedError
def incorporate(self, **kwargs):
# implementation note: we have to do this here, or
# incorporate(alias_name=...) will result in duplicate keys in the
# merged dict that APIObject.incorporate() creates.
renamed = self.translate_aliases(kwargs)
return super(Credentials, self).incorporate(**renamed)
def serialize(self, with_aliases=False):
serialized = super(Credentials, self).serialize()
if with_aliases:
serialized.update({
new_name: serialized[canonical_name]
for new_name, canonical_name in self.ALIASES.items()
if canonical_name in serialized
})
return serialized
@classmethod
def translate_aliases(cls, kwargs):
return translate_aliases(kwargs, cls.ALIASES)
@six.add_metaclass(abc.ABCMeta)
class BaseConnectionManager(object):
"""Methods to implement:
- exception_handler
- cancel_open
- open
- begin
- commit
- clear_transaction
- execute
You must also set the 'TYPE' class attribute with a class-unique constant
string.
"""
TYPE = NotImplemented
def __init__(self, profile):
self.profile = profile
self.thread_connections = {}
self.lock = multiprocessing.RLock()
@staticmethod
def get_thread_identifier():
# note that get_ident() may be re-used, but we should never experience
# that within a single process
return (os.getpid(), get_ident())
def get_thread_connection(self):
key = self.get_thread_identifier()
with self.lock:
if key not in self.thread_connections:
raise RuntimeError(
'connection never acquired for thread {}, have {}'
.format(key, list(self.thread_connections))
)
return self.thread_connections[key]
def get_if_exists(self):
key = self.get_thread_identifier()
with self.lock:
return self.thread_connections.get(key)
def clear_thread_connection(self):
key = self.get_thread_identifier()
with self.lock:
if key in self.thread_connections:
del self.thread_connections[key]
def clear_transaction(self):
"""Clear any existing transactions."""
conn = self.get_thread_connection()
if conn is not None:
if conn.transaction_open:
self._rollback(conn)
self.begin()
self.commit()
@abc.abstractmethod
def exception_handler(self, sql):
"""Create a context manager that handles exceptions caused by database
interactions.
:param str sql: The SQL string that the block inside the context
manager is executing.
:return: A context manager that handles exceptions raised by the
underlying database.
"""
raise dbt.exceptions.NotImplementedException(
'`exception_handler` is not implemented for this adapter!')
def set_connection_name(self, name=None):
if name is None:
# if a name isn't specified, we'll re-use a single handle
# named 'master'
name = 'master'
conn = self.get_if_exists()
thread_id_key = self.get_thread_identifier()
if conn is None:
conn = Connection(
type=self.TYPE,
name=None,
state='init',
transaction_open=False,
handle=None,
credentials=self.profile.credentials
)
self.thread_connections[thread_id_key] = conn
if conn.name == name and conn.state == 'open':
return conn
logger.debug('Acquiring new {} connection "{}".'
.format(self.TYPE, name))
if conn.state == 'open':
logger.debug(
'Re-using an available connection from the pool (formerly {}).'
.format(conn.name))
else:
logger.debug('Opening a new connection, currently in state {}'
.format(conn.state))
self.open(conn)
conn.name = name
return conn
@abc.abstractmethod
def cancel_open(self):
"""Cancel all open connections on the adapter. (passable)"""
raise dbt.exceptions.NotImplementedException(
'`cancel_open` is not implemented for this adapter!'
)
@abstractclassmethod
def open(cls, connection):
"""Open a connection on the adapter.
This may mutate the given connection (in particular, its state and its
handle).
This should be thread-safe, or hold the lock if necessary. The given
connection should not be in either in_use or available.
:param Connection connection: A connection object to open.
:return: A connection with a handle attached and an 'open' state.
:rtype: Connection
"""
raise dbt.exceptions.NotImplementedException(
'`open` is not implemented for this adapter!'
)
def release(self):
with self.lock:
conn = self.get_if_exists()
if conn is None:
return
try:
if conn.state == 'open':
if conn.transaction_open is True:
self._rollback(conn)
else:
self.close(conn)
except Exception:
# if rollback or close failed, remove our busted connection
self.clear_thread_connection()
raise
def cleanup_all(self):
with self.lock:
for connection in self.thread_connections.values():
if connection.state not in {'closed', 'init'}:
logger.debug("Connection '{}' was left open."
.format(connection.name))
else:
logger.debug("Connection '{}' was properly closed."
.format(connection.name))
self.close(connection)
# garbage collect these connections
self.thread_connections.clear()
@abc.abstractmethod
def begin(self):
"""Begin a transaction. (passable)
:param str name: The name of the connection to use.
"""
raise dbt.exceptions.NotImplementedException(
'`begin` is not implemented for this adapter!'
)
@abc.abstractmethod
def commit(self):
"""Commit a transaction. (passable)"""
raise dbt.exceptions.NotImplementedException(
'`commit` is not implemented for this adapter!'
)
@classmethod
def _rollback_handle(cls, connection):
"""Perform the actual rollback operation."""
try:
connection.handle.rollback()
except Exception:
logger.debug(
'Failed to rollback {}'.format(connection.name),
exc_info=True
)
@classmethod
def _close_handle(cls, connection):
"""Perform the actual close operation."""
# On windows, sometimes connection handles don't have a close() attr.
if hasattr(connection.handle, 'close'):
logger.debug('On {}: Close'.format(connection.name))
connection.handle.close()
else:
logger.debug('On {}: No close available on handle'
.format(connection.name))
@classmethod
def _rollback(cls, connection):
"""Roll back the given connection.
"""
if dbt.flags.STRICT_MODE:
assert isinstance(connection, Connection)
if connection.transaction_open is False:
raise dbt.exceptions.InternalException(
'Tried to rollback transaction on connection "{}", but '
'it does not have one open!'.format(connection.name))
logger.debug('On {}: ROLLBACK'.format(connection.name))
cls._rollback_handle(connection)
connection.transaction_open = False
return connection
@classmethod
def close(cls, connection):
if dbt.flags.STRICT_MODE:
assert isinstance(connection, Connection)
# if the connection is in closed or init, there's nothing to do
if connection.state in {'closed', 'init'}:
return connection
if connection.transaction_open and connection.handle:
cls._rollback_handle(connection)
connection.transaction_open = False
cls._close_handle(connection)
connection.state = 'closed'
return connection
def commit_if_has_connection(self):
"""If the named connection exists, commit the current transaction.
:param str name: The name of the connection to use.
"""
connection = self.get_if_exists()
if connection:
self.commit()
@abc.abstractmethod
def execute(self, sql, auto_begin=False, fetch=False):
"""Execute the given SQL.
:param str sql: The sql to execute.
:param bool auto_begin: If set, and dbt is not currently inside a
transaction, automatically begin one.
:param bool fetch: If set, fetch results.
:return: A tuple of the status and the results (empty if fetch=False).
:rtype: Tuple[str, agate.Table]
"""
raise dbt.exceptions.NotImplementedException(
'`execute` is not implemented for this adapter!'
)


@@ -0,0 +1,957 @@
import abc
from contextlib import contextmanager
import agate
import pytz
import six
import dbt.exceptions
import dbt.flags
import dbt.clients.agate_helper
from dbt.compat import abstractclassmethod, classmethod
from dbt.node_types import NodeType
from dbt.loader import GraphLoader
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import filter_null_values
from dbt.adapters.base.meta import AdapterMeta, available
from dbt.adapters.base import BaseRelation
from dbt.adapters.base import Column
from dbt.adapters.cache import RelationsCache
GET_CATALOG_MACRO_NAME = 'get_catalog'
FRESHNESS_MACRO_NAME = 'collect_freshness'
def _expect_row_value(key, row):
if key not in row.keys():
raise dbt.exceptions.InternalException(
'Got a row without "{}" column, columns: {}'
.format(key, row.keys())
)
return row[key]
def _relations_filter_schemas(schemas):
def test(row):
referenced_schema = _expect_row_value('referenced_schema', row)
dependent_schema = _expect_row_value('dependent_schema', row)
# handle the null schema
if referenced_schema is not None:
referenced_schema = referenced_schema.lower()
if dependent_schema is not None:
dependent_schema = dependent_schema.lower()
return referenced_schema in schemas or dependent_schema in schemas
return test
def _catalog_filter_schemas(manifest):
"""Return a function that takes a row and decides if the row should be
included in the catalog output.
"""
schemas = frozenset((d.lower(), s.lower())
for d, s in manifest.get_used_schemas())
def test(row):
table_database = _expect_row_value('table_database', row)
table_schema = _expect_row_value('table_schema', row)
# the schema may be present but None, which is not an error and should
# be filtered out
if table_schema is None:
return False
return (table_database.lower(), table_schema.lower()) in schemas
return test
def _utc(dt, source, field_name):
"""If dt has a timezone, return a new datetime that's in UTC. Otherwise,
assume the datetime is already for UTC and add the timezone.
"""
if dt is None:
raise dbt.exceptions.raise_database_error(
"Expected a non-null value when querying field '{}' of table "
" {} but received value 'null' instead".format(
field_name,
source))
elif not hasattr(dt, 'tzinfo'):
raise dbt.exceptions.raise_database_error(
"Expected a timestamp value when querying field '{}' of table "
"{} but received value of type '{}' instead".format(
field_name,
source,
type(dt).__name__))
elif dt.tzinfo:
return dt.astimezone(pytz.UTC)
else:
return dt.replace(tzinfo=pytz.UTC)
class SchemaSearchMap(dict):
"""A utility class to keep track of what information_schema tables to
search for what schemas
"""
def add(self, relation):
key = relation.information_schema_only()
if key not in self:
self[key] = set()
self[key].add(relation.schema.lower())
def search(self):
for information_schema_name, schemas in self.items():
for schema in schemas:
yield information_schema_name, schema
def schemas_searched(self):
result = set()
for information_schema_name, schemas in self.items():
result.update(
(information_schema_name.database, schema)
for schema in schemas
)
return result
def flatten(self):
new = self.__class__()
database = None
# iterate once to look for a database name
seen = {r.database.lower() for r in self if r.database}
if len(seen) > 1:
dbt.exceptions.raise_compiler_error(str(seen))
elif len(seen) == 1:
database = list(seen)[0]
for information_schema_name, schema in self.search():
new.add(information_schema_name.incorporate(
path={'database': database, 'schema': schema},
quote_policy={'database': False},
include_policy={'database': False},
))
return new
@six.add_metaclass(AdapterMeta)
class BaseAdapter(object):
"""The BaseAdapter provides an abstract base class for adapters.
Adapters must implement the following methods and macros. Some of the
methods can be safely overridden as a noop, where it makes sense
(transactions on databases that don't support them, for instance). Those
methods are marked with a (passable) in their docstrings. Check docstrings
for type information, etc.
To implement a macro, implement "${adapter_type}__${macro_name}" in the
adapter's internal project.
Methods:
- exception_handler
- date_function
- list_schemas
- drop_relation
- truncate_relation
- rename_relation
- get_columns_in_relation
- expand_column_types
- list_relations_without_caching
- is_cancelable
- create_schema
- drop_schema
- quote
- convert_text_type
- convert_number_type
- convert_boolean_type
- convert_datetime_type
- convert_date_type
- convert_time_type
Macros:
- get_catalog
"""
requires = {}
Relation = BaseRelation
Column = Column
# This should be an implementation of BaseConnectionManager
ConnectionManager = None
# A set of clobber config fields accepted by this adapter
# for use in materializations
AdapterSpecificConfigs = frozenset()
def __init__(self, config):
self.config = config
self.cache = RelationsCache()
self.connections = self.ConnectionManager(config)
self._internal_manifest_lazy = None
###
# Methods that pass through to the connection manager
###
def acquire_connection(self, name=None):
return self.connections.set_connection_name(name)
def release_connection(self):
return self.connections.release()
def cleanup_connections(self):
return self.connections.cleanup_all()
def clear_transaction(self):
self.connections.clear_transaction()
def commit_if_has_connection(self):
return self.connections.commit_if_has_connection()
def nice_connection_name(self):
conn = self.connections.get_thread_connection()
if conn is None or conn.name is None:
return '<None>'
return conn.name
@contextmanager
def connection_named(self, name):
try:
yield self.acquire_connection(name)
finally:
self.release_connection()
@available.parse(lambda *a, **k: ('', dbt.clients.agate_helper.empty_table()))
def execute(self, sql, auto_begin=False, fetch=False):
"""Execute the given SQL. This is a thin wrapper around
ConnectionManager.execute.
:param str sql: The sql to execute.
:param bool auto_begin: If set, and dbt is not currently inside a
transaction, automatically begin one.
:param bool fetch: If set, fetch results.
:return: A tuple of the status and the results (empty if fetch=False).
:rtype: Tuple[str, agate.Table]
"""
return self.connections.execute(
sql=sql,
auto_begin=auto_begin,
fetch=fetch
)
###
# Methods that should never be overridden
###
@classmethod
def type(cls):
"""Get the type of this adapter. Types must be class-unique and
consistent.
:return: The type name
:rtype: str
"""
return cls.ConnectionManager.TYPE
@property
def _internal_manifest(self):
if self._internal_manifest_lazy is None:
manifest = GraphLoader.load_internal(self.config)
self._internal_manifest_lazy = manifest
return self._internal_manifest_lazy
def check_internal_manifest(self):
"""Return the internal manifest (used for executing macros) if it's
been initialized, otherwise return None.
"""
return self._internal_manifest_lazy
###
# Caching methods
###
def _schema_is_cached(self, database, schema):
"""Check if the schema is cached, and by default logs if it is not."""
if dbt.flags.USE_CACHE is False:
return False
elif (database, schema) not in self.cache:
logger.debug(
'On "{}": cache miss for schema "{}.{}", this is inefficient'
.format(self.nice_connection_name(), database, schema)
)
return False
else:
return True
@classmethod
def _relations_filter_table(cls, table, schemas):
"""Filter the table as appropriate for relations table entries.
Subclasses can override this to change filtering rules on a per-adapter
basis.
"""
return table.where(_relations_filter_schemas(schemas))
def _get_cache_schemas(self, manifest, exec_only=False):
"""Get a mapping of each node's "information_schema" relations to a
set of all schemas expected in that information_schema.
There may be keys that are technically duplicates on the database side,
for example all of '"foo"', 'foo', '"FOO"' and 'FOO' could coexist as
databases, and values could overlap as appropriate. All values are
lowercase strings.
"""
info_schema_name_map = SchemaSearchMap()
for node in manifest.nodes.values():
if exec_only and node.resource_type not in NodeType.executable():
continue
relation = self.Relation.create_from(self.config, node)
info_schema_name_map.add(relation)
# result is a map whose keys are information_schema Relations without
# identifiers that have appropriate database prefixes, and whose values
# are sets of lowercase schema names that are valid members of those
# databases
return info_schema_name_map
def _relations_cache_for_schemas(self, manifest):
"""Populate the relations cache for the given schemas. Returns an
iterable of the schemas populated, as strings.
"""
if not dbt.flags.USE_CACHE:
return
info_schema_name_map = self._get_cache_schemas(manifest,
exec_only=True)
for db, schema in info_schema_name_map.search():
for relation in self.list_relations_without_caching(db, schema):
self.cache.add(relation)
# it's possible that there were no relations in some schemas. We want
# to insert the schemas we query into the cache's `.schemas` attribute
# so we can check it later
self.cache.update_schemas(info_schema_name_map.schemas_searched())
def set_relations_cache(self, manifest, clear=False):
"""Run a query that gets a populated cache of the relations in the
database and set the cache on this adapter.
"""
if not dbt.flags.USE_CACHE:
return
with self.cache.lock:
if clear:
self.cache.clear()
self._relations_cache_for_schemas(manifest)
def cache_new_relation(self, relation):
"""Cache a new relation in dbt. It will show up in `list relations`."""
if relation is None:
name = self.nice_connection_name()
dbt.exceptions.raise_compiler_error(
'Attempted to cache a null relation for {}'.format(name)
)
if dbt.flags.USE_CACHE:
self.cache.add(relation)
# so jinja doesn't render things
return ''
###
# Abstract methods for database-specific values, attributes, and types
###
@abstractclassmethod
def date_function(cls):
"""Get the date function used by this adapter's database.
:return: The date function
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`date_function` is not implemented for this adapter!')
@abstractclassmethod
def is_cancelable(cls):
raise dbt.exceptions.NotImplementedException(
'`is_cancelable` is not implemented for this adapter!'
)
###
# Abstract methods about schemas
###
@abc.abstractmethod
def list_schemas(self, database):
"""Get a list of existing schemas.
:param str database: The name of the database to list under.
:return: All schemas that currently exist in the database
:rtype: List[str]
"""
raise dbt.exceptions.NotImplementedException(
'`list_schemas` is not implemented for this adapter!'
)
@available.parse(lambda *a, **k: False)
def check_schema_exists(self, database, schema):
"""Check if a schema exists.
The default implementation of this is potentially unnecessarily slow,
and adapters should implement it if there is an optimized path (and
there probably is)
"""
search = (
s.lower() for s in
self.list_schemas(database=database)
)
return schema.lower() in search
###
# Abstract methods about relations
###
@abc.abstractmethod
@available.parse_none
def drop_relation(self, relation):
"""Drop the given relation.
*Implementors must call self.cache.drop() to preserve cache state!*
:param self.Relation relation: The relation to drop
"""
raise dbt.exceptions.NotImplementedException(
'`drop_relation` is not implemented for this adapter!'
)
@abc.abstractmethod
@available.parse_none
def truncate_relation(self, relation):
"""Truncate the given relation.
:param self.Relation relation: The relation to truncate
"""
raise dbt.exceptions.NotImplementedException(
'`truncate_relation` is not implemented for this adapter!'
)
@abc.abstractmethod
@available.parse_none
def rename_relation(self, from_relation, to_relation):
"""Rename the relation from from_relation to to_relation.
Implementors must call self.cache.rename() to preserve cache state.
:param self.Relation from_relation: The original relation name
:param self.Relation to_relation: The new relation name
"""
raise dbt.exceptions.NotImplementedException(
'`rename_relation` is not implemented for this adapter!'
)
@abc.abstractmethod
@available.parse_list
def get_columns_in_relation(self, relation):
"""Get a list of the columns in the given Relation.
:param self.Relation relation: The relation to query for.
:return: Information about all columns in the given relation.
:rtype: List[self.Column]
"""
raise dbt.exceptions.NotImplementedException(
'`get_columns_in_relation` is not implemented for this adapter!'
)
@available.deprecated('get_columns_in_relation', lambda *a, **k: [])
def get_columns_in_table(self, schema, identifier):
"""DEPRECATED: Get a list of the columns in the given table."""
relation = self.Relation.create(
database=self.config.credentials.database,
schema=schema,
identifier=identifier,
quote_policy=self.config.quoting
)
return self.get_columns_in_relation(relation)
@abc.abstractmethod
def expand_column_types(self, goal, current):
"""Expand the current table's types to match the goal table. (passable)
:param self.Relation goal: A relation that currently exists in the
database with columns of the desired types.
:param self.Relation current: A relation that currently exists in the
database with columns of unspecified types.
"""
raise dbt.exceptions.NotImplementedException(
'`expand_target_column_types` is not implemented for this adapter!'
)
@abc.abstractmethod
def list_relations_without_caching(self, information_schema, schema):
"""List relations in the given schema, bypassing the cache.
This is used as the underlying behavior to fill the cache.
:param Relation information_schema: The information schema to list
relations from.
:param str schema: The name of the schema to list relations from.
:return: The relations in schema
:rtype: List[self.Relation]
"""
raise dbt.exceptions.NotImplementedException(
'`list_relations_without_caching` is not implemented for this '
'adapter!'
)
###
# Provided methods about relations
###
@available.parse_list
def get_missing_columns(self, from_relation, to_relation):
"""Returns a list of Columns in from_relation that are missing from
to_relation.
:param Relation from_relation: The relation that might have extra
columns
:param Relation to_relation: The relation that might have columns
missing
:return: The columns in from_relation that are missing from to_relation
:rtype: List[self.Column]
"""
if not isinstance(from_relation, self.Relation):
dbt.exceptions.invalid_type_error(
method_name='get_missing_columns',
arg_name='from_relation',
got_value=from_relation,
expected_type=self.Relation)
if not isinstance(to_relation, self.Relation):
dbt.exceptions.invalid_type_error(
method_name='get_missing_columns',
arg_name='to_relation',
got_value=to_relation,
expected_type=self.Relation)
from_columns = {
col.name: col for col in
self.get_columns_in_relation(from_relation)
}
to_columns = {
col.name: col for col in
self.get_columns_in_relation(to_relation)
}
missing_columns = set(from_columns.keys()) - set(to_columns.keys())
return [
col for (col_name, col) in from_columns.items()
if col_name in missing_columns
]
@available.parse_none
def valid_snapshot_target(self, relation):
"""Ensure that the target relation is valid, by making sure it has the
expected columns.
:param Relation relation: The relation to check
:raises dbt.exceptions.CompilationException: If the columns are
incorrect.
"""
if not isinstance(relation, self.Relation):
dbt.exceptions.invalid_type_error(
method_name='valid_snapshot_target',
arg_name='relation',
got_value=relation,
expected_type=self.Relation)
columns = self.get_columns_in_relation(relation)
names = set(c.name.lower() for c in columns)
expanded_keys = ('scd_id', 'valid_from', 'valid_to')
extra = []
missing = []
for legacy in expanded_keys:
desired = 'dbt_' + legacy
if desired not in names:
missing.append(desired)
if legacy in names:
extra.append(legacy)
if missing:
if extra:
msg = (
'Snapshot target has ("{}") but not ("{}") - is it an '
'unmigrated previous version archive?'
.format('", "'.join(extra), '", "'.join(missing))
)
else:
msg = (
'Snapshot target is not a snapshot table (missing "{}")'
.format('", "'.join(missing))
)
dbt.exceptions.raise_compiler_error(msg)
@available.parse_none
def expand_target_column_types(self, from_relation, to_relation):
if not isinstance(from_relation, self.Relation):
dbt.exceptions.invalid_type_error(
method_name='expand_target_column_types',
arg_name='from_relation',
got_value=from_relation,
expected_type=self.Relation)
if not isinstance(to_relation, self.Relation):
dbt.exceptions.invalid_type_error(
method_name='expand_target_column_types',
arg_name='to_relation',
got_value=to_relation,
expected_type=self.Relation)
self.expand_column_types(from_relation, to_relation)
def list_relations(self, database, schema):
if self._schema_is_cached(database, schema):
return self.cache.get_relations(database, schema)
information_schema = self.Relation.create(
database=database,
schema=schema,
model_name='',
quote_policy=self.config.quoting
).information_schema()
# we can't build the relations cache because we don't have a
# manifest so we can't run any operations.
relations = self.list_relations_without_caching(
information_schema, schema
)
logger.debug('with database={}, schema={}, relations={}'
.format(database, schema, relations))
return relations
def _make_match_kwargs(self, database, schema, identifier):
quoting = self.config.quoting
if identifier is not None and quoting['identifier'] is False:
identifier = identifier.lower()
if schema is not None and quoting['schema'] is False:
schema = schema.lower()
if database is not None and quoting['database'] is False:
database = database.lower()
return filter_null_values({
'database': database,
'identifier': identifier,
'schema': schema,
})
def _make_match(self, relations_list, database, schema, identifier):
matches = []
search = self._make_match_kwargs(database, schema, identifier)
for relation in relations_list:
if relation.matches(**search):
matches.append(relation)
return matches
@available.parse_none
def get_relation(self, database, schema, identifier):
relations_list = self.list_relations(database, schema)
matches = self._make_match(relations_list, database, schema,
identifier)
if len(matches) > 1:
kwargs = {
'identifier': identifier,
'schema': schema,
'database': database,
}
dbt.exceptions.get_relation_returned_multiple_results(
kwargs, matches
)
elif matches:
return matches[0]
return None
@available.deprecated('get_relation', lambda *a, **k: False)
def already_exists(self, schema, name):
"""DEPRECATED: Return if a model already exists in the database"""
database = self.config.credentials.database
relation = self.get_relation(database, schema, name)
return relation is not None
###
# ODBC FUNCTIONS -- these should not need to change for every adapter,
# although some adapters may override them
###
@abc.abstractmethod
@available.parse_none
def create_schema(self, database, schema):
"""Create the given schema if it does not exist.
:param str schema: The schema name to create.
"""
raise dbt.exceptions.NotImplementedException(
'`create_schema` is not implemented for this adapter!'
)
@abc.abstractmethod
def drop_schema(self, database, schema):
"""Drop the given schema (and everything in it) if it exists.
:param str schema: The schema name to drop.
"""
raise dbt.exceptions.NotImplementedException(
'`drop_schema` is not implemented for this adapter!'
)
@available
@abstractclassmethod
def quote(cls, identifier):
"""Quote the given identifier, as appropriate for the database.
:param str identifier: The identifier to quote
:return: The quoted identifier
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`quote` is not implemented for this adapter!'
)
@available
def quote_as_configured(self, identifier, quote_key):
"""Quote or do not quote the given identifer as configured in the
project config for the quote key.
The quote key should be one of 'database' (on bigquery, 'profile'),
'identifier', or 'schema', or it will be treated as if you set `True`.
"""
default = self.Relation.DEFAULTS['quote_policy'].get(quote_key)
if self.config.quoting.get(quote_key, default):
return self.quote(identifier)
else:
return identifier
###
# Conversions: These must be implemented by concrete implementations, for
# converting agate types into their sql equivalents.
###
@abstractclassmethod
def convert_text_type(cls, agate_table, col_idx):
"""Return the type in the database that best maps to the agate.Text
type for the given agate table and column index.
:param agate.Table agate_table: The table
:param int col_idx: The index into the agate table for the column.
:return: The name of the type in the database
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`convert_text_type` is not implemented for this adapter!')
@abstractclassmethod
def convert_number_type(cls, agate_table, col_idx):
"""Return the type in the database that best maps to the agate.Number
type for the given agate table and column index.
:param agate.Table agate_table: The table
:param int col_idx: The index into the agate table for the column.
:return: The name of the type in the database
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`convert_number_type` is not implemented for this adapter!')
@abstractclassmethod
def convert_boolean_type(cls, agate_table, col_idx):
"""Return the type in the database that best maps to the agate.Boolean
type for the given agate table and column index.
:param agate.Table agate_table: The table
:param int col_idx: The index into the agate table for the column.
:return: The name of the type in the database
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`convert_boolean_type` is not implemented for this adapter!')
@abstractclassmethod
def convert_datetime_type(cls, agate_table, col_idx):
"""Return the type in the database that best maps to the agate.DateTime
type for the given agate table and column index.
:param agate.Table agate_table: The table
:param int col_idx: The index into the agate table for the column.
:return: The name of the type in the database
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`convert_datetime_type` is not implemented for this adapter!')
@abstractclassmethod
def convert_date_type(cls, agate_table, col_idx):
"""Return the type in the database that best maps to the agate.Date
type for the given agate table and column index.
:param agate.Table agate_table: The table
:param int col_idx: The index into the agate table for the column.
:return: The name of the type in the database
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`convert_date_type` is not implemented for this adapter!')
@abstractclassmethod
def convert_time_type(cls, agate_table, col_idx):
"""Return the type in the database that best maps to the
agate.TimeDelta type for the given agate table and column index.
:param agate.Table agate_table: The table
:param int col_idx: The index into the agate table for the column.
:return: The name of the type in the database
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`convert_time_type` is not implemented for this adapter!')
@available
@classmethod
def convert_type(cls, agate_table, col_idx):
return cls.convert_agate_type(agate_table, col_idx)
@classmethod
def convert_agate_type(cls, agate_table, col_idx):
agate_type = agate_table.column_types[col_idx]
conversions = [
(agate.Text, cls.convert_text_type),
(agate.Number, cls.convert_number_type),
(agate.Boolean, cls.convert_boolean_type),
(agate.DateTime, cls.convert_datetime_type),
(agate.Date, cls.convert_date_type),
(agate.TimeDelta, cls.convert_time_type),
]
for agate_cls, func in conversions:
if isinstance(agate_type, agate_cls):
return func(agate_table, col_idx)
###
# Operations involving the manifest
###
def execute_macro(self, macro_name, manifest=None, project=None,
context_override=None, kwargs=None, release=False):
"""Look macro_name up in the manifest and execute its results.
:param str macro_name: The name of the macro to execute.
:param Optional[Manifest] manifest: The manifest to use for generating
the base macro execution context. If none is provided, use the
internal manifest.
:param Optional[str] project: The name of the project to search in, or
None for the first match.
:param Optional[dict] context_override: An optional dict to update()
the macro execution context.
:param Optional[dict] kwargs: An optional dict of keyword args used to
pass to the macro.
:param bool release: If True, release the connection after executing.
Return an AttrDict with three attributes: 'table', 'data', and
'status'. 'table' is an agate.Table.
"""
if kwargs is None:
kwargs = {}
if context_override is None:
context_override = {}
if manifest is None:
manifest = self._internal_manifest
macro = manifest.find_macro_by_name(macro_name, project)
if macro is None:
if project is None:
package_name = 'any package'
else:
package_name = 'the "{}" package'.format(project)
# The import of dbt.context.operation below shadows 'dbt'
import dbt.exceptions
raise dbt.exceptions.RuntimeException(
'dbt could not find a macro with the name "{}" in {}'
.format(macro_name, package_name)
)
# This causes a reference cycle, as dbt.context.operation.generate()
# ends up calling get_adapter, so the import has to be here.
import dbt.context.operation
macro_context = dbt.context.operation.generate(
macro,
self.config,
manifest
)
macro_context.update(context_override)
macro_function = macro.generator(macro_context)
try:
result = macro_function(**kwargs)
finally:
if release:
self.release_connection()
return result
@classmethod
def _catalog_filter_table(cls, table, manifest):
"""Filter the table as appropriate for catalog entries. Subclasses can
override this to change filtering rules on a per-adapter basis.
"""
return table.where(_catalog_filter_schemas(manifest))
def get_catalog(self, manifest):
"""Get the catalog for this manifest by running the get catalog macro.
Returns an agate.Table of catalog information.
"""
information_schemas = list(self._get_cache_schemas(manifest).keys())
# make it a list so macros can index into it.
kwargs = {'information_schemas': information_schemas}
table = self.execute_macro(GET_CATALOG_MACRO_NAME,
kwargs=kwargs,
release=True)
results = self._catalog_filter_table(table, manifest)
return results
def cancel_open_connections(self):
"""Cancel all open connections."""
return self.connections.cancel_open()
def calculate_freshness(self, source, loaded_at_field, manifest=None):
"""Calculate the freshness of sources in dbt, and return it"""
# in the future `source` will be a Relation instead of a string
kwargs = {
'source': source,
'loaded_at_field': loaded_at_field
}
# run the macro
table = self.execute_macro(
FRESHNESS_MACRO_NAME,
kwargs=kwargs,
release=True,
manifest=manifest
)
# now we have a 1-row table of the maximum `loaded_at_field` value and
# the current time according to the db.
if len(table) != 1 or len(table[0]) != 2:
dbt.exceptions.raise_compiler_error(
'Got an invalid result from "{}" macro: {}'.format(
FRESHNESS_MACRO_NAME, [tuple(r) for r in table]
)
)
max_loaded_at = _utc(table[0][0], source, loaded_at_field)
snapshotted_at = _utc(table[0][1], source, loaded_at_field)
age = (snapshotted_at - max_loaded_at).total_seconds()
return {
'max_loaded_at': max_loaded_at,
'snapshotted_at': snapshotted_at,
'age': age,
}
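# Worked example (a sketch, not dbt source): how the two values returned by
# the collect_freshness macro become the 'age' above. The naive datetime is
# assumed to already be UTC; the aware one is converted to UTC.
import datetime
import pytz

_max_loaded_at = _utc(datetime.datetime(2019, 8, 20, 12, 0, 0),
                      'example_source', 'loaded_at')
_snapshotted_at = _utc(datetime.datetime(2019, 8, 20, 12, 30, 0, tzinfo=pytz.UTC),
                       'example_source', 'loaded_at')
print((_snapshotted_at - _max_loaded_at).total_seconds())  # 1800.0 seconds of lag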


@@ -0,0 +1,117 @@
import abc
from functools import wraps
from dbt.deprecations import warn, renamed_method
def _always_none(*args, **kwargs):
return None
def _always_list(*args, **kwargs):
return []
def available(func):
"""A decorator to indicate that a method on the adapter will be
exposed to the database wrapper, and will be available at parse and run
time.
"""
func._is_available_ = True
return func
def available_deprecated(supported_name, parse_replacement=None):
"""A decorator that marks a function as available, but also prints a
deprecation warning. Use like
@available_deprecated('my_new_method')
def my_old_method(self, arg):
args = compatibility_shim(arg)
return self.my_new_method(*args)
@available_deprecated('my_new_slow_method', lambda *a, **k: (0, ''))
def my_old_slow_method(self, arg):
args = compatibility_shim(arg)
return self.my_new_slow_method(*args)
To make `adapter.my_old_method` available but also print out a warning on
use directing users to `my_new_method`.
The optional parse_replacement, if provided, will provide a parse-time
replacement for the actual method (see `available_parse`).
"""
def wrapper(func):
func_name = func.__name__
renamed_method(func_name, supported_name)
@wraps(func)
def inner(*args, **kwargs):
warn('adapter:{}'.format(func_name))
return func(*args, **kwargs)
if parse_replacement:
available = available_parse(parse_replacement)
return available(inner)
return wrapper
def available_parse(parse_replacement):
"""A decorator factory to indicate that a method on the adapter will be
exposed to the database wrapper, and will be stubbed out at parse time with
the given function.
@available_parse(lambda *args, **kwargs: None)
def my_method(self, a, b):
if something:
return None
return big_expensive_db_query()
@available_parse(lambda *args, **kwargs: {})
def my_other_method(self, a, b):
x = {}
x.update(big_expensive_db_query())
return x
"""
def inner(func):
func._parse_replacement_ = parse_replacement
available(func)
return func
return inner
available.deprecated = available_deprecated
available.parse = available_parse
available.parse_none = available_parse(lambda *a, **k: None)
available.parse_list = available_parse(lambda *a, **k: [])
class AdapterMeta(abc.ABCMeta):
def __new__(mcls, name, bases, namespace, **kwargs):
cls = super(AdapterMeta, mcls).__new__(mcls, name, bases, namespace,
**kwargs)
# this is very much inspired by ABCMeta's own implementation
# dict mapping the method name to whether the model name should be
# injected into the arguments. All methods in here are exposed to the
# context.
available = set()
replacements = {}
# collect base class data first
for base in bases:
available.update(getattr(base, '_available_', set()))
replacements.update(getattr(base, '_parse_replacements_', {}))
# override with local data if it exists
for name, value in namespace.items():
if getattr(value, '_is_available_', False):
available.add(name)
parse_replacement = getattr(value, '_parse_replacement_', None)
if parse_replacement is not None:
replacements[name] = parse_replacement
cls._available_ = frozenset(available)
# should this be a namedtuple so it will be immutable like _available_?
cls._parse_replacements_ = replacements
return cls
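# Usage sketch (hypothetical `_DemoAdapter`, not part of dbt): what AdapterMeta
# collects from methods decorated with `available` / `available_parse`.
import six

@six.add_metaclass(AdapterMeta)
class _DemoAdapter(object):
    @available
    def quote(self, identifier):
        return '"{}"'.format(identifier)

    @available_parse(lambda *a, **k: [])
    def list_things(self):
        return ['a', 'b']

print(sorted(_DemoAdapter._available_))           # ['list_things', 'quote']
print(sorted(_DemoAdapter._parse_replacements_))  # ['list_things']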


@@ -0,0 +1,23 @@
from dbt.config.project import Project
class AdapterPlugin(object):
"""Defines the basic requirements for a dbt adapter plugin.
:param type adapter: An adapter class, derived from BaseAdapter
:param type credentials: A credentials object, derived from Credentials
:param str project_name: The name of this adapter plugin's associated dbt
project.
:param str include_path: The path to this adapter plugin's root
:param Optional[List[str]] dependencies: A list of adapter names that this
adapter depends upon.
"""
def __init__(self, adapter, credentials, include_path, dependencies=None):
self.adapter = adapter
self.credentials = credentials
self.include_path = include_path
project = Project.from_project_root(include_path, {})
self.project_name = project.project_name
if dependencies is None:
dependencies = []
self.dependencies = dependencies


@@ -0,0 +1,406 @@
from dbt.api import APIObject
from dbt.utils import filter_null_values
from dbt.node_types import NodeType
import dbt.exceptions
class BaseRelation(APIObject):
Table = "table"
View = "view"
CTE = "cte"
MaterializedView = "materializedview"
ExternalTable = "externaltable"
RelationTypes = [
Table,
View,
CTE,
MaterializedView,
ExternalTable
]
DEFAULTS = {
'metadata': {
'type': 'BaseRelation'
},
'quote_character': '"',
'quote_policy': {
'database': True,
'schema': True,
'identifier': True,
},
'include_policy': {
'database': True,
'schema': True,
'identifier': True,
},
'dbt_created': False,
}
PATH_SCHEMA = {
'type': 'object',
'properties': {
'database': {'type': ['string', 'null']},
'schema': {'type': ['string', 'null']},
'identifier': {'type': ['string', 'null']},
},
'required': ['database', 'schema', 'identifier'],
}
POLICY_SCHEMA = {
'type': 'object',
'properties': {
'database': {'type': 'boolean'},
'schema': {'type': 'boolean'},
'identifier': {'type': 'boolean'},
},
'required': ['database', 'schema', 'identifier'],
}
SCHEMA = {
'type': 'object',
'properties': {
'metadata': {
'type': 'object',
'properties': {
'type': {
'type': 'string',
'const': 'BaseRelation',
},
},
},
'type': {
'enum': RelationTypes + [None],
},
'path': PATH_SCHEMA,
'include_policy': POLICY_SCHEMA,
'quote_policy': POLICY_SCHEMA,
'quote_character': {'type': 'string'},
'dbt_created': {'type': 'boolean'},
},
'required': ['metadata', 'type', 'path', 'include_policy',
'quote_policy', 'quote_character', 'dbt_created']
}
PATH_ELEMENTS = ['database', 'schema', 'identifier']
def _is_exactish_match(self, field, value):
if self.dbt_created and self.quote_policy.get(field) is False:
return self.get_path_part(field).lower() == value.lower()
else:
return self.get_path_part(field) == value
def matches(self, database=None, schema=None, identifier=None):
search = filter_null_values({
'database': database,
'schema': schema,
'identifier': identifier
})
if not search:
# nothing was passed in
raise dbt.exceptions.RuntimeException(
"Tried to match relation, but no search path was passed!")
exact_match = True
approximate_match = True
for k, v in search.items():
if not self._is_exactish_match(k, v):
exact_match = False
if self.get_path_part(k).lower() != v.lower():
approximate_match = False
if approximate_match and not exact_match:
target = self.create(
database=database, schema=schema, identifier=identifier
)
dbt.exceptions.approximate_relation_match(target, self)
return exact_match
def get_path_part(self, part):
return self.path.get(part)
def should_quote(self, part):
return self.quote_policy.get(part)
def should_include(self, part):
return self.include_policy.get(part)
def quote(self, database=None, schema=None, identifier=None):
policy = filter_null_values({
'database': database,
'schema': schema,
'identifier': identifier
})
return self.incorporate(quote_policy=policy)
def include(self, database=None, schema=None, identifier=None):
policy = filter_null_values({
'database': database,
'schema': schema,
'identifier': identifier
})
return self.incorporate(include_policy=policy)
def information_schema(self, identifier=None):
include_db = self.database is not None
include_policy = filter_null_values({
'database': include_db,
'schema': True,
'identifier': identifier is not None
})
quote_policy = filter_null_values({
'database': self.quote_policy['database'],
'schema': False,
'identifier': False,
})
path_update = {
'schema': 'information_schema',
'identifier': identifier
}
return self.incorporate(
quote_policy=quote_policy,
include_policy=include_policy,
path=path_update,
table_name=identifier)
def information_schema_only(self):
return self.information_schema()
def information_schema_table(self, identifier):
return self.information_schema(identifier)
def render(self, use_table_name=True):
parts = []
for k in self.PATH_ELEMENTS:
if self.should_include(k):
path_part = self.get_path_part(k)
if path_part is None:
continue
elif k == 'identifier':
if use_table_name:
path_part = self.table
else:
path_part = self.identifier
parts.append(
self.quote_if(
path_part,
self.should_quote(k)))
if len(parts) == 0:
raise dbt.exceptions.RuntimeException(
"No path parts are included! Nothing to render.")
return '.'.join(parts)
def quote_if(self, identifier, should_quote):
if should_quote:
return self.quoted(identifier)
return identifier
def quoted(self, identifier):
return '{quote_char}{identifier}{quote_char}'.format(
quote_char=self.quote_character,
identifier=identifier)
@classmethod
def create_from_source(cls, source, **kwargs):
quote_policy = dbt.utils.deep_merge(
cls.DEFAULTS['quote_policy'],
source.quoting,
kwargs.get('quote_policy', {})
)
return cls.create(
database=source.database,
schema=source.schema,
identifier=source.identifier,
quote_policy=quote_policy,
**kwargs
)
@classmethod
def create_from_node(cls, config, node, table_name=None, quote_policy=None,
**kwargs):
if quote_policy is None:
quote_policy = {}
quote_policy = dbt.utils.merge(config.quoting, quote_policy)
return cls.create(
database=node.get('database'),
schema=node.get('schema'),
identifier=node.get('alias'),
table_name=table_name,
quote_policy=quote_policy,
**kwargs)
@classmethod
def create_from(cls, config, node, **kwargs):
if node.resource_type == NodeType.Source:
return cls.create_from_source(node, **kwargs)
else:
return cls.create_from_node(config, node, **kwargs)
@classmethod
def create(cls, database=None, schema=None,
identifier=None, table_name=None,
type=None, **kwargs):
if table_name is None:
table_name = identifier
return cls(type=type,
path={
'database': database,
'schema': schema,
'identifier': identifier
},
table_name=table_name,
**kwargs)
def __repr__(self):
return "<{} {}>".format(self.__class__.__name__, self.render())
def __hash__(self):
return hash(self.render())
def __str__(self):
return self.render()
@property
def path(self):
return self.get('path', {})
@property
def database(self):
return self.path.get('database')
@property
def schema(self):
return self.path.get('schema')
@property
def identifier(self):
return self.path.get('identifier')
# Here for compatibility with old Relation interface
@property
def name(self):
return self.identifier
# Here for compatibility with old Relation interface
@property
def table(self):
return self.table_name
@property
def is_table(self):
return self.type == self.Table
@property
def is_cte(self):
return self.type == self.CTE
@property
def is_view(self):
return self.type == self.View
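# Usage sketch (not dbt source): composing a relation and controlling how it
# renders. The values are made up; the comments show the expected output under
# the DEFAULTS above (everything quoted and included).
_rel = BaseRelation.create(
    database='analytics',
    schema='dbt_dev',
    identifier='my_model',
    type=BaseRelation.Table,
)
print(_rel.render())                          # expected: "analytics"."dbt_dev"."my_model"
print(_rel.include(database=False).render())  # expected: "dbt_dev"."my_model"
print(_rel.quote(identifier=False).render())  # expected: "analytics"."dbt_dev".my_model
print(_rel.information_schema().render())     # expected: "analytics".information_schema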
class Column(object):
TYPE_LABELS = {
'STRING': 'TEXT',
'TIMESTAMP': 'TIMESTAMP',
'FLOAT': 'FLOAT',
'INTEGER': 'INT'
}
def __init__(self, column, dtype, char_size=None, numeric_precision=None,
numeric_scale=None):
self.column = column
self.dtype = dtype
self.char_size = char_size
self.numeric_precision = numeric_precision
self.numeric_scale = numeric_scale
@classmethod
def translate_type(cls, dtype):
return cls.TYPE_LABELS.get(dtype.upper(), dtype)
@classmethod
def create(cls, name, label_or_dtype):
column_type = cls.translate_type(label_or_dtype)
return cls(name, column_type)
@property
def name(self):
return self.column
@property
def quoted(self):
return '"{}"'.format(self.column)
@property
def data_type(self):
if self.is_string():
return Column.string_type(self.string_size())
elif self.is_numeric():
return Column.numeric_type(self.dtype, self.numeric_precision,
self.numeric_scale)
else:
return self.dtype
def is_string(self):
return self.dtype.lower() in ['text', 'character varying', 'character',
'varchar']
def is_numeric(self):
return self.dtype.lower() in ['numeric', 'number']
def string_size(self):
if not self.is_string():
raise RuntimeError("Called string_size() on non-string field!")
if self.dtype == 'text' or self.char_size is None:
# char_size should never be None. Handle it reasonably just in case
return 256
else:
return int(self.char_size)
def can_expand_to(self, other_column):
"""returns True if this column can be expanded to the size of the
other column"""
if not self.is_string() or not other_column.is_string():
return False
return other_column.string_size() > self.string_size()
def literal(self, value):
return "{}::{}".format(value, self.data_type)
@classmethod
def string_type(cls, size):
return "character varying({})".format(size)
@classmethod
def numeric_type(cls, dtype, precision, scale):
# This could be decimal(...), numeric(...), number(...)
# Just use whatever was fed in here -- don't try to get too clever
if precision is None or scale is None:
return dtype
else:
return "{}({},{})".format(dtype, precision, scale)
def __repr__(self):
return "<Column {} ({})>".format(self.name, self.data_type)

core/dbt/adapters/cache.py

@@ -0,0 +1,476 @@
from collections import namedtuple
import threading
from copy import deepcopy
from dbt.logger import CACHE_LOGGER as logger
import dbt.exceptions
_ReferenceKey = namedtuple('_ReferenceKey', 'database schema identifier')
def _lower(value):
"""Postgres schemas can be None so we can't just call lower()."""
if value is None:
return None
return value.lower()
def _make_key(relation):
"""Make _ReferenceKeys with lowercase values for the cache so we don't have
to keep track of quoting
"""
return _ReferenceKey(_lower(relation.database),
_lower(relation.schema),
_lower(relation.identifier))
def dot_separated(key):
"""Return the key in dot-separated string form.
:param key _ReferenceKey: The key to stringify.
"""
return '.'.join(map(str, key))
class _CachedRelation(object):
"""Nothing about _CachedRelation is guaranteed to be thread-safe!
:attr str schema: The schema of this relation.
:attr str identifier: The identifier of this relation.
:attr Dict[_ReferenceKey, _CachedRelation] referenced_by: The relations
that refer to this relation.
:attr BaseRelation inner: The underlying dbt relation.
"""
def __init__(self, inner):
self.referenced_by = {}
self.inner = inner
def __str__(self):
return (
'_CachedRelation(database={}, schema={}, identifier={}, inner={})'
).format(self.database, self.schema, self.identifier, self.inner)
@property
def database(self):
return _lower(self.inner.database)
@property
def schema(self):
return _lower(self.inner.schema)
@property
def identifier(self):
return _lower(self.inner.identifier)
def __copy__(self):
new = self.__class__(self.inner)
new.__dict__.update(self.__dict__)
return new
def __deepcopy__(self, memo):
new = self.__class__(self.inner.incorporate())
new.__dict__.update(self.__dict__)
new.referenced_by = deepcopy(self.referenced_by, memo)
return new
def is_referenced_by(self, key):
return key in self.referenced_by
def key(self):
"""Get the _ReferenceKey that represents this relation
:return _ReferenceKey: A key for this relation.
"""
return _make_key(self)
def add_reference(self, referrer):
"""Add a reference from referrer to self, indicating that if this node
were drop...cascaded, the referrer would be dropped as well.
:param _CachedRelation referrer: The node that refers to this node.
"""
self.referenced_by[referrer.key()] = referrer
def collect_consequences(self):
"""Recursively collect a set of _ReferenceKeys that would
consequentially get dropped if this were dropped via
"drop ... cascade".
:return Set[_ReferenceKey]: All the relations that would be dropped
"""
consequences = {self.key()}
for relation in self.referenced_by.values():
consequences.update(relation.collect_consequences())
return consequences
def release_references(self, keys):
"""Non-recursively indicate that an iterable of _ReferenceKey no longer
exist. Unknown keys are ignored.
:param Iterable[_ReferenceKey] keys: The keys to drop.
"""
keys = set(self.referenced_by) & set(keys)
for key in keys:
self.referenced_by.pop(key)
def rename(self, new_relation):
"""Rename this cached relation to new_relation.
Note that this will change the output of key(), all refs must be
updated!
:param _CachedRelation new_relation: The new name to apply to the
relation
"""
# Relations store this stuff inside their `path` dict. But they
# also store a table_name, and usually use it in their .render(),
# so we need to update that as well. It doesn't appear that
# table_name is ever anything but the identifier (via .create())
self.inner = self.inner.incorporate(
path={
'database': new_relation.inner.database,
'schema': new_relation.inner.schema,
'identifier': new_relation.inner.identifier
},
table_name=new_relation.inner.identifier
)
def rename_key(self, old_key, new_key):
"""Rename a reference that may or may not exist. Only handles the
reference itself, so this is the other half of what `rename` does.
If old_key is not in referenced_by, this is a no-op.
:param _ReferenceKey old_key: The old key to be renamed.
:param _ReferenceKey new_key: The new key to rename to.
:raises InternalError: If the new key already exists.
"""
if new_key in self.referenced_by:
dbt.exceptions.raise_cache_inconsistent(
'in rename of "{}" -> "{}", new name is in the cache already'
.format(old_key, new_key)
)
if old_key not in self.referenced_by:
return
value = self.referenced_by.pop(old_key)
self.referenced_by[new_key] = value
def dump_graph_entry(self):
"""Return a key/value pair representing this key and its referents.
:return List[str]: The dot-separated form of all referent keys.
"""
return [dot_separated(r) for r in self.referenced_by]
def lazy_log(msg, func):
if logger.disabled:
return
logger.debug(msg.format(func()))
class RelationsCache(object):
"""A cache of the relations known to dbt. Keeps track of relationships
declared between tables and handles renames/drops as a real database would.
:attr Dict[_ReferenceKey, _CachedRelation] relations: The known relations.
:attr threading.RLock lock: The lock around relations, held during updates.
The adapters also hold this lock while filling the cache.
:attr Set[str] schemas: The set of known/cached schemas, all lowercased.
"""
def __init__(self):
self.relations = {}
self.lock = threading.RLock()
self.schemas = set()
def add_schema(self, database, schema):
"""Add a schema to the set of known schemas (case-insensitive)
:param str database: The database name to add.
:param str schema: The schema name to add.
"""
self.schemas.add((_lower(database), _lower(schema)))
def remove_schema(self, database, schema):
"""Remove a schema from the set of known schemas (case-insensitive)
If the schema does not exist, it will be ignored - it could just be a
temporary table.
:param str database: The database name to remove.
:param str schema: The schema name to remove.
"""
self.schemas.discard((_lower(database), _lower(schema)))
def update_schemas(self, schemas):
"""Add multiple schemas to the set of known schemas (case-insensitive)
:param Iterable[str] schemas: An iterable of the schema names to add.
"""
self.schemas.update((_lower(d), _lower(s)) for (d, s) in schemas)
def __contains__(self, schema_id):
"""A schema is 'in' the relations cache if it is in the set of cached
schemas.
:param Tuple[str, str] schema_id: The db name and schema name to look up.
"""
db, schema = schema_id
return (_lower(db), _lower(schema)) in self.schemas
def dump_graph(self):
"""Dump a key-only representation of the schema to a dictionary. Every
known relation is a key with a value of a list of keys it is referenced
by.
"""
# we have to hold the lock for the entire dump, if other threads modify
# self.relations or any cache entry's referenced_by during iteration
# it's a runtime error!
with self.lock:
return {
dot_separated(k): v.dump_graph_entry()
for k, v in self.relations.items()
}
def _setdefault(self, relation):
"""Add a relation to the cache, or return it if it already exists.
:param _CachedRelation relation: The relation to set or get.
:return _CachedRelation: The relation stored under the given relation's
key
"""
self.add_schema(relation.database, relation.schema)
key = relation.key()
return self.relations.setdefault(key, relation)
def _add_link(self, referenced_key, dependent_key):
"""Add a link between two relations to the database. Both the old and
new entries must alraedy exist in the database.
:param _ReferenceKey referenced_key: The key identifying the referenced
model (the one that if dropped will drop the dependent model).
:param _ReferenceKey dependent_key: The key identifying the dependent
model.
:raises InternalError: If either entry does not exist.
"""
referenced = self.relations.get(referenced_key)
if referenced is None:
dbt.exceptions.raise_cache_inconsistent(
'in add_link, referenced link key {} not in cache!'
.format(referenced_key)
)
dependent = self.relations.get(dependent_key)
if dependent is None:
dbt.exceptions.raise_cache_inconsistent(
'in add_link, dependent link key {} not in cache!'
.format(dependent_key)
)
referenced.add_reference(dependent)
def add_link(self, referenced, dependent):
"""Add a link between two relations to the database. Both the old and
new entries must already exist in the database.
The dependent model refers _to_ the referenced model. So, given
arguments of (jake_test, bar, jake_test, foo):
both values are in the schema jake_test and foo is a view that refers
to bar, so "drop bar cascade" will drop foo and all of foo's
dependents.
:param BaseRelation referenced: The referenced model.
:param BaseRelation dependent: The dependent model.
:raises InternalError: If either entry does not exist.
"""
referenced = _make_key(referenced)
if (referenced.database, referenced.schema) not in self:
# if we have not cached the referenced schema at all, we must be
# referring to a table outside our control. There's no need to make
# a link - we will never drop the referenced relation during a run.
logger.debug(
'{dep!s} references {ref!s} but {ref.database}.{ref.schema} '
'is not in the cache, skipping assumed external relation'
.format(dep=dependent, ref=referenced)
)
return
dependent = _make_key(dependent)
logger.debug(
'adding link, {!s} references {!s}'.format(dependent, referenced)
)
with self.lock:
self._add_link(referenced, dependent)
def add(self, relation):
"""Add the relation inner to the cache, under the schema schema and
identifier identifier
:param BaseRelation relation: The underlying relation.
"""
cached = _CachedRelation(relation)
logger.debug('Adding relation: {!s}'.format(cached))
lazy_log('before adding: {!s}', self.dump_graph)
with self.lock:
self._setdefault(cached)
lazy_log('after adding: {!s}', self.dump_graph)
def _remove_refs(self, keys):
"""Removes all references to all entries in keys. This does not
cascade!
:param Iterable[_ReferenceKey] keys: The keys to remove.
"""
# remove direct refs
for key in keys:
del self.relations[key]
# then remove all entries from each child
for cached in self.relations.values():
cached.release_references(keys)
def _drop_cascade_relation(self, dropped):
"""Drop the given relation and cascade it appropriately to all
dependent relations.
:param _ReferenceKey dropped: The key of the relation to drop.
"""
if dropped not in self.relations:
logger.debug('dropped a nonexistent relationship: {!s}'
.format(dropped))
return
consequences = self.relations[dropped].collect_consequences()
logger.debug(
'drop {} is cascading to {}'.format(dropped, consequences)
)
self._remove_refs(consequences)
def drop(self, relation):
"""Drop the named relation and cascade it appropriately to all
dependent relations.
Because dbt proactively does many `drop relation if exist ... cascade`
that are noops, nonexistent relation drops cause a debug log and no
other actions.
:param BaseRelation relation: The relation to drop.
"""
dropped = _make_key(relation)
logger.debug('Dropping relation: {!s}'.format(dropped))
with self.lock:
self._drop_cascade_relation(dropped)
def _rename_relation(self, old_key, new_relation):
"""Rename a relation named old_key to new_key, updating references.
Return whether or not there was a key to rename.
:param _ReferenceKey old_key: The existing key, to rename from.
:param _CachedRelation new_key: The new relation, to rename to.
"""
# On the database level, a rename updates all values that were
# previously referenced by old_name to be referenced by new_name.
# basically, the name changes but some underlying ID moves. Kind of
# like an object reference!
relation = self.relations.pop(old_key)
new_key = new_relation.key()
# relation has to rename its innards, so it needs the _CachedRelation.
relation.rename(new_relation)
# update all the relations that refer to it
for cached in self.relations.values():
if cached.is_referenced_by(old_key):
logger.debug(
'updated reference from {0} -> {2} to {1} -> {2}'
.format(old_key, new_key, cached.key())
)
cached.rename_key(old_key, new_key)
self.relations[new_key] = relation
# also fixup the schemas!
self.remove_schema(old_key.database, old_key.schema)
self.add_schema(new_key.database, new_key.schema)
return True
def _check_rename_constraints(self, old_key, new_key):
"""Check the rename constraints, and return whether or not the rename
can proceed.
If the new key is already present, that is an error.
If the old key is absent, we debug log and return False, assuming it's
a temp table being renamed.
:param _ReferenceKey old_key: The existing key, to rename from.
:param _ReferenceKey new_key: The new key, to rename to.
:return bool: If the old relation exists for renaming.
:raises InternalError: If the new key is already present.
"""
if new_key in self.relations:
dbt.exceptions.raise_cache_inconsistent(
'in rename, new key {} already in cache: {}'
.format(new_key, list(self.relations.keys()))
)
if old_key not in self.relations:
logger.debug(
'old key {} not found in self.relations, assuming temporary'
.format(old_key)
)
return False
return True
def rename(self, old, new):
"""Rename the old schema/identifier to the new schema/identifier and
update references.
If the new schema/identifier is already present, that is an error.
If the schema/identifier key is absent, we only debug log and return,
assuming it's a temp table being renamed.
:param BaseRelation old: The existing relation name information.
:param BaseRelation new: The new relation name information.
:raises InternalError: If the new key is already present.
"""
old_key = _make_key(old)
new_key = _make_key(new)
logger.debug('Renaming relation {!s} to {!s}'.format(
old_key, new_key
))
lazy_log('before rename: {!s}', self.dump_graph)
with self.lock:
if self._check_rename_constraints(old_key, new_key):
self._rename_relation(old_key, _CachedRelation(new))
else:
self._setdefault(_CachedRelation(new))
lazy_log('after rename: {!s}', self.dump_graph)
def get_relations(self, database, schema):
"""Case-insensitively yield all relations matching the given schema.
:param str database: The case-insensitive database name to list from.
:param str schema: The case-insensitive schema name to list from.
:return List[BaseRelation]: The list of relations with the given
schema
"""
schema = _lower(schema)
with self.lock:
results = [
r.inner for r in self.relations.values()
if (r.schema == _lower(schema) and
r.database == _lower(database))
]
if None in results:
dbt.exceptions.raise_cache_inconsistent(
'in get_relations, a None relation was found in the cache!'
)
return results
def clear(self):
"""Clear the cache"""
with self.lock:
self.relations.clear()
self.schemas.clear()
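# Usage sketch (not dbt source): a cache round trip. `_FakeRelation` is a
# hypothetical stand-in exposing only the attributes the cache reads here
# (database, schema, identifier); in dbt proper these are BaseRelations.
class _FakeRelation(object):
    def __init__(self, database, schema, identifier):
        self.database = database
        self.schema = schema
        self.identifier = identifier

_cache = RelationsCache()
_bar = _FakeRelation('analytics', 'jake_test', 'bar')
_foo = _FakeRelation('analytics', 'jake_test', 'foo')
_cache.add(_bar)
_cache.add(_foo)
_cache.add_link(_bar, _foo)       # foo is a view defined on top of bar
print(len(_cache.get_relations('analytics', 'jake_test')))  # 2
_cache.drop(_bar)                 # drop ... cascade: foo is removed as well
print(_cache.get_relations('analytics', 'jake_test'))       # []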


@@ -0,0 +1,86 @@
import dbt.exceptions
from importlib import import_module
from dbt.include.global_project import PACKAGES
from dbt.logger import GLOBAL_LOGGER as logger
import threading
ADAPTER_TYPES = {}
_ADAPTERS = {}
_ADAPTER_LOCK = threading.Lock()
def get_adapter_class_by_name(adapter_name):
with _ADAPTER_LOCK:
if adapter_name in ADAPTER_TYPES:
return ADAPTER_TYPES[adapter_name]
message = "Invalid adapter type {}! Must be one of {}"
adapter_names = ", ".join(ADAPTER_TYPES.keys())
formatted_message = message.format(adapter_name, adapter_names)
raise dbt.exceptions.RuntimeException(formatted_message)
def get_relation_class_by_name(adapter_name):
adapter = get_adapter_class_by_name(adapter_name)
return adapter.Relation
def load_plugin(adapter_name):
try:
mod = import_module('.' + adapter_name, 'dbt.adapters')
except ImportError as e:
logger.info("Error importing adapter: {}".format(e))
raise dbt.exceptions.RuntimeException(
"Could not find adapter type {}!".format(adapter_name)
)
plugin = mod.Plugin
if plugin.adapter.type() != adapter_name:
raise dbt.exceptions.RuntimeException(
'Expected to find adapter with type named {}, got adapter with '
'type {}'
.format(adapter_name, plugin.adapter.type())
)
with _ADAPTER_LOCK:
ADAPTER_TYPES[adapter_name] = plugin.adapter
PACKAGES[plugin.project_name] = plugin.include_path
for dep in plugin.dependencies:
load_plugin(dep)
return plugin.credentials
def get_adapter(config):
adapter_name = config.credentials.type
if adapter_name in _ADAPTERS:
return _ADAPTERS[adapter_name]
with _ADAPTER_LOCK:
if adapter_name not in ADAPTER_TYPES:
raise dbt.exceptions.RuntimeException(
"Could not find adapter type {}!".format(adapter_name)
)
adapter_type = ADAPTER_TYPES[adapter_name]
# check again, in case something was setting it before
if adapter_name in _ADAPTERS:
return _ADAPTERS[adapter_name]
adapter = adapter_type(config)
_ADAPTERS[adapter_name] = adapter
return adapter
def reset_adapters():
"""Clear the adapters. This is useful for tests, which change configs.
"""
with _ADAPTER_LOCK:
for adapter in _ADAPTERS.values():
adapter.cleanup_connections()
_ADAPTERS.clear()
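# Usage sketch: resolving adapter and relation classes by name. 'postgres' is
# only an example; the corresponding plugin package must be installed, and
# load_plugin() must run before the lookups below will succeed.
load_plugin('postgres')
print(get_adapter_class_by_name('postgres').type())  # postgres
print(get_relation_class_by_name('postgres'))        # the plugin's Relation class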


@@ -0,0 +1,3 @@
# these are all just exports, #noqa them so flake8 will be happy
from dbt.adapters.sql.connections import SQLConnectionManager # noqa
from dbt.adapters.sql.impl import SQLAdapter # noqa


@@ -0,0 +1,141 @@
import abc
import time
import dbt.clients.agate_helper
import dbt.exceptions
import dbt.flags
from dbt.contracts.connection import Connection
from dbt.adapters.base import BaseConnectionManager
from dbt.compat import abstractclassmethod
from dbt.logger import GLOBAL_LOGGER as logger
class SQLConnectionManager(BaseConnectionManager):
"""The default connection manager with some common SQL methods implemented.
Methods to implement:
- exception_handler
- cancel
- get_status
- open
"""
@abc.abstractmethod
def cancel(self, connection):
"""Cancel the given connection.
:param Connection connection: The connection to cancel.
"""
raise dbt.exceptions.NotImplementedException(
'`cancel` is not implemented for this adapter!'
)
def cancel_open(self):
names = []
this_connection = self.get_if_exists()
with self.lock:
for connection in self.thread_connections.values():
if connection is this_connection:
continue
self.cancel(connection)
names.append(connection.name)
return names
def add_query(self, sql, auto_begin=True, bindings=None,
abridge_sql_log=False):
connection = self.get_thread_connection()
if auto_begin and connection.transaction_open is False:
self.begin()
logger.debug('Using {} connection "{}".'
.format(self.TYPE, connection.name))
with self.exception_handler(sql):
if abridge_sql_log:
logger.debug('On %s: %s....', connection.name, sql[0:512])
else:
logger.debug('On %s: %s', connection.name, sql)
pre = time.time()
cursor = connection.handle.cursor()
cursor.execute(sql, bindings)
logger.debug("SQL status: %s in %0.2f seconds",
self.get_status(cursor), (time.time() - pre))
return connection, cursor
@abstractclassmethod
def get_status(cls, cursor):
"""Get the status of the cursor.
:param cursor: A database handle to get status from
:return: The current status
:rtype: str
"""
raise dbt.exceptions.NotImplementedException(
'`get_status` is not implemented for this adapter!'
)
@classmethod
def process_results(cls, column_names, rows):
return [dict(zip(column_names, row)) for row in rows]
@classmethod
def get_result_from_cursor(cls, cursor):
data = []
column_names = []
if cursor.description is not None:
column_names = [col[0] for col in cursor.description]
rows = cursor.fetchall()
data = cls.process_results(column_names, rows)
return dbt.clients.agate_helper.table_from_data(data, column_names)
def execute(self, sql, auto_begin=False, fetch=False):
_, cursor = self.add_query(sql, auto_begin)
status = self.get_status(cursor)
if fetch:
table = self.get_result_from_cursor(cursor)
else:
table = dbt.clients.agate_helper.empty_table()
return status, table
def add_begin_query(self):
return self.add_query('BEGIN', auto_begin=False)
def add_commit_query(self):
return self.add_query('COMMIT', auto_begin=False)
def begin(self):
connection = self.get_thread_connection()
if dbt.flags.STRICT_MODE:
assert isinstance(connection, Connection)
if connection.transaction_open is True:
raise dbt.exceptions.InternalException(
'Tried to begin a new transaction on connection "{}", but '
'it already had one open!'.format(connection.get('name')))
self.add_begin_query()
connection.transaction_open = True
return connection
def commit(self):
connection = self.get_thread_connection()
if dbt.flags.STRICT_MODE:
assert isinstance(connection, Connection)
if connection.transaction_open is False:
raise dbt.exceptions.InternalException(
'Tried to commit transaction on connection "{}", but '
'it does not have one open!'.format(connection.name))
logger.debug('On {}: COMMIT'.format(connection.name))
self.add_commit_query()
connection.transaction_open = False
return connection
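# Usage sketch: process_results() is a pure classmethod, so it can be exercised
# without opening a database connection; it zips cursor column names with rows
# the same way get_result_from_cursor() does before building an agate table.
_cols = ['id', 'email']
_rows = [(1, 'a@example.com'), (2, 'b@example.com')]
print(SQLConnectionManager.process_results(_cols, _rows))
# [{'id': 1, 'email': 'a@example.com'}, {'id': 2, 'email': 'b@example.com'}]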


@@ -0,0 +1,222 @@
import agate
import dbt.clients.agate_helper
import dbt.exceptions
import dbt.flags
from dbt.adapters.base import BaseAdapter, available
from dbt.logger import GLOBAL_LOGGER as logger
LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching'
GET_COLUMNS_IN_RELATION_MACRO_NAME = 'get_columns_in_relation'
LIST_SCHEMAS_MACRO_NAME = 'list_schemas'
CHECK_SCHEMA_EXISTS_MACRO_NAME = 'check_schema_exists'
CREATE_SCHEMA_MACRO_NAME = 'create_schema'
DROP_SCHEMA_MACRO_NAME = 'drop_schema'
RENAME_RELATION_MACRO_NAME = 'rename_relation'
TRUNCATE_RELATION_MACRO_NAME = 'truncate_relation'
DROP_RELATION_MACRO_NAME = 'drop_relation'
ALTER_COLUMN_TYPE_MACRO_NAME = 'alter_column_type'
class SQLAdapter(BaseAdapter):
"""The default adapter with the common agate conversions and some SQL
methods implemented. This adapter has a different, much shorter list of
methods to implement, but more macros that must be implemented.
To implement a macro, implement "${adapter_type}__${macro_name}" in the
adapter's internal project.
Methods to implement:
- date_function
Macros to implement:
- get_catalog
- list_relations_without_caching
- get_columns_in_relation
"""
@available.parse(lambda *a, **k: (None, None))
def add_query(self, sql, auto_begin=True, bindings=None,
abridge_sql_log=False):
"""Add a query to the current transaction. A thin wrapper around
ConnectionManager.add_query.
:param str sql: The SQL query to add
:param bool auto_begin: If set and there is no transaction in progress,
begin a new one.
:param Optional[List[object]] bindings: An optional list of bindings for the
query.
:param bool abridge_sql_log: If set, limit the raw sql logged to 512
characters
"""
return self.connections.add_query(sql, auto_begin, bindings,
abridge_sql_log)
@classmethod
def convert_text_type(cls, agate_table, col_idx):
return "text"
@classmethod
def convert_number_type(cls, agate_table, col_idx):
decimals = agate_table.aggregate(agate.MaxPrecision(col_idx))
return "float8" if decimals else "integer"
@classmethod
def convert_boolean_type(cls, agate_table, col_idx):
return "boolean"
@classmethod
def convert_datetime_type(cls, agate_table, col_idx):
return "timestamp without time zone"
@classmethod
def convert_date_type(cls, agate_table, col_idx):
return "date"
@classmethod
def convert_time_type(cls, agate_table, col_idx):
return "time"
@classmethod
def is_cancelable(cls):
return True
def expand_column_types(self, goal, current):
reference_columns = {
c.name: c for c in
self.get_columns_in_relation(goal)
}
target_columns = {
c.name: c for c
in self.get_columns_in_relation(current)
}
for column_name, reference_column in reference_columns.items():
target_column = target_columns.get(column_name)
if target_column is not None and \
target_column.can_expand_to(reference_column):
col_string_size = reference_column.string_size()
new_type = self.Column.string_type(col_string_size)
logger.debug("Changing col type from %s to %s in table %s",
target_column.data_type, new_type, current)
self.alter_column_type(current, column_name, new_type)
def alter_column_type(self, relation, column_name, new_column_type):
"""
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
"""
kwargs = {
'relation': relation,
'column_name': column_name,
'new_column_type': new_column_type,
}
self.execute_macro(
ALTER_COLUMN_TYPE_MACRO_NAME,
kwargs=kwargs
)
def drop_relation(self, relation):
if dbt.flags.USE_CACHE:
self.cache.drop(relation)
if relation.type is None:
dbt.exceptions.raise_compiler_error(
'Tried to drop relation {}, but its type is null.'
.format(relation))
self.execute_macro(
DROP_RELATION_MACRO_NAME,
kwargs={'relation': relation}
)
def truncate_relation(self, relation):
self.execute_macro(
TRUNCATE_RELATION_MACRO_NAME,
kwargs={'relation': relation}
)
def rename_relation(self, from_relation, to_relation):
if dbt.flags.USE_CACHE:
self.cache.rename(from_relation, to_relation)
kwargs = {'from_relation': from_relation, 'to_relation': to_relation}
self.execute_macro(
RENAME_RELATION_MACRO_NAME,
kwargs=kwargs
)
def get_columns_in_relation(self, relation):
return self.execute_macro(
GET_COLUMNS_IN_RELATION_MACRO_NAME,
kwargs={'relation': relation}
)
def create_schema(self, database, schema):
logger.debug('Creating schema "%s"."%s".', database, schema)
kwargs = {
'database_name': self.quote_as_configured(database, 'database'),
'schema_name': self.quote_as_configured(schema, 'schema'),
}
self.execute_macro(CREATE_SCHEMA_MACRO_NAME, kwargs=kwargs)
self.commit_if_has_connection()
def drop_schema(self, database, schema):
logger.debug('Dropping schema "%s"."%s".', database, schema)
kwargs = {
'database_name': self.quote_as_configured(database, 'database'),
'schema_name': self.quote_as_configured(schema, 'schema'),
}
self.execute_macro(DROP_SCHEMA_MACRO_NAME,
kwargs=kwargs)
def list_relations_without_caching(self, information_schema, schema):
kwargs = {'information_schema': information_schema, 'schema': schema}
results = self.execute_macro(
LIST_RELATIONS_MACRO_NAME,
kwargs=kwargs
)
relations = []
quote_policy = {
'database': True,
'schema': True,
'identifier': True
}
for _database, name, _schema, _type in results:
relations.append(self.Relation.create(
database=_database,
schema=_schema,
identifier=name,
quote_policy=quote_policy,
type=_type
))
return relations
@classmethod
def quote(cls, identifier):
return '"{}"'.format(identifier)
def list_schemas(self, database):
results = self.execute_macro(
LIST_SCHEMAS_MACRO_NAME,
kwargs={'database': database}
)
return [row[0] for row in results]
def check_schema_exists(self, database, schema):
information_schema = self.Relation.create(
database=database, schema=schema,
quote_policy=self.config.quoting
).information_schema()
kwargs = {'information_schema': information_schema, 'schema': schema}
results = self.execute_macro(
CHECK_SCHEMA_EXISTS_MACRO_NAME,
kwargs=kwargs
)
return results[0][0] > 0
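
And a matching sketch (again, not from this diff) of a minimal adapter built on SQLAdapter: wire in a connection manager, provide date_function, and optionally override one of the agate conversions; everything else comes from the "${adapter_type}__${macro_name}" macros. The class names are assumptions carried over from the sketch above.

class ExampleAdapter(SQLAdapter):
    ConnectionManager = ExampleConnectionManager

    @classmethod
    def date_function(cls):
        # whatever SQL expression this warehouse uses for "now"
        return 'datetime()'

    @classmethod
    def convert_datetime_type(cls, agate_table, col_idx):
        # override an agate conversion if the warehouse spells the type differently
        return 'timestamp'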

core/dbt/api/__init__.py (new file)

@@ -0,0 +1,5 @@
from dbt.api.object import APIObject
__all__ = [
'APIObject'
]

core/dbt/api/object.py (new file)

@@ -0,0 +1,125 @@
import copy
from collections import Mapping
from jsonschema import Draft7Validator
from dbt.exceptions import JSONValidationException
from dbt.utils import deep_merge
from dbt.clients.system import write_json
class APIObject(Mapping):
"""
A serializable / deserializable object intended for
use in a future dbt API.
To create a new object, you'll want to extend this
class, and then implement the SCHEMA property (a
valid JSON schema), the DEFAULTS property (default
settings for this object), and a static method that
calls this constructor.
"""
SCHEMA = {
'type': 'object',
'properties': {}
}
DEFAULTS = {}
def __init__(self, **kwargs):
"""
Create and validate an instance. Note that if you override this, you
will want to do so by modifying kwargs and only then calling
super(NewClass, self).__init__(**kwargs).
"""
super(APIObject, self).__init__()
# note: deep_merge does a deep copy on its arguments.
self._contents = deep_merge(self.DEFAULTS, kwargs)
self.validate()
def __str__(self):
return '{}(**{})'.format(self.__class__.__name__, self._contents)
def __repr__(self):
return '{}(**{})'.format(self.__class__.__name__, self._contents)
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return self.serialize() == other.serialize()
def incorporate(self, **kwargs):
"""
Given a list of kwargs, incorporate these arguments
into a new copy of this instance, and return the new
instance after validating.
"""
return type(self)(**deep_merge(self._contents, kwargs))
def serialize(self):
"""
Return a dict representation of this object.
"""
return copy.deepcopy(self._contents)
def write(self, path):
write_json(path, self.serialize())
@classmethod
def deserialize(cls, settings):
"""
Convert a dict representation of this object into
an actual object for internal use.
"""
return cls(**settings)
def validate(self):
"""
Using the SCHEMA property, validate the attributes
of this instance. If any attributes are missing or
invalid, raise a ValidationException.
"""
validator = Draft7Validator(self.SCHEMA)
errors = set() # make errors a set to avoid duplicates
for error in validator.iter_errors(self.serialize()):
errors.add('.'.join(
list(map(str, error.path)) + [error.message]
))
if errors:
raise JSONValidationException(type(self).__name__, errors)
# implement the Mapping protocol:
# https://docs.python.org/3/library/collections.abc.html
def __getitem__(self, key):
return self._contents[key]
def __iter__(self):
return self._contents.__iter__()
def __len__(self):
return self._contents.__len__()
# implement this because everyone always expects it.
def get(self, key, default=None):
try:
return self[key]
except KeyError:
return default
def set(self, key, value):
self._contents[key] = value
# most users of APIObject also expect the attributes to be available via
# dot-notation because the previous implementation assigned to __dict__.
# we should consider removing this if we fix all uses to have properties.
def __getattr__(self, name):
if name != '_contents' and name in self._contents:
return self._contents[name]
elif hasattr(self.__class__, name):
return getattr(self.__class__, name)
raise AttributeError((
"'{}' object has no attribute '{}'"
).format(type(self).__name__, name))
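
A small usage sketch (not part of this diff): subclass APIObject with a SCHEMA and DEFAULTS, then lean on validate, incorporate, and serialize. The Widget class is purely illustrative.

from dbt.api import APIObject


class Widget(APIObject):
    SCHEMA = {
        'type': 'object',
        'properties': {
            'name': {'type': 'string'},
            'enabled': {'type': 'boolean'},
        },
        'required': ['name'],
    }
    DEFAULTS = {'enabled': True}


w = Widget(name='my_widget')
assert w.enabled is True                # dot access via __getattr__
w2 = w.incorporate(enabled=False)       # new, re-validated copy
assert w2.serialize() == {'name': 'my_widget', 'enabled': False}
# Widget(name=1) would raise JSONValidationException from validate()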


@@ -0,0 +1,371 @@
import re
from collections import namedtuple
import dbt.exceptions
def regex(pat):
return re.compile(pat, re.DOTALL | re.MULTILINE)
class BlockData(object):
"""raw plaintext data from the top level of the file."""
def __init__(self, contents):
self.block_type_name = '__dbt__data'
self.contents = contents
self.full_block = contents
class BlockTag(object):
def __init__(self, block_type_name, block_name, contents=None,
full_block=None, **kw):
self.block_type_name = block_type_name
self.block_name = block_name
self.contents = contents
self.full_block = full_block
def __str__(self):
return 'BlockTag({!r}, {!r})'.format(self.block_type_name,
self.block_name)
def __repr__(self):
return str(self)
@property
def end_block_type_name(self):
return 'end{}'.format(self.block_type_name)
def end_pat(self):
# we don't want to use string formatting here because jinja uses most
# of the string formatting operators in its syntax...
pattern = ''.join((
r'(?P<endblock>((?:\s*\{\%\-|\{\%)\s*',
self.end_block_type_name,
r'\s*(?:\-\%\}\s*|\%\})))',
))
return regex(pattern)
Tag = namedtuple('Tag', 'block_type_name block_name start end')
_NAME_PATTERN = r'[A-Za-z_][A-Za-z_0-9]*'
COMMENT_START_PATTERN = regex(r'(?:(?P<comment_start>(\s*\{\#)))')
COMMENT_END_PATTERN = regex(r'(.*?)(\s*\#\})')
RAW_START_PATTERN = regex(
r'(?:\s*\{\%\-|\{\%)\s*(?P<raw_start>(raw))\s*(?:\-\%\}\s*|\%\})'
)
EXPR_START_PATTERN = regex(r'(?P<expr_start>(\{\{\s*))')
EXPR_END_PATTERN = regex(r'(?P<expr_end>(\s*\}\}))')
BLOCK_START_PATTERN = regex(''.join((
r'(?:\s*\{\%\-|\{\%)\s*',
r'(?P<block_type_name>({}))'.format(_NAME_PATTERN),
# some blocks have a 'block name'.
r'(?:\s+(?P<block_name>({})))?'.format(_NAME_PATTERN),
)))
RAW_BLOCK_PATTERN = regex(''.join((
r'(?:\s*\{\%\-|\{\%)\s*raw\s*(?:\-\%\}\s*|\%\})',
r'(?:.*)',
r'(?:\s*\{\%\-|\{\%)\s*endraw\s*(?:\-\%\}\s*|\%\})',
)))
TAG_CLOSE_PATTERN = regex(r'(?:(?P<tag_close>(\-\%\}\s*|\%\})))')
# stolen from jinja's lexer. Note that we've consumed all prefix whitespace by
# the time we want to use this.
STRING_PATTERN = regex(
r"(?P<string>('([^'\\]*(?:\\.[^'\\]*)*)'|"
r'"([^"\\]*(?:\\.[^"\\]*)*)"))'
)
QUOTE_START_PATTERN = regex(r'''(?P<quote>(['"]))''')
class TagIterator(object):
def __init__(self, data):
self.data = data
self.blocks = []
self._parenthesis_stack = []
self.pos = 0
def advance(self, new_position):
self.pos = new_position
def rewind(self, amount=1):
self.pos -= amount
def _search(self, pattern):
return pattern.search(self.data, self.pos)
def _match(self, pattern):
return pattern.match(self.data, self.pos)
def _first_match(self, *patterns, **kwargs):
matches = []
for pattern in patterns:
# default to 'search', but sometimes we want to 'match'.
if kwargs.get('method', 'search') == 'search':
match = self._search(pattern)
else:
match = self._match(pattern)
if match:
matches.append(match)
if not matches:
return None
# if there are multiple matches, pick the least greedy match
# TODO: do I need to account for m.start(), or is this ok?
return min(matches, key=lambda m: m.end())
def _expect_match(self, expected_name, *patterns, **kwargs):
match = self._first_match(*patterns, **kwargs)
if match is None:
msg = 'unexpected EOF, expected {}, got "{}"'.format(
expected_name, self.data[self.pos:]
)
dbt.exceptions.raise_compiler_error(msg)
return match
def handle_expr(self, match):
"""Handle an expression. At this point we're at a string like:
{{ 1 + 2 }}
^ right here
And the match contains "{{ "
We expect to find a `}}`, but we might find one in a string before
that. Imagine the case of `{{ 2 * "}}" }}`...
You're not allowed to have blocks or comments inside an expr so it is
pretty straightforward, I hope: only strings can get in the way.
"""
self.advance(match.end())
while True:
match = self._expect_match('}}',
EXPR_END_PATTERN,
QUOTE_START_PATTERN)
if match.groupdict().get('expr_end') is not None:
break
else:
# it's a quote. we haven't advanced for this match yet, so
# just slurp up the whole string, no need to rewind.
match = self._expect_match('string', STRING_PATTERN)
self.advance(match.end())
self.advance(match.end())
def handle_comment(self, match):
self.advance(match.end())
match = self._expect_match('#}', COMMENT_END_PATTERN)
self.advance(match.end())
def _expect_block_close(self):
"""Search for the tag close marker.
To the right of the type name, there are a few possibilities:
- a name (handled by the regex's 'block_name')
- any number of: `=`, `(`, `)`, strings, etc (arguments)
- nothing
followed eventually by a %}
So the only characters we actually have to worry about in this context
are quote and `%}` - nothing else can hide the %} and be valid jinja.
"""
while True:
end_match = self._expect_match(
'tag close ("%}")',
QUOTE_START_PATTERN,
TAG_CLOSE_PATTERN
)
self.advance(end_match.end())
if end_match.groupdict().get('tag_close') is not None:
return
# must be a string. Rewind to its start and advance past it.
self.rewind()
string_match = self._expect_match('string', STRING_PATTERN)
self.advance(string_match.end())
def handle_raw(self):
# raw blocks are super special, they are a single complete regex
match = self._expect_match('{% raw %}...{% endraw %}',
RAW_BLOCK_PATTERN)
self.advance(match.end())
return match.end()
def handle_tag(self, match):
"""The tag could be one of a few things:
{% mytag %}
{% mytag x = y %}
{% mytag x = "y" %}
{% mytag x.y() %}
{% mytag foo("a", "b", c="d") %}
But the key here is that it's always going to be `{% mytag`!
"""
groups = match.groupdict()
# always a value
block_type_name = groups['block_type_name']
# might be None
block_name = groups.get('block_name')
start_pos = self.pos
if block_type_name == 'raw':
match = self._expect_match('{% raw %}...{% endraw %}',
RAW_BLOCK_PATTERN)
self.advance(match.end())
else:
self.advance(match.end())
self._expect_block_close()
return Tag(
block_type_name=block_type_name,
block_name=block_name,
start=start_pos,
end=self.pos
)
def find_tags(self):
while True:
match = self._first_match(
BLOCK_START_PATTERN,
COMMENT_START_PATTERN,
EXPR_START_PATTERN
)
if match is None:
break
self.advance(match.start())
# start = self.pos
groups = match.groupdict()
comment_start = groups.get('comment_start')
expr_start = groups.get('expr_start')
block_type_name = groups.get('block_type_name')
if comment_start is not None:
self.handle_comment(match)
elif expr_start is not None:
self.handle_expr(match)
elif block_type_name is not None:
yield self.handle_tag(match)
else:
raise dbt.exceptions.InternalException(
'Invalid regex match in next_block, expected block start, '
'expr start, or comment start'
)
def __iter__(self):
return self.find_tags()
duplicate_tags = (
'Got nested tags: {outer.block_type_name} (started at {outer.start}) did '
'not have a matching {{% end{outer.block_type_name} %}} before a '
'subsequent {inner.block_type_name} was found (started at {inner.start})'
)
_CONTROL_FLOW_TAGS = {
'if': 'endif',
'for': 'endfor',
}
_CONTROL_FLOW_END_TAGS = {
v: k
for k, v in _CONTROL_FLOW_TAGS.items()
}
class BlockIterator(object):
def __init__(self, data):
self.tag_parser = TagIterator(data)
self.current = None
self.stack = []
self.last_position = 0
@property
def current_end(self):
if self.current is None:
return 0
else:
return self.current.end
@property
def data(self):
return self.tag_parser.data
def is_current_end(self, tag):
return (
tag.block_type_name.startswith('end') and
self.current is not None and
tag.block_type_name[3:] == self.current.block_type_name
)
def find_blocks(self, allowed_blocks=None, collect_raw_data=True):
"""Find all top-level blocks in the data."""
if allowed_blocks is None:
allowed_blocks = {'snapshot', 'macro', 'materialization', 'docs'}
for tag in self.tag_parser.find_tags():
if tag.block_type_name in _CONTROL_FLOW_TAGS:
self.stack.append(tag.block_type_name)
elif tag.block_type_name in _CONTROL_FLOW_END_TAGS:
found = None
if self.stack:
found = self.stack.pop()
else:
expected = _CONTROL_FLOW_END_TAGS[tag.block_type_name]
dbt.exceptions.raise_compiler_error((
'Got an unexpected control flow end tag, got {} but '
'never saw a preceding {} (@ {})'
).format(tag.block_type_name, expected, tag.start))
expected = _CONTROL_FLOW_TAGS[found]
if expected != tag.block_type_name:
dbt.exceptions.raise_compiler_error((
'Got an unexpected control flow end tag, got {} but '
'expected {} next (@ {})'
).format(tag.block_type_name, expected, tag.start))
if tag.block_type_name in allowed_blocks:
if self.stack:
dbt.exceptions.raise_compiler_error((
'Got a block definition inside control flow at {}. '
'All dbt block definitions must be at the top level'
).format(tag.start))
if self.current is not None:
dbt.exceptions.raise_compiler_error(
duplicate_tags.format(outer=self.current, inner=tag)
)
if collect_raw_data:
raw_data = self.data[self.last_position:tag.start]
self.last_position = tag.start
if raw_data:
yield BlockData(raw_data)
self.current = tag
elif self.is_current_end(tag):
self.last_position = tag.end
yield BlockTag(
block_type_name=self.current.block_type_name,
block_name=self.current.block_name,
contents=self.data[self.current.end:tag.start],
full_block=self.data[self.current.start:tag.end]
)
self.current = None
if self.current:
dbt.exceptions.raise_compiler_error((
'Reached EOF without finding a close block for '
'{0.block_type_name} (from {0.end})'
).format(self.current))
if collect_raw_data:
raw_data = self.data[self.last_position:]
if raw_data:
yield BlockData(raw_data)
def lex_for_blocks(self, allowed_blocks=None, collect_raw_data=True):
return list(self.find_blocks(allowed_blocks=allowed_blocks,
collect_raw_data=collect_raw_data))
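
A quick sketch (not from this diff) of what the iterator yields for a file containing one macro, assuming this module is importable as dbt.clients._jinja_blocks (the path used by the jinja client later in this diff):

source = '''
-- plain text outside any block
{% macro my_macro(x) %}
select {{ x }}
{% endmacro %}
'''

for block in BlockIterator(source).lex_for_blocks():
    print(type(block).__name__, block.block_type_name)
# Expected shape: a BlockData chunk ('__dbt__data') for the leading text,
# a BlockTag for the macro (block_name 'my_macro', contents holding its body),
# and a trailing BlockData for the final newline.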


@@ -0,0 +1,65 @@
import dbt.compat
import agate
import json
DEFAULT_TYPE_TESTER = agate.TypeTester(types=[
agate.data_types.Number(null_values=('null', '')),
agate.data_types.TimeDelta(null_values=('null', '')),
agate.data_types.Date(null_values=('null', '')),
agate.data_types.DateTime(null_values=('null', '')),
agate.data_types.Boolean(true_values=('true',),
false_values=('false',),
null_values=('null', '')),
agate.data_types.Text(null_values=('null', ''))
])
def table_from_data(data, column_names):
"Convert list of dictionaries into an Agate table"
# The agate table is generated from a list of dicts, so the column order
# from `data` is not preserved. We can use `select` to reorder the columns
#
# If there is no data, create an empty table with the specified columns
if len(data) == 0:
return agate.Table([], column_names=column_names)
else:
table = agate.Table.from_object(data, column_types=DEFAULT_TYPE_TESTER)
return table.select(column_names)
def table_from_data_flat(data, column_names):
"Convert list of dictionaries into an Agate table"
rows = []
for _row in data:
row = []
for value in list(_row.values()):
if isinstance(value, (dict, list, tuple)):
row.append(json.dumps(value))
else:
row.append(value)
rows.append(row)
return agate.Table(rows, column_names)
def empty_table():
"Returns an empty Agate table. To be used in place of None"
return agate.Table(rows=[])
def as_matrix(table):
"Return an agate table as a matrix of data sans columns"
return [r.values() for r in table.rows.values()]
def from_csv(abspath):
with dbt.compat.open_seed_file(abspath) as fp:
if fp.read(len(dbt.compat.BOM_UTF8)) != dbt.compat.BOM_UTF8:
fp.seek(0)
return agate.Table.from_csv(fp, column_types=DEFAULT_TYPE_TESTER)
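
A short usage sketch (not part of this diff) showing why the "flat" constructor JSON-encodes nested values before handing rows to agate:

rows = [
    {'id': 1, 'tags': ['a', 'b']},
    {'id': 2, 'tags': []},
]

flat = table_from_data_flat(rows, ['id', 'tags'])
print(flat.rows[0]['tags'])     # '["a", "b"]' -- the list arrives as a JSON string
print(len(empty_table().rows))  # 0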


@@ -12,7 +12,7 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False):
if dirname is not None:
clone_cmd.append(dirname)
result = run_cmd(cwd, clone_cmd)
result = run_cmd(cwd, clone_cmd, env={'LC_ALL': 'C'})
if remove_git_dir:
rmdir(os.path.join(dirname, '.git'))
@@ -21,15 +21,12 @@ def clone(repo, cwd, dirname=None, remove_git_dir=False):
def list_tags(cwd):
out, err = run_cmd(cwd, ['git', 'tag', '--list'])
tags = set(out.decode('utf-8').strip().split("\n"))
out, err = run_cmd(cwd, ['git', 'tag', '--list'], env={'LC_ALL': 'C'})
tags = out.decode('utf-8').strip().split("\n")
return tags
def checkout(cwd, repo, branch=None):
if branch is None:
branch = 'master'
def _checkout(cwd, repo, branch):
logger.debug(' Checking out branch {}.'.format(branch))
run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', branch])
@@ -43,30 +40,43 @@ def checkout(cwd, repo, branch=None):
else:
spec = 'origin/{}'.format(branch)
out, err = run_cmd(cwd, ['git', 'reset', '--hard', spec])
stderr = err.decode('utf-8').strip()
out, err = run_cmd(cwd, ['git', 'reset', '--hard', spec],
env={'LC_ALL': 'C'})
return out, err
if stderr.startswith('fatal:'):
dbt.exceptions.bad_package_spec(repo, branch, stderr)
else:
return out, err
def checkout(cwd, repo, branch=None):
if branch is None:
branch = 'master'
try:
return _checkout(cwd, repo, branch)
except dbt.exceptions.CommandResultError as exc:
stderr = exc.stderr.decode('utf-8').strip()
dbt.exceptions.bad_package_spec(repo, branch, stderr)
def get_current_sha(cwd):
out, err = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'])
out, err = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'], env={'LC_ALL': 'C'})
return out.decode('utf-8')
def remove_remote(cwd):
return run_cmd(cwd, ['git', 'remote', 'rm', 'origin'])
return run_cmd(cwd, ['git', 'remote', 'rm', 'origin'], env={'LC_ALL': 'C'})
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
branch=None):
_, err = clone(repo, cwd, dirname=dirname, remove_git_dir=remove_git_dir)
exists = re.match("fatal: destination path '(.+)' already exists",
err.decode('utf-8'))
exists = None
try:
_, err = clone(repo, cwd, dirname=dirname,
remove_git_dir=remove_git_dir)
except dbt.exceptions.CommandResultError as exc:
err = exc.stderr.decode('utf-8')
exists = re.match("fatal: destination path '(.+)' already exists", err)
if not exists: # something else is wrong, raise it
raise
directory = None
start_sha = None
if exists:

core/dbt/clients/jinja.py (new file)

@@ -0,0 +1,328 @@
import codecs
import linecache
import os
import tempfile
import jinja2
import jinja2._compat
import jinja2.ext
import jinja2.nodes
import jinja2.parser
import jinja2.sandbox
import dbt.compat
import dbt.exceptions
import dbt.utils
from dbt.clients._jinja_blocks import BlockIterator
from dbt.logger import GLOBAL_LOGGER as logger # noqa
def _linecache_inject(source, write):
if write:
# this is the only reliable way to accomplish this. Obviously, it's
# really darn noisy and will fill your temporary directory
tmp_file = tempfile.NamedTemporaryFile(
prefix='dbt-macro-compiled-',
suffix='.py',
delete=False,
mode='w+',
encoding='utf-8',
)
tmp_file.write(source)
filename = tmp_file.name
else:
filename = codecs.encode(os.urandom(12), 'hex').decode('ascii')
# encode, though I don't think this matters
filename = jinja2._compat.encode_filename(filename)
# put ourselves in the cache
linecache.cache[filename] = (
len(source),
None,
[line + '\n' for line in source.splitlines()],
filename
)
return filename
class MacroFuzzParser(jinja2.parser.Parser):
def parse_macro(self):
node = jinja2.nodes.Macro(lineno=next(self.stream).lineno)
# modified to fuzz macros defined in the same file. this way
# dbt can understand the stack of macros being called.
# - @cmcarthur
node.name = dbt.utils.get_dbt_macro_name(
self.parse_assign_target(name_only=True).name)
self.parse_signature(node)
node.body = self.parse_statements(('name:endmacro',),
drop_needle=True)
return node
class MacroFuzzEnvironment(jinja2.sandbox.SandboxedEnvironment):
def _parse(self, source, name, filename):
return MacroFuzzParser(
self, source, name,
jinja2._compat.encode_filename(filename)
).parse()
def _compile(self, source, filename):
"""Override jinja's compilation to stash the rendered source inside
the python linecache for debugging when the appropriate environment
variable is set.
If the value is 'write', also write the files to disk.
WARNING: This can write a ton of data if you aren't careful.
"""
macro_compile = os.environ.get('DBT_MACRO_DEBUGGING')
if filename == '<template>' and macro_compile:
write = macro_compile == 'write'
filename = _linecache_inject(source, write)
return super(MacroFuzzEnvironment, self)._compile(source, filename)
class TemplateCache(object):
def __init__(self):
self.file_cache = {}
def get_node_template(self, node):
key = (node['package_name'], node['original_file_path'])
if key in self.file_cache:
return self.file_cache[key]
template = get_template(
string=node.get('raw_sql'),
ctx={},
node=node
)
self.file_cache[key] = template
return template
def clear(self):
self.file_cache.clear()
template_cache = TemplateCache()
def macro_generator(node):
def apply_context(context):
def call(*args, **kwargs):
name = node.get('name')
template = template_cache.get_node_template(node)
module = template.make_module(context, False, context)
macro = module.__dict__[dbt.utils.get_dbt_macro_name(name)]
module.__dict__.update(context)
try:
return macro(*args, **kwargs)
except dbt.exceptions.MacroReturn as e:
return e.value
except (TypeError, jinja2.exceptions.TemplateRuntimeError) as e:
dbt.exceptions.raise_compiler_error(str(e), node)
except dbt.exceptions.CompilationException as e:
e.stack.append(node)
raise e
return call
return apply_context
class MaterializationExtension(jinja2.ext.Extension):
tags = ['materialization']
def parse(self, parser):
node = jinja2.nodes.Macro(lineno=next(parser.stream).lineno)
materialization_name = \
parser.parse_assign_target(name_only=True).name
adapter_name = 'default'
node.args = []
node.defaults = []
while parser.stream.skip_if('comma'):
target = parser.parse_assign_target(name_only=True)
if target.name == 'default':
pass
elif target.name == 'adapter':
parser.stream.expect('assign')
value = parser.parse_expression()
adapter_name = value.value
else:
dbt.exceptions.invalid_materialization_argument(
materialization_name, target.name)
node.name = dbt.utils.get_materialization_macro_name(
materialization_name, adapter_name)
node.body = parser.parse_statements(('name:endmaterialization',),
drop_needle=True)
return node
class DocumentationExtension(jinja2.ext.Extension):
tags = ['docs']
def parse(self, parser):
node = jinja2.nodes.Macro(lineno=next(parser.stream).lineno)
docs_name = parser.parse_assign_target(name_only=True).name
node.args = []
node.defaults = []
node.name = dbt.utils.get_docs_macro_name(docs_name)
node.body = parser.parse_statements(('name:enddocs',),
drop_needle=True)
return node
def _is_dunder_name(name):
return name.startswith('__') and name.endswith('__')
def create_macro_capture_env(node):
class ParserMacroCapture(jinja2.Undefined):
"""
This class sets up the parser to capture macros.
"""
def __init__(self, hint=None, obj=None, name=None, exc=None):
super(ParserMacroCapture, self).__init__(hint=hint, name=name)
self.node = node
self.name = name
self.package_name = node.get('package_name')
# jinja uses these for safety, so we have to override them.
# see https://github.com/pallets/jinja/blob/master/jinja2/sandbox.py#L332-L339 # noqa
self.unsafe_callable = False
self.alters_data = False
def __deepcopy__(self, memo):
path = os.path.join(self.node.get('root_path'),
self.node.get('original_file_path'))
logger.debug(
'dbt encountered an undefined variable, "{}" in node {}.{} '
'(source path: {})'
.format(self.name, self.node.get('package_name'),
self.node.get('name'), path))
# match jinja's message
dbt.exceptions.raise_compiler_error(
"{!r} is undefined".format(self.name),
node=self.node
)
def __getitem__(self, name):
# Propagate the undefined value if a caller accesses this as if it
# were a dictionary
return self
def __getattr__(self, name):
if name == 'name' or _is_dunder_name(name):
raise AttributeError(
"'{}' object has no attribute '{}'"
.format(type(self).__name__, name)
)
self.package_name = self.name
self.name = name
return self
def __call__(self, *args, **kwargs):
return self
return ParserMacroCapture
def get_environment(node=None, capture_macros=False):
args = {
'extensions': ['jinja2.ext.do']
}
if capture_macros:
args['undefined'] = create_macro_capture_env(node)
args['extensions'].append(MaterializationExtension)
args['extensions'].append(DocumentationExtension)
return MacroFuzzEnvironment(**args)
def parse(string):
try:
return get_environment().parse(dbt.compat.to_string(string))
except (jinja2.exceptions.TemplateSyntaxError,
jinja2.exceptions.UndefinedError) as e:
e.translated = False
dbt.exceptions.raise_compiler_error(str(e))
def get_template(string, ctx, node=None, capture_macros=False):
try:
env = get_environment(node, capture_macros)
template_source = dbt.compat.to_string(string)
return env.from_string(template_source, globals=ctx)
except (jinja2.exceptions.TemplateSyntaxError,
jinja2.exceptions.UndefinedError) as e:
e.translated = False
dbt.exceptions.raise_compiler_error(str(e), node)
def render_template(template, ctx, node=None):
try:
return template.render(ctx)
except (jinja2.exceptions.TemplateSyntaxError,
jinja2.exceptions.UndefinedError) as e:
e.translated = False
dbt.exceptions.raise_compiler_error(str(e), node)
def get_rendered(string, ctx, node=None,
capture_macros=False):
template = get_template(string, ctx, node,
capture_macros=capture_macros)
return render_template(template, ctx, node)
def undefined_error(msg):
raise jinja2.exceptions.UndefinedError(msg)
def extract_toplevel_blocks(data, allowed_blocks=None, collect_raw_data=True):
"""Extract the top level blocks with matching block types from a jinja
file, with some special handling for block nesting.
:param str data: The data to extract blocks from.
:param Optional[Set[str]] allowed_blocks: The names of the blocks to
extract from the file. They may not be nested within if/for blocks.
If None, use the default values.
:param bool collect_raw_data: If set, raw data between matched blocks will
also be part of the results, as `BlockData` objects. They have a
`block_type_name` field of `'__dbt__data'` and will never have a
`block_name`.
:return List[Union[BlockData, BlockTag]]: A list of `BlockTag`s matching
the allowed block types and (if `collect_raw_data` is `True`)
`BlockData` objects.
"""
return BlockIterator(data).lex_for_blocks(
allowed_blocks=allowed_blocks,
collect_raw_data=collect_raw_data
)
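
Two tiny usage sketches (not from this diff) of the entry points above:

sql = get_rendered(
    "select * from events where day = '{{ run_date }}'",
    {'run_date': '2019-09-13'},
)
# -> "select * from events where day = '2019-09-13'"

found = extract_toplevel_blocks(
    '{% docs orders %}All orders{% enddocs %}',
    allowed_blocks={'docs'},
    collect_raw_data=False,
)
# found[0].block_name == 'orders'; found[0].contents == 'All orders'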


@@ -3,7 +3,9 @@ import six
import requests
from dbt.exceptions import RegistryException
from dbt.utils import memoized
from dbt.logger import GLOBAL_LOGGER as logger
import os
import time
if os.getenv('DBT_PACKAGE_HUB_URL'):
DEFAULT_REGISTRY_BASE_URL = os.getenv('DBT_PACKAGE_HUB_URL')
@@ -21,18 +23,30 @@ def _get_url(url, registry_base_url=None):
def _wrap_exceptions(fn):
@wraps(fn)
def wrapper(*args, **kwargs):
try:
return fn(*args, **kwargs)
except requests.exceptions.ConnectionError as e:
six.raise_from(
RegistryException('Unable to connect to registry hub'), e)
max_attempts = 5
attempt = 0
while True:
attempt += 1
try:
return fn(*args, **kwargs)
except requests.exceptions.ConnectionError as exc:
if attempt < max_attempts:
time.sleep(1)
continue
six.raise_from(
RegistryException('Unable to connect to registry hub'),
exc
)
return wrapper
@_wrap_exceptions
def _get(path, registry_base_url=None):
url = _get_url(path, registry_base_url)
logger.debug('Making package registry request: GET {}'.format(url))
resp = requests.get(url)
logger.debug('Response from registry: GET {} {}'.format(url,
resp.status_code))
resp.raise_for_status()
return resp.json()
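
A sketch (not part of this diff) of the retry behaviour the new _wrap_exceptions adds: transient ConnectionErrors are retried up to five times, a second apart, before surfacing as a RegistryException. The flaky function below is illustrative and assumes it runs alongside this module's imports.

calls = {'n': 0}

@_wrap_exceptions
def flaky():
    calls['n'] += 1
    if calls['n'] < 3:
        raise requests.exceptions.ConnectionError('registry hiccup')
    return {'status': 'ok'}

assert flaky() == {'status': 'ok'}
assert calls['n'] == 3   # two failures absorbed by the retry loop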

core/dbt/clients/system.py (new file)

@@ -0,0 +1,402 @@
import errno
import fnmatch
import json
import os
import os.path
import shutil
import subprocess
import sys
import tarfile
import requests
import stat
import dbt.compat
import dbt.exceptions
import dbt.utils
from dbt.logger import GLOBAL_LOGGER as logger
def find_matching(root_path,
relative_paths_to_search,
file_pattern):
"""
Given an absolute `root_path`, a list of relative paths to that
absolute root path (`relative_paths_to_search`), and a `file_pattern`
like '*.sql', returns information about the files. For example:
> find_matching('/root/path', ['models'], '*.sql')
[ { 'absolute_path': '/root/path/models/model_one.sql',
'relative_path': 'models/model_one.sql',
'searched_path': 'models' },
{ 'absolute_path': '/root/path/models/subdirectory/model_two.sql',
'relative_path': 'models/subdirectory/model_two.sql',
'searched_path': 'models' } ]
"""
matching = []
root_path = os.path.normpath(root_path)
for relative_path_to_search in relative_paths_to_search:
absolute_path_to_search = os.path.join(
root_path, relative_path_to_search)
walk_results = os.walk(absolute_path_to_search)
for current_path, subdirectories, local_files in walk_results:
for local_file in local_files:
absolute_path = os.path.join(current_path, local_file)
relative_path = os.path.relpath(
absolute_path, absolute_path_to_search)
if fnmatch.fnmatch(local_file, file_pattern):
matching.append({
'searched_path': relative_path_to_search,
'absolute_path': absolute_path,
'relative_path': relative_path,
})
return matching
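
For example (a sketch, assuming a project rooted at /root/path), note that the second argument is a list of relative paths:

for entry in find_matching('/root/path', ['models', 'analysis'], '*.sql'):
    print(entry['searched_path'], entry['relative_path'])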
def load_file_contents(path, strip=True):
with open(path, 'rb') as handle:
to_return = handle.read().decode('utf-8')
if strip:
to_return = to_return.strip()
return to_return
def make_directory(path):
"""
Make a directory and any intermediate directories that don't already
exist. This function handles the case where two threads try to create
a directory at once.
"""
if not os.path.exists(path):
# concurrent writes that try to create the same dir can fail
try:
os.makedirs(path)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise e
def make_file(path, contents='', overwrite=False):
"""
Make a file at `path` assuming that the directory it resides in already
exists. The file is saved with contents `contents`
"""
if overwrite or not os.path.exists(path):
with open(path, 'w') as fh:
fh.write(contents)
return True
return False
def make_symlink(source, link_path):
"""
Create a symlink at `link_path` referring to `source`.
"""
if not supports_symlinks():
dbt.exceptions.system_error('create a symbolic link')
return os.symlink(source, link_path)
def supports_symlinks():
return getattr(os, "symlink", None) is not None
def write_file(path, contents=''):
make_directory(os.path.dirname(path))
dbt.compat.write_file(path, contents)
return True
def write_json(path, data):
return write_file(path, json.dumps(data, cls=dbt.utils.JSONEncoder))
def _windows_rmdir_readonly(func, path, exc):
exception_val = exc[1]
if exception_val.errno == errno.EACCES:
os.chmod(path, stat.S_IWUSR)
func(path)
else:
raise
def resolve_path_from_base(path_to_resolve, base_path):
"""
If path_to_resolve is a relative path, create an absolute path
with base_path as the base.
If path_to_resolve is an absolute path or a user path (~), just
resolve it to an absolute path and return.
"""
return os.path.abspath(
os.path.join(
base_path,
os.path.expanduser(path_to_resolve)))
def rmdir(path):
"""
Recursively deletes a directory. Includes an error handler to retry with
different permissions on Windows. Otherwise, removing directories (eg.
cloned via git) can cause rmtree to throw a PermissionError exception
"""
logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
if sys.platform == 'win32':
onerror = _windows_rmdir_readonly
else:
onerror = None
return shutil.rmtree(path, onerror=onerror)
def remove_file(path):
return os.remove(path)
def path_exists(path):
return os.path.lexists(path)
def path_is_symlink(path):
return os.path.islink(path)
def open_dir_cmd():
# https://docs.python.org/2/library/sys.html#sys.platform
if sys.platform == 'win32':
return 'start'
elif sys.platform == 'darwin':
return 'open'
else:
return 'xdg-open'
def _handle_posix_cwd_error(exc, cwd, cmd):
if exc.errno == errno.ENOENT:
message = 'Directory does not exist'
elif exc.errno == errno.EACCES:
message = 'Current user cannot access directory, check permissions'
elif exc.errno == errno.ENOTDIR:
message = 'Not a directory'
else:
message = 'Unknown OSError: {} - cwd'.format(str(exc))
raise dbt.exceptions.WorkingDirectoryError(cwd, cmd, message)
def _handle_posix_cmd_error(exc, cwd, cmd):
if exc.errno == errno.ENOENT:
message = "Could not find command, ensure it is in the user's PATH"
elif exc.errno == errno.EACCES:
message = 'User does not have permissions for this command'
else:
message = 'Unknown OSError: {} - cmd'.format(str(exc))
raise dbt.exceptions.ExecutableError(cwd, cmd, message)
def _handle_posix_error(exc, cwd, cmd):
"""OSError handling for posix systems.
Some things that could happen to trigger an OSError:
- cwd could not exist
- exc.errno == ENOENT
- exc.filename == cwd
- cwd could have permissions that prevent the current user moving to it
- exc.errno == EACCES
- exc.filename == cwd
- cwd could exist but not be a directory
- exc.errno == ENOTDIR
- exc.filename == cwd
- cmd[0] could not exist
- exc.errno == ENOENT
- exc.filename == None(?)
- cmd[0] could exist but have permissions that prevents the current
user from executing it (executable bit not set for the user)
- exc.errno == EACCES
- exc.filename == None(?)
"""
if getattr(exc, 'filename', None) == cwd:
_handle_posix_cwd_error(exc, cwd, cmd)
else:
_handle_posix_cmd_error(exc, cwd, cmd)
def _handle_windows_error(exc, cwd, cmd):
cls = dbt.exceptions.CommandError
if exc.errno == errno.ENOENT:
message = ("Could not find command, ensure it is in the user's PATH "
"and that the user has permissions to run it")
cls = dbt.exceptions.ExecutableError
elif exc.errno == errno.ENOEXEC:
message = ('Command was not executable, ensure it is valid')
cls = dbt.exceptions.ExecutableError
elif exc.errno == errno.ENOTDIR:
message = ('Unable to cd: path does not exist, user does not have'
' permissions, or not a directory')
cls = dbt.exceptions.WorkingDirectoryError
else:
message = 'Unknown error: {} (errno={}: "{}")'.format(
str(exc), exc.errno, errno.errorcode.get(exc.errno, '<Unknown!>')
)
raise cls(cwd, cmd, message)
def _interpret_oserror(exc, cwd, cmd):
"""Interpret an OSError exc and raise the appropriate dbt exception.
"""
if len(cmd) == 0:
raise dbt.exceptions.CommandError(cwd, cmd)
# all of these functions raise unconditionally
if os.name == 'nt':
_handle_windows_error(exc, cwd, cmd)
else:
_handle_posix_error(exc, cwd, cmd)
# this should not be reachable, raise _something_ at least!
raise dbt.exceptions.InternalException(
'Unhandled exception in _interpret_oserror: {}'.format(exc)
)
def run_cmd(cwd, cmd, env=None):
logger.debug('Executing "{}"'.format(' '.join(cmd)))
if len(cmd) == 0:
raise dbt.exceptions.CommandError(cwd, cmd)
# the env argument replaces the environment entirely, which has exciting
# consequences on Windows! Do an update instead.
full_env = env
if env is not None:
full_env = os.environ.copy()
full_env.update(env)
try:
proc = subprocess.Popen(
cmd,
cwd=cwd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env=full_env)
out, err = proc.communicate()
except OSError as exc:
_interpret_oserror(exc, cwd, cmd)
logger.debug('STDOUT: "{}"'.format(out))
logger.debug('STDERR: "{}"'.format(err))
if proc.returncode != 0:
logger.debug('command return code={}'.format(proc.returncode))
raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
out, err)
return out, err
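
A usage sketch (not from this diff) mirroring how the git client above now calls run_cmd: the env argument is merged into a copy of os.environ, so LC_ALL is pinned only for that one subprocess.

out, err = run_cmd('.', ['git', 'status', '--short'], env={'LC_ALL': 'C'})
print(out.decode('utf-8'))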
def download(url, path):
response = requests.get(url)
with open(path, 'wb') as handle:
for block in response.iter_content(1024 * 64):
handle.write(block)
def rename(from_path, to_path, force=False):
is_symlink = path_is_symlink(to_path)
if os.path.exists(to_path) and force:
if is_symlink:
remove_file(to_path)
else:
rmdir(to_path)
shutil.move(from_path, to_path)
def untar_package(tar_path, dest_dir, rename_to=None):
tar_dir_name = None
with tarfile.open(tar_path, 'r') as tarball:
tarball.extractall(dest_dir)
tar_dir_name = os.path.commonprefix(tarball.getnames())
if rename_to:
downloaded_path = os.path.join(dest_dir, tar_dir_name)
desired_path = os.path.join(dest_dir, rename_to)
dbt.clients.system.rename(downloaded_path, desired_path, force=True)
def chmod_and_retry(func, path, exc_info):
"""Define an error handler to pass to shutil.rmtree.
On Windows, when a file is marked read-only as git likes to do, rmtree will
fail. To handle that, on errors try to make the file writable.
We want to retry most operations here, but listdir is one that we know will
be useless.
"""
if func is os.listdir or os.name != 'nt':
raise
os.chmod(path, stat.S_IREAD | stat.S_IWRITE)
# on error, this will raise.
func(path)
def _absnorm(path):
return os.path.normcase(os.path.abspath(path))
def move(src, dst):
"""A re-implementation of shutil.move that properly removes the source
directory on windows when it has read-only files in it and the move is
between two drives.
This is almost identical to the real shutil.move, except it uses our rmtree
and skips handling non-windows OSes since the existing one works ok there.
"""
if os.name != 'nt':
return shutil.move(src, dst)
if os.path.isdir(dst):
if _absnorm(src) == _absnorm(dst):
os.rename(src, dst)
return
dst = os.path.join(dst, os.path.basename(src.rstrip('/\\')))
if os.path.exists(dst):
raise EnvironmentError("Path '{}' already exists".format(dst))
try:
os.rename(src, dst)
except OSError:
# probably different drives
if os.path.isdir(src):
if _absnorm(dst + '\\').startswith(_absnorm(src + '\\')):
# dst is inside src
raise EnvironmentError(
"Cannot move a directory '{}' into itself '{}'"
.format(src, dst)
)
shutil.copytree(src, dst, symlinks=True)
rmtree(src)
else:
shutil.copy2(src, dst)
os.unlink(src)
def rmtree(path):
"""Recursively remove path. On permissions errors on windows, try to remove
the read-only flag and try again.
"""
return shutil.rmtree(path, onerror=chmod_and_retry)

core/dbt/compat.py (new file)

@@ -0,0 +1,152 @@
# flake8: noqa
import abc
import codecs
import warnings
import decimal
try:
import cdecimal
except ImportError:
DECIMALS = (decimal.Decimal,)
else:
DECIMALS = (decimal.Decimal, cdecimal.Decimal)
WHICH_PYTHON = None
try:
basestring
WHICH_PYTHON = 2
except NameError:
WHICH_PYTHON = 3
if WHICH_PYTHON == 2:
basestring = basestring
bigint = long
NUMBERS = DECIMALS + (int, float, long)
import __builtin__ as builtins
else:
basestring = str
bigint = int
NUMBERS = DECIMALS + (int, float)
import builtins
if WHICH_PYTHON == 2:
from SimpleHTTPServer import SimpleHTTPRequestHandler
from SocketServer import TCPServer
from Queue import PriorityQueue, Empty as QueueEmpty
from thread import get_ident
else:
from http.server import SimpleHTTPRequestHandler
from socketserver import TCPServer
from queue import PriorityQueue, Empty as QueueEmpty
from threading import get_ident
def to_unicode(s):
if WHICH_PYTHON == 2:
return unicode(s)
else:
return str(s)
def to_string(s):
if WHICH_PYTHON == 2:
if isinstance(s, unicode):
return s
elif isinstance(s, basestring):
return to_unicode(s)
else:
return to_unicode(str(s))
else:
if isinstance(s, basestring):
return s
else:
return str(s)
def to_native_string(s):
if WHICH_PYTHON == 2:
if isinstance(s, unicode):
return str(s)
elif isinstance(s, basestring):
return s
else:
return str(s)
else:
if isinstance(s, basestring):
return s
else:
return str(s)
def write_file(path, s):
if WHICH_PYTHON == 2:
open = codecs.open
else:
open = builtins.open
with open(path, 'w', encoding='utf-8') as f:
return f.write(to_string(s))
def open_file(path):
"""Open the path for reading. It must be utf-8 encoded."""
if WHICH_PYTHON == 2:
open = codecs.open
else:
open = builtins.open
return open(path, encoding='utf-8')
if WHICH_PYTHON == 2:
BOM_UTF8 = codecs.BOM_UTF8
else:
BOM_UTF8 = codecs.BOM_UTF8.decode('utf-8')
def open_seed_file(path):
if WHICH_PYTHON == 2:
fp = open(path, 'Urb')
else:
fp = open(path, encoding='utf-8')
return fp
if WHICH_PYTHON == 2:
# In python 2, classmethod and staticmethod do not allow setters, so you
# can't treat classmethods as first-class objects like you can regular
# functions. This rarely matters, but for metaclass shenanigans on the
# adapter we do want to set attributes on classmethods.
class _classmethod(classmethod):
pass
classmethod = _classmethod
# python 2.7 is missing this
class abstractclassmethod(classmethod):
__isabstractmethod__ = True
def __init__(self, func):
func.__isabstractmethod__ = True
super(abstractclassmethod, self).__init__(func)
class abstractstaticmethod(staticmethod):
__isabstractmethod__ = True
def __init__(self, func):
func.__isabstractmethod__ = True
super(abstractstaticmethod, self).__init__(func)
else:
abstractclassmethod = abc.abstractclassmethod
abstractstaticmethod = abc.abstractstaticmethod
classmethod = classmethod
def suppress_warnings():
# in python 2, ResourceWarnings don't exist.
# in python 3, suppress ResourceWarnings about unclosed sockets, as the
# bigquery library never closes them.
if WHICH_PYTHON == 3:
warnings.filterwarnings("ignore", category=ResourceWarning,
message="unclosed.*<socket.socket.*>")

core/dbt/compilation.py (new file)

@@ -0,0 +1,252 @@
import itertools
import os
from collections import defaultdict
import dbt.utils
import dbt.include
import dbt.tracking
from dbt.utils import get_materialization, NodeType, is_type
from dbt.linker import Linker
import dbt.compat
import dbt.context.runtime
import dbt.contracts.project
import dbt.exceptions
import dbt.flags
import dbt.loader
import dbt.config
from dbt.contracts.graph.compiled import CompiledNode
from dbt.logger import GLOBAL_LOGGER as logger
graph_file_name = 'graph.gpickle'
def print_compile_stats(stats):
names = {
NodeType.Model: 'model',
NodeType.Test: 'test',
NodeType.Snapshot: 'snapshot',
NodeType.Analysis: 'analyse',
NodeType.Macro: 'macro',
NodeType.Operation: 'operation',
NodeType.Seed: 'seed file',
NodeType.Source: 'source',
}
results = {k: 0 for k in names.keys()}
results.update(stats)
stat_line = ", ".join(
[dbt.utils.pluralize(ct, names.get(t)) for t, ct in results.items()])
logger.notice("Found {}".format(stat_line))
def _add_prepended_cte(prepended_ctes, new_cte):
for dct in prepended_ctes:
if dct['id'] == new_cte['id']:
dct['sql'] = new_cte['sql']
return
prepended_ctes.append(new_cte)
def _extend_prepended_ctes(prepended_ctes, new_prepended_ctes):
for new_cte in new_prepended_ctes:
_add_prepended_cte(prepended_ctes, new_cte)
def prepend_ctes(model, manifest):
model, _, manifest = recursively_prepend_ctes(model, manifest)
return (model, manifest)
def recursively_prepend_ctes(model, manifest):
if model.extra_ctes_injected:
return (model, model.extra_ctes, manifest)
if dbt.flags.STRICT_MODE:
# ensure that the cte we're adding to is compiled
CompiledNode(**model.serialize())
prepended_ctes = []
for cte in model.extra_ctes:
cte_id = cte['id']
cte_to_add = manifest.nodes.get(cte_id)
cte_to_add, new_prepended_ctes, manifest = recursively_prepend_ctes(
cte_to_add, manifest)
_extend_prepended_ctes(prepended_ctes, new_prepended_ctes)
new_cte_name = '__dbt__CTE__{}'.format(cte_to_add.get('name'))
sql = ' {} as (\n{}\n)'.format(new_cte_name, cte_to_add.compiled_sql)
_add_prepended_cte(prepended_ctes, {'id': cte_id, 'sql': sql})
model.prepend_ctes(prepended_ctes)
manifest.nodes[model.unique_id] = model
return (model, prepended_ctes, manifest)
class Compiler(object):
def __init__(self, config):
self.config = config
def initialize(self):
dbt.clients.system.make_directory(self.config.target_path)
dbt.clients.system.make_directory(self.config.modules_path)
def compile_node(self, node, manifest, extra_context=None):
if extra_context is None:
extra_context = {}
logger.debug("Compiling {}".format(node.get('unique_id')))
data = node.to_dict()
data.update({
'compiled': False,
'compiled_sql': None,
'extra_ctes_injected': False,
'extra_ctes': [],
'injected_sql': None,
})
compiled_node = CompiledNode(**data)
context = dbt.context.runtime.generate(
compiled_node, self.config, manifest)
context.update(extra_context)
compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
node.get('raw_sql'),
context,
node)
compiled_node.compiled = True
injected_node, _ = prepend_ctes(compiled_node, manifest)
should_wrap = {NodeType.Test, NodeType.Operation}
if injected_node.resource_type in should_wrap:
# data tests get wrapped in count(*)
# TODO : move this somewhere more reasonable
if 'data' in injected_node.tags and \
is_type(injected_node, NodeType.Test):
injected_node.wrapped_sql = (
"select count(*) as errors "
"from (\n{test_sql}\n) sbq").format(
test_sql=injected_node.injected_sql)
else:
# don't wrap schema tests or analyses.
injected_node.wrapped_sql = injected_node.injected_sql
elif is_type(injected_node, NodeType.Snapshot):
# unfortunately we do everything automagically for
# snapshots. in the future it'd be nice to generate
# the SQL at the parser level.
pass
elif (is_type(injected_node, NodeType.Model) and
get_materialization(injected_node) == 'ephemeral'):
pass
else:
injected_node.wrapped_sql = None
return injected_node
def write_graph_file(self, linker, manifest):
filename = graph_file_name
graph_path = os.path.join(self.config.target_path, filename)
linker.write_graph(graph_path, manifest)
def link_node(self, linker, node, manifest):
linker.add_node(node.unique_id)
for dependency in node.depends_on_nodes:
if manifest.nodes.get(dependency):
linker.dependency(
node.unique_id,
(manifest.nodes.get(dependency).unique_id))
else:
dbt.exceptions.dependency_not_found(node, dependency)
def link_graph(self, linker, manifest):
for node in manifest.nodes.values():
self.link_node(linker, node, manifest)
cycle = linker.find_cycles()
if cycle:
raise RuntimeError("Found a cycle: {}".format(cycle))
def compile(self, manifest, write=True):
linker = Linker()
self.link_graph(linker, manifest)
stats = defaultdict(int)
for node_name, node in itertools.chain(
manifest.nodes.items(),
manifest.macros.items()):
stats[node.resource_type] += 1
if write:
self.write_graph_file(linker, manifest)
print_compile_stats(stats)
return linker
def compile_manifest(config, manifest, write=True):
compiler = Compiler(config)
compiler.initialize()
return compiler.compile(manifest, write=write)
def _is_writable(node):
if not node.injected_sql:
return False
if dbt.utils.is_type(node, NodeType.Snapshot):
return False
return True
def compile_node(adapter, config, node, manifest, extra_context, write=True):
compiler = Compiler(config)
node = compiler.compile_node(node, manifest, extra_context)
node = _inject_runtime_config(adapter, node, extra_context)
if write and _is_writable(node):
logger.debug('Writing injected SQL for node "{}"'.format(
node.unique_id))
written_path = dbt.writer.write_node(
node,
config.target_path,
'compiled',
node.injected_sql)
node.build_path = written_path
return node
def _inject_runtime_config(adapter, node, extra_context):
wrapped_sql = node.wrapped_sql
context = _node_context(adapter, node)
context.update(extra_context)
sql = dbt.clients.jinja.get_rendered(wrapped_sql, context)
node.wrapped_sql = sql
return node
def _node_context(adapter, node):
return {
"run_started_at": dbt.tracking.active_user.run_started_at,
"invocation_id": dbt.tracking.active_user.invocation_id,
}
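
A sketch (not part of this diff) of the CTE bookkeeping above: prepended CTEs are deduplicated by id, and a repeat call simply updates the stored sql in place. The node id is illustrative.

ctes = []
_add_prepended_cte(ctes, {'id': 'model.proj.a',
                          'sql': ' __dbt__CTE__a as (\nselect 1\n)'})
_add_prepended_cte(ctes, {'id': 'model.proj.a',
                          'sql': ' __dbt__CTE__a as (\nselect 2\n)'})
assert len(ctes) == 1
assert 'select 2' in ctes[0]['sql']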


@@ -0,0 +1,5 @@
# all these are just exports, they need "noqa" so flake8 will not complain.
from .renderer import ConfigRenderer # noqa
from .profile import Profile, UserConfig, PROFILES_DIR # noqa
from .project import Project # noqa
from .runtime import RuntimeConfig # noqa

core/dbt/config/profile.py (new file)

@@ -0,0 +1,382 @@
import os
from dbt.adapters.factory import load_plugin
from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
from dbt.contracts.project import ProfileConfig
from dbt.exceptions import DbtProfileError
from dbt.exceptions import DbtProjectError
from dbt.exceptions import ValidationException
from dbt.exceptions import RuntimeException
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import parse_cli_vars
from dbt import tracking
from dbt.ui import printer
from .renderer import ConfigRenderer
DEFAULT_THREADS = 1
DEFAULT_SEND_ANONYMOUS_USAGE_STATS = True
DEFAULT_USE_COLORS = True
DEFAULT_PROFILES_DIR = os.path.join(os.path.expanduser('~'), '.dbt')
PROFILES_DIR = os.path.expanduser(
os.environ.get('DBT_PROFILES_DIR', DEFAULT_PROFILES_DIR)
)
INVALID_PROFILE_MESSAGE = """
dbt encountered an error while trying to read your profiles.yml file.
{error_string}
"""
NO_SUPPLIED_PROFILE_ERROR = """\
dbt cannot run because no profile was specified for this dbt project.
To specify a profile for this project, add a line like this to
your dbt_project.yml file:
profile: [profile name]
Here, [profile name] should be replaced with a profile name
defined in your profiles.yml file. You can find profiles.yml here:
{profiles_file}/profiles.yml
""".format(profiles_file=PROFILES_DIR)
def read_profile(profiles_dir):
path = os.path.join(profiles_dir, 'profiles.yml')
contents = None
if os.path.isfile(path):
try:
contents = load_file_contents(path, strip=False)
return load_yaml_text(contents)
except ValidationException as e:
msg = INVALID_PROFILE_MESSAGE.format(error_string=e)
raise ValidationException(msg)
return {}
class UserConfig(object):
def __init__(self, send_anonymous_usage_stats, use_colors, printer_width):
self.send_anonymous_usage_stats = send_anonymous_usage_stats
self.use_colors = use_colors
self.printer_width = printer_width
@classmethod
def from_dict(cls, cfg=None):
if cfg is None:
cfg = {}
send_anonymous_usage_stats = cfg.get(
'send_anonymous_usage_stats',
DEFAULT_SEND_ANONYMOUS_USAGE_STATS
)
use_colors = cfg.get(
'use_colors',
DEFAULT_USE_COLORS
)
printer_width = cfg.get(
'printer_width'
)
return cls(send_anonymous_usage_stats, use_colors, printer_width)
def to_dict(self):
return {
'send_anonymous_usage_stats': self.send_anonymous_usage_stats,
'use_colors': self.use_colors,
}
@classmethod
def from_directory(cls, directory):
user_cfg = None
profile = read_profile(directory)
if profile:
user_cfg = profile.get('config', {})
return cls.from_dict(user_cfg)
def set_values(self, cookie_dir):
if self.send_anonymous_usage_stats:
tracking.initialize_tracking(cookie_dir)
else:
tracking.do_not_track()
if self.use_colors:
printer.use_colors()
if self.printer_width:
printer.printer_width(self.printer_width)
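
A brief sketch (not from this diff) of how the config block from profiles.yml maps onto UserConfig; missing keys fall back to the defaults defined above:

cfg = UserConfig.from_dict({'use_colors': False})
assert cfg.use_colors is False
assert cfg.send_anonymous_usage_stats is True
assert cfg.printer_width is None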
class Profile(object):
def __init__(self, profile_name, target_name, config, threads,
credentials):
self.profile_name = profile_name
self.target_name = target_name
if isinstance(config, dict):
config = UserConfig.from_dict(config)
self.config = config
self.threads = threads
self.credentials = credentials
def to_profile_info(self, serialize_credentials=False):
"""Unlike to_project_config, this dict is not a mirror of any existing
on-disk data structure. It's used when creating a new profile from an
existing one.
:param serialize_credentials bool: If True, serialize the credentials.
Otherwise, the Credentials object will be copied.
:returns dict: The serialized profile.
"""
result = {
'profile_name': self.profile_name,
'target_name': self.target_name,
'config': self.config.to_dict(),
'threads': self.threads,
'credentials': self.credentials.incorporate(),
}
if serialize_credentials:
result['credentials'] = result['credentials'].serialize()
return result
def __str__(self):
return str(self.to_profile_info())
def __eq__(self, other):
if not (isinstance(other, self.__class__) and
isinstance(self, other.__class__)):
return False
return self.to_profile_info() == other.to_profile_info()
def validate(self):
if self.credentials:
self.credentials.validate()
try:
ProfileConfig(**self.to_profile_info(serialize_credentials=True))
except ValidationException as exc:
raise DbtProfileError(str(exc))
@staticmethod
def _credentials_from_profile(profile, profile_name, target_name):
# credentials carry their 'type' in their actual type, not their
# attributes. We do want this in order to pick our Credentials class.
if 'type' not in profile:
raise DbtProfileError(
'required field "type" not found in profile {} and target {}'
.format(profile_name, target_name))
typename = profile.pop('type')
try:
cls = load_plugin(typename)
credentials = cls(**profile)
except RuntimeException as e:
raise DbtProfileError(
'Credentials in profile "{}", target "{}" invalid: {}'
.format(profile_name, target_name, str(e))
)
return credentials
@staticmethod
def pick_profile_name(args_profile_name, project_profile_name=None):
profile_name = project_profile_name
if args_profile_name is not None:
profile_name = args_profile_name
if profile_name is None:
raise DbtProjectError(NO_SUPPLIED_PROFILE_ERROR)
return profile_name
@staticmethod
def _get_profile_data(profile, profile_name, target_name):
if 'outputs' not in profile:
raise DbtProfileError(
"outputs not specified in profile '{}'".format(profile_name)
)
outputs = profile['outputs']
if target_name not in outputs:
outputs = '\n'.join(' - {}'.format(output)
for output in outputs)
msg = ("The profile '{}' does not have a target named '{}'. The "
"valid target names for this profile are:\n{}"
.format(profile_name, target_name, outputs))
raise DbtProfileError(msg, result_type='invalid_target')
profile_data = outputs[target_name]
return profile_data
@classmethod
def from_credentials(cls, credentials, threads, profile_name, target_name,
user_cfg=None):
"""Create a profile from an existing set of Credentials and the
remaining information.
:param credentials dict: The credentials dict for this profile.
:param threads int: The number of threads to use for connections.
:param profile_name str: The profile name used for this profile.
:param target_name str: The target name used for this profile.
:param user_cfg Optional[dict]: The user-level config block from the
raw profiles, if specified.
:raises DbtProfileError: If the profile is invalid.
:returns Profile: The new Profile object.
"""
config = UserConfig.from_dict(user_cfg)
profile = cls(
profile_name=profile_name,
target_name=target_name,
config=config,
threads=threads,
credentials=credentials
)
profile.validate()
return profile
@classmethod
def render_profile(cls, raw_profile, profile_name, target_override,
cli_vars):
"""This is a containment zone for the hateful way we're rendering
profiles.
"""
renderer = ConfigRenderer(cli_vars=cli_vars)
# rendering profiles is a bit complex. Two constraints cause trouble:
# 1) users should be able to use environment/cli variables to specify
# the target in their profile.
# 2) Missing environment/cli variables in profiles/targets that don't
# end up getting selected should not cause errors.
# so first we'll just render the target name, then we use that rendered
# name to extract a profile that we can render.
if target_override is not None:
target_name = target_override
elif 'target' in raw_profile:
# render the target if it was parsed from yaml
target_name = renderer.render_value(raw_profile['target'])
else:
target_name = 'default'
logger.debug(
"target not specified in profile '{}', using '{}'"
.format(profile_name, target_name)
)
raw_profile_data = cls._get_profile_data(
raw_profile, profile_name, target_name
)
profile_data = renderer.render_profile_data(raw_profile_data)
return target_name, profile_data
@classmethod
def from_raw_profile_info(cls, raw_profile, profile_name, cli_vars,
user_cfg=None, target_override=None,
threads_override=None):
"""Create a profile from its raw profile information.
(this is an intermediate step, mostly useful for unit testing)
:param raw_profile dict: The profile data for a single profile, from
disk as yaml and its values rendered with jinja.
:param profile_name str: The profile name used.
:param cli_vars dict: The command-line variables passed as arguments,
as a dict.
:param user_cfg Optional[dict]: The global config for the user, if it
was present.
:param target_override Optional[str]: The target to use, if provided on
the command line.
:param threads_override Optional[str]: The thread count to use, if
provided on the command line.
:raises DbtProfileError: If the profile is invalid or missing, or the
target could not be found
:returns Profile: The new Profile object.
"""
# user_cfg is not rendered since it only contains booleans.
# TODO: should it be, and the values coerced to bool?
target_name, profile_data = cls.render_profile(
raw_profile, profile_name, target_override, cli_vars
)
# valid connections never include the number of threads, but it's
# stored on a per-connection level in the raw configs
threads = profile_data.pop('threads', DEFAULT_THREADS)
if threads_override is not None:
threads = threads_override
credentials = cls._credentials_from_profile(
profile_data, profile_name, target_name
)
return cls.from_credentials(
credentials=credentials,
profile_name=profile_name,
target_name=target_name,
threads=threads,
user_cfg=user_cfg
)
@classmethod
def from_raw_profiles(cls, raw_profiles, profile_name, cli_vars,
target_override=None, threads_override=None):
"""
:param raw_profiles dict: The profile data, from disk as yaml.
:param profile_name str: The profile name to use.
:param cli_vars dict: The command-line variables passed as arguments,
as a dict.
:param target_override Optional[str]: The target to use, if provided on
the command line.
:param threads_override Optional[str]: The thread count to use, if
provided on the command line.
:raises DbtProjectError: If there is no profile name specified in the
project or the command line arguments
:raises DbtProfileError: If the profile is invalid or missing, or the
target could not be found
:returns Profile: The new Profile object.
"""
if profile_name not in raw_profiles:
raise DbtProjectError(
"Could not find profile named '{}'".format(profile_name)
)
# First, we've already got our final decision on profile name, and we
# don't render keys, so we can pluck that out
raw_profile = raw_profiles[profile_name]
user_cfg = raw_profiles.get('config')
return cls.from_raw_profile_info(
raw_profile=raw_profile,
profile_name=profile_name,
cli_vars=cli_vars,
user_cfg=user_cfg,
target_override=target_override,
threads_override=threads_override,
)
@classmethod
def from_args(cls, args, project_profile_name=None):
"""Given the raw profiles as read from disk and the name of the desired
profile if specified, return the profile component of the runtime
config.
:param args argparse.Namespace: The arguments as parsed from the cli.
:param project_profile_name Optional[str]: The profile name, if
specified in a project.
:raises DbtProjectError: If there is no profile name specified in the
project or the command line arguments, or if the specified profile
is not found
:raises DbtProfileError: If the profile is invalid or missing, or the
target could not be found.
:returns Profile: The new Profile object.
"""
cli_vars = parse_cli_vars(getattr(args, 'vars', '{}'))
threads_override = getattr(args, 'threads', None)
target_override = getattr(args, 'target', None)
raw_profiles = read_profile(args.profiles_dir)
profile_name = cls.pick_profile_name(args.profile,
project_profile_name)
return cls.from_raw_profiles(
raw_profiles=raw_profiles,
profile_name=profile_name,
cli_vars=cli_vars,
target_override=target_override,
threads_override=threads_override
)
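A minimal sketch of how the profile machinery above fits together, assuming a profiles.yml that parsed into the dict below and an installed 'postgres' adapter plugin; the credential keys shown are illustrative assumptions, not part of this diff:
from dbt.config.profile import Profile

raw_profiles = {
    'config': {'send_anonymous_usage_stats': False},
    'warehouse': {
        'target': 'dev',
        'outputs': {
            'dev': {
                'type': 'postgres',   # picks the Credentials class via load_plugin
                'threads': 4,         # popped off before credentials are built
                'host': 'localhost',
                'port': 5432,
                'user': 'dbt',
                'pass': 'dbt',
                'dbname': 'analytics',
                'schema': 'dbt_dev',
            },
        },
    },
}
profile = Profile.from_raw_profiles(raw_profiles, 'warehouse', cli_vars={})
print(profile.target_name, profile.threads)  # -> dev 4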

core/dbt/config/project.py Normal file
@@ -0,0 +1,449 @@
from copy import deepcopy
import hashlib
import os
from dbt import compat
from dbt.clients.system import resolve_path_from_base
from dbt.clients.system import path_exists
from dbt.clients.system import load_file_contents
from dbt.clients.yaml_helper import load_yaml_text
from dbt.exceptions import DbtProjectError
from dbt.exceptions import RecursionException
from dbt.exceptions import SemverException
from dbt.exceptions import ValidationException
from dbt.exceptions import warn_or_error
from dbt.semver import VersionSpecifier
from dbt.semver import versions_compatible
from dbt.version import get_installed_version
from dbt.ui import printer
from dbt.utils import deep_map
from dbt.utils import parse_cli_vars
from dbt.parser.source_config import SourceConfig
from dbt.contracts.project import Project as ProjectContract
from dbt.contracts.project import PackageConfig
from .renderer import ConfigRenderer
UNUSED_RESOURCE_CONFIGURATION_PATH_MESSAGE = """\
WARNING: Configuration paths exist in your dbt_project.yml file which do not \
apply to any resources.
There are {} unused configuration paths:\n{}
"""
INVALID_VERSION_ERROR = """\
This version of dbt is not supported with the '{package}' package.
Installed version of dbt: {installed}
Required version of dbt for '{package}': {version_spec}
Check the requirements for the '{package}' package, or run dbt again with \
--no-version-check
"""
IMPOSSIBLE_VERSION_ERROR = """\
The package version requirement can never be satisfied for the '{package}'
package.
Required versions of dbt for '{package}': {version_spec}
Check the requirements for the '{package}' package, or run dbt again with \
--no-version-check
"""
def _list_if_none(value):
if value is None:
value = []
return value
def _dict_if_none(value):
if value is None:
value = {}
return value
def _list_if_none_or_string(value):
value = _list_if_none(value)
if isinstance(value, compat.basestring):
return [value]
return value
def _load_yaml(path):
contents = load_file_contents(path)
return load_yaml_text(contents)
def _get_config_paths(config, path=(), paths=None):
if paths is None:
paths = set()
for key, value in config.items():
if isinstance(value, dict):
if key in SourceConfig.ConfigKeys:
if path not in paths:
paths.add(path)
else:
_get_config_paths(value, path + (key,), paths)
else:
if path not in paths:
paths.add(path)
return frozenset(paths)
def _is_config_used(path, fqns):
if fqns:
for fqn in fqns:
if len(path) <= len(fqn) and fqn[:len(path)] == path:
return True
return False
def package_data_from_root(project_root):
package_filepath = resolve_path_from_base(
'packages.yml', project_root
)
if path_exists(package_filepath):
packages_dict = _load_yaml(package_filepath)
else:
packages_dict = None
return packages_dict
def package_config_from_data(packages_data):
if packages_data is None:
packages_data = {'packages': []}
try:
packages = PackageConfig(**packages_data)
except ValidationException as e:
raise DbtProjectError('Invalid package config: {}'.format(str(e)))
return packages
def _parse_versions(versions):
"""Parse multiple versions as read from disk. The versions value may be any
one of:
- a single version string ('>0.12.1')
- a single string specifying multiple comma-separated versions
('>0.11.1,<=0.12.2')
- an array of single-version strings (['>0.11.1', '<=0.12.2'])
Regardless, this will return a list of VersionSpecifiers
"""
if isinstance(versions, compat.basestring):
versions = versions.split(',')
return [VersionSpecifier.from_version_string(v) for v in versions]
class Project(object):
def __init__(self, project_name, version, project_root, profile_name,
source_paths, macro_paths, data_paths, test_paths,
analysis_paths, docs_paths, target_path, snapshot_paths,
clean_targets, log_path, modules_path, quoting, models,
on_run_start, on_run_end, archive, seeds, dbt_version,
packages):
self.project_name = project_name
self.version = version
self.project_root = project_root
self.profile_name = profile_name
self.source_paths = source_paths
self.macro_paths = macro_paths
self.data_paths = data_paths
self.test_paths = test_paths
self.analysis_paths = analysis_paths
self.docs_paths = docs_paths
self.target_path = target_path
self.snapshot_paths = snapshot_paths
self.clean_targets = clean_targets
self.log_path = log_path
self.modules_path = modules_path
self.quoting = quoting
self.models = models
self.on_run_start = on_run_start
self.on_run_end = on_run_end
self.archive = archive
self.seeds = seeds
self.dbt_version = dbt_version
self.packages = packages
@staticmethod
def _preprocess(project_dict):
"""Pre-process certain special keys to convert them from None values
into empty containers, and to turn strings into arrays of strings.
"""
handlers = {
('archive',): _list_if_none,
('on-run-start',): _list_if_none_or_string,
('on-run-end',): _list_if_none_or_string,
}
for k in ('models', 'seeds'):
handlers[(k,)] = _dict_if_none
handlers[(k, 'vars')] = _dict_if_none
handlers[(k, 'pre-hook')] = _list_if_none_or_string
handlers[(k, 'post-hook')] = _list_if_none_or_string
handlers[('seeds', 'column_types')] = _dict_if_none
def converter(value, keypath):
if keypath in handlers:
handler = handlers[keypath]
return handler(value)
else:
return value
return deep_map(converter, project_dict)
@classmethod
def from_project_config(cls, project_dict, packages_dict=None):
"""Create a project from its project and package configuration, as read
by yaml.safe_load().
:param project_dict dict: The dictionary as read from disk
:param packages_dict Optional[dict]: If it exists, the packages file as
read from disk.
:raises DbtProjectError: If the project is missing or invalid, or if
the packages file exists and is invalid.
:returns Project: The project, with defaults populated.
"""
try:
project_dict = cls._preprocess(project_dict)
except RecursionException:
raise DbtProjectError(
'Cycle detected: Project input has a reference to itself',
project=project_dict
)
# just for validation.
try:
ProjectContract(**project_dict)
except ValidationException as e:
raise DbtProjectError(str(e))
# name/version are required in the Project definition, so we can assume
# they are present
name = project_dict['name']
version = project_dict['version']
# this is added at project_dict parse time and should always be here
# once we see it.
project_root = project_dict['project-root']
# this is only optional in the sense that if it's not present, it needs
# to have been a cli argument.
profile_name = project_dict.get('profile')
# these are optional
source_paths = project_dict.get('source-paths', ['models'])
macro_paths = project_dict.get('macro-paths', ['macros'])
data_paths = project_dict.get('data-paths', ['data'])
test_paths = project_dict.get('test-paths', ['test'])
analysis_paths = project_dict.get('analysis-paths', [])
docs_paths = project_dict.get('docs-paths', source_paths[:])
target_path = project_dict.get('target-path', 'target')
snapshot_paths = project_dict.get('snapshot-paths', ['snapshots'])
# should this also include the modules path by default?
clean_targets = project_dict.get('clean-targets', [target_path])
log_path = project_dict.get('log-path', 'logs')
modules_path = project_dict.get('modules-path', 'dbt_modules')
# in the default case we'll populate this once we know the adapter type
quoting = project_dict.get('quoting', {})
models = project_dict.get('models', {})
on_run_start = project_dict.get('on-run-start', [])
on_run_end = project_dict.get('on-run-end', [])
archive = project_dict.get('archive', [])
seeds = project_dict.get('seeds', {})
dbt_raw_version = project_dict.get('require-dbt-version', '>=0.0.0')
try:
dbt_version = _parse_versions(dbt_raw_version)
except SemverException as e:
raise DbtProjectError(str(e))
packages = package_config_from_data(packages_dict)
project = cls(
project_name=name,
version=version,
project_root=project_root,
profile_name=profile_name,
source_paths=source_paths,
macro_paths=macro_paths,
data_paths=data_paths,
test_paths=test_paths,
analysis_paths=analysis_paths,
docs_paths=docs_paths,
target_path=target_path,
snapshot_paths=snapshot_paths,
clean_targets=clean_targets,
log_path=log_path,
modules_path=modules_path,
quoting=quoting,
models=models,
on_run_start=on_run_start,
on_run_end=on_run_end,
archive=archive,
seeds=seeds,
dbt_version=dbt_version,
packages=packages
)
# sanity check - this means an internal issue
project.validate()
return project
def __str__(self):
cfg = self.to_project_config(with_packages=True)
return str(cfg)
def __eq__(self, other):
if not (isinstance(other, self.__class__) and
isinstance(self, other.__class__)):
return False
return self.to_project_config(with_packages=True) == \
other.to_project_config(with_packages=True)
def to_project_config(self, with_packages=False):
"""Return a dict representation of the config that could be written to
disk with `yaml.safe_dump` to get this configuration.
:param with_packages bool: If True, include the serialized packages
file in the root.
:returns dict: The serialized profile.
"""
result = deepcopy({
'name': self.project_name,
'version': self.version,
'project-root': self.project_root,
'profile': self.profile_name,
'source-paths': self.source_paths,
'macro-paths': self.macro_paths,
'data-paths': self.data_paths,
'test-paths': self.test_paths,
'analysis-paths': self.analysis_paths,
'docs-paths': self.docs_paths,
'target-path': self.target_path,
'snapshot-paths': self.snapshot_paths,
'clean-targets': self.clean_targets,
'log-path': self.log_path,
'quoting': self.quoting,
'models': self.models,
'on-run-start': self.on_run_start,
'on-run-end': self.on_run_end,
'archive': self.archive,
'seeds': self.seeds,
'require-dbt-version': [
v.to_version_string() for v in self.dbt_version
],
})
if with_packages:
result.update(self.packages.serialize())
return result
def validate(self):
try:
ProjectContract(**self.to_project_config())
except ValidationException as exc:
raise DbtProjectError(str(exc))
@classmethod
def from_project_root(cls, project_root, cli_vars):
"""Create a project from a root directory. Reads in dbt_project.yml and
packages.yml, if it exists.
:param project_root str: The path to the project root to load.
:raises DbtProjectError: If the project is missing or invalid, or if
the packages file exists and is invalid.
:returns Project: The project, with defaults populated.
"""
project_root = os.path.normpath(project_root)
project_yaml_filepath = os.path.join(project_root, 'dbt_project.yml')
# get the project.yml contents
if not path_exists(project_yaml_filepath):
raise DbtProjectError(
'no dbt_project.yml found at expected path {}'
.format(project_yaml_filepath)
)
if isinstance(cli_vars, compat.basestring):
cli_vars = parse_cli_vars(cli_vars)
renderer = ConfigRenderer(cli_vars)
project_dict = _load_yaml(project_yaml_filepath)
rendered_project = renderer.render_project(project_dict)
rendered_project['project-root'] = project_root
packages_dict = package_data_from_root(project_root)
return cls.from_project_config(rendered_project, packages_dict)
@classmethod
def from_current_directory(cls, cli_vars):
return cls.from_project_root(os.getcwd(), cli_vars)
@classmethod
def from_args(cls, args):
return cls.from_current_directory(getattr(args, 'vars', '{}'))
def hashed_name(self):
return hashlib.md5(self.project_name.encode('utf-8')).hexdigest()
def get_resource_config_paths(self):
"""Return a dictionary with 'seeds' and 'models' keys whose values are
lists of lists of strings, where each inner list of strings represents
a configured path in the resource.
"""
return {
'models': _get_config_paths(self.models),
'seeds': _get_config_paths(self.seeds),
}
def get_unused_resource_config_paths(self, resource_fqns, disabled):
"""Return a list of lists of strings, where each inner list of strings
represents a type + FQN path of a resource configuration that is not
used.
"""
disabled_fqns = frozenset(tuple(fqn) for fqn in disabled)
resource_config_paths = self.get_resource_config_paths()
unused_resource_config_paths = []
for resource_type, config_paths in resource_config_paths.items():
used_fqns = resource_fqns.get(resource_type, frozenset())
fqns = used_fqns | disabled_fqns
for config_path in config_paths:
if not _is_config_used(config_path, fqns):
unused_resource_config_paths.append(
(resource_type,) + config_path
)
return unused_resource_config_paths
def warn_for_unused_resource_config_paths(self, resource_fqns, disabled):
unused = self.get_unused_resource_config_paths(resource_fqns, disabled)
if len(unused) == 0:
return
msg = UNUSED_RESOURCE_CONFIGURATION_PATH_MESSAGE.format(
len(unused),
'\n'.join('- {}'.format('.'.join(u)) for u in unused)
)
warn_or_error(msg, log_fmt=printer.yellow('{}'))
def validate_version(self):
"""Ensure this package works with the installed version of dbt."""
installed = get_installed_version()
if not versions_compatible(*self.dbt_version):
msg = IMPOSSIBLE_VERSION_ERROR.format(
package=self.project_name,
version_spec=[
x.to_version_string() for x in self.dbt_version
]
)
raise DbtProjectError(msg)
if not versions_compatible(installed, *self.dbt_version):
msg = INVALID_VERSION_ERROR.format(
package=self.project_name,
installed=installed.to_version_string(),
version_spec=[
x.to_version_string() for x in self.dbt_version
]
)
raise DbtProjectError(msg)
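A minimal sketch of the dict shape from_project_config consumes once dbt_project.yml has been rendered; 'project-root' is the key injected at load time, only 'name' and 'version' are assumed mandatory here, and everything else falls back to the defaults listed above. The project name and paths are hypothetical:
from dbt.config.project import Project

project_dict = {
    'name': 'jaffle_shop',             # hypothetical project name
    'version': '1.0.0',
    'project-root': '/tmp/jaffle_shop',
    'profile': 'warehouse',
    'models': {'jaffle_shop': {'materialized': 'view'}},
}
project = Project.from_project_config(project_dict)
print(project.source_paths)   # -> ['models'] (default)
print(project.profile_name)   # -> 'warehouse'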

core/dbt/config/renderer.py Normal file
@@ -0,0 +1,111 @@
from dbt import compat
from dbt.clients.jinja import get_rendered
from dbt.context.common import generate_config_context
from dbt.exceptions import DbtProfileError
from dbt.exceptions import DbtProjectError
from dbt.exceptions import RecursionException
from dbt.utils import deep_map
class ConfigRenderer(object):
"""A renderer provides configuration rendering for a given set of cli
variables and a render type.
"""
def __init__(self, cli_vars):
self.context = generate_config_context(cli_vars)
@staticmethod
def _is_hook_or_model_vars_path(keypath):
if not keypath:
return False
first = keypath[0]
# run hooks
if first in {'on-run-start', 'on-run-end'}:
return True
# models have two things to avoid
if first in {'seeds', 'models'}:
# model-level hooks
if 'pre-hook' in keypath or 'post-hook' in keypath:
return True
# model-level 'vars' declarations
if 'vars' in keypath:
return True
return False
def _render_project_entry(self, value, keypath):
"""Render an entry, in case it's jinja. This is meant to be passed to
deep_map.
If the parsed entry is a string and has the name 'port', this will
attempt to cast it to an int, and on failure will return the parsed
string.
:param value Any: The value to potentially render
:param key str: The key to convert on.
:return Any: The rendered entry.
"""
# hooks should be treated as raw sql, they'll get rendered later.
# Same goes for 'vars' declarations inside 'models'/'seeds'.
if self._is_hook_or_model_vars_path(keypath):
return value
return self.render_value(value)
def render_value(self, value, keypath=None):
# keypath is ignored.
# if it wasn't read as a string, ignore it
if not isinstance(value, compat.basestring):
return value
# force the result of rendering into this python version's native
# string type
return compat.to_native_string(get_rendered(value, self.context))
def _render_profile_data(self, value, keypath):
result = self.render_value(value)
if len(keypath) == 1 and keypath[-1] == 'port':
try:
result = int(result)
except ValueError:
# let the validator or connection handle this
pass
return result
def _render_schema_source_data(self, value, keypath):
# things to not render:
# - descriptions
if len(keypath) > 0 and keypath[-1] == 'description':
return value
return self.render_value(value)
def render_project(self, as_parsed):
"""Render the parsed data, returning a new dict (or whatever was read).
"""
try:
return deep_map(self._render_project_entry, as_parsed)
except RecursionException:
raise DbtProjectError(
'Cycle detected: Project input has a reference to itself',
project=as_parsed
)
def render_profile_data(self, as_parsed):
"""Render the chosen profile entry, as it was parsed."""
try:
return deep_map(self._render_profile_data, as_parsed)
except RecursionException:
raise DbtProfileError(
'Cycle detected: Profile input has a reference to itself',
project=as_parsed
)
def render_schema_source(self, as_parsed):
try:
return deep_map(self._render_schema_source_data, as_parsed)
except RecursionException:
raise DbtProfileError(
'Cycle detected: schema.yml input has a reference to itself',
project=as_parsed
)
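A short sketch of the rendering rules above: profile values go through jinja (with 'port' coerced to int when possible), while hook and model-vars paths in project files are deliberately left unrendered. PGHOST is an arbitrary environment variable chosen for illustration:
from dbt.config.renderer import ConfigRenderer

renderer = ConfigRenderer(cli_vars={'run_schema': 'analytics'})

profile_data = renderer.render_profile_data({
    'host': "{{ env_var('PGHOST', 'localhost') }}",
    'schema': "{{ var('run_schema') }}",
    'port': '5432',
})
# profile_data['schema'] == 'analytics'; 'port' is coerced to the int 5432

project_data = renderer.render_project({
    'on-run-start': ["{{ log('starting run') }}"],
})
# hook entries are skipped, so the jinja above survives unrendered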

core/dbt/config/runtime.py Normal file
@@ -0,0 +1,205 @@
from copy import deepcopy
from dbt.utils import parse_cli_vars
from dbt.contracts.project import Configuration
from dbt.exceptions import DbtProjectError
from dbt.exceptions import ValidationException
from dbt.adapters.factory import get_relation_class_by_name
from .profile import Profile
from .project import Project
_ARCHIVE_REMOVED_MESSAGE = '''
The `archive` section in `dbt_project.yml` is no longer supported. Please use a
`snapshot` block instead. For more information on snapshot blocks and a script
to help migrate these archives, please consult the 0.14.0 migration guide:
https://docs.getdbt.com/v0.14/docs/upgrading-to-014
'''.strip()
class RuntimeConfig(Project, Profile):
"""The runtime configuration, as constructed from its components. There's a
lot because there is a lot of stuff!
"""
def __init__(self, project_name, version, project_root, source_paths,
macro_paths, data_paths, test_paths, analysis_paths,
docs_paths, target_path, snapshot_paths, clean_targets,
log_path, modules_path, quoting, models, on_run_start,
on_run_end, archive, seeds, dbt_version, profile_name,
target_name, config, threads, credentials, packages, args):
# 'vars'
self.args = args
self.cli_vars = parse_cli_vars(getattr(args, 'vars', '{}'))
# 'project'
Project.__init__(
self,
project_name=project_name,
version=version,
project_root=project_root,
profile_name=profile_name,
source_paths=source_paths,
macro_paths=macro_paths,
data_paths=data_paths,
test_paths=test_paths,
analysis_paths=analysis_paths,
docs_paths=docs_paths,
target_path=target_path,
snapshot_paths=snapshot_paths,
clean_targets=clean_targets,
log_path=log_path,
modules_path=modules_path,
quoting=quoting,
models=models,
on_run_start=on_run_start,
on_run_end=on_run_end,
archive=archive,
seeds=seeds,
dbt_version=dbt_version,
packages=packages
)
# 'profile'
Profile.__init__(
self,
profile_name=profile_name,
target_name=target_name,
config=config,
threads=threads,
credentials=credentials
)
self.validate()
@classmethod
def from_parts(cls, project, profile, args, allow_archive_configs=False):
"""Instantiate a RuntimeConfig from its components.
:param profile Profile: A parsed dbt Profile.
:param project Project: A parsed dbt Project.
:param args argparse.Namespace: The parsed command-line arguments.
:param allow_archive_configs bool: If True, ignore archive blocks in
configs. This flag exists to enable archive migration.
:returns RuntimeConfig: The new configuration.
"""
quoting = deepcopy(
get_relation_class_by_name(profile.credentials.type)
.DEFAULTS['quote_policy']
)
quoting.update(project.quoting)
if project.archive and not allow_archive_configs:
# if the user has an `archive` section, raise an error
raise DbtProjectError(_ARCHIVE_REMOVED_MESSAGE)
return cls(
project_name=project.project_name,
version=project.version,
project_root=project.project_root,
source_paths=project.source_paths,
macro_paths=project.macro_paths,
data_paths=project.data_paths,
test_paths=project.test_paths,
analysis_paths=project.analysis_paths,
docs_paths=project.docs_paths,
target_path=project.target_path,
snapshot_paths=project.snapshot_paths,
clean_targets=project.clean_targets,
log_path=project.log_path,
modules_path=project.modules_path,
quoting=quoting,
models=project.models,
on_run_start=project.on_run_start,
on_run_end=project.on_run_end,
archive=project.archive,
seeds=project.seeds,
dbt_version=project.dbt_version,
packages=project.packages,
profile_name=profile.profile_name,
target_name=profile.target_name,
config=profile.config,
threads=profile.threads,
credentials=profile.credentials,
args=args
)
def new_project(self, project_root):
"""Given a new project root, read in its project dictionary, supply the
existing project's profile info, and create a new project file.
:param project_root str: A filepath to a dbt project.
:raises DbtProfileError: If the profile is invalid.
:raises DbtProjectError: If project is missing or invalid.
:returns RuntimeConfig: The new configuration.
"""
# copy profile
profile = Profile(**self.to_profile_info())
profile.validate()
# load the new project and its packages. Don't pass cli variables.
project = Project.from_project_root(project_root, {})
cfg = self.from_parts(
project=project,
profile=profile,
args=deepcopy(self.args),
)
# force our quoting back onto the new project.
cfg.quoting = deepcopy(self.quoting)
return cfg
def serialize(self):
"""Serialize the full configuration to a single dictionary. For any
instance that has passed validate() (which happens in __init__), it
matches the Configuration contract.
Note that args are not serialized.
:returns dict: The serialized configuration.
"""
result = self.to_project_config(with_packages=True)
result.update(self.to_profile_info(serialize_credentials=True))
result['cli_vars'] = deepcopy(self.cli_vars)
return result
def __str__(self):
return str(self.serialize())
def validate(self):
"""Validate the configuration against its contract.
:raises DbtProjectError: If the configuration fails validation.
"""
try:
Configuration(**self.serialize())
except ValidationException as e:
raise DbtProjectError(str(e))
if getattr(self.args, 'version_check', False):
self.validate_version()
@classmethod
def from_args(cls, args, allow_archive_configs=False):
"""Given arguments, read in dbt_project.yml from the current directory,
read in packages.yml if it exists, and use them to find the profile to
load.
:param args argparse.Namespace: The arguments as parsed from the cli.
:param allow_archive_configs bool: If True, ignore archive blocks in
configs. This flag exists to enable archive migration.
:raises DbtProjectError: If the project is invalid or missing.
:raises DbtProfileError: If the profile is invalid or missing.
:raises ValidationException: If the cli variables are invalid.
"""
# build the project and read in packages.yml
project = Project.from_args(args)
# build the profile
profile = Profile.from_args(
args=args,
project_profile_name=project.profile_name
)
return cls.from_parts(
project=project,
profile=profile,
args=args,
allow_archive_configs=allow_archive_configs
)
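Roughly how from_args stitches the two halves together, as a sketch: the argparse namespace values and paths below are assumptions, and it presumes a dbt_project.yml and profiles.yml on disk plus an installed adapter plugin for the chosen target.
import argparse
from dbt.config.project import Project
from dbt.config.profile import Profile
from dbt.config.runtime import RuntimeConfig

args = argparse.Namespace(
    profiles_dir='/home/analyst/.dbt',   # hypothetical CLI values
    profile=None, target=None, threads=None,
    vars='{}', version_check=True,
)

project = Project.from_project_root('/tmp/jaffle_shop', cli_vars={})
profile = Profile.from_args(args, project_profile_name=project.profile_name)
config = RuntimeConfig.from_parts(project=project, profile=profile, args=args)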

core/dbt/context/common.py Normal file
@@ -0,0 +1,496 @@
import json
import os
from dbt.adapters.factory import get_adapter
from dbt.compat import basestring
from dbt.node_types import NodeType
from dbt.contracts.graph.parsed import ParsedMacro, ParsedNode
from dbt.include.global_project import PACKAGES
from dbt.include.global_project import PROJECT_NAME as GLOBAL_PROJECT_NAME
import dbt.clients.jinja
import dbt.clients.agate_helper
import dbt.flags
import dbt.tracking
import dbt.writer
import dbt.utils
from dbt.logger import GLOBAL_LOGGER as logger # noqa
# These modules are added to the context. Consider alternative
# approaches which will extend well to potentially many modules
import pytz
import datetime
class RelationProxy(object):
def __init__(self, adapter):
self.quoting_config = adapter.config.quoting
self.relation_type = adapter.Relation
def __getattr__(self, key):
return getattr(self.relation_type, key)
def create_from_source(self, *args, **kwargs):
# bypass our create when creating from source so as not to mess up
# the source quoting
return self.relation_type.create_from_source(*args, **kwargs)
def create(self, *args, **kwargs):
kwargs['quote_policy'] = dbt.utils.merge(
self.quoting_config,
kwargs.pop('quote_policy', {})
)
return self.relation_type.create(*args, **kwargs)
class BaseDatabaseWrapper(object):
"""
Wrapper for runtime database interaction. Applies the runtime quote policy
via a relation proxy.
"""
def __init__(self, adapter):
self.adapter = adapter
self.Relation = RelationProxy(adapter)
def __getattr__(self, name):
raise NotImplementedError('subclasses need to implement this')
@property
def config(self):
return self.adapter.config
def type(self):
return self.adapter.type()
def commit(self):
return self.adapter.commit_if_has_connection()
class BaseResolver(object):
def __init__(self, db_wrapper, model, config, manifest):
self.db_wrapper = db_wrapper
self.model = model
self.config = config
self.manifest = manifest
@property
def current_project(self):
return self.config.project_name
@property
def Relation(self):
return self.db_wrapper.Relation
def _add_macro_map(context, package_name, macro_map):
"""Update an existing context in-place, adding the given macro map to the
appropriate package namespace. Adapter packages get inserted into the
global namespace.
"""
key = package_name
if package_name in PACKAGES:
key = GLOBAL_PROJECT_NAME
if key not in context:
context[key] = {}
context[key].update(macro_map)
def _add_macros(context, model, manifest):
macros_to_add = {'global': [], 'local': []}
for unique_id, macro in manifest.macros.items():
if macro.resource_type != NodeType.Macro:
continue
package_name = macro.package_name
macro_map = {
macro.name: macro.generator(context)
}
# adapter packages are part of the global project space
_add_macro_map(context, package_name, macro_map)
if package_name == model.package_name:
macros_to_add['local'].append(macro_map)
elif package_name in PACKAGES:
macros_to_add['global'].append(macro_map)
# Load global macros before local macros -- local takes precedence
unprefixed_macros = macros_to_add['global'] + macros_to_add['local']
for macro_map in unprefixed_macros:
context.update(macro_map)
return context
def _add_tracking(context):
if dbt.tracking.active_user is not None:
context = dbt.utils.merge(context, {
"run_started_at": dbt.tracking.active_user.run_started_at,
"invocation_id": dbt.tracking.active_user.invocation_id,
})
else:
context = dbt.utils.merge(context, {
"run_started_at": None,
"invocation_id": None
})
return context
def _add_validation(context):
def validate_any(*args):
def inner(value):
for arg in args:
if isinstance(arg, type) and isinstance(value, arg):
return
elif value == arg:
return
raise dbt.exceptions.ValidationException(
'Expected value "{}" to be one of {}'
.format(value, ','.join(map(str, args))))
return inner
validation_utils = dbt.utils.AttrDict({
'any': validate_any,
})
return dbt.utils.merge(
context,
{'validation': validation_utils})
def env_var(var, default=None):
if var in os.environ:
return os.environ[var]
elif default is not None:
return default
else:
msg = "Env var required but not provided: '{}'".format(var)
dbt.clients.jinja.undefined_error(msg)
def _store_result(sql_results):
def call(name, status, agate_table=None):
if agate_table is None:
agate_table = dbt.clients.agate_helper.empty_table()
sql_results[name] = dbt.utils.AttrDict({
'status': status,
'data': dbt.clients.agate_helper.as_matrix(agate_table),
'table': agate_table
})
return ''
return call
def _load_result(sql_results):
def call(name):
return sql_results.get(name)
return call
def _debug_here():
import sys
import ipdb
frame = sys._getframe(3)
ipdb.set_trace(frame)
def _add_sql_handlers(context):
sql_results = {}
return dbt.utils.merge(context, {
'_sql_results': sql_results,
'store_result': _store_result(sql_results),
'load_result': _load_result(sql_results),
})
def log(msg, info=False):
if info:
logger.info(msg)
else:
logger.debug(msg)
return ''
class Var(object):
UndefinedVarError = "Required var '{}' not found in config:\nVars "\
"supplied to {} = {}"
_VAR_NOTSET = object()
def __init__(self, model, context, overrides):
self.model = model
self.context = context
# These are hard-overrides (eg. CLI vars) that should take
# precedence over context-based var definitions
self.overrides = overrides
if isinstance(model, dict) and model.get('unique_id'):
local_vars = model.get('config', {}).get('vars', {})
self.model_name = model.get('name')
elif isinstance(model, ParsedMacro):
local_vars = {} # macros have no config
self.model_name = model.name
elif isinstance(model, ParsedNode):
local_vars = model.config.get('vars', {})
self.model_name = model.name
elif model is None:
# during config parsing we have no model and no local vars
self.model_name = '<Configuration>'
local_vars = {}
else:
# still used for wrapping
self.model_name = model.nice_name
local_vars = model.config.get('vars', {})
self.local_vars = dbt.utils.merge(local_vars, overrides)
def pretty_dict(self, data):
return json.dumps(data, sort_keys=True, indent=4)
def get_missing_var(self, var_name):
pretty_vars = self.pretty_dict(self.local_vars)
msg = self.UndefinedVarError.format(
var_name, self.model_name, pretty_vars
)
dbt.exceptions.raise_compiler_error(msg, self.model)
def assert_var_defined(self, var_name, default):
if var_name not in self.local_vars and default is self._VAR_NOTSET:
return self.get_missing_var(var_name)
def get_rendered_var(self, var_name):
raw = self.local_vars[var_name]
# if bool/int/float/etc are passed in, don't compile anything
if not isinstance(raw, basestring):
return raw
return dbt.clients.jinja.get_rendered(raw, self.context)
def __call__(self, var_name, default=_VAR_NOTSET):
if var_name in self.local_vars:
return self.get_rendered_var(var_name)
elif default is not self._VAR_NOTSET:
return default
else:
return self.get_missing_var(var_name)
def write(node, target_path, subdirectory):
def fn(payload):
node['build_path'] = dbt.writer.write_node(
node, target_path, subdirectory, payload)
return ''
return fn
def render(context, node):
def fn(string):
return dbt.clients.jinja.get_rendered(string, context, node)
return fn
def fromjson(string, default=None):
try:
return json.loads(string)
except ValueError:
return default
def tojson(value, default=None):
try:
return json.dumps(value)
except ValueError:
return default
def try_or_compiler_error(model):
def impl(message_if_exception, func, *args, **kwargs):
try:
return func(*args, **kwargs)
except Exception:
dbt.exceptions.raise_compiler_error(message_if_exception, model)
return impl
def _return(value):
raise dbt.exceptions.MacroReturn(value)
def get_this_relation(db_wrapper, config, model):
return db_wrapper.Relation.create_from_node(config, model)
def get_pytz_module_context():
context_exports = pytz.__all__
return {
name: getattr(pytz, name) for name in context_exports
}
def get_datetime_module_context():
context_exports = [
'date',
'datetime',
'time',
'timedelta',
'tzinfo'
]
return {
name: getattr(datetime, name) for name in context_exports
}
def get_context_modules():
return {
'pytz': get_pytz_module_context(),
'datetime': get_datetime_module_context(),
}
def generate_config_context(cli_vars):
context = {
'env_var': env_var,
'modules': get_context_modules(),
}
context['var'] = Var(None, context, cli_vars)
return _add_tracking(context)
def generate_base(model, model_dict, config, manifest, source_config,
provider, adapter=None):
"""Generate the common aspects of the config dict."""
if provider is None:
raise dbt.exceptions.InternalException(
"Invalid provider given to context: {}".format(provider))
target_name = config.target_name
target = config.to_profile_info()
del target['credentials']
target.update(config.credentials.serialize(with_aliases=True))
target['type'] = config.credentials.type
target.pop('pass', None)
target['name'] = target_name
adapter = get_adapter(config)
context = {'env': target}
pre_hooks = None
post_hooks = None
db_wrapper = provider.DatabaseWrapper(adapter)
context = dbt.utils.merge(context, {
"adapter": db_wrapper,
"api": {
"Relation": db_wrapper.Relation,
"Column": adapter.Column,
},
"column": adapter.Column,
"config": provider.Config(model_dict, source_config),
"database": config.credentials.database,
"env_var": env_var,
"exceptions": dbt.exceptions.wrapped_exports(model),
"execute": provider.execute,
"flags": dbt.flags,
"graph": manifest.to_flat_graph(),
"log": log,
"model": model_dict,
"modules": get_context_modules(),
"post_hooks": post_hooks,
"pre_hooks": pre_hooks,
"ref": provider.ref(db_wrapper, model, config, manifest),
"return": _return,
"schema": config.credentials.schema,
"sql": None,
"sql_now": adapter.date_function(),
"source": provider.source(db_wrapper, model, config, manifest),
"fromjson": fromjson,
"tojson": tojson,
"target": target,
"try_or_compiler_error": try_or_compiler_error(model)
})
if os.environ.get('DBT_MACRO_DEBUGGING'):
context['debug'] = _debug_here
return context
def modify_generated_context(context, model, model_dict, config, manifest,
provider):
cli_var_overrides = config.cli_vars
context = _add_tracking(context)
context = _add_validation(context)
context = _add_sql_handlers(context)
# we make a copy of the context for each of these ^^
context = _add_macros(context, model, manifest)
context["write"] = write(model_dict, config.target_path, 'run')
context["render"] = render(context, model_dict)
context["var"] = provider.Var(model, context=context,
overrides=cli_var_overrides)
context['context'] = context
return context
def generate_execute_macro(model, config, manifest, provider):
"""Internally, macros can be executed like nodes, with some restrictions:
- they don't have all the values available that nodes do:
- 'this', 'pre_hooks', 'post_hooks', and 'sql' are missing
- 'schema' does not use any 'model' information
- they can't be configured with config() directives
"""
model_dict = model.serialize()
context = generate_base(model, model_dict, config, manifest, None,
provider)
return modify_generated_context(context, model, model_dict, config,
manifest, provider)
def generate_model(model, config, manifest, source_config, provider):
model_dict = model.to_dict()
context = generate_base(model, model_dict, config, manifest,
source_config, provider)
# operations (hooks) don't get a 'this'
if model.resource_type != NodeType.Operation:
this = get_this_relation(context['adapter'], config, model_dict)
context['this'] = this
# overwrite schema/database if we have them, and hooks + sql
context.update({
'schema': model.get('schema', context['schema']),
'database': model.get('database', context['database']),
'pre_hooks': model.config.get('pre-hook'),
'post_hooks': model.config.get('post-hook'),
'sql': model.get('injected_sql'),
})
return modify_generated_context(context, model, model_dict, config,
manifest, provider)
def generate(model, config, manifest, source_config=None, provider=None):
"""
Not meant to be called directly. Call with either:
dbt.context.parser.generate
or
dbt.context.runtime.generate
"""
return generate_model(model, config, manifest, source_config, provider)
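The config-parsing context built above is small enough to exercise directly; a sketch with illustrative values:
from dbt.context.common import generate_config_context

ctx = generate_config_context(cli_vars={'start_date': '2019-01-01'})

ctx['env_var']('HOME', '/tmp')        # env lookup with a fallback default
ctx['var']('start_date')              # -> '2019-01-01' (cli override wins)
ctx['var']('missing', default=None)   # -> None instead of a compiler error
sorted(ctx['modules'])                # -> ['datetime', 'pytz']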

@@ -0,0 +1,29 @@
import dbt.context.common
from dbt.context import runtime
from dbt.exceptions import raise_compiler_error
class RefResolver(runtime.BaseRefResolver):
def __call__(self, *args):
# When you call ref(), this is what happens at operation runtime
target_model, name = self.resolve(args)
return self.create_relation(target_model, name)
def create_ephemeral_relation(self, target_model, name):
# In operations, we can't ref() ephemeral nodes, because ParsedMacros
# do not support set_cte
raise_compiler_error(
'Operations can not ref() ephemeral nodes, but {} is ephemeral'
.format(target_model.name),
self.model
)
class Provider(runtime.Provider):
ref = RefResolver
def generate(model, runtime_config, manifest):
return dbt.context.common.generate_execute_macro(
model, runtime_config, manifest, Provider()
)

core/dbt/context/parser.py Normal file
@@ -0,0 +1,146 @@
import dbt.exceptions
import dbt.context.common
from dbt.adapters.factory import get_adapter
def docs(unparsed, docrefs, column_name=None):
def do_docs(*args):
if len(args) != 1 and len(args) != 2:
dbt.exceptions.doc_invalid_args(unparsed, args)
doc_package_name = ''
doc_name = args[0]
if len(args) == 2:
doc_package_name = args[1]
docref = {
'documentation_package': doc_package_name,
'documentation_name': doc_name,
}
if column_name is not None:
docref['column_name'] = column_name
docrefs.append(docref)
# IDK
return True
return do_docs
class Config(object):
def __init__(self, model, source_config):
self.model = model
self.source_config = source_config
def _transform_config(self, config):
for oldkey in ('pre_hook', 'post_hook'):
if oldkey in config:
newkey = oldkey.replace('_', '-')
if newkey in config:
dbt.exceptions.raise_compiler_error(
'Invalid config, has conflicting keys "{}" and "{}"'
.format(oldkey, newkey),
self.model
)
config[newkey] = config.pop(oldkey)
return config
def __call__(self, *args, **kwargs):
if len(args) == 1 and len(kwargs) == 0:
opts = args[0]
elif len(args) == 0 and len(kwargs) > 0:
opts = kwargs
else:
dbt.exceptions.raise_compiler_error(
"Invalid inline model config",
self.model)
opts = self._transform_config(opts)
self.source_config.update_in_model_config(opts)
return ''
def set(self, name, value):
return self.__call__({name: value})
def require(self, name, validator=None):
return ''
def get(self, name, validator=None, default=None):
return ''
class DatabaseWrapper(dbt.context.common.BaseDatabaseWrapper):
"""The parser subclass of the database wrapper applies any explicit
parse-time overrides.
"""
def __getattr__(self, name):
override = (name in self.adapter._available_ and
name in self.adapter._parse_replacements_)
if override:
return self.adapter._parse_replacements_[name]
elif name in self.adapter._available_:
return getattr(self.adapter, name)
else:
raise AttributeError(
"'{}' object has no attribute '{}'".format(
self.__class__.__name__, name
)
)
class Var(dbt.context.common.Var):
def get_missing_var(self, var_name):
# in the parser, just always return None.
return None
class RefResolver(dbt.context.common.BaseResolver):
def __call__(self, *args):
# When you call ref(), this is what happens at parse time
if len(args) == 1 or len(args) == 2:
self.model.refs.append(list(args))
else:
dbt.exceptions.ref_invalid_args(self.model, args)
return self.Relation.create_from_node(self.config, self.model)
class SourceResolver(dbt.context.common.BaseResolver):
def __call__(self, source_name, table_name):
# When you call source(), this is what happens at parse time
self.model.sources.append([source_name, table_name])
return self.Relation.create_from_node(self.config, self.model)
class Provider(object):
execute = False
Config = Config
DatabaseWrapper = DatabaseWrapper
Var = Var
ref = RefResolver
source = SourceResolver
def generate(model, runtime_config, manifest, source_config):
# during parsing, we don't have a connection, but we might need one, so we
# have to acquire it.
# In the future, it would be nice to lazily open the connection, as in some
# projects it would be possible to parse without connecting to the db
with get_adapter(runtime_config).connection_named(model.get('name')):
return dbt.context.common.generate(
model, runtime_config, manifest, source_config, Provider()
)
def generate_macro(model, runtime_config, manifest):
# parser.generate_macro is called by the get_${attr}_func family of Parser
# methods, which preparse and cache the generate_${attr}_name family of
# macros for use during parsing
return dbt.context.common.generate_execute_macro(
model, runtime_config, manifest, Provider()
)

core/dbt/context/runtime.py Normal file
@@ -0,0 +1,150 @@
from dbt.utils import get_materialization, add_ephemeral_model_prefix
import dbt.clients.jinja
import dbt.context.common
import dbt.flags
from dbt.parser import ParserUtils
from dbt.logger import GLOBAL_LOGGER as logger # noqa
class BaseRefResolver(dbt.context.common.BaseResolver):
def resolve(self, args):
name = None
package = None
if len(args) == 1:
name = args[0]
elif len(args) == 2:
package, name = args
else:
dbt.exceptions.ref_invalid_args(self.model, args)
target_model = ParserUtils.resolve_ref(
self.manifest,
name,
package,
self.current_project,
self.model.package_name)
if target_model is None or target_model is ParserUtils.DISABLED:
dbt.exceptions.ref_target_not_found(
self.model,
name,
package)
return target_model, name
def create_ephemeral_relation(self, target_model, name):
self.model.set_cte(target_model.unique_id, None)
return self.Relation.create(
type=self.Relation.CTE,
identifier=add_ephemeral_model_prefix(name)
).quote(identifier=False)
def create_relation(self, target_model, name):
if get_materialization(target_model) == 'ephemeral':
return self.create_ephemeral_relation(target_model, name)
else:
return self.Relation.create_from_node(self.config, target_model)
class RefResolver(BaseRefResolver):
def validate(self, resolved, args):
if resolved.unique_id not in self.model.depends_on.get('nodes'):
dbt.exceptions.ref_bad_context(self.model, args)
def __call__(self, *args):
# When you call ref(), this is what happens at runtime
target_model, name = self.resolve(args)
self.validate(target_model, args)
return self.create_relation(target_model, name)
class SourceResolver(dbt.context.common.BaseResolver):
def resolve(self, source_name, table_name):
target_source = ParserUtils.resolve_source(
self.manifest,
source_name,
table_name,
self.current_project,
self.model.package_name
)
if target_source is None:
dbt.exceptions.source_target_not_found(
self.model,
source_name,
table_name)
return target_source
def __call__(self, source_name, table_name):
"""When you call source(), this is what happens at runtime"""
target_source = self.resolve(source_name, table_name)
return self.Relation.create_from_source(target_source)
class Config:
def __init__(self, model, source_config=None):
self.model = model
# we never use or get a source config, only the parser cares
def __call__(*args, **kwargs):
return ''
def set(self, name, value):
return self.__call__({name: value})
def _validate(self, validator, value):
validator(value)
def require(self, name, validator=None):
if name not in self.model['config']:
dbt.exceptions.missing_config(self.model, name)
to_return = self.model['config'][name]
if validator is not None:
self._validate(validator, to_return)
return to_return
def get(self, name, validator=None, default=None):
to_return = self.model['config'].get(name, default)
if validator is not None and default is not None:
self._validate(validator, to_return)
return to_return
class DatabaseWrapper(dbt.context.common.BaseDatabaseWrapper):
"""The runtime database wrapper exposes everything the adapter marks
available.
"""
def __getattr__(self, name):
if name in self.adapter._available_:
return getattr(self.adapter, name)
else:
raise AttributeError(
"'{}' object has no attribute '{}'".format(
self.__class__.__name__, name
)
)
class Var(dbt.context.common.Var):
pass
class Provider(object):
execute = True
Config = Config
DatabaseWrapper = DatabaseWrapper
Var = Var
ref = RefResolver
source = SourceResolver
def generate(model, runtime_config, manifest):
return dbt.context.common.generate(
model, runtime_config, manifest, None, Provider())
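At runtime the Config object above only reads back what the parser already stored on the node; a small sketch with a hand-built node dict standing in for a parsed model:
from dbt.context.runtime import Config

node = {'config': {'materialized': 'table'}}   # stand-in for a parsed node dict
cfg = Config(node)

cfg.require('materialized')     # -> 'table'
cfg.get('sort', default=None)   # -> None
cfg('materialized', 'view')     # inline config() calls are no-ops here -> ''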

core/dbt/contracts/common.py Normal file
@@ -0,0 +1,11 @@
def named_property(name, doc=None):
def get_prop(self):
return self._contents.get(name)
def set_prop(self, value):
self._contents[name] = value
self.validate()
return property(get_prop, set_prop, doc=doc)
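named_property just proxies reads and writes to self._contents and re-validates on every write; a tiny sketch with a stand-in class (Widget is hypothetical, not a dbt type):
class Widget(object):
    def __init__(self):
        self._contents = {'state': 'init'}

    def validate(self):
        # called automatically by the property setter below
        assert self._contents['state'] in ('init', 'open', 'closed')

    state = named_property('state', 'A state field stored in _contents')

w = Widget()
w.state            # -> 'init'
w.state = 'open'   # writes into _contents, then re-validates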

@@ -0,0 +1,71 @@
from dbt.api.object import APIObject
from dbt.contracts.common import named_property
CONNECTION_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'type': {
'type': 'string',
# valid python identifiers only
'pattern': r'^[A-Za-z_][A-Za-z0-9_]+$',
},
'name': {
'type': ['null', 'string'],
},
'state': {
'enum': ['init', 'open', 'closed', 'fail'],
},
'transaction_open': {
'type': 'boolean',
},
# we can't serialize this so we can't require it as part of the
# contract.
# 'handle': {
# 'type': ['null', 'object'],
# },
# credentials are validated separately by the adapter packages
'credentials': {
'description': (
'The credentials object here should match the connection type.'
),
'type': 'object',
'additionalProperties': True,
}
},
'required': [
'type', 'name', 'state', 'transaction_open', 'credentials'
],
}
class Connection(APIObject):
SCHEMA = CONNECTION_CONTRACT
def __init__(self, credentials, *args, **kwargs):
# we can't serialize handles
self._handle = kwargs.pop('handle')
super(Connection, self).__init__(credentials=credentials.serialize(),
*args, **kwargs)
# this will validate itself in its own __init__.
self._credentials = credentials
@property
def credentials(self):
return self._credentials
@property
def handle(self):
return self._handle
@handle.setter
def handle(self, value):
self._handle = value
name = named_property('name', 'The name of this connection')
state = named_property('state', 'The state of the connection')
transaction_open = named_property(
'transaction_open',
'True if there is an open transaction, False otherwise.'
)

core/dbt/contracts/graph/compiled.py Normal file
@@ -0,0 +1,236 @@
from dbt.api import APIObject
from dbt.utils import deep_merge
from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \
PARSED_MACRO_CONTRACT, ParsedNode
import dbt.compat
import sqlparse
INJECTED_CTE_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'A single entry in the CTEs list',
'properties': {
'id': {
'type': 'string',
'description': 'The id of the CTE',
},
'sql': {
'type': ['string', 'null'],
'description': 'The compiled SQL of the CTE',
'additionalProperties': True,
},
},
'required': ['id', 'sql'],
}
COMPILED_NODE_CONTRACT = deep_merge(
PARSED_NODE_CONTRACT,
{
# TODO: when we add 'extra_ctes' back in, flip this back to False
'additionalProperties': True,
'properties': {
'compiled': {
'description': (
'This is true after the node has been compiled, but ctes '
'have not necessarily been injected into the node.'
),
'type': 'boolean'
},
'compiled_sql': {
'type': ['string', 'null'],
},
'extra_ctes_injected': {
'description': (
'This is true after extra ctes have been injected into '
'the compiled node.'
),
'type': 'boolean',
},
'extra_ctes': {
'type': 'array',
'description': 'The injected CTEs for a model',
'items': INJECTED_CTE_CONTRACT,
},
'injected_sql': {
'type': ['string', 'null'],
'description': 'The SQL after CTEs have been injected',
},
'wrapped_sql': {
'type': ['string', 'null'],
'description': (
'The SQL after it has been wrapped (for tests, '
'operations, and analysis)'
),
},
},
'required': PARSED_NODE_CONTRACT['required'] + [
'compiled', 'compiled_sql', 'extra_ctes_injected',
'injected_sql', 'extra_ctes'
]
}
)
COMPILED_NODES_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of the compiled nodes, stored by their unique IDs.'
),
'patternProperties': {
'.*': COMPILED_NODE_CONTRACT
},
}
COMPILED_MACRO_CONTRACT = PARSED_MACRO_CONTRACT
COMPILED_MACROS_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of the compiled macros, stored by their unique IDs.'
),
'patternProperties': {
'.*': COMPILED_MACRO_CONTRACT
},
}
COMPILED_GRAPH_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'The full compiled graph, with both the required nodes and required '
'macros.'
),
'properties': {
'nodes': COMPILED_NODES_CONTRACT,
'macros': COMPILED_MACROS_CONTRACT,
},
'required': ['nodes', 'macros'],
}
def _inject_ctes_into_sql(sql, ctes):
"""
`ctes` is a list of CTEs, each one a dict in the form:
[
{"id": "cte_id_1", "sql": "__dbt__CTE__ephemeral as (select * from table)"},
{"id": "cte_id_2", "sql": "__dbt__CTE__events as (select id, type from events)"}
]
Given `sql` like:
"with internal_cte as (select * from sessions)
select * from internal_cte"
This will spit out:
"with __dbt__CTE__ephemeral as (select * from table),
__dbt__CTE__events as (select id, type from events),
with internal_cte as (select * from sessions)
select * from internal_cte"
(Whitespace enhanced for readability.)
"""
if len(ctes) == 0:
return sql
parsed_stmts = sqlparse.parse(sql)
parsed = parsed_stmts[0]
with_stmt = None
for token in parsed.tokens:
if token.is_keyword and token.normalized == 'WITH':
with_stmt = token
break
if with_stmt is None:
# no with stmt, add one, and inject CTEs right at the beginning
first_token = parsed.token_first()
with_stmt = sqlparse.sql.Token(sqlparse.tokens.Keyword, 'with')
parsed.insert_before(first_token, with_stmt)
else:
# stmt exists, add a comma (which will come after injected CTEs)
trailing_comma = sqlparse.sql.Token(sqlparse.tokens.Punctuation, ',')
parsed.insert_after(with_stmt, trailing_comma)
token = sqlparse.sql.Token(
sqlparse.tokens.Keyword,
", ".join(c['sql'] for c in ctes)
)
parsed.insert_after(with_stmt, token)
return dbt.compat.to_string(parsed)
class CompiledNode(ParsedNode):
SCHEMA = COMPILED_NODE_CONTRACT
def prepend_ctes(self, prepended_ctes):
self._contents['extra_ctes_injected'] = True
self._contents['extra_ctes'] = prepended_ctes
self._contents['injected_sql'] = _inject_ctes_into_sql(
self.compiled_sql,
prepended_ctes
)
self.validate()
@property
def extra_ctes_injected(self):
return self._contents.get('extra_ctes_injected')
@property
def extra_ctes(self):
return self._contents.get('extra_ctes')
@property
def compiled(self):
return self._contents.get('compiled')
@compiled.setter
def compiled(self, value):
self._contents['compiled'] = value
@property
def injected_sql(self):
return self._contents.get('injected_sql')
@property
def compiled_sql(self):
return self._contents.get('compiled_sql')
@compiled_sql.setter
def compiled_sql(self, value):
self._contents['compiled_sql'] = value
@property
def wrapped_sql(self):
return self._contents.get('wrapped_sql')
@wrapped_sql.setter
def wrapped_sql(self, value):
self._contents['wrapped_sql'] = value
def set_cte(self, cte_id, sql):
"""This is the equivalent of what self.extra_ctes[cte_id] = sql would
do if extra_ctes were an OrderedDict
"""
for cte in self.extra_ctes:
if cte['id'] == cte_id:
cte['sql'] = sql
break
else:
self.extra_ctes.append(
{'id': cte_id, 'sql': sql}
)
class CompiledGraph(APIObject):
SCHEMA = COMPILED_GRAPH_CONTRACT
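A minimal sketch of how the CTE-injection helper above can be exercised directly (it is an internal function; the module path is taken from the import in the manifest file below, and the CTE id and SQL are made up). Exact whitespace in the result depends on sqlparse.

    from dbt.contracts.graph.compiled import _inject_ctes_into_sql

    ctes = [
        {'id': 'model.my_project.ephemeral_model',
         'sql': '__dbt__CTE__ephemeral_model as (select 1 as id)'},
    ]
    sql = 'with internal_cte as (select * from sessions) select * from internal_cte'

    print(_inject_ctes_into_sql(sql, ctes))
    # roughly: with __dbt__CTE__ephemeral_model as (select 1 as id),
    #          internal_cte as (select * from sessions) select * from internal_cte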


@@ -0,0 +1,419 @@
from dbt.api import APIObject
from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \
PARSED_MACRO_CONTRACT, PARSED_DOCUMENTATION_CONTRACT, \
PARSED_SOURCE_DEFINITION_CONTRACT
from dbt.contracts.graph.compiled import COMPILED_NODE_CONTRACT, CompiledNode
from dbt.exceptions import raise_duplicate_resource_name
from dbt.node_types import NodeType
from dbt.logger import GLOBAL_LOGGER as logger
from dbt import tracking
import dbt.utils
# We allow either parsed or compiled nodes, or parsed sources, as some
# 'compile()' calls in the runner actually just return the original parsed
# node they were given.
COMPILE_RESULT_NODE_CONTRACT = {
'anyOf': [
PARSED_NODE_CONTRACT,
COMPILED_NODE_CONTRACT,
PARSED_SOURCE_DEFINITION_CONTRACT,
]
}
COMPILE_RESULT_NODES_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of parsed or compiled nodes, stored by their unique IDs.'
),
'patternProperties': {
'.*': COMPILE_RESULT_NODE_CONTRACT
},
}
PARSED_MACROS_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of the parsed macros, stored by their unique IDs.'
),
'patternProperties': {
'.*': PARSED_MACRO_CONTRACT
},
}
PARSED_DOCUMENTATIONS_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of the parsed docs, stored by their unique IDs.'
),
'patternProperties': {
'.*': PARSED_DOCUMENTATION_CONTRACT,
},
}
NODE_EDGE_MAP = {
'type': 'object',
'additionalProperties': False,
'description': 'A map of node relationships',
'patternProperties': {
'.*': {
'type': 'array',
'items': {
'type': 'string',
'description': 'A node name',
}
}
}
}
PARSED_MANIFEST_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'The full parsed manifest of the graph, with both the required nodes'
' and required macros.'
),
'properties': {
'nodes': COMPILE_RESULT_NODES_CONTRACT,
'macros': PARSED_MACROS_CONTRACT,
'docs': PARSED_DOCUMENTATIONS_CONTRACT,
'disabled': {
'type': 'array',
'items': PARSED_NODE_CONTRACT,
'description': 'An array of disabled nodes',
},
'generated_at': {
'type': 'string',
'format': 'date-time',
'description': (
'The time at which the manifest was generated'
),
},
'parent_map': NODE_EDGE_MAP,
'child_map': NODE_EDGE_MAP,
'metadata': {
'type': 'object',
'additionalProperties': False,
'properties': {
'project_id': {
'type': ('string', 'null'),
'description': (
'The anonymized ID of the project. Persists as long '
'as the project name stays the same.'
),
'pattern': '[0-9a-f]{32}',
},
'user_id': {
'type': ('string', 'null'),
'description': (
'The user ID assigned by dbt. Persists per-user as '
'long as the user cookie file remains in place.'
),
'pattern': (
'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-'
'[0-9a-f]{12}'
),
},
'send_anonymous_usage_stats': {
'type': ('boolean', 'null'),
'description': (
'Whether or not to send anonymized usage statistics.'
),
},
},
'required': [
'project_id', 'user_id', 'send_anonymous_usage_stats',
],
},
},
'required': ['nodes', 'macros', 'docs', 'generated_at', 'metadata'],
}
class CompileResultNode(CompiledNode):
SCHEMA = COMPILE_RESULT_NODE_CONTRACT
def _sort_values(dct):
"""Given a dictionary, sort each value. This makes output deterministic,
which helps for tests.
"""
return {k: sorted(v) for k, v in dct.items()}
def build_edges(nodes):
"""Build the forward and backward edges on the given list of ParsedNodes
and return them as two separate dictionaries, each mapping unique IDs to
lists of edges.
"""
backward_edges = {}
# pre-populate the forward edge dict for simplicity
forward_edges = {node.unique_id: [] for node in nodes}
for node in nodes:
backward_edges[node.unique_id] = node.depends_on_nodes[:]
for unique_id in node.depends_on_nodes:
forward_edges[unique_id].append(node.unique_id)
return _sort_values(forward_edges), _sort_values(backward_edges)
class Manifest(APIObject):
SCHEMA = PARSED_MANIFEST_CONTRACT
"""The manifest for the full graph, after parsing and during compilation.
Nodes may be either ParsedNodes or CompiledNodes or a mix, depending upon
the current state of the compiler. Macros will always be ParsedMacros and
docs will always be ParsedDocumentations.
"""
def __init__(self, nodes, macros, docs, generated_at, disabled,
config=None):
"""The constructor. nodes and macros are dictionaries mapping unique
IDs to ParsedNode/CompiledNode and ParsedMacro objects, respectively.
docs is a dictionary mapping unique IDs to ParsedDocumentation objects.
generated_at is a text timestamp in RFC 3339 format.
disabled is a list of disabled FQNs (as strings).
"""
metadata = self.get_metadata(config)
self.nodes = nodes
self.macros = macros
self.docs = docs
self.generated_at = generated_at
self.metadata = metadata
self.disabled = disabled
self._flat_graph = None
super(Manifest, self).__init__()
@staticmethod
def get_metadata(config):
project_id = None
user_id = None
send_anonymous_usage_stats = None
if config is not None:
project_id = config.hashed_name()
if tracking.active_user is not None:
user_id = tracking.active_user.id
send_anonymous_usage_stats = not tracking.active_user.do_not_track
return {
'project_id': project_id,
'user_id': user_id,
'send_anonymous_usage_stats': send_anonymous_usage_stats,
}
def serialize(self):
"""Convert the parsed manifest to a nested dict structure that we can
safely serialize to JSON.
"""
forward_edges, backward_edges = build_edges(self.nodes.values())
return {
'nodes': {k: v.serialize() for k, v in self.nodes.items()},
'macros': {k: v.serialize() for k, v in self.macros.items()},
'docs': {k: v.serialize() for k, v in self.docs.items()},
'parent_map': backward_edges,
'child_map': forward_edges,
'generated_at': self.generated_at,
'metadata': self.metadata,
'disabled': [v.serialize() for v in self.disabled],
}
def to_flat_graph(self):
"""This function gets called in context.common by each node, so we want
to cache it. Make sure you don't call this until you're done with
building your manifest!
"""
if self._flat_graph is None:
self._flat_graph = {
'nodes': {
k: v.serialize() for k, v in self.nodes.items()
},
}
return self._flat_graph
def find_disabled_by_name(self, name, package=None):
return dbt.utils.find_in_list_by_name(self.disabled, name, package,
NodeType.refable())
def _find_by_name(self, name, package, subgraph, nodetype):
"""
Find a node by its given name in the appropriate subgraph. If package is
None, all packages will be searched.
nodetype should be a list of NodeTypes to accept.
"""
if subgraph == 'nodes':
search = self.nodes
elif subgraph == 'macros':
search = self.macros
else:
raise NotImplementedError(
'subgraph search for {} not implemented'.format(subgraph)
)
return dbt.utils.find_in_subgraph_by_name(
search,
name,
package,
nodetype)
def find_docs_by_name(self, name, package=None):
for unique_id, doc in self.docs.items():
parts = unique_id.split('.')
if len(parts) != 2:
msg = "documentation names cannot contain '.' characters"
dbt.exceptions.raise_compiler_error(msg, doc)
found_package, found_node = parts
if (name == found_node and package in {None, found_package}):
return doc
return None
def find_macro_by_name(self, name, package):
"""Find a macro in the graph by its name and package name, or None for
any package.
"""
return self._find_by_name(name, package, 'macros', [NodeType.Macro])
def find_refable_by_name(self, name, package):
"""Find any valid target for "ref()" in the graph by its name and
package name, or None for any package.
"""
return self._find_by_name(name, package, 'nodes', NodeType.refable())
def find_source_by_name(self, source_name, table_name, package):
"""Find any valid target for "source()" in the graph by its name and
package name, or None for any package.
"""
name = '{}.{}'.format(source_name, table_name)
return self._find_by_name(name, package, 'nodes', [NodeType.Source])
def get_materialization_macro(self, materialization_name,
adapter_type=None):
macro_name = dbt.utils.get_materialization_macro_name(
materialization_name=materialization_name,
adapter_type=adapter_type,
with_prefix=False)
macro = self.find_macro_by_name(
macro_name,
None)
if adapter_type not in ('default', None) and macro is None:
macro_name = dbt.utils.get_materialization_macro_name(
materialization_name=materialization_name,
adapter_type='default',
with_prefix=False)
macro = self.find_macro_by_name(
macro_name,
None)
return macro
def get_resource_fqns(self):
resource_fqns = {}
for unique_id, node in self.nodes.items():
if node.resource_type == NodeType.Source:
continue # sources have no FQNs and can't be configured
resource_type_plural = node.resource_type + 's'
if resource_type_plural not in resource_fqns:
resource_fqns[resource_type_plural] = set()
resource_fqns[resource_type_plural].add(tuple(node.fqn))
return resource_fqns
def _filter_subgraph(self, subgraph, predicate):
"""
Given a subgraph of the manifest, and a predicate, filter
the subgraph using that predicate. Generates a list of nodes.
"""
to_return = []
for unique_id, item in subgraph.items():
if predicate(item):
to_return.append(item)
return to_return
def _model_matches_schema_and_table(self, schema, table, model):
if model.resource_type == NodeType.Source:
return (model.schema.lower() == schema.lower() and
model.identifier.lower() == table.lower())
return (model.schema.lower() == schema.lower() and
model.alias.lower() == table.lower())
def get_unique_ids_for_schema_and_table(self, schema, table):
"""
Given a schema and table, find matching models, and return
their unique_ids. A schema and table may have more than one
match if the relation matches both a source and a seed, for instance.
"""
def predicate(model):
return self._model_matches_schema_and_table(schema, table, model)
matching = list(self._filter_subgraph(self.nodes, predicate))
return [match.get('unique_id') for match in matching]
def add_nodes(self, new_nodes):
"""Add the given dict of new nodes to the manifest."""
for unique_id, node in new_nodes.items():
if unique_id in self.nodes:
raise_duplicate_resource_name(node, self.nodes[unique_id])
self.nodes[unique_id] = node
def patch_nodes(self, patches):
"""Patch nodes with the given dict of patches. Note that this consumes
the input!
"""
# because we don't have any mapping from node _names_ to nodes, and we
# only have the node name in the patch, we have to iterate over all the
# nodes looking for matching names. We could use _find_by_name if we
# were ok with doing an O(n*m) search (one scan of the nodes per patch)
for node in self.nodes.values():
if node.resource_type != NodeType.Model:
continue
patch = patches.pop(node.name, None)
if not patch:
continue
node.patch(patch)
# log debug-level warning about nodes we couldn't find
if patches:
for patch in patches.values():
# since patches aren't nodes, we can't use the existing
# target_not_found warning
logger.debug((
'WARNING: Found documentation for model "{}", but the model '
'was not found or is disabled').format(patch.name)
)
def __getattr__(self, name):
raise AttributeError("'{}' object has no attribute '{}'".format(
type(self).__name__, name)
)
def get_used_schemas(self, resource_types=None):
return frozenset({
(node.database, node.schema)
for node in self.nodes.values()
if not resource_types or node.resource_type in resource_types
})
def get_used_databases(self):
return frozenset(node.database for node in self.nodes.values())
def deepcopy(self, config=None):
return Manifest(
nodes={k: v.incorporate() for k, v in self.nodes.items()},
macros={k: v.incorporate() for k, v in self.macros.items()},
docs={k: v.incorporate() for k, v in self.docs.items()},
generated_at=self.generated_at,
disabled=[n.incorporate() for n in self.disabled],
config=config
)
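A small sketch of how build_edges derives the child_map/parent_map emitted by Manifest.serialize(). The FakeNode stand-in is hypothetical; it only supplies the two attributes build_edges reads (unique_id and depends_on_nodes).

    from collections import namedtuple
    from dbt.contracts.graph.manifest import build_edges

    FakeNode = namedtuple('FakeNode', ['unique_id', 'depends_on_nodes'])

    nodes = [
        FakeNode('model.my_project.base', []),
        FakeNode('model.my_project.downstream', ['model.my_project.base']),
    ]

    child_map, parent_map = build_edges(nodes)
    # child_map  == {'model.my_project.base': ['model.my_project.downstream'],
    #                'model.my_project.downstream': []}
    # parent_map == {'model.my_project.base': [],
    #                'model.my_project.downstream': ['model.my_project.base']}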


@@ -0,0 +1,815 @@
from dbt.api import APIObject
from dbt.utils import deep_merge
from dbt.node_types import NodeType
import dbt.clients.jinja
from dbt.contracts.graph.unparsed import UNPARSED_NODE_CONTRACT, \
UNPARSED_MACRO_CONTRACT, UNPARSED_DOCUMENTATION_FILE_CONTRACT, \
UNPARSED_BASE_CONTRACT, TIME_CONTRACT
from dbt.logger import GLOBAL_LOGGER as logger # noqa
# TODO: which of these do we _really_ support? or is it both?
HOOK_CONTRACT = {
'anyOf': [
{
'type': 'object',
'additionalProperties': False,
'properties': {
'sql': {
'type': 'string',
},
'transaction': {
'type': 'boolean',
},
'index': {
'type': 'integer',
}
},
'required': ['sql', 'transaction'],
},
{
'type': 'string',
},
],
}
CONFIG_CONTRACT = {
'type': 'object',
'additionalProperties': True,
'properties': {
'enabled': {
'type': 'boolean',
},
'materialized': {
'type': 'string',
},
'persist_docs': {
'type': 'object',
'additionalProperties': True,
},
'post-hook': {
'type': 'array',
'items': HOOK_CONTRACT,
},
'pre-hook': {
'type': 'array',
'items': HOOK_CONTRACT,
},
'vars': {
'type': 'object',
'additionalProperties': True,
},
'quoting': {
'type': 'object',
'additionalProperties': True,
},
'column_types': {
'type': 'object',
'additionalProperties': True,
},
'tags': {
'anyOf': [
{
'type': 'array',
'items': {
'type': 'string'
},
},
{
'type': 'string'
}
]
},
'severity': {
'type': 'string',
'pattern': '([eE][rR][rR][oO][rR]|[wW][aA][rR][nN])',
},
},
'required': [
'enabled', 'materialized', 'post-hook', 'pre-hook', 'vars',
'quoting', 'column_types', 'tags', 'persist_docs'
]
}
# Note that description must be present, but may be empty.
COLUMN_INFO_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'Information about a single column in a model',
'properties': {
'name': {
'type': 'string',
'description': 'The column name',
},
'description': {
'type': 'string',
'description': 'A description of the column',
},
},
'required': ['name', 'description'],
}
# Docrefs are not quite like regular references, as they indicate what they
# apply to as well as what they are referring to (so the doc package + doc
# name, but also the column name if relevant). This is because column
# descriptions are rendered separately from their models.
DOCREF_CONTRACT = {
'type': 'object',
'properties': {
'documentation_name': {
'type': 'string',
'description': 'The name of the documentation block referred to',
},
'documentation_package': {
'type': 'string',
'description': (
'If provided, the documentation package name referred to'
),
},
'column_name': {
'type': 'string',
'description': (
'If the documentation refers to a column instead of the '
'model, the column name should be set'
),
},
},
'required': ['documentation_name', 'documentation_package']
}
HAS_FQN_CONTRACT = {
'properties': {
'fqn': {
'type': 'array',
'items': {
'type': 'string',
}
},
},
'required': ['fqn'],
}
HAS_UNIQUE_ID_CONTRACT = {
'properties': {
'unique_id': {
'type': 'string',
'minLength': 1,
},
},
'required': ['unique_id'],
}
CAN_REF_CONTRACT = {
'properties': {
'refs': {
'type': 'array',
'items': {
'type': 'array',
'description': (
'The list of arguments passed to a single ref call.'
),
},
'description': (
'The list of call arguments, one list of arguments per '
'call.'
)
},
'sources': {
'type': 'array',
'items': {
'type': 'array',
'description': (
'The list of arguments passed to a single source call.'
),
},
'description': (
'The list of call arguments, one list of arguments per '
'call.'
)
},
'depends_on': {
'type': 'object',
'additionalProperties': False,
'properties': {
'nodes': {
'type': 'array',
'items': {
'type': 'string',
'minLength': 1,
'description': (
'A node unique ID that this depends on.'
)
}
},
'macros': {
'type': 'array',
'items': {
'type': 'string',
'minLength': 1,
'description': (
'A macro unique ID that this depends on.'
)
}
},
},
'description': (
'A list of unique IDs for nodes and macros that this '
'node depends upon.'
),
'required': ['nodes', 'macros'],
},
},
'required': ['refs', 'sources', 'depends_on'],
}
HAS_DOCREFS_CONTRACT = {
'properties': {
'docrefs': {
'type': 'array',
'items': DOCREF_CONTRACT,
},
},
}
HAS_DESCRIPTION_CONTRACT = {
'properties': {
'description': {
'type': 'string',
'description': 'A user-supplied description of the model',
},
'columns': {
'type': 'object',
'properties': {
'.*': COLUMN_INFO_CONTRACT,
},
},
},
'required': ['description', 'columns'],
}
# does this belong inside another contract?
HAS_CONFIG_CONTRACT = {
'properties': {
'config': CONFIG_CONTRACT,
},
'required': ['config'],
}
COLUMN_TEST_CONTRACT = {
'properties': {
'column_name': {
'type': 'string',
'description': (
'In tests parsed from a v2 schema, the column the test is '
'associated with (if there is one)'
)
},
}
}
HAS_RELATION_METADATA_CONTRACT = {
'properties': {
'database': {
'type': 'string',
'description': (
'The actual database string that this will build into.'
)
},
'schema': {
'type': 'string',
'description': (
'The actual schema string that this will build into.'
)
},
},
'required': ['database', 'schema'],
}
PARSED_NODE_CONTRACT = deep_merge(
UNPARSED_NODE_CONTRACT,
HAS_UNIQUE_ID_CONTRACT,
HAS_FQN_CONTRACT,
CAN_REF_CONTRACT,
HAS_DOCREFS_CONTRACT,
HAS_DESCRIPTION_CONTRACT,
HAS_CONFIG_CONTRACT,
COLUMN_TEST_CONTRACT,
HAS_RELATION_METADATA_CONTRACT,
{
'properties': {
'alias': {
'type': 'string',
'description': (
'The name of the relation that this will build into'
)
},
# TODO: move this into a class property.
'empty': {
'type': 'boolean',
'description': 'True if the SQL is empty',
},
'tags': {
'type': 'array',
'items': {
'type': 'string',
}
},
# this is really nodes-only
'patch_path': {
'type': 'string',
'description': (
'The path to the patch source if the node was patched'
),
},
'build_path': {
'type': 'string',
'description': (
'In seeds, the path to the source file used during build.'
),
},
},
'required': ['empty', 'tags', 'alias'],
}
)
class ParsedNode(APIObject):
SCHEMA = PARSED_NODE_CONTRACT
def __init__(self, agate_table=None, **kwargs):
self.agate_table = agate_table
kwargs.setdefault('columns', {})
kwargs.setdefault('description', '')
super(ParsedNode, self).__init__(**kwargs)
@property
def is_refable(self):
return self.resource_type in NodeType.refable()
@property
def is_ephemeral(self):
return self.get('config', {}).get('materialized') == 'ephemeral'
@property
def is_ephemeral_model(self):
return self.is_refable and self.is_ephemeral
@property
def depends_on_nodes(self):
"""Return the list of node IDs that this node depends on."""
return self.depends_on['nodes']
def to_dict(self):
"""Similar to 'serialize', but tacks the agate_table attribute in too.
Why we need this:
- networkx demands that the attr_dict it gets (the node) be a dict
or subclass and does not respect the abstract Mapping class
- many jinja things access the agate_table attribute (member) of
the node dict.
- the nodes are passed around between those two contexts in a way
that isn't entirely clear to me yet.
"""
ret = self.serialize()
# note: not a copy/deep copy.
ret['agate_table'] = self.agate_table
return ret
def to_shallow_dict(self):
ret = self._contents.copy()
ret['agate_table'] = self.agate_table
return ret
def patch(self, patch):
"""Given a ParsedNodePatch, add the new information to the node."""
# explicitly pick out the parts to update so we don't inadvertently
# step on the model name or anything
self._contents.update({
'patch_path': patch.original_file_path,
'description': patch.description,
'columns': patch.columns,
'docrefs': patch.docrefs,
})
# patches always trigger re-validation
self.validate()
def get_materialization(self):
return self.config.get('materialized')
@property
def build_path(self):
return self._contents.get('build_path')
@build_path.setter
def build_path(self, value):
self._contents['build_path'] = value
@property
def database(self):
return self._contents['database']
@database.setter
def database(self, value):
self._contents['database'] = value
@property
def schema(self):
return self._contents['schema']
@schema.setter
def schema(self, value):
self._contents['schema'] = value
@property
def alias(self):
return self._contents['alias']
@alias.setter
def alias(self, value):
self._contents['alias'] = value
@property
def config(self):
return self._contents['config']
@config.setter
def config(self, value):
self._contents['config'] = value
SNAPSHOT_CONFIG_CONTRACT = {
'properties': {
'target_database': {
'type': 'string',
},
'target_schema': {
'type': 'string',
},
'unique_key': {
'type': 'string',
},
'anyOf': [
{
'properties': {
'strategy': {
'enum': ['timestamp'],
},
'updated_at': {
'type': 'string',
'description': (
'The column name with the timestamp to compare'
),
},
},
'required': ['updated_at'],
},
{
'properties': {
'strategy': {
'enum': ['check'],
},
'check_cols': {
'oneOf': [
{
'type': 'array',
'items': {'type': 'string'},
'description': 'The columns to check',
'minLength': 1,
},
{
'enum': ['all'],
'description': 'Check all columns',
},
],
},
},
'required': ['check_cols'],
}
]
},
'required': [
'target_schema', 'unique_key', 'strategy',
],
}
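# For illustration (hypothetical values), configs of these shapes satisfy the
# two strategy branches above:
#   {'target_schema': 'snapshots', 'unique_key': 'id',
#    'strategy': 'timestamp', 'updated_at': 'updated_at'}
#   {'target_schema': 'snapshots', 'unique_key': 'id',
#    'strategy': 'check', 'check_cols': ['status', 'amount']}
# (any additional keys required by the merged CONFIG_CONTRACT are elided here)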
PARSED_SNAPSHOT_NODE_CONTRACT = deep_merge(
PARSED_NODE_CONTRACT,
{
'properties': {
'config': SNAPSHOT_CONFIG_CONTRACT,
'resource_type': {
'enum': [NodeType.Snapshot],
},
},
}
)
class ParsedSnapshotNode(ParsedNode):
SCHEMA = PARSED_SNAPSHOT_NODE_CONTRACT
# The parsed node update is only the 'patch', not the test. The test became a
# regular parsed node. Note that description and columns must be present, but
# may be empty.
PARSED_NODE_PATCH_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'A collection of values that can be set on a node',
'properties': {
'name': {
'type': 'string',
'description': 'The name of the node this modifies',
},
'description': {
'type': 'string',
'description': 'The description of the node to add',
},
'original_file_path': {
'type': 'string',
'description': (
'Relative path to the originating file path for the patch '
'from the project root'
),
},
'columns': {
'type': 'object',
'properties': {
'.*': COLUMN_INFO_CONTRACT,
}
},
'docrefs': {
'type': 'array',
'items': DOCREF_CONTRACT,
}
},
'required': [
'name', 'original_file_path', 'description', 'columns', 'docrefs'
],
}
class ParsedNodePatch(APIObject):
SCHEMA = PARSED_NODE_PATCH_CONTRACT
PARSED_MACRO_CONTRACT = deep_merge(
UNPARSED_MACRO_CONTRACT,
{
# This is required for the 'generator' field to work.
# TODO: fix before release
'additionalProperties': True,
'properties': {
'name': {
'type': 'string',
'description': (
'Name of this node. For models, this is used as the '
'identifier in the database.'),
'minLength': 1,
'maxLength': 127,
},
'resource_type': {
'enum': [
NodeType.Macro,
],
},
'unique_id': {
'type': 'string',
'minLength': 1,
'maxLength': 255,
},
'tags': {
'description': (
'An array of arbitrary strings to use as tags.'
),
'type': 'array',
'items': {
'type': 'string',
},
},
'depends_on': {
'type': 'object',
'additionalProperties': False,
'properties': {
'macros': {
'type': 'array',
'items': {
'type': 'string',
'minLength': 1,
'maxLength': 255,
'description': 'A single macro unique ID.'
}
}
},
'description': 'A list of all macros this macro depends on.',
'required': ['macros'],
},
},
'required': [
'resource_type', 'unique_id', 'tags', 'depends_on', 'name',
]
}
)
class ParsedMacro(APIObject):
SCHEMA = PARSED_MACRO_CONTRACT
@property
def generator(self):
"""
Returns a function that can be called to render the macro results.
"""
# TODO: we can generate self.template from the other properties
# available in this class. should we just generate this here?
return dbt.clients.jinja.macro_generator(self._contents)
# This is just the file + its ID
PARSED_DOCUMENTATION_CONTRACT = deep_merge(
UNPARSED_DOCUMENTATION_FILE_CONTRACT,
{
'properties': {
'name': {
'type': 'string',
'description': (
'Name of this node, as referred to by doc() references'
),
},
'unique_id': {
'type': 'string',
'minLength': 1,
'maxLength': 255,
'description': (
'The unique ID of this node as stored in the manifest'
),
},
'block_contents': {
'type': 'string',
'description': 'The contents of just the docs block',
},
},
'required': ['name', 'unique_id', 'block_contents'],
}
)
NODE_EDGE_MAP = {
'type': 'object',
'additionalProperties': False,
'description': 'A map of node relationships',
'patternProperties': {
'.*': {
'type': 'array',
'items': {
'type': 'string',
'description': 'A node name',
}
}
}
}
class ParsedDocumentation(APIObject):
SCHEMA = PARSED_DOCUMENTATION_CONTRACT
class Hook(APIObject):
SCHEMA = HOOK_CONTRACT
FRESHNESS_CONTRACT = {
'properties': {
'loaded_at_field': {
'type': ['null', 'string'],
'description': 'The field to use as the "loaded at" timestamp',
},
'freshness': {
'anyOf': [
{'type': 'null'},
{
'type': 'object',
'additionalProperties': False,
'properties': {
'warn_after': TIME_CONTRACT,
'error_after': TIME_CONTRACT,
},
},
],
},
},
}
QUOTING_CONTRACT = {
'properties': {
'quoting': {
'type': 'object',
'additionalProperties': False,
'properties': {
'database': {'type': 'boolean'},
'schema': {'type': 'boolean'},
'identifier': {'type': 'boolean'},
},
},
},
'required': ['quoting'],
}
PARSED_SOURCE_DEFINITION_CONTRACT = deep_merge(
UNPARSED_BASE_CONTRACT,
FRESHNESS_CONTRACT,
QUOTING_CONTRACT,
HAS_DESCRIPTION_CONTRACT,
HAS_UNIQUE_ID_CONTRACT,
HAS_DOCREFS_CONTRACT,
HAS_RELATION_METADATA_CONTRACT,
HAS_FQN_CONTRACT,
{
'description': (
'A source table definition, as parsed from the one provided in the '
'"tables" subsection of the "sources" section of schema.yml'
),
'properties': {
'name': {
'type': 'string',
'description': (
'The name of this node, which is the name of the model it '
'refers to'
),
'minLength': 1,
},
'source_name': {
'type': 'string',
'description': 'The reference name of the source definition',
'minLength': 1,
},
'source_description': {
'type': 'string',
'description': 'The user-supplied description of the source',
},
'loader': {
'type': 'string',
'description': 'The user-defined loader for this source',
},
'identifier': {
'type': 'string',
'description': 'The identifier for the source table',
'minLength': 1,
},
# the manifest search stuff really requires this, sadly
'resource_type': {
'enum': [NodeType.Source],
},
},
# note that while required, loaded_at_field and freshness may be null
'required': [
'source_name', 'source_description', 'loaded_at_field', 'loader',
'freshness', 'description', 'columns', 'docrefs', 'identifier',
],
}
)
class ParsedSourceDefinition(APIObject):
SCHEMA = PARSED_SOURCE_DEFINITION_CONTRACT
is_ephemeral_model = False
def to_shallow_dict(self):
return self._contents.copy()
# provide some empty/meaningless properties so these look more like
# ParsedNodes
@property
def depends_on_nodes(self):
return []
@property
def refs(self):
return []
@property
def sources(self):
return []
@property
def tags(self):
return []
@property
def has_freshness(self):
return bool(self.freshness) and self.loaded_at_field is not None
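A brief sketch of the patch flow above, assuming APIObject validates its keyword arguments against SCHEMA (as the classes in this diff suggest); the model, file and column names are hypothetical.

    from dbt.contracts.graph.parsed import ParsedNodePatch

    patch = ParsedNodePatch(
        name='orders',
        original_file_path='models/schema.yml',
        description='One row per order.',
        columns={'id': {'name': 'id', 'description': 'Primary key'}},
        docrefs=[],
    )
    # Manifest.patch_nodes() hands a patch like this to ParsedNode.patch(), which
    # copies patch_path, description, columns and docrefs onto the node and then
    # re-validates it.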


@@ -0,0 +1,389 @@
from dbt.api import APIObject
from dbt.node_types import NodeType
from dbt.utils import deep_merge
UNPARSED_BASE_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'package_name': {
'type': 'string',
},
# filesystem
'root_path': {
'type': 'string',
'description': 'The absolute path to the project root',
},
'path': {
'type': 'string',
'description': (
'Relative path to the source file from the project root. '
'Usually the same as original_file_path, but in some cases '
'dbt will generate a path.'),
},
'original_file_path': {
'type': 'string',
'description': (
'Relative path to the originating file from the project root.'
),
}
},
'required': ['package_name', 'root_path', 'path', 'original_file_path']
}
UNPARSED_HAS_SQL_CONTRACT = {
'properties': {
'raw_sql': {
'type': 'string',
'description': (
'For nodes defined in SQL files, this is just the contents '
'of that file. For schema tests, snapshots, etc. this is '
'generated by dbt.'),
},
'index': {
'type': 'integer',
}
},
'required': ['raw_sql']
}
UNPARSED_MACRO_CONTRACT = deep_merge(
UNPARSED_BASE_CONTRACT,
UNPARSED_HAS_SQL_CONTRACT
)
UNPARSED_NODE_CONTRACT = deep_merge(
UNPARSED_BASE_CONTRACT,
UNPARSED_HAS_SQL_CONTRACT,
{
'properties': {
'name': {
'type': 'string',
'description': (
'Name of this node. For models, this is used as the '
'identifier in the database.'),
'minLength': 1,
},
'resource_type': {
'enum': [
NodeType.Model,
NodeType.Test,
NodeType.Analysis,
NodeType.Operation,
NodeType.Seed,
# we need this if parse_node is going to handle snapshots.
NodeType.Snapshot,
NodeType.RPCCall,
]
},
},
'required': ['resource_type', 'name']
}
)
class UnparsedMacro(APIObject):
SCHEMA = UNPARSED_MACRO_CONTRACT
class UnparsedNode(APIObject):
SCHEMA = UNPARSED_NODE_CONTRACT
COLUMN_TEST_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'name': {
'type': 'string',
'description': 'The name of the column this test is for',
},
'description': {
'type': 'string',
'description': 'The description of this test',
},
'tests': {
'type': 'array',
'items': {
"anyOf": [
# 'not_null', 'unique', ...
{'type': 'string'},
# 'relationships: {...}', 'accepted_values: {...}'
{'type': 'object', 'additionalProperties': True}
],
},
'description': 'The list of tests to perform',
},
},
'required': ['name'],
}
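# For illustration, a column entry of this shape satisfies the contract above
# (the test names are examples):
#   {'name': 'id',
#    'description': 'Primary key',
#    'tests': ['unique', 'not_null',
#              {'accepted_values': {'values': ['placed', 'shipped']}}]}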
UNPARSED_COLUMN_DESCRIPTION_CONTRACT = {
'properties': {
'columns': {
'type': 'array',
'items': COLUMN_TEST_CONTRACT,
},
},
}
UNPARSED_NODE_DESCRIPTION_CONTRACT = {
'properties': {
'name': {
'type': 'string',
'description': (
'The name of this node, which is the name of the model it '
'refers to'
),
'minLength': 1,
},
'description': {
'type': 'string',
'description': (
'The raw string description of the node after parsing the yaml'
),
},
'tests': {
'type': 'array',
'items': {
"anyOf": [
{'type': 'string'},
{'type': 'object', 'additionalProperties': True}
],
},
},
},
'required': ['name'],
}
UNPARSED_NODE_UPDATE_CONTRACT = deep_merge(
UNPARSED_NODE_DESCRIPTION_CONTRACT,
UNPARSED_COLUMN_DESCRIPTION_CONTRACT,
{
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of the unparsed node updates, as provided in the '
'"models" section of schema.yml'
),
}
)
class UnparsedNodeUpdate(APIObject):
"""An unparsed node update is the blueprint for tests to be added and nodes
to be updated, referencing a certain node (specifically, a Model or
Source).
"""
SCHEMA = UNPARSED_NODE_UPDATE_CONTRACT
TIME_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'count': {
'type': 'integer',
},
'period': {
'enum': ['minute', 'hour', 'day'],
},
},
'required': ['count', 'period'],
}
_FRESHNESS_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'warn_after': {
'anyOf': [
{'type': 'null'},
TIME_CONTRACT,
]
},
'error_after': {
'anyOf': [
{'type': 'null'},
TIME_CONTRACT,
]
},
},
}
_QUOTING_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'database': {'type': 'boolean'},
'schema': {'type': 'boolean'},
'identifier': {'type': 'boolean'},
},
}
QUOTING_CONTRACT = {
'properties': {
'quoting': {
'anyOf': [
{'type': 'null'},
_QUOTING_CONTRACT,
],
},
},
}
FRESHNESS_CONTRACT = {
'properties': {
'loaded_at_field': {
'type': ['null', 'string'],
'description': 'The field to use as the "loaded at" timestamp',
},
'freshness': {
'anyOf': [
{'type': 'null'},
_FRESHNESS_CONTRACT,
],
},
},
}
UNPARSED_SOURCE_TABLE_DEFINITION_CONTRACT = deep_merge(
UNPARSED_NODE_DESCRIPTION_CONTRACT,
UNPARSED_COLUMN_DESCRIPTION_CONTRACT,
FRESHNESS_CONTRACT,
QUOTING_CONTRACT,
{
'description': (
'A source table definition, as provided in the "tables" '
'subsection of the "sources" section of schema.yml'
),
'properties': {
'identifier': {
'type': 'string',
'description': 'The identifier for the source table',
'minLength': 1,
},
},
}
)
UNPARSED_SOURCE_DEFINITION_CONTRACT = deep_merge(
FRESHNESS_CONTRACT,
QUOTING_CONTRACT,
{
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of the unparsed sources, as provided in the '
'"sources" section of schema.yml'
),
'properties': {
'name': {
'type': 'string',
'description': 'The reference name of the source definition',
'minLength': 1,
},
'loader': {
'type': 'string',
'description': 'The user-defined loader for this source',
'minLength': 1,
},
'description': {
'type': 'string',
'description': 'The user-supplied description of the source',
},
'database': {
'type': 'string',
'description': 'The database name for the source table',
'minLength': 1,
},
'schema': {
'type': 'string',
'description': 'The schema name for the source table',
'minLength': 1,
},
'tables': {
'type': 'array',
'items': UNPARSED_SOURCE_TABLE_DEFINITION_CONTRACT,
'description': 'The tables for this source',
'minLength': 1,
},
},
'required': ['name'],
}
)
class UnparsedTableDefinition(APIObject):
SCHEMA = UNPARSED_SOURCE_TABLE_DEFINITION_CONTRACT
class UnparsedSourceDefinition(APIObject):
SCHEMA = UNPARSED_SOURCE_DEFINITION_CONTRACT
@property
def tables(self):
return [UnparsedTableDefinition(**t) for t in self.get('tables', [])]
UNPARSED_DOCUMENTATION_FILE_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'package_name': {
'type': 'string',
},
# filesystem
'root_path': {
'type': 'string',
'description': 'The absolute path to the project root',
},
'path': {
'type': 'string',
'description': (
'Relative path to the source file from the project root. '
'Usually the same as original_file_path, but in some cases '
'dbt will generate a path.'),
},
'original_file_path': {
'type': 'string',
'description': (
'Relative path to the originating file from the project root.'
),
},
'file_contents': {
'type': 'string',
'description': (
'The raw text provided in the documentation block, presumably '
'markdown.'
),
},
# TODO: I would like to remove this, but some graph error handling
# cares about it.
'resource_type': {
'enum': [
NodeType.Documentation,
]
},
},
'required': [
'package_name', 'root_path', 'path', 'original_file_path',
'file_contents', 'resource_type'
],
}
class UnparsedDocumentationFile(APIObject):
SCHEMA = UNPARSED_DOCUMENTATION_FILE_CONTRACT
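A short sketch of the source contracts above, assuming APIObject accepts and validates keyword arguments as the other classes in this diff do; the source, loader and table names are made up.

    from dbt.contracts.graph.unparsed import UnparsedSourceDefinition

    source = UnparsedSourceDefinition(
        name='raw_events',
        schema='raw',
        loader='my_loader',
        tables=[{'name': 'sessions', 'identifier': 'web_sessions'}],
    )
    for table in source.tables:  # a list of UnparsedTableDefinition objects
        print(table.get('name'), table.get('identifier'))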


@@ -0,0 +1,400 @@
from dbt.api.object import APIObject
from dbt.logger import GLOBAL_LOGGER as logger # noqa
from dbt.utils import deep_merge
ARCHIVE_TABLE_CONFIG_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'source_table': {'type': 'string'},
'target_table': {'type': 'string'},
'updated_at': {'type': 'string'},
'unique_key': {'type': 'string'},
},
'required': ['source_table', 'target_table', 'updated_at', 'unique_key'],
}
ARCHIVE_CONFIG_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'source_database': {'type': 'string'},
'target_database': {'type': 'string'},
'source_schema': {'type': 'string'},
'target_schema': {'type': 'string'},
'tables': {
'type': 'array',
'items': ARCHIVE_TABLE_CONFIG_CONTRACT,
}
},
'required': ['source_schema', 'target_schema', 'tables'],
}
PROJECT_CONTRACT = {
'type': 'object',
'description': 'The project configuration.',
'additionalProperties': False,
'properties': {
'name': {
'type': 'string',
'pattern': r'^[^\d\W]\w*\Z',
},
'version': {
'anyOf': [
{
'type': 'string',
'pattern': (
# this does not support full semver (it does not
# allow a trailing -fooXYZ) and is not as restrictive
# as full semver (it allows '1.0'). It's more
# like 'semver lite'.
r'^(?:0|[1-9]\d*)\.(?:0|[1-9]\d*)(\.(?:0|[1-9]\d*))?$'
),
},
{
# the internal global_project/dbt_project.yml is actually
# 1.0. Heaven only knows how many users have done the same
'type': 'number',
},
],
},
'project-root': {
'type': 'string',
},
'source-paths': {
'type': 'array',
'items': {'type': 'string'},
},
'macro-paths': {
'type': 'array',
'items': {'type': 'string'},
},
'data-paths': {
'type': 'array',
'items': {'type': 'string'},
},
'test-paths': {
'type': 'array',
'items': {'type': 'string'},
},
'analysis-paths': {
'type': 'array',
'items': {'type': 'string'},
},
'docs-paths': {
'type': 'array',
'items': {'type': 'string'},
},
'target-path': {
'type': 'string',
},
'snapshot-paths': {
'type': 'array',
'items': {'type': 'string'},
},
'clean-targets': {
'type': 'array',
'items': {'type': 'string'},
},
'profile': {
'type': ['null', 'string'],
},
'log-path': {
'type': 'string',
},
'modules-path': {
'type': 'string',
},
'quoting': {
'type': 'object',
'additionalProperties': False,
'properties': {
'identifier': {
'type': 'boolean',
},
'schema': {
'type': 'boolean',
},
'database': {
'type': 'boolean',
},
'project': {
'type': 'boolean',
}
},
},
'models': {
'type': 'object',
'additionalProperties': True,
},
'on-run-start': {
'type': 'array',
'items': {'type': 'string'},
},
'on-run-end': {
'type': 'array',
'items': {'type': 'string'},
},
'archive': {
'type': 'array',
'items': ARCHIVE_CONFIG_CONTRACT,
},
'seeds': {
'type': 'object',
'additionalProperties': True,
},
# we validate the regex separately, using the pattern in dbt.semver
'require-dbt-version': {
'type': ['string', 'array'],
'items': {'type': 'string'},
},
},
'required': ['name', 'version'],
}
class Project(APIObject):
SCHEMA = PROJECT_CONTRACT
LOCAL_PACKAGE_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'local': {
'type': 'string',
'description': 'The absolute path to the local package.',
},
},
'required': ['local'],
}
GIT_PACKAGE_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'git': {
'type': 'string',
'description': (
'The URL to the git repository that stores the package'
),
},
'revision': {
'type': ['string', 'array'],
'items': {'type': 'string'},
'description': 'The git revision to use, if it is not tip',
},
'warn-unpinned': {
'type': 'boolean',
}
},
'required': ['git'],
}
VERSION_SPECIFICATION_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'major': {
'type': ['string', 'null'],
},
'minor': {
'type': ['string', 'null'],
},
'patch': {
'type': ['string', 'null'],
},
'prerelease': {
'type': ['string', 'null'],
},
'build': {
'type': ['string', 'null'],
},
'matcher': {
'type': 'string',
'enum': ['=', '>=', '<=', '>', '<'],
},
},
'required': ['major', 'minor', 'patch', 'prerelease', 'build', 'matcher'],
}
REGISTRY_PACKAGE_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'package': {
'type': 'string',
'description': 'The name of the package',
},
'version': {
'type': ['string', 'array'],
'items': {
'anyOf': [
VERSION_SPECIFICATION_CONTRACT,
{'type': 'string'}
],
},
'description': 'The version of the package',
},
},
'required': ['package', 'version'],
}
PACKAGE_FILE_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'packages': {
'type': 'array',
'items': {
'anyOf': [
LOCAL_PACKAGE_CONTRACT,
GIT_PACKAGE_CONTRACT,
REGISTRY_PACKAGE_CONTRACT,
],
},
},
},
'required': ['packages'],
}
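# For illustration, each of these package entries matches one of the contracts
# above (the paths, URLs and versions are placeholders):
#   {'local': '/path/to/local/package'}
#   {'git': 'https://example.com/org/repo.git', 'revision': 'v0.1.0'}
#   {'package': 'some_org/some_package', 'version': '0.2.0'}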
# the metadata from the registry has extra things that we don't care about.
REGISTRY_PACKAGE_METADATA_CONTRACT = deep_merge(
PACKAGE_FILE_CONTRACT,
{
'additionalProperties': True,
'properties': {
'name': {
'type': 'string',
},
'downloads': {
'type': 'object',
'additionalProperties': True,
'properties': {
'tarball': {
'type': 'string',
},
},
'required': ['tarball']
},
},
'required': PACKAGE_FILE_CONTRACT['required'][:] + ['downloads']
}
)
class PackageConfig(APIObject):
SCHEMA = PACKAGE_FILE_CONTRACT
USER_CONFIG_CONTRACT = {
'type': 'object',
'additionalProperties': True,
'properties': {
'send_anonymous_usage_stats': {
'type': 'boolean',
},
'use_colors': {
'type': 'boolean',
},
},
}
PROFILE_INFO_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'profile_name': {
'type': 'string',
},
'target_name': {
'type': 'string',
},
'config': USER_CONFIG_CONTRACT,
'threads': {
'type': 'number',
},
'credentials': {
'type': 'object',
'additionalProperties': True,
},
},
'required': [
'profile_name', 'target_name', 'config', 'threads', 'credentials'
],
}
class ProfileConfig(APIObject):
SCHEMA = PROFILE_INFO_CONTRACT
def _merge_requirements(base, *args):
required = base[:]
for arg in args:
required.extend(arg['required'])
return required
CONFIG_CONTRACT = deep_merge(
PROJECT_CONTRACT,
PACKAGE_FILE_CONTRACT,
PROFILE_INFO_CONTRACT,
{
'properties': {
'cli_vars': {
'type': 'object',
'additionalProperties': True,
},
# override quoting: both 'identifier' and 'schema' must be
# populated
'quoting': {
'required': ['identifier', 'schema'],
},
},
'required': _merge_requirements(
['cli_vars'],
PROJECT_CONTRACT,
PACKAGE_FILE_CONTRACT,
PROFILE_INFO_CONTRACT
),
},
)
def update_config_contract(typename, connection):
PROFILE_INFO_CONTRACT['properties']['credentials']['anyOf'].append(
connection.SCHEMA
)
CONFIG_CONTRACT['properties']['credentials']['anyOf'].append(
connection.SCHEMA
)
class Configuration(APIObject):
SCHEMA = CONFIG_CONTRACT
PROJECTS_LIST_PROJECT = {
'type': 'object',
'additionalProperties': False,
'patternProperties': {
'.*': CONFIG_CONTRACT,
},
}
class ProjectList(APIObject):
SCHEMA = PROJECTS_LIST_PROJECT
def serialize(self):
return {k: v.serialize() for k, v in self._contents.items()}
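A minimal sketch of validating a project configuration against PROJECT_CONTRACT, assuming this file is importable as dbt.contracts.project; the project name and version are placeholders, and APIObject subclasses appear to validate their contents on construction.

    from dbt.contracts.project import Project

    project = Project(name='jaffle_shop', version='0.1.0')
    # Project(name='jaffle_shop', version='not-a-version') would fail validation
    # (see JSONValidationException in core/dbt/exceptions.py below).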


@@ -0,0 +1,544 @@
from dbt.api.object import APIObject
from dbt.utils import deep_merge, timestring
from dbt.contracts.common import named_property
from dbt.contracts.graph.manifest import COMPILE_RESULT_NODE_CONTRACT
from dbt.contracts.graph.unparsed import TIME_CONTRACT
from dbt.contracts.graph.parsed import PARSED_SOURCE_DEFINITION_CONTRACT
TIMING_INFO_CONTRACT = {
'type': 'object',
'properties': {
'name': {
'type': 'string',
},
'started_at': {
'type': 'string',
'format': 'date-time',
},
'completed_at': {
'type': 'string',
'format': 'date-time',
},
}
}
class TimingInfo(APIObject):
SCHEMA = TIMING_INFO_CONTRACT
@classmethod
def create(cls, name):
return cls(name=name)
def begin(self):
self.set('started_at', timestring())
def end(self):
self.set('completed_at', timestring())
class collect_timing_info:
def __init__(self, name):
self.timing_info = TimingInfo.create(name)
def __enter__(self):
self.timing_info.begin()
return self.timing_info
def __exit__(self, exc_type, exc_value, traceback):
self.timing_info.end()
class NodeSerializable(APIObject):
def serialize(self):
result = super(NodeSerializable, self).serialize()
result['node'] = self.node.serialize()
return result
PARTIAL_RESULT_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'The partial result of a single node being run',
'properties': {
'error': {
'type': ['string', 'null'],
'description': 'The error string, or None if there was no error',
},
'status': {
'type': ['string', 'null', 'number', 'boolean'],
'description': 'The status result of the node execution',
},
'execution_time': {
'type': 'number',
'description': 'The execution time, in seconds',
},
'thread_id': {
'type': ['string', 'null'],
'description': 'ID of the executing thread, e.g. Thread-3',
},
'timing': {
'type': 'array',
'items': TIMING_INFO_CONTRACT,
},
'node': COMPILE_RESULT_NODE_CONTRACT,
},
'required': ['node', 'status', 'error', 'execution_time', 'thread_id',
'timing'],
}
class PartialResult(NodeSerializable):
"""Represent a "partial" execution result, i.e. one that has not (fully)
been executed.
This may be an ephemeral node (they are not compiled) or any error.
"""
SCHEMA = PARTIAL_RESULT_CONTRACT
def __init__(self, node, error=None, status=None, execution_time=0,
thread_id=None, timing=None):
if timing is None:
timing = []
super(PartialResult, self).__init__(
node=node,
error=error,
status=status,
execution_time=execution_time,
thread_id=thread_id,
timing=timing,
)
# if the result got to the point where it could be skipped/failed, we would
# be returning a real result, not a partial.
@property
def skipped(self):
return False
@property
def failed(self):
return None
RUN_MODEL_RESULT_CONTRACT = deep_merge(PARTIAL_RESULT_CONTRACT, {
'description': 'The result of a single node being run',
'properties': {
'skip': {
'type': 'boolean',
'description': 'True if this node was skipped',
},
'warn': {
'type': ['boolean', 'null'],
'description': 'True if this node succeeded with a warning',
},
'fail': {
'type': ['boolean', 'null'],
'description': 'On tests, true if the test failed',
},
},
'required': ['skip', 'fail', 'warn']
})
class RunModelResult(NodeSerializable):
SCHEMA = RUN_MODEL_RESULT_CONTRACT
def __init__(self, node, error=None, skip=False, status=None, failed=None,
warned=None, thread_id=None, timing=None, execution_time=0):
if timing is None:
timing = []
super(RunModelResult, self).__init__(
node=node,
error=error,
skip=skip,
status=status,
fail=failed,
warn=warned,
execution_time=execution_time,
thread_id=thread_id,
timing=timing,
)
# these all get set after the fact, generally
error = named_property('error',
'If there was an error, the text of that error')
skip = named_property('skip', 'True if the model was skipped')
warn = named_property('warn', 'True if this was a test and it warned')
fail = named_property('fail', 'True if this was a test and it failed')
status = named_property('status', 'The status of the model execution')
execution_time = named_property('execution_time',
'The time in seconds to execute the model')
thread_id = named_property(
'thread_id',
'ID of the executing thread, e.g. Thread-3'
)
timing = named_property(
'timing',
'List of TimingInfo objects'
)
@property
def failed(self):
return self.fail
@property
def warned(self):
return self.warn
@property
def skipped(self):
return self.skip
EXECUTION_RESULT_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'The result of a single dbt invocation',
'properties': {
'results': {
'type': 'array',
'items': {
'anyOf': [
RUN_MODEL_RESULT_CONTRACT,
PARTIAL_RESULT_CONTRACT,
]
},
'description': 'An array of results, one per model',
},
'generated_at': {
'type': 'string',
'format': 'date-time',
'description': (
'The time at which the execution result was generated'
),
},
'elapsed_time': {
'type': 'number',
'description': (
'The time elapsed from before_run to after_run (hooks are not '
'included)'
),
}
},
'required': ['results', 'generated_at', 'elapsed_time'],
}
class ExecutionResult(APIObject):
SCHEMA = EXECUTION_RESULT_CONTRACT
def serialize(self):
return {
'results': [r.serialize() for r in self.results],
'generated_at': self.generated_at,
'elapsed_time': self.elapsed_time,
}
SOURCE_FRESHNESS_RESULT_CONTRACT = deep_merge(PARTIAL_RESULT_CONTRACT, {
'properties': {
'max_loaded_at': {
'type': 'string',
'format': 'date-time',
},
'snapshotted_at': {
'type': 'string',
'format': 'date-time',
},
'age': {
'type': 'number',
},
'status': {
'enum': ['pass', 'warn', 'error']
},
'node': PARSED_SOURCE_DEFINITION_CONTRACT,
},
'required': ['max_loaded_at', 'snapshotted_at', 'age']
})
class SourceFreshnessResult(NodeSerializable):
SCHEMA = SOURCE_FRESHNESS_RESULT_CONTRACT
def __init__(self, node, max_loaded_at, snapshotted_at,
age, status, thread_id, error=None,
timing=None, execution_time=0):
max_loaded_at = max_loaded_at.isoformat()
snapshotted_at = snapshotted_at.isoformat()
if timing is None:
timing = []
super(SourceFreshnessResult, self).__init__(
node=node,
max_loaded_at=max_loaded_at,
snapshotted_at=snapshotted_at,
age=age,
status=status,
thread_id=thread_id,
error=error,
timing=timing,
execution_time=execution_time
)
@property
def failed(self):
return self.status == 'error'
@property
def warned(self):
return self.status == 'warn'
@property
def skipped(self):
return False
FRESHNESS_METADATA_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'generated_at': {
'type': 'string',
'format': 'date-time',
'description': (
'The time at which the execution result was generated'
),
},
'elapsed_time': {
'type': 'number',
'description': (
'The time elapsed from before_run to after_run (hooks '
'are not included)'
),
},
},
'required': ['generated_at', 'elapsed_time']
}
FRESHNESS_RESULTS_CONTRACT = deep_merge(FRESHNESS_METADATA_CONTRACT, {
'description': 'The result of a single dbt source freshness invocation',
'properties': {
'results': {
'type': 'array',
'items': {
'anyOf': [
PARTIAL_RESULT_CONTRACT,
SOURCE_FRESHNESS_RESULT_CONTRACT,
],
},
},
},
'required': ['results'],
})
class FreshnessExecutionResult(APIObject):
SCHEMA = FRESHNESS_RESULTS_CONTRACT
def __init__(self, elapsed_time, generated_at, results):
super(FreshnessExecutionResult, self).__init__(
elapsed_time=elapsed_time,
generated_at=generated_at,
results=results
)
def serialize(self):
return {
'generated_at': self.generated_at,
'elapsed_time': self.elapsed_time,
'results': [s.serialize() for s in self.results]
}
def write(self, path):
"""Create a new object with the desired output schema and write it."""
meta = {
'generated_at': self.generated_at,
'elapsed_time': self.elapsed_time,
}
sources = {}
for result in self.results:
unique_id = result.node.unique_id
if result.error is not None:
result_dict = {
'error': result.error,
'state': 'runtime error'
}
else:
result_dict = {
'max_loaded_at': result.max_loaded_at,
'snapshotted_at': result.snapshotted_at,
'max_loaded_at_time_ago_in_s': result.age,
'state': result.status,
'criteria': result.node.freshness,
}
sources[unique_id] = result_dict
output = FreshnessRunOutput(meta=meta, sources=sources)
output.write(path)
def _copykeys(src, keys, **updates):
return {k: getattr(src, k) for k in keys}
SOURCE_FRESHNESS_OUTPUT_ERROR_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'The source freshness output for a single source table'
),
'properties': {
'error': {
'type': 'string',
'description': 'The error string',
},
'state': {
'enum': ['runtime error'],
},
}
}
SOURCE_FRESHNESS_OUTPUT_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': (
'The source freshness output for a single source table'
),
'properties': {
'max_loaded_at': {
'type': 'string',
'format': 'date-time',
},
'snapshotted_at': {
'type': 'string',
'format': 'date-time',
},
'max_loaded_at_time_ago_in_s': {
'type': 'number',
},
'state': {
'enum': ['pass', 'warn', 'error']
},
'criteria': {
'type': 'object',
'additionalProperties': False,
'properties': {
'warn_after': TIME_CONTRACT,
'error_after': TIME_CONTRACT,
},
},
},
'required': ['state', 'criteria', 'max_loaded_at', 'snapshotted_at',
'max_loaded_at_time_ago_in_s'],
}
FRESHNESS_RUN_OUTPUT_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'The output contract for dbt source freshness invocations',
'properties': {
'meta': FRESHNESS_METADATA_CONTRACT,
'sources': {
'type': 'object',
'additionalProperties': False,
'description': (
'A collection of the source results, stored by their unique '
'IDs.'
),
'patternProperties': {
'.*': {
'anyOf': [
SOURCE_FRESHNESS_OUTPUT_ERROR_CONTRACT,
SOURCE_FRESHNESS_OUTPUT_CONTRACT
],
},
},
}
}
}
class FreshnessRunOutput(APIObject):
SCHEMA = FRESHNESS_RUN_OUTPUT_CONTRACT
def __init__(self, meta, sources):
super(FreshnessRunOutput, self).__init__(meta=meta, sources=sources)
REMOTE_COMPILE_RESULT_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'properties': {
'raw_sql': {
'type': 'string',
},
'compiled_sql': {
'type': 'string',
},
'timing': {
'type': 'array',
'items': TIMING_INFO_CONTRACT,
},
},
'required': ['raw_sql', 'compiled_sql', 'timing']
}
class RemoteCompileResult(APIObject):
SCHEMA = REMOTE_COMPILE_RESULT_CONTRACT
def __init__(self, raw_sql, compiled_sql, node, timing=None, **kwargs):
if timing is None:
timing = []
# this should not show up in the serialized output.
self.node = node
super(RemoteCompileResult, self).__init__(
raw_sql=raw_sql,
compiled_sql=compiled_sql,
timing=timing,
**kwargs
)
@property
def error(self):
return None
REMOTE_RUN_RESULT_CONTRACT = deep_merge(REMOTE_COMPILE_RESULT_CONTRACT, {
'properties': {
'table': {
'type': 'object',
'properties': {
'column_names': {
'type': 'array',
'items': {'type': 'string'},
},
'rows': {
'type': 'array',
# any item type is ok
},
},
'required': ['rows', 'column_names'],
},
},
'required': ['table'],
})
class RemoteRunResult(RemoteCompileResult):
SCHEMA = REMOTE_RUN_RESULT_CONTRACT
def __init__(self, raw_sql, compiled_sql, node, timing=None, table=None):
if table is None:
table = []
super(RemoteRunResult, self).__init__(
raw_sql=raw_sql,
compiled_sql=compiled_sql,
timing=timing,
table=table,
node=node
)
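A short sketch of the timing helper above, assuming this file is importable as dbt.contracts.results; the 'compile' label and the sleep are stand-ins for real work.

    import time
    from dbt.contracts.results import collect_timing_info

    with collect_timing_info('compile') as timing:
        time.sleep(0.1)  # stand-in for real work

    print(timing.serialize())
    # {'name': 'compile', 'started_at': '...', 'completed_at': '...'}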

core/dbt/deprecations.py

@@ -0,0 +1,97 @@
import dbt.links
import dbt.exceptions
import dbt.flags
class DBTDeprecation(object):
name = None
description = None
def show(self, *args, **kwargs):
if self.name not in active_deprecations:
desc = self.description.format(**kwargs)
dbt.exceptions.warn_or_error(
"* Deprecation Warning: {}\n".format(desc)
)
active_deprecations.add(self.name)
class DBTRepositoriesDeprecation(DBTDeprecation):
name = "repositories"
description = """The dbt_project.yml configuration option 'repositories' is
deprecated. Please place dependencies in the `packages.yml` file instead.
The 'repositories' option will be removed in a future version of dbt.
For more information, see: https://docs.getdbt.com/docs/package-management
# Example packages.yml contents:
{recommendation}
"""
class GenerateSchemaNameSingleArgDeprecated(DBTDeprecation):
name = 'generate-schema-name-single-arg'
description = '''As of dbt v0.14.0, the `generate_schema_name` macro
accepts a second "node" argument. The one-argument form of `generate_schema_name`
is deprecated, and will become unsupported in a future release.
For more information, see:
https://docs.getdbt.com/v0.14/docs/upgrading-to-014
''' # noqa
class ArchiveDeprecated(DBTDeprecation):
name = 'archives'
description = '''As of dbt v0.14.0, the `dbt archive` command is renamed to
`dbt snapshot` and "archives" are "snapshots". The `dbt archive` command will
be removed in a future release.
For more information, see:
https://docs.getdbt.com/v0.14/docs/upgrading-to-014
'''
_adapter_renamed_description = """\
The adapter function `adapter.{old_name}` is deprecated and will be removed in
a future release of dbt. Please use `adapter.{new_name}` instead.
Documentation for {new_name} can be found here:
https://docs.getdbt.com/docs/adapter"""
def renamed_method(old_name, new_name):
class AdapterDeprecationWarning(DBTDeprecation):
name = 'adapter:{}'.format(old_name)
description = _adapter_renamed_description.format(old_name=old_name,
new_name=new_name)
dep = AdapterDeprecationWarning()
deprecations_list.append(dep)
deprecations[dep.name] = dep
def warn(name, *args, **kwargs):
if name not in deprecations:
# this should (hopefully) never happen
raise RuntimeError(
"Error showing deprecation warning: {}".format(name)
)
deprecations[name].show(*args, **kwargs)
# these are globally available
# since modules are only imported once, active_deprecations is a singleton
active_deprecations = set()
deprecations_list = [
DBTRepositoriesDeprecation(),
GenerateSchemaNameSingleArgDeprecated(),
ArchiveDeprecated(),
]
deprecations = {d.name: d for d in deprecations_list}
def reset_deprecations():
active_deprecations.clear()
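A minimal sketch of how the registry above is used; whether the message is logged as a warning or raised as an error depends on dbt.exceptions.warn_or_error and the active flags.

    import dbt.deprecations

    dbt.deprecations.reset_deprecations()
    dbt.deprecations.warn('archives')  # shows the ArchiveDeprecated message once
    dbt.deprecations.warn('archives')  # no-op: 'archives' is now in active_deprecations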

core/dbt/exceptions.py

@@ -0,0 +1,718 @@
import sys
import six
import functools
from dbt.compat import builtins
from dbt.logger import GLOBAL_LOGGER as logger
import dbt.flags
class Exception(builtins.Exception):
CODE = -32000
MESSAGE = "Server Error"
def data(self):
# if overriding, make sure the result is json-serializable.
return {
'type': self.__class__.__name__,
'message': str(self),
}
class MacroReturn(builtins.BaseException):
"""
Hack of all hacks
"""
def __init__(self, value):
self.value = value
class InternalException(Exception):
pass
class RuntimeException(RuntimeError, Exception):
CODE = 10001
MESSAGE = "Runtime error"
def __init__(self, msg, node=None):
self.stack = []
self.node = node
self.msg = msg
@property
def type(self):
return 'Runtime'
def node_to_string(self, node):
if node is None:
return "<Unknown>"
return "{} {} ({})".format(
node.get('resource_type'),
node.get('name', 'unknown'),
node.get('original_file_path'))
def process_stack(self):
lines = []
stack = self.stack + [self.node]
first = True
if len(stack) > 1:
lines.append("")
for item in stack:
msg = 'called by'
if first:
msg = 'in'
first = False
lines.append("> {} {}".format(
msg,
self.node_to_string(item)))
return lines
def __str__(self, prefix="! "):
node_string = ""
if self.node is not None:
node_string = " in {}".format(self.node_to_string(self.node))
if hasattr(self.msg, 'split'):
split_msg = self.msg.split("\n")
else:
# can't use basestring here, as on python2 it's an abstract class
split_msg = str(self.msg).split("\n")
lines = ["{}{}".format(self.type + ' Error',
node_string)] + split_msg
lines += self.process_stack()
return lines[0] + "\n" + "\n".join(
[" " + line for line in lines[1:]])
def data(self):
result = Exception.data(self)
if self.node is None:
return result
result.update({
'raw_sql': self.node.get('raw_sql'),
'compiled_sql': self.node.get('injected_sql'),
})
return result
class RPCFailureResult(RuntimeException):
CODE = 10002
MESSAGE = "RPC execution error"
class RPCTimeoutException(RuntimeException):
CODE = 10008
MESSAGE = 'RPC timeout error'
def __init__(self, timeout):
super(RPCTimeoutException, self).__init__(self.MESSAGE)
self.timeout = timeout
def data(self):
result = super(RPCTimeoutException, self).data()
result.update({
'timeout': self.timeout,
'message': 'RPC timed out after {}s'.format(self.timeout),
})
return result
class RPCKilledException(RuntimeException):
CODE = 10009
MESSAGE = 'RPC process killed'
def __init__(self, signum):
self.signum = signum
self.message = 'RPC process killed by signal {}'.format(self.signum)
super(RPCKilledException, self).__init__(self.message)
def data(self):
return {
'signum': self.signum,
'message': self.message,
}
class DatabaseException(RuntimeException):
CODE = 10003
MESSAGE = "Database Error"
def process_stack(self):
lines = []
if self.node is not None and self.node.get('build_path'):
lines.append(
"compiled SQL at {}".format(self.node.get('build_path')))
return lines + RuntimeException.process_stack(self)
@property
def type(self):
return 'Database'
class CompilationException(RuntimeException):
CODE = 10004
MESSAGE = "Compilation Error"
@property
def type(self):
return 'Compilation'
class RecursionException(RuntimeException):
pass
class ValidationException(RuntimeException):
CODE = 10005
MESSAGE = "Validation Error"
class JSONValidationException(ValidationException):
def __init__(self, typename, errors):
self.typename = typename
self.errors = errors
self.errors_message = ', '.join(errors)
msg = 'Invalid arguments passed to "{}" instance: {}'.format(
self.typename, self.errors_message
)
super(JSONValidationException, self).__init__(msg)
def __reduce__(self):
# see https://stackoverflow.com/a/36342588 for why this is necessary
return (JSONValidationException, (self.typename, self.errors))
class AliasException(ValidationException):
pass
class DependencyException(Exception):
# this can happen due to raise_dependency_error and its callers
CODE = 10006
MESSAGE = "Dependency Error"
class DbtConfigError(RuntimeException):
CODE = 10007
MESSAGE = "DBT Configuration Error"
def __init__(self, message, project=None, result_type='invalid_project'):
self.project = project
super(DbtConfigError, self).__init__(message)
self.result_type = result_type
class DbtProjectError(DbtConfigError):
pass
class DbtProfileError(DbtConfigError):
pass
class SemverException(Exception):
def __init__(self, msg=None):
self.msg = msg
if msg is not None:
super(SemverException, self).__init__(msg)
class VersionsNotCompatibleException(SemverException):
pass
class NotImplementedException(Exception):
pass
class FailedToConnectException(DatabaseException):
pass
class CommandError(RuntimeException):
def __init__(self, cwd, cmd, message='Error running command'):
super(CommandError, self).__init__(message)
self.cwd = cwd
self.cmd = cmd
self.args = (cwd, cmd, message)
def __str__(self):
if len(self.cmd) == 0:
return '{}: No arguments given'.format(self.msg)
return '{}: "{}"'.format(self.msg, self.cmd[0])
class ExecutableError(CommandError):
def __init__(self, cwd, cmd, message):
super(ExecutableError, self).__init__(cwd, cmd, message)
class WorkingDirectoryError(CommandError):
def __init__(self, cwd, cmd, message):
super(WorkingDirectoryError, self).__init__(cwd, cmd, message)
def __str__(self):
return '{}: "{}"'.format(self.msg, self.cwd)
class CommandResultError(CommandError):
def __init__(self, cwd, cmd, returncode, stdout, stderr,
message='Got a non-zero returncode'):
super(CommandResultError, self).__init__(cwd, cmd, message)
self.returncode = returncode
self.stdout = stdout
self.stderr = stderr
self.args = (cwd, cmd, returncode, stdout, stderr, message)
def __str__(self):
return '{} running: {}'.format(self.msg, self.cmd)
def raise_compiler_error(msg, node=None):
raise CompilationException(msg, node)
def raise_database_error(msg, node=None):
raise DatabaseException(msg, node)
def raise_dependency_error(msg):
raise DependencyException(msg)
def invalid_type_error(method_name, arg_name, got_value, expected_type,
version='0.13.0'):
"""Raise a CompilationException when an adapter method available to macros
has changed.
"""
got_type = type(got_value)
msg = ("As of {version}, 'adapter.{method_name}' expects argument "
"'{arg_name}' to be of type '{expected_type}', instead got "
"{got_value} ({got_type})")
raise_compiler_error(msg.format(version=version, method_name=method_name,
arg_name=arg_name, expected_type=expected_type,
got_value=got_value, got_type=got_type))
def ref_invalid_args(model, args):
raise_compiler_error(
"ref() takes at most two arguments ({} given)".format(len(args)),
model)
def ref_bad_context(model, args):
ref_args = ', '.join("'{}'".format(a) for a in args)
ref_string = '{{{{ ref({}) }}}}'.format(ref_args)
base_error_msg = """dbt was unable to infer all dependencies for the model "{model_name}".
This typically happens when ref() is placed within a conditional block.
To fix this, add the following hint to the top of the model "{model_name}":
-- depends_on: {ref_string}"""
# This explicitly references model['name'], instead of model['alias'], for
# better error messages. Ex. If models foo_users and bar_users are aliased
# to 'users', in their respective schemas, then you would want to see
# 'bar_users' in your error message instead of just 'users'.
error_msg = base_error_msg.format(
model_name=model['name'],
model_path=model['path'],
ref_string=ref_string
)
raise_compiler_error(error_msg, model)
def doc_invalid_args(model, args):
raise_compiler_error(
"doc() takes at most two arguments ({} given)".format(len(args)),
model)
def doc_target_not_found(model, target_doc_name, target_doc_package):
target_package_string = ''
if target_doc_package is not None:
target_package_string = "in package '{}' ".format(target_doc_package)
msg = (
"Documentation for '{}' depends on doc '{}' {} which was not found"
).format(
model.get('unique_id'),
target_doc_name,
target_package_string
)
raise_compiler_error(msg, model)
def _get_target_failure_msg(model, target_model_name, target_model_package,
include_path, reason):
target_package_string = ''
if target_model_package is not None:
target_package_string = "in package '{}' ".format(target_model_package)
source_path_string = ''
if include_path:
source_path_string = ' ({})'.format(model.get('original_file_path'))
return ("{} '{}'{} depends on model '{}' {}which {}"
.format(model.get('resource_type').title(),
model.get('unique_id'),
source_path_string,
target_model_name,
target_package_string,
reason))
def get_target_disabled_msg(model, target_model_name, target_model_package):
return _get_target_failure_msg(model, target_model_name,
target_model_package, include_path=True,
reason='is disabled')
def get_target_not_found_msg(model, target_model_name, target_model_package):
return _get_target_failure_msg(model, target_model_name,
target_model_package, include_path=True,
reason='was not found')
def get_target_not_found_or_disabled_msg(model, target_model_name,
target_model_package):
return _get_target_failure_msg(model, target_model_name,
target_model_package, include_path=False,
reason='was not found or is disabled')
def ref_target_not_found(model, target_model_name, target_model_package):
msg = get_target_not_found_or_disabled_msg(model, target_model_name,
target_model_package)
raise_compiler_error(msg, model)
def source_disabled_message(model, target_name, target_table_name):
return ("{} '{}' ({}) depends on source '{}.{}' which was not found"
.format(model.get('resource_type').title(),
model.get('unique_id'),
model.get('original_file_path'),
target_name,
target_table_name))
def source_target_not_found(model, target_name, target_table_name):
msg = source_disabled_message(model, target_name, target_table_name)
raise_compiler_error(msg, model)
def ref_disabled_dependency(model, target_model):
raise_compiler_error(
"Model '{}' depends on model '{}' which is disabled in "
"the project config".format(model.get('unique_id'),
target_model.get('unique_id')),
model)
def dependency_not_found(model, target_model_name):
raise_compiler_error(
"'{}' depends on '{}' which is not in the graph!"
.format(model.get('unique_id'), target_model_name),
model)
def macro_not_found(model, target_macro_id):
raise_compiler_error(
model,
"'{}' references macro '{}' which is not defined!"
.format(model.get('unique_id'), target_macro_id))
def materialization_not_available(model, adapter_type):
from dbt.utils import get_materialization # noqa
materialization = get_materialization(model)
raise_compiler_error(
"Materialization '{}' is not available for {}!"
.format(materialization, adapter_type),
model)
def missing_materialization(model, adapter_type):
from dbt.utils import get_materialization # noqa
materialization = get_materialization(model)
valid_types = "'default'"
if adapter_type != 'default':
valid_types = "'default' and '{}'".format(adapter_type)
raise_compiler_error(
"No materialization '{}' was found for adapter {}! (searched types {})"
.format(materialization, adapter_type, valid_types),
model)
def bad_package_spec(repo, spec, error_message):
raise InternalException(
"Error checking out spec='{}' for repo {}\n{}".format(
spec, repo, error_message))
def raise_cache_inconsistent(message):
raise InternalException('Cache inconsistency detected: {}'.format(message))
def missing_config(model, name):
raise_compiler_error(
"Model '{}' does not define a required config parameter '{}'."
.format(model.get('unique_id'), name),
model)
def missing_relation(relation, model=None):
raise_compiler_error(
"Relation {} not found!".format(relation),
model)
def relation_wrong_type(relation, expected_type, model=None):
raise_compiler_error(
('Trying to create {expected_type} {relation}, '
'but it currently exists as a {current_type}. Either '
'drop {relation} manually, or run dbt with '
'`--full-refresh` and dbt will drop it for you.')
.format(relation=relation,
current_type=relation.type,
expected_type=expected_type),
model)
def package_not_found(package_name):
raise_dependency_error(
"Package {} was not found in the package index".format(package_name))
def package_version_not_found(package_name, version_range, available_versions):
base_msg = ('Could not find a matching version for package {}\n'
' Requested range: {}\n'
' Available versions: {}')
raise_dependency_error(base_msg.format(package_name,
version_range,
available_versions))
def invalid_materialization_argument(name, argument):
raise_compiler_error(
"materialization '{}' received unknown argument '{}'."
.format(name, argument))
def system_error(operation_name):
raise_compiler_error(
"dbt encountered an error when attempting to {}. "
"If this error persists, please create an issue at: \n\n"
"https://github.com/fishtown-analytics/dbt"
.format(operation_name))
class RegistryException(Exception):
pass
def raise_dep_not_found(node, node_description, required_pkg):
raise_compiler_error(
'Error while parsing {}.\nThe required package "{}" was not found. '
'Is the package installed?\nHint: You may need to run '
'`dbt deps`.'.format(node_description, required_pkg), node=node)
def multiple_matching_relations(kwargs, matches):
raise_compiler_error(
'get_relation returned more than one relation with the given args. '
'Please specify a database or schema to narrow down the result set.'
'\n{}\n\n{}'
.format(kwargs, matches))
def get_relation_returned_multiple_results(kwargs, matches):
multiple_matching_relations(kwargs, matches)
def approximate_relation_match(target, relation):
raise_compiler_error(
'When searching for a relation, dbt found an approximate match. '
'Instead of guessing \nwhich relation to use, dbt will move on. '
'Please delete {relation}, or rename it to be less ambiguous.'
'\nSearched for: {target}\nFound: {relation}'
.format(target=target,
relation=relation))
def raise_duplicate_resource_name(node_1, node_2):
duped_name = node_1['name']
raise_compiler_error(
'dbt found two resources with the name "{}". Since these resources '
'have the same name,\ndbt will be unable to find the correct resource '
'when ref("{}") is used. To fix this,\nchange the name of one of '
'these resources:\n- {} ({})\n- {} ({})'.format(
duped_name,
duped_name,
node_1['unique_id'], node_1['original_file_path'],
node_2['unique_id'], node_2['original_file_path']))
def raise_ambiguous_alias(node_1, node_2):
duped_name = "{}.{}".format(node_1['schema'], node_1['alias'])
raise_compiler_error(
'dbt found two resources with the database representation "{}".\ndbt '
'cannot create two resources with identical database representations. '
'To fix this,\nchange the "schema" or "alias" configuration of one of '
'these resources:\n- {} ({})\n- {} ({})'.format(
duped_name,
node_1['unique_id'], node_1['original_file_path'],
node_2['unique_id'], node_2['original_file_path']))
def raise_ambiguous_catalog_match(unique_id, match_1, match_2):
def get_match_string(match):
return "{}.{}".format(
match.get('metadata', {}).get('schema'),
match.get('metadata', {}).get('name'))
raise_compiler_error(
'dbt found two relations in your warehouse with similar database '
'identifiers. dbt\nis unable to determine which of these relations '
'was created by the model "{unique_id}".\nIn order for dbt to '
'correctly generate the catalog, one of the following relations must '
'be deleted or renamed:\n\n - {match_1_s}\n - {match_2_s}'.format(
unique_id=unique_id,
match_1_s=get_match_string(match_1),
match_2_s=get_match_string(match_2),
))
def raise_patch_targets_not_found(patches):
patch_list = '\n\t'.join(
'model {} (referenced in path {})'.format(p.name, p.original_file_path)
for p in patches.values()
)
raise_compiler_error(
'dbt could not find models for the following patches:\n\t{}'.format(
patch_list
)
)
def raise_duplicate_patch_name(name, patch_1, patch_2):
raise_compiler_error(
'dbt found two schema.yml entries for the same model named {}. The '
'first patch was specified in {} and the second in {}. Models and '
'their associated columns may only be described a single time.'.format(
name,
patch_1,
patch_2,
)
)
def raise_invalid_schema_yml_version(path, issue):
raise_compiler_error(
'The schema file at {} is invalid because {}. Please consult the '
'documentation for more information on schema.yml syntax:\n\n'
'https://docs.getdbt.com/docs/schemayml-files'
.format(path, issue)
)
def raise_unrecognized_credentials_type(typename, supported_types):
raise_compiler_error(
'Unrecognized credentials type "{}" - supported types are ({})'
.format(typename, ', '.join('"{}"'.format(t) for t in supported_types))
)
def raise_not_implemented(msg):
raise NotImplementedException(
"ERROR: {}"
.format(msg))
def warn_or_error(msg, node=None, log_fmt=None):
if dbt.flags.WARN_ERROR:
raise_compiler_error(msg, node)
else:
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
def warn_or_raise(exc, log_fmt=None):
if dbt.flags.WARN_ERROR:
raise exc
else:
msg = str(exc)
if log_fmt is not None:
msg = log_fmt.format(msg)
logger.warning(msg)
# Update this when a new function should be added to the
# dbt context's `exceptions` key!
CONTEXT_EXPORTS = {
fn.__name__: fn
for fn in
[
missing_config,
missing_materialization,
missing_relation,
raise_ambiguous_alias,
raise_ambiguous_catalog_match,
raise_cache_inconsistent,
raise_compiler_error,
raise_database_error,
raise_dep_not_found,
raise_dependency_error,
raise_duplicate_patch_name,
raise_duplicate_resource_name,
raise_invalid_schema_yml_version,
raise_not_implemented,
relation_wrong_type,
]
}
def wrapper(model):
def wrap(func):
@functools.wraps(func)
def inner(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception:
exc_type, exc, exc_tb = sys.exc_info()
if hasattr(exc, 'node') and exc.node is None:
exc.node = model
six.reraise(exc_type, exc, exc_tb)
return inner
return wrap
def wrapped_exports(model):
wrap = wrapper(model)
return {
name: wrap(export) for name, export in CONTEXT_EXPORTS.items()
}
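
Not part of the diff: a rough illustration of how `wrapped_exports()` ties these pieces together. Any exception raised by a context export gets the active model attached as `exc.node`, so both the rendered message and the JSON-RPC `data()` payload carry node context. The model dict below is a hand-rolled stand-in for a parsed node.

import dbt.exceptions as dbt_exceptions

model = {
    'name': 'my_model',
    'unique_id': 'model.my_project.my_model',
    'resource_type': 'model',
    'original_file_path': 'models/my_model.sql',
    'raw_sql': 'select 1 as id',
    'injected_sql': 'select 1 as id',
}
exports = dbt_exceptions.wrapped_exports(model)
try:
    # missing_relation() raises a CompilationException; wrapper() fills in exc.node.
    exports['missing_relation']('analytics.my_table')
except dbt_exceptions.CompilationException as exc:
    print(exc)          # roughly: "Compilation Error in model my_model (models/my_model.sql) ..."
    print(exc.data())   # includes 'raw_sql' and 'compiled_sql' pulled from the node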

29
core/dbt/flags.py Normal file

@@ -0,0 +1,29 @@
STRICT_MODE = False
FULL_REFRESH = False
USE_CACHE = True
WARN_ERROR = False
TEST_NEW_PARSER = False
def reset():
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER
STRICT_MODE = False
FULL_REFRESH = False
USE_CACHE = True
WARN_ERROR = False
TEST_NEW_PARSER = False
def set_from_args(args):
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER
USE_CACHE = getattr(args, 'use_cache', True)
FULL_REFRESH = getattr(args, 'full_refresh', False)
STRICT_MODE = getattr(args, 'strict', False)
WARN_ERROR = (
STRICT_MODE or
getattr(args, 'warn_error', False)
)
TEST_NEW_PARSER = getattr(args, 'test_new_parser', False)
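
Not part of the diff: these module-level globals are typically populated once from the parsed CLI arguments. A sketch using a bare `argparse.Namespace` as a stand-in for dbt's real argument parser:

import argparse
import dbt.flags as flags

args = argparse.Namespace(use_cache=True, full_refresh=False,
                          strict=True, warn_error=False,
                          test_new_parser=False)
flags.set_from_args(args)
assert flags.STRICT_MODE is True
assert flags.WARN_ERROR is True   # strict mode implies warnings-as-errors
flags.reset()                     # restore the defaults listed at the top of the module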

348
core/dbt/graph/selector.py Normal file

@@ -0,0 +1,348 @@
import networkx as nx
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.utils import is_enabled, coalesce
from dbt.node_types import NodeType
import dbt.exceptions
SELECTOR_PARENTS = '+'
SELECTOR_CHILDREN = '+'
SELECTOR_GLOB = '*'
SELECTOR_CHILDREN_AND_ANCESTORS = '@'
SELECTOR_DELIMITER = ':'
class SelectionCriteria(object):
def __init__(self, node_spec):
self.raw = node_spec
self.select_children = False
self.select_parents = False
self.select_childrens_parents = False
self.selector_type = SELECTOR_FILTERS.FQN
if node_spec.startswith(SELECTOR_CHILDREN_AND_ANCESTORS):
self.select_childrens_parents = True
node_spec = node_spec[1:]
if node_spec.startswith(SELECTOR_PARENTS):
self.select_parents = True
node_spec = node_spec[1:]
if node_spec.endswith(SELECTOR_CHILDREN):
self.select_children = True
node_spec = node_spec[:-1]
if self.select_children and self.select_childrens_parents:
raise dbt.exceptions.RuntimeException(
'Invalid node spec {} - "@" prefix and "+" suffix are '
'incompatible'.format(self.raw)
)
if SELECTOR_DELIMITER in node_spec:
selector_parts = node_spec.split(SELECTOR_DELIMITER, 1)
self.selector_type, self.selector_value = selector_parts
else:
self.selector_value = node_spec
class SELECTOR_FILTERS(object):
FQN = 'fqn'
TAG = 'tag'
SOURCE = 'source'
def split_specs(node_specs):
specs = set()
for spec in node_specs:
parts = spec.split(" ")
specs.update(parts)
return specs
def get_package_names(graph):
return set([node.split(".")[1] for node in graph.nodes()])
def is_selected_node(real_node, node_selector):
for i, selector_part in enumerate(node_selector):
is_last = (i == len(node_selector) - 1)
# if we hit a GLOB, then this node is selected
if selector_part == SELECTOR_GLOB:
return True
# match package.node_name or package.dir.node_name
elif is_last and selector_part == real_node[-1]:
return True
elif len(real_node) <= i:
return False
elif real_node[i] == selector_part:
continue
else:
return False
# if we get all the way down here, then the node is a match
return True
def _node_is_match(qualified_name, package_names, fqn):
"""Determine if a qualfied name matches an fqn, given the set of package
names in the graph.
:param List[str] qualified_name: The components of the selector or node
name, split on '.'.
:param Set[str] package_names: The set of package names in the graph.
:param List[str] fqn: The node's fully qualified name in the graph.
"""
if len(qualified_name) == 1 and fqn[-1] == qualified_name[0]:
return True
if qualified_name[0] in package_names:
if is_selected_node(fqn, qualified_name):
return True
for package_name in package_names:
local_qualified_node_name = [package_name] + qualified_name
if is_selected_node(fqn, local_qualified_node_name):
return True
return False
def warn_if_useless_spec(spec, nodes):
if len(nodes) > 0:
return
msg = (
"* Spec='{}' does not identify any models"
.format(spec['raw'])
)
dbt.exceptions.warn_or_error(msg, log_fmt='{} and was ignored\n')
class NodeSelector(object):
def __init__(self, linker, manifest):
self.linker = linker
self.manifest = manifest
def _node_iterator(self, graph, exclude, include):
for node in graph.nodes():
real_node = self.manifest.nodes[node]
if include is not None and real_node.resource_type not in include:
continue
if exclude is not None and real_node.resource_type in exclude:
continue
yield node, real_node
def parsed_nodes(self, graph):
return self._node_iterator(
graph,
exclude=(NodeType.Source,),
include=None)
def source_nodes(self, graph):
return self._node_iterator(
graph,
exclude=None,
include=(NodeType.Source,))
def get_nodes_by_qualified_name(self, graph, qualified_name_selector):
"""Yield all nodes in the graph that match the qualified_name_selector.
:param str qualified_name_selector: The selector or node name
"""
qualified_name = qualified_name_selector.split(".")
package_names = get_package_names(graph)
for node, real_node in self.parsed_nodes(graph):
if _node_is_match(qualified_name, package_names, real_node.fqn):
yield node
def get_nodes_by_tag(self, graph, tag_name):
""" yields nodes from graph that have the specified tag """
for node, real_node in self.parsed_nodes(graph):
if tag_name in real_node.tags:
yield node
def get_nodes_by_source(self, graph, source_full_name):
"""yields nodes from graph are the specified source."""
parts = source_full_name.split('.')
target_package = SELECTOR_GLOB
if len(parts) == 1:
target_source, target_table = parts[0], None
elif len(parts) == 2:
target_source, target_table = parts
elif len(parts) == 3:
target_package, target_source, target_table = parts
else: # len(parts) > 3 or len(parts) == 0
msg = (
'Invalid source selector value "{}". Sources must be of the '
'form `${{source_name}}`, '
'`${{source_name}}.${{target_name}}`, or '
'`${{package_name}}.${{source_name}}.${{target_name}}`'
).format(source_full_name)
raise dbt.exceptions.RuntimeException(msg)
for node, real_node in self.source_nodes(graph):
if target_package not in (real_node.package_name, SELECTOR_GLOB):
continue
if target_source not in (real_node.source_name, SELECTOR_GLOB):
continue
if target_table in (None, real_node.name, SELECTOR_GLOB):
yield node
def select_childrens_parents(self, graph, selected):
ancestors_for = self.select_children(graph, selected) | selected
return self.select_parents(graph, ancestors_for) | ancestors_for
def select_children(self, graph, selected):
descendants = set()
for node in selected:
descendants.update(nx.descendants(graph, node))
return descendants
def select_parents(self, graph, selected):
ancestors = set()
for node in selected:
ancestors.update(nx.ancestors(graph, node))
return ancestors
def collect_models(self, graph, selected, spec):
additional = set()
if spec.select_childrens_parents:
additional.update(self.select_childrens_parents(graph, selected))
if spec.select_parents:
additional.update(self.select_parents(graph, selected))
if spec.select_children:
additional.update(self.select_children(graph, selected))
return additional
def collect_tests(self, graph, model_nodes):
test_nodes = set()
for node in model_nodes:
# include tests that depend on this node. if we aren't running
# tests, they'll be filtered out later.
child_tests = [n for n in graph.successors(node)
if self.manifest.nodes[n].resource_type ==
NodeType.Test]
test_nodes.update(child_tests)
return test_nodes
def get_nodes_from_spec(self, graph, spec):
filter_map = {
SELECTOR_FILTERS.FQN: self.get_nodes_by_qualified_name,
SELECTOR_FILTERS.TAG: self.get_nodes_by_tag,
SELECTOR_FILTERS.SOURCE: self.get_nodes_by_source,
}
filter_method = filter_map.get(spec.selector_type)
if filter_method is None:
valid_selectors = ", ".join(filter_map.keys())
logger.info("The '{}' selector specified in {} is invalid. Must "
"be one of [{}]".format(
spec.selector_type,
spec.raw,
valid_selectors))
return set()
collected = set(filter_method(graph, spec.selector_value))
collected.update(self.collect_models(graph, collected, spec))
collected.update(self.collect_tests(graph, collected))
return collected
def select_nodes(self, graph, raw_include_specs, raw_exclude_specs):
selected_nodes = set()
for raw_spec in split_specs(raw_include_specs):
spec = SelectionCriteria(raw_spec)
included_nodes = self.get_nodes_from_spec(graph, spec)
selected_nodes.update(included_nodes)
for raw_spec in split_specs(raw_exclude_specs):
spec = SelectionCriteria(raw_spec)
excluded_nodes = self.get_nodes_from_spec(graph, spec)
selected_nodes.difference_update(excluded_nodes)
return selected_nodes
def _is_graph_member(self, node_name):
node = self.manifest.nodes[node_name]
if node.resource_type == NodeType.Source:
return True
return not node.get('empty') and is_enabled(node)
def get_valid_nodes(self, graph):
return [
node_name for node_name in graph.nodes()
if self._is_graph_member(node_name)
]
def _is_match(self, node_name, resource_types, tags, required):
node = self.manifest.nodes[node_name]
if node.resource_type not in resource_types:
return False
tags = set(tags)
if tags and not bool(set(node.tags) & tags):
# there are tags specified but none match
return False
for attr in required:
if not getattr(node, attr):
return False
return True
def get_selected(self, include, exclude, resource_types, tags, required):
graph = self.linker.graph
include = coalesce(include, ['fqn:*', 'source:*'])
exclude = coalesce(exclude, [])
tags = coalesce(tags, [])
to_run = self.get_valid_nodes(graph)
filtered_graph = graph.subgraph(to_run)
selected_nodes = self.select_nodes(filtered_graph, include, exclude)
filtered_nodes = set()
for node_name in selected_nodes:
if self._is_match(node_name, resource_types, tags, required):
filtered_nodes.add(node_name)
return filtered_nodes
def select(self, query):
include = query.get('include')
exclude = query.get('exclude')
resource_types = query.get('resource_types')
tags = query.get('tags')
required = query.get('required', ())
addin_ephemeral_nodes = query.get('addin_ephemeral_nodes', True)
selected = self.get_selected(include, exclude, resource_types, tags,
required)
# if you haven't selected any nodes, return that so we can give the
# nice "no models selected" message.
if not selected:
return selected
# we used to carefully go through all node ancestors and add those if
# they were ephemeral. Sadly, the algorithm we used ended up being
# O(n^2). Instead, since ephemeral nodes are almost free, just add all
# ephemeral nodes in the graph.
# someday at large enough scale we might want to prune it to only be
# ancestors of the selected nodes so we can skip the compile.
if addin_ephemeral_nodes:
addins = {
uid for uid, node in self.manifest.nodes.items()
if node.is_ephemeral_model
}
else:
addins = set()
return selected | addins
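
Not part of the diff: a quick look at how the selector prefixes and `type:value` delimiters above decompose, using made-up node and tag names:

from dbt.graph.selector import SelectionCriteria

spec = SelectionCriteria('@my_model')
# select_childrens_parents=True, selector_type='fqn', selector_value='my_model'

spec = SelectionCriteria('+tag:nightly+')
# select_parents=True, select_children=True,
# selector_type='tag', selector_value='nightly'

spec = SelectionCriteria('source:raw_events.page_views')
# selector_type='source'; get_nodes_by_source() splits the value into
# the source name ('raw_events') and table name ('page_views')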


@@ -12,7 +12,7 @@ class ModelHookType:
def _parse_hook_to_dict(hook_string):
try:
hook_dict = json.loads(hook_string)
except ValueError as e:
except ValueError:
hook_dict = {"sql": hook_string}
if 'transaction' not in hook_dict:
@@ -32,7 +32,7 @@ def get_hook_dict(hook, index):
def get_hooks(model, hook_key):
hooks = model.get('config', {}).get(hook_key, [])
hooks = model.config.get(hook_key, [])
if not isinstance(hooks, (list, tuple)):
hooks = [hooks]


@@ -0,0 +1 @@
__path__ = __import__('pkgutil').extend_path(__path__, __name__)


@@ -0,0 +1,11 @@
import os
PACKAGE_PATH = os.path.dirname(__file__)
PROJECT_NAME = 'dbt'
DOCS_INDEX_FILE_PATH = os.path.normpath(
os.path.join(PACKAGE_PATH, '..', "index.html"))
# Adapter registration will add to this
PACKAGES = {PROJECT_NAME: PACKAGE_PATH}


@@ -2,4 +2,5 @@
name: dbt
version: 1.0
docs-paths: ['docs']
macro-paths: ["macros"]


@@ -0,0 +1,43 @@
{% docs __overview__ %}
### Welcome!
Welcome to the auto-generated documentation for your dbt project!
### Navigation
You can use the `Project` and `Database` navigation tabs on the left side of the window to explore the models
in your project.
#### Project Tab
The `Project` tab mirrors the directory structure of your dbt project. In this tab, you can see all of the
models defined in your dbt project, as well as models imported from dbt packages.
#### Database Tab
The `Database` tab also exposes your models, but in a format that looks more like a database explorer. This view
shows relations (tables and views) grouped into database schemas. Note that ephemeral models are _not_ shown
in this interface, as they do not exist in the database.
### Graph Exploration
You can click the blue icon on the bottom-right corner of the page to view the lineage graph of your models.
On model pages, you'll see the immediate parents and children of the model you're exploring. By clicking the `Expand`
button at the top-right of this lineage pane, you'll be able to see all of the models that are used to build,
or are built from, the model you're exploring.
Once expanded, you'll be able to use the `--models` and `--exclude` model selection syntax to filter the
models in the graph. For more information on model selection, check out the [dbt docs](https://docs.getdbt.com/docs/model-selection-syntax).
Note that you can also right-click on models to interactively filter and explore the graph.
---
### More information
- [What is dbt](https://docs.getdbt.com/docs/overview)?
- Read the [dbt viewpoint](https://docs.getdbt.com/docs/viewpoint)
- [Installation](https://docs.getdbt.com/docs/installation)
- Join the [chat](https://slack.getdbt.com/) on Slack for live questions and support.
{% enddocs %}


@@ -0,0 +1,269 @@
{% macro adapter_macro(name) -%}
{% set original_name = name %}
{% if '.' in name %}
{% set package_name, name = name.split(".", 1) %}
{% else %}
{% set package_name = none %}
{% endif %}
{% if package_name is none %}
{% set package_context = context %}
{% elif package_name in context %}
{% set package_context = context[package_name] %}
{% else %}
{% set error_msg %}
In adapter_macro: could not find package '{{package_name}}', called with '{{original_name}}'
{% endset %}
{{ exceptions.raise_compiler_error(error_msg | trim) }}
{% endif %}
{%- set separator = '__' -%}
{%- set search_name = adapter.type() + separator + name -%}
{%- set default_name = 'default' + separator + name -%}
{%- if package_context.get(search_name) is not none -%}
{{ return(package_context[search_name](*varargs, **kwargs)) }}
{%- else -%}
{{ return(package_context[default_name](*varargs, **kwargs)) }}
{%- endif -%}
{%- endmacro %}
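{# Not part of the diff: adapter_macro(name, ...) looks up '<adapter type>__<name>'
   in the resolved package context first (e.g. the key would be
   'postgres__create_schema' when adapter.type() == 'postgres') and falls back to
   'default__<name>', which is why each macro below ships a 'default__'
   implementation. #}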
{% macro get_columns_in_query(select_sql) -%}
{{ return(adapter_macro('get_columns_in_query', select_sql)) }}
{% endmacro %}
{% macro default__get_columns_in_query(select_sql) %}
{% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}
select * from (
{{ select_sql }}
) as __dbt_sbq
where false
limit 0
{% endcall %}
{{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}
{% endmacro %}
{% macro create_schema(database_name, schema_name) -%}
{{ adapter_macro('create_schema', database_name, schema_name) }}
{% endmacro %}
{% macro default__create_schema(database_name, schema_name) -%}
{%- call statement('create_schema') -%}
create schema if not exists {{database_name}}.{{schema_name}}
{% endcall %}
{% endmacro %}
{% macro drop_schema(database_name, schema_name) -%}
{{ adapter_macro('drop_schema', database_name, schema_name) }}
{% endmacro %}
{% macro default__drop_schema(database_name, schema_name) -%}
{%- call statement('drop_schema') -%}
drop schema if exists {{database_name}}.{{schema_name}} cascade
{% endcall %}
{% endmacro %}
{% macro create_table_as(temporary, relation, sql) -%}
{{ adapter_macro('create_table_as', temporary, relation, sql) }}
{%- endmacro %}
{% macro default__create_table_as(temporary, relation, sql) -%}
create {% if temporary: -%}temporary{%- endif %} table
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
as (
{{ sql }}
);
{% endmacro %}
{% macro create_view_as(relation, sql) -%}
{{ adapter_macro('create_view_as', relation, sql) }}
{%- endmacro %}
{% macro default__create_view_as(relation, sql) -%}
create view {{ relation }} as (
{{ sql }}
);
{% endmacro %}
{% macro get_catalog(information_schemas) -%}
{{ return(adapter_macro('get_catalog', information_schemas)) }}
{%- endmacro %}
{% macro default__get_catalog(information_schemas) -%}
{% set typename = adapter.type() %}
{% set msg -%}
get_catalog not implemented for {{ typename }}
{%- endset %}
{{ exceptions.raise_compiler_error(msg) }}
{% endmacro %}
{% macro get_columns_in_relation(relation) -%}
{{ return(adapter_macro('get_columns_in_relation', relation)) }}
{% endmacro %}
{% macro sql_convert_columns_in_relation(table) -%}
{% set columns = [] %}
{% for row in table %}
{% do columns.append(api.Column(*row)) %}
{% endfor %}
{{ return(columns) }}
{% endmacro %}
{% macro default__get_columns_in_relation(relation) -%}
{{ exceptions.raise_not_implemented(
'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro alter_column_type(relation, column_name, new_column_type) -%}
{{ return(adapter_macro('alter_column_type', relation, column_name, new_column_type)) }}
{% endmacro %}
{% macro default__alter_column_type(relation, column_name, new_column_type) -%}
{#
1. Create a new column (w/ temp name and correct type)
2. Copy data over to it
3. Drop the existing column (cascade!)
4. Rename the new column to existing column
#}
{%- set tmp_column = column_name + "__dbt_alter" -%}
{% call statement('alter_column_type') %}
alter table {{ relation }} add column {{ tmp_column }} {{ new_column_type }};
update {{ relation }} set {{ tmp_column }} = {{ column_name }};
alter table {{ relation }} drop column {{ column_name }} cascade;
alter table {{ relation }} rename column {{ tmp_column }} to {{ column_name }}
{% endcall %}
{% endmacro %}
{% macro drop_relation(relation) -%}
{{ return(adapter_macro('drop_relation', relation)) }}
{% endmacro %}
{% macro default__drop_relation(relation) -%}
{% call statement('drop_relation', auto_begin=False) -%}
drop {{ relation.type }} if exists {{ relation }} cascade
{%- endcall %}
{% endmacro %}
{% macro truncate_relation(relation) -%}
{{ return(adapter_macro('truncate_relation', relation)) }}
{% endmacro %}
{% macro default__truncate_relation(relation) -%}
{% call statement('truncate_relation') -%}
truncate table {{ relation }}
{%- endcall %}
{% endmacro %}
{% macro rename_relation(from_relation, to_relation) -%}
{{ return(adapter_macro('rename_relation', from_relation, to_relation)) }}
{% endmacro %}
{% macro default__rename_relation(from_relation, to_relation) -%}
{% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}
{% call statement('rename_relation') -%}
alter table {{ from_relation }} rename to {{ target_name }}
{%- endcall %}
{% endmacro %}
{% macro information_schema_name(database) %}
{{ return(adapter_macro('information_schema_name', database)) }}
{% endmacro %}
{% macro default__information_schema_name(database) -%}
{%- if database -%}
{{ adapter.quote_as_configured(database, 'database') }}.information_schema
{%- else -%}
information_schema
{%- endif -%}
{%- endmacro %}
{% macro list_schemas(database) -%}
{{ return(adapter_macro('list_schemas', database)) }}
{% endmacro %}
{% macro default__list_schemas(database) -%}
{% call statement('list_schemas', fetch_result=True, auto_begin=False) %}
select distinct schema_name
from {{ information_schema_name(database) }}.schemata
where catalog_name ilike '{{ database }}'
{% endcall %}
{{ return(load_result('list_schemas').table) }}
{% endmacro %}
{% macro check_schema_exists(information_schema, schema) -%}
{{ return(adapter_macro('check_schema_exists', information_schema, schema)) }}
{% endmacro %}
{% macro default__check_schema_exists(information_schema, schema) -%}
{% call statement('check_schema_exists', fetch_result=True, auto_begin=False) -%}
select count(*)
from {{ information_schema }}.schemata
where catalog_name='{{ information_schema.database }}'
and schema_name='{{ schema }}'
{%- endcall %}
{{ return(load_result('check_schema_exists').table) }}
{% endmacro %}
{% macro list_relations_without_caching(information_schema, schema) %}
{{ return(adapter_macro('list_relations_without_caching', information_schema, schema)) }}
{% endmacro %}
{% macro default__list_relations_without_caching(information_schema, schema) %}
{{ exceptions.raise_not_implemented(
'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}
{% endmacro %}
{% macro current_timestamp() -%}
{{ adapter_macro('current_timestamp') }}
{%- endmacro %}
{% macro default__current_timestamp() -%}
{{ exceptions.raise_not_implemented(
'current_timestamp macro not implemented for adapter '+adapter.type()) }}
{%- endmacro %}
{% macro collect_freshness(source, loaded_at_field) %}
{{ return(adapter_macro('collect_freshness', source, loaded_at_field))}}
{% endmacro %}
{% macro default__collect_freshness(source, loaded_at_field) %}
{% call statement('check_schema_exists', fetch_result=True, auto_begin=False) -%}
select
max({{ loaded_at_field }}) as max_loaded_at,
{{ current_timestamp() }} as snapshotted_at
from {{ source }}
{% endcall %}
{{ return(load_result('check_schema_exists').table) }}
{% endmacro %}
{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}
{{ return(adapter_macro('make_temp_relation', base_relation, suffix))}}
{% endmacro %}
{% macro default__make_temp_relation(base_relation, suffix) %}
{% set tmp_identifier = base_relation.identifier ~ suffix %}
{% set tmp_relation = base_relation.incorporate(
path={"identifier": tmp_identifier},
table_name=tmp_identifier) -%}
{% do return(tmp_relation) %}
{% endmacro %}


@@ -48,9 +48,13 @@
{% set start_date = partition_range[0] %}
{% set end_date = partition_range[1] %}
{% else %}
{{ dbt.exceptions.raise_compiler_error("Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: " ~ raw_partition_date) }}
{{ exceptions.raise_compiler_error("Invalid partition time. Expected format: {Start Date}[,{End Date}]. Got: " ~ raw_partition_date) }}
{% endif %}
{{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}
{% endmacro %}
{% macro py_current_timestring() %}
{% set dt = modules.datetime.datetime.now() %}
{% do return(dt.strftime("%Y%m%d%H%M%S%f")) %}
{% endmacro %}


@@ -0,0 +1,27 @@
{#
Renders an alias name given a custom alias name. If the custom
alias name is none, then the resulting alias is just the filename of the
model. If an alias override is specified, then that is used.
This macro can be overridden in projects to define different semantics
for rendering an alias name.
Arguments:
custom_alias_name: The custom alias name specified for a model, or none
node: The available node that an alias is being generated for, or none
#}
{% macro generate_alias_name(custom_alias_name=none, node=none) -%}
{%- if custom_alias_name is none -%}
{{ node.name }}
{%- else -%}
{{ custom_alias_name | trim }}
{%- endif -%}
{%- endmacro %}
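{# Not part of the diff, illustrative only: with this default, a model defined in
   stg_payments.sql and configured with {{ config(alias='payments') }} builds a
   relation named 'payments', while the same model without an alias config builds
   'stg_payments'. Projects can override this macro (for example, to prefix
   aliases per target) without modifying dbt itself. #}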


@@ -3,7 +3,7 @@
Renders a schema name given a custom schema name. If the custom
schema name is none, then the resulting schema is just the "schema"
value in the specified target. If a schema override is specified, then
the resulting schema is the default schema concatenated with the
the resulting schema is the default schema concatenated with the
custom schema.
This macro can be overridden in projects to define different semantics
@@ -11,9 +11,10 @@
Arguments:
custom_schema_name: The custom schema name specified for a model, or none
node: The node the schema is being generated for
#}
{% macro generate_schema_name(custom_schema_name=none) -%}
{% macro generate_schema_name(custom_schema_name, node) -%}
{%- set default_schema = target.schema -%}
{%- if custom_schema_name is none -%}
@@ -36,9 +37,10 @@
Arguments:
custom_schema_name: The custom schema name specified for a model, or none
node: The node the schema is being generated for
#}
{% macro generate_schema_name_for_env(custom_schema_name=none) -%}
{% macro generate_schema_name_for_env(custom_schema_name, node) -%}
{%- set default_schema = target.schema -%}
{%- if target.name == 'prod' and custom_schema_name is not none -%}


@@ -0,0 +1,18 @@
{% macro table_options() %}
{%- set raw_persist_docs = config.get('persist_docs', {}) -%}
{%- endmacro -%}
{% macro get_relation_comment(persist_docs, model) %}
{%- if persist_docs is not mapping -%}
{{ exceptions.raise_compiler_error("Invalid value provided for 'persist_docs'. Expected dict but got value: " ~ raw_persist_docs) }}
{% endif %}
{% if persist_docs.get('relation', false) %}
{{ return((model.description | tojson)[1:-1]) }}
{%- else -%}
{{ return(none) }}
{% endif %}
{% endmacro %}


@@ -0,0 +1,13 @@
{% macro is_incremental() %}
{#-- do not run introspective queries in parsing #}
{% if not execute %}
{{ return(False) }}
{% else %}
{% set relation = adapter.get_relation(this.database, this.schema, this.table) %}
{{ return(relation is not none
and relation.type == 'table'
and model.config.materialized == 'incremental'
and not flags.FULL_REFRESH) }}
{% endif %}
{% endmacro %}
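{# Not part of the diff: a typical use of is_incremental() inside a model body
   (the source, table, and column names here are made up):
   select * from {{ source('app', 'events') }}
   {% if is_incremental() %}
     where event_time > (select max(event_time) from {{ this }})
   {% endif %}
#}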


@@ -0,0 +1,8 @@
{% macro run_query(sql) %}
{% call statement("run_query_statement", fetch_result=true, auto_begin=false) %}
{{ sql }}
{% endcall %}
{% do return(load_result("run_query_statement").table) %}
{% endmacro %}


@@ -0,0 +1,71 @@
{% macro get_merge_sql(target, source, unique_key, dest_columns) -%}
{{ adapter_macro('get_merge_sql', target, source, unique_key, dest_columns) }}
{%- endmacro %}
{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ adapter_macro('get_delete_insert_merge_sql', target, source, unique_key, dest_columns) }}
{%- endmacro %}
{% macro common_get_merge_sql(target, source, unique_key, dest_columns) -%}
{%- set dest_cols_csv = dest_columns | map(attribute="name") | join(', ') -%}
merge into {{ target }} as DBT_INTERNAL_DEST
using {{ source }} as DBT_INTERNAL_SOURCE
{% if unique_key %}
on DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}
{% else %}
on FALSE
{% endif %}
{% if unique_key %}
when matched then update set
{% for column in dest_columns -%}
{{ column.name }} = DBT_INTERNAL_SOURCE.{{ column.name }}
{%- if not loop.last %}, {%- endif %}
{%- endfor %}
{% endif %}
when not matched then insert
({{ dest_cols_csv }})
values
({{ dest_cols_csv }})
{%- endmacro %}
{% macro default__get_merge_sql(target, source, unique_key, dest_columns) -%}
{% set typename = adapter.type() %}
{{ exceptions.raise_compiler_error(
'get_merge_sql is not implemented for {}'.format(typename)
)
}}
{% endmacro %}
{% macro common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{%- set dest_cols_csv = dest_columns | map(attribute="name") | join(', ') -%}
{% if unique_key is not none %}
delete from {{ target }}
where ({{ unique_key }}) in (
select ({{ unique_key }})
from {{ source }}
);
{% endif %}
insert into {{ target }} ({{ dest_cols_csv }})
(
select {{ dest_cols_csv }}
from {{ source }}
);
{%- endmacro %}
{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}
{{ common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) }}
{% endmacro %}


@@ -14,14 +14,14 @@
{% macro column_list(columns) %}
{%- for col in columns %}
"{{ col.name }}" {% if not loop.last %},{% endif %}
{{ col.name }} {% if not loop.last %},{% endif %}
{% endfor -%}
{% endmacro %}
{% macro column_list_for_create_table(columns) %}
{%- for col in columns %}
"{{ col.name }}" {{ col.data_type }} {%- if not loop.last %},{% endif %}
{{ col.name }} {{ col.data_type }} {%- if not loop.last %},{% endif %}
{% endfor -%}
{% endmacro %}
@@ -45,10 +45,8 @@
{{ make_hook_config(sql, inside_transaction=False) }}
{% endmacro %}
{% macro drop_if_exists(existing, schema, name) %}
{% set existing_type = existing.get(name) %}
{% if existing_type is not none %}
{{ adapter.drop(schema, name, existing_type) }}
{% macro drop_relation_if_exists(relation) %}
{% if relation is not none %}
{{ adapter.drop_relation(relation) }}
{% endif %}
{% endmacro %}


@@ -0,0 +1,82 @@
{% macro dbt__incremental_delete(target_relation, tmp_relation) -%}
{%- set unique_key = config.require('unique_key') -%}
delete
from {{ target_relation }}
where ({{ unique_key }}) in (
select ({{ unique_key }})
from {{ tmp_relation.include(schema=False, database=False) }}
);
{%- endmacro %}
{% materialization incremental, default -%}
{%- set unique_key = config.get('unique_key') -%}
{%- set identifier = model['alias'] -%}
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%}
{%- set tmp_relation = make_temp_relation(target_relation) %}
{%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%}
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
{%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%}
{%- set should_drop = (full_refresh_mode or exists_not_as_table) -%}
-- setup
{% if old_relation is none -%}
-- noop
{%- elif should_drop -%}
{{ adapter.drop_relation(old_relation) }}
{%- set old_relation = none -%}
{%- endif %}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}
-- build model
{% if full_refresh_mode or old_relation is none -%}
{%- call statement('main') -%}
{{ create_table_as(False, target_relation, sql) }}
{%- endcall -%}
{%- else -%}
{%- call statement() -%}
{{ dbt.create_table_as(True, tmp_relation, sql) }}
{%- endcall -%}
{{ adapter.expand_target_column_types(from_relation=tmp_relation,
to_relation=target_relation) }}
{%- call statement('main') -%}
{% set dest_columns = adapter.get_columns_in_relation(target_relation) %}
{% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %}
{% if unique_key is not none -%}
{{ dbt__incremental_delete(target_relation, tmp_relation) }}
{%- endif %}
insert into {{ target_relation }} ({{ dest_cols_csv }})
(
select {{ dest_cols_csv }}
from {{ tmp_relation }}
);
{% endcall %}
{%- endif %}
{{ run_hooks(post_hooks, inside_transaction=True) }}
-- `COMMIT` happens here
{{ adapter.commit() }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{%- endmaterialization %}


@@ -0,0 +1,135 @@
{% macro create_csv_table(model) -%}
{{ adapter_macro('create_csv_table', model) }}
{%- endmacro %}
{% macro reset_csv_table(model, full_refresh, old_relation) -%}
{{ adapter_macro('reset_csv_table', model, full_refresh, old_relation) }}
{%- endmacro %}
{% macro load_csv_rows(model) -%}
{{ adapter_macro('load_csv_rows', model) }}
{%- endmacro %}
{% macro default__create_csv_table(model) %}
{%- set agate_table = model['agate_table'] -%}
{%- set column_override = model['config'].get('column_types', {}) -%}
{% set sql %}
create table {{ this.render(False) }} (
{%- for col_name in agate_table.column_names -%}
{%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%}
{%- set type = column_override.get(col_name, inferred_type) -%}
{{ col_name | string }} {{ type }} {%- if not loop.last -%}, {%- endif -%}
{%- endfor -%}
)
{% endset %}
{% call statement('_') -%}
{{ sql }}
{%- endcall %}
{{ return(sql) }}
{% endmacro %}
{% macro default__reset_csv_table(model, full_refresh, old_relation) %}
{% set sql = "" %}
{% if full_refresh %}
{{ adapter.drop_relation(old_relation) }}
{% set sql = create_csv_table(model) %}
{% else %}
{{ adapter.truncate_relation(old_relation) }}
{% set sql = "truncate table " ~ old_relation %}
{% endif %}
{{ return(sql) }}
{% endmacro %}
{% macro basic_load_csv_rows(model, batch_size) %}
{% set agate_table = model['agate_table'] %}
{% set cols_sql = ", ".join(agate_table.column_names) %}
{% set bindings = [] %}
{% set statements = [] %}
{% for chunk in agate_table.rows | batch(batch_size) %}
{% set bindings = [] %}
{% for row in chunk %}
{% set _ = bindings.extend(row) %}
{% endfor %}
{% set sql %}
insert into {{ this.render(False) }} ({{ cols_sql }}) values
{% for row in chunk -%}
({%- for column in agate_table.column_names -%}
%s
{%- if not loop.last%},{%- endif %}
{%- endfor -%})
{%- if not loop.last%},{%- endif %}
{%- endfor %}
{% endset %}
{% set _ = adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}
{% if loop.index0 == 0 %}
{% set _ = statements.append(sql) %}
{% endif %}
{% endfor %}
{# Return SQL so we can render it out into the compiled files #}
{{ return(statements[0]) }}
{% endmacro %}
{% macro default__load_csv_rows(model) %}
{{ return(basic_load_csv_rows(model, 10000) )}}
{% endmacro %}
{% materialization seed, default %}
{%- set identifier = model['alias'] -%}
{%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%}
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
{%- set csv_table = model["agate_table"] -%}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}
-- build model
{% set create_table_sql = "" %}
{% if exists_as_view %}
{{ exceptions.raise_compiler_error("Cannot seed to '{}', it is a view".format(old_relation)) }}
{% elif exists_as_table %}
{% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation) %}
{% else %}
{% set create_table_sql = create_csv_table(model) %}
{% endif %}
{% set status = 'CREATE' if full_refresh_mode else 'INSERT' %}
{% set num_rows = (csv_table.rows | length) %}
{% set sql = load_csv_rows(model) %}
{% call noop_statement('main', status ~ ' ' ~ num_rows) %}
{{ create_table_sql }};
-- dbt seed --
{{ sql }}
{% endcall %}
{{ run_hooks(post_hooks, inside_transaction=True) }}
-- `COMMIT` happens here
{{ adapter.commit() }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{% endmaterialization %}


@@ -0,0 +1,262 @@
{#
Add new columns to the table if applicable
#}
{% macro create_columns(relation, columns) %}
{{ adapter_macro('create_columns', relation, columns) }}
{% endmacro %}
{% macro default__create_columns(relation, columns) %}
{% for column in columns %}
{% call statement() %}
alter table {{ relation }} add column "{{ column.name }}" {{ column.data_type }};
{% endcall %}
{% endfor %}
{% endmacro %}
{% macro post_snapshot(staging_relation) %}
{{ adapter_macro('post_snapshot', staging_relation) }}
{% endmacro %}
{% macro default__post_snapshot(staging_relation) %}
{# no-op #}
{% endmacro %}
{% macro snapshot_staging_table_inserts(strategy, source_sql, target_relation) -%}
with snapshot_query as (
{{ source_sql }}
),
snapshotted_data as (
select *,
{{ strategy.unique_key }} as dbt_unique_key
from {{ target_relation }}
),
source_data as (
select *,
{{ strategy.scd_id }} as dbt_scd_id,
{{ strategy.unique_key }} as dbt_unique_key,
{{ strategy.updated_at }} as dbt_updated_at,
{{ strategy.updated_at }} as dbt_valid_from,
nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to
from snapshot_query
),
insertions as (
select
'insert' as dbt_change_type,
source_data.*
from source_data
left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key
where snapshotted_data.dbt_unique_key is null
or (
snapshotted_data.dbt_unique_key is not null
and snapshotted_data.dbt_valid_to is null
and (
{{ strategy.row_changed }}
)
)
)
select * from insertions
{%- endmacro %}
{% macro snapshot_staging_table_updates(strategy, source_sql, target_relation) -%}
with snapshot_query as (
{{ source_sql }}
),
snapshotted_data as (
select *,
{{ strategy.unique_key }} as dbt_unique_key
from {{ target_relation }}
),
source_data as (
select
*,
{{ strategy.scd_id }} as dbt_scd_id,
{{ strategy.unique_key }} as dbt_unique_key,
{{ strategy.updated_at }} as dbt_updated_at,
{{ strategy.updated_at }} as dbt_valid_from
from snapshot_query
),
updates as (
select
'update' as dbt_change_type,
snapshotted_data.dbt_scd_id,
source_data.dbt_valid_from as dbt_valid_to
from source_data
join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key
where snapshotted_data.dbt_valid_to is null
and (
{{ strategy.row_changed }}
)
)
select * from updates
{%- endmacro %}
{% macro build_snapshot_table(strategy, sql) %}
select *,
{{ strategy.scd_id }} as dbt_scd_id,
{{ strategy.updated_at }} as dbt_updated_at,
{{ strategy.updated_at }} as dbt_valid_from,
nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to
from (
{{ sql }}
) sbq
{% endmacro %}
{% macro get_or_create_relation(database, schema, identifier, type) %}
{%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}
{% if target_relation %}
{% do return([true, target_relation]) %}
{% endif %}
{%- set new_relation = api.Relation.create(
database=database,
schema=schema,
identifier=identifier,
type=type
) -%}
{% do return([false, new_relation]) %}
{% endmacro %}
{% macro build_snapshot_staging_table(strategy, sql, target_relation) %}
{% set tmp_relation = make_temp_relation(target_relation) %}
{% set inserts_select = snapshot_staging_table_inserts(strategy, sql, target_relation) %}
{% set updates_select = snapshot_staging_table_updates(strategy, sql, target_relation) %}
{% call statement('build_snapshot_staging_relation_inserts') %}
{{ create_table_as(True, tmp_relation, inserts_select) }}
{% endcall %}
{% call statement('build_snapshot_staging_relation_updates') %}
insert into {{ tmp_relation }} (dbt_change_type, dbt_scd_id, dbt_valid_to)
select dbt_change_type, dbt_scd_id, dbt_valid_to from (
{{ updates_select }}
) dbt_sbq;
{% endcall %}
{% do return(tmp_relation) %}
{% endmacro %}
{% materialization snapshot, default %}
{%- set config = model['config'] -%}
{%- set target_database = config.get('target_database') -%}
{%- set target_schema = config.get('target_schema') -%}
{%- set target_table = model.get('alias', model.get('name')) -%}
{%- set strategy_name = config.get('strategy') -%}
{%- set unique_key = config.get('unique_key') %}
{% if not adapter.check_schema_exists(target_database, target_schema) %}
{% do create_schema(target_database, target_schema) %}
{% endif %}
{% set target_relation_exists, target_relation = get_or_create_relation(
database=target_database,
schema=target_schema,
identifier=target_table,
type='table') -%}
{%- if not target_relation.is_table -%}
{% do exceptions.relation_wrong_type(target_relation, 'table') %}
{%- endif -%}
{% set strategy_macro = strategy_dispatch(strategy_name) %}
{% set strategy = strategy_macro(model, "snapshotted_data", "source_data", config, target_relation_exists) %}
{% if not target_relation_exists %}
{% set build_sql = build_snapshot_table(strategy, model['injected_sql']) %}
{% call statement('main') -%}
{{ create_table_as(False, target_relation, build_sql) }}
{% endcall %}
{% else %}
{{ adapter.valid_snapshot_target(target_relation) }}
{% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}
-- this may no-op if the database does not require column expansion
{% do adapter.expand_target_column_types(from_relation=staging_table,
to_relation=target_relation) %}
{% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)
| rejectattr('name', 'equalto', 'dbt_change_type')
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
| rejectattr('name', 'equalto', 'dbt_unique_key')
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
| list %}
{% do create_columns(target_relation, missing_columns) %}
{% set source_columns = adapter.get_columns_in_relation(staging_table)
| rejectattr('name', 'equalto', 'dbt_change_type')
| rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')
| rejectattr('name', 'equalto', 'dbt_unique_key')
| rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')
| list %}
{% set quoted_source_columns = [] %}
{% for column in source_columns %}
{% do quoted_source_columns.append(adapter.quote(column.name)) %}
{% endfor %}
{% call statement('main') %}
{{ snapshot_merge_sql(
target = target_relation,
source = staging_table,
insert_cols = quoted_source_columns
)
}}
{% endcall %}
{% endif %}
{{ adapter.commit() }}
{% if staging_table is defined %}
{% do post_snapshot(staging_table) %}
{% endif %}
{% endmaterialization %}


@@ -0,0 +1,27 @@
{% macro snapshot_merge_sql(target, source, insert_cols) -%}
{{ adapter_macro('snapshot_merge_sql', target, source, insert_cols) }}
{%- endmacro %}
{% macro default__snapshot_merge_sql(target, source, insert_cols) -%}
{%- set insert_cols_csv = insert_cols | join(', ') -%}
merge into {{ target }} as DBT_INTERNAL_DEST
using {{ source }} as DBT_INTERNAL_SOURCE
on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id
when matched
and DBT_INTERNAL_DEST.dbt_valid_to is null
and DBT_INTERNAL_SOURCE.dbt_change_type = 'update'
then update
set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to
when not matched
and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert'
then insert ({{ insert_cols_csv }})
values ({{ insert_cols_csv }})
;
{% endmacro %}


@@ -0,0 +1,118 @@
{#
Dispatch strategies by name, optionally qualified to a package
#}
{% macro strategy_dispatch(name) -%}
{% set original_name = name %}
{% if '.' in name %}
{% set package_name, name = name.split(".", 1) %}
{% else %}
{% set package_name = none %}
{% endif %}
{% if package_name is none %}
{% set package_context = context %}
{% elif package_name in context %}
{% set package_context = context[package_name] %}
{% else %}
{% set error_msg %}
Could not find package '{{package_name}}', called with '{{original_name}}'
{% endset %}
{{ exceptions.raise_compiler_error(error_msg | trim) }}
{% endif %}
{%- set search_name = 'snapshot_' ~ name ~ '_strategy' -%}
{% if search_name not in package_context %}
{% set error_msg %}
The specified strategy macro '{{name}}' was not found in package '{{ package_name }}'
{% endset %}
{{ exceptions.raise_compiler_error(error_msg | trim) }}
{% endif %}
{{ return(package_context[search_name]) }}
{%- endmacro %}
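The dispatch rule above reads as: optionally split a package prefix off the strategy name, then look for a macro named snapshot_<name>_strategy in that package's context. A rough Python sketch of the same lookup; the contexts and strategy macros here are made-up stand-ins for dbt's Jinja context.

# Rough sketch of strategy_dispatch's name resolution (the context dicts are hypothetical).
def strategy_dispatch(name, context):
    package_name = None
    if '.' in name:
        package_name, name = name.split('.', 1)
    if package_name is None:
        package_context = context
    elif package_name in context:
        package_context = context[package_name]
    else:
        raise KeyError("Could not find package '{}'".format(package_name))
    search_name = 'snapshot_{}_strategy'.format(name)
    if search_name not in package_context:
        raise KeyError("Strategy macro '{}' not found".format(name))
    return package_context[search_name]

root = {'snapshot_timestamp_strategy': lambda *args: 'timestamp'}
root['my_pkg'] = {'snapshot_custom_strategy': lambda *args: 'custom'}
assert strategy_dispatch('timestamp', root)() == 'timestamp'
assert strategy_dispatch('my_pkg.custom', root)() == 'custom'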
{#
Create SCD Hash SQL fields cross-db
#}
{% macro snapshot_hash_arguments(args) -%}
{{ adapter_macro('snapshot_hash_arguments', args) }}
{%- endmacro %}
{% macro default__snapshot_hash_arguments(args) -%}
md5({%- for arg in args -%}
coalesce(cast({{ arg }} as varchar ), '')
{% if not loop.last %} || '|' || {% endif %}
{%- endfor -%})
{%- endmacro %}
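The default hash is an md5 over the pipe-delimited, null-safe concatenation of its arguments. A small Python sketch of the expression this macro renders, with hypothetical column names:

# Sketch of the expression default__snapshot_hash_arguments renders (column names are hypothetical).
def snapshot_hash_expr(args):
    parts = ["coalesce(cast({} as varchar), '')".format(arg) for arg in args]
    return 'md5({})'.format(" || '|' || ".join(parts))

print(snapshot_hash_expr(['id', 'updated_at']))
# md5(coalesce(cast(id as varchar), '') || '|' || coalesce(cast(updated_at as varchar), ''))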
{#
Get the current time cross-db
#}
{% macro snapshot_get_time() -%}
{{ adapter_macro('snapshot_get_time') }}
{%- endmacro %}
{% macro default__snapshot_get_time() -%}
{{ current_timestamp() }}
{%- endmacro %}
{#
Core strategy definitions
#}
{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}
{% set primary_key = config['unique_key'] %}
{% set updated_at = config['updated_at'] %}
{% set row_changed_expr -%}
({{ snapshotted_rel }}.{{ updated_at }} < {{ current_rel }}.{{ updated_at }})
{%- endset %}
{% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}
{% do return({
"unique_key": primary_key,
"updated_at": updated_at,
"row_changed": row_changed_expr,
"scd_id": scd_id_expr
}) %}
{% endmacro %}
{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}
{% set check_cols_config = config['check_cols'] %}
{% set primary_key = config['unique_key'] %}
{% set updated_at = snapshot_get_time() %}
{% if check_cols_config == 'all' %}
{% set check_cols = get_columns_in_query(node['injected_sql']) %}
{% elif check_cols_config is iterable and (check_cols_config | length) > 0 %}
{% set check_cols = check_cols_config %}
{% else %}
{% do exceptions.raise_compiler_error("Invalid value for 'check_cols': " ~ check_cols_config) %}
{% endif %}
{% set row_changed_expr -%}
(
{% for col in check_cols %}
{{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }}
or
({{ snapshotted_rel }}.{{ col }} is null) != ({{ current_rel }}.{{ col }} is null)
{%- if not loop.last %} or {% endif %}
{% endfor %}
)
{%- endset %}
{% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}
{% do return({
"unique_key": primary_key,
"updated_at": updated_at,
"row_changed": row_changed_expr,
"scd_id": scd_id_expr
}) %}
{% endmacro %}
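The check strategy treats a row as changed when any checked column differs, including the null/not-null transitions handled by the second comparison in the expression above. A hedged Python analogue of that predicate, using plain dicts in place of database rows:

# Illustrative analogue of the check strategy's row_changed expression (rows are plain dicts).
def row_changed(snapshotted_row, current_row, check_cols):
    for col in check_cols:
        old, new = snapshotted_row.get(col), current_row.get(col)
        if (old is None) != (new is None):   # exactly one side became null
            return True
        if old is not None and old != new:   # both present but different
            return True
    return False

assert row_changed({'status': 'open'}, {'status': 'closed'}, ['status'])
assert row_changed({'status': 'open'}, {'status': None}, ['status'])
assert not row_changed({'status': 'open'}, {'status': 'open'}, ['status'])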


@@ -0,0 +1,59 @@
{% materialization table, default %}
{%- set identifier = model['alias'] -%}
{%- set tmp_identifier = model['name'] + '__dbt_tmp' -%}
{%- set backup_identifier = model['name'] + '__dbt_backup' -%}
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set target_relation = api.Relation.create(identifier=identifier,
schema=schema,
database=database,
type='table') -%}
{%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier,
schema=schema,
database=database,
type='table') -%}
/*
See ../view/view.sql for more information about this relation.
*/
{%- set backup_relation_type = 'table' if old_relation is none else old_relation.type -%}
{%- set backup_relation = api.Relation.create(identifier=backup_identifier,
schema=schema,
database=database,
type=backup_relation_type) -%}
{%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
-- drop the temp relations if they exist for some reason
{{ adapter.drop_relation(intermediate_relation) }}
{{ adapter.drop_relation(backup_relation) }}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}
-- build model
{% call statement('main') -%}
{{ create_table_as(False, intermediate_relation, sql) }}
{%- endcall %}
-- cleanup
{% if old_relation is not none %}
{{ adapter.rename_relation(target_relation, backup_relation) }}
{% endif %}
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
{{ run_hooks(post_hooks, inside_transaction=True) }}
-- `COMMIT` happens here
{{ adapter.commit() }}
-- finally, drop the existing/backup relation after the commit
{{ drop_relation_if_exists(backup_relation) }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{% endmaterialization %}


@@ -0,0 +1,61 @@
{% macro handle_existing_table(full_refresh, old_relation) %}
{{ adapter_macro("dbt.handle_existing_table", full_refresh, old_relation) }}
{% endmacro %}
{% macro default__handle_existing_table(full_refresh, old_relation) %}
{{ adapter.drop_relation(old_relation) }}
{% endmacro %}
{# /*
Core materialization implementation. BigQuery and Snowflake are similar
because both can use `create or replace view` where the resulting view schema
is not necessarily the same as the existing view. On Redshift, this would
result in: ERROR: cannot change number of columns in view.
This implementation is superior to the create_temp, swap_with_existing, drop_old
paradigm because transactions don't run DDL queries atomically on Snowflake. By using
`create or replace view`, the materialization becomes atomic in nature.
*/
#}
{% macro create_or_replace_view(run_outside_transaction_hooks=True) %}
{%- set identifier = model['alias'] -%}
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
{%- set target_relation = api.Relation.create(
identifier=identifier, schema=schema, database=database,
type='view') -%}
{% if run_outside_transaction_hooks %}
-- no transactions on BigQuery
{{ run_hooks(pre_hooks, inside_transaction=False) }}
{% endif %}
-- `BEGIN` happens here on Snowflake
{{ run_hooks(pre_hooks, inside_transaction=True) }}
-- If there's a table with the same name and we weren't told to full refresh,
-- that's an error. If we were told to full refresh, drop it. This behavior differs
-- for Snowflake and BigQuery, so multiple dispatch is used.
{%- if old_relation is not none and old_relation.is_table -%}
{{ handle_existing_table(flags.FULL_REFRESH, old_relation) }}
{%- endif -%}
-- build model
{% call statement('main') -%}
{{ create_view_as(target_relation, sql) }}
{%- endcall %}
{{ run_hooks(post_hooks, inside_transaction=True) }}
{{ adapter.commit() }}
{% if run_outside_transaction_hooks %}
-- No transactions on BigQuery
{{ run_hooks(post_hooks, inside_transaction=False) }}
{% endif %}
{% endmacro %}


@@ -0,0 +1,62 @@
{%- materialization view, default -%}
{%- set identifier = model['alias'] -%}
{%- set tmp_identifier = model['name'] + '__dbt_tmp' -%}
{%- set backup_identifier = model['name'] + '__dbt_backup' -%}
{%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}
{%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database,
type='view') -%}
{%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier,
schema=schema, database=database, type='view') -%}
/*
This relation (probably) doesn't exist yet. If it does exist, it's a leftover from
a previous run, and we're going to try to drop it immediately. At the end of this
materialization, we're going to rename the "old_relation" to this identifier,
and then we're going to drop it. In order to make sure we run the correct one of:
- drop view ...
- drop table ...
We need to set the type of this relation to be the type of the old_relation, if it exists,
or else "view" as a sane default if it does not. Note that if the old_relation does not
exist, then there is nothing to move out of the way and subsequently drop. In that case,
this relation will be effectively unused.
*/
{%- set backup_relation_type = 'view' if old_relation is none else old_relation.type -%}
{%- set backup_relation = api.Relation.create(identifier=backup_identifier,
schema=schema, database=database,
type=backup_relation_type) -%}
{%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}
{{ run_hooks(pre_hooks, inside_transaction=False) }}
-- drop the temp relations if they exist for some reason
{{ adapter.drop_relation(intermediate_relation) }}
{{ adapter.drop_relation(backup_relation) }}
-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}
-- build model
{% call statement('main') -%}
{{ create_view_as(intermediate_relation, sql) }}
{%- endcall %}
-- cleanup
-- move the existing view out of the way
{% if old_relation is not none %}
{{ adapter.rename_relation(target_relation, backup_relation) }}
{% endif %}
{{ adapter.rename_relation(intermediate_relation, target_relation) }}
{{ run_hooks(post_hooks, inside_transaction=True) }}
{{ adapter.commit() }}
{{ drop_relation_if_exists(backup_relation) }}
{{ run_hooks(post_hooks, inside_transaction=False) }}
{%- endmaterialization -%}
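The rename dance above (build an intermediate view, move the old relation to a backup name, promote the intermediate, then drop the backup after commit) can be simulated with a toy catalog. A purely illustrative Python sketch, with a plain dict standing in for the warehouse and hypothetical relation names:

# Toy simulation of the intermediate/backup swap; the 'catalog' dict stands in for the warehouse.
catalog = {'analytics.my_view': 'old view definition'}

def rename(catalog, src, dst):
    catalog[dst] = catalog.pop(src)

# build the model into the intermediate relation
catalog['analytics.my_view__dbt_tmp'] = 'new view definition'
# move the existing view out of the way, then promote the new one
rename(catalog, 'analytics.my_view', 'analytics.my_view__dbt_backup')
rename(catalog, 'analytics.my_view__dbt_tmp', 'analytics.my_view')
# after the commit, drop the backup
catalog.pop('analytics.my_view__dbt_backup', None)

assert catalog == {'analytics.my_view': 'new view definition'}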


@@ -1,10 +1,12 @@
- {% macro test_accepted_values(model, field, values) %}
+ {% macro test_accepted_values(model, values) %}
+ {% set column_name = kwargs.get('column_name', kwargs.get('field')) %}
with all_values as (
select distinct
- {{ field }} as value_field
+ {{ column_name }} as value_field
from {{ model }}


@@ -0,0 +1,11 @@
{% macro test_not_null(model) %}
{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
select count(*)
from {{ model }}
where {{ column_name }} is null
{% endmacro %}


@@ -0,0 +1,18 @@
{% macro test_relationships(model, to, field) %}
{% set column_name = kwargs.get('column_name', kwargs.get('from')) %}
select count(*)
from (
select {{ column_name }} as id from {{ model }}
) as child
left join (
select {{ field }} as id from {{ to }}
) as parent on parent.id = child.id
where child.id is not null
and parent.id is null
{% endmacro %}


@@ -0,0 +1,19 @@
{% macro test_unique(model) %}
{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}
select count(*)
from (
select
{{ column_name }}
from {{ model }}
where {{ column_name }} is not null
group by {{ column_name }}
having count(*) > 1
) validation_errors
{% endmacro %}
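All four schema tests above resolve their target column the same way: prefer the new column_name kwarg and fall back to the legacy one (arg for not_null and unique, field for accepted_values, from for relationships). A tiny Python illustration of that fallback, with hypothetical keys:

# Sketch of the column_name kwarg fallback used by the schema test macros.
def resolve_column_name(kwargs, legacy_key):
    return kwargs.get('column_name', kwargs.get(legacy_key))

assert resolve_column_name({'column_name': 'id'}, 'arg') == 'id'
assert resolve_column_name({'arg': 'id'}, 'arg') == 'id'       # legacy call style still works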

126
core/dbt/include/index.html Normal file

File diff suppressed because one or more lines are too long

272
core/dbt/linker.py Normal file

@@ -0,0 +1,272 @@
import networkx as nx
import threading
from dbt.compat import PriorityQueue
from dbt.node_types import NodeType
GRAPH_SERIALIZE_BLACKLIST = [
'agate_table'
]
def from_file(graph_file):
linker = Linker()
linker.read_graph(graph_file)
return linker
def is_blocking_dependency(node):
return node.resource_type == NodeType.Model
class GraphQueue(object):
"""A fancy queue that is backed by the dependency graph.
Note: this will mutate input!
This queue is thread-safe for `mark_done` calls, though you must ensure
that separate threads do not call `.empty()` or `__len__()` and `.get()` at
the same time, as there is an unlocked race!
"""
def __init__(self, graph, manifest):
self.graph = graph
self.manifest = manifest
# store the queue as a priority queue.
self.inner = PriorityQueue()
# things that have been popped off the queue but not finished
# and worker thread reservations
self.in_progress = set()
# things that are in the queue
self.queued = set()
# this lock controls most things
self.lock = threading.Lock()
# store the 'score' of each node as a number. Lower is higher priority.
self._scores = self._calculate_scores()
# populate the initial queue
self._find_new_additions()
def get_node(self, node_id):
return self.manifest.nodes[node_id]
def _include_in_cost(self, node_id):
node = self.get_node(node_id)
if not is_blocking_dependency(node):
return False
if node.get_materialization() == 'ephemeral':
return False
return True
def _calculate_scores(self):
"""Calculate the 'value' of each node in the graph based on how many
blocking descendants it has. We use this score for the internal
priority queue's ordering, so the quality of this metric is important.
The score is stored as a negative number because the internal
PriorityQueue picks lowest values first.
We could do this in one pass over the graph instead of len(self.graph)
passes but this is easy. For large graphs this may hurt performance.
This operates on the graph, so it would require a lock if called from
outside __init__.
:return Dict[str, int]: The score dict, mapping unique IDs to integer
scores. Lower scores are higher priority.
"""
scores = {}
for node in self.graph.nodes():
score = -1 * len([
d for d in nx.descendants(self.graph, node)
if self._include_in_cost(d)
])
scores[node] = score
return scores
def get(self, block=True, timeout=None):
"""Get a node off the inner priority queue. By default, this blocks.
This takes the lock, but only for part of it.
:param bool block: If True, block until the inner queue has data
:param Optional[float] timeout: If set, block for timeout seconds
waiting for data.
:return ParsedNode: The node as present in the manifest.
See `queue.PriorityQueue` for more information on `get()` behavior and
exceptions.
"""
_, node_id = self.inner.get(block=block, timeout=timeout)
with self.lock:
self._mark_in_progress(node_id)
return self.get_node(node_id)
def __len__(self):
"""The length of the queue is the number of tasks left for the queue to
give out, regardless of where they are. Incomplete tasks are not part
of the length.
This takes the lock.
"""
with self.lock:
return len(self.graph) - len(self.in_progress)
def empty(self):
"""The graph queue is 'empty' if it all remaining nodes in the graph
are in progress.
This takes the lock.
"""
return len(self) == 0
def _already_known(self, node):
"""Decide if a node is already known (either handed out as a task, or
in the queue).
Callers must hold the lock.
:param str node: The node ID to check
:returns bool: If the node is in progress/queued.
"""
return node in self.in_progress or node in self.queued
def _find_new_additions(self):
"""Find any nodes in the graph that need to be added to the internal
queue and add them.
Callers must hold the lock.
"""
for node, in_degree in dict(self.graph.in_degree()).items():
if not self._already_known(node) and in_degree == 0:
self.inner.put((self._scores[node], node))
self.queued.add(node)
def mark_done(self, node_id):
"""Given a node's unique ID, mark it as done.
This method takes the lock.
:param str node_id: The node ID to mark as complete.
"""
with self.lock:
self.in_progress.remove(node_id)
self.graph.remove_node(node_id)
self._find_new_additions()
self.inner.task_done()
def _mark_in_progress(self, node_id):
"""Mark the node as 'in progress'.
Callers must hold the lock.
:param str node_id: The node ID to mark as in progress.
"""
self.queued.remove(node_id)
self.in_progress.add(node_id)
def join(self):
"""Join the queue. Blocks until all tasks are marked as done.
Make sure not to call this before the queue reports that it is empty.
"""
self.inner.join()
def _subset_graph(graph, include_nodes):
"""Create and return a new graph that is a shallow copy of graph but with
only the nodes in include_nodes. Transitive edges across removed nodes are
preserved as explicit new edges.
"""
new_graph = nx.algorithms.transitive_closure(graph)
include_nodes = set(include_nodes)
for node in graph.nodes():
if node not in include_nodes:
new_graph.remove_node(node)
for node in include_nodes:
if node not in new_graph:
raise RuntimeError(
"Couldn't find model '{}' -- does it exist or is "
"it disabled?".format(node)
)
return new_graph
class Linker(object):
def __init__(self, data=None):
if data is None:
data = {}
self.graph = nx.DiGraph(**data)
def edges(self):
return self.graph.edges()
def nodes(self):
return self.graph.nodes()
def find_cycles(self):
# There's a networkx find_cycle function, but there's a bug in the
# nx 1.11 release that prevents us from using it. We should use that
# function when we upgrade to 2.X. More info:
# https://github.com/networkx/networkx/pull/2473
cycles = list(nx.simple_cycles(self.graph))
if len(cycles) > 0:
cycle_nodes = cycles[0]
cycle_nodes.append(cycle_nodes[0])
return " --> ".join(cycle_nodes)
return None
def as_graph_queue(self, manifest, limit_to=None):
"""Returns a queue over nodes in the graph that tracks progress of
dependencies.
"""
if limit_to is None:
graph_nodes = self.graph.nodes()
else:
graph_nodes = limit_to
new_graph = _subset_graph(self.graph, graph_nodes)
return GraphQueue(new_graph, manifest)
def get_dependent_nodes(self, node):
return nx.descendants(self.graph, node)
def dependency(self, node1, node2):
"indicate that node1 depends on node2"
self.graph.add_node(node1)
self.graph.add_node(node2)
self.graph.add_edge(node2, node1)
def add_node(self, node):
self.graph.add_node(node)
def remove_node(self, node):
children = nx.descendants(self.graph, node)
self.graph.remove_node(node)
return children
def write_graph(self, outfile, manifest):
"""Write the graph to a gpickle file. Before doing so, serialize and
include all nodes in their corresponding graph entries.
"""
out_graph = _updated_graph(self.graph, manifest)
nx.write_gpickle(out_graph, outfile)
def read_graph(self, infile):
self.graph = nx.read_gpickle(infile)
def _updated_graph(graph, manifest):
graph = graph.copy()
for node_id in graph.nodes():
# serialize() removes the agate table
data = manifest.nodes[node_id].serialize()
for key in GRAPH_SERIALIZE_BLACKLIST:
if key in data:
del data[key]
graph.add_node(node_id, **data)
return graph
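Two pieces of linker.py are easiest to see on a toy graph: _subset_graph preserves transitive edges across removed nodes, and _calculate_scores prioritizes nodes by how many descendants they have (the real code also filters through _include_in_cost, which this sketch ignores). A small networkx example with made-up node names:

import networkx as nx

# a -> b -> c, plus a -> d
graph = nx.DiGraph()
graph.add_edges_from([('a', 'b'), ('b', 'c'), ('a', 'd')])

# _subset_graph idea: keep only {a, c}; the transitive closure preserves a -> c
closure = nx.algorithms.transitive_closure(graph)
subset = closure.copy()
for node in graph.nodes():
    if node not in {'a', 'c'}:
        subset.remove_node(node)
assert ('a', 'c') in subset.edges()

# _calculate_scores idea (simplified): more descendants -> more negative score -> higher priority
scores = {n: -len(nx.descendants(graph, n)) for n in graph.nodes()}
assert scores['a'] == -3 and scores['c'] == 0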

3
core/dbt/links.py Normal file

@@ -0,0 +1,3 @@
ProfileConfigDocs = 'https://docs.getdbt.com/docs/configure-your-profile'
SnowflakeQuotingDocs = 'https://docs.getdbt.com/v0.10/docs/configuring-quoting'
IncrementalDocs = 'https://docs.getdbt.com/docs/configuring-incremental-models'

265
core/dbt/loader.py Normal file

@@ -0,0 +1,265 @@
import os
import itertools
from dbt.include.global_project import PACKAGES
import dbt.exceptions
import dbt.flags
from dbt.node_types import NodeType
from dbt.contracts.graph.manifest import Manifest
from dbt.utils import timestring
from dbt.parser import MacroParser, ModelParser, SeedParser, AnalysisParser, \
DocumentationParser, DataTestParser, HookParser, SchemaParser, \
ParserUtils, SnapshotParser
from dbt.contracts.project import ProjectList
class GraphLoader(object):
def __init__(self, root_project, all_projects):
self.root_project = root_project
self.all_projects = all_projects
self.nodes = {}
self.docs = {}
self.macros = {}
self.tests = {}
self.patches = {}
self.disabled = []
self.macro_manifest = None
def _load_sql_nodes(self, parser_type, resource_type, relative_dirs_attr,
**kwargs):
parser = parser_type(self.root_project, self.all_projects,
self.macro_manifest)
for project_name, project in self.all_projects.items():
parse_results = parser.load_and_parse(
package_name=project_name,
root_dir=project.project_root,
relative_dirs=getattr(project, relative_dirs_attr),
resource_type=resource_type,
**kwargs
)
self.nodes.update(parse_results.parsed)
self.disabled.extend(parse_results.disabled)
def _load_macros(self, internal_manifest=None):
# skip any projects in the internal manifest
all_projects = self.all_projects.copy()
if internal_manifest is not None:
for name in internal_project_names():
all_projects.pop(name, None)
self.macros.update(internal_manifest.macros)
# give the macroparser all projects but then only load what we haven't
# loaded already
parser = MacroParser(self.root_project, self.all_projects)
for project_name, project in all_projects.items():
self.macros.update(parser.load_and_parse(
package_name=project_name,
root_dir=project.project_root,
relative_dirs=project.macro_paths,
resource_type=NodeType.Macro,
))
def _load_seeds(self):
parser = SeedParser(self.root_project, self.all_projects,
self.macro_manifest)
for project_name, project in self.all_projects.items():
self.nodes.update(parser.load_and_parse(
package_name=project_name,
root_dir=project.project_root,
relative_dirs=project.data_paths,
))
def _load_nodes(self):
self._load_sql_nodes(ModelParser, NodeType.Model, 'source_paths')
self._load_sql_nodes(SnapshotParser, NodeType.Snapshot,
'snapshot_paths')
self._load_sql_nodes(AnalysisParser, NodeType.Analysis,
'analysis_paths')
self._load_sql_nodes(DataTestParser, NodeType.Test, 'test_paths',
tags=['data'])
hook_parser = HookParser(self.root_project, self.all_projects,
self.macro_manifest)
self.nodes.update(hook_parser.load_and_parse())
self._load_seeds()
def _load_docs(self):
parser = DocumentationParser(self.root_project, self.all_projects)
for project_name, project in self.all_projects.items():
self.docs.update(parser.load_and_parse(
package_name=project_name,
root_dir=project.project_root,
relative_dirs=project.docs_paths
))
def _load_schema_tests(self):
parser = SchemaParser(self.root_project, self.all_projects,
self.macro_manifest)
for project_name, project in self.all_projects.items():
tests, patches, sources = parser.load_and_parse(
package_name=project_name,
root_dir=project.project_root,
relative_dirs=project.source_paths
)
for unique_id, test in tests.items():
if unique_id in self.tests:
dbt.exceptions.raise_duplicate_resource_name(
test, self.tests[unique_id],
)
self.tests[unique_id] = test
for unique_id, source in sources.items():
if unique_id in self.nodes:
dbt.exceptions.raise_duplicate_resource_name(
source, self.nodes[unique_id],
)
self.nodes[unique_id] = source
for name, patch in patches.items():
if name in self.patches:
dbt.exceptions.raise_duplicate_patch_name(
name, patch, self.patches[name]
)
self.patches[name] = patch
def load(self, internal_manifest=None):
self._load_macros(internal_manifest=internal_manifest)
# make a manifest with just the macros to get the context
self.macro_manifest = Manifest(macros=self.macros, nodes={}, docs={},
generated_at=timestring(), disabled=[])
self._load_nodes()
self._load_docs()
self._load_schema_tests()
def create_manifest(self):
manifest = Manifest(
nodes=self.nodes,
macros=self.macros,
docs=self.docs,
generated_at=timestring(),
config=self.root_project,
disabled=self.disabled
)
manifest.add_nodes(self.tests)
manifest.patch_nodes(self.patches)
manifest = ParserUtils.process_sources(manifest, self.root_project)
manifest = ParserUtils.process_refs(manifest,
self.root_project.project_name)
manifest = ParserUtils.process_docs(manifest, self.root_project)
return manifest
@classmethod
def _load_from_projects(cls, root_config, projects, internal_manifest):
if dbt.flags.STRICT_MODE:
ProjectList(**projects)
loader = cls(root_config, projects)
loader.load(internal_manifest=internal_manifest)
return loader.create_manifest()
@classmethod
def load_all(cls, root_config, internal_manifest=None):
projects = load_all_projects(root_config)
manifest = cls._load_from_projects(root_config, projects,
internal_manifest)
_check_manifest(manifest, root_config)
return manifest
@classmethod
def load_internal(cls, root_config):
projects = load_internal_projects(root_config)
return cls._load_from_projects(root_config, projects, None)
def _check_resource_uniqueness(manifest):
names_resources = {}
alias_resources = {}
for resource, node in manifest.nodes.items():
if node.resource_type not in NodeType.refable():
continue
name = node.name
alias = "{}.{}".format(node.schema, node.alias)
existing_node = names_resources.get(name)
if existing_node is not None:
dbt.exceptions.raise_duplicate_resource_name(
existing_node, node
)
existing_alias = alias_resources.get(alias)
if existing_alias is not None:
dbt.exceptions.raise_ambiguous_alias(
existing_alias, node
)
names_resources[name] = node
alias_resources[alias] = node
def _warn_for_unused_resource_config_paths(manifest, config):
resource_fqns = manifest.get_resource_fqns()
disabled_fqns = [n.fqn for n in manifest.disabled]
config.warn_for_unused_resource_config_paths(resource_fqns, disabled_fqns)
def _check_manifest(manifest, config):
_check_resource_uniqueness(manifest)
_warn_for_unused_resource_config_paths(manifest, config)
def internal_project_names():
return iter(PACKAGES.values())
def _load_projects(config, paths):
for path in paths:
try:
project = config.new_project(path)
except dbt.exceptions.DbtProjectError as e:
raise dbt.exceptions.DbtProjectError(
'Failed to read package at {}: {}'
.format(path, e)
)
else:
yield project.project_name, project
def _project_directories(config):
root = os.path.join(config.project_root, config.modules_path)
dependencies = []
if os.path.exists(root):
dependencies = os.listdir(root)
for name in dependencies:
full_obj = os.path.join(root, name)
if not os.path.isdir(full_obj) or name.startswith('__'):
# exclude non-dirs and dirs that start with __
# the latter could be something like __pycache__
# for the global dbt modules dir
continue
yield full_obj
def load_all_projects(config):
all_projects = {config.project_name: config}
project_paths = itertools.chain(
internal_project_names(),
_project_directories(config)
)
all_projects.update(_load_projects(config, project_paths))
return all_projects
def load_internal_projects(config):
return dict(_load_projects(config, internal_project_names()))
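_check_resource_uniqueness flags two kinds of collisions among refable nodes: duplicate resource names and duplicate schema.alias pairs. A stripped-down Python sketch of that check, using hypothetical lightweight node objects rather than dbt's parsed nodes or exception helpers:

from collections import namedtuple

# Hypothetical stand-in for a parsed, refable node.
Node = namedtuple('Node', ['name', 'schema', 'alias'])

def check_resource_uniqueness(nodes):
    names, aliases = {}, {}
    for node in nodes:
        alias = '{}.{}'.format(node.schema, node.alias)
        if node.name in names:
            raise ValueError('duplicate resource name: {}'.format(node.name))
        if alias in aliases:
            raise ValueError('ambiguous alias: {}'.format(alias))
        names[node.name] = node
        aliases[alias] = node

check_resource_uniqueness([Node('orders', 'analytics', 'orders'),
                           Node('customers', 'analytics', 'customers')])
try:
    check_resource_uniqueness([Node('orders', 'analytics', 'orders'),
                               Node('orders_v2', 'analytics', 'orders')])
except ValueError as exc:
    print(exc)  # ambiguous alias: analytics.orders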

Some files were not shown because too many files have changed in this diff.