Compare commits

...

3 Commits

Author SHA1 Message Date
Alessandro Puccetti
d97fcbb3d7 plugins/bigquery: log processed bytes value for all statement types
Fixes https://github.com/fishtown-analytics/dbt/issues/2526
2020-06-16 20:32:36 +02:00
Alessandro Puccetti
c713cf9501 core/dbt/utils: add format_rows_number 2020-06-16 20:29:05 +02:00
Alessandro Puccetti
d189a9a8d4 core/dbt/utils: add PB to format_bytes and always return the accurate value 2020-06-16 20:29:05 +02:00
4 changed files with 44 additions and 7 deletions

View File

@@ -10,10 +10,12 @@
- Extends model selection syntax with at most n-th parent/children `dbt run --models 3+m1+2` ([#2052](https://github.com/fishtown-analytics/dbt/issues/2052), [#2485](https://github.com/fishtown-analytics/dbt/pull/2485))
- Added support for renaming BigQuery relations ([#2520](https://github.com/fishtown-analytics/dbt/issues/2520), [#2521](https://github.com/fishtown-analytics/dbt/pull/2521))
- Added support for BigQuery authorized views ([#1718](https://github.com/fishtown-analytics/dbt/issues/1718), [#2517](https://github.com/fishtown-analytics/dbt/issues/2517))
- Format rows number in BigQuery plugin logs ([#2526](https://github.com/fishtown-analytics/dbt/issues/2526))
### Fixes
- Fixed an error in create_adapter_plugins.py script when -dependency arg not passed ([#2507](https://github.com/fishtown-analytics/dbt/issues/2507), [#2508](https://github.com/fishtown-analytics/dbt/pull/2508))
- Remove misleading "Opening a new connection" log message in set_connection_name. ([#2511](https://github.com/fishtown-analytics/dbt/issues/2511))
- Now all the BigQuery statement types return the number of bytes processed ([#2526](https://github.com/fishtown-analytics/dbt/issues/2526)).
Contributors:
- [@raalsky](https://github.com/Raalsky) ([#2417](https://github.com/fishtown-analytics/dbt/pull/2417), [#2485](https://github.com/fishtown-analytics/dbt/pull/2485))
@@ -21,6 +23,7 @@ Contributors:
- [@scarrucciu](https://github.com/scarrucciu) ([#2508](https://github.com/fishtown-analytics/dbt/pull/2508))
- [@southpolemonkey](https://github.com/southpolemonkey) ([#2511](https://github.com/fishtown-analytics/dbt/issues/2511))
- [@azhard](https://github.com/azhard) ([#2517](https://github.com/fishtown-analytics/dbt/issues/2517), [#2521](https://github.com/fishtown-analytics/dbt/pull/2521))
- [@alepuccetti](https://github.com/alepuccetti) ([#2526](https://github.com/fishtown-analytics/dbt/issues/2526))
## dbt 0.17.1 (Release TBD)

View File

@@ -510,12 +510,23 @@ class classproperty(object):
def format_bytes(num_bytes):
for unit in ['Bytes', 'KB', 'MB', 'GB', 'TB']:
for unit in ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB']:
if abs(num_bytes) < 1024.0:
return f"{num_bytes:3.1f} {unit}"
num_bytes /= 1024.0
return "> 1024 TB"
num_bytes *= 1024.0
return f"{num_bytes:3.1f} {unit}"
def format_rows_number(rows_number):
for unit in ['', 'k', 'm', 'b', 't']:
if abs(rows_number) < 1000.0:
return f"{rows_number:3.1f} {unit}".strip()
rows_number /= 1000.0
rows_number *= 1000.0
return f"{rows_number:3.1f} {unit}".strip()
# a little concurrent.futures.Executor for single-threaded mode

View File

@@ -8,7 +8,7 @@ import google.cloud.exceptions
from google.api_core import retry, client_info
from google.oauth2 import service_account
from dbt.utils import format_bytes
from dbt.utils import format_bytes, format_rows_number
from dbt.clients import agate_helper, gcloud
from dbt.exceptions import (
FailedToConnectException, RuntimeException, DatabaseException
@@ -245,16 +245,21 @@ class BigQueryConnectionManager(BaseConnectionManager):
conn = self.get_thread_connection()
client = conn.handle
table = client.get_table(query_job.destination)
status = 'CREATE TABLE ({})'.format(table.num_rows)
processed = format_bytes(query_job.total_bytes_processed)
status = 'CREATE TABLE ({} rows, {} processed)'.format(
format_rows_number(table.num_rows),
format_bytes(query_job.total_bytes_processed),
)
elif query_job.statement_type == 'SCRIPT':
processed = format_bytes(query_job.total_bytes_processed)
status = f'SCRIPT ({processed} processed)'
elif query_job.statement_type in ['INSERT', 'DELETE', 'MERGE']:
status = '{} ({})'.format(
status = '{} ({} rows, {} processed)'.format(
query_job.statement_type,
query_job.num_dml_affected_rows
format_rows_number(query_job.num_dml_affected_rows),
format_bytes(query_job.total_bytes_processed),
)
else:

View File

@@ -150,7 +150,25 @@ class TestBytesFormatting(unittest.TestCase):
self.assertEqual(dbt.utils.format_bytes(1024**2*1.5), '1.5 MB')
self.assertEqual(dbt.utils.format_bytes(1024**3*52.6), '52.6 GB')
self.assertEqual(dbt.utils.format_bytes(1024**4*128), '128.0 TB')
self.assertEqual(dbt.utils.format_bytes(1024**5+1), '> 1024 TB')
self.assertEqual(dbt.utils.format_bytes(1024**5), '1.0 PB')
self.assertEqual(dbt.utils.format_bytes(1024**5*31.4), '31.4 PB')
self.assertEqual(dbt.utils.format_bytes(1024**6), '1024.0 PB')
self.assertEqual(dbt.utils.format_bytes(1024**6*42), '43008.0 PB')
class TestRowsNumberFormatting(unittest.TestCase):
def test__simple_cases(self):
self.assertEqual(dbt.utils.format_rows_number(-1), '-1.0')
self.assertEqual(dbt.utils.format_rows_number(0), '0.0')
self.assertEqual(dbt.utils.format_rows_number(20), '20.0')
self.assertEqual(dbt.utils.format_rows_number(1030), '1.0 k')
self.assertEqual(dbt.utils.format_rows_number(1000**2*1.5), '1.5 m')
self.assertEqual(dbt.utils.format_rows_number(1000**3*52.6), '52.6 b')
self.assertEqual(dbt.utils.format_rows_number(1000**3*128), '128.0 b')
self.assertEqual(dbt.utils.format_rows_number(1000**4), '1.0 t')
self.assertEqual(dbt.utils.format_rows_number(1000**4*31.4), '31.4 t')
self.assertEqual(dbt.utils.format_rows_number(1000**5*31.4), '31400.0 t')
class TestMultiDict(unittest.TestCase):