moves dlt core in

Marcin Rudolf
2022-06-03 18:48:12 +02:00
parent ed85c90f03
commit 892e1e3139
154 changed files with 26239 additions and 212 deletions

13
.dockerignore Normal file

@@ -0,0 +1,13 @@
.idea
.direnv
.mypy_cache
.pytest_cache
htmlcov
.coverage
__pycache__
.eggs
*.egg-info
_storage
_test_storage
Dockerfile
*.md

203
LICENSE.txt Normal file

@@ -0,0 +1,203 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2022 ScaleVector
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Makefile

@@ -1,3 +1,28 @@
PYV=$(shell python3 -c "import sys;t='{v[0]}.{v[1]}'.format(v=list(sys.version_info[:2]));sys.stdout.write(t)")
.SILENT:has-poetry
# pipeline version info
AUTV=$(shell python3 -c "from dlt import __version__;print(__version__)")
AUTVMINMAJ=$(shell python3 -c "from dlt import __version__;print('.'.join(__version__.split('.')[:-1]))")
NAME := scalevector/dlt
TAG := $(shell git log -1 --pretty=%h)
IMG := ${NAME}:${TAG}
LATEST := ${NAME}:latest${VERSION_SUFFIX}
VERSION := ${AUTV}${VERSION_SUFFIX}
VERSION_MM := ${AUTVMINMAJ}${VERSION_SUFFIX}
# dbt runner version info
DBT_AUTV=$(shell python3 -c "from dlt.dbt_runner._version import __version__;print(__version__)")
DBT_AUTVMINMAJ=$(shell python3 -c "from dlt.dbt_runner._version import __version__;print('.'.join(__version__.split('.')[:-1]))")
DBT_NAME := scalevector/dlt-dbt-runner
DBT_IMG := ${DBT_NAME}:${TAG}
DBT_LATEST := ${DBT_NAME}:latest${VERSION_SUFFIX}
DBT_VERSION := ${DBT_AUTV}${VERSION_SUFFIX}
DBT_VERSION_MM := ${DBT_AUTVMINMAJ}${VERSION_SUFFIX}
install-poetry:
ifneq ($(VIRTUAL_ENV),)
$(error you cannot be under virtual environment $(VIRTUAL_ENV))
@@ -8,14 +33,70 @@ has-poetry:
poetry --version
dev: has-poetry
# will install itself as editable module
poetry install
poetry run pip install -e ../rasa_data_ingestion
# will install itself as editable module with all the extras
poetry install -E "postgres redshift dbt gcp"
lint:
poetry run mypy --config-file mypy.ini dlt examples
poetry run flake8 --max-line-length=200 dlt examples
# poetry run flake8 --max-line-length=200 dlt examples tests
$(MAKE) lint-security
lint-security:
poetry run bandit -r autopoiesis/ -n 3 -ll
poetry run bandit -r dlt/ -n 3 -l
reset-test-storage:
-rm -r _storage
mkdir _storage
python3 test/tools/create_storages.py
recreate-compiled-deps:
poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift
grep `cat compiled_packages.txt` _gen_requirements.txt > compiled_requirements.txt
publish-library:
poetry version ${VERSION}
poetry build
poetry publish -u __token__
build-image-tags:
@echo ${IMG}
@echo ${LATEST}
@echo ${NAME}:${VERSION_MM}
@echo ${NAME}:${VERSION}
build-image-no-version-tags:
poetry export -f requirements.txt --output _gen_requirements.txt --without-hashes --extras gcp --extras redshift
docker build -f deploy/dlt/Dockerfile --build-arg=COMMIT_SHA=${TAG} --build-arg=IMAGE_VERSION="${VERSION}" . -t ${IMG}
build-image: build-image-no-version-tags
docker tag ${IMG} ${LATEST}
docker tag ${IMG} ${NAME}:${VERSION_MM}
docker tag ${IMG} ${NAME}:${VERSION}
push-image:
docker push ${IMG}
docker push ${LATEST}
docker push ${NAME}:${VERSION_MM}
docker push ${NAME}:${VERSION}
dbt-build-image-tags:
@echo ${DBT_IMG}
@echo ${DBT_LATEST}
@echo ${DBT_VERSION_MM}
@echo ${DBT_VERSION}
dbt-build-image:
poetry export -f requirements.txt --output _gen_requirements_dbt.txt --without-hashes --extras dbt
docker build -f dlt/dbt_runner/Dockerfile --build-arg=COMMIT_SHA=${TAG} --build-arg=IMAGE_VERSION="${DBT_VERSION}" . -t ${DBT_IMG}
docker tag ${DBT_IMG} ${DBT_LATEST}
docker tag ${DBT_IMG} ${DBT_NAME}:${DBT_VERSION_MM}
docker tag ${DBT_IMG} ${DBT_NAME}:${DBT_VERSION}
dbt-push-image:
docker push ${DBT_IMG}
docker push ${DBT_LATEST}
docker push ${DBT_NAME}:${DBT_VERSION_MM}
docker push ${DBT_NAME}:${DBT_VERSION}
docker-login:
docker login -u scalevector -p ${DOCKER_PASS}

1
compiled_packages.txt Normal file

@@ -0,0 +1 @@
cffi\|idna\|simplejson\|pendulum\|grpcio\|google-crc32c

compiled_requirements.txt Normal file

@@ -0,0 +1,6 @@
google-crc32c==1.3.0; python_version >= "3.6" and python_version < "3.11"
grpcio-status==1.43.0; python_version >= "3.6" and python_version < "3.11"
grpcio==1.43.0; python_version >= "3.6"
idna==3.3; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.11" or python_full_version >= "3.6.0" and python_version >= "3.6" and python_version < "3.11"
pendulum==2.1.2; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
simplejson==3.17.6; (python_version >= "2.5" and python_full_version < "3.0.0") or (python_full_version >= "3.3.0")


@@ -0,0 +1,54 @@
FROM python:3.8-slim-bullseye as base
# Metadata
LABEL org.label-schema.vendor="ScaleVector" \
org.label-schema.url="https://scalevector.ai" \
org.label-schema.name="dbt_runner" \
org.label-schema.description="DBT Package Runner for DLT"
# prepare dirs to install autopoiesis
RUN mkdir -p /usr/src/app && mkdir /var/local/app && mkdir /usr/src/app/autopoiesis
WORKDIR /usr/src/app
# System setup for DBT
RUN apt-get update \
&& apt-get dist-upgrade -y \
&& apt-get install -y --no-install-recommends \
git \
ssh-client \
software-properties-common \
make \
build-essential \
ca-certificates \
libpq-dev \
&& apt-get clean \
&& rm -rf \
/var/lib/apt/lists/* \
/tmp/* \
/var/tmp/*
# Env vars
ENV PYTHONIOENCODING=utf-8
ENV LANG=C.UTF-8
# Update python
RUN python -m pip install --upgrade pip setuptools wheel --no-cache-dir
ENV PYTHONPATH $PYTHONPATH:/usr/src/app
ADD _gen_requirements_dbt.txt .
RUN pip3 install -r _gen_requirements_dbt.txt
COPY autopoiesis/common autopoiesis/common
COPY autopoiesis/dbt_runner autopoiesis/dbt_runner
COPY autopoiesis/*.py autopoiesis/
# add build labels and envs
ARG COMMIT_SHA=""
ARG IMAGE_VERSION=""
LABEL commit_sha=${COMMIT_SHA}
LABEL version=${IMAGE_VERSION}
ENV COMMIT_SHA=${COMMIT_SHA}
ENV IMAGE_VERSION=${IMAGE_VERSION}

43
deploy/dlt/Dockerfile Normal file

@@ -0,0 +1,43 @@
# Python 3.8 required
FROM alpine:3.15
# Metadata
LABEL org.label-schema.vendor="ScaleVector" \
org.label-schema.url="https://scalevector.ai" \
org.label-schema.name="DLT" \
org.label-schema.description="DLT is an open-source, Python-native, scalable data loading framework that does not require any DevOps effort to run."
# prepare dirs to install autopoiesis
RUN mkdir -p /tmp/pydlt
WORKDIR /tmp/pydlt
# generated by make recreate-compiled-deps to install packages requiring compiler
# recreate only when you have new deps requiring compilation - step below is very slow
ADD compiled_requirements.txt .
# install alpine deps
RUN apk update &&\
apk add --no-cache python3 ca-certificates curl postgresql &&\
apk add --no-cache --virtual build-deps build-base automake autoconf libtool python3-dev postgresql-dev libffi-dev linux-headers gcc musl-dev &&\
ln -s /usr/bin/python3 /usr/bin/python &&\
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py &&\
rm get-pip.py &&\
pip3 install --upgrade setuptools wheel &&\
rm -r /usr/lib/python*/ensurepip &&\
pip3 install -r compiled_requirements.txt &&\
apk del --purge build-deps
#rm -r /root/.cache
# add build labels and envs
ARG COMMIT_SHA=""
ARG IMAGE_VERSION=""
LABEL commit_sha=${COMMIT_SHA}
LABEL version=${IMAGE_VERSION}
ENV COMMIT_SHA=${COMMIT_SHA}
ENV IMAGE_VERSION=${IMAGE_VERSION}
# install exactly the same version of the library we used to build
RUN pip3 install python-dlt==${IMAGE_VERSION}[gcp,redshift]
RUN rm -r /tmp/pydlt

dlt/__init__.py Normal file

@@ -0,0 +1 @@
from dlt._version import common_version as __version__

3
dlt/_version.py Normal file

@@ -0,0 +1,3 @@
common_version = "0.1.0"
loader_version = "0.1.0"
unpacker_version = "0.1.0"

5
dlt/common/__init__.py Normal file

@@ -0,0 +1,5 @@
from .pendulum import pendulum # noqa: F401
from .json import json # noqa: F401, I251
from .time import sleep # noqa: F401
from .arithmetics import Decimal # noqa: F401
from dlt._version import common_version as __version__

32
dlt/common/arithmetics.py Normal file

@@ -0,0 +1,32 @@
import decimal
from contextlib import contextmanager
from typing import Iterator
from decimal import ROUND_HALF_UP, Decimal, DefaultContext, DivisionByZero, InvalidOperation, localcontext, Context, ConversionSyntax
DefaultContext.rounding = ROUND_HALF_UP
# use a lowercase letter for the exponent
DefaultContext.capitals = 0
# prevent NaN from being returned
DefaultContext.traps[InvalidOperation] = True
# prevent Inf from being returned
DefaultContext.traps[DivisionByZero] = True
decimal.setcontext(DefaultContext)
DEFAULT_NUMERIC_PRECISION = 38
DEFAULT_NUMERIC_SCALE = 9
NUMERIC_DEFAULT_QUANTIZER = Decimal("1." + "0" * DEFAULT_NUMERIC_SCALE)
@contextmanager
def numeric_default_context() -> Iterator[Context]:
with localcontext() as c:
c.prec=DEFAULT_NUMERIC_PRECISION
yield c
def numeric_default_quantize(v: Decimal) -> Decimal:
if v == 0:
return v
return v.quantize(NUMERIC_DEFAULT_QUANTIZER)
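A quick illustration of how these helpers might be used (a minimal sketch, not part of this commit; it relies only on the names defined above):

from dlt.common.arithmetics import Decimal, numeric_default_context, numeric_default_quantize

with numeric_default_context():
    # computed with the default 38-digit precision
    v = Decimal(1) / Decimal(3)
# rounded half-up to the default scale of 9 fractional digits
print(numeric_default_quantize(v))  # 0.333333333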

dlt/common/configuration/__init__.py Normal file

@@ -0,0 +1,11 @@
from .basic_configuration import BasicConfiguration # noqa: F401
from .unpacking_volume_configuration import UnpackingVolumeConfiguration, ProductionUnpackingVolumeConfiguration # noqa: F401
from .loading_volume_configuration import LoadingVolumeConfiguration, ProductionLoadingVolumeConfiguration # noqa: F401
from .schema_volume_configuration import SchemaVolumeConfiguration, ProductionSchemaVolumeConfiguration # noqa: F401
from .pool_runner_configuration import PoolRunnerConfiguration, TPoolType # noqa: F401
from .gcp_client_configuration import GcpClientConfiguration, GcpClientProductionConfiguration # noqa: F401
from .postgres_configuration import PostgresConfiguration, PostgresProductionConfiguration # noqa: F401
from .utils import make_configuration, TConfigSecret, open_configuration_file # noqa: F401
from .exceptions import ( # noqa: F401
ConfigEntryMissingException, ConfigEnvValueCannotBeCoercedException, ConfigIntegrityException, ConfigFileNotFoundException)

dlt/common/configuration/basic_configuration.py Normal file

@@ -0,0 +1,21 @@
from typing import Optional, Tuple
DEVELOPMENT_CONFIG_FILES_STORAGE_PATH = "_storage/config/%s"
PRODUCTION_CONFIG_FILES_STORAGE_PATH = "/run/config/%s"
class BasicConfiguration:
NAME: str = None # the name of the component, must be supplied
SENTRY_DSN: Optional[str] = None # keep None to disable Sentry
PROMETHEUS_PORT: Optional[int] = None # keep None to disable Prometheus
LOG_FORMAT: str = '{asctime}|[{levelname:<21}]|{process}|{name}|{filename}|{funcName}:{lineno}|{message}'
LOG_LEVEL: str = "DEBUG"
IS_DEVELOPMENT_CONFIG: bool = True
REQUEST_TIMEOUT: Tuple[int, int] = (15, 300) # default request timeout for all http clients
CONFIG_FILES_STORAGE_PATH: str = DEVELOPMENT_CONFIG_FILES_STORAGE_PATH
@classmethod
def check_integrity(cls) -> None:
# if CONFIG_FILES_STORAGE_PATH not overwritten and we are in production mode
if cls.CONFIG_FILES_STORAGE_PATH == DEVELOPMENT_CONFIG_FILES_STORAGE_PATH and not cls.IS_DEVELOPMENT_CONFIG:
# set to mount where config files will be present
cls.CONFIG_FILES_STORAGE_PATH = PRODUCTION_CONFIG_FILES_STORAGE_PATH

dlt/common/configuration/exceptions.py Normal file

@@ -0,0 +1,43 @@
from typing import Iterable, Union
from dlt.common.exceptions import DltException
class ConfigurationException(DltException):
def __init__(self, msg: str) -> None:
super().__init__(msg)
class ConfigEntryMissingException(ConfigurationException):
"""thrown when not all required config elements are present"""
def __init__(self, missing_set: Iterable[str]) -> None:
self.missing_set = missing_set
super().__init__('Missing config keys: ' + str(missing_set))
class ConfigEnvValueCannotBeCoercedException(ConfigurationException):
"""thrown when value from ENV cannot be coerced to hinted type"""
def __init__(self, attr_name: str, env_value: str, hint: type) -> None:
self.attr_name = attr_name
self.env_value = env_value
self.hint = hint
super().__init__('env value %s cannot be coerced into type %s in attr %s' % (env_value, str(hint), attr_name))
class ConfigIntegrityException(ConfigurationException):
"""thrown when value from ENV cannot be coerced to hinted type"""
def __init__(self, attr_name: str, env_value: str, info: Union[type, str]) -> None:
self.attr_name = attr_name
self.env_value = env_value
self.info = info
super().__init__('integrity error for attr %s with value %s. %s.' % (attr_name, env_value, info))
class ConfigFileNotFoundException(ConfigurationException):
"""thrown when configuration file cannot be found in config folder"""
def __init__(self, path: str) -> None:
super().__init__(f"Missing config file in {path}")

dlt/common/configuration/gcp_client_configuration.py Normal file

@@ -0,0 +1,34 @@
from dlt.common.typing import StrStr
from dlt.common.configuration.utils import TConfigSecret
class GcpClientConfiguration:
PROJECT_ID: str = None
DATASET: str = None
TIMEOUT: float = 30.0
BQ_CRED_TYPE: str = "service_account"
BQ_CRED_PRIVATE_KEY: TConfigSecret = None
BQ_CRED_TOKEN_URI: str = "https://oauth2.googleapis.com/token"
BQ_CRED_CLIENT_EMAIL: str = None
@classmethod
def check_integrity(cls) -> None:
if cls.BQ_CRED_PRIVATE_KEY and cls.BQ_CRED_PRIVATE_KEY[-1] != "\n":
# must end with new line, otherwise won't be parsed by Crypto
cls.BQ_CRED_PRIVATE_KEY = TConfigSecret(cls.BQ_CRED_PRIVATE_KEY + "\n")
@classmethod
def to_service_credentials(cls) -> StrStr:
return {
"type": cls.BQ_CRED_TYPE,
"project_id": cls.PROJECT_ID,
"private_key": cls.BQ_CRED_PRIVATE_KEY,
"token_uri": cls.BQ_CRED_TOKEN_URI,
"client_email": cls.BQ_CRED_CLIENT_EMAIL
}
class GcpClientProductionConfiguration(GcpClientConfiguration):
PROJECT_ID: str = None
DATASET: str = None
BQ_CRED_PRIVATE_KEY: TConfigSecret = None
BQ_CRED_CLIENT_EMAIL: str = None

dlt/common/configuration/loading_volume_configuration.py Normal file

@@ -0,0 +1,6 @@
class LoadingVolumeConfiguration:
LOADING_VOLUME_PATH: str = "_storage/loading" # path to volume where files to be loaded to analytical storage are stored
DELETE_COMPLETED_JOBS: bool = False # if set to true the folder with completed jobs will be deleted
class ProductionLoadingVolumeConfiguration(LoadingVolumeConfiguration):
LOADING_VOLUME_PATH: str = None

dlt/common/configuration/pool_runner_configuration.py Normal file

@@ -0,0 +1,13 @@
from typing import Literal, Optional
from dlt.common.configuration import BasicConfiguration
TPoolType = Literal["process", "thread", "none"]
class PoolRunnerConfiguration(BasicConfiguration):
MAX_PARALLELISM: Optional[int] = None # how many threads/processes in the pool
EXIT_ON_EXCEPTION: bool = False # should exit on exception
STOP_AFTER_RUNS: int = 10000 # stop the runner with exit code -2 after this many runs; prevents memory fragmentation
POOL_TYPE: TPoolType = None # type of pool to run, must be set in derived configs
RUN_SLEEP: float = 0.5 # how long to sleep between runs with workload, seconds
RUN_SLEEP_IDLE: float = 1.0 # how long to sleep when no more items are pending, seconds
RUN_SLEEP_WHEN_FAILED: float = 1.0 # how long to sleep between the runs when failed

dlt/common/configuration/postgres_configuration.py Normal file

@@ -0,0 +1,25 @@
from dlt.common.configuration.utils import TConfigSecret
class PostgresConfiguration:
PG_DATABASE_NAME: str = None
PG_SCHEMA_PREFIX: str = None
PG_PASSWORD: TConfigSecret = None
PG_USER: str = None
PG_HOST: str = None
PG_PORT: int = 5439
PG_CONNECTION_TIMEOUT: int = 15
@classmethod
def check_integrity(cls) -> None:
cls.PG_DATABASE_NAME = cls.PG_DATABASE_NAME.lower()
cls.PG_SCHEMA_PREFIX = cls.PG_SCHEMA_PREFIX.lower()
cls.PG_PASSWORD = TConfigSecret(cls.PG_PASSWORD.strip())
class PostgresProductionConfiguration(PostgresConfiguration):
PG_DATABASE_NAME: str = None
PG_SCHEMA_PREFIX: str = None
PG_PASSWORD: TConfigSecret = None
PG_USER: str = None
PG_HOST: str = None

dlt/common/configuration/schema_volume_configuration.py Normal file

@@ -0,0 +1,6 @@
class SchemaVolumeConfiguration:
SCHEMA_VOLUME_PATH: str = "_storage/schemas" # path to volume with default schemas
class ProductionSchemaVolumeConfiguration:
SCHEMA_VOLUME_PATH: str = None

dlt/common/configuration/unpacking_volume_configuration.py Normal file

@@ -0,0 +1,6 @@
class UnpackingVolumeConfiguration:
UNPACKING_VOLUME_PATH: str = "_storage/unpacking" # path to volume where unpacking will happen
class ProductionUnpackingVolumeConfiguration:
UNPACKING_VOLUME_PATH: str = None

dlt/common/configuration/utils.py Normal file

@@ -0,0 +1,214 @@
import sys
import semver
from os import environ
from os.path import isdir, isfile
from typing import Any, Dict, List, Mapping, NewType, Optional, Type, TypeVar, Union, Literal, IO, cast
from dlt.common.typing import StrAny
from dlt.common.configuration import BasicConfiguration
from dlt.common.configuration.exceptions import (ConfigEntryMissingException,
ConfigEnvValueCannotBeCoercedException, ConfigFileNotFoundException)
from dlt.common.utils import uniq_id
SIMPLE_TYPES: List[Any] = [int, bool, list, dict, tuple, bytes, set, float]
# those types and Optionals of those types should not be passed to eval function
NON_EVAL_TYPES = [str, None, Any]
# allowed coercions: (target type, source type)
ALLOWED_TYPE_COERCIONS = [(float, int), (str, int), (str, float)]
IS_DEVELOPMENT_CONFIG_KEY: str = "IS_DEVELOPMENT_CONFIG"
CHECK_INTEGRITY_F: str = "check_integrity"
SECRET_STORAGE_PATH: str = "/run/secrets/%s"
TConfiguration = TypeVar("TConfiguration", bound=Type[BasicConfiguration])
TProductionConfiguration = TypeVar("TProductionConfiguration", bound=Type[BasicConfiguration])
TConfigSecret = NewType("TConfigSecret", str)
def make_configuration(config: TConfiguration,
production_config: TProductionConfiguration,
initial_values: StrAny = None,
accept_partial: bool = False,
skip_subclass_check: bool = False) -> TConfiguration:
if not skip_subclass_check:
assert issubclass(production_config, config)
final_config: TConfiguration = config if _is_development_config() else production_config
possible_keys_in_config = _get_config_attrs_with_hints(final_config)
# create dynamic class type to not touch original config variables
derived_config: TConfiguration = cast(TConfiguration,
type(final_config.__name__ + "_" + uniq_id(), (final_config, ), {})
)
# apply initial values while preserving hints
if initial_values:
for k, v in initial_values.items():
setattr(derived_config, k, v)
_apply_environ_to_config(derived_config, possible_keys_in_config)
try:
_is_config_bounded(derived_config, possible_keys_in_config)
_check_configuration_integrity(derived_config)
except ConfigEntryMissingException:
if not accept_partial:
raise
_add_module_version(derived_config)
return derived_config
def has_configuration_file(name: str, config: TConfiguration) -> bool:
return isfile(get_configuration_file_path(name, config))
def open_configuration_file(name: str, mode: str, config: TConfiguration) -> IO[Any]:
path = get_configuration_file_path(name, config)
if not has_configuration_file(name, config):
raise ConfigFileNotFoundException(path)
return open(path, mode)
def get_configuration_file_path(name: str, config: TConfiguration) -> str:
return config.CONFIG_FILES_STORAGE_PATH % name
def is_direct_descendant(child: Type[Any], base: Type[Any]) -> bool:
# TODO: there may be a faster way to get the direct descendant than mro
# note: at index zero there's child
return base == type.mro(child)[1]
def _is_development_config() -> bool:
is_dev_config = True
# get from environment
if IS_DEVELOPMENT_CONFIG_KEY in environ:
is_dev_config = _coerce_single_value(IS_DEVELOPMENT_CONFIG_KEY, environ[IS_DEVELOPMENT_CONFIG_KEY], bool)
return is_dev_config
def _add_module_version(config: TConfiguration) -> None:
try:
v = sys._getframe(1).f_back.f_globals["__version__"]
semver.VersionInfo.parse(v)
setattr(config, "_VERSION", v) # noqa: B010
except KeyError:
pass
def _apply_environ_to_config(config: TConfiguration, keys_in_config: Mapping[str, type]) -> None:
for key, hint in keys_in_config.items():
value = _get_key_value(key, hint)
if value is not None:
value_from_environment_variable = _coerce_single_value(key, value, hint)
# set value
setattr(config, key, value_from_environment_variable)
def _get_key_value(key: str, hint: Type[Any]) -> Optional[str]:
if hint is TConfigSecret:
# try secret storage
try:
# must conform to RFC1123
secret_name = key.lower().replace("_", "-")
secret_path = SECRET_STORAGE_PATH % secret_name
# kubernetes stores secrets as files in a dir, docker compose as plain files
if isdir(secret_path):
secret_path += "/" + secret_name
with open(secret_path, "r") as f:
secret = f.read()
# add secret to environ so forks have access
# TODO: stripping newlines is not always right: it is fine for passwords but not for PEM keys
# TODO: for such secrets this is currently handled in the specific configuration logic
environ[key] = secret.strip()
# do not strip returned secret
return secret
except FileNotFoundError:
pass
return environ.get(key, None)
def _is_config_bounded(config: TConfiguration, keys_in_config: Mapping[str, type]) -> None:
_unbound_attrs = [
key for key in keys_in_config if getattr(config, key) is None and not _is_optional_type(keys_in_config[key])
]
if len(_unbound_attrs) > 0:
raise ConfigEntryMissingException(_unbound_attrs)
def _check_configuration_integrity(config: TConfiguration) -> None:
# python multi-inheritance is cooperative and this would require that all configurations cooperatively
# call each other's check_integrity. this is not possible as we do not know which configs will be
# mixed together in the end.
# get base classes in order of derivation
mro = type.mro(config)
for c in mro:
# check if this class implements check_integrity (skip pure inheritance to not do double work)
if CHECK_INTEGRITY_F in c.__dict__ and callable(getattr(c, CHECK_INTEGRITY_F)):
# access the unbound __func__ to pass the right class type so we check the settings at the tip of the mro
c.__dict__[CHECK_INTEGRITY_F].__func__(config)
def _coerce_single_value(key: str, value: str, hint: Type[Any]) -> Any:
try:
hint_primitive_type = _extract_simple_type(hint)
if hint_primitive_type not in NON_EVAL_TYPES:
# create primitive types out of strings
typed_value = eval(value) # nosec
# for primitive types check coercion
if hint_primitive_type in SIMPLE_TYPES and type(typed_value) != hint_primitive_type:
# allow some exceptions
coerce_exception = next(
(e for e in ALLOWED_TYPE_COERCIONS if e == (hint_primitive_type, type(typed_value))), None)
if coerce_exception:
return hint_primitive_type(typed_value)
else:
raise ConfigEnvValueCannotBeCoercedException(key, typed_value, hint)
return typed_value
else:
return value
except ConfigEnvValueCannotBeCoercedException:
raise
except Exception as exc:
raise ConfigEnvValueCannotBeCoercedException(key, value, hint) from exc
def _extract_simple_type(hint: Type[Any]) -> Type[Any]:
# extract optional type and call recursively
if _is_literal_type(hint):
# assume that all literals are of the same type
return _extract_simple_type(type(hint.__args__[0]))
if _is_optional_type(hint):
# todo: use `get_args` in python 3.8
return _extract_simple_type(hint.__args__[0])
if not hasattr(hint, "__supertype__"):
return hint
# descend into supertypes of NewType
return _extract_simple_type(hint.__supertype__)
def _get_config_attrs_with_hints(config: TConfiguration) -> Dict[str, type]:
keys: Dict[str, type] = {}
mro = type.mro(config)
for cls in reversed(mro):
# update in reverse derivation order so derived classes overwrite hints from base classes
if cls is not object:
keys.update(
[(attr, cls.__annotations__.get(attr, None))
# if hasattr(config, '__annotations__') and attr in config.__annotations__ else None)
for attr in cls.__dict__.keys() if not callable(getattr(cls, attr)) and not attr.startswith("__")
])
return keys
def _is_optional_type(hint: Type[Any]) -> bool:
# todo: use typing get_args and get_origin in python 3.8
if hasattr(hint, "__origin__"):
return hint.__origin__ is Union and type(None) in hint.__args__
return False
def _is_literal_type(hint: Type[Any]) -> bool:
return hasattr(hint, "__origin__") and hint.__origin__ is Literal
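A minimal usage sketch of make_configuration (not part of this commit; LoaderConfiguration is a hypothetical config class used only for illustration):

import os
from dlt.common.configuration import BasicConfiguration, make_configuration

class LoaderConfiguration(BasicConfiguration):
    NAME: str = "loader"
    MAX_RETRIES: int = 3  # the attribute name doubles as the environment variable name

os.environ["MAX_RETRIES"] = "5"  # string values from the environment are coerced to the annotated type
C = make_configuration(LoaderConfiguration, LoaderConfiguration)
assert C.MAX_RETRIES == 5 and C.NAME == "loader"

Note that make_configuration returns a dynamically derived class, so the attributes of the original LoaderConfiguration are left untouched.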


@@ -0,0 +1,58 @@
import jsonlines
from typing import Any, Iterable, Literal, Sequence, IO
from dlt.common import json
from dlt.common.typing import StrAny
TWriterType = Literal["jsonl", "insert_values"]
def write_jsonl(f: IO[Any], rows: Sequence[Any]) -> None:
# use jsonl to write load files https://jsonlines.org/
with jsonlines.Writer(f, dumps=json.dumps) as w:
w.write_all(rows)
def write_insert_values(f: IO[Any], rows: Sequence[StrAny], headers: Iterable[str]) -> None:
# dict lookup is always faster
headers_lookup = {v: i for i, v in enumerate(headers)}
# do not write INSERT INTO command, this must be added together with table name by the loader
f.write("INSERT INTO {}(")
f.write(",".join(map(escape_redshift_identifier, headers)))
f.write(")\nVALUES\n")
def stringify(v: Any) -> str:
if type(v) is bytes:
return f"from_hex('{v.hex()}')"
else:
return str(v)
def write_row(row: StrAny) -> None:
output = ["NULL" for _ in range(len(headers_lookup))]
for n,v in row.items():
output[headers_lookup[n]] = escape_redshift_literal(v) if type(v) is str else stringify(v)
f.write("(")
f.write(",".join(output))
f.write(")")
for row in rows[:-1]:
write_row(row)
f.write(",\n")
write_row(rows[-1])
f.write(";")
def escape_redshift_literal(v: str) -> str:
# https://www.postgresql.org/docs/9.3/sql-syntax-lexical.html
# looks like this is the only thing we need to escape for Postgres > 9.1
# redshift keeps \ as escape character which is pre 9 behavior
return "'" + v.replace("'", "''").replace("\\", "\\\\") + "'"
def escape_redshift_identifier(v: str) -> str:
return '"' + v.replace('"', '""').replace("\\", "\\\\") + '"'
def escape_bigquery_identifier(v: str) -> str:
# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical
return "`" + v.replace("\\", "\\\\").replace("`","\\`") + "`"

58
dlt/common/exceptions.py Normal file

@@ -0,0 +1,58 @@
class DltException(Exception):
pass
class SignalReceivedException(DltException):
def __init__(self, signal_code: int) -> None:
self.signal_code = signal_code
super().__init__(f"Signal {signal_code} received")
class PoolException(DltException):
"""
Thrown by a worker pool to pass information about an exception raised while processing an item
"""
def __init__(self, pool_name: str = None, item: str = None, internal_exception: Exception = None) -> None:
# we need it to make it pickle compatible
if pool_name:
self.pool_name = pool_name
self.item = item
self.internal_exception = internal_exception
super().__init__(f"Pool {pool_name} raised on item {item} with {str(internal_exception)}")
class UnsupportedProcessStartMethodException(DltException):
def __init__(self, method: str) -> None:
self.method = method
super().__init__(f"Process pool supports only fork start method, {method} not supported. Switch the pool type to threading")
class TerminalException(Exception):
"""
Marks an exception that cannot be recovered from; should be mixed into a concrete exception class
"""
pass
class TransientException(Exception):
"""
Marks an exception in an operation that can be retried; should be mixed into a concrete exception class
"""
pass
class TerminalValueError(ValueError, TerminalException):
"""
ValueError that is unrecoverable
"""
pass
class TimeRangeExhaustedException(DltException):
"""
Raised when backfilling is complete and no more time ranges can be generated
"""
def __init__(self, start_ts: float, end_ts: float) -> None:
self.start_ts = start_ts
self.end_ts = end_ts
super().__init__(f"Timerange ({start_ts} to {end_ts}> exhausted")

135
dlt/common/file_storage.py Normal file

@@ -0,0 +1,135 @@
import os
import tempfile
import shutil
from pathlib import Path
from typing import IO, Any, List
class FileStorage:
def __init__(self,
storage_path: str,
file_type: str = "t",
makedirs: bool = False) -> None:
# make it absolute path
self.storage_path = os.path.join(os.path.realpath(storage_path), '')
self.file_type = file_type
if makedirs:
os.makedirs(storage_path, exist_ok=True)
@classmethod
def from_file(cls, file_path: str, file_type: str = "t",) -> "FileStorage":
return cls(os.path.dirname(file_path), file_type)
def save(self, relative_path: str, data: Any) -> str:
return self.save_atomic(self.storage_path, relative_path, data, file_type=self.file_type)
@staticmethod
def save_atomic(storage_path: str, relative_path: str, data: Any, file_type: str = "t") -> str:
with tempfile.NamedTemporaryFile(dir=storage_path, mode="w" + file_type, delete=False) as f:
tmp_path = f.name
f.write(data)
try:
dest_path = os.path.join(storage_path, relative_path)
os.rename(tmp_path, dest_path)
return dest_path
except Exception:
if os.path.isfile(tmp_path):
os.remove(tmp_path)
raise
def load(self, relative_path: str) -> Any:
# raises on file not existing
with self.open(relative_path) as text_file:
return text_file.read()
def delete(self, relative_path: str) -> None:
file_path = self._make_path(relative_path)
if os.path.isfile(file_path):
os.remove(file_path)
else:
raise FileNotFoundError(file_path)
def delete_folder(self, relative_path: str, recursively: bool = False) -> None:
folder_path = self._make_path(relative_path)
if os.path.isdir(folder_path):
if recursively:
shutil.rmtree(folder_path)
else:
os.rmdir(folder_path)
else:
raise NotADirectoryError(folder_path)
def open(self, relative_path: str, mode: str = "r") -> IO[Any]:
return open(self._make_path(relative_path), mode + self.file_type)
def open_temp(self, delete: bool = False, mode: str = "w", file_type: str = None) -> IO[Any]:
ft = file_type or self.file_type
return tempfile.NamedTemporaryFile(dir=self.storage_path, mode=mode + ft, delete=delete)
def has_file(self, relative_path: str) -> bool:
return os.path.isfile(self._make_path(relative_path))
def has_folder(self, relative_path: str) -> bool:
return os.path.isdir(self._make_path(relative_path))
def list_folder_files(self, relative_path: str, to_root: bool = True) -> List[str]:
scan_path = self._make_path(relative_path)
if to_root:
# list files in relative path, returning paths relative to storage root
return [os.path.join(relative_path, e.name) for e in os.scandir(scan_path) if e.is_file()]
else:
# or to the folder
return [e.name for e in os.scandir(scan_path) if e.is_file()]
def list_folder_dirs(self, relative_path: str, to_root: bool = True) -> List[str]:
# list content of relative path, returning paths relative to storage root
scan_path = self._make_path(relative_path)
if to_root:
# list folders in relative path, returning paths relative to storage root
return [os.path.join(relative_path, e.name) for e in os.scandir(scan_path) if e.is_dir()]
else:
# or to the folder
return [e.name for e in os.scandir(scan_path) if e.is_dir()]
def create_folder(self, relative_path: str, exists_ok: bool = False) -> None:
os.makedirs(self._make_path(relative_path), exist_ok=exists_ok)
def copy_cross_storage_atomically(self, dest_volume_root: str, dest_relative_path: str, source_path: str, dest_name: str) -> None:
external_tmp_file = tempfile.mktemp(dir=dest_volume_root)
# first copy to temp file
shutil.copy(self._make_path(source_path), external_tmp_file)
# then rename to dest name
external_dest = os.path.join(dest_volume_root, dest_relative_path, dest_name)
try:
os.rename(external_tmp_file, external_dest)
except Exception:
if os.path.isfile(external_tmp_file):
os.remove(external_tmp_file)
raise
def atomic_rename(self, from_relative_path: str, to_relative_path: str) -> None:
os.rename(
self._make_path(from_relative_path),
self._make_path(to_relative_path)
)
def in_storage(self, path: str) -> bool:
file = os.path.realpath(path)
# return true, if the common prefix of both is equal to directory
# e.g. /a/b/c/d.rst and directory is /a/b, the common prefix is /a/b
return os.path.commonprefix([file, self.storage_path]) == self.storage_path
def to_relative_path(self, path: str) -> str:
if not self.in_storage(path):
raise ValueError(path)
return os.path.relpath(path, start=self.storage_path)
def get_file_stem(self, path: str) -> str:
return Path(os.path.basename(path)).stem
def get_file_name(self, path: str) -> str:
return Path(path).name
def _make_path(self, relative_path: str) -> str:
return os.path.join(self.storage_path, relative_path)
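A minimal usage sketch of FileStorage (paths are illustrative; not part of this commit):

from dlt.common.file_storage import FileStorage

storage = FileStorage("_storage/example", makedirs=True)
storage.save("hello.txt", "hi!")            # atomic write: temp file + rename
print(storage.load("hello.txt"))            # -> hi!
print(storage.list_folder_files("."))       # paths relative to the storage root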

46
dlt/common/json.py Normal file

@@ -0,0 +1,46 @@
import base64
from datetime import date, datetime # noqa: I251
from functools import partial
from typing import Any, Union
from uuid import UUID
from hexbytes import HexBytes
import simplejson
from simplejson.raw_json import RawJSON
from dlt.common.arithmetics import Decimal
# simplejson._toggle_speedups(False)
def custom_encode(obj: Any) -> Union[RawJSON, str]:
if isinstance(obj, Decimal):
# always return decimals as string (not RawJSON) so they are not deserialized back to float
return str(obj.normalize())
# this works both for standard datetime and pendulum
elif isinstance(obj, datetime):
# See "Date Time String Format" in the ECMA-262 specification.
r = obj.isoformat()
# leave microseconds alone
# if obj.microsecond:
# r = r[:23] + r[26:]
if r.endswith('+00:00'):
r = r[:-6] + 'Z'
return r
elif isinstance(obj, date):
return obj.isoformat()
elif isinstance(obj, UUID):
return str(obj)
elif isinstance(obj, HexBytes):
return obj.hex()
elif isinstance(obj, bytes):
return base64.b64encode(obj).decode('ascii')
raise TypeError(repr(obj) + " is not JSON serializable")
simplejson.loads = partial(simplejson.loads, use_decimal=False)
simplejson.load = partial(simplejson.load, use_decimal=False)
# prevent default decimal serializer (use_decimal=False) and binary serializer (encoding=None)
simplejson.dumps = partial(simplejson.dumps, use_decimal=False, default=custom_encode, encoding=None)
simplejson.dump = partial(simplejson.dump, use_decimal=False, default=custom_encode, encoding=None)
# provide drop-in replacement
json = simplejson
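A short sketch of the customized serializer in action (not part of this commit):

from dlt.common import json, pendulum, Decimal

doc = {"amount": Decimal("10.50"), "at": pendulum.datetime(2022, 6, 3)}
print(json.dumps(doc))
# {"amount": "10.5", "at": "2022-06-03T00:00:00Z"}

Decimals are rendered as strings on purpose so that they are not deserialized back into floats.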

207
dlt/common/logger.py Normal file

@@ -0,0 +1,207 @@
import logging
import json_logging
import traceback
import sentry_sdk
from sentry_sdk.transport import HttpTransport
from logging import LogRecord, Logger
from typing import Any, Callable, Dict, Type
from dlt.common.json import json
from dlt.common.typing import DictStrAny, DictStrStr, StrStr
from dlt.common.configuration import BasicConfiguration
from dlt.common.utils import filter_env_vars
from dlt._version import common_version as __version__
DLT_LOGGER_NAME = "sv-dlt"
LOGGER: Logger = None
def _add_logging_level(level_name: str, level: int, method_name: str = None) -> None:
"""
Comprehensively adds a new logging level to the `logging` module and the
currently configured logging class.
`level_name` becomes an attribute of the `logging` module with the value
`level`. `method_name` becomes a convenience method for both `logging`
itself and the class returned by `logging.getLoggerClass()` (usually just
`logging.Logger`). If `method_name` is not specified, `level_name.lower()` is
used.
To avoid accidental clobbering of existing attributes, this method will
raise an `AttributeError` if the level name is already an attribute of the
`logging` module or if the method name is already present.
"""
if not method_name:
method_name = level_name.lower()
if hasattr(logging, level_name):
raise AttributeError('{} already defined in logging module'.format(level_name))
if hasattr(logging, method_name):
raise AttributeError('{} already defined in logging module'.format(method_name))
if hasattr(logging.getLoggerClass(), method_name):
raise AttributeError('{} already defined in logger class'.format(method_name))
# This method was inspired by the answers to Stack Overflow post
# http://stackoverflow.com/q/2183233/2988730, especially
# http://stackoverflow.com/a/13638084/2988730
def logForLevel(self: logging.Logger, message: str, *args: Any, **kwargs: Any) -> None:
if self.isEnabledFor(level):
self._log(level, message, args, **kwargs)
def logToRoot(message: str, *args: Any, **kwargs: Any) -> None:
logging.root._log(level, message, args, **kwargs)
logging.addLevelName(level, level_name)
setattr(logging, level_name, level)
setattr(logging.getLoggerClass(), method_name, logForLevel)
setattr(logging, method_name, logToRoot)
class _MetricsFormatter(logging.Formatter):
def format(self, record: LogRecord) -> str:
s = super(_MetricsFormatter, self).format(record)
if record.exc_text:
s = s + '|'
# dump metrics dictionary nicely
if "metrics" in record.__dict__:
s = s + ": " + json.dumps(record.__dict__["metrics"])
return s
class _CustomJsonFormatter(json_logging.JSONLogFormatter):
version: StrStr = None
def _format_log_object(self, record: LogRecord, request_util: Any) -> Any:
json_log_object = super(_CustomJsonFormatter, self)._format_log_object(record, request_util)
if self.version:
json_log_object.update({"version": self.version})
return json_log_object
def _init_logging(logger_name: str, level: str, format: str, component: str, version: StrStr) -> Logger:
if logger_name == "root":
logging.basicConfig(level=level)
handler = logging.getLogger().handlers[0]
# handler.setFormatter(_MetricsFormatter(fmt=format, style='{'))
logger = logging.getLogger()
else:
logger = logging.getLogger(DLT_LOGGER_NAME)
logger.propagate = False
logger.setLevel(level)
handler = logging.StreamHandler()
# handler.setFormatter(_MetricsFormatter(fmt=format, style='{'))
logger.addHandler(handler)
# set right formatter
if is_json_logging(format):
json_logging.COMPONENT_NAME = component
json_logging.JSON_SERIALIZER = json.dumps
json_logging.RECORD_ATTR_SKIP_LIST.remove("process")
# set version as class variable as we cannot pass custom constructor parameters
_CustomJsonFormatter.version = version
# the only thing method above effectively does is to replace the formatter
json_logging.init_non_web(enable_json=True, custom_formatter=_CustomJsonFormatter)
if logger_name == "root":
json_logging.config_root_logger()
else:
handler.setFormatter(_MetricsFormatter(fmt=format, style='{'))
return logger
def __getattr__(name: str) -> Callable[..., Any]:
# a catch all function for a module that forwards calls to unknown methods to LOGGER
def wrapper(msg: str, *args: Any, **kwargs: Any) -> None:
if LOGGER:
getattr(LOGGER, name)(msg, *args, **kwargs, stacklevel=2)
return wrapper
def _extract_version_info(config: Type[BasicConfiguration]) -> StrStr:
version_info = {"version": __version__, "component_name": config.NAME}
version = getattr(config, "_VERSION", None)
if version:
version_info["component_version"] = version
# extract envs with build info
version_info.update(filter_env_vars(["COMMIT_SHA", "IMAGE_VERSION"]))
return version_info
def _extract_pod_info() -> StrStr:
return filter_env_vars(["KUBE_NODE_NAME", "KUBE_POD_NAME", "KUBE_POD_NAMESPACE"])
class _SentryHttpTransport(HttpTransport):
timeout: int = 0
def _get_pool_options(self, *a: Any, **kw: Any) -> DictStrAny:
rv = HttpTransport._get_pool_options(self, *a, **kw)
rv['timeout'] = self.timeout
return rv
def _init_sentry(config: Type[BasicConfiguration], version: StrStr) -> None:
if config.SENTRY_DSN:
global sentry_client
sys_ver = version["version"]
release = sys_ver + "_" + version.get("commit_sha", "")
_SentryHttpTransport.timeout = config.REQUEST_TIMEOUT[0]
# TODO: set up automatic sending of log messages by log level (i.e. we send a lot of noisy dbt logs)
# https://docs.sentry.io/platforms/python/guides/logging/
sentry_sdk.init(config.SENTRY_DSN, release=release, transport=_SentryHttpTransport)
# add version tags
for k, v in version.items():
sentry_sdk.set_tag(k, v)
# add kubernetes tags
pod_tags = _extract_pod_info()
for k, v in pod_tags.items():
sentry_sdk.set_tag(k, v)
def init_telemetry(config: Type[BasicConfiguration]) -> None:
if config.PROMETHEUS_PORT:
from prometheus_client import start_http_server, Info
logging.info(f"Starting prometheus server port {config.PROMETHEUS_PORT}")
start_http_server(config.PROMETHEUS_PORT)
# collect info
Info("runs_component_name", "Name of the executing component").info(_extract_version_info(config))
def init_logging_from_config(config: Type[BasicConfiguration]) -> None:
global LOGGER
# add HEALTH and METRICS log levels
_add_logging_level("HEALTH", logging.WARNING - 1, "health")
_add_logging_level("METRICS", logging.WARNING - 2, "metrics")
version = _extract_version_info(config)
LOGGER = _init_logging(
DLT_LOGGER_NAME,
# "root",
config.LOG_LEVEL,
config.LOG_FORMAT,
config.NAME,
version)
_init_sentry(config, version)
def is_json_logging(log_format: str) -> bool:
return log_format == "JSON"
def process_internal_exception(msg: str, exc_info: Any = True) -> None:
# Passing default True value will cause implementation to use data provided by sys.exc_info
if LOGGER:
LOGGER.error(msg, exc_info=exc_info, stacklevel=2)
report_exception()
def report_exception() -> None:
if sentry_sdk.Hub.current:
sentry_sdk.capture_exception()
def pretty_format_exception() -> str:
return traceback.format_exc()
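A minimal sketch of initializing the logger for a component (UnpackerConfiguration below is hypothetical; any BasicConfiguration subclass with NAME set works the same way):

from dlt.common import logger
from dlt.common.configuration import BasicConfiguration

class UnpackerConfiguration(BasicConfiguration):
    NAME: str = "unpacker"

logger.init_logging_from_config(UnpackerConfiguration)
logger.info("runner started")  # forwarded to LOGGER by the module-level __getattr__
logger.metrics("run finished", extra={"metrics": {"processed": 10}})  # custom METRICS level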

147
dlt/common/parser.py Normal file

@@ -0,0 +1,147 @@
import re
from typing import Iterator, Optional, Tuple, Callable, cast
from dlt.common import json
from dlt.common.schema import Schema
from dlt.common.utils import uniq_id, digest128
from dlt.common.typing import TEvent, TEventRowChild, TEventRowRoot, StrAny
# iterator of (table name, row data) tuples
TUnpackedRowIterator = Iterator[Tuple[str, StrAny]]
TExtractFunc = Callable[[Schema, TEvent, str, bool], TUnpackedRowIterator]
RE_UNDERSCORES = re.compile("_+")
RE_LEADING_DIGITS = re.compile(r"^\d+")
INVALID_SQL_IDENT_CHARS = "- *!:,.'\\\"`"
INVALID_SQL_TX = str.maketrans(INVALID_SQL_IDENT_CHARS, "_" * len(INVALID_SQL_IDENT_CHARS))
# subsequent nested fields will be separated with the string below, applies both to field and table names
PATH_SEPARATOR = "__"
# for those paths the complex nested objects should be left in place
# current use case: we want to preserve event_slot__value in db even if it's an object
# TODO: pass table definition and accept complex type
def _should_preserve_complex_value(table: str, field_name: str) -> bool:
path = f"{table}{PATH_SEPARATOR}{field_name}"
return path in ["event_slot__value"]
def _fix_field_name(name: str) -> str:
def camel_to_snake(name: str) -> str:
name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()
# fix the field name so it's an acceptable name for a database column:
# characters from INVALID_SQL_IDENT_CHARS are replaced with underscores and camelCase becomes snake_case
name = camel_to_snake(name.translate(INVALID_SQL_TX))
name = RE_LEADING_DIGITS.sub("_", name)
# replace consecutive underscores with a single one to prevent name clashes with the parent/child separator
return RE_UNDERSCORES.sub("_", name)
def _flatten(table: str, dict_row: TEventRowChild) -> TEventRowChild:
out_rec_row: TEventRowChild = {}
def unpack_row_dicts(dict_row: StrAny, parent_name: Optional[str]) -> None:
for k, v in dict_row.items():
corrected_k = _fix_field_name(k)
child_name = corrected_k if not parent_name else f'{parent_name}{PATH_SEPARATOR}{corrected_k}'
if type(v) is dict:
unpack_row_dicts(v, parent_name=child_name)
if _should_preserve_complex_value(table, child_name):
out_rec_row[child_name] = v # type: ignore
else:
out_rec_row[child_name] = v # type: ignore
unpack_row_dicts(dict_row, None)
return out_rec_row
def _get_child_row_hash(parent_hash: str, child_table: str, list_pos: int) -> str:
# create deterministic unique id of the child row taking into account that all lists are ordered
# and all child tables must be lists
return digest128(f"{parent_hash}_{child_table}_{list_pos}")
def _unpack_row(
schema: Schema,
dict_row: TEventRowChild,
extend: TEventRowChild,
table: str,
parent_hash: Optional[str] = None,
pos: Optional[int] = None
) -> TUnpackedRowIterator:
def _append_child_meta(_row: TEventRowChild, _hash: str, _p_hash: str, _p_pos: int) -> TEventRowChild:
_row["_parent_hash"] = _p_hash
_row["_pos"] = _p_pos
_row.update(extend)
return _row
is_top_level = parent_hash is None
# flatten current row
new_dict_row = _flatten(table, dict_row)
# infer record hash or leave existing primary key if present
record_hash = new_dict_row.get("_record_hash", None)
if not record_hash:
# check if we have primary key: if so use it
primary_key = schema.filter_hints_in_row(table, "primary_key", new_dict_row)
if primary_key:
# create row id from primary key
record_hash = digest128("_".join(map(lambda v: str(v), primary_key.values())))
elif not is_top_level:
# child table row deterministic hash
record_hash = _get_child_row_hash(parent_hash, table, pos)
# link to parent table
_append_child_meta(new_dict_row, record_hash, parent_hash, pos)
else:
# create random row id, note that incremental loads will not work with such tables
record_hash = uniq_id()
new_dict_row["_record_hash"] = record_hash
# if _root_hash propagation requested and we are at the top level then update extend
if "_root_hash" in extend and extend["_root_hash"] is None and is_top_level:
extend["_root_hash"] = record_hash
# generate child tables only for lists
children = [k for k in new_dict_row if type(new_dict_row[k]) is list] # type: ignore
for k in children:
child_table = f"{table}{PATH_SEPARATOR}{k}"
# this will skip empty lists
v: TEventRowChild
for idx, v in enumerate(new_dict_row[k]): # type: ignore
# yield child table row
if type(v) is dict:
yield from _unpack_row(schema, v, extend, child_table, record_hash, idx)
elif type(v) is list:
# unpack lists of lists
raise ValueError(v)
else:
# list of simple types
child_row_hash = _get_child_row_hash(record_hash, child_table, idx)
e = _append_child_meta({"value": v, "_record_hash": child_row_hash}, child_row_hash, record_hash, idx)
yield child_table, e
if not _should_preserve_complex_value(table, k):
# remove child list
del new_dict_row[k] # type: ignore
yield table, new_dict_row
def extract(schema: Schema, source_event: TEvent, load_id: str, add_json: bool) -> TUnpackedRowIterator:
# we will extend event with all the fields necessary to load it as root row
event = cast(TEventRowRoot, source_event)
# identify load id if loaded data must be processed after loading incrementally
event["_load_id"] = load_id
# add original json field, mostly useful for debugging
if add_json:
event["_event_json"] = json.dumps(event)
# find table name
table_name = event.pop("_event_type", None) or schema.schema_name
# TODO: if table_name exist get "_dist_key" and "_timestamp" from the table definition in schema and propagate, if not take them from global hints
# use event type or schema name as table name, request _root_hash propagation
yield from _unpack_row(schema, cast(TEventRowChild, event), {"_root_hash": None}, table_name)
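A small illustration of the column-name normalization applied by _flatten (these are private helpers; shown only to document the naming convention, not part of this commit):

from dlt.common.parser import PATH_SEPARATOR, _fix_field_name

print(_fix_field_name("userID"))          # -> user_id
print(_fix_field_name("Purchase Order"))  # -> purchase_order
# nested fields and child tables are joined with PATH_SEPARATOR, e.g. event_slot__value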

16
dlt/common/pendulum.py Normal file

@@ -0,0 +1,16 @@
import pendulum # noqa: I251
# force UTC as the local timezone to prevent local dates from being written to dbs
pendulum.set_local_timezone(pendulum.timezone('UTC')) # type: ignore
def __utcnow() -> pendulum.DateTime:
"""
Use this function instead of datetime.now
Returns:
pendulum.DateTime -- current time in UTC timezone
"""
return pendulum.now()
pendulum.utcnow = __utcnow # type: ignore
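A quick check of the override above (not part of this commit):

from dlt.common import pendulum

now = pendulum.now()              # "local" time is forced to UTC by the override above
print(now.timezone_name)          # -> UTC
print(pendulum.utcnow() >= now)   # the patched utcnow() helper, also UTC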

181
dlt/common/runners.py Normal file

@@ -0,0 +1,181 @@
import argparse
import multiprocessing
from prometheus_client import Counter, Gauge, Summary, CollectorRegistry, REGISTRY
from typing import Callable, Dict, NamedTuple, Optional, Type, TypeVar, Union, cast
from multiprocessing.pool import ThreadPool, Pool
from dlt.common import logger, signals
from dlt.common.configuration.basic_configuration import BasicConfiguration
from dlt.common.time import sleep
from dlt.common.telemetry import TRunHealth, TRunMetrics, get_logging_extras, get_metrics_from_prometheus
from dlt.common.logger import init_logging_from_config, init_telemetry, process_internal_exception
from dlt.common.signals import register_signals
from dlt.common.utils import str2bool
from dlt.common.exceptions import SignalReceivedException, TimeRangeExhaustedException, UnsupportedProcessStartMethodException
from dlt.common.configuration import PoolRunnerConfiguration
TPool = TypeVar("TPool", bound=Pool)
class TRunArgs(NamedTuple):
single_run: bool
wait_runs: int
RUN_ARGS = TRunArgs(False, 0)
HEALTH_PROPS_GAUGES: Dict[str, Union[Counter, Gauge]] = None
RUN_DURATION_GAUGE: Gauge = None
RUN_DURATION_SUMMARY: Summary = None
LAST_RUN_METRICS: TRunMetrics = None
LAST_RUN_EXCEPTION: BaseException = None
def create_gauges(registry: CollectorRegistry) -> None:
global HEALTH_PROPS_GAUGES, RUN_DURATION_GAUGE, RUN_DURATION_SUMMARY
HEALTH_PROPS_GAUGES = {
"runs_count": Counter("runs_count", "Count runs", registry=registry),
"runs_not_idle_count": Counter("runs_not_idle_count", "Count not idle runs", registry=registry),
"runs_healthy_count": Counter("runs_healthy_count", "Count healthy runs", registry=registry),
"runs_cs_healthy_gauge": Gauge("runs_cs_healthy_gauge", "Count consecutive healthy runs, reset on failed run", registry=registry),
"runs_failed_count": Counter("runs_failed_count", "Count failed runs", registry=registry),
"runs_cs_failed_gauge": Gauge("runs_cs_failed_gauge", "Count consecutive failed runs, reset on healthy run", registry=registry),
"runs_pending_items_gauge": Gauge("runs_pending_items_gauge", "Number of items pending at the end of the run", registry=registry),
}
RUN_DURATION_GAUGE = Gauge("runs_duration_seconds", "Duration of the run", registry=registry)
RUN_DURATION_SUMMARY = Summary("runs_duration_summary", "Summary of the run duration", registry=registry)
def update_gauges() -> TRunHealth:
return get_metrics_from_prometheus(HEALTH_PROPS_GAUGES.values()) # type: ignore
def str2bool_a(v: str) -> bool:
try:
return str2bool(v)
except ValueError:
raise argparse.ArgumentTypeError('Boolean value expected.')
def create_default_args(C: Type[PoolRunnerConfiguration]) -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description=f"Default runner for {C.NAME}")
add_pool_cli_arguments(parser)
return parser
def add_pool_cli_arguments(parser: argparse.ArgumentParser) -> None:
parser.add_argument("--single-run", type=str2bool_a, nargs='?', const=True, default=False, help="exit when all pending items are processed")
parser.add_argument("--wait-runs", type=int, nargs='?', const=True, default=1, help="maximum idle runs to wait for incoming data")
def initialize_runner(C: Type[BasicConfiguration], run_args: Optional[TRunArgs] = None) -> None:
global RUN_ARGS
init_logging_from_config(C)
init_telemetry(C)
create_gauges(REGISTRY)
register_signals()
if run_args is not None:
RUN_ARGS = run_args
def pool_runner(C: Type[PoolRunnerConfiguration], run_f: Callable[[TPool], TRunMetrics]) -> int:
# start pool
pool: Pool = None
if C.POOL_TYPE == "process":
# our pool implementation does not work with the spawn start method
if multiprocessing.get_start_method() != "fork":
raise UnsupportedProcessStartMethodException(multiprocessing.get_start_method())
pool = Pool(processes=C.MAX_PARALLELISM)
elif C.POOL_TYPE == "thread":
pool = ThreadPool(processes=C.MAX_PARALLELISM)
else:
pool = None
logger.info(f"Created {C.POOL_TYPE} pool with {C.MAX_PARALLELISM or 'default no.'} workers")
try:
while True:
run_metrics: TRunMetrics = None
try:
HEALTH_PROPS_GAUGES["runs_count"].inc()
# run pool logic
with RUN_DURATION_SUMMARY.time(), RUN_DURATION_GAUGE.time():
run_metrics = run_f(cast(TPool, pool))
except Exception as exc:
if (type(exc) is SignalReceivedException) or (type(exc) is TimeRangeExhaustedException):
# always exit
raise
else:
process_internal_exception("run")
# the run failed
run_metrics = TRunMetrics(True, True, -1)
# preserve exception
global LAST_RUN_EXCEPTION
LAST_RUN_EXCEPTION = exc
# gather and emit metrics
if not run_metrics.was_idle:
HEALTH_PROPS_GAUGES["runs_not_idle_count"].inc()
if run_metrics.has_failed:
HEALTH_PROPS_GAUGES["runs_failed_count"].inc()
HEALTH_PROPS_GAUGES["runs_cs_failed_gauge"].inc()
HEALTH_PROPS_GAUGES["runs_cs_healthy_gauge"].set(0)
else:
HEALTH_PROPS_GAUGES["runs_healthy_count"].inc()
HEALTH_PROPS_GAUGES["runs_cs_healthy_gauge"].inc()
HEALTH_PROPS_GAUGES["runs_cs_failed_gauge"].set(0)
HEALTH_PROPS_GAUGES["runs_pending_items_gauge"].set(run_metrics.pending_items)
health_props = update_gauges()
logger.health("run health counters", extra={"metrics": health_props})
logger.metrics("run metrics", extra=get_logging_extras([RUN_DURATION_GAUGE, RUN_DURATION_SUMMARY]))
# preserve last run metrics
global LAST_RUN_METRICS
LAST_RUN_METRICS = run_metrics
# exit due to signal
signals.raise_if_signalled()
# exit due to exception and flag
if run_metrics.has_failed and C.EXIT_ON_EXCEPTION:
logger.warning(f"Exiting runner due to EXIT_ON_EXCEPTION flag set")
return -1
# single run may be forced but at least wait_runs must pass
if RUN_ARGS.single_run and (health_props["runs_count"] >= RUN_ARGS.wait_runs and
# and it was idle the whole time, or it was not idle but nothing is pending now
(health_props["runs_not_idle_count"] == 0 or run_metrics.pending_items == 0)):
logger.warning(f"Stopping runner due to single run override")
return 0
if run_metrics.has_failed:
sleep(C.RUN_SLEEP_WHEN_FAILED)
elif run_metrics.pending_items == 0:
# nothing is pending so we can sleep longer
sleep(C.RUN_SLEEP_IDLE)
else:
# more items are pending, sleep (typically) shorter
sleep(C.RUN_SLEEP)
# this allows recycling of long-living processes whose memory gets fragmented
# exit after the runner sleeps so the run period is preserved
if health_props["runs_count"] == C.STOP_AFTER_RUNS:
logger.warning(f"Stopping runner due to max runs {health_props['runs_count']} exceeded")
return -2
except SignalReceivedException as sigex:
# sleep() above may raise SignalReceivedException, which is handled here
logger.warning(f"Exiting runner due to signal {sigex.signal_code}")
return sigex.signal_code
except TimeRangeExhaustedException as tre:
logger.info(f"{str(tre)}, not further processing will be done")
return 0
finally:
if pool:
logger.info("Closing processing pool")
pool.close()
pool.join()
pool = None
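A hedged sketch of wiring a custom runner through the helpers above. It assumes PoolRunnerConfiguration supplies usable defaults for the remaining logging, telemetry and sleep settings, and that "thread" is a valid TPoolType value; the subclass and the run function are illustrative only:

from multiprocessing.pool import ThreadPool

from dlt.common.configuration import PoolRunnerConfiguration, TPoolType
from dlt.common.runners import TRunArgs, initialize_runner, pool_runner
from dlt.common.telemetry import TRunMetrics

class SingleRunConfiguration(PoolRunnerConfiguration):
    # hypothetical subclass: only the pool settings are overridden here
    POOL_TYPE: TPoolType = "thread"
    MAX_PARALLELISM: int = 2

def run(pool: ThreadPool) -> TRunMetrics:
    # one unit of work would go here; report "not idle, not failed, nothing pending"
    return TRunMetrics(was_idle=False, has_failed=False, pending_items=0)

if __name__ == "__main__":
    initialize_runner(SingleRunConfiguration, TRunArgs(single_run=True, wait_runs=1))
    exit(pool_runner(SingleRunConfiguration, run))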

575
dlt/common/schema.py Normal file
View File

@@ -0,0 +1,575 @@
import base64
import binascii
import yaml
import re
from re import Pattern
from copy import deepcopy
from dateutil.parser import isoparse
from typing import Dict, List, Set, Mapping, Optional, Sequence, Tuple, Type, TypedDict, Literal, Any, cast
from dlt.common import pendulum, json, Decimal
from dlt.common.typing import DictStrAny, StrAny, StrStr
from dlt.common.arithmetics import ConversionSyntax
from dlt.common.exceptions import DltException
DataType = Literal["text", "double", "bool", "timestamp", "bigint", "binary", "complex", "decimal", "wei"]
HintType = Literal["not_null", "partition", "cluster", "primary_key", "foreign_key", "sort", "unique"]
ColumnProp = Literal["name", "data_type", "nullable", "partition", "cluster", "primary_key", "foreign_key", "sort", "unique"]
DATA_TYPES: Set[DataType] = set(["text", "double", "bool", "timestamp", "bigint", "binary", "complex", "decimal", "wei"])
COLUMN_PROPS: Set[ColumnProp] = set(["name", "data_type", "nullable", "partition", "cluster", "primary_key", "foreign_key", "sort", "unique"])
COLUMN_HINTS: Set[HintType] = set(["partition", "cluster", "primary_key", "foreign_key", "sort", "unique"])
class ColumnBase(TypedDict, total=True):
name: str
data_type: DataType
nullable: bool
class Column(ColumnBase, total=True):
partition: bool
cluster: bool
unique: bool
sort: bool
primary_key: bool
foreign_key: bool
Table = Dict[str, Column]
SchemaTables = Dict[str, Table]
SchemaUpdate = Dict[str, List[Column]]
class StoredSchema(TypedDict, total=True):
version: int
engine_version: int
name: str
tables: SchemaTables
preferred_types: Mapping[str, DataType]
hints: Mapping[HintType, Sequence[str]]
excludes: Sequence[str]
includes: Sequence[str]
class Schema:
VERSION_TABLE_NAME = "_version"
VERSION_COLUMN_NAME = "version"
LOADS_TABLE_NAME = "_loads"
ENGINE_VERSION = 2
def __init__(self, name: str) -> None:
self._schema_tables: SchemaTables = {}
self._schema_name: str = name
self._version = 1
# list of preferred types: map regex on columns into types
self._preferred_types: Mapping[str, DataType] = {}
# compiled regexes
self._compiled_preferred_types: List[Tuple[Pattern[str], DataType]] = []
# table hints
self._hints: Mapping[HintType, Sequence[str]] = {}
self._compiled_hints: Dict[HintType, Sequence[Pattern[str]]] = {}
# excluded paths
self._excludes: Sequence[str] = []
self._compiled_excludes: Sequence[Pattern[str]] = []
# included paths
self._includes: Sequence[str] = []
self._compiled_includes: Sequence[Pattern[str]] = []
# add version table
self._add_standard_tables()
# add standard hints
self._add_standard_hints()
# compile hints
self._compile_regexes()
@classmethod
def from_dict(cls, stored_schema: StoredSchema) -> "Schema":
# upgrade engine if needed
cls._upgrade_engine_version(stored_schema, stored_schema["engine_version"], cls.ENGINE_VERSION)
# create new instance from dict
self: Schema = cls(stored_schema["name"])
self._schema_tables = stored_schema["tables"]
# TODO: generate difference if STANDARD SCHEMAS are different than those and increase schema version
if Schema.VERSION_TABLE_NAME not in self._schema_tables:
raise SchemaCorruptedException(f"Schema must contain table {Schema.VERSION_TABLE_NAME}")
if Schema.LOADS_TABLE_NAME not in self._schema_tables:
raise SchemaCorruptedException(f"Schema must contain table {Schema.LOADS_TABLE_NAME}")
# verify table schemas
for table_name, table in self._schema_tables.items():
for column_name in table:
# add default hints to tables
column = self._add_missing_hints(table[column_name])
# overwrite column name
column["name"] = column_name
# verify column
self._verify_column(table_name, column_name, column)
table[column_name] = column
self._version = stored_schema["version"]
self._preferred_types = stored_schema["preferred_types"]
self._hints = stored_schema["hints"]
self._excludes = stored_schema["excludes"]
self._includes = stored_schema["includes"]
# compile regexes
self._compile_regexes()
return self
def filter_row(self, table_name: str, row: StrAny, path_separator: str) -> StrAny:
# include and exclude paths follow the naming convention of the unpacker and correspond to json document nesting
# current version of the unpacker separates json elements with __
def _exclude(path: str) -> bool:
is_included = False
is_excluded = any(exclude.search(path) for exclude in self._compiled_excludes)
if is_excluded:
# we may have an exception if explicitly included
is_included = any(include.search(path) for include in self._compiled_includes)
return is_excluded and not is_included
# check if any of the fields in the row is excluded
for field_name in list(row.keys()):
path = f"{table_name}{path_separator}{field_name}"
# excluded if any rule matches
if _exclude(path):
# TODO: copy to new instance
del row[field_name] # type: ignore
return row
def coerce_row(self, table_name: str, row: StrAny) -> Tuple[StrAny, List[Column]]:
table_schema: Table = self._schema_tables.get(table_name, {})
new_columns: List[Column] = []
new_row: DictStrAny = {}
for col_name, v in row.items():
# skip None values, we should infer the types later
if v is None:
# just check if column is nullable if exists
self._coerce_null_value(table_schema, table_name, col_name)
else:
new_col_name, new_col_def, new_v = self._coerce_non_null_value(table_schema, table_name, col_name, v)
new_row[new_col_name] = new_v
if new_col_def:
new_columns.append(new_col_def)
return new_row, new_columns
def filter_hints_in_row(self, table_name: str, hint_type: HintType, row: StrAny) -> StrAny:
rv_row: DictStrAny = {}
column_prop: ColumnProp = self._hint_to_column_prop(hint_type)
try:
table = self.get_table(table_name)
for column_name in table:
if column_name in row:
hint_value = table[column_name][column_prop]
if (hint_value and column_prop != "nullable") or (column_prop == "nullable" and not hint_value):
rv_row[column_name] = row[column_name]
except KeyError:
for k, v in row.items():
if self._infer_hint(hint_type, v, k):
rv_row[k] = v
# dicts are ordered and we will return the rows with hints in the same order as they appear in the columns
return rv_row
def update_schema(self, table_name: str, updated_columns: List[Column]) -> None:
# all tables in the schema must start with the schema name
# if not table_name.startswith(f"{self._schema_name}"):
# raise InvalidTableNameException(self._schema_name, table_name)
if table_name not in self._schema_tables:
# add the whole new table to SchemaTables
self._schema_tables[table_name] = {c["name"]: c for c in updated_columns}
else:
# add several columns to existing table
table_schema = self._schema_tables[table_name]
for column in updated_columns:
column_name = column["name"]
if column_name in table_schema:
# we do not support changing existing columns
if not Schema._compare_columns(table_schema[column_name], column):
# attempt to update to incompatible columns
raise CannotCoerceColumnException(table_name, column_name, table_schema[column_name]["data_type"], column["data_type"], None)
else:
table_schema[column_name] = column
# bump schema version
self._version += 1
def get_schema_update_for(self, table_name: str, t: Table) -> List[Column]:
# gets new columns to be added to "t" to bring it up to date with the stored schema
diff_c: List[Column] = []
s_t = self.get_table(table_name)
for c in s_t.values():
if c["name"] not in t:
diff_c.append(c)
return diff_c
def get_table(self, table_name: str) -> Table:
return self._schema_tables[table_name]
def to_dict(self) -> StoredSchema:
return {
"tables": self._schema_tables,
"name": self._schema_name,
"version": self._version,
"preferred_types": self._preferred_types,
"hints": self._hints,
"excludes": self._excludes,
"includes": self._includes,
"engine_version": Schema.ENGINE_VERSION
}
@property
def schema_version(self) -> int:
return self._version
@property
def schema_name(self) -> str:
return self._schema_name
@property
def schema_tables(self) -> SchemaTables:
return self._schema_tables
def as_yaml(self, remove_default_hints: bool = False) -> str:
d = self.to_dict()
clean_tables = deepcopy(d["tables"])
for t in clean_tables.values():
for c in t.values():
# do not save names
del c["name"] # type: ignore
# remove hints with default values
if remove_default_hints:
for h in list(c.keys()):
if type(c[h]) is bool and c[h] is False and h != "nullable": # type: ignore
del c[h] # type: ignore
d["tables"] = clean_tables
return cast(str, yaml.dump(d, allow_unicode=True, default_flow_style=False, sort_keys=False))
def _infer_column(self, k: str, v: Any) -> Column:
return Column(
name=k,
data_type=self._map_value_to_column_type(v, k),
nullable=not self._infer_hint("not_null", v, k),
partition=self._infer_hint("partition", v, k),
cluster=self._infer_hint("cluster", v, k),
sort=self._infer_hint("sort", v, k),
unique=self._infer_hint("unique", v, k),
primary_key=self._infer_hint("primary_key", v, k),
foreign_key=self._infer_hint("foreign_key", v, k)
)
def _coerce_null_value(self, table_schema: Table, table_name: str, col_name: str) -> None:
if col_name in table_schema:
existing_column = table_schema[col_name]
if not existing_column["nullable"]:
raise CannotCoerceNullException(table_name, col_name)
def _coerce_non_null_value(self, table_schema: Table, table_name: str, col_name: str, v: Any) -> Tuple[str, Column, Any]:
new_column: Column = None
rv = v
variant_col_name = col_name
if col_name in table_schema:
existing_column = table_schema[col_name]
# existing columns cannot be changed so we must coerce the value in the row
py_data_type = Schema._py_type_to_sc_type(type(v))
if existing_column["data_type"] != py_data_type:
# first try to coerce existing value into destination type
try:
rv = Schema._coerce_type(existing_column["data_type"], py_data_type, v)
except (ValueError, SyntaxError):
# for complex types we must coerce to text
if py_data_type == "complex":
py_data_type = "text"
rv = Schema._coerce_type("text", "complex", v)
# if that does not work we must create variant extension to the table
variant_col_name = f"{col_name}_v_{py_data_type}"
# if variant exists check type, coercions are not required
if variant_col_name in table_schema:
if table_schema[variant_col_name]["data_type"] != py_data_type:
raise CannotCoerceColumnException(table_name, variant_col_name, table_schema[variant_col_name]["data_type"], py_data_type, v)
else:
# add a new variant column
new_column = self._infer_column(variant_col_name, v)
# must have variant type, not preferred or coerced type
new_column["data_type"] = py_data_type
else:
# just copy row: types match
pass
else:
# infer new column
new_column = self._infer_column(col_name, v)
# and coerce type if inference changed the python type
py_type = Schema._py_type_to_sc_type(type(v))
rv = Schema._coerce_type(new_column["data_type"], py_type, v)
return variant_col_name, new_column, rv
def _map_value_to_column_type(self, v: Any, k: str) -> DataType:
mapped_type = Schema._py_type_to_sc_type(type(v))
# if complex type was detected we must coerce to string
if mapped_type == "complex":
mapped_type = "text"
# get preferred type based on column name
preferred_type = self._get_preferred_type(k)
# try to match python type to preferred
if preferred_type:
# try to coerce to destination type
try:
Schema._coerce_type(preferred_type, mapped_type, v)
# coercion possible so preferred type may be used
mapped_type = preferred_type
except ValueError:
# coercion not possible
pass
return mapped_type
def _get_preferred_type(self, col_name: str) -> Optional[DataType]:
return next((m[1] for m in self._compiled_preferred_types if m[0].search(col_name)), None)
def _infer_hint(self, hint_type: HintType, _: Any, k: str) -> bool:
if hint_type in self._compiled_hints:
return any(h.search(k) for h in self._compiled_hints[hint_type])
else:
return False
def _add_standard_tables(self) -> None:
version_table: Table = {
"version": self._add_missing_hints({
"name": "version",
"data_type": "bigint",
"nullable": False,
}),
"engine_version": self._add_missing_hints({
"name": "engine_version",
"data_type": "bigint",
"nullable": False
}),
"inserted_at": self._add_missing_hints({
"name": "inserted_at",
"data_type": "timestamp",
"nullable": False
})
}
self._schema_tables[Schema.VERSION_TABLE_NAME] = version_table
load_table: Table = {
"load_id": self._add_missing_hints({
"name": "load_id",
"data_type": "text",
"nullable": False
}),
"status": self._add_missing_hints({
"name": "status",
"data_type": "bigint",
"nullable": False
}),
"inserted_at": self._add_missing_hints({
"name": "inserted_at",
"data_type": "timestamp",
"nullable": False
})
}
self._schema_tables[Schema.LOADS_TABLE_NAME] = load_table
def _add_standard_hints(self) -> None:
self._hints = {
"not_null": ["^_record_hash$", "^_root_hash$", "^_parent_hash$", "^_pos$", "_load_id"],
"foreign_key": ["^_parent_hash$"],
"unique": ["^_record_hash$"]
}
def _compile_regexes(self) -> None:
for pattern, dt in self._preferred_types.items():
# add tuples to be searched in coercions
self._compiled_preferred_types.append((re.compile(pattern), dt))
for hint_name, hint_list in self._hints.items():
# compile hints which are column matching regexes
self._compiled_hints[hint_name] = list(map(lambda hint: re.compile(hint), hint_list))
self._compiled_excludes = list(map(lambda exclude: re.compile(exclude), self._excludes))
self._compiled_includes = list(map(lambda include: re.compile(include), self._includes))
@staticmethod
def _verify_column(table_name: str, column_name: str, column: Column) -> None:
existing_props = set(column.keys())
missing_props = COLUMN_PROPS.difference(existing_props)
if len(missing_props) > 0:
raise SchemaCorruptedException(f"In table {table_name} column {column_name}: Column definition is missing following properties {missing_props}")
data_type = column["data_type"]
if data_type not in DATA_TYPES:
raise SchemaCorruptedException(f"In table {table_name} column {column_name}: {data_type} is not one of available types: {DATA_TYPES}")
for p, v in column.items():
if p in COLUMN_HINTS and not type(v) is bool:
raise SchemaCorruptedException(f"In table {table_name} column {column_name}: hint {p} is not boolean.")
@staticmethod
def _upgrade_engine_version(schema_dict: StoredSchema, from_engine: int, to_engine: int) -> None:
if from_engine == 1:
schema_dict["engine_version"] = 2
schema_dict["includes"] = []
schema_dict["excludes"] = []
from_engine = 2
if from_engine == 2:
pass
if from_engine != to_engine:
raise SchemaEngineNoUpgradePathException(schema_dict["name"], schema_dict["engine_version"], from_engine, to_engine)
@staticmethod
def _add_missing_hints(column: ColumnBase) -> Column:
return {
**{ # type:ignore
"partition": False,
"cluster": False,
"unique": False,
"sort": False,
"primary_key": False,
"foreign_key": False,
},
**column
}
@staticmethod
def _py_type_to_sc_type(t: Type[Any]) -> DataType:
if t is float:
return "double"
elif t is int:
return "bigint"
elif t is bool:
return "bool"
elif t is bytes:
return "binary"
elif t in [dict, list]:
return "complex"
elif t is Decimal:
return "decimal"
else:
return "text"
@staticmethod
def _coerce_type(to_type: DataType, from_type: DataType, value: Any) -> Any:
if to_type == from_type:
return value
if to_type == "text":
if from_type == "complex":
return json.dumps(value)
else:
return str(value)
if to_type == "binary":
if from_type == "text":
if value.startswith("0x"):
return bytes.fromhex(value[2:])
try:
return base64.b64decode(value, validate=True)
except binascii.Error:
raise ValueError(value)
if from_type == "bigint":
return value.to_bytes((value.bit_length() + 7) // 8, 'little')
if to_type in ["wei", "bigint"]:
if from_type == "bigint":
return value
if from_type in ["decimal", "double"]:
if value % 1 != 0:
# only integer decimals and floats can be coerced
raise ValueError(value)
return int(value)
if from_type == "text":
trim_value = value.strip()
if trim_value.startswith("0x"):
return int(trim_value[2:], 16)
else:
return int(trim_value)
if to_type == "double":
if from_type in ["bigint", "wei", "decimal"]:
return float(value)
if from_type == "text":
trim_value = value.strip()
if trim_value.startswith("0x"):
return float(int(trim_value[2:], 16))
else:
return float(trim_value)
if to_type == "decimal":
if from_type in ["bigint", "wei"]:
return value
if from_type == "double":
return Decimal(value)
if from_type == "text":
trim_value = value.strip()
if trim_value.startswith("0x"):
return int(trim_value[2:], 16)
elif "." not in trim_value and "e" not in trim_value:
return int(trim_value)
else:
try:
return Decimal(trim_value)
except ConversionSyntax:
raise ValueError(trim_value)
if to_type == "timestamp":
if from_type in ["bigint", "double"]:
# returns ISO datetime with timezone
return str(pendulum.from_timestamp(value))
if from_type == "text":
# if parses as ISO date then pass it
try:
isoparse(value)
return value
except ValueError:
# try to convert string to integer, or float
try:
value = int(value)
except ValueError:
# raises ValueError if not parsing correctly
value = float(value)
return str(pendulum.from_timestamp(value))
raise ValueError(value)
@staticmethod
def _compare_columns(a: Column, b: Column) -> bool:
return a["data_type"] == b["data_type"] and a["nullable"] == b["nullable"]
@staticmethod
def _hint_to_column_prop(h: HintType) -> ColumnProp:
if h == "not_null":
return "nullable"
return h
class SchemaException(DltException):
pass
class CannotCoerceColumnException(SchemaException):
def __init__(self, table_name: str, column_name: str, from_type: DataType, to_type: DataType, value: Any) -> None:
super().__init__(f"Cannot coerce type in table {table_name} column {column_name} existing type {from_type} coerced type {to_type} value: {value}")
class CannotCoerceNullException(SchemaException):
def __init__(self, table_name: str, column_name: str) -> None:
super().__init__(f"Cannot coerce NULL in table {table_name} column {column_name} which is not nullable")
class InvalidTableNameException(SchemaException):
def __init__(self, schema_name: str, table_name: str) -> None:
self.schema_name = schema_name
self.table_name = table_name
super().__init__(f"All table names must start with '{schema_name}' so {table_name} is invalid")
class SchemaCorruptedException(SchemaException):
pass
class SchemaEngineNoUpgradePathException(SchemaException):
def __init__(self, schema_name: str, init_engine: int, from_engine: int, to_engine: int) -> None:
self.schema_name = schema_name
self.init_engine = init_engine
self.from_engine = from_engine
self.to_engine = to_engine
super().__init__(f"No engine upgrade path in schema {schema_name} from {init_engine} to {to_engine}, stopped at {from_engine}")

35
dlt/common/signals.py Normal file
View File

@@ -0,0 +1,35 @@
import signal
from threading import Event
from typing import Any
from dlt.common import logger
from dlt.common.exceptions import SignalReceivedException
_received_signal: int = 0
exit_event = Event()
def signal_receiver(signal: int, frame: Any) -> None:
global _received_signal
logger.info(f"Signal {signal} received")
if _received_signal > 0:
logger.info(f"Another signal received after {_received_signal}")
return
_received_signal = signal
# awake all threads sleeping on event
exit_event.set()
logger.info(f"Sleeping threads signalled")
def raise_if_signalled() -> None:
if _received_signal:
raise SignalReceivedException(_received_signal)
def register_signals() -> None:
signal.signal(signal.SIGINT, signal_receiver)
signal.signal(signal.SIGTERM, signal_receiver)
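An illustrative loop built on the helpers above; the sleep stands in for real work:

import time

from dlt.common import signals
from dlt.common.exceptions import SignalReceivedException

signals.register_signals()  # install the SIGINT/SIGTERM receiver above
try:
    while True:
        signals.raise_if_signalled()  # stop promptly once a signal has arrived
        time.sleep(1)  # real work would go here
except SignalReceivedException as sig:
    print(f"stopping on signal {sig.signal_code}")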

View File

@@ -0,0 +1 @@
from .schema_storage import SchemaStorage # noqa: F401

View File

@@ -0,0 +1,23 @@
import semver
from dlt.common.exceptions import DltException
class StorageException(DltException):
def __init__(self, msg: str) -> None:
super().__init__(msg)
class NoMigrationPathException(StorageException):
def __init__(self, storage_path: str, initial_version: semver.VersionInfo, migrated_version: semver.VersionInfo, target_version: semver.VersionInfo) -> None:
self.storage_path = storage_path
self.initial_version = initial_version
self.migrated_version = migrated_version
self.target_version = target_version
super().__init__(f"Could not find migration path for {storage_path} from v {initial_version} to {target_version}, stopped at {migrated_version}")
class WrongStorageVersionException(StorageException):
def __init__(self, storage_path: str, initial_version: semver.VersionInfo, target_version: semver.VersionInfo) -> None:
self.storage_path = storage_path
self.initial_version = initial_version
self.target_version = target_version
super().__init__(f"Expected storage {storage_path} with v {target_version} but found {initial_version}")

View File

@@ -0,0 +1,181 @@
import os
from pathlib import Path
from typing import List, Literal, Optional, Sequence, Tuple, Type
from dlt.common import json, pendulum
from dlt.common.file_storage import FileStorage
from dlt.common.dataset_writers import TWriterType, write_jsonl, write_insert_values
from dlt.common.configuration import LoadingVolumeConfiguration
from dlt.common.exceptions import TerminalValueError
from dlt.common.schema import SchemaUpdate, Table
from dlt.common.storages.versioned_storage import VersionedStorage
from dlt.common.typing import StrAny
from dlt.common.storages.exceptions import StorageException
# folders to manage load jobs in a single load package
TWorkingFolder = Literal["new_jobs", "failed_jobs", "started_jobs", "completed_jobs"]
class LoaderStorage(VersionedStorage):
STORAGE_VERSION = "1.0.0"
LOADING_FOLDER = "loading" # folder within the volume where load packages are stored
LOADED_FOLDER = "loaded" # folder to keep the loads that were completely processed
NEW_JOBS_FOLDER: TWorkingFolder = "new_jobs"
FAILED_JOBS_FOLDER: TWorkingFolder = "failed_jobs"
STARTED_JOBS_FOLDER: TWorkingFolder = "started_jobs"
COMPLETED_JOBS_FOLDER: TWorkingFolder = "completed_jobs"
LOAD_SCHEMA_UPDATE_FILE_NAME = "schema_updates.json"
SUPPORTED_WRITERS: List[TWriterType] = ["jsonl", "insert_values"]
def __init__(self, is_owner: bool, C: Type[LoadingVolumeConfiguration], writer_type: TWriterType) -> None:
if writer_type not in LoaderStorage.SUPPORTED_WRITERS:
raise TerminalValueError(writer_type)
self.writer_type = writer_type
self.delete_completed_jobs = C.DELETE_COMPLETED_JOBS
super().__init__(LoaderStorage.STORAGE_VERSION, is_owner, FileStorage(C.LOADING_VOLUME_PATH, "t", makedirs=is_owner))
def initialize_storage(self) -> None:
self.storage.create_folder(LoaderStorage.LOADED_FOLDER, exists_ok=True)
self.storage.create_folder(LoaderStorage.LOADING_FOLDER, exists_ok=True)
def create_temp_load_folder(self, load_id: str) -> None:
# delete previous version
if self.storage.has_folder(load_id):
self.storage.delete_folder(load_id, recursively=True)
self.storage.create_folder(load_id)
# create processing directories
self.storage.create_folder(f"{load_id}/{LoaderStorage.NEW_JOBS_FOLDER}")
self.storage.create_folder(f"{load_id}/{LoaderStorage.COMPLETED_JOBS_FOLDER}")
self.storage.create_folder(f"{load_id}/{LoaderStorage.FAILED_JOBS_FOLDER}")
self.storage.create_folder(f"{load_id}/{LoaderStorage.STARTED_JOBS_FOLDER}")
def write_temp_loading_file(self, load_id: str, table_name: str, table: Table, file_id: str, rows: Sequence[StrAny]) -> str:
file_name = self.build_loading_file_name(load_id, table_name, file_id)
with self.storage.open(file_name, mode = "w") as f:
if self.writer_type == "jsonl":
write_jsonl(f, rows)
elif self.writer_type == "insert_values":
write_insert_values(f, rows, table.keys())
return Path(file_name).name
def save_schema_updates(self, load_id: str, schema_updates: Sequence[SchemaUpdate]) -> None:
with self.storage.open(f"{load_id}/{LoaderStorage.LOAD_SCHEMA_UPDATE_FILE_NAME}", mode="w") as f:
json.dump(schema_updates, f)
def commit_temp_load_folder(self, load_id: str) -> None:
self.storage.atomic_rename(load_id, self.get_load_path(load_id))
def list_loads(self) -> Sequence[str]:
loads = self.storage.list_folder_dirs(LoaderStorage.LOADING_FOLDER, to_root=False)
# start from the oldest packages
return sorted(loads)
def list_completed_loads(self) -> Sequence[str]:
loads = self.storage.list_folder_dirs(LoaderStorage.LOADED_FOLDER, to_root=False)
# start from the oldest packages
return sorted(loads)
def list_new_jobs(self, load_id: str) -> Sequence[str]:
new_jobs = self.storage.list_folder_files(f"{self.get_load_path(load_id)}/{LoaderStorage.NEW_JOBS_FOLDER}")
# make sure all jobs have supported writers
wrong_job = next((j for j in new_jobs if LoaderStorage.parse_load_file_name(j)[1] != self.writer_type), None)
if wrong_job is not None:
raise JobWithUnsupportedWriterException(load_id, self.writer_type, wrong_job)
return new_jobs
def list_started_jobs(self, load_id: str) -> Sequence[str]:
return self.storage.list_folder_files(f"{self.get_load_path(load_id)}/{LoaderStorage.STARTED_JOBS_FOLDER}")
def list_failed_jobs(self, load_id: str) -> Sequence[str]:
return self.storage.list_folder_files(f"{self.get_load_path(load_id)}/{LoaderStorage.FAILED_JOBS_FOLDER}")
def list_archived_failed_jobs(self, load_id: str) -> Sequence[str]:
return self.storage.list_folder_files(f"{self.get_archived_path(load_id)}/{LoaderStorage.FAILED_JOBS_FOLDER}")
def begin_schema_update(self, load_id: str) -> Optional[SchemaUpdate]:
schema_update_file = f"{self.get_load_path(load_id)}/{LoaderStorage.LOAD_SCHEMA_UPDATE_FILE_NAME}"
if self.storage.has_file(schema_update_file):
schema_update: SchemaUpdate = json.loads(self.storage.load(schema_update_file))
return schema_update
else:
return None
def commit_schema_update(self, load_id: str) -> None:
load_path = self.get_load_path(load_id)
schema_update_file = f"{load_path}/{LoaderStorage.LOAD_SCHEMA_UPDATE_FILE_NAME}"
self.storage.atomic_rename(schema_update_file, f"{load_path}/{LoaderStorage.COMPLETED_JOBS_FOLDER}/{LoaderStorage.LOAD_SCHEMA_UPDATE_FILE_NAME}")
def start_job(self, load_id: str, file_name: str) -> str:
return self._move_file(load_id, LoaderStorage.NEW_JOBS_FOLDER, LoaderStorage.STARTED_JOBS_FOLDER, file_name)
def fail_job(self, load_id: str, file_name: str, failed_message: Optional[str]) -> str:
load_path = self.get_load_path(load_id)
if failed_message:
self.storage.save(f"{load_path}/{LoaderStorage.FAILED_JOBS_FOLDER}/{file_name}.exception", failed_message)
# move to failed jobs
return self._move_file(load_id, LoaderStorage.STARTED_JOBS_FOLDER, LoaderStorage.FAILED_JOBS_FOLDER, file_name)
def retry_job(self, load_id: str, file_name: str) -> str:
return self._move_file(load_id, LoaderStorage.STARTED_JOBS_FOLDER, LoaderStorage.NEW_JOBS_FOLDER, file_name)
def complete_job(self, load_id: str, file_name: str) -> str:
return self._move_file(load_id, LoaderStorage.STARTED_JOBS_FOLDER, LoaderStorage.COMPLETED_JOBS_FOLDER, file_name)
def archive_load(self, load_id: str) -> None:
load_path = self.get_load_path(load_id)
has_failed_jobs = len(self.list_failed_jobs(load_id)) > 0
# delete load that does not contain failed jobs
if self.delete_completed_jobs and not has_failed_jobs:
self.storage.delete_folder(load_path, recursively=True)
else:
archive_path = self.get_archived_path(load_id)
self.storage.atomic_rename(load_path, archive_path)
def get_load_path(self, load_id: str) -> str:
return f"{LoaderStorage.LOADING_FOLDER}/{load_id}"
def get_archived_path(self, load_id: str) -> str:
return f"{LoaderStorage.LOADED_FOLDER}/{load_id}"
def build_loading_file_name(self, load_id: str, table_name: str, file_id: str) -> str:
file_name = f"{table_name}.{file_id}.{self.writer_type}"
return f"{load_id}/{LoaderStorage.NEW_JOBS_FOLDER}/{file_name}"
def _move_file(self, load_id: str, source_folder: TWorkingFolder, dest_folder: TWorkingFolder, file_name: str) -> str:
load_path = self.get_load_path(load_id)
dest_path = f"{load_path}/{dest_folder}/{file_name}"
self.storage.atomic_rename(f"{load_path}/{source_folder}/{file_name}", dest_path)
return self.storage._make_path(dest_path)
def job_elapsed_time_seconds(self, file_path: str) -> float:
return pendulum.now().timestamp() - os.path.getmtime(file_path) # type: ignore
def _get_file_path(self, load_id: str, folder: TWorkingFolder, file_name: str) -> str:
load_path = self.get_load_path(load_id)
return f"{load_path}/{folder}/{file_name}"
@staticmethod
def parse_load_file_name(file_name: str) -> Tuple[str, TWriterType]:
p = Path(file_name)
ext: TWriterType = p.suffix[1:] # type: ignore
if ext not in LoaderStorage.SUPPORTED_WRITERS:
raise TerminalValueError(ext)
parts = p.stem.split(".")
return (parts[0], ext)
class LoaderStorageException(StorageException):
pass
class JobWithUnsupportedWriterException(LoaderStorageException):
def __init__(self, load_id: str, expected_writer_type: TWriterType, wrong_job: str) -> None:
self.load_id = load_id
self.expected_writer_type = expected_writer_type
self.wrong_job = wrong_job
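A sketch of one load package going through the folders above, assuming LoadingVolumeConfiguration provides usable defaults for LOADING_VOLUME_PATH and DELETE_COMPLETED_JOBS; the table name and row contents are illustrative:

from dlt.common.configuration import LoadingVolumeConfiguration
from dlt.common.utils import uniq_id

storage = LoaderStorage(is_owner=True, C=LoadingVolumeConfiguration, writer_type="jsonl")
storage.initialize_storage()

load_id = uniq_id()
storage.create_temp_load_folder(load_id)
# returns just the file name, which is what the job methods below expect
job_name = storage.write_temp_loading_file(load_id, "event_user", {}, uniq_id(), [{"value": 1}])
storage.commit_temp_load_folder(load_id)

storage.start_job(load_id, job_name)
storage.complete_job(load_id, job_name)
storage.archive_load(load_id)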

View File

@@ -0,0 +1,49 @@
import os
from typing import Optional
from dlt.common import json
from dlt.common.file_storage import FileStorage
from dlt.common.schema import Schema, StoredSchema
class SchemaStorage:
STORE_SCHEMA_FILE_PATTERN = "%s_schema.json"
FOLDER_SCHEMA_FILE = "schema.json"
def __init__(self, schema_storage_root: str, makedirs: bool = False) -> None:
self.storage = FileStorage(schema_storage_root, makedirs=makedirs)
def load_store_schema(self, name: str) -> Schema:
# loads a schema from a store holding many schemas
schema_file = self._get_file_by_name(name)
stored_schema: StoredSchema = json.loads(self.storage.load(schema_file))
return Schema.from_dict(stored_schema)
def load_folder_schema(self, from_folder: str) -> Schema:
# loads schema from a folder containing one default schema
schema_path = self._get_file_in_folder(from_folder)
stored_schema: StoredSchema = json.loads(self.storage.load(schema_path))
return Schema.from_dict(stored_schema)
def save_store_schema(self, schema: Schema) -> str:
# save a schema to schema store
dump = json.dumps(schema.to_dict(), indent=2)
schema_file = self._get_file_by_name(schema.schema_name)
return self.storage.save(schema_file, dump)
def save_folder_schema(self, schema: Schema, in_folder: str) -> str:
# save a schema to a folder holding one schema
dump = json.dumps(schema.to_dict())
schema_file = self._get_file_in_folder(in_folder)
return self.storage.save(schema_file, dump)
def has_store_schema(self, name: str) -> bool:
schema_file = self._get_file_by_name(name)
return self.storage.has_file(schema_file)
def _get_file_by_name(self, name: str) -> str:
return SchemaStorage.STORE_SCHEMA_FILE_PATTERN % name
def _get_file_in_folder(self, folder: str) -> str:
return os.path.join(folder, SchemaStorage.FOLDER_SCHEMA_FILE) # if folder is None else os.path.join(folder, SchemaStorage.SCHEMA_FILE)
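An illustrative round trip through the schema store; the storage path is made up:

from dlt.common.schema import Schema

store = SchemaStorage("_storage/schemas", makedirs=True)
store.save_store_schema(Schema("event"))
assert store.has_store_schema("event")
restored = store.load_store_schema("event")
assert restored.schema_name == "event"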

View File

@@ -0,0 +1,73 @@
from typing import List, Sequence, Tuple, Type
from itertools import groupby
from pathlib import Path
from dlt.common.utils import chunks
from dlt.common.file_storage import FileStorage
from dlt.common.configuration import UnpackingVolumeConfiguration
from dlt.common.storages.versioned_storage import VersionedStorage
class UnpackerStorage(VersionedStorage):
STORAGE_VERSION = "1.0.0"
UNPACKING_FOLDER: str = "unpacking" # folder within the volume where files to be unpacked are stored
UNPACK_FILE_EXTENSION = ".unpack.json"
UNPACK_FILE_EXTENSION_LEN = len(UNPACK_FILE_EXTENSION)
def __init__(self, is_owner: bool, C: Type[UnpackingVolumeConfiguration]) -> None:
super().__init__(UnpackerStorage.STORAGE_VERSION, is_owner, FileStorage(C.UNPACKING_VOLUME_PATH, "t", makedirs=is_owner))
def initialize_storage(self) -> None:
self.storage.create_folder(UnpackerStorage.UNPACKING_FOLDER, exists_ok=True)
def list_files_to_unpack_sorted(self) -> Sequence[str]:
return sorted(self.storage.list_folder_files(UnpackerStorage.UNPACKING_FOLDER))
def get_grouped_iterator(self, files: Sequence[str]) -> "groupby[str, str]":
return groupby(files, lambda f: UnpackerStorage.get_schema_name(f))
@staticmethod
def chunk_by_events(files: Sequence[str], max_events: int, processing_cores: int) -> List[Sequence[str]]:
# should distribute ~ N events evenly among m cores with fallback for small amounts of events
def count_events(file_name : str) -> int:
# return event count from file name
return UnpackerStorage.get_events_count(file_name)
counts = list(map(count_events, files))
# make a list of files containing ~max_events
events_count = 0
m = 0
while events_count < max_events and m < len(files):
events_count += counts[m]
m += 1
processing_chunks = round(m / processing_cores)
if processing_chunks == 0:
# return one small chunk
return [files]
else:
# should return ~ amount of chunks to fill all the cores
return list(chunks(files[:m], processing_chunks))
@staticmethod
def get_events_count(file_name: str) -> int:
return UnpackerStorage._parse_unpack_file_name(file_name)[0]
@staticmethod
def get_schema_name(file_name: str) -> str:
return UnpackerStorage._parse_unpack_file_name(file_name)[2]
@staticmethod
def build_unpack_file_name(schema_name: str, stem: str, event_count: int, load_id: str) -> str:
# builds file name of the unpack file for the tracker
return f"{schema_name}_{stem}_{load_id}_{event_count}{UnpackerStorage.UNPACK_FILE_EXTENSION}"
@staticmethod
def _parse_unpack_file_name(file_name: str) -> Tuple[int, str, str]:
# parses the unpack tracker file name and returns (events found, load id, schema_name)
if not file_name.endswith(UnpackerStorage.UNPACK_FILE_EXTENSION):
raise ValueError(file_name)
parts = Path(file_name[:-UnpackerStorage.UNPACK_FILE_EXTENSION_LEN]).stem.split("_")
return (int(parts[-1]), parts[-2], parts[0])
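A quick check of the tracker file naming round trip above; the schema name, stem and load id are illustrative and must not contain underscores or dots for the parser to split them back correctly:

name = UnpackerStorage.build_unpack_file_name("event", "queue", 120, "9c4ae7")
assert name == "event_queue_9c4ae7_120.unpack.json"
assert UnpackerStorage.get_events_count(name) == 120
assert UnpackerStorage.get_schema_name(name) == "event"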

View File

@@ -0,0 +1,54 @@
import semver
from dlt.common.file_storage import FileStorage
from dlt.common.storages.exceptions import NoMigrationPathException, WrongStorageVersionException
class VersionedStorage:
VERSION_FILE = ".version"
def __init__(self, version: semver.VersionInfo, is_owner: bool, storage: FileStorage) -> None:
self.storage = storage
# read current version
if self.storage.has_file(VersionedStorage.VERSION_FILE):
existing_version = self._load_version()
if existing_version != version:
if existing_version > version:
# version cannot be downgraded
raise NoMigrationPathException(storage.storage_path, existing_version, existing_version, version)
if is_owner:
# only owner can migrate storage
self.migrate_storage(existing_version, version)
# storage should be migrated to desired version
migrated_version = self._load_version()
if version != migrated_version:
raise NoMigrationPathException(storage.storage_path, existing_version, migrated_version, version)
else:
# we cannot use storage and we must wait for owner to upgrade it
raise WrongStorageVersionException(storage.storage_path, existing_version, version)
else:
if is_owner:
self._save_version(version)
else:
raise WrongStorageVersionException(storage.storage_path, semver.VersionInfo.parse("0.0.0"), version)
def migrate_storage(self, from_version: semver.VersionInfo, to_version: semver.VersionInfo) -> None:
# migration example:
# # semver lib supports comparing both to string and other semvers
# if from_version == "1.0.0" and from_version < to_version:
# # do migration
# # save migrated version
# from_version = semver.VersionInfo.parse("1.1.0")
# self._save_version(from_version)
pass
@property
def version(self) -> semver.VersionInfo:
return self._load_version()
def _load_version(self) -> semver.VersionInfo:
return semver.VersionInfo.parse(self.storage.load(VersionedStorage.VERSION_FILE))
def _save_version(self, version: semver.VersionInfo) -> None:
self.storage.save(VersionedStorage.VERSION_FILE, str(version))
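A hedged sketch of a concrete storage that knows how to migrate itself, following the commented example above; the folder name and the 1.0.0 to 1.1.0 step are illustrative:

import semver

from dlt.common.file_storage import FileStorage

class JobsStorage(VersionedStorage):
    STORAGE_VERSION = "1.1.0"

    def __init__(self, is_owner: bool) -> None:
        super().__init__(JobsStorage.STORAGE_VERSION, is_owner, FileStorage("_storage/jobs", makedirs=is_owner))

    def migrate_storage(self, from_version: semver.VersionInfo, to_version: semver.VersionInfo) -> None:
        if from_version == "1.0.0" and from_version < to_version:
            # ... move files / rewrite metadata for 1.1.0 here ...
            self._save_version(semver.VersionInfo.parse("1.1.0"))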

65
dlt/common/telemetry.py Normal file
View File

@@ -0,0 +1,65 @@
from typing import Iterable, Sequence, TypedDict, NamedTuple
from prometheus_client import Gauge
from prometheus_client.metrics import MetricWrapperBase
from dlt.common.typing import DictStrAny, StrAny
class TRunHealth(TypedDict):
# count runs
runs_count: int
# count not idle runs
runs_not_idle_count: int
# count successful runs
runs_healthy_count: int
# count consecutive successful runs
runs_cs_healthy_gauge: int
# count failed runs
runs_failed_count: int
# count consecutive failed runs
runs_cs_failed_gauge: int
# number of items pending at the end of the run
runs_pending_items_gauge: int
class TRunMetrics(NamedTuple):
was_idle: bool
has_failed: bool
pending_items: int
def get_metrics_from_prometheus(gauges: Iterable[MetricWrapperBase]) -> StrAny:
metrics: DictStrAny = {}
for g in gauges:
name = g._name
if g._is_parent():
# for gauges containing many label values, enumerate all
metrics.update(get_metrics_from_prometheus([g.labels(*l) for l in g._metrics.keys()]))
continue
# for gauges with labels: add the label to the name and enumerate samples
if g._labelvalues:
name += "_" + "_".join(g._labelvalues)
for m in g._child_samples():
k = name
if m[0] == "_created":
continue
if m[0] != "_total":
k += m[0]
if g._type == "info":
# actual descriptive value is held in [1], [2] is a placeholder in info
metrics[k] = m[1]
else:
metrics[k] = m[2]
return metrics
def set_gauge_all_labels(gauge: Gauge, value: float) -> None:
if gauge._is_parent():
for l in gauge._metrics.keys():
set_gauge_all_labels(gauge.labels(*l), value)
else:
gauge.set(value)
def get_logging_extras(gauges: Iterable[MetricWrapperBase]) -> StrAny:
return {"metrics": get_metrics_from_prometheus(gauges)}

30
dlt/common/time.py Normal file
View File

@@ -0,0 +1,30 @@
from typing import Optional # noqa
from dlt.common import signals
PAST_TIMESTAMP: float = 0.0
FUTURE_TIMESTAMP: float = 9999999999.0
DAY_DURATION_SEC: float = 24 * 60 * 60.0
def timestamp_within(timestamp: float, min_exclusive: Optional[float], max_inclusive: Optional[float]) -> bool:
"""
check if timestamp is within the given range; None bounds mean unbounded, min is exclusive and max is inclusive
"""
return timestamp > (min_exclusive or PAST_TIMESTAMP) and timestamp <= (max_inclusive or FUTURE_TIMESTAMP)
def timestamp_before(timestamp: float, max_inclusive: Optional[float]) -> bool:
"""
check if timestamp is before max timestamp, inclusive
"""
return timestamp <= (max_inclusive or FUTURE_TIMESTAMP)
def sleep(sleep_seconds: float) -> None:
# do not allow sleeping if signal was received
signals.raise_if_signalled()
# sleep or wait for signal
signals.exit_event.wait(sleep_seconds)
# if signal then raise
signals.raise_if_signalled()
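A few illustrative checks of the range helpers above:

assert timestamp_within(100.0, None, None)        # both bounds open
assert timestamp_within(100.0, 99.0, 100.0)       # max bound is inclusive
assert not timestamp_within(100.0, 100.0, None)   # min bound is exclusive
assert timestamp_before(100.0, None) and timestamp_before(100.0, 100.0)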

32
dlt/common/typing.py Normal file
View File

@@ -0,0 +1,32 @@
from typing import Dict, Any, List, Literal, Mapping, Sequence, TypedDict, Optional, Union
DictStrAny = Dict[str, Any]
DictStrStr = Dict[str, str]
StrAny = Mapping[str, Any] # immutable, covariant entity
StrStr = Mapping[str, str] # immutable, covariant entity
StrStrStr = Mapping[str, Mapping[str, str]] # immutable, covariant entity
class TEventRow(TypedDict, total=False):
_timestamp: float # used for partitioning
_dist_key: str # distribution key used for clustering
_record_hash: str # unique id of current row
_root_hash: str # unique id of top level parent
class TEventRowRoot(TEventRow, total=False):
_load_id: str # load id identifying records loaded together, e.g. so they can be processed incrementally
_event_json: str # dump of the original event
_event_type: str # sets event type which will be translated to table
class TEventRowChild(TEventRow, total=False):
_parent_hash: str # unique id of parent row
_pos: int # position in the list of rows
value: Any # for lists of simple types
class TEvent(TypedDict, total=False):
pass
class TTimestampEvent(TEvent, total=False):
timestamp: float # timestamp of event
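For illustration, a child row shaped the way the unpacker produces it for a list element; the hash values are placeholders:

child: TEventRowChild = {
    "_record_hash": "9c3e0f0b2f9a4d1e8a7b6c5d4e3f2a1b",  # deterministic child hash (placeholder)
    "_parent_hash": "1a2b3c4d5e6f708192a3b4c5d6e7f809",  # hash of the parent row (placeholder)
    "_pos": 0,       # position within the parent list
    "value": "vip",  # payload for lists of simple types
}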

117
dlt/common/utils.py Normal file
View File

@@ -0,0 +1,117 @@
import hashlib
from os import environ
from uuid import uuid4
from typing import Any, Iterator, Sequence, TypeVar, Mapping, List, Union
from dlt.common.typing import StrAny, DictStrAny, StrStr
T = TypeVar("T")
def chunks(list: Sequence[T], n: int) -> Iterator[Sequence[T]]:
for i in range(0, len(list), n):
yield list[i:i + n]
def uniq_id() -> str:
return uuid4().hex
def digest128(v: str) -> str:
return hashlib.shake_128(v.encode("utf-8")).hexdigest(16)
def str2bool(v: str) -> bool:
if isinstance(v, bool):
return v
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise ValueError('Boolean value expected.')
def flatten_list_of_dicts(dicts: Sequence[StrAny]) -> StrAny:
"""
Transforms a list of objects [{K: {...}}, {L: {....}}, ...] -> {K: {...}, L: {...}...}
"""
o: DictStrAny = {}
for d in dicts:
for k,v in d.items():
if k in o:
raise KeyError(f"Cannot flatten with duplicate key {k}")
o[k] = v
return o
def flatten_list_of_str_or_dicts(seq: Sequence[Union[StrAny, str]]) -> StrAny:
"""
Transforms a list of objects or strings [{K: {...}}, L, ...] -> {K: {...}, L: None, ...}
"""
o: DictStrAny = {}
for e in seq:
if type(e) is dict:
for k,v in e.items():
if k in o:
raise KeyError(f"Cannot flatten with duplicate key {k}")
o[k] = v
else:
key = str(e)
if key in o:
raise KeyError(f"Cannot flatten with duplicate key {k}")
o[key] = None
return o
def flatten_dicts_of_dicts(dicts: Mapping[str, Any]) -> Sequence[Any]:
"""
Transforms an object {K: {...}, L: {...}...} -> [{key:K, ....}, {key: L, ...}, ...]
"""
o: List[Any] = []
for k, v in dicts.items():
if type(v) is list:
# if v is a list then add "key" to each list element
for lv in v:
lv["key"] = k
else:
# add as "key" to dict
v["key"] = k
o.append(v)
return o
def tuplify_list_of_dicts(dicts: Sequence[DictStrAny]) -> Sequence[DictStrAny]:
"""
Transform dicts with single key into {"key": orig_key, "value": orig_value}
"""
for d in dicts:
if len(d) > 1:
raise ValueError(f"Tuplify requires one key dicts {d}")
if len(d) == 1:
key = next(iter(d))
# delete key first to avoid name clashes
value = d[key]
del d[key]
d["key"] = key
d["value"] = value
return dicts
def filter_env_vars(vars: List[str]) -> StrStr:
return {k.lower(): environ[k] for k in vars if k in environ}
def update_dict_with_prune(dest: DictStrAny, update: StrAny) -> None:
for k, v in update.items():
if v is not None:
dest[k] = v
elif k in dest:
del dest[k]
def is_interactive() -> bool:
import __main__ as main
return not hasattr(main, '__file__')
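A few quick, illustrative checks of the helpers above:

assert list(chunks([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]
assert flatten_list_of_dicts([{"a": {"x": 1}}, {"b": {"y": 2}}]) == {"a": {"x": 1}, "b": {"y": 2}}
assert str2bool("yes") is True and str2bool("0") is False
assert len(digest128("some value")) == 32  # 128-bit shake digest rendered as 32 hex chars

d = {"status": None, "name": "alice"}
update_dict_with_prune(d, {"status": None, "name": "bob"})
assert d == {"name": "bob"}  # None values prune the key from the destination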

17
dlt/dbt_runner/README.md Normal file
View File

@@ -0,0 +1,17 @@
https://github.com/davidgasquez/kubedbt
https://discourse.getdbt.com/t/running-dbt-in-kubernetes/92
https://github.com/godatadriven/pytest-dbt-core
https://github.com/great-expectations/great_expectations
https://github.com/fal-ai/fal (attach python scripts to models)
https://blog.getdbt.com/how-great-data-teams-test-their-data-models/
PG_DATABASE_NAME=chat_analytics_rasa PG_PASSWORD=8P5gyDPNo9zo582rQG6a PG_USER=loader PG_HOST=3.66.204.141 PG_PORT=5439 dbt list --profiles-dir . --vars '{source_schema_prefix: "unk"}' --resource-type test -s source:*
https://docs.getdbt.com/reference/node-selection/test-selection-examples
# list tests with selectors
PG_DATABASE_NAME=chat_analytics_rasa PG_PASSWORD=8P5gyDPNo9zo582rQG6a PG_USER=loader PG_HOST=3.66.204.141 PG_PORT=5439 dbt list --profiles-dir . --vars '{source_schema_prefix: "unk"}' --resource-type test -s views

View File

@@ -0,0 +1 @@
from ._version import __version__

View File

@@ -0,0 +1 @@
__version__ = "1.0.0"

View File

@@ -0,0 +1,69 @@
from typing import List, Optional, Type
from dlt.common.typing import StrAny
from dlt.common.configuration.utils import TConfigSecret, make_configuration, _get_key_value
from dlt.common.configuration import PoolRunnerConfiguration, TPoolType, PostgresConfiguration, PostgresProductionConfiguration, GcpClientConfiguration, GcpClientProductionConfiguration
from . import __version__
class DBTRunnerConfiguration(PoolRunnerConfiguration):
POOL_TYPE: TPoolType = "none"
STOP_AFTER_RUNS: int = 1
PACKAGE_VOLUME_PATH: str = "_storage/dbt_runner"
PACKAGE_REPOSITORY_URL: str = "https://github.com/scale-vector/rasa_semantic_schema_customization.git"
PACKAGE_REPOSITORY_BRANCH: Optional[str] = None
PACKAGE_REPOSITORY_SSH_KEY: TConfigSecret = TConfigSecret("") # the default is empty value which will disable custom SSH KEY
PACKAGE_PROFILES_DIR: str = "."
PACKAGE_PROFILE_PREFIX: str = "rasa_semantic_schema"
PACKAGE_SOURCE_TESTS_SELECTOR: str = "tag:prerequisites"
PACKAGE_ADDITIONAL_VARS: Optional[StrAny] = None
PACKAGE_RUN_PARAMS: List[str] = ["--fail-fast"]
AUTO_FULL_REFRESH_WHEN_OUT_OF_SYNC: bool = True
SOURCE_SCHEMA_PREFIX: str = None
DEST_SCHEMA_PREFIX: Optional[str] = None
@classmethod
def check_integrity(cls) -> None:
if cls.PACKAGE_REPOSITORY_SSH_KEY and cls.PACKAGE_REPOSITORY_SSH_KEY[-1] != "\n":
# must end with new line, otherwise won't be parsed by Crypto
cls.PACKAGE_REPOSITORY_SSH_KEY = TConfigSecret(cls.PACKAGE_REPOSITORY_SSH_KEY + "\n")
if cls.STOP_AFTER_RUNS != 1:
# always stop after one run
cls.STOP_AFTER_RUNS = 1
class DBTRunnerProductionConfiguration(DBTRunnerConfiguration):
PACKAGE_VOLUME_PATH: str = "/var/local/app" # this is actually not exposed as volume
PACKAGE_REPOSITORY_URL: str = None
def gen_configuration_variant(initial_values: StrAny = None) -> Type[DBTRunnerConfiguration]:
# derive concrete config depending on env vars present
DBTRunnerConfigurationImpl: Type[DBTRunnerConfiguration]
DBTRunnerProductionConfigurationImpl: Type[DBTRunnerProductionConfiguration]
if _get_key_value("PG_SCHEMA_PREFIX", type(str)):
source_schema_prefix = _get_key_value("PG_SCHEMA_PREFIX", type(str))
class DBTRunnerConfigurationPostgress(PostgresConfiguration, DBTRunnerConfiguration):
SOURCE_SCHEMA_PREFIX: str = source_schema_prefix
DBTRunnerConfigurationImpl = DBTRunnerConfigurationPostgress
class DBTRunnerProductionConfigurationPostgress(DBTRunnerProductionConfiguration, PostgresProductionConfiguration, DBTRunnerConfigurationPostgress):
pass
# SOURCE_SCHEMA_PREFIX: str = source_schema_prefix
DBTRunnerProductionConfigurationImpl = DBTRunnerProductionConfigurationPostgress
else:
source_schema_prefix = _get_key_value("DATASET", type(str))
class DBTRunnerConfigurationGcp(GcpClientConfiguration, DBTRunnerConfiguration):
SOURCE_SCHEMA_PREFIX: str = source_schema_prefix
DBTRunnerConfigurationImpl = DBTRunnerConfigurationGcp
class DBTRunnerProductionConfigurationGcp(DBTRunnerProductionConfiguration, GcpClientProductionConfiguration, DBTRunnerConfigurationGcp):
pass
# SOURCE_SCHEMA_PREFIX: str = source_schema_prefix
DBTRunnerProductionConfigurationImpl = DBTRunnerProductionConfigurationGcp
return make_configuration(DBTRunnerConfigurationImpl, DBTRunnerProductionConfigurationImpl, initial_values=initial_values)

View File

@@ -0,0 +1,9 @@
from dlt.common.exceptions import DltException
class DBTRunnerException(DltException):
pass
class PrerequisitesException(DBTRunnerException):
pass

187
dlt/dbt_runner/runner.py Normal file
View File

@@ -0,0 +1,187 @@
from typing import Optional, Sequence, Tuple, Type
from git import GitError
from prometheus_client import REGISTRY, Gauge, CollectorRegistry, Info
from prometheus_client.metrics import MetricWrapperBase
from dlt.common.configuration import GcpClientConfiguration
from dlt.common import logger
from dlt.common.typing import DictStrAny, DictStrStr, StrAny
from dlt.common.logger import process_internal_exception, is_json_logging
from dlt.common.telemetry import get_logging_extras
from dlt.common.file_storage import FileStorage
from dlt.common.runners import TRunArgs, create_default_args, initialize_runner, pool_runner
from dlt.common.telemetry import TRunMetrics
from dlt.dbt_runner.configuration import DBTRunnerConfiguration, gen_configuration_variant
from dlt.dbt_runner.utils import DBTProcessingError, clone_repo, dbt_results, ensure_remote_head, git_custom_key_command, initialize_dbt_logging, is_incremental_schema_out_of_sync_error, run_dbt_command
from dlt.dbt_runner.exceptions import PrerequisitesException
CLONED_PACKAGE_NAME = "dbt_package"
CONFIG: Type[DBTRunnerConfiguration] = None
storage: FileStorage = None
dbt_package_vars: StrAny = None
global_args: Sequence[str] = None
repo_path: str = None
profile_name: str = None
model_elapsed_gauge: Gauge = None
model_exec_info: Info = None
def create_folders() -> Tuple[FileStorage, StrAny, Sequence[str], str, str]:
storage = FileStorage(CONFIG.PACKAGE_VOLUME_PATH, makedirs=True)
dbt_package_vars: DictStrAny = {
"source_schema_prefix": CONFIG.SOURCE_SCHEMA_PREFIX
}
if CONFIG.DEST_SCHEMA_PREFIX:
dbt_package_vars["dest_schema_prefix"] = CONFIG.DEST_SCHEMA_PREFIX
if CONFIG.PACKAGE_ADDITIONAL_VARS:
dbt_package_vars.update(CONFIG.PACKAGE_ADDITIONAL_VARS)
# initialize dbt logging, returns global parameters to dbt command
global_args = initialize_dbt_logging(CONFIG.LOG_LEVEL, is_json_logging(CONFIG.LOG_FORMAT))
# generate path for the dbt package repo
repo_path = storage._make_path(CLONED_PACKAGE_NAME)
# generate profile name
profile_name: str = None
if CONFIG.PACKAGE_PROFILE_PREFIX:
if issubclass(CONFIG, GcpClientConfiguration):
profile_name = "%s_bigquery" % (CONFIG.PACKAGE_PROFILE_PREFIX)
else:
profile_name = "%s_redshift" % (CONFIG.PACKAGE_PROFILE_PREFIX)
return storage, dbt_package_vars, global_args, repo_path, profile_name
def create_gauges(registry: CollectorRegistry) -> Tuple[MetricWrapperBase, MetricWrapperBase]:
return (
Gauge("dbtrunner_model_elapsed_seconds", "Last model processing time", ["model"], registry=registry),
Info("dbtrunner_model_status", "Last execution status of the model", registry=registry)
)
def run_dbt(command: str, command_args: Sequence[str] = None) -> Sequence[dbt_results.BaseResult]:
logger.info(f"Exec dbt command: {global_args} {command} {command_args} {dbt_package_vars} on profile {profile_name or '<project_default>'}")
return run_dbt_command(
repo_path, command,
CONFIG.PACKAGE_PROFILES_DIR,
profile_name=profile_name,
command_args=command_args,
global_args=global_args,
vars=dbt_package_vars
)
def log_dbt_run_results(results: dbt_results.RunExecutionResult) -> None:
# run may return RunResult or something different depending on the error
if issubclass(type(results), dbt_results.BaseResult):
results = [results] # make it iterable
elif issubclass(type(results), dbt_results.ExecutionResult):
pass
else:
logger.warning(f"{type(results)} is unknown and cannot be logged")
return
info: DictStrStr = {}
for res in results:
name = res.node.name
message = res.message
time = res.execution_time
if res.status == dbt_results.RunStatus.Error:
logger.error(f"Model {name} errored! Error: {message}")
else:
logger.info(f"Model {name} {res.status} in {time} seconds with {message}")
model_elapsed_gauge.labels(name).set(time)
info[name] = message
# log execution
model_exec_info.info(info)
logger.metrics("Executed models", extra=get_logging_extras([model_elapsed_gauge, model_exec_info]))
def initialize_package(with_git_command: Optional[str]) -> None:
try:
# cleanup package folder
if storage.has_folder(CLONED_PACKAGE_NAME):
storage.delete_folder(CLONED_PACKAGE_NAME, recursively=True)
logger.info(f"Will clone {CONFIG.PACKAGE_REPOSITORY_URL} head {CONFIG.PACKAGE_REPOSITORY_BRANCH} into {repo_path}")
clone_repo(CONFIG.PACKAGE_REPOSITORY_URL, repo_path, branch=CONFIG.PACKAGE_REPOSITORY_BRANCH, with_git_command=with_git_command)
run_dbt("deps")
except Exception as e:
# delete folder so we start clean next time
if storage.has_folder(CLONED_PACKAGE_NAME):
storage.delete_folder(CLONED_PACKAGE_NAME, recursively=True)
raise
def ensure_newest_package() -> None:
with git_custom_key_command(CONFIG.PACKAGE_REPOSITORY_SSH_KEY) as ssh_command:
try:
ensure_remote_head(repo_path, with_git_command=ssh_command)
except GitError as err:
# cleanup package folder
logger.info(f"Package will be cloned due to {type(err).__name__}:{str(err)}")
initialize_package(with_git_command=ssh_command)
def run_db_steps() -> Sequence[dbt_results.BaseResult]:
# make sure we use package from the remote head
ensure_newest_package()
# check if raw schema exists
try:
if CONFIG.PACKAGE_SOURCE_TESTS_SELECTOR:
run_dbt("test", ["-s", CONFIG.PACKAGE_SOURCE_TESTS_SELECTOR])
except DBTProcessingError as err:
raise PrerequisitesException() from err
# always run seeds
run_dbt("seed")
# throws DBTProcessingError
try:
return run_dbt("run", CONFIG.PACKAGE_RUN_PARAMS)
except DBTProcessingError as e:
# detect incremental model out of sync
if is_incremental_schema_out_of_sync_error(e.results) and CONFIG.AUTO_FULL_REFRESH_WHEN_OUT_OF_SYNC:
logger.warning(f"Attempting full refresh due to incremental model out of sync on {e.results.message}")
return run_dbt("run", CONFIG.PACKAGE_RUN_PARAMS + ["--full-refresh"])
else:
raise
def run(_: None) -> TRunMetrics:
try:
# there were many issues with running the method below with pool.apply
# 1 - some exceptions are not serialized well on process boundary and queue hangs
# 2 - random hangs even if there's no exception, probably issues with DBT spawning its own workers
# instead the runner host was configured to recycle each run
results = run_db_steps()
log_dbt_run_results(results)
return TRunMetrics(False, False, 0)
except PrerequisitesException:
logger.warning("Raw schema test failed, it may not be created yet")
# run failed and loads possibly still pending
return TRunMetrics(False, True, 1)
except DBTProcessingError as runerr:
log_dbt_run_results(runerr.results)
# pass exception to the runner
raise
if __name__ == '__main__':
CONFIG = gen_configuration_variant()
parser = create_default_args(CONFIG)
args = parser.parse_args()
# we should force single run
initialize_runner(CONFIG, TRunArgs(args.single_run, args.wait_runs))
try:
storage, dbt_package_vars, global_args, repo_path, profile_name = create_folders()
model_elapsed_gauge, model_exec_info = create_gauges(REGISTRY)
except Exception:
process_internal_exception("init module")
exit(-1)
exit(pool_runner(CONFIG, run))

130
dlt/dbt_runner/utils.py Normal file

@@ -0,0 +1,130 @@
import os
import logging
import tempfile
from typing import Any, Iterator, List, Sequence
from git import Repo, Git, RepositoryDirtyError
from contextlib import contextmanager
from dlt.common import json
from dlt.common.utils import uniq_id
from dlt.common.typing import StrAny, Optional
from dlt.dbt_runner.exceptions import DBTRunnerException
# block logbook from disabling the root logger
import logbook.compat
logbook.compat.redirect_logging = lambda: None
# can only import DBT after redirect is disabled
import dbt.main
import dbt.logger
from dbt.events import functions
from dbt.contracts import results as dbt_results
from dbt.exceptions import FailFastException
# keep this exception definition here due to mock of logbook
class DBTProcessingError(DBTRunnerException):
def __init__(self, command: str, results: Any) -> None:
self.command = command
# the results from DBT may be anything
self.results = results
super().__init__(f"DBT command {command} could not be executed")
@contextmanager
def git_custom_key_command(private_key: Optional[str]) -> Iterator[str]:
if private_key:
key_file = tempfile.mktemp(prefix=uniq_id())
with open(key_file, "w") as f:
f.write(private_key)
try:
# permissions so SSH does not complain
os.chmod(key_file, 0o600)
yield 'ssh -o "StrictHostKeyChecking accept-new" -i %s' % key_file
finally:
os.remove(key_file)
else:
yield 'ssh -o "StrictHostKeyChecking accept-new"'
def ensure_remote_head(repo_path: str, with_git_command: Optional[str] = None) -> None:
# update remotes and check if heads are same. ignores locally modified files
repo = Repo(repo_path)
# use custom environment if specified
with repo.git.custom_environment(GIT_SSH_COMMAND=with_git_command):
# update origin
repo.remote().update()
# get branch status
status: str = repo.git.status("--short", "--branch", "-uno")
# we expect the first status line to look like: ## main...origin/main
status_line = status.split("\n")[0]
if not (status_line.startswith("##") and not status_line.endswith("]")):
raise RepositoryDirtyError(repo, status)
def clone_repo(repository_url: str, clone_path: str, branch: Optional[str] = None, with_git_command: Optional[str] = None) -> None:
repo = Repo.clone_from(repository_url, clone_path, env=dict(GIT_SSH_COMMAND=with_git_command))
if branch:
repo.git.checkout(branch)
def initialize_dbt_logging(level: str, is_json_logging: bool) -> Sequence[str]:
int_level = logging._nameToLevel[level]
# wrap log setup to force our log level
def setup_event_logger_wrapper(log_path: str, level_override: str = None) -> None:
functions.setup_event_logger(log_path, level)
# force log level as file is debug only
functions.this.FILE_LOG.setLevel(level)
functions.this.FILE_LOG.handlers[0].setLevel(level)
dbt.main.setup_event_logger = setup_event_logger_wrapper
globs = []
if int_level <= logging.DEBUG:
globs = ["--debug"]
# return global parameters to be passed to setup logging
if is_json_logging:
return ["--log-format", "json"] + globs
else:
return globs
def is_incremental_schema_out_of_sync_error(error: dbt_results.RunResult) -> bool:
return issubclass(type(error), dbt_results.RunResult) and error.status == dbt_results.RunStatus.Error and\
"The source and target schemas on this incremental model are out of sync" in error.message
def run_dbt_command(package_path: str, command: str, profiles_dir: str, profile_name: Optional[str] = None,
global_args: Sequence[str] = None, command_args: Sequence[str] = None, vars: StrAny = None) -> Sequence[dbt_results.BaseResult]:
args = ["--profiles-dir", profiles_dir]
# add profile name if provided
if profile_name:
args += ["--profile", profile_name]
# serialize dbt variables to pass to package
if vars:
args += ["--vars", json.dumps(vars)]
if command_args:
args += command_args
# cwd to package dir
working_dir = os.getcwd()
os.chdir(package_path)
try:
results: List[dbt_results.BaseResult] = None
success: bool = None
results, success = dbt.main.handle_and_check((global_args or []) + [command] + args) # type: ignore
assert type(success) is bool
if not success:
raise DBTProcessingError(command, results)
return results
except FailFastException as ff:
raise DBTProcessingError(command, ff.result) from ff
finally:
# unblock logger manager to run next command
dbt.logger.log_manager.reset_handlers()
# go back to working dir
os.chdir(working_dir)
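A brief usage sketch of the helpers above, outside of the runner module. The repository URL, clone path and key below are placeholders; the real wiring lives in `initialize_package` and `ensure_newest_package` in the runner:

```python
# Illustrative sketch only - the values below are placeholders, not project settings.
from dlt.dbt_runner.utils import git_custom_key_command, clone_repo, ensure_remote_head

repo_url = "git@example.com:acme/dbt_package.git"  # hypothetical repository
repo_path = "/tmp/dbt_package"                     # hypothetical clone target
private_key = None                                 # or the PEM contents of a deploy key

with git_custom_key_command(private_key) as ssh_command:
    # first run: clone the package, optionally checking out a branch
    clone_repo(repo_url, repo_path, branch="master", with_git_command=ssh_command)
    # subsequent runs: verify the local head matches origin, raises RepositoryDirtyError otherwise
    ensure_remote_head(repo_path, with_git_command=ssh_command)
```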


@@ -0,0 +1,5 @@
from dlt.common.exceptions import DltException
class ExtractorException(DltException):
pass


@@ -0,0 +1,34 @@
import semver
from dlt.common.utils import uniq_id
from dlt.common.file_storage import FileStorage
from dlt.common.storages.versioned_storage import VersionedStorage
from dlt.common.storages.unpacker_storage import UnpackerStorage
class ExtractorStorageBase(VersionedStorage):
def __init__(self, version: semver.VersionInfo, is_owner: bool, storage: FileStorage, unpacker_storage: UnpackerStorage) -> None:
self.unpacker_storage = unpacker_storage
super().__init__(version, is_owner, storage)
def create_temp_folder(self) -> str:
tf_name = uniq_id()
self.storage.create_folder(tf_name)
return tf_name
def commit_events(self, schema_name: str, processed_file_path: str, dest_file_stem: str, no_processed_events: int, load_id: str, with_delete: bool = True) -> str:
# schema name cannot contain underscores
if "_" in schema_name:
raise ValueError(schema_name)
dest_name = UnpackerStorage.build_unpack_file_name(schema_name, dest_file_stem, no_processed_events, load_id)
# if no events extracted from tracker, file is not saved
if no_processed_events > 0:
# moves the file to possibly external storage and places it in the dest folder atomically
self.storage.copy_cross_storage_atomically(
self.unpacker_storage.storage.storage_path, UnpackerStorage.UNPACKING_FOLDER, processed_file_path, dest_name)
if with_delete:
self.storage.delete(processed_file_path)
return dest_name


1
dlt/loaders/__init__.py Normal file

@@ -0,0 +1 @@
from dlt._version import loader_version as __version__

150
dlt/loaders/client_base.py Normal file

@@ -0,0 +1,150 @@
from abc import ABC, abstractmethod
from types import TracebackType
from typing import Any, Literal, Sequence, Type, TypeVar, AnyStr
from pathlib import Path
from dlt.common import pendulum, logger
from dlt.common.schema import Column, Schema, Table
# from dlt.common.file_storage import FileStorage
from dlt.loaders.local_types import LoadJobStatus
from dlt.loaders.exceptions import LoadClientSchemaVersionCorrupted, LoadUnknownTableException
# typing for context manager
TClient = TypeVar("TClient", bound="ClientBase")
class LoadJob:
def __init__(self, file_name: str) -> None:
"""
File name is also a job id (or job id is deterministically derived) so it must be globally unique
"""
self._file_name = file_name
@abstractmethod
def status(self) -> LoadJobStatus:
pass
@abstractmethod
def file_name(self) -> str:
pass
@abstractmethod
def exception(self) -> str:
pass
class LoadEmptyJob(LoadJob):
def __init__(self, file_name: str, status: LoadJobStatus, exception: str = None) -> None:
self._status = status
self._exception = exception
super().__init__(file_name)
def status(self) -> LoadJobStatus:
return self._status
def file_name(self) -> str:
return self._file_name
def exception(self) -> str:
return self._exception
class ClientBase(ABC):
def __init__(self, schema: Schema) -> None:
self.schema = schema
@abstractmethod
def initialize_storage(self) -> None:
pass
@abstractmethod
def update_storage_schema(self) -> None:
pass
@abstractmethod
def start_file_load(self, table_name: str, file_path: str) -> LoadJob:
pass
@abstractmethod
def get_file_load(self, file_path: str) -> LoadJob:
pass
@abstractmethod
def complete_load(self, load_id: str) -> None:
pass
@abstractmethod
def _open_connection(self) -> None:
pass
@abstractmethod
def _close_connection(self) -> None:
pass
def __enter__(self: TClient) -> TClient:
self._open_connection()
return self
def __exit__(self, exc_type: Type[BaseException], exc_val: BaseException, exc_tb: TracebackType) -> None:
self._close_connection()
def _get_table_by_name(self, table_name: str, file_name: str) -> Table:
try:
return self.schema.get_table(table_name)
except KeyError:
raise LoadUnknownTableException(table_name, file_name)
@staticmethod
def get_file_name_from_file_path(file_path: str) -> str:
return Path(file_path).name
@staticmethod
def make_job_with_status(file_path: str, status: LoadJobStatus, message: str = None) -> LoadJob:
return LoadEmptyJob(ClientBase.get_file_name_from_file_path(file_path), status, exception=message)
@staticmethod
def make_absolute_path(file_path: str) -> str:
return str(Path(file_path).absolute())
class SqlClientBase(ClientBase):
def __init__(self, schema: Schema) -> None:
super().__init__(schema)
def complete_load(self, load_id: str) -> None:
name = self._to_canonical_table_name(Schema.LOADS_TABLE_NAME)
now_ts = str(pendulum.now())
self._execute_sql(f"INSERT INTO {name}(load_id, status, inserted_at) VALUES('{load_id}', 0, '{now_ts}');")
@abstractmethod
def _execute_sql(self, query: AnyStr) -> Any:
pass
@abstractmethod
def _to_canonical_schema_name(self) -> str:
pass
def _create_table_update(self, table_name: str, storage_table: Table) -> Sequence[Column]:
# compare table with stored schema and produce delta
l = self.schema.get_schema_update_for(table_name, storage_table)
logger.info(f"Found {len(l)} updates for {table_name} in {self.schema.schema_name}")
return l
def _to_canonical_table_name(self, table_name: str) -> str:
return f"{self._to_canonical_schema_name()}.{table_name}"
def _get_schema_version_from_storage(self) -> int:
name = self._to_canonical_table_name(Schema.VERSION_TABLE_NAME)
rows = list(self._execute_sql(f"SELECT {Schema.VERSION_COLUMN_NAME} FROM {name} ORDER BY inserted_at DESC LIMIT 1;"))
if len(rows) > 1:
raise LoadClientSchemaVersionCorrupted(self._to_canonical_schema_name())
if len(rows) == 0:
return 0
return int(rows[0][0])
def _update_schema_version(self, new_version: int) -> None:
now_ts = str(pendulum.now())
name = self._to_canonical_table_name(Schema.VERSION_TABLE_NAME)
self._execute_sql(f"INSERT INTO {name}({Schema.VERSION_COLUMN_NAME}, engine_version, inserted_at) VALUES ({new_version}, {Schema.ENGINE_VERSION}, '{now_ts}');")
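The classes above define the client lifecycle used by the loader: clients are context managers that open a connection on entry and close it on exit. A minimal sketch of the intended call sequence, assuming any concrete `ClientBase` implementation (the concrete clients follow later in this commit); table name, file path and load id are hypothetical:

```python
# Sketch of the intended client lifecycle for any concrete ClientBase implementation.
import time
from dlt.loaders.client_base import ClientBase

def load_one_file(client: ClientBase, table_name: str, file_path: str, load_id: str) -> str:
    # the with block calls _open_connection() on entry and _close_connection() on exit
    with client:
        client.initialize_storage()     # create the dataset / schema if missing
        client.update_storage_schema()  # apply schema deltas and bump the stored version
        job = client.start_file_load(table_name, file_path)
        while job.status() == "running":
            time.sleep(1)               # poll until completed / failed / retry
        client.complete_load(load_id)
        return job.status()
```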


@@ -0,0 +1,51 @@
from typing import Any, Type
from dlt.common.utils import uniq_id
from dlt.common.typing import StrAny
from dlt.common.configuration import (PoolRunnerConfiguration,
LoadingVolumeConfiguration,
ProductionLoadingVolumeConfiguration,
PostgresConfiguration, PostgresProductionConfiguration,
GcpClientConfiguration, GcpClientProductionConfiguration,
TPoolType, make_configuration)
from dlt.loaders.dummy.configuration import DummyClientConfiguration
from . import __version__
class LoaderConfiguration(PoolRunnerConfiguration, LoadingVolumeConfiguration):
CLIENT_TYPE: str = "dummy" # which analytical storage to use
MAX_PARALLEL_LOADS: int = 20 # how many parallel loads can be executed
MAX_PARALLELISM: int = 20 # in 20 separate threads
POOL_TYPE: TPoolType = "thread" # mostly i/o (upload) so may be thread pool
class ProductionLoaderConfiguration(ProductionLoadingVolumeConfiguration, LoaderConfiguration):
pass
def configuration(initial_values: StrAny = None) -> Type[LoaderConfiguration]:
# synthesize right configuration
C = make_configuration(LoaderConfiguration, ProductionLoaderConfiguration, initial_values=initial_values)
T: Type[Any] = None
T_P: Type[Any] = None
if C.CLIENT_TYPE == "dummy":
T = DummyClientConfiguration
T_P = DummyClientConfiguration
elif C.CLIENT_TYPE == "gcp":
T = GcpClientConfiguration
T_P = GcpClientProductionConfiguration
elif C.CLIENT_TYPE == "redshift":
T = PostgresConfiguration
T_P = PostgresProductionConfiguration
else:
raise ValueError(C.CLIENT_TYPE)
ST = type(LoaderConfiguration.__name__ + "_" + T.__name__ + "_" + uniq_id(), (T, LoaderConfiguration), {})
ST_P = type(ProductionLoaderConfiguration.__name__ + "_" + T_P.__name__ + "_" + uniq_id(), (T_P, ProductionLoaderConfiguration), {})
return make_configuration(
ST,
ST_P,
initial_values=initial_values,
skip_subclass_check=True
)
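The factory above builds the final configuration type at runtime with `type()`, so client settings and runner settings end up on one class. A stripped-down sketch of the same pattern with hypothetical configuration classes (the real code additionally routes the synthesized types through `make_configuration`):

```python
# Minimal illustration of synthesizing a combined configuration class with type().
class ClientConfig:            # hypothetical stand-in for e.g. GcpClientConfiguration
    TIMEOUT: float = 30.0

class RunnerConfig:            # hypothetical stand-in for LoaderConfiguration
    MAX_PARALLEL_LOADS: int = 20

# the synthesized class inherits attributes from both bases via the MRO
Combined = type("RunnerConfig_ClientConfig", (ClientConfig, RunnerConfig), {})
assert Combined.TIMEOUT == 30.0 and Combined.MAX_PARALLEL_LOADS == 20
```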


134
dlt/loaders/dummy/client.py Normal file

@@ -0,0 +1,134 @@
from datetime import time
import random
from typing import Dict, Literal, Type
from dlt.common.dataset_writers import TWriterType
from dlt.common import pendulum
from dlt.common.schema import Schema
from dlt.common.typing import StrAny
from dlt.loaders.client_base import ClientBase, LoadJob
from dlt.loaders.local_types import LoadJobStatus
from dlt.loaders.exceptions import (LoadJobNotExistsException, LoadJobInvalidStateTransitionException,
LoadClientTerminalException, LoadClientTransientException)
from dlt.loaders.dummy.configuration import DummyClientConfiguration
class LoadDummyJob(LoadJob):
def __init__(self, file_name: str, fail_prob: float = 0.0, retry_prob: float = 0.0, completed_prob: float = 1.0, timeout: float = 10.0) -> None:
self.fail_prob = fail_prob
self.retry_prob = retry_prob
self.completed_prob = completed_prob
self.timeout = timeout
self._status: LoadJobStatus = "running"
self._exception: str = None
self.start_time: float = pendulum.now().timestamp()
super().__init__(file_name)
s = self.status()
if s == "failed":
raise LoadClientTerminalException(self._exception)
if s == "retry":
raise LoadClientTransientException(self._exception)
def status(self) -> LoadJobStatus:
# this should poll the server for a job status, here we simulate various outcomes
if self._status == "running":
n = pendulum.now().timestamp()
if n - self.start_time > self.timeout:
self._status = "failed"
self._exception = "failed due to timeout"
else:
c_r = random.random()
if self.completed_prob >= c_r:
self._status = "completed"
else:
c_r = random.random()
if self.retry_prob >= c_r:
self._status = "retry"
self._exception = "a random retry occured"
else:
c_r = random.random()
if self.fail_prob >= c_r:
self._status = "failed"
self._exception = "a random fail occured"
return self._status
def file_name(self) -> str:
return self._file_name
def exception(self) -> str:
# this will typically call server for error messages
return self._exception
def retry(self) -> None:
if self._status != "retry":
raise LoadJobInvalidStateTransitionException(self._status, "retry")
self._status = "retry"
JOBS: Dict[str, LoadDummyJob] = {}
class DummyClient(ClientBase):
"""
dummy client storing jobs in memory
"""
def __init__(self, schema: Schema, CONFIG: Type[DummyClientConfiguration]) -> None:
self.C = CONFIG
super().__init__(schema)
def initialize_storage(self) -> None:
pass
def update_storage_schema(self) -> None:
pass
def start_file_load(self, table_name: str, file_path: str) -> LoadJob:
self._get_table_by_name(table_name, file_path)
job_id = ClientBase.get_file_name_from_file_path(file_path)
file_name = ClientBase.get_file_name_from_file_path(file_path)
# return existing job if already there
if job_id not in JOBS:
JOBS[job_id] = self._create_job(file_name)
else:
job = JOBS[job_id]
if job.status() == "retry":
job.retry()
return JOBS[job_id]
def get_file_load(self, file_path: str) -> LoadJob:
job_id = ClientBase.get_file_name_from_file_path(file_path)
if job_id not in JOBS:
raise LoadJobNotExistsException(job_id)
return JOBS[job_id]
def complete_load(self, load_id: str) -> None:
pass
def _open_connection(self) -> None:
pass
def _close_connection(self) -> None:
pass
def _create_job(self, job_id: str) -> LoadDummyJob:
return LoadDummyJob(
job_id,
fail_prob=self.C.FAIL_PROB,
retry_prob=self.C.RETRY_PROB,
completed_prob=self.C.COMPLETED_PROB,
timeout=self.C.TIMEOUT
)
def make_client(schema: Schema, C: Type[DummyClientConfiguration]) -> ClientBase:
return DummyClient(schema, C)
def supported_writer(C: Type[DummyClientConfiguration]) -> TWriterType:
return C.WRITER_TYPE


@@ -0,0 +1,8 @@
from dlt.common.dataset_writers import TWriterType
class DummyClientConfiguration:
WRITER_TYPE: TWriterType = "jsonl"
FAIL_PROB: float = 0.0
RETRY_PROB: float = 0.0
COMPLETED_PROB: float = 0.0
TIMEOUT: float = 10.0

72
dlt/loaders/exceptions.py Normal file

@@ -0,0 +1,72 @@
from typing import Sequence
from dlt.common.exceptions import DltException, TerminalException, TransientException
from dlt.loaders.local_types import LoadJobStatus
class LoadException(DltException):
def __init__(self, msg: str) -> None:
super().__init__(msg)
class LoadClientTerminalException(LoadException, TerminalException):
def __init__(self, msg: str) -> None:
super().__init__(msg)
class LoadClientTransientException(LoadException, TransientException):
def __init__(self, msg: str) -> None:
super().__init__(msg)
class LoadClientTerminalInnerException(LoadClientTerminalException):
def __init__(self, msg: str, inner_exc: Exception) -> None:
self.inner_exc = inner_exc
super().__init__(msg)
class LoadClientTransientInnerException(LoadClientTransientException):
def __init__(self, msg: str, inner_exc: Exception) -> None:
self.inner_exc = inner_exc
super().__init__(msg)
class LoadJobNotExistsException(LoadClientTerminalException):
def __init__(self, job_id: str) -> None:
super().__init__(f"Job with id/file name {job_id} not found")
class LoadUnknownTableException(LoadClientTerminalException):
def __init__(self, table_name: str, file_name: str) -> None:
self.table_name = table_name
super().__init__(f"Client does not know table {table_name} for load file {file_name}")
class LoadJobInvalidStateTransitionException(LoadClientTerminalException):
def __init__(self, from_state: LoadJobStatus, to_state: LoadJobStatus) -> None:
self.from_state = from_state
self.to_state = to_state
super().__init__(f"Load job cannot transition form {from_state} to {to_state}")
class LoadJobServerTerminalException(LoadClientTerminalException):
def __init__(self, file_path: str) -> None:
super().__init__(f"Job with id/file name {file_path} encountered unrecoverable problem")
class LoadClientSchemaVersionCorrupted(LoadClientTerminalException):
def __init__(self, dataset_name: str) -> None:
self.dataset_name = dataset_name
super().__init__(f"Schema _version table contains too many rows in {dataset_name}")
class LoadClientSchemaWillNotUpdate(LoadClientTerminalException):
def __init__(self, table_name: str, columns: Sequence[str], msg: str) -> None:
self.table_name = table_name
self.columns = columns
super().__init__(f"Schema for table {table_name} column(s) {columns} will not update: {msg}")
class LoadFileTooBig(LoadClientTerminalException):
def __init__(self, file_name: str, max_size: int) -> None:
super().__init__(f"File {file_name} exceedes {max_size} and cannot be loaded. Split the file and try again.")


@@ -0,0 +1,6 @@
# Loader account setup
1. Create a new service account, add a private key to it and download the `services.json` file
2. Make sure this newly created account has access to the BigQuery API
3. Add the following roles to the account above: `BigQuery Data Editor` and `BigQuery Job User`
4. Roles are assigned in IAM: https://console.cloud.google.com/iam-admin/iam?project=chat-analytics-rasa-ci
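A hedged sanity check, once `services.json` is downloaded, that the key works and the account can reach BigQuery. This uses plain `google-cloud-bigquery` calls and is independent of the loader code:

```python
# Quick check that the downloaded service account key is valid and BigQuery is reachable.
from google.oauth2 import service_account
import google.cloud.bigquery as bigquery

credentials = service_account.Credentials.from_service_account_file("services.json")
client = bigquery.Client(project=credentials.project_id, credentials=credentials)
# listing datasets only needs basic access; role problems surface when loading data
print([dataset.dataset_id for dataset in client.list_datasets()])
```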


324
dlt/loaders/gcp/client.py Normal file

@@ -0,0 +1,324 @@
from pathlib import Path
from typing import Any, AnyStr, Dict, List, Literal, Optional, Tuple, Type
import google.cloud.bigquery as bigquery
from google.cloud import exceptions as gcp_exceptions
from google.oauth2 import service_account
from google.api_core import exceptions as api_core_exceptions
from dlt.common import json, logger
from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
from dlt.common.configuration import GcpClientConfiguration
from dlt.common.dataset_writers import TWriterType, escape_bigquery_identifier
from dlt.loaders.local_types import LoadJobStatus
from dlt.common.schema import Column, DataType, Schema, Table
from dlt.loaders.client_base import SqlClientBase, LoadJob
from dlt.loaders.exceptions import LoadClientSchemaWillNotUpdate, LoadJobNotExistsException, LoadJobServerTerminalException, LoadUnknownTableException
SCT_TO_BQT: Dict[DataType, str] = {
"text": "STRING",
"double": "FLOAT64",
"bool": "BOOLEAN",
"timestamp": "TIMESTAMP",
"bigint": "INTEGER",
"binary": "BYTES",
"decimal": f"NUMERIC({DEFAULT_NUMERIC_PRECISION},{DEFAULT_NUMERIC_SCALE})",
"wei": "BIGNUMERIC" # non parametrized should hold wei values
}
BQT_TO_SCT: Dict[str, DataType] = {
"STRING": "text",
"FLOAT": "double",
"BOOLEAN": "bool",
"TIMESTAMP": "timestamp",
"INTEGER": "bigint",
"BYTES": "binary",
"NUMERIC": "decimal",
"BIGNUMERIC": "decimal"
}
class BigQueryLoadJob(LoadJob):
def __init__(self, file_name: str, bq_load_job: bigquery.LoadJob, CONFIG: Type[GcpClientConfiguration]) -> None:
self.bq_load_job = bq_load_job
self.C = CONFIG
self.default_retry = bigquery.DEFAULT_RETRY.with_deadline(CONFIG.TIMEOUT)
super().__init__(file_name)
def status(self) -> LoadJobStatus:
# check server if done
done = self.bq_load_job.done(retry=self.default_retry, timeout=self.C.TIMEOUT)
if done:
# rows processed
if self.bq_load_job.output_rows is not None and self.bq_load_job.error_result is None:
return "completed"
else:
return "failed"
else:
return "running"
def file_name(self) -> str:
return self._file_name
def exception(self) -> str:
exception: str = json.dumps({
"error_result": self.bq_load_job.error_result,
"errors": self.bq_load_job.errors,
"job_start": self.bq_load_job.started,
"job_end": self.bq_load_job.ended,
"job_id": self.bq_load_job.job_id
})
return exception
class BigQueryClient(SqlClientBase):
def __init__(self, schema: Schema, CONFIG: Type[GcpClientConfiguration]) -> None:
self._client: bigquery.Client = None
self.C = CONFIG
self.default_retry = bigquery.DEFAULT_RETRY.with_deadline(CONFIG.TIMEOUT)
super().__init__(schema)
def initialize_storage(self) -> None:
dataset_name = self._to_canonical_schema_name()
try:
self._client.get_dataset(dataset_name, retry=self.default_retry, timeout=self.C.TIMEOUT)
except gcp_exceptions.NotFound:
self._client.create_dataset(dataset_name, exists_ok=False, retry=self.default_retry, timeout=self.C.TIMEOUT)
def get_file_load(self, file_path: str) -> LoadJob:
try:
return BigQueryLoadJob(
SqlClientBase.get_file_name_from_file_path(file_path),
self._retrieve_load_job(file_path),
self.C
)
except api_core_exceptions.NotFound:
raise LoadJobNotExistsException(file_path)
except (api_core_exceptions.BadRequest, api_core_exceptions.NotFound):
raise LoadJobServerTerminalException(file_path)
def start_file_load(self, table_name: str, file_path: str) -> LoadJob:
# verify that table exists in the schema
self._get_table_by_name(table_name, file_path)
try:
return BigQueryLoadJob(
SqlClientBase.get_file_name_from_file_path(file_path),
self._create_load_job(table_name, file_path),
self.C
)
except api_core_exceptions.NotFound:
# google.api_core.exceptions.NotFound: 404 - table not found
raise LoadUnknownTableException(table_name, file_path)
except (api_core_exceptions.BadRequest, api_core_exceptions.NotFound):
# google.api_core.exceptions.BadRequest - will not be processed, i.e. bad job name
raise LoadJobServerTerminalException(file_path)
except api_core_exceptions.Conflict:
# google.api_core.exceptions.Conflict: 409 PUT - already exists
return self.get_file_load(file_path)
def update_storage_schema(self) -> None:
storage_version = self._get_schema_version_from_storage()
if storage_version < self.schema.schema_version:
for sql in self._build_schema_update_sql():
self._execute_sql(sql)
self._update_schema_version(self.schema.schema_version)
def _open_connection(self) -> None:
credentials = service_account.Credentials.from_service_account_info(self.C.to_service_credentials())
self._client = bigquery.Client(self.C.PROJECT_ID, credentials=credentials)
def _close_connection(self) -> None:
if self._client:
self._client.close()
self._client = None
def _get_schema_version_from_storage(self) -> int:
try:
return super()._get_schema_version_from_storage()
except api_core_exceptions.NotFound:
# there's no table so there's no schema
return 0
def _build_schema_update_sql(self) -> List[str]:
sql_updates = []
for table_name in self.schema.schema_tables:
exists, storage_table = self._get_storage_table(table_name)
sql = self._get_table_update_sql(table_name, storage_table, exists)
if sql:
sql_updates.append(sql)
return sql_updates
def _get_table_update_sql(self, table_name: str, storage_table: Table, exists: bool) -> str:
new_columns = self._create_table_update(table_name, storage_table)
if len(new_columns) == 0:
# no changes
return None
# build sql
canonical_name = self._to_canonical_table_name(table_name)
if not exists:
# build CREATE
sql = f"CREATE TABLE {canonical_name} (\n"
sql += ",\n".join([self._get_column_def_sql(c) for c in new_columns])
sql += ")"
else:
# build ALTER
sql = f"ALTER TABLE {canonical_name}\n"
sql += ",\n".join(["ADD COLUMN " + self._get_column_def_sql(c) for c in new_columns])
# scan columns to get hints
cluster_list = [escape_bigquery_identifier(c["name"]) for c in new_columns if c.get("cluster", False)]
partition_list = [escape_bigquery_identifier(c["name"]) for c in new_columns if c.get("partition", False)]
# partition by must be added first
if len(partition_list) > 0:
if exists:
raise LoadClientSchemaWillNotUpdate(canonical_name, partition_list, "Partition requested after table was created")
elif len(partition_list) > 1:
raise LoadClientSchemaWillNotUpdate(canonical_name, partition_list, "Partition requested for more than one column")
else:
sql += f"\nPARTITION BY DATE({partition_list[0]})"
if len(cluster_list) > 0:
if exists:
raise LoadClientSchemaWillNotUpdate(canonical_name, cluster_list, "Clustering requested after table was created")
else:
sql += "\nCLUSTER BY " + ",".join(cluster_list)
return sql
def _get_column_def_sql(self, c: Column) -> str:
name = escape_bigquery_identifier(c["name"])
return f"{name} {self._sc_t_to_bq_t(c['data_type'])} {self._gen_not_null(c['nullable'])}"
def _get_storage_table(self, table_name: str) -> Tuple[bool, Table]:
schema_table: Table = {}
try:
table = self._client.get_table(self._to_canonical_table_name(table_name), retry=self.default_retry, timeout=self.C.TIMEOUT)
partition_field = table.time_partitioning.field if table.time_partitioning else None
for c in table.schema:
schema_c: Column = {
"name": c.name,
"nullable": c.is_nullable,
"data_type": self._bq_t_to_sc_t(c.field_type, c.precision, c.scale),
"unique": False,
"sort": False,
"primary_key": False,
"foreign_key": False,
"cluster": c.name in (table.clustering_fields or []),
"partition": c.name == partition_field
}
schema_table[c.name] = schema_c
return True, schema_table
except gcp_exceptions.NotFound:
return False, schema_table
def _execute_sql(self, query: AnyStr) -> Any:
logger.debug(f"Will execute query {query}") # type: ignore
return self._client.query(query, job_retry=self.default_retry, timeout=self.C.TIMEOUT).result()
def _to_canonical_schema_name(self) -> str:
return f"{self.C.PROJECT_ID}.{self.C.DATASET}_{self.schema.schema_name}"
def _create_load_job(self, table_name: str, file_path: str) -> bigquery.LoadJob:
job_id = BigQueryClient._get_job_id_from_file_path(file_path)
job_config = bigquery.LoadJobConfig(
autodetect=False,
write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
create_disposition=bigquery.CreateDisposition.CREATE_NEVER,
source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
ignore_unknown_values=False,
max_bad_records=0,
)
with open(file_path, "rb") as f:
return self._client.load_table_from_file(f,
self._to_canonical_table_name(table_name),
job_id=job_id,
job_config=job_config,
timeout=self.C.TIMEOUT
)
def _retrieve_load_job(self, file_path: str) -> bigquery.LoadJob:
job_id = BigQueryClient._get_job_id_from_file_path(file_path)
return self._client.get_job(job_id)
@staticmethod
def _get_job_id_from_file_path(file_path: str) -> str:
return Path(file_path).name.replace(".", "_")
@staticmethod
def _gen_not_null(v: bool) -> str:
return "NOT NULL" if not v else ""
@staticmethod
def _sc_t_to_bq_t(sc_t: DataType) -> str:
return SCT_TO_BQT[sc_t]
@staticmethod
def _bq_t_to_sc_t(bq_t: str, precision: Optional[int], scale: Optional[int]) -> DataType:
if bq_t == "BIGNUMERIC":
if precision is None: # biggest numeric possible
return "wei"
return BQT_TO_SCT.get(bq_t, "text")
def make_client(schema: Schema, C: Type[GcpClientConfiguration]) -> BigQueryClient:
return BigQueryClient(schema, C)
def supported_writer(C: Type[GcpClientConfiguration]) -> TWriterType:
return "jsonl"
# cred = service_account.Credentials.from_service_account_info(_credentials)
# project_id = cred.get('project_id')
# client = bigquery.Client(project_id, credentials=cred)
# print(client.get_dataset("carbon_bot_extract_7"))
# exit(0)
# from dlt.common.configuration import SchemaStoreConfiguration
# from dlt.common.logger import init_logging_from_config
# init_logging_from_config(CLIENT_CONFIG)
# schema = Schema(SchemaStoreConfiguration.TRACKER_SCHEMA_FILE_PATH)
# schema.load_schema()
# import pprint
# # pprint.pprint(schema.as_yaml())
# with make_client(schema) as client:
# client.initialize_storage()
# # job = client._create_load_job("tracker", "_storage/loaded/1630949263.574516/completed_jobs/tracker.1c31ff1b-c250-4690-8973-14f0ee9ae355.jsonl")
# # unk table
# # job = client._create_load_job("trackerZ", "_storage/loaded/1630949263.574516/completed_jobs/tracker.4876f905-aefe-4262-a440-d29ed2643c3a.jsonl")
# # job = client._create_load_job("tracker", "_storage/loaded/1630949263.574516/completed_jobs/event_bot.c9105079-2d1d-4ad3-8613-a5dff790889d.jsonl")
# # failed
# # job = client._retrieve_load_job("_storage/loaded/1630949263.574516/completed_jobs/event_bot.c9105079-2d1d-4ad3-8613-a5dff790889d.jsonl")
# # OK
# job = client._retrieve_load_job("_storage/loaded/1630949263.574516/completed_jobs/tracker.1c31ff1b-c250-4690-8973-14f0ee9ae355.jsonl")
# while True:
# try:
# # this does not throw
# done = job.done()
# print(f"DONE: {job.done(reload=False)}")
# except Exception as e:
# logger.exception("DONE")
# done = True
# if done:
# break;
# # done is not self running
# # print(job.running())
# sleep(1)
# try:
# print(f"status: {job.state}")
# print(f"error: {job.error_result}")
# print(f"errors: {job.errors}")
# print(f"line count: {job.output_rows}")
# print(job.exception())
# except:
# logger.exception("EXCEPTION")
# try:
# print(job.result())
# except:
# logger.exception("RESULT")
# non existing table
# wrong data - unknown column

238
dlt/loaders/loader.py Normal file

@@ -0,0 +1,238 @@
from types import ModuleType
from typing import Any, Iterator, List, Dict, Literal, Optional, Tuple, Type
from multiprocessing.pool import ThreadPool
from importlib import import_module
from prometheus_client import REGISTRY, Counter, Gauge, CollectorRegistry, Summary
from prometheus_client.metrics import MetricWrapperBase
from dlt.common import sleep, logger
from dlt.common.runners import TRunArgs, TRunMetrics, create_default_args, initialize_runner, pool_runner
from dlt.common.logger import process_internal_exception, pretty_format_exception
from dlt.common.exceptions import TerminalValueError
from dlt.common.dataset_writers import TWriterType
from dlt.common.schema import Schema
from dlt.common.storages import SchemaStorage
from dlt.common.storages.loader_storage import LoaderStorage
from dlt.common.telemetry import get_logging_extras, set_gauge_all_labels
from dlt.loaders.exceptions import LoadClientTerminalException, LoadClientTransientException, LoadJobNotExistsException
from dlt.loaders.client_base import ClientBase, LoadJob
from dlt.loaders.local_types import LoadJobStatus
from dlt.loaders.configuration import configuration, LoaderConfiguration
CONFIG: Type[LoaderConfiguration] = None
load_storage: LoaderStorage = None
client_module: ModuleType = None
load_counter: Counter = None
job_gauge: Gauge = None
job_counter: Counter = None
job_wait_summary: Summary = None
def client_impl(client_type: str) -> ModuleType:
return import_module(f".{client_type}.client", "dlt.loaders")
def create_client(schema: Schema) -> ClientBase:
return client_module.make_client(schema, CONFIG) # type: ignore
def supported_writer() -> TWriterType:
return client_module.supported_writer(CONFIG) # type: ignore
def create_folders() -> LoaderStorage:
load_storage = LoaderStorage(False, CONFIG, supported_writer())
load_storage.initialize_storage()
return load_storage
def create_gauges(registry: CollectorRegistry) -> Tuple[MetricWrapperBase, MetricWrapperBase, MetricWrapperBase, MetricWrapperBase]:
return (
Counter("loader_load_package_counter", "Counts load package processed", registry=registry),
Gauge("loader_last_package_jobs_counter", "Counts jobs in last package per status", ["status"], registry=registry),
Counter("loader_jobs_counter", "Counts jobs per job status", ["status"], registry=registry),
Summary("loader_jobs_wait_seconds", "Counts jobs total wait until completion", registry=registry)
)
def spool_job(file_path: str, load_id: str, schema: Schema) -> Optional[LoadJob]:
# open new connection for each upload
job: LoadJob = None
try:
with create_client(schema) as client:
table_name, _ = load_storage.parse_load_file_name(file_path)
logger.info(f"Will load file {file_path} with table name {table_name}")
job = client.start_file_load(table_name, load_storage.storage._make_path(file_path))
except (LoadClientTerminalException, TerminalValueError):
# if the job cannot be started due to a terminal problem, mark it as failed
process_internal_exception(f"Terminal problem with spooling job {file_path}")
job = ClientBase.make_job_with_status(file_path, "failed", pretty_format_exception())
except (LoadClientTransientException, Exception):
# return no job so file stays in new jobs (root) folder
process_internal_exception(f"Temporary problem with spooling job {file_path}")
return None
load_storage.start_job(load_id, job.file_name())
return job
def spool_new_jobs(pool: ThreadPool, load_id: str, schema: Schema) -> Tuple[int, List[LoadJob]]:
# TODO: validate file type, combine files, finalize etc., this is client specific, jsonl for single table
# can just be combined, insert_values must be finalized and then combined
# use thread based pool as jobs processing is mostly I/O and we do not want to pickle jobs
# TODO: combine files by providing a list of files pertaining to same table into job, so job must be
# extended to accept a list
load_files = load_storage.list_new_jobs(load_id)[:CONFIG.MAX_PARALLEL_LOADS]
file_count = len(load_files)
if file_count == 0:
logger.info(f"No new jobs found in {load_id}")
return 0, []
logger.info(f"Will load {file_count}, creating jobs")
param_chunk = [(file, load_id, schema) for file in load_files]
# exceptions should not be raised, None as job is a temporary failure
# other jobs should not be affected
jobs: List[LoadJob] = pool.starmap(spool_job, param_chunk)
# remove None jobs and check the rest
return file_count, [job for job in jobs if job is not None]
def retrieve_jobs(client: ClientBase, load_id: str) -> Tuple[int, List[LoadJob]]:
jobs: List[LoadJob] = []
# list all files that were started but not yet completed
started_jobs = load_storage.list_started_jobs(load_id)
logger.info(f"Found {len(started_jobs)} that are already started and should be continued")
if len(started_jobs) == 0:
return 0, jobs
for file_path in started_jobs:
try:
logger.info(f"Will retrieve {file_path}")
job = client.get_file_load(file_path)
except LoadClientTerminalException:
process_internal_exception(f"Job retrieval for {file_path} failed, job will be terminated")
job = ClientBase.make_job_with_status(file_path, "failed", pretty_format_exception())
# proceed to appending job, do not reraise
except (LoadClientTransientException, Exception) as e:
# raise on all temporary exceptions, typically network / server problems
raise
jobs.append(job)
job_gauge.labels("retrieved").inc()
job_counter.labels("retrieved").inc()
logger.metrics("Retrieve jobs metrics",
extra=get_logging_extras([job_gauge.labels("retrieved"), job_counter.labels("retrieved")])
)
return len(jobs), jobs
def complete_jobs(load_id: str, jobs: List[LoadJob]) -> List[LoadJob]:
remaining_jobs: List[LoadJob] = []
logger.info(f"Will complete {len(jobs)} for {load_id}")
for ii in range(len(jobs)):
job = jobs[ii]
logger.debug(f"Checking status for job {job.file_name()}")
status: LoadJobStatus = job.status()
final_location: str = None
if status == "running":
# ask again
logger.debug(f"job {job.file_name()} still running")
remaining_jobs.append(job)
elif status == "failed":
# try to get exception message from job
failed_message = job.exception()
final_location = load_storage.fail_job(load_id, job.file_name(), failed_message)
logger.error(f"Job for {job.file_name()} failed terminally in load {load_id} with message {failed_message}")
elif status == "retry":
# try to get exception message from job
retry_message = job.exception()
# move back to new folder to try again
final_location = load_storage.retry_job(load_id, job.file_name())
logger.error(f"Job for {job.file_name()} retried in load {load_id} with message {retry_message}")
elif status == "completed":
# move to completed folder
final_location = load_storage.complete_job(load_id, job.file_name())
logger.info(f"Job for {job.file_name()} completed in load {load_id}")
if status != "running":
job_gauge.labels(status).inc()
job_counter.labels(status).inc()
job_wait_summary.observe(load_storage.job_elapsed_time_seconds(final_location))
logger.metrics("Completing jobs metrics", extra=get_logging_extras([job_counter, job_gauge, job_wait_summary]))
return remaining_jobs
def run(pool: ThreadPool) -> TRunMetrics:
logger.info(f"Running file loading")
# get list of loads and order by name ASC to execute schema updates
loads = load_storage.list_loads()
logger.info(f"Found {len(loads)} load packages")
if len(loads) == 0:
return TRunMetrics(True, False, 0)
load_id = loads[0]
logger.info(f"Loading schema from load package in {load_id}")
# one load package contains tables from one schema
schema_storage = SchemaStorage(load_storage.storage.storage_path)
# get relative path to load schema from load package
schema = schema_storage.load_folder_schema(load_storage.get_load_path(load_id))
logger.info(f"Loaded schema name {schema.schema_name} and version {schema.schema_version}")
# initialize analytical storage, i.e. create the dataset required by the passed schema
with create_client(schema) as client:
logger.info(f"Client {CONFIG.CLIENT_TYPE} will start load")
client.initialize_storage()
schema_update = load_storage.begin_schema_update(load_id)
if schema_update:
logger.info(f"Client {CONFIG.CLIENT_TYPE} will update schema to package schema")
client.update_storage_schema()
load_storage.commit_schema_update(load_id)
# spool or retrieve unfinished jobs
jobs_count, jobs = retrieve_jobs(client, load_id)
if not jobs:
# jobs count is a total number of jobs including those that could not be initialized
jobs_count, jobs = spool_new_jobs(pool, load_id, schema)
if jobs_count > 0:
# this is a new load package
set_gauge_all_labels(job_gauge, 0)
job_gauge.labels("running").inc(len(jobs))
job_counter.labels("running").inc(len(jobs))
logger.metrics("New jobs metrics",
extra=get_logging_extras([job_counter.labels("running"), job_gauge.labels("running")])
)
# if there are no existing or new jobs we archive the package
if jobs_count == 0:
with create_client(schema) as client:
remaining_jobs = client.complete_load(load_id)
load_storage.archive_load(load_id)
logger.info(f"All jobs completed, archiving package {load_id}")
load_counter.inc()
logger.metrics("Load package metrics", extra=get_logging_extras([load_counter]))
else:
while True:
remaining_jobs = complete_jobs(load_id, jobs)
if len(remaining_jobs) == 0:
break
# process remaining jobs again
jobs = remaining_jobs
# this will raise on signal
sleep(1)
return TRunMetrics(False, False, len(load_storage.list_loads()))
if __name__ == '__main__':
CONFIG = configuration()
parser = create_default_args(CONFIG)
args = parser.parse_args()
initialize_runner(CONFIG, TRunArgs(args.single_run, args.wait_runs))
try:
client_module = client_impl(CONFIG.CLIENT_TYPE)
load_counter, job_gauge, job_counter, job_wait_summary = create_gauges(REGISTRY)
load_storage = create_folders()
except Exception:
process_internal_exception("run")
exit(-1)
exit(pool_runner(CONFIG, run))


@@ -0,0 +1,4 @@
from typing import Literal
LoadJobStatus = Literal["running", "failed", "retry", "completed"]


@@ -0,0 +1,23 @@
# Public Access setup
Each Redshift cluster has *Modify publicly accessible settings* under Actions. Add your IP address there.
# Runtime optimization
https://www.intermix.io/blog/top-14-performance-tuning-techniques-for-amazon-redshift/
1. we should use a separate work queue for the loader user
2. they suggest not using dist keys
3. data must be inserted in sortkey order
# loader account setup
1. Create a new database: `CREATE DATABASE chat_analytics_rasa_ci`
2. Create a new user and set a password
3. Make it the database owner (we could grant a lower permission instead): `ALTER DATABASE chat_analytics_rasa_ci OWNER TO loader`
# Public access setup for Serverless
Follow https://docs.aws.amazon.com/redshift/latest/mgmt/serverless-connecting.html `Connecting from the public subnet to the Amazon Redshift Serverless endpoint using Network Load Balancer`
That uses a Terraform template to create a load balancer endpoint and assign a public IP. The load balancer costs ~$16/month plus the cost of the IP.
It seems that port 5439 is closed in the VPC that Redshift Serverless creates for itself. In the cluster panel, under Data Access : VPC security group, add an inbound rule allowing port 5439 from any subnet (0.0.0.0/0).
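Once the endpoint is reachable, a minimal connectivity check with `psycopg2` (the same driver the loader uses). Host and credentials below are placeholders:

```python
# Confirm that the cluster / Serverless endpoint accepts connections on port 5439.
import psycopg2

conn = psycopg2.connect(
    dbname="chat_analytics_rasa_ci",      # database created in the steps above
    user="loader",
    password="<password>",                # placeholder
    host="<public-endpoint-or-nlb-dns>",  # placeholder
    port=5439,
    connect_timeout=15,
)
with conn.cursor() as cur:
    cur.execute("SELECT 1;")
    print(cur.fetchone())
conn.close()
```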


@@ -0,0 +1,282 @@
import os
import psycopg2
from psycopg2.sql import SQL, Identifier, Composed, Literal as SQLLiteral
from typing import Any, AnyStr, Dict, List, Literal, Optional, Tuple, Type
from dlt.common.typing import StrAny
from dlt.common.arithmetics import DEFAULT_NUMERIC_PRECISION, DEFAULT_NUMERIC_SCALE
from dlt.common.configuration import PostgresConfiguration
from dlt.common.dataset_writers import TWriterType, escape_redshift_identifier
from dlt.common.schema import COLUMN_HINTS, Column, ColumnBase, DataType, HintType, Schema, SchemaUpdate, Table
from dlt.loaders.exceptions import (LoadClientSchemaWillNotUpdate, LoadClientTerminalInnerException,
LoadClientTransientInnerException, LoadFileTooBig)
from dlt.loaders.local_types import LoadJobStatus
from dlt.loaders.client_base import ClientBase, SqlClientBase, LoadJob
SCT_TO_PGT: Dict[DataType, str] = {
"text": "varchar(max)",
"double": "double precision",
"bool": "boolean",
"timestamp": "timestamp with time zone",
"bigint": "bigint",
"binary": "varbinary",
"decimal": f"numeric({DEFAULT_NUMERIC_PRECISION},{DEFAULT_NUMERIC_SCALE})"
}
PGT_TO_SCT: Dict[str, DataType] = {
"varchar(max)": "text",
"double precision": "double",
"boolean": "bool",
"timestamp with time zone": "timestamp",
"bigint": "bigint",
"binary varying": "binary",
"numeric": "decimal"
}
HINT_TO_REDSHIFT_ATTR: Dict[HintType, str] = {
"cluster": "DISTKEY",
# it is better to not enforce constraints in redshift
# "primary_key": "PRIMARY KEY",
"sort": "SORTKEY"
}
class SqlClientMixin:
MAX_STATEMENT_SIZE = 16 * 1024 * 1024
def __init__(self, CONFIG: Type[PostgresConfiguration], *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
self._conn: psycopg2.connection = None
self.C = CONFIG
def _open_connection(self) -> None:
self._conn = psycopg2.connect(dbname=self.C.PG_DATABASE_NAME,
user=self.C.PG_USER,
host=self.C.PG_HOST,
port=self.C.PG_PORT,
password=self.C.PG_PASSWORD,
connect_timeout=self.C.PG_CONNECTION_TIMEOUT
)
# we'll provide explicit transactions
self._conn.set_session(autocommit=True)
def _close_connection(self) -> None:
if self._conn:
self._conn.close()
self._conn = None
def _execute_sql(self, query: AnyStr) -> Any:
curr: psycopg2.cursor
with self._conn.cursor() as curr:
try:
curr.execute(query)
except psycopg2.Error as outer:
try:
self._conn.rollback()
self._conn.reset()
except psycopg2.Error:
self._close_connection()
self._open_connection()
raise outer
if curr.description is None:
return None
else:
f = curr.fetchall()
return f
class RedshiftInsertLoadJob(SqlClientMixin, LoadJob):
def __init__(self, canonical_table_name: str, file_path: str, conn: Any, CONFIG: Type[PostgresConfiguration]) -> None:
super().__init__(CONFIG, ClientBase.get_file_name_from_file_path(file_path))
self._conn = conn
# insert file content immediately
self._insert(canonical_table_name, file_path)
def status(self) -> LoadJobStatus:
# this job is always done
return "completed"
def file_name(self) -> str:
return self._file_name
def exception(self) -> str:
# this part of the code should never be reached
raise NotImplementedError()
def _insert(self, canonical_table_name: str, file_path: str) -> None:
# TODO: implement tracking of jobs in storage, both completed and failed
# WARNING: maximum redshift statement is 16MB https://docs.aws.amazon.com/redshift/latest/dg/c_redshift-sql.html
# in case of postgres: 2GiB
if os.stat(file_path).st_size >= SqlClientMixin.MAX_STATEMENT_SIZE:
# terminal exception
raise LoadFileTooBig(file_path, SqlClientMixin.MAX_STATEMENT_SIZE)
with open(file_path, "r") as f:
header = f.readline()
content = f.read()
sql = Composed(
[SQL("BEGIN TRANSACTION;"),
SQL(header).format(SQL(canonical_table_name)),
SQL(content),
SQL("COMMIT TRANSACTION;")]
)
self._execute_sql(sql)
class RedshiftClient(SqlClientMixin, SqlClientBase):
def __init__(self, schema: Schema, CONFIG: Type[PostgresConfiguration]) -> None:
super().__init__(CONFIG, schema)
def initialize_storage(self) -> None:
schema_name = self._to_canonical_schema_name()
query = """
SELECT 1
FROM INFORMATION_SCHEMA.SCHEMATA
WHERE schema_name = {};
"""
rows = self._execute_sql(SQL(query).format(SQLLiteral(schema_name)))
if len(rows) == 0:
self._execute_sql(SQL("CREATE SCHEMA {};").format(Identifier(schema_name)))
def get_file_load(self, file_path: str) -> LoadJob:
# always returns completed jobs as RedshiftInsertLoadJob is executed
# atomically in start_file_load so any jobs that should be recreated are already completed
# in case of bugs in loader (asking for jobs that were never created) we are not able to detect that
return ClientBase.make_job_with_status(file_path, "completed")
def start_file_load(self, table_name: str, file_path: str) -> LoadJob:
# verify that table exists in the schema
self._get_table_by_name(table_name, file_path)
try:
return RedshiftInsertLoadJob(self._to_canonical_table_name(table_name), file_path, self._conn, self.C)
except (psycopg2.OperationalError, psycopg2.InternalError) as tr_ex:
if tr_ex.pgerror is not None:
if "Cannot insert a NULL value into column" in tr_ex.pgerror:
# a NULL violation is an internal error, probably a redshift thing
raise LoadClientTerminalInnerException("Terminal error, file will not load", tr_ex)
if "Numeric data overflow" in tr_ex.pgerror:
raise LoadClientTerminalInnerException("Terminal error, file will not load", tr_ex)
if "Precision exceeds maximum":
raise LoadClientTerminalInnerException("Terminal error, file will not load", tr_ex)
raise LoadClientTransientInnerException("Error may go away, will retry", tr_ex)
except (psycopg2.DataError, psycopg2.ProgrammingError, psycopg2.IntegrityError) as ter_ex:
raise LoadClientTerminalInnerException("Terminal error, file will not load", ter_ex)
def update_storage_schema(self) -> None:
storage_version = self._get_schema_version_from_storage()
if storage_version < self.schema.schema_version:
for sql in self._build_schema_update_sql():
self._execute_sql(sql)
self._update_schema_version(self.schema.schema_version)
def _get_schema_version_from_storage(self) -> int:
try:
return super()._get_schema_version_from_storage()
except psycopg2.ProgrammingError:
# there's no table so there's no schema
return 0
def _build_schema_update_sql(self) -> List[str]:
sql_updates = []
for table_name in self.schema.schema_tables:
exists, storage_table = self._get_storage_table(table_name)
sql = self._get_table_update_sql(table_name, storage_table, exists)
if sql:
sql_updates.append(sql)
return sql_updates
def _get_table_update_sql(self, table_name: str, storage_table: Table, exists: bool) -> str:
new_columns = self._create_table_update(table_name, storage_table)
if len(new_columns) == 0:
# no changes
return None
# build sql
canonical_name = self._to_canonical_table_name(table_name)
sql = "BEGIN TRANSACTION;\n"
if not exists:
# build CREATE
sql += f"CREATE TABLE {canonical_name} (\n"
sql += ",\n".join([self._get_column_def_sql(c) for c in new_columns])
sql += ");"
else:
# build ALTER as separate statement for each column (redshift limitation)
sql += "\n".join([f"ALTER TABLE {canonical_name}\nADD COLUMN {self._get_column_def_sql(c)};" for c in new_columns])
# scan columns to get hints
if exists:
# no hints may be specified on added columns
for hint in COLUMN_HINTS:
if any(c.get(hint, False) is True for c in new_columns):
hint_columns = [c["name"] for c in new_columns if c.get(hint, False)]
raise LoadClientSchemaWillNotUpdate(canonical_name, hint_columns, f"{hint} requested after table was created")
# TODO: add FK relations
sql += "\nCOMMIT TRANSACTION;"
return sql
def _get_column_def_sql(self, c: Column) -> str:
hints_str = " ".join(HINT_TO_REDSHIFT_ATTR.get(h, "") for h in HINT_TO_REDSHIFT_ATTR.keys() if c.get(h, False) is True)
column_name = escape_redshift_identifier(c["name"])
return f"{column_name} {self._sc_t_to_pq_t(c['data_type'])} {hints_str} {self._gen_not_null(c['nullable'])}"
def _get_storage_table(self, table_name: str) -> Tuple[bool, Table]:
schema_table: Table = {}
query = f"""
SELECT column_name, data_type, is_nullable, numeric_precision, numeric_scale
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_schema = '{self._to_canonical_schema_name()}' AND table_name = '{table_name}'
ORDER BY ordinal_position;
"""
rows = self._execute_sql(query)
# if no rows we assume that table does not exist
if len(rows) == 0:
# TODO: additionally check if table exists
return False, schema_table
# TODO: pull more data to infer DISTKEY, PK and SORTKEY attributes/constraints
for c in rows:
schema_c: ColumnBase = {
"name": c[0],
"nullable": self._null_to_bool(c[2]),
"data_type": self._pq_t_to_sc_t(c[1], c[3], c[4]),
}
schema_table[c[0]] = Schema._add_missing_hints(schema_c)
return True, schema_table
def _to_canonical_schema_name(self) -> str:
return f"{self.C.PG_SCHEMA_PREFIX}_{self.schema.schema_name}"
def _to_canonical_table_name(self, table_name: str) -> str:
return f"{self._to_canonical_schema_name()}.{table_name}"
@staticmethod
def _null_to_bool(v: str) -> bool:
if v == "NO":
return False
elif v == "YES":
return True
raise ValueError(v)
@staticmethod
def _gen_not_null(v: bool) -> str:
return "NOT NULL" if not v else ""
@staticmethod
def _sc_t_to_pq_t(sc_t: DataType) -> str:
if sc_t == "wei":
return f"numeric({DEFAULT_NUMERIC_PRECISION},0)"
return SCT_TO_PGT[sc_t]
@staticmethod
def _pq_t_to_sc_t(pq_t: str, precision: Optional[int], scale: Optional[int]) -> DataType:
if pq_t == "numeric":
if precision == DEFAULT_NUMERIC_PRECISION and scale == 0:
return "wei"
return PGT_TO_SCT.get(pq_t, "text")
def make_client(schema: Schema, C: Type[PostgresConfiguration]) -> RedshiftClient:
return RedshiftClient(schema, C)
def supported_writer(C: Type[PostgresConfiguration]) -> TWriterType:
return "insert_values"


@@ -5,21 +5,21 @@ import os.path
from typing import Callable, Dict, Iterator, List, Literal, Sequence, Tuple
from prometheus_client import REGISTRY
-from autopoiesis.common import json, runners
-from autopoiesis.common.configuration import BasicConfiguration, make_configuration
-from autopoiesis.common.configuration.utils import TConfigSecret
-from autopoiesis.common.file_storage import FileStorage
-from autopoiesis.common.logger import process_internal_exception
-from autopoiesis.common.runners import TRunArgs, TRunMetrics
-from autopoiesis.common.schema import Schema, StoredSchema
-from autopoiesis.common.typing import DictStrAny, StrAny
-from autopoiesis.common.utils import uniq_id, is_interactive
+from dlt.common import json, runners
+from dlt.common.configuration import BasicConfiguration, make_configuration
+from dlt.common.configuration.utils import TConfigSecret
+from dlt.common.file_storage import FileStorage
+from dlt.common.logger import process_internal_exception
+from dlt.common.runners import TRunArgs, TRunMetrics
+from dlt.common.schema import Schema, StoredSchema
+from dlt.common.typing import DictStrAny, StrAny
+from dlt.common.utils import uniq_id, is_interactive
-from autopoiesis.extractors.extractor_storage import ExtractorStorageBase
-from autopoiesis.unpacker.configuration import configuration as unpacker_configuration
-from autopoiesis.loaders.configuration import configuration as loader_configuration
-from autopoiesis.unpacker import unpacker
-from autopoiesis.loaders import loader
+from dlt.extractors.extractor_storage import ExtractorStorageBase
+from dlt.unpacker.configuration import configuration as unpacker_configuration
+from dlt.loaders.configuration import configuration as loader_configuration
+from dlt.unpacker import unpacker
+from dlt.loaders import loader
TClientType = Literal["gcp", "redshift"]

0
dlt/py.typed Normal file

1
dlt/unpacker/__init__.py Normal file

@@ -0,0 +1 @@
from dlt._version import unpacker_version as __version__


@@ -0,0 +1,29 @@
from typing import Type
from dlt.common.typing import StrAny
from dlt.common.configuration.pool_runner_configuration import TPoolType
from dlt.common.dataset_writers import TWriterType
from dlt.common.configuration import (PoolRunnerConfiguration, UnpackingVolumeConfiguration,
LoadingVolumeConfiguration, SchemaVolumeConfiguration,
ProductionLoadingVolumeConfiguration, ProductionUnpackingVolumeConfiguration,
ProductionSchemaVolumeConfiguration,
TPoolType, make_configuration)
from . import __version__
class UnpackerConfiguration(PoolRunnerConfiguration, UnpackingVolumeConfiguration, LoadingVolumeConfiguration, SchemaVolumeConfiguration):
MAX_EVENTS_IN_CHUNK: int = 40000 # maximum events to be processed in single chunk
WRITER_TYPE: TWriterType = "jsonl"  # jsonl or insert_values files will be generated
ADD_EVENT_JSON: bool = True # add event json to "event" table, useful for debugging or recreating tracker
POOL_TYPE: TPoolType = "process"
class ProductionUnpackerConfiguration(ProductionUnpackingVolumeConfiguration, ProductionLoadingVolumeConfiguration,
ProductionSchemaVolumeConfiguration, UnpackerConfiguration):
pass
def configuration(initial_values: StrAny = None) -> Type[UnpackerConfiguration]:
return make_configuration(UnpackerConfiguration, ProductionUnpackerConfiguration, initial_values=initial_values)
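A small usage sketch for this configuration module, assuming make_configuration lays initial_values over the class defaults (and any environment overrides) when building the returned type:

# hypothetical override of the defaults declared above; key names follow the
# class attributes, the exact precedence (env vs. initial_values) is assumed
from dlt.unpacker.configuration import configuration

C = configuration(initial_values={"MAX_EVENTS_IN_CHUNK": 10_000, "WRITER_TYPE": "insert_values"})
print(C.MAX_EVENTS_IN_CHUNK)  # 10000, assuming no environment override
print(C.POOL_TYPE)            # "process" - untouched default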

View File

249
dlt/unpacker/unpacker.py Normal file
View File

@@ -0,0 +1,249 @@
from typing import Any, Callable, Type, List, Dict, Optional, Sequence, Tuple
from multiprocessing.pool import Pool as ProcessPool
from itertools import chain
from prometheus_client import Counter, CollectorRegistry, REGISTRY, Gauge
from prometheus_client.metrics import MetricWrapperBase
from dlt.common import pendulum, signals, json, logger
from dlt.common.runners import TRunArgs, TRunMetrics, create_default_args, pool_runner, initialize_runner
from dlt.common.storages.unpacker_storage import UnpackerStorage
from dlt.common.telemetry import get_logging_extras
from dlt.common.utils import uniq_id
from dlt.common.typing import TEvent
from dlt.common.logger import process_internal_exception
from dlt.common.exceptions import PoolException
from dlt.common.storages import SchemaStorage
from dlt.common.schema import CannotCoerceColumnException, SchemaUpdate, Schema
from dlt.common.parser import PATH_SEPARATOR
from dlt.common.storages.loader_storage import LoaderStorage
from dlt.common.parser import extract, TExtractFunc
from dlt.unpacker.configuration import configuration, UnpackerConfiguration
extract_func: TExtractFunc = extract
CONFIG: Type[UnpackerConfiguration] = None
unpack_storage: UnpackerStorage = None
load_storage: LoaderStorage = None
schema_storage: SchemaStorage = None
load_schema_storage: SchemaStorage = None
event_counter: Counter = None
event_gauge: Gauge = None
schema_version_gauge: Gauge = None
load_package_counter: Counter = None
def create_gauges(registry: CollectorRegistry) -> Tuple[MetricWrapperBase, MetricWrapperBase, MetricWrapperBase, MetricWrapperBase]:
return (
Counter("unpacker_event_count", "Events processed in unpacker", ["schema"], registry=registry),
Gauge("unpacker_last_events", "Number of events processed in last run", ["schema"], registry=registry),
Gauge("unpacker_schema_version", "Current schema version", ["schema"], registry=registry),
Gauge("unpacker_load_packages_created_count", "Count of load package created", ["schema"], registry=registry)
)
def create_folders() -> Tuple[UnpackerStorage, LoaderStorage, SchemaStorage, SchemaStorage]:
unpack_storage = UnpackerStorage(True, CONFIG)
schema_storage = SchemaStorage(CONFIG.SCHEMA_VOLUME_PATH, makedirs=True)
load_schema_storage = SchemaStorage(CONFIG.LOADING_VOLUME_PATH, makedirs=False)
load_storage = LoaderStorage(True, CONFIG, CONFIG.WRITER_TYPE)
unpack_storage.initialize_storage()
load_storage.initialize_storage()
return unpack_storage, load_storage, schema_storage, load_schema_storage
def install_schemas(default_schemas_path: str, schema_names: List[str]) -> None:
# copy default schemas if not present
default_schemas = SchemaStorage(default_schemas_path)
logger.info(f"Checking default schemas in {schema_storage.storage.storage_path}")
for name in schema_names:
if not schema_storage.has_store_schema(name):
logger.info(f"Schema {name} not present in {schema_storage.storage.storage_path}, installing...")
schema = default_schemas.load_store_schema(name)
schema_storage.save_store_schema(schema)
def load_or_create_schema(schema_name: str) -> Schema:
try:
schema = schema_storage.load_store_schema(schema_name)
logger.info(f"Loaded schema with name {schema_name} with version {schema.schema_version}")
except FileNotFoundError:
schema = Schema(schema_name)
logger.info(f"Created new schema with name {schema_name}")
return schema
# this is a worker process
def w_unpack_files(schema_name: str, load_id: str, events_files: Sequence[str]) -> SchemaUpdate:
unpacked_data: Dict[str, List[Any]] = {}
schema_update: SchemaUpdate = {}
schema = load_or_create_schema(schema_name)
file_id = uniq_id()
# process all event files and store rows in memory
for events_file in events_files:
try:
logger.debug(f"Processing events file {events_file}")
with unpack_storage.storage.open(events_file) as f:
events: Sequence[TEvent] = json.load(f)
for event in events:
for table_name, row in extract_func(schema, event, load_id, CONFIG.ADD_EVENT_JSON):
# filter row, may eliminate some or all fields
row = schema.filter_row(table_name, row, PATH_SEPARATOR)
# do not process empty rows
if row:
# check if schema can be updated
row, table_update = schema.coerce_row(table_name, row)
if len(table_update) > 0:
# update schema and save the change
schema.update_schema(table_name, table_update)
table_updates = schema_update.setdefault(table_name, [])
table_updates.extend(table_update)
# store row
rows = unpacked_data.setdefault(table_name, [])
rows.append(row)
except Exception:
process_internal_exception(f"Exception when processing file {events_file}")
raise PoolException("unpack_files", events_file)
# save rows and return schema changes to be gathered in parent process
for table_name, rows in unpacked_data.items():
# save rows into new load files to be processed by the loader
table = schema.get_table(table_name)
load_storage.write_temp_loading_file(load_id, table_name, table, file_id, rows)
return schema_update
TMapFuncRV = Tuple[List[SchemaUpdate], List[Sequence[str]]]
TMapFuncType = Callable[[ProcessPool, str, str, Sequence[str]], TMapFuncRV]
def map_parallel(pool: ProcessPool, schema_name: str, load_id: str, files: Sequence[str]) -> TMapFuncRV:
# we chunk files in a way to not exceed MAX_EVENTS_IN_CHUNK and split them equally
# between processors
configured_processes = pool._processes # type: ignore
chunk_files = UnpackerStorage.chunk_by_events(files, CONFIG.MAX_EVENTS_IN_CHUNK, configured_processes)
logger.info(f"Obtained {len(chunk_files)} processing chunks")
param_chunk = [(schema_name, load_id, files) for files in chunk_files]
return pool.starmap(w_unpack_files, param_chunk), chunk_files
def map_single(_: ProcessPool, schema_name: str, load_id: str, files: Sequence[str]) -> TMapFuncRV:
chunk_files = UnpackerStorage.chunk_by_events(files, CONFIG.MAX_EVENTS_IN_CHUNK, 1)
# get in one chunk
assert len(chunk_files) == 1
logger.info(f"Obtained {len(chunk_files)} processing chunks")
return [w_unpack_files(schema_name, load_id, chunk_files[0])], chunk_files
def update_schema(schema_name: str, schema_updates: List[SchemaUpdate]) -> Schema:
schema = load_or_create_schema(schema_name)
# gather schema from all manifests, validate consistency and combine
for schema_update in schema_updates:
for table_name, table_updates in schema_update.items():
logger.debug(f"Updating schema for table {table_name} with {len(table_updates)} deltas")
schema.update_schema(table_name, table_updates)
return schema
def spool_files(pool: ProcessPool, schema_name: str, load_id: str, map_f: TMapFuncType, files: Sequence[str]) -> None:
# process files in parallel or in single thread, depending on map_f
schema_updates, chunk_files = map_f(pool, schema_name, load_id, files)
schema = update_schema(schema_name, schema_updates)
schema_version_gauge.labels(schema_name).set(schema._version)
logger.metrics("Unpacker metrics", extra=get_logging_extras([schema_version_gauge.labels(schema_name)]))
logger.info(f"Saving schema {schema_name} with version {schema._version}, writing manifest files")
# schema is updated, save it to schema volume
schema_storage.save_store_schema(schema)
# save schema and schema updates to temp load folder
load_schema_storage.save_folder_schema(schema, load_id)
load_storage.save_schema_updates(load_id, schema_updates)
# files must be renamed and deleted together so do not attempt that when process is about to be terminated
signals.raise_if_signalled()
logger.info("Committing storage, do not kill this process")
# rename temp folder to processing
load_storage.commit_temp_load_folder(load_id)
# delete event files and count events to provide metrics
total_events = 0
for event_file in chain.from_iterable(chunk_files): # flatten chunks
unpack_storage.storage.delete(event_file)
total_events += UnpackerStorage.get_events_count(event_file)
# log and update metrics
logger.info(f"Chunk {load_id} processed")
load_package_counter.labels(schema_name).inc()
event_counter.labels(schema_name).inc(total_events)
event_gauge.labels(schema_name).set(total_events)
logger.metrics("Unpacker metrics", extra=get_logging_extras(
[load_package_counter.labels(schema_name), event_counter.labels(schema_name), event_gauge.labels(schema_name)]))
def spool_schema_files(pool: ProcessPool, schema_name: str, files: Sequence[str]) -> str:
# unpacked files will go here before being atomically renamed
load_id = str(pendulum.now().timestamp())
load_storage.create_temp_load_folder(load_id)
logger.info(f"Created temp load folder {load_id} on loading volume")
try:
# process parallel
spool_files(pool, schema_name, load_id, map_parallel, files)
except CannotCoerceColumnException as exc:
# schema conflicts resulting from parallel execution
logger.warning(f"Parallel schema update conflict, switching to single thread ({str(exc)})")
# start from scratch
load_storage.create_temp_load_folder(load_id)
spool_files(pool, schema_name, load_id, map_single, files)
return load_id
def run(pool: ProcessPool) -> TRunMetrics:
logger.info("Running file unpacking")
# list files and group by schema name, list must be sorted for group by to actually work
files = unpack_storage.list_files_to_unpack_sorted()
logger.info(f"Found {len(files)} files, will process in chunks of {CONFIG.MAX_EVENTS_IN_CHUNK} events")
if len(files) == 0:
return TRunMetrics(True, False, 0)
# group files by schema
for schema_name, files_in_schema in unpack_storage.get_grouped_iterator(files):
logger.info(f"Found files in schema {schema_name}")
spool_schema_files(pool, schema_name, list(files_in_schema))
# return info on still pending files (if extractor saved something in the meantime)
return TRunMetrics(False, False, len(unpack_storage.list_files_to_unpack_sorted()))
def configure(C: Type[UnpackerConfiguration], collector: CollectorRegistry, extract_f: TExtractFunc, default_schemas_path: str = None, schema_names: List[str] = None) -> bool:
global CONFIG
global unpack_storage, load_storage, schema_storage, load_schema_storage
global event_counter, event_gauge, schema_version_gauge, load_package_counter
global extract_func
CONFIG = C
# set extracting parser function
extract_func = extract_f
try:
unpack_storage, load_storage, schema_storage, load_schema_storage = create_folders()
event_counter, event_gauge, schema_version_gauge, load_package_counter = create_gauges(collector)
if default_schemas_path and schema_names:
install_schemas(default_schemas_path, schema_names)
return True
except Exception:
process_internal_exception("init module")
return False
def main(extract_f: TExtractFunc, default_schemas_path: str = None, schema_names: List[str] = None) -> None:
# initialize runner
C = configuration()
parser = create_default_args(C)
args = parser.parse_args()
initialize_runner(C, TRunArgs(args.single_run, args.wait_runs))
if not configure(C, REGISTRY, extract_f, default_schemas_path, schema_names):
exit(-1)
# run
exit(pool_runner(C, run))
if __name__ == '__main__':
main(extract)
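main() takes the extract function plus optional default schemas, so the runner can be reused with a custom extractor. A hypothetical embedding; my_extract and its single events table are illustrative, only the (table_name, row) yield shape is taken from the loop in w_unpack_files:

# hypothetical wrapper module reusing the unpacker runner with a custom extractor
from typing import Any, Iterator, Tuple
from dlt.common.schema import Schema
from dlt.unpacker import unpacker

def my_extract(schema: Schema, event: Any, load_id: str, add_event_json: bool) -> Iterator[Tuple[str, Any]]:
    # yield (table_name, row) pairs exactly as w_unpack_files consumes them
    yield "events", {"sender_id": event.get("sender_id"), "event": event.get("event")}

if __name__ == "__main__":
    # runs configuration parsing, storage setup and the pool runner loop
    unpacker.main(my_extract, default_schemas_path=None, schema_names=None)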

View File

@@ -7,9 +7,9 @@
from typing import Sequence
from autopoiesis.common.typing import StrAny
from autopoiesis.common import json
from autopoiesis.common.schema import Schema
from dlt.common.typing import StrAny
from dlt.common import json
from dlt.common.schema import Schema
from dlt.pipeline import Pipeline
# the load schema will be named {pipeline_name}_{source_name}

View File

@@ -1,6 +1,6 @@
from autopoiesis.common import json
from autopoiesis.common.schema import Schema
from autopoiesis.common.typing import DictStrAny, StrAny
from dlt.common import json
from dlt.common.schema import Schema
from dlt.common.typing import DictStrAny, StrAny
from dlt.pipeline import Pipeline, PostgresPipelineCredentials
@@ -17,7 +17,6 @@ from dlt.pipeline import Pipeline, PostgresPipelineCredentials
# credentials = Pipeline.load_gcp_credentials("_secrets/project1234_service.json", "gamma_guild")
import multiprocessing
multiprocessing.set_start_method("spawn", force=True)
if __name__ == '__main__':
# working redshift creds, you can pass password as last parameter or via PG_PASSWORD env variable ie.
@@ -70,7 +69,7 @@ if __name__ == '__main__':
# from now on each pipeline does more or less the same thing: unpack and load data
# now create loading packages and infer the schema
m = pipeline.unpack(workers=2)
m = pipeline.unpack()
if m.has_failed:
print("Unpacking failed")
print(pipeline.last_run_exception)

View File

@@ -2,10 +2,11 @@ import requests
from typing import Iterator, Sequence, cast
from web3 import Web3, HTTPProvider
from autopoiesis.common import Decimal
from autopoiesis.common.arithmetics import numeric_default_context, numeric_default_quantize
from autopoiesis.common.schema import Schema
from autopoiesis.common.typing import DictStrAny, StrAny
from dlt.common import json
from dlt.common import Decimal
from dlt.common.arithmetics import numeric_default_context, numeric_default_quantize
from dlt.common.schema import Schema
from dlt.common.typing import DictStrAny, StrAny
from dlt.pipeline import Pipeline, TExtractorItemWithTable, TExtractorItem
@@ -117,6 +118,10 @@ schema: Schema = None
# in case of ethereum data the fundamental problem is the 2^256 integer size, which does not fit in any BIGINT
# type; that is fixed in the schema loaded below
schema = Pipeline.load_schema_from_file("examples/schemas/ethereum_schema.yml")
# jschema = schema.to_dict()
# with open("examples/schemas/ethereum_schema.json", "w") as f:
# json.dump(jschema, f)
# exit(-1)
pipeline.create_pipeline(credentials, schema=schema)
print(pipeline.root_path)
@@ -124,13 +129,12 @@ m = pipeline.extract_generator(block_generator)
if m.has_failed:
print("Extracting failed")
print(pipeline.last_run_exception)
exit(0)
exit(0)
m = pipeline.unpack()
if m.has_failed:
print("Unpacking failed")
print(pipeline.last_run_exception)
exit(0)
# get inferred schema
schema = pipeline.get_current_schema()

View File

@@ -5,8 +5,8 @@ import io
from typing import Any, Iterator
import csv
from autopoiesis.common.typing import StrAny
from autopoiesis.common.schema import Schema
from dlt.common.typing import StrAny
from dlt.common.schema import Schema
from dlt.pipeline import Pipeline
SCOPES = ['https://www.googleapis.com/auth/drive']
@@ -15,19 +15,19 @@ SCOPES = ['https://www.googleapis.com/auth/drive']
KEY_FILE_LOCATION = '_secrets/project1234_service.json'
def _initialize_drive() -> Any:
"""Initializes an drive service object.
# def _initialize_drive() -> Any:
# """Initializes an drive service object.
Returns:
An authorized drive service object.
"""
credentials = ServiceAccountCredentials.from_json_keyfile_name(
KEY_FILE_LOCATION, SCOPES)
# Returns:
# An authorized drive service object.
# """
# credentials = ServiceAccountCredentials.from_json_keyfile_name(
# KEY_FILE_LOCATION, SCOPES)
# Build the service object.
service = build('drive', 'v3', credentials=credentials)
# # Build the service object.
# service = build('drive', 'v3', credentials=credentials)
return service
# return service
def _initialize_sheets() -> Any:
@@ -41,20 +41,20 @@ def _initialize_sheets() -> Any:
return service
def download_csv_as_json(file_id: str, csv_options: StrAny = None) -> Iterator[StrAny]:
if csv_options is None:
csv_options = {}
# def download_csv_as_json(file_id: str, csv_options: StrAny = None) -> Iterator[StrAny]:
# if csv_options is None:
# csv_options = {}
drive_service = _initialize_drive()
request = drive_service.files().get_media(fileId=file_id)
fh = io.BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
status, done = downloader.next_chunk()
print("Download %d%%." % int(status.progress() * 100))
rows = fh.getvalue().decode("utf-8")
return csv.DictReader(io.StringIO(rows), **csv_options)
# drive_service = _initialize_drive()
# request = drive_service.files().get_media(fileId=file_id)
# fh = io.BytesIO()
# downloader = MediaIoBaseDownload(fh, request)
# done = False
# while done is False:
# status, done = downloader.next_chunk()
# print("Download %d%%." % int(status.progress() * 100))
# rows = fh.getvalue().decode("utf-8")
# return csv.DictReader(io.StringIO(rows), **csv_options)
def download_sheet_to_csv(spreadsheet_id: str, sheet_name: str) -> Iterator[StrAny]:

View File

@@ -0,0 +1,911 @@
{
"tables": {
"_loads": {
"inserted_at": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "timestamp",
"name": "inserted_at",
"nullable": false
},
"load_id": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "load_id",
"nullable": false
},
"status": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "status",
"nullable": false
}
},
"_version": {
"engine_version": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "engine_version",
"nullable": false
},
"inserted_at": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "timestamp",
"name": "inserted_at",
"nullable": false
},
"version": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "version",
"nullable": false
}
},
"blocks": {
"_load_id": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_load_id",
"nullable": false
},
"_record_hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_record_hash",
"nullable": false
},
"number": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": true,
"foreign_key": false,
"data_type": "bigint",
"name": "number",
"nullable": false
},
"parent_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "parent_hash",
"nullable": true
},
"hash": {
"partition": false,
"cluster": true,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "hash",
"nullable": false
},
"base_fee_per_gas": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "wei",
"name": "base_fee_per_gas",
"nullable": false
},
"difficulty": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "wei",
"name": "difficulty",
"nullable": false
},
"extra_data": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "extra_data",
"nullable": true
},
"gas_limit": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "gas_limit",
"nullable": false
},
"gas_used": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "gas_used",
"nullable": false
},
"logs_bloom": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "logs_bloom",
"nullable": true
},
"miner": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "miner",
"nullable": true
},
"mix_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "mix_hash",
"nullable": true
},
"nonce": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "nonce",
"nullable": true
},
"receipts_root": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "receipts_root",
"nullable": true
},
"sha3_uncles": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "sha3_uncles",
"nullable": true
},
"size": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "size",
"nullable": true
},
"state_root": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "state_root",
"nullable": false
},
"timestamp": {
"partition": false,
"cluster": false,
"unique": true,
"sort": true,
"primary_key": false,
"foreign_key": false,
"data_type": "timestamp",
"name": "timestamp",
"nullable": false
},
"total_difficulty": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "wei",
"name": "total_difficulty",
"nullable": true
},
"transactions_root": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "transactions_root",
"nullable": false
}
},
"blocks__transactions": {
"_record_hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_record_hash",
"nullable": false
},
"block_number": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": true,
"foreign_key": false,
"data_type": "bigint",
"name": "block_number",
"nullable": false
},
"transaction_index": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": true,
"foreign_key": false,
"data_type": "bigint",
"name": "transaction_index",
"nullable": false
},
"hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "hash",
"nullable": false
},
"block_hash": {
"partition": false,
"cluster": true,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "block_hash",
"nullable": false
},
"block_timestamp": {
"partition": false,
"cluster": false,
"unique": false,
"sort": true,
"primary_key": false,
"foreign_key": false,
"data_type": "timestamp",
"name": "block_timestamp",
"nullable": false
},
"chain_id": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "chain_id",
"nullable": true
},
"from": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "from",
"nullable": true
},
"gas": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "gas",
"nullable": true
},
"gas_price": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "gas_price",
"nullable": true
},
"input": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "input",
"nullable": true
},
"max_fee_per_gas": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "wei",
"name": "max_fee_per_gas",
"nullable": true
},
"max_priority_fee_per_gas": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "wei",
"name": "max_priority_fee_per_gas",
"nullable": true
},
"nonce": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "nonce",
"nullable": true
},
"r": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "r",
"nullable": true
},
"s": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "s",
"nullable": true
},
"status": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "status",
"nullable": true
},
"to": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "to",
"nullable": true
},
"type": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "type",
"nullable": true
},
"v": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "v",
"nullable": true
},
"value": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "wei",
"name": "value",
"nullable": false
},
"eth_value": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "decimal",
"name": "eth_value",
"nullable": true
}
},
"blocks__transactions__logs": {
"_record_hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_record_hash",
"nullable": false
},
"address": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "address",
"nullable": false
},
"block_timestamp": {
"partition": false,
"cluster": false,
"unique": false,
"sort": true,
"primary_key": false,
"foreign_key": false,
"data_type": "timestamp",
"name": "block_timestamp",
"nullable": false
},
"block_hash": {
"partition": false,
"cluster": true,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "block_hash",
"nullable": false
},
"block_number": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": true,
"foreign_key": false,
"data_type": "bigint",
"name": "block_number",
"nullable": false
},
"transaction_index": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": true,
"foreign_key": false,
"data_type": "bigint",
"name": "transaction_index",
"nullable": false
},
"log_index": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": true,
"foreign_key": false,
"data_type": "bigint",
"name": "log_index",
"nullable": false
},
"data": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "data",
"nullable": true
},
"removed": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bool",
"name": "removed",
"nullable": true
},
"transaction_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "transaction_hash",
"nullable": false
}
},
"blocks__transactions__logs__topics": {
"_parent_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": true,
"data_type": "text",
"name": "_parent_hash",
"nullable": false
},
"_pos": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "_pos",
"nullable": false
},
"_record_hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_record_hash",
"nullable": false
},
"_root_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_root_hash",
"nullable": false
},
"value": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "value",
"nullable": true
}
},
"blocks__transactions__access_list": {
"_parent_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": true,
"data_type": "text",
"name": "_parent_hash",
"nullable": false
},
"_pos": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "_pos",
"nullable": false
},
"_record_hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_record_hash",
"nullable": false
},
"_root_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_root_hash",
"nullable": false
},
"address": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "address",
"nullable": true
}
},
"blocks__transactions__access_list__storage_keys": {
"_parent_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": true,
"data_type": "text",
"name": "_parent_hash",
"nullable": false
},
"_pos": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "_pos",
"nullable": false
},
"_record_hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_record_hash",
"nullable": false
},
"_root_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_root_hash",
"nullable": false
},
"value": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "value",
"nullable": true
}
},
"blocks__uncles": {
"_parent_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": true,
"data_type": "text",
"name": "_parent_hash",
"nullable": false
},
"_pos": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "bigint",
"name": "_pos",
"nullable": false
},
"_record_hash": {
"partition": false,
"cluster": false,
"unique": true,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_record_hash",
"nullable": false
},
"_root_hash": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "_root_hash",
"nullable": false
},
"value": {
"partition": false,
"cluster": false,
"unique": false,
"sort": false,
"primary_key": false,
"foreign_key": false,
"data_type": "text",
"name": "value",
"nullable": true
}
}
},
"name": "ethereum",
"version": 8,
"preferred_types": {},
"hints": {
"foreign_key": [
"^_parent_hash$"
],
"not_null": [
"^_record_hash$",
"^_root_hash$",
"^_parent_hash$",
"^_pos$"
],
"unique": [
"^_record_hash$"
]
},
"excludes": [],
"includes": [],
"engine_version": 2
}
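The hints block assigns column properties by regex on the column name. A small sketch of how such hints could be resolved for a new column, assuming a simple any-match lookup rather than the exact Schema implementation:

# a sketch of regex-based hint resolution; the hint table mirrors the "hints"
# section above, the helper itself is illustrative rather than the Schema method
import re
from typing import Dict, List

HINTS: Dict[str, List[str]] = {
    "foreign_key": [r"^_parent_hash$"],
    "not_null": [r"^_record_hash$", r"^_root_hash$", r"^_parent_hash$", r"^_pos$"],
    "unique": [r"^_record_hash$"],
}

def column_hints(column_name: str) -> Dict[str, bool]:
    # a hint is switched on when any of its regexes matches the column name
    return {hint: any(re.search(p, column_name) for p in patterns) for hint, patterns in HINTS.items()}

print(column_hints("_parent_hash"))  # {'foreign_key': True, 'not_null': True, 'unique': False}
print(column_hints("timestamp"))     # all False - hints only fire for the technical _-prefixed columns here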

View File

@@ -0,0 +1,936 @@
tables:
_version:
version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
engine_version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
_loads:
load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
status:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
model_annotations:
sender_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
message_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
annotation:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
confidence:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: double
nullable: true
count:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
added_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
reviewed:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bool
nullable: true
_load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
_record_hash:
partition: false
cluster: false
unique: true
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
name: csv
version: 2
preferred_types: {}
hints:
not_null:
- ^_record_hash$
- ^_root_hash$
- ^_parent_hash$
- ^_pos$
- _load_id
foreign_key:
- ^_parent_hash$
unique:
- ^_record_hash$
excludes: []
includes: []
engine_version: 2
tables:
_version:
version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
engine_version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
_loads:
load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
status:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
model_annotations:
sender_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
message_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
annotation:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
confidence:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: double
nullable: true
count:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
added_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
reviewed:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bool
nullable: true
_load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
_record_hash:
partition: false
cluster: false
unique: true
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
name: csv
version: 2
preferred_types: {}
hints:
not_null:
- ^_record_hash$
- ^_root_hash$
- ^_parent_hash$
- ^_pos$
- _load_id
foreign_key:
- ^_parent_hash$
unique:
- ^_record_hash$
excludes: []
includes: []
engine_version: 2
tables:
_version:
version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
engine_version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
_loads:
load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
status:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
model_annotations:
sender_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
message_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
annotation:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
confidence:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: double
nullable: true
count:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
added_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
reviewed:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bool
nullable: true
_load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
_record_hash:
partition: false
cluster: false
unique: true
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
name: csv
version: 2
preferred_types: {}
hints:
not_null:
- ^_record_hash$
- ^_root_hash$
- ^_parent_hash$
- ^_pos$
- _load_id
foreign_key:
- ^_parent_hash$
unique:
- ^_record_hash$
excludes: []
includes: []
engine_version: 2
tables:
_version:
version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
engine_version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
_loads:
load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
status:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
model_annotations:
sender_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
message_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
annotation:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
confidence:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: double
nullable: true
count:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
added_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
reviewed:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bool
nullable: true
_load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
_record_hash:
partition: false
cluster: false
unique: true
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
name: csv
version: 2
preferred_types: {}
hints:
not_null:
- ^_record_hash$
- ^_root_hash$
- ^_parent_hash$
- ^_pos$
- _load_id
foreign_key:
- ^_parent_hash$
unique:
- ^_record_hash$
excludes: []
includes: []
engine_version: 2
tables:
_version:
version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
engine_version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
_loads:
load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
status:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
model_annotations:
sender_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
message_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
annotation:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
confidence:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: double
nullable: true
count:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
added_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
reviewed:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bool
nullable: true
_load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
_record_hash:
partition: false
cluster: false
unique: true
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
name: csv
version: 2
preferred_types: {}
hints:
not_null:
- ^_record_hash$
- ^_root_hash$
- ^_parent_hash$
- ^_pos$
- _load_id
foreign_key:
- ^_parent_hash$
unique:
- ^_record_hash$
excludes: []
includes: []
engine_version: 2
tables:
_version:
version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
engine_version:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
_loads:
load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
status:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: false
inserted_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: timestamp
nullable: false
model_annotations:
sender_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
message_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
annotation:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
confidence:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: double
nullable: true
count:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bigint
nullable: true
added_at:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: true
reviewed:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: bool
nullable: true
_load_id:
partition: false
cluster: false
unique: false
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
_record_hash:
partition: false
cluster: false
unique: true
sort: false
primary_key: false
foreign_key: false
data_type: text
nullable: false
name: csv
version: 2
preferred_types: {}
hints:
not_null:
- ^_record_hash$
- ^_root_hash$
- ^_parent_hash$
- ^_pos$
- _load_id
foreign_key:
- ^_parent_hash$
unique:
- ^_record_hash$
excludes: []
includes: []
engine_version: 2

1110
poetry.lock generated

File diff suppressed because it is too large

View File

@@ -1,27 +1,61 @@
[tool.poetry]
name = "python-dlt"
version = "0.0.1"
version = "0.1.0.dev0"
description = "DLT is an open-source python-native scalable data loading framework that does not require any devops efforts to run."
authors = ["Marcin Rudolf <rudolfix@rudolfix.org>"]
license = "MIT"
authors = ["ScaleVector <services@scalevector.ai>"]
maintainers = [ "Marcin Rudolf <marcin@scalevector.ai>", "Adrian Brudaru <adrian@scalevector.ai>",]
readme = "README.md"
license = "Apache-2.0"
homepage = "https://github.com/scale-vector"
repository = "https://github.com/scale-vector/dlt"
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Topic :: Software Development :: Libraries",
"Operating System :: MacOS :: MacOS X",
"Operating System :: POSIX :: Linux",]
keywords = [ "etl" ]
include = [ "LICENSE.txt", "README.md"]
packages = [
{ include = "dlt" },
]
[tool.poetry.dependencies]
python = "^3.8,<3.11"
# autopoiesis = {path = "../rasa_data_ingestion"}
requests = "^2.26.0"
pendulum = "^2.1.2"
simplejson = "^3.17.5"
jsonlines = "^2.0.0"
PyYAML = "^5.4.1"
json-logging = "1.4.1rc0"
prometheus-client = "^0.11.0"
semver = "^2.13.0"
sentry-sdk = "^1.4.3"
hexbytes = "^0.2.2"
cachetools = "^5.2.0"
psycopg2-binary = {version = "^2.9.1", optional = true, extras = ["redshift", "postgres"]}
grpcio = {version = "1.43.0", optional = true, extras = ["gcp"]}
google-cloud-bigquery = {version = "^2.26.0", optional = true, extras = ["gcp"]}
GitPython = {version = "^3.1.26", optional = true, extras = ["dbt"]}
dbt-core = {version = "1.0.6", optional = true, extras = ["dbt"]}
dbt-redshift = {version = "1.0.1", optional = true, extras = ["dbt"]}
dbt-bigquery = {version = "1.0.0", optional = true, extras = ["dbt"]}
[tool.poetry.dev-dependencies]
pytest = "6.2.4"
pytest = "^6.2.4"
mypy = "0.931"
flake8 = "3.9.2"
bandit = "1.7.0"
flake8-bugbear = "21.4.3"
pytest-pythonpath = "0.7.3"
bandit = "^1.7.0"
flake8-bugbear = "^21.4.3"
pytest-pythonpath = "^0.7.3"
pytest-order = "^1.0.0"
pytest-cases = "^3.6.9"
pytest-forked = "^1.3.0"
types-PyYAML = "^6.0.7"
types-cachetools = "^4.2.9"
types-protobuf = "^3.19.8"
@@ -29,6 +63,12 @@ types-simplejson = "^3.17.0"
types-requests = "^2.25.6"
types-python-dateutil = "^2.8.15"
[tool.poetry.extras]
dbt = ["dbt-core", "GitPython", "dbt-redshift", "dbt-bigquery"]
gcp = ["grpcio", "google-cloud-bigquery"]
postgres = ["psycopg2-binary"]
redshift = ["psycopg2-binary"]
[build-system]
requires = ["poetry-core>=1.0.8"]
build-backend = "poetry.core.masonry.api"

7
pytest.ini Normal file
View File

@@ -0,0 +1,7 @@
[pytest]
python_paths= autopoiesis
norecursedirs= .direnv .eggs build dist
addopts= -v --showlocals --durations 10
xfail_strict= true
log_cli= 1
log_cli_level= INFO

18
tests/.example.env Normal file
View File

@@ -0,0 +1,18 @@
# copy to .env and run with (set -a && . tests/.env && pytest tests)
# for tests that do not involve any secrets you may run (set -a && . tests/.example.env && pytest tests)
PROJECT_ID=chat-analytics-317513
DATASET=carbon_bot_3
BQ_CRED_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----
paste key here
-----END PRIVATE KEY-----
"
BQ_CRED_CLIENT_EMAIL=loader@chat-analytics-317513.iam.gserviceaccount.com
PG_DATABASE_NAME=chat_analytics_rasa
PG_SCHEMA_PREFIX=carbon_bot_3
PG_USER=loader
PG_HOST=3.73.90.3
PG_PASSWORD=set-me-up

0
tests/__init__.py Normal file
View File

0
tests/common/__init__.py Normal file
View File

View File

@@ -0,0 +1,32 @@
{
"event": "bot",
"timestamp": 1624001210.7276764,
"metadata": {
"rasa_x_flagged": false,
"rasa_x_id": 60304
},
"text": "Hello! Just a heads up - this bot is part of a research project and we intend to make the conversations publicly available to researchers. So please don't share any personal information! [Privacy Policy](https://rasa.com/carbon-bot-privacy-policy/)",
"data": {
"elements": null,
"quick_replies": null,
"buttons": null,
"attachment": null,
"image": null,
"custom": null
},
"data__custom": "remains",
"data__custom__goes": "goes",
"custom_data": {
"excluded_path": {
"prop1": "str1"
},
"included_object": {
"included_exception": "exception",
"eliminated": true
}
},
"is_flagged": false,
"sender_id": "411b44bdfcc545f282fb4aa15282b73f",
"model_id": "__unknown",
"environment": "__unknown"
}
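The data__custom and data__custom__goes keys show nested values flattened into double-underscore column paths. A rough sketch of that flattening, assuming the separator matches dlt.common.parser.PATH_SEPARATOR and ignoring the list handling and hash generation done by the real parser:

# sketch of nested-dict flattening into double-underscore column paths
from typing import Any, Dict, Iterator, Tuple

PATH_SEPARATOR = "__"  # assumed to match dlt.common.parser.PATH_SEPARATOR

def flatten(row: Dict[str, Any], parent: str = "") -> Iterator[Tuple[str, Any]]:
    for key, value in row.items():
        path = f"{parent}{PATH_SEPARATOR}{key}" if parent else key
        if isinstance(value, dict):
            yield from flatten(value, path)
        else:
            yield path, value

event = {"event": "bot", "data": {"custom": {"goes": "goes"}}}
print(dict(flatten(event)))  # {'event': 'bot', 'data__custom__goes': 'goes'}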

Binary file not shown.

File diff suppressed because it is too large

View File

@@ -0,0 +1,63 @@
{
"tables": {
"_version": {
"version": {
"name": "version",
"data_type": "bigint",
"nullable": false
},
"engine_version": {
"name": "engine_version",
"data_type": "bigint",
"nullable": false
},
"inserted_at": {
"name": "inserted_at",
"data_type": "timestamp",
"nullable": false
}
},
"_loads": {
"load_id": {
"name": "load_id",
"data_type": "text",
"nullable": false
},
"status": {
"name": "status",
"data_type": "bigint",
"nullable": false
},
"inserted_at": {
"name": "inserted_at",
"data_type": "timestamp",
"nullable": false
}
}
},
"name": "model",
"version": 1,
"preferred_types": {
"^timestamp$": "timestamp",
"trained_at$": "timestamp",
"^inserted_at$": "timestamp",
"^_pos$": "bigint"
},
"hints": {
"not_null": [
"^timestamp$",
"^_record_hash$",
"^_root_hash$",
"^_load_id$",
"^_parent_hash$",
"^_pos$"
],
"primary_key": [
"^_record_hash$"
],
"foreign_key": [
"^_parent_hash$"
]
},
"engine_version": 1
}

View File

@@ -0,0 +1,59 @@
{
"tables": {
"_version": {
"version": {
"name": "version",
"data_type": "bigint",
"nullable": false
},
"engine_version": {
"name": "engine_version",
"data_type": "bigint",
"nullable": false
},
"inserted_at": {
"name": "inserted_at",
"data_type": "timestamp",
"nullable": false
}
},
"_loads": {
"load_id": {
"name": "load_id",
"data_type": "text",
"nullable": false
},
"status": {
"name": "status",
"data_type": "bigint",
"nullable": false
},
"inserted_at": {
"name": "inserted_at",
"data_type": "timestamp",
"nullable": false
}
}
},
"version": 1,
"engine_version": 2,
"name": "event",
"preferred_types": {
"^timestamp$": "timestamp",
"^_timestamp$": "timestamp",
"^inserted_at$": "timestamp",
"confidence": "double",
"^_pos$": "bigint"
},
"hints": {
"not_null": ["^timestamp$", "^_timestamp$", "^_dist_key$", "^_record_hash$", "^_root_hash$", "^_load_id$", "^_parent_hash$", "^_pos$", "^sender_id$"],
"partition": ["^_timestamp$", "^timestamp$"],
"cluster": ["^_dist_key$", "^sender_id$"],
"primary_key": [],
"foreign_key": ["^_parent_hash$"],
"sort": ["^timestamp$", "^_timestamp$"],
"unique": ["^_record_hash$"]
},
"excludes": ["^event_user__parse_data", "^event_bot__data", "^event_bot__metadata"],
"includes": ["^event_user__parse_data__(intent|entities|message_id$|text$)", "^event_bot__metadata__(utter_action|template_name|rasa_x_[a-z]+)$"]
}
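The excludes and includes lists are regexes over flattened column paths; judging by the fixture with excluded_path and included_exception, an include re-admits a path that an exclude would drop. A sketch of that filter under this assumption:

# a sketch of the excludes/includes filter, assuming includes re-admit column
# paths that an exclude pattern would otherwise drop (see schema.filter_row)
import re
from typing import Sequence

EXCLUDES = [r"^event_user__parse_data", r"^event_bot__data", r"^event_bot__metadata"]
INCLUDES = [r"^event_user__parse_data__(intent|entities|message_id$|text$)",
            r"^event_bot__metadata__(utter_action|template_name|rasa_x_[a-z]+)$"]

def keep_path(path: str, excludes: Sequence[str] = EXCLUDES, includes: Sequence[str] = INCLUDES) -> bool:
    if not any(re.search(p, path) for p in excludes):
        return True
    # excluded paths survive only when an include pattern matches them back in
    return any(re.search(p, path) for p in includes)

assert keep_path("event_user__text")
assert not keep_path("event_bot__data__custom")
assert keep_path("event_bot__metadata__rasa_x_id")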

View File

@@ -0,0 +1,54 @@
{
"tables": {
"_version": {
"version": {
"name": "version",
"data_type": "bigint",
"nullable": false
},
"engine_version": {
"name": "engine_version",
"data_type": "bigint",
"nullable": false
},
"inserted_at": {
"name": "inserted_at",
"data_type": "timestamp",
"nullable": false
}
},
"_loads": {
"load_id": {
"name": "load_id",
"data_type": "text",
"nullable": false
},
"status": {
"name": "status",
"data_type": "bigint",
"nullable": false
},
"inserted_at": {
"name": "inserted_at",
"data_type": "timestamp",
"nullable": false
}
}
},
"version": 1,
"engine_version": 2,
"name": "model",
"preferred_types": {
"^timestamp$": "timestamp",
"trained_at$": "timestamp",
"^inserted_at$": "timestamp",
"^_pos$": "bigint"
},
"hints": {
"not_null": ["^timestamp$", "^_record_hash$", "^_root_hash$", "^_load_id$", "^_parent_hash$", "^_pos$"],
"unique": ["^_record_hash$"],
"foreign_key": ["^_parent_hash$"]
},
"excludes": [],
"includes": []
}

View File

@@ -0,0 +1 @@
kube

View File

@@ -0,0 +1 @@
BANANA

View File

@@ -0,0 +1,16 @@
[
{
"f_int": 7817289712,
"f_float": 92898e37,
"f_timestamp": "2021-10-13T13:49:32.901899+00:00",
"f_bool": true,
"f_bool_2": false,
"f_str": "some string"
},
{
"f_int": 7817289713,
"f_float": 878172.8292,
"f_timestamp": "2021-10-13T13:49:32.901899+00:00",
"f_bool_2": false
}
]

View File

@@ -0,0 +1,14 @@
[
{
"idx": 1,
"str": ", NULL'); DROP SCHEMA Public --"
},
{
"idx": 2,
"str": "イロハニホヘト チリヌルヲ 'ワカヨタレソ ツネナラム"
},
{
"idx": 3,
"str": "ऄअआइ'ईउऊऋऌऍऎए"
}
]

View File

View File

@@ -0,0 +1,84 @@
import pytest
from typing import Sequence, Tuple
from dlt.common.file_storage import FileStorage
from dlt.common.storages.loader_storage import LoaderStorage
from dlt.common.configuration import LoadingVolumeConfiguration, make_configuration
from dlt.common.storages.exceptions import NoMigrationPathException
from dlt.common.typing import StrAny
from dlt.common.utils import uniq_id
from tests.utils import write_version, autouse_root_storage
@pytest.fixture
def storage() -> LoaderStorage:
C = make_configuration(LoadingVolumeConfiguration, LoadingVolumeConfiguration)
s = LoaderStorage(True, C, "jsonl")
s.initialize_storage()
return s
def test_archive_completed(storage: LoaderStorage) -> None:
# should delete archive in full
storage.delete_completed_jobs = True
load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}])
assert storage.storage.has_folder(storage.get_load_path(load_id))
storage.complete_job(load_id, file_name)
storage.archive_load(load_id)
# deleted from loading
assert not storage.storage.has_folder(storage.get_load_path(load_id))
# deleted from archive
assert not storage.storage.has_folder(storage.get_archived_path(load_id))
# do not delete completed jobs
storage.delete_completed_jobs = False
load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}])
storage.complete_job(load_id, file_name)
storage.archive_load(load_id)
# deleted from loading
assert not storage.storage.has_folder(storage.get_load_path(load_id))
# has load archived
assert storage.storage.has_folder(storage.get_archived_path(load_id))
def test_archive_failed(storage: LoaderStorage) -> None:
# loads with failed jobs are always archived
storage.delete_completed_jobs = True
load_id, file_name = start_loading_file(storage, [{"content": "a"}, {"content": "b"}])
assert storage.storage.has_folder(storage.get_load_path(load_id))
storage.fail_job(load_id, file_name, "EXCEPTION")
storage.archive_load(load_id)
# deleted from loading
assert not storage.storage.has_folder(storage.get_load_path(load_id))
# present in archive
assert storage.storage.has_folder(storage.get_archived_path(load_id))


def test_full_migration_path() -> None:
# create directory structure
s = LoaderStorage(True, LoadingVolumeConfiguration, "jsonl")
# overwrite known initial version
write_version(s.storage, "1.0.0")
# must be able to migrate to current version
s = LoaderStorage(False, LoadingVolumeConfiguration, "jsonl")
assert s.version == LoaderStorage.STORAGE_VERSION


def test_unknown_migration_path() -> None:
# create directory structure
s = LoaderStorage(True, LoadingVolumeConfiguration, "jsonl")
# overwrite known initial version
write_version(s.storage, "10.0.0")
    # there is no migration path from 10.0.0 to the current version
with pytest.raises(NoMigrationPathException):
LoaderStorage(False, LoadingVolumeConfiguration, "jsonl")


def start_loading_file(s: LoaderStorage, content: Sequence[StrAny]) -> Tuple[str, str]:
load_id = uniq_id()
s.create_temp_load_folder(load_id)
file_name = s.write_temp_loading_file(load_id, "mock_table", None, uniq_id(), content)
s.commit_temp_load_folder(load_id)
s.start_job(load_id, file_name)
return load_id, file_name

View File

@@ -0,0 +1,40 @@
import pytest
from dlt.common.file_storage import FileStorage
from dlt.common.storages.exceptions import NoMigrationPathException
from dlt.common.storages.unpacker_storage import UnpackerStorage
from dlt.common.configuration import UnpackingVolumeConfiguration
from tests.utils import TEST_STORAGE, write_version, autouse_root_storage


@pytest.mark.skip()
def test_load_events_and_group_by_sender() -> None:
# TODO: create fixture with two sender ids and 3 files and check the result
pass


@pytest.mark.skip()
def test_chunk_by_events() -> None:
    # TODO: should distribute ~ N events evenly among m cores, with a fallback for small numbers of events
pass


def test_full_migration_path() -> None:
# create directory structure
s = UnpackerStorage(True, UnpackingVolumeConfiguration)
# overwrite known initial version
write_version(s.storage, "1.0.0")
# must be able to migrate to current version
s = UnpackerStorage(True, UnpackingVolumeConfiguration)
assert s.version == UnpackerStorage.STORAGE_VERSION


def test_unknown_migration_path() -> None:
# create directory structure
s = UnpackerStorage(True, UnpackingVolumeConfiguration)
# overwrite known initial version
write_version(s.storage, "10.0.0")
    # there is no migration path from 10.0.0 to the current version
with pytest.raises(NoMigrationPathException):
UnpackerStorage(False, UnpackingVolumeConfiguration)

View File

@@ -0,0 +1,59 @@
import pytest
import semver
from dlt.common.file_storage import FileStorage
from dlt.common.storages.exceptions import NoMigrationPathException, WrongStorageVersionException
from dlt.common.storages.versioned_storage import VersionedStorage
from tests.utils import write_version, root_storage


class MigratedStorage(VersionedStorage):
def migrate_storage(self, from_version: semver.VersionInfo, to_version: semver.VersionInfo) -> None:
# migration example:
if from_version == "1.0.0" and from_version < to_version:
from_version = semver.VersionInfo.parse("1.1.0")
self._save_version(from_version)
if from_version == "1.1.0" and from_version < to_version:
from_version = semver.VersionInfo.parse("1.2.0")
self._save_version(from_version)


def test_new_versioned_storage(root_storage: FileStorage) -> None:
v = VersionedStorage("1.0.1", True, root_storage)
assert v.version == "1.0.1"


def test_new_versioned_storage_non_owner(root_storage: FileStorage) -> None:
with pytest.raises(WrongStorageVersionException) as wsve:
VersionedStorage("1.0.1", False, root_storage)
assert wsve.value.storage_path == root_storage.storage_path
assert wsve.value.target_version == "1.0.1"
assert wsve.value.initial_version == "0.0.0"


def test_migration(root_storage: FileStorage) -> None:
write_version(root_storage, "1.0.0")
v = MigratedStorage("1.2.0", True, root_storage)
assert v.version == "1.2.0"


def test_unknown_migration_path(root_storage: FileStorage) -> None:
write_version(root_storage, "1.0.0")
with pytest.raises(NoMigrationPathException) as wmpe:
MigratedStorage("1.3.0", True, root_storage)
assert wmpe.value.migrated_version == "1.2.0"


def test_only_owner_migrates(root_storage: FileStorage) -> None:
write_version(root_storage, "1.0.0")
with pytest.raises(WrongStorageVersionException) as wmpe:
MigratedStorage("1.2.0", False, root_storage)
assert wmpe.value.initial_version == "1.0.0"


def test_downgrade_not_possible(root_storage: FileStorage) -> None:
write_version(root_storage, "1.2.0")
with pytest.raises(NoMigrationPathException) as wmpe:
MigratedStorage("1.1.0", True, root_storage)
assert wmpe.value.migrated_version == "1.2.0"

Some files were not shown because too many files have changed in this diff