mirror of
https://github.com/dlt-hub/dlt.git
synced 2025-12-17 19:31:30 +00:00
* move pyproject.toml and makefile from old branch and add in-between changes
* update workflow files to use uv
* run new version of formatter
* fix building of images with uv
* possibly fix docs linting
* downgrade lancedb dependency to fix tests
* fix gcs compat mode for s3 for newest boto
* fix docstrings in examples
* add some uv constraints
* update readme.md and contributing.md and some other places
* allow duckdb 0.8 in range
* add link-mode copy to uv venv on windows
* remove poetry lockfile and unneeded lockfile checker
* fix chess api related failures
* sleep after dremio start
* set correct package in pyproject
* Revert "add some uv constraints"
This reverts commit d611e9ecce.
# Conflicts:
# pyproject.toml
# uv.lock
* add missing databricks sql connector version bounds
113 lines
3.5 KiB
Python
113 lines
3.5 KiB
Python
import os
|
|
import tempfile
|
|
from typing import Any, Iterator, cast, Union
|
|
|
|
import dlt
|
|
from dlt.common.typing import TypedDict
|
|
from dlt.common import json
|
|
from dlt.common.configuration.specs import BaseConfiguration
|
|
from dlt.common.runners.venv import Venv
|
|
from dlt.common.typing import DictStrAny, StrAny, StrOrBytesPath, TDataItem, TDataItems
|
|
|
|
from docs.examples.sources.stdout import json_stdout as singer_process_pipe
|
|
|
|
# Type accepted by `tap` for its config and catalog arguments: a dictionary
# (which `tap` dumps to a temporary file) or a path that is used as is.
FilePathOrDict = Union[StrAny, StrOrBytesPath]
|
|
|
|
|
|
class SingerMessage(TypedDict):
    """Base shape shared by every Singer message processed in this module."""

    # Message kind discriminator; this module reacts to "RECORD" and "STATE".
    type: str  # noqa: A003
|
|
|
|
|
|
class SingerRecord(SingerMessage):
    """Singer message that carries a single data row and the stream it belongs to."""

    # Row payload that gets yielded as a data item.
    record: DictStrAny
    # Stream name; used as the table name when the record is yielded.
    stream: str
|
|
|
|
|
|
class SingerState(SingerMessage):
    """Singer message that carries the state value the tap wants to persist."""

    # State payload; the most recent one is stored under the "singer" state key.
    value: DictStrAny
|
|
|
|
|
|
# try:
|
|
# from singer import parse_message_from_obj, Message, RecordMessage, StateMessage
|
|
# except ImportError:
|
|
# raise MissingDependencyException("Singer Source", ["python-dlt-singer"], "Singer runtime compatible with DLT")
|
|
|
|
|
|
# pip install ../singer/singer-python
|
|
# https://github.com/datamill-co/singer-runner/tree/master/singer_runner
|
|
# https://techgaun.github.io/active-forks/index.html#singer-io/singer-python
|
|
def get_source_from_stream(
    singer_messages: Iterator[SingerMessage], state: DictStrAny = None
) -> Iterator[TDataItem]:
    """Yield records from a stream of Singer messages and capture the tap state.

    RECORD messages are yielded as data items marked with the table name taken
    from the message's ``stream`` field. STATE messages are not yielded; the
    value of the most recent one is remembered and, once the input is
    exhausted, stored under ``state["singer"]``. Messages of any other type
    are skipped.

    Args:
        singer_messages: Iterator of decoded Singer messages.
        state: Optional mutable dictionary that receives the last seen state
            value under the "singer" key. It is left untouched when it is
            ``None`` or when the stream contained no STATE message.

    Yields:
        Record payloads wrapped with ``dlt.mark.with_table_name``.
    """
    last_state: DictStrAny = {}
    state_seen = False
    for msg in singer_messages:
        msg_type = msg["type"]
        if msg_type == "RECORD":
            # yield record
            record_msg = cast(SingerRecord, msg)
            yield dlt.mark.with_table_name(record_msg["record"], record_msg["stream"])
        elif msg_type == "STATE":
            last_state = cast(SingerState, msg)["value"]
            state_seen = True
    # Persist only a state the tap actually emitted. Writing the empty default
    # would wipe a previously stored state whenever a run produces no STATE
    # message, forcing the next run to start from scratch.
    if state is not None and state_seen:
        state["singer"] = last_state
|
|
|
|
|
|
@dlt.transformer()
def singer_raw_stream(singer_messages: TDataItems, use_state: bool = True) -> Iterator[TDataItem]:
    """Transform raw Singer messages into data items.

    Args:
        singer_messages: Incoming items, treated as an iterator of Singer messages.
        use_state: When True, the current dlt source state is handed to
            `get_source_from_stream` so it can record the Singer state;
            when False, no state is passed.

    Yields:
        Whatever `get_source_from_stream` yields for the given messages.
    """
    # the source state is fetched only when the caller opted in
    source_state = dlt.current.source_state() if use_state else None
    message_iter = cast(Iterator[SingerMessage], singer_messages)
    yield from get_source_from_stream(message_iter, source_state)
|
|
|
|
|
|
@dlt.source(spec=BaseConfiguration)  # use BaseConfiguration spec to prevent injections
def tap(
    venv: Venv,
    tap_name: str,
    config_file: FilePathOrDict,
    catalog_file: FilePathOrDict,
    use_state: bool = True,
) -> Any:
    """Create a dlt source that reads messages from a Singer tap process.

    Args:
        venv: Virtual environment handed to the process pipe; temporary config
            files are created inside its ``context.env_dir``.
        tap_name: Passed as the first argument to the process pipe and used as
            the name of the returned resource.
        config_file: Tap configuration, either a path or a dictionary. A
            dictionary is dumped as JSON to a temporary file.
        catalog_file: Tap catalog, handled exactly like ``config_file``.
        use_state: When True, a non-empty "singer" entry of the dlt source
            state is passed to the tap via ``--state`` and the state emitted
            by the tap is written back to the source state.

    Returns:
        The ``singer_messages`` resource that yields the tap's records.
    """
    # TODO: generate append/replace dispositions and some table/column hints from catalog files

    def as_config_file(config: FilePathOrDict) -> StrOrBytesPath:
        """Return a path for `config`, dumping dictionaries to a temp file in the venv."""
        # isinstance instead of an exact type check: dict subclasses (e.g.
        # OrderedDict) must be serialized as well, not mistaken for a path.
        if isinstance(config, dict):
            fd, tmp_name = tempfile.mkstemp(dir=venv.context.env_dir)
            with os.fdopen(fd, "wb") as f:
                json.dump(config, f)
            return cast(str, tmp_name)
        return config  # type: ignore

    # write config dictionary to temp file in virtual environment if passed as dict
    config_file_path = as_config_file(config_file)

    # process catalog like config
    catalog_file_path = as_config_file(catalog_file)

    @dlt.resource(name=tap_name)
    def singer_messages() -> Iterator[TDataItem]:
        """Run the tap process and yield the records it emits."""
        # possibly pass state
        state = dlt.current.source_state() if use_state else None
        if state is not None and state.get("singer"):
            # reuse the state fetched above instead of requesting it again
            state_params = ("--state", as_config_file(state["singer"]))
        else:
            state_params = ()  # type: ignore

        pipe_iterator = singer_process_pipe(
            venv,
            tap_name,
            "--config",
            os.path.abspath(config_file_path),
            "--catalog",
            os.path.abspath(catalog_file_path),
            *state_params,
        )
        yield from get_source_from_stream(pipe_iterator, state)

    return singer_messages
|