Files
dlt/docs/examples/archive/sources/singer_tap.py
David Scharf 3ebbfa1f9e migrate to uv (#2766)
* move pyproject.toml and makefile from old branch and add in-between changes

* update workflow files to use uv

* run new version of formatter

* fix building of images with uv

* possibly fix docs linting

* downgrade lancedb dependency to fix tests

* fix gcs compat mode for s3 for newest boto

* fix docstrings in examples

* add some uv constraints

* update readme.md and contributing.md and some other places

* allow duckdb 0.8 in range

* add link-mode copy to uv venv on windows

* remove poetry lockfile and unneeded lockfile checker

* fix chess api related failures

* sleep after dremio start..

* set correct package in pyproject

* Revert "add some uv constraints"

This reverts commit d611e9ecce.

# Conflicts:
#	pyproject.toml
#	uv.lock

* add missing databricks sql connector version bounds
2025-06-19 10:11:24 +02:00

113 lines
3.5 KiB
Python

import os
import tempfile
from typing import Any, Iterator, cast, Union
import dlt
from dlt.common.typing import TypedDict
from dlt.common import json
from dlt.common.configuration.specs import BaseConfiguration
from dlt.common.runners.venv import Venv
from dlt.common.typing import DictStrAny, StrAny, StrOrBytesPath, TDataItem, TDataItems
from docs.examples.sources.stdout import json_stdout as singer_process_pipe
# Type of the `config_file` / `catalog_file` arguments of `tap`: either a dictionary holding
# the settings (it is dumped to a temporary JSON file) or a path to an existing file.
FilePathOrDict = Union[StrAny, StrOrBytesPath]
class SingerMessage(TypedDict):
    """Base shape shared by every Singer message handled in this module."""

    # message kind; the code in this module acts on "RECORD" and "STATE"
    type: str  # noqa: A003
class SingerRecord(SingerMessage):
    """Singer message carrying a single data row (its ``type`` is "RECORD")."""

    # the row payload that gets yielded as a data item
    record: DictStrAny
    # name of the stream the row belongs to; used as the table name
    stream: str
class SingerState(SingerMessage):
    """Singer message carrying the tap's state (its ``type`` is "STATE")."""

    # state payload; the most recent one is stored under the "singer" key of the state dict
    value: DictStrAny
# try:
# from singer import parse_message_from_obj, Message, RecordMessage, StateMessage
# except ImportError:
# raise MissingDependencyException("Singer Source", ["python-dlt-singer"], "Singer runtime compatible with DLT")
# pip install ../singer/singer-python
# https://github.com/datamill-co/singer-runner/tree/master/singer_runner
# https://techgaun.github.io/active-forks/index.html#singer-io/singer-python
def get_source_from_stream(
    singer_messages: Iterator[SingerMessage], state: DictStrAny = None
) -> Iterator[TDataItem]:
    """Yield the records found in a stream of Singer messages and keep the last Singer state.

    RECORD messages are yielded with the table name set to the message's ``stream``.
    The ``value`` of the most recent STATE message is remembered and, once the input
    is exhausted, stored under ``state["singer"]``. Messages of any other type are
    ignored.

    The state is written only after the last message was consumed, so a caller that
    stops iterating early leaves ``state`` untouched. If the stream contained no STATE
    message at all, a previously stored ``state["singer"]`` is preserved instead of
    being replaced with an empty dictionary.

    Args:
        singer_messages: Iterator of Singer messages (dictionaries with a ``type`` key).
        state: Dictionary updated in place with the last Singer state, or None to
            skip state tracking.

    Yields:
        The ``record`` payload of every RECORD message, marked with its stream name.
    """
    last_state = {}
    # tells "no STATE message seen" apart from "the tap reported an empty state"
    state_received = False
    for msg in singer_messages:
        msg_type = msg["type"]
        if msg_type == "RECORD":
            # yield record
            record_msg = cast(SingerRecord, msg)
            yield dlt.mark.with_table_name(record_msg["record"], record_msg["stream"])
        elif msg_type == "STATE":
            last_state = cast(SingerState, msg)["value"]
            state_received = True
    # do not clobber the stored state when the tap did not report a new one
    if state is not None and state_received:
        state["singer"] = last_state
@dlt.transformer()
def singer_raw_stream(singer_messages: TDataItems, use_state: bool = True) -> Iterator[TDataItem]:
    """Transformer that turns incoming Singer messages into data items.

    Args:
        singer_messages: Data items received from the parent resource; they are
            treated as an iterator of Singer messages.
        use_state: When True the current dlt source state is handed to
            `get_source_from_stream` so the Singer state is stored in it;
            when False no state is passed.

    Yields:
        Whatever `get_source_from_stream` yields for the given messages.
    """
    source_state = dlt.current.source_state() if use_state else None
    message_iter = cast(Iterator[SingerMessage], singer_messages)
    yield from get_source_from_stream(message_iter, source_state)
@dlt.source(spec=BaseConfiguration)  # use BaseConfiguration spec to prevent injections
def tap(
    venv: Venv,
    tap_name: str,
    config_file: FilePathOrDict,
    catalog_file: FilePathOrDict,
    use_state: bool = True,
) -> Any:
    """Create a dlt source that reads data from a Singer tap.

    Args:
        venv: Virtual environment passed on to the process helper; temporary
            config files are created in its ``context.env_dir``.
        tap_name: Name of the tap; passed to the process helper and used as the
            name of the resulting resource.
        config_file: Tap configuration, either as a dictionary or as a path to a file.
        catalog_file: Tap catalog, either as a dictionary or as a path to a file.
        use_state: When True the Singer state kept in the dlt source state is passed
            to the tap via ``--state`` (if present) and updated from the tap's
            STATE messages.

    Returns:
        The ``singer_messages`` resource, named after ``tap_name``.
    """
    # TODO: generate append/replace dispositions and some table/column hints from catalog files

    def as_config_file(config: FilePathOrDict) -> StrOrBytesPath:
        """Return a file path for `config`, dumping dictionaries to a temp JSON file."""
        # isinstance (rather than an exact type check) also accepts dict subclasses,
        # which would otherwise be passed on as if they were paths
        if not isinstance(config, dict):
            return config  # type: ignore
        fd, tmp_name = tempfile.mkstemp(dir=venv.context.env_dir)
        with os.fdopen(fd, "wb") as f:
            json.dump(config, f)
        return cast(str, tmp_name)

    # write config dictionary to temp file in virtual environment if passed as dict
    config_file_path = as_config_file(config_file)
    # process catalog like config
    catalog_file_path = as_config_file(catalog_file)

    @dlt.resource(name=tap_name)
    def singer_messages() -> Iterator[TDataItem]:
        """Invoke the tap through the process helper and yield the records it emits."""
        # possibly pass state
        state = dlt.current.source_state() if use_state else None
        if state is not None and state.get("singer"):
            # reuse the state fetched above instead of looking it up a second time
            state_params = ("--state", as_config_file(state["singer"]))
        else:
            state_params = ()  # type: ignore
        # NOTE(review): `singer_process_pipe` is `json_stdout` from the stdout example;
        # presumably it runs the tap in `venv` and iterates JSON messages from its
        # stdout - confirm against that module
        pipe_iterator = singer_process_pipe(
            venv,
            tap_name,
            "--config",
            os.path.abspath(config_file_path),
            "--catalog",
            os.path.abspath(catalog_file_path),
            *state_params,
        )
        yield from get_source_from_stream(pipe_iterator, state)

    return singer_messages