Allows copying files from the vibe-hub repo (#2760)

* copies files from hub repo

* adds ai setup and hub copy commands to dlt init for dlthub:

* adds vibe rest api pipeline with full AST rewrite

* moves repo locations to init

* fixes run context in venv
This commit is contained in:
rudolfix
2025-06-18 11:25:12 +02:00
committed by GitHub
parent 3fd94ca74a
commit b1cff8cc66
15 changed files with 755 additions and 586 deletions

View File

@@ -1,4 +1,13 @@
from dlt.cli.reference import SupportsCliCommand
from dlt.cli.exceptions import CliCommandException
__all__ = ["SupportsCliCommand", "CliCommandException"]
DEFAULT_VERIFIED_SOURCES_REPO = "https://github.com/dlt-hub/verified-sources.git"
DEFAULT_VIBE_SOURCES_REPO = "https://github.com/dlt-hub/vibe-hub.git"
__all__ = [
"SupportsCliCommand",
"CliCommandException",
"DEFAULT_VERIFIED_SOURCES_REPO",
"DEFAULT_VIBE_SOURCES_REPO",
]

View File

@@ -1,10 +1,9 @@
import os
import shutil
from pathlib import Path
from typing import get_args, Literal, Set, Union
from typing import List, Tuple, get_args, Literal, Set, Union
from dlt.cli import echo as fmt
from dlt.cli.init_command import DEFAULT_VERIFIED_SOURCES_REPO
from dlt.common import git
from dlt.common.pipeline import get_dlt_repos_dir
from dlt.common.runtime import run_context
@@ -19,41 +18,17 @@ TSupportedIde = Literal[
SUPPORTED_IDES: Set[TSupportedIde] = list(get_args(TSupportedIde)) # type: ignore
VERIFIED_SOURCES_AI_BASE_DIR = "ai"
AI_CONTRIBUTE_URL = (
os.path.splitext(DEFAULT_VERIFIED_SOURCES_REPO)[0]
+ "/tree/master/"
+ VERIFIED_SOURCES_AI_BASE_DIR
)
# TODO Claude Desktop: rules need to be named `CLAUDE.md`, allow command to append to it
# TODO Continue: rules need to be in YAML file, allow command to properly edit it
# TODO generate more files based on the specifics of the source README and the destination
def ai_setup_command(
ide: TSupportedIde,
branch: Union[str, None] = None,
repo: str = DEFAULT_VERIFIED_SOURCES_REPO,
) -> None:
"""Get AI rules files into your local project for the selected IDE.
Get the source and destination directories for the rules files.
Files found in the source directory will be copied into the destination directory.
"""
# where dlt-hub/verified-sources is cloned
fmt.echo("Looking up IDE rules and configuration %s..." % fmt.bold(repo))
src_storage = git.get_fresh_repo_files(repo, get_dlt_repos_dir(), branch=branch)
if not src_storage.has_folder(VERIFIED_SOURCES_AI_BASE_DIR):
fmt.warning(
"Support for ai command not found in repo %s branch %s"
% (fmt.bold(repo), fmt.bold(branch or "<default>"))
)
return
src_dir = Path(src_storage.make_full_path(VERIFIED_SOURCES_AI_BASE_DIR)) / ide
# where the command is ran, i.e., project root
dest_dir = Path(run_context.active().run_dir)
copied_files = 0
def _copy_repo_files(
src_dir: Path, dest_dir: Path, warn_on_overwrite: bool = True
) -> Tuple[List[str], int]:
copied_files = []
count_files = 0
for src_sub_path in src_dir.rglob("*"):
if src_sub_path.is_dir():
@@ -64,33 +39,96 @@ def ai_setup_command(
fmt.echo(src_sub_path.read_text(encoding="utf-8"))
continue
copied_files += 1
count_files += 1
dest_file_path = dest_dir / src_sub_path.relative_to(src_dir)
if dest_file_path.exists():
fmt.warning(f"Existing rules file found at {dest_file_path.absolute()}; Skipping.")
if warn_on_overwrite:
fmt.warning(f"Existing rules file found at {dest_file_path.absolute()}; Skipping.")
continue
copied_files.append(src_sub_path.name)
if not dest_file_path.parent.exists():
dest_file_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src_sub_path, dest_file_path)
return copied_files, count_files
if copied_files == 0:
def ai_setup_command(
ide: TSupportedIde,
location: str,
branch: Union[str, None] = None,
hide_warnings: bool = False,
) -> None:
"""Get AI rules files into your local project for the selected IDE.
Get the source and destination directories for the rules files.
Files found in the source directory will be copied into the destination directory.
"""
# where dlt-hub/verified-sources is cloned
fmt.echo("Looking up IDE rules and configuration %s..." % fmt.bold(location))
src_storage = git.get_fresh_repo_files(location, get_dlt_repos_dir(), branch=branch)
if not src_storage.has_folder(VERIFIED_SOURCES_AI_BASE_DIR):
fmt.warning(
"Support for ai command not found in repo %s branch %s"
% (fmt.bold(location), fmt.bold(branch or "<default>"))
)
return
src_dir = Path(src_storage.make_full_path(VERIFIED_SOURCES_AI_BASE_DIR)) / ide
# where the command is ran, i.e., project root
dest_dir = Path(run_context.active().run_dir)
copied_files, count_files = _copy_repo_files(src_dir, dest_dir, not hide_warnings)
if count_files == 0:
fmt.echo(
"%s%s is not yet supported. No files were found."
% (fmt.bold(ide), fmt.style("", bold=False))
)
else:
fmt.echo(
"%s file(s) supporting %s were copied." % (fmt.bold(str(copied_files)), fmt.bold(ide))
if copied_files:
fmt.echo(
"%s file(s) supporting %s were copied."
% (fmt.bold(str(len(copied_files))), fmt.bold(ide))
)
if not hide_warnings:
# refer to contribute README in the repo
ai_contribute_url = (
os.path.splitext(location)[0] + "/tree/master/" + VERIFIED_SOURCES_AI_BASE_DIR
)
fmt.note(
"Help us to build better support for %s by contributing better rules, prompts or configs"
" in %s" % (ide, AI_CONTRIBUTE_URL)
)
fmt.note(
"Help us to build better support for %s by contributing better rules, prompts or"
" configs in %s" % (ide, ai_contribute_url)
)
def vibe_source_setup(
    source: str,
    location: str,
    branch: Union[str, None] = None,
) -> None:
    """Copies files from vibe sources repo into the current working folder"""
    fmt.echo("Looking up in dltHub for rules, docs and snippets for %s..." % fmt.bold(source))
    # clone/refresh the vibe sources repo locally and look for a folder named after the source
    repo_storage = git.get_fresh_repo_files(location, get_dlt_repos_dir(), branch=branch)
    if not repo_storage.has_folder(source):
        fmt.warning("We have nothing for %s at dltHub yet." % fmt.bold(source))
        return
    source_dir = Path(repo_storage.make_full_path(source))
    # destination is the project root, i.e. where the command is ran
    project_dir = Path(run_context.active().run_dir)
    names_copied, total_found = _copy_repo_files(source_dir, project_dir)
    if total_found == 0:
        fmt.warning("We have nothing for %s at dltHub yet." % fmt.bold(source))
        return
    fmt.echo(
        "%s file(s) supporting %s were copied:" % (fmt.bold(str(total_found)), fmt.bold(source))
    )
    for copied_name in names_copied:
        fmt.echo(fmt.bold(copied_name))
# TODO create a command to create a copy-pasteable MCP server config
def mcp_command() -> None:
    """Placeholder for the MCP setup command; intentionally does nothing yet."""
    ...

View File

@@ -168,4 +168,4 @@ def telemetry_change_status_command_wrapper(enabled: bool) -> None:
@utils.track_command("ai_setup", False)
def ai_setup_command_wrapper(ide: TSupportedIde, branch: Union[str, None], repo: str) -> None:
ai_setup_command(ide, branch=branch, repo=repo)
ai_setup_command(ide, location=repo, branch=branch)

File diff suppressed because it is too large Load Diff

View File

@@ -203,10 +203,10 @@ def _get_docstring_for_module(sources_storage: FileStorage, source_name: str) ->
def get_template_configuration(
sources_storage: FileStorage, source_name: str
sources_storage: FileStorage, source_name: str, display_source_name: str
) -> SourceConfiguration:
destination_pipeline_file_name = source_name + PIPELINE_FILE_SUFFIX
source_pipeline_file_name = destination_pipeline_file_name
destination_pipeline_file_name = display_source_name + PIPELINE_FILE_SUFFIX
source_pipeline_file_name = source_name + PIPELINE_FILE_SUFFIX
if not sources_storage.has_file(source_pipeline_file_name):
source_pipeline_file_name = DEFAULT_PIPELINE_TEMPLATE

View File

@@ -5,10 +5,7 @@ import dlt.cli.echo as fmt
from dlt.common.configuration import plugins
from dlt.cli import SupportsCliCommand
from dlt.cli.init_command import (
DEFAULT_VERIFIED_SOURCES_REPO,
)
from dlt.cli import SupportsCliCommand, DEFAULT_VERIFIED_SOURCES_REPO
from dlt.cli.exceptions import CliCommandException
from dlt.cli.command_wrappers import (
init_command_wrapper,

View File

@@ -0,0 +1,46 @@
"""A template that is a good start for vibe coding REST API Source. Works best with `dlt ai` command cursor rules"""
import dlt
from dlt.sources.rest_api import (
RESTAPIConfig,
rest_api_resources,
)
@dlt.source
def source(access_token=dlt.secrets.value):
    """Skeleton REST API source — replace the TODO placeholders with real values."""
    # TODO: configure the right auth or remove if api does not need authentication
    # NOTE: pass secrets and other configuration in source function signature
    bearer_auth = {
        "type": "bearer",
        "token": access_token,
    }
    api_config: RESTAPIConfig = {
        "client": {
            # TODO: place valid base url here
            "base_url": "https://example.com/v1/",
            "auth": bearer_auth,
        },
        "resources": [
            # TODO: add resource definitions here
        ],
    }
    yield from rest_api_resources(api_config)
def get_data() -> None:
    """Run the template source against a local duckdb destination and print the load result."""
    # TODO: during debugging feel free to pass access token explicitly
    # NOTE: use `secrets.toml` or env variables to pass configuration in production
    access_token = "my_access_token"
    pipeline = dlt.pipeline(
        pipeline_name="rest_api_github",
        destination="duckdb",
        dataset_name="rest_api_data",
    )
    print(pipeline.run(source(access_token)))  # noqa


if __name__ == "__main__":
    get_data()

View File

@@ -17,6 +17,9 @@ exclude=docs/examples/archive/*|tests/reflection/module_cases/*|tests/common/ref
disallow_untyped_defs=false
warn_return_any=false
[mypy-dlt.sources._single_file_templates.*]
disallow_untyped_defs=false
[mypy-docs.*]
disallow_untyped_defs=false

View File

@@ -10,7 +10,7 @@ from dlt.common.utils import custom_environ, set_working_dir
from dlt.common.pipeline import get_dlt_pipelines_dir
from tests.cli.utils import echo_default_choice, repo_dir, cloned_init_repo
from tests.utils import TEST_STORAGE_ROOT, patch_home_dir
from tests.utils import TEST_STORAGE_ROOT
BASE_COMMANDS = ["init", "deploy", "pipeline", "telemetry", "schema"]

View File

@@ -136,13 +136,13 @@ def test_command_instrumentation() -> None:
def test_instrumentation_wrappers() -> None:
from dlt.cli import (
DEFAULT_VERIFIED_SOURCES_REPO,
)
from dlt.cli.deploy_command import (
DeploymentMethods,
COMMAND_DEPLOY_REPO_LOCATION,
)
from dlt.cli.init_command import (
DEFAULT_VERIFIED_SOURCES_REPO,
)
from dlt.cli.command_wrappers import (
init_command_wrapper,
deploy_command_wrapper,

View File

@@ -1 +1,7 @@
from tests.utils import preserve_environ, autouse_test_storage, unload_modules, wipe_pipeline
from tests.utils import (
preserve_environ,
autouse_test_storage,
unload_modules,
wipe_pipeline,
patch_home_dir,
)

View File

@@ -25,10 +25,9 @@ from dlt.common.storages.file_storage import FileStorage
from dlt.common.utils import set_working_dir
from dlt.cli import init_command, echo, utils
from dlt.cli import init_command, echo, utils, DEFAULT_VERIFIED_SOURCES_REPO
from dlt.cli.init_command import (
SOURCES_MODULE_NAME,
DEFAULT_VERIFIED_SOURCES_REPO,
SourceConfiguration,
utils as cli_utils,
files_ops,
@@ -64,7 +63,16 @@ CORE_SOURCES_CONFIG = {
CORE_SOURCES = list(CORE_SOURCES_CONFIG.keys())
# we also hardcode all the templates here for testing
TEMPLATES = ["debug", "default", "arrow", "requests", "dataframe", "fruitshop", "github_api"]
TEMPLATES = [
"debug",
"default",
"arrow",
"requests",
"dataframe",
"fruitshop",
"github_api",
"vibe_rest_api",
]
# a few verified sources we know to exist
SOME_KNOWN_VERIFIED_SOURCES = ["chess", "google_sheets", "pipedrive"]
@@ -94,15 +102,22 @@ def test_default_source_file_selection() -> None:
templates_storage = init_command._get_templates_storage()
# try a known source, it will take the known pipeline script
tconf = files_ops.get_template_configuration(templates_storage, "debug")
tconf = files_ops.get_template_configuration(templates_storage, "debug", "debug")
assert tconf.dest_pipeline_script == "debug_pipeline.py"
assert tconf.src_pipeline_script == "debug_pipeline.py"
# random name will select the default script
tconf = files_ops.get_template_configuration(templates_storage, "very_nice_name")
tconf = files_ops.get_template_configuration(
templates_storage, "very_nice_name", "very_nice_name"
)
assert tconf.dest_pipeline_script == "very_nice_name_pipeline.py"
assert tconf.src_pipeline_script == "default_pipeline.py"
# you can set the dest script name for existing scripts
tconf = files_ops.get_template_configuration(templates_storage, "vibe_rest_api", "notion")
assert tconf.dest_pipeline_script == "notion_pipeline.py"
assert tconf.src_pipeline_script == "vibe_rest_api_pipeline.py"
def test_init_command_new_pipeline_same_name(repo_dir: str, project_files: FileStorage) -> None:
init_command.init_command("debug_pipeline", "bigquery", repo_dir)
@@ -287,7 +302,7 @@ def test_init_core_sources_ejected(cloned_init_repo: FileStorage) -> None:
assert files.has_folder(candidate)
def test_init_writes_example_config_placeholders(repo_dir: str) -> None:
def test_init_writes_example_config_placeholders(repo_dir: str, project_files: FileStorage) -> None:
init_command.init_command("filesystem", "bigquery", repo_dir)
# check that written secret of type string was replaced with correct placeholder value
secrets = SecretsTomlProvider(settings_dir=dlt.current.run_context().settings_dir)

View File

@@ -10,8 +10,7 @@ from dlt.common.utils import set_working_dir, uniq_id
from dlt.sources import SourceReference
from dlt.cli import echo
from dlt.cli.init_command import DEFAULT_VERIFIED_SOURCES_REPO
from dlt.cli import echo, DEFAULT_VERIFIED_SOURCES_REPO
from tests.utils import TEST_STORAGE_ROOT

View File

@@ -47,7 +47,7 @@ from dlt.extract.exceptions import (
from dlt.extract.items import TableNameMeta
from tests.common.utils import load_yml_case
from tests.utils import MockableRunContext, unload_modules
from tests.utils import unload_modules
@pytest.fixture(autouse=True, scope="function")

View File

@@ -209,8 +209,15 @@ def _patch_home_dir() -> Iterator[None]:
mock._global_dir = mock._data_dir = os.path.join(mock._local_dir, DOT_DLT)
ctx.context = mock
with Container().injectable_context(ctx):
yield
# also emit corresponding env variables so pipelines in env work like that
with custom_environ(
{
known_env.DLT_LOCAL_DIR: mock.local_dir,
known_env.DLT_DATA_DIR: mock.data_dir,
}
):
with Container().injectable_context(ctx):
yield
def _preserve_environ() -> Iterator[None]: