Allows copying files from the vibe-hub repo (#2760)

* copies files from hub repo

* adds ai setup and hub copy commands to dlt init for dlthub:

* adds vibe rest api pipeline with full AST rewrite

* moves repo locations to init

* fixes run context in venv
This commit is contained in:
rudolfix
2025-06-18 11:25:12 +02:00
committed by GitHub
parent 3fd94ca74a
commit b1cff8cc66
15 changed files with 755 additions and 586 deletions

View File

@@ -1,4 +1,13 @@
from dlt.cli.reference import SupportsCliCommand
from dlt.cli.exceptions import CliCommandException
__all__ = ["SupportsCliCommand", "CliCommandException"]
DEFAULT_VERIFIED_SOURCES_REPO = "https://github.com/dlt-hub/verified-sources.git"
DEFAULT_VIBE_SOURCES_REPO = "https://github.com/dlt-hub/vibe-hub.git"
__all__ = [
"SupportsCliCommand",
"CliCommandException",
"DEFAULT_VERIFIED_SOURCES_REPO",
"DEFAULT_VIBE_SOURCES_REPO",
]

View File

@@ -1,10 +1,9 @@
import os
import shutil
from pathlib import Path
from typing import get_args, Literal, Set, Union
from typing import List, Tuple, get_args, Literal, Set, Union
from dlt.cli import echo as fmt
from dlt.cli.init_command import DEFAULT_VERIFIED_SOURCES_REPO
from dlt.common import git
from dlt.common.pipeline import get_dlt_repos_dir
from dlt.common.runtime import run_context
@@ -19,41 +18,17 @@ TSupportedIde = Literal[
SUPPORTED_IDES: Set[TSupportedIde] = list(get_args(TSupportedIde)) # type: ignore
VERIFIED_SOURCES_AI_BASE_DIR = "ai"
AI_CONTRIBUTE_URL = (
os.path.splitext(DEFAULT_VERIFIED_SOURCES_REPO)[0]
+ "/tree/master/"
+ VERIFIED_SOURCES_AI_BASE_DIR
)
# TODO Claude Desktop: rules need to be named `CLAUDE.md`, allow command to append to it
# TODO Continue: rules need to be in YAML file, allow command to properly edit it
# TODO generate more files based on the specifics of the source README and the destination
def ai_setup_command(
ide: TSupportedIde,
branch: Union[str, None] = None,
repo: str = DEFAULT_VERIFIED_SOURCES_REPO,
) -> None:
"""Get AI rules files into your local project for the selected IDE.
Get the source and destination directories for the rules files.
Files found in the source directory will be copied into the destination directory.
"""
# where dlt-hub/verified-sources is cloned
fmt.echo("Looking up IDE rules and configuration %s..." % fmt.bold(repo))
src_storage = git.get_fresh_repo_files(repo, get_dlt_repos_dir(), branch=branch)
if not src_storage.has_folder(VERIFIED_SOURCES_AI_BASE_DIR):
fmt.warning(
"Support for ai command not found in repo %s branch %s"
% (fmt.bold(repo), fmt.bold(branch or "<default>"))
)
return
src_dir = Path(src_storage.make_full_path(VERIFIED_SOURCES_AI_BASE_DIR)) / ide
# where the command is ran, i.e., project root
dest_dir = Path(run_context.active().run_dir)
copied_files = 0
def _copy_repo_files(
src_dir: Path, dest_dir: Path, warn_on_overwrite: bool = True
) -> Tuple[List[str], int]:
copied_files = []
count_files = 0
for src_sub_path in src_dir.rglob("*"):
if src_sub_path.is_dir():
@@ -64,33 +39,96 @@ def ai_setup_command(
fmt.echo(src_sub_path.read_text(encoding="utf-8"))
continue
copied_files += 1
count_files += 1
dest_file_path = dest_dir / src_sub_path.relative_to(src_dir)
if dest_file_path.exists():
fmt.warning(f"Existing rules file found at {dest_file_path.absolute()}; Skipping.")
if warn_on_overwrite:
fmt.warning(f"Existing rules file found at {dest_file_path.absolute()}; Skipping.")
continue
copied_files.append(src_sub_path.name)
if not dest_file_path.parent.exists():
dest_file_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src_sub_path, dest_file_path)
return copied_files, count_files
if copied_files == 0:
def ai_setup_command(
ide: TSupportedIde,
location: str,
branch: Union[str, None] = None,
hide_warnings: bool = False,
) -> None:
"""Get AI rules files into your local project for the selected IDE.
Get the source and destination directories for the rules files.
Files found in the source directory will be copied into the destination directory.
"""
# where dlt-hub/verified-sources is cloned
fmt.echo("Looking up IDE rules and configuration %s..." % fmt.bold(location))
src_storage = git.get_fresh_repo_files(location, get_dlt_repos_dir(), branch=branch)
if not src_storage.has_folder(VERIFIED_SOURCES_AI_BASE_DIR):
fmt.warning(
"Support for ai command not found in repo %s branch %s"
% (fmt.bold(location), fmt.bold(branch or "<default>"))
)
return
src_dir = Path(src_storage.make_full_path(VERIFIED_SOURCES_AI_BASE_DIR)) / ide
# where the command is ran, i.e., project root
dest_dir = Path(run_context.active().run_dir)
copied_files, count_files = _copy_repo_files(src_dir, dest_dir, not hide_warnings)
if count_files == 0:
fmt.echo(
"%s%s is not yet supported. No files were found."
% (fmt.bold(ide), fmt.style("", bold=False))
)
else:
fmt.echo(
"%s file(s) supporting %s were copied." % (fmt.bold(str(copied_files)), fmt.bold(ide))
if copied_files:
fmt.echo(
"%s file(s) supporting %s were copied."
% (fmt.bold(str(len(copied_files))), fmt.bold(ide))
)
if not hide_warnings:
# refer to contribute README in the repo
ai_contribute_url = (
os.path.splitext(location)[0] + "/tree/master/" + VERIFIED_SOURCES_AI_BASE_DIR
)
fmt.note(
"Help us to build better support for %s by contributing better rules, prompts or configs"
" in %s" % (ide, AI_CONTRIBUTE_URL)
)
fmt.note(
"Help us to build better support for %s by contributing better rules, prompts or"
" configs in %s" % (ide, ai_contribute_url)
)
def vibe_source_setup(
    source: str,
    location: str,
    branch: Union[str, None] = None,
) -> None:
    """Copies files from vibe sources repo into the current working folder"""
    fmt.echo("Looking up in dltHub for rules, docs and snippets for %s..." % fmt.bold(source))
    # clone/refresh the vibe sources repo locally and look for a folder named after the source
    repo_storage = git.get_fresh_repo_files(location, get_dlt_repos_dir(), branch=branch)
    if not repo_storage.has_folder(source):
        fmt.warning("We have nothing for %s at dltHub yet." % fmt.bold(source))
        return
    source_dir = Path(repo_storage.make_full_path(source))
    # destination is the project root, i.e. where the command is ran
    project_dir = Path(run_context.active().run_dir)
    names_copied, total_found = _copy_repo_files(source_dir, project_dir)
    if total_found == 0:
        fmt.warning("We have nothing for %s at dltHub yet." % fmt.bold(source))
        return
    fmt.echo(
        "%s file(s) supporting %s were copied:" % (fmt.bold(str(total_found)), fmt.bold(source))
    )
    for copied_name in names_copied:
        fmt.echo(fmt.bold(copied_name))
# TODO create a command to create a copy-pasteable MCP server config
def mcp_command() -> None:
    """Placeholder for the MCP setup command; intentionally does nothing yet."""
    ...

View File

@@ -168,4 +168,4 @@ def telemetry_change_status_command_wrapper(enabled: bool) -> None:
@utils.track_command("ai_setup", False)
def ai_setup_command_wrapper(ide: TSupportedIde, branch: Union[str, None], repo: str) -> None:
ai_setup_command(ide, branch=branch, repo=repo)
ai_setup_command(ide, location=repo, branch=branch)

File diff suppressed because it is too large Load Diff

View File

@@ -203,10 +203,10 @@ def _get_docstring_for_module(sources_storage: FileStorage, source_name: str) ->
def get_template_configuration(
sources_storage: FileStorage, source_name: str
sources_storage: FileStorage, source_name: str, display_source_name: str
) -> SourceConfiguration:
destination_pipeline_file_name = source_name + PIPELINE_FILE_SUFFIX
source_pipeline_file_name = destination_pipeline_file_name
destination_pipeline_file_name = display_source_name + PIPELINE_FILE_SUFFIX
source_pipeline_file_name = source_name + PIPELINE_FILE_SUFFIX
if not sources_storage.has_file(source_pipeline_file_name):
source_pipeline_file_name = DEFAULT_PIPELINE_TEMPLATE

View File

@@ -5,10 +5,7 @@ import dlt.cli.echo as fmt
from dlt.common.configuration import plugins
from dlt.cli import SupportsCliCommand
from dlt.cli.init_command import (
DEFAULT_VERIFIED_SOURCES_REPO,
)
from dlt.cli import SupportsCliCommand, DEFAULT_VERIFIED_SOURCES_REPO
from dlt.cli.exceptions import CliCommandException
from dlt.cli.command_wrappers import (
init_command_wrapper,

View File

@@ -0,0 +1,46 @@
"""A template that is a good start for vibe coding REST API Source. Works best with `dlt ai` command cursor rules"""
import dlt
from dlt.sources.rest_api import (
RESTAPIConfig,
rest_api_resources,
)
@dlt.source
def source(access_token=dlt.secrets.value):
    """Skeleton REST API source — replace the TODO placeholders with real values."""
    # TODO: configure the right auth or remove if api does not need authentication
    # NOTE: pass secrets and other configuration in source function signature
    bearer_auth = {
        "type": "bearer",
        "token": access_token,
    }
    api_config: RESTAPIConfig = {
        "client": {
            # TODO: place valid base url here
            "base_url": "https://example.com/v1/",
            "auth": bearer_auth,
        },
        "resources": [
            # TODO: add resource definitions here
        ],
    }
    yield from rest_api_resources(api_config)
def get_data() -> None:
    """Run the template source against a local duckdb destination and print the load result."""
    # TODO: during debugging feel free to pass access token explicitly
    # NOTE: use `secrets.toml` or env variables to pass configuration in production
    access_token = "my_access_token"
    pipeline = dlt.pipeline(
        pipeline_name="rest_api_github",
        destination="duckdb",
        dataset_name="rest_api_data",
    )
    print(pipeline.run(source(access_token)))  # noqa


if __name__ == "__main__":
    get_data()

View File

@@ -17,6 +17,9 @@ exclude=docs/examples/archive/*|tests/reflection/module_cases/*|tests/common/ref
disallow_untyped_defs=false
warn_return_any=false
[mypy-dlt.sources._single_file_templates.*]
disallow_untyped_defs=false
[mypy-docs.*]
disallow_untyped_defs=false

View File

@@ -10,7 +10,7 @@ from dlt.common.utils import custom_environ, set_working_dir
from dlt.common.pipeline import get_dlt_pipelines_dir
from tests.cli.utils import echo_default_choice, repo_dir, cloned_init_repo
from tests.utils import TEST_STORAGE_ROOT, patch_home_dir
from tests.utils import TEST_STORAGE_ROOT
BASE_COMMANDS = ["init", "deploy", "pipeline", "telemetry", "schema"]

View File

@@ -136,13 +136,13 @@ def test_command_instrumentation() -> None:
def test_instrumentation_wrappers() -> None:
from dlt.cli import (
DEFAULT_VERIFIED_SOURCES_REPO,
)
from dlt.cli.deploy_command import (
DeploymentMethods,
COMMAND_DEPLOY_REPO_LOCATION,
)
from dlt.cli.init_command import (
DEFAULT_VERIFIED_SOURCES_REPO,
)
from dlt.cli.command_wrappers import (
init_command_wrapper,
deploy_command_wrapper,

View File

@@ -1 +1,7 @@
from tests.utils import preserve_environ, autouse_test_storage, unload_modules, wipe_pipeline
from tests.utils import (
preserve_environ,
autouse_test_storage,
unload_modules,
wipe_pipeline,
patch_home_dir,
)

View File

@@ -25,10 +25,9 @@ from dlt.common.storages.file_storage import FileStorage
from dlt.common.utils import set_working_dir
from dlt.cli import init_command, echo, utils
from dlt.cli import init_command, echo, utils, DEFAULT_VERIFIED_SOURCES_REPO
from dlt.cli.init_command import (
SOURCES_MODULE_NAME,
DEFAULT_VERIFIED_SOURCES_REPO,
SourceConfiguration,
utils as cli_utils,
files_ops,
@@ -64,7 +63,16 @@ CORE_SOURCES_CONFIG = {
CORE_SOURCES = list(CORE_SOURCES_CONFIG.keys())
# we also hardcode all the templates here for testing
TEMPLATES = ["debug", "default", "arrow", "requests", "dataframe", "fruitshop", "github_api"]
TEMPLATES = [
"debug",
"default",
"arrow",
"requests",
"dataframe",
"fruitshop",
"github_api",
"vibe_rest_api",
]
# a few verified sources we know to exist
SOME_KNOWN_VERIFIED_SOURCES = ["chess", "google_sheets", "pipedrive"]
@@ -94,15 +102,22 @@ def test_default_source_file_selection() -> None:
templates_storage = init_command._get_templates_storage()
# try a known source, it will take the known pipeline script
tconf = files_ops.get_template_configuration(templates_storage, "debug")
tconf = files_ops.get_template_configuration(templates_storage, "debug", "debug")
assert tconf.dest_pipeline_script == "debug_pipeline.py"
assert tconf.src_pipeline_script == "debug_pipeline.py"
# random name will select the default script
tconf = files_ops.get_template_configuration(templates_storage, "very_nice_name")
tconf = files_ops.get_template_configuration(
templates_storage, "very_nice_name", "very_nice_name"
)
assert tconf.dest_pipeline_script == "very_nice_name_pipeline.py"
assert tconf.src_pipeline_script == "default_pipeline.py"
# you can set the dest script name for existing scripts
tconf = files_ops.get_template_configuration(templates_storage, "vibe_rest_api", "notion")
assert tconf.dest_pipeline_script == "notion_pipeline.py"
assert tconf.src_pipeline_script == "vibe_rest_api_pipeline.py"
def test_init_command_new_pipeline_same_name(repo_dir: str, project_files: FileStorage) -> None:
init_command.init_command("debug_pipeline", "bigquery", repo_dir)
@@ -287,7 +302,7 @@ def test_init_core_sources_ejected(cloned_init_repo: FileStorage) -> None:
assert files.has_folder(candidate)
def test_init_writes_example_config_placeholders(repo_dir: str) -> None:
def test_init_writes_example_config_placeholders(repo_dir: str, project_files: FileStorage) -> None:
init_command.init_command("filesystem", "bigquery", repo_dir)
# check that written secret of type string was replaced with correct placeholder value
secrets = SecretsTomlProvider(settings_dir=dlt.current.run_context().settings_dir)

View File

@@ -10,8 +10,7 @@ from dlt.common.utils import set_working_dir, uniq_id
from dlt.sources import SourceReference
from dlt.cli import echo
from dlt.cli.init_command import DEFAULT_VERIFIED_SOURCES_REPO
from dlt.cli import echo, DEFAULT_VERIFIED_SOURCES_REPO
from tests.utils import TEST_STORAGE_ROOT

View File

@@ -47,7 +47,7 @@ from dlt.extract.exceptions import (
from dlt.extract.items import TableNameMeta
from tests.common.utils import load_yml_case
from tests.utils import MockableRunContext, unload_modules
from tests.utils import unload_modules
@pytest.fixture(autouse=True, scope="function")

View File

@@ -209,8 +209,15 @@ def _patch_home_dir() -> Iterator[None]:
mock._global_dir = mock._data_dir = os.path.join(mock._local_dir, DOT_DLT)
ctx.context = mock
with Container().injectable_context(ctx):
yield
# also emit corresponding env variables so pipelines in env work like that
with custom_environ(
{
known_env.DLT_LOCAL_DIR: mock.local_dir,
known_env.DLT_DATA_DIR: mock.data_dir,
}
):
with Container().injectable_context(ctx):
yield
def _preserve_environ() -> Iterator[None]: