mirror of
https://github.com/dlt-hub/dlt.git
synced 2025-12-17 19:31:30 +00:00
allows copying files from the vibe-hub repo (#2760)
* copies files from hub repo * adds ai setup and hub copy commands to dlt init for dlthub: * adds vibe rest api pipeline with full AST rewrite * moves repo locations to init * fixes run context in venv
This commit is contained in:
@@ -1,4 +1,13 @@
|
||||
from dlt.cli.reference import SupportsCliCommand
|
||||
from dlt.cli.exceptions import CliCommandException
|
||||
|
||||
__all__ = ["SupportsCliCommand", "CliCommandException"]
|
||||
DEFAULT_VERIFIED_SOURCES_REPO = "https://github.com/dlt-hub/verified-sources.git"
|
||||
DEFAULT_VIBE_SOURCES_REPO = "https://github.com/dlt-hub/vibe-hub.git"
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SupportsCliCommand",
|
||||
"CliCommandException",
|
||||
"DEFAULT_VERIFIED_SOURCES_REPO",
|
||||
"DEFAULT_VIBE_SOURCES_REPO",
|
||||
]
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import get_args, Literal, Set, Union
|
||||
from typing import List, Tuple, get_args, Literal, Set, Union
|
||||
|
||||
from dlt.cli import echo as fmt
|
||||
from dlt.cli.init_command import DEFAULT_VERIFIED_SOURCES_REPO
|
||||
from dlt.common import git
|
||||
from dlt.common.pipeline import get_dlt_repos_dir
|
||||
from dlt.common.runtime import run_context
|
||||
@@ -19,41 +18,17 @@ TSupportedIde = Literal[
|
||||
|
||||
SUPPORTED_IDES: Set[TSupportedIde] = list(get_args(TSupportedIde)) # type: ignore
|
||||
VERIFIED_SOURCES_AI_BASE_DIR = "ai"
|
||||
AI_CONTRIBUTE_URL = (
|
||||
os.path.splitext(DEFAULT_VERIFIED_SOURCES_REPO)[0]
|
||||
+ "/tree/master/"
|
||||
+ VERIFIED_SOURCES_AI_BASE_DIR
|
||||
)
|
||||
|
||||
# TODO Claude Desktop: rules need to be named `CLAUDE.md`, allow command to append to it
|
||||
# TODO Continue: rules need to be in YAML file, allow command to properly edit it
|
||||
# TODO generate more files based on the specifics of the source README and the destination
|
||||
|
||||
|
||||
def ai_setup_command(
|
||||
ide: TSupportedIde,
|
||||
branch: Union[str, None] = None,
|
||||
repo: str = DEFAULT_VERIFIED_SOURCES_REPO,
|
||||
) -> None:
|
||||
"""Get AI rules files into your local project for the selected IDE.
|
||||
|
||||
Get the source and destination directories for the rules files.
|
||||
Files found in the source directory will be copied into the destination directory.
|
||||
"""
|
||||
# where dlt-hub/verified-sources is cloned
|
||||
fmt.echo("Looking up IDE rules and configuration %s..." % fmt.bold(repo))
|
||||
src_storage = git.get_fresh_repo_files(repo, get_dlt_repos_dir(), branch=branch)
|
||||
if not src_storage.has_folder(VERIFIED_SOURCES_AI_BASE_DIR):
|
||||
fmt.warning(
|
||||
"Support for ai command not found in repo %s branch %s"
|
||||
% (fmt.bold(repo), fmt.bold(branch or "<default>"))
|
||||
)
|
||||
return
|
||||
src_dir = Path(src_storage.make_full_path(VERIFIED_SOURCES_AI_BASE_DIR)) / ide
|
||||
|
||||
# where the command is ran, i.e., project root
|
||||
dest_dir = Path(run_context.active().run_dir)
|
||||
copied_files = 0
|
||||
def _copy_repo_files(
|
||||
src_dir: Path, dest_dir: Path, warn_on_overwrite: bool = True
|
||||
) -> Tuple[List[str], int]:
|
||||
copied_files = []
|
||||
count_files = 0
|
||||
|
||||
for src_sub_path in src_dir.rglob("*"):
|
||||
if src_sub_path.is_dir():
|
||||
@@ -64,33 +39,96 @@ def ai_setup_command(
|
||||
fmt.echo(src_sub_path.read_text(encoding="utf-8"))
|
||||
continue
|
||||
|
||||
copied_files += 1
|
||||
count_files += 1
|
||||
dest_file_path = dest_dir / src_sub_path.relative_to(src_dir)
|
||||
if dest_file_path.exists():
|
||||
fmt.warning(f"Existing rules file found at {dest_file_path.absolute()}; Skipping.")
|
||||
if warn_on_overwrite:
|
||||
fmt.warning(f"Existing rules file found at {dest_file_path.absolute()}; Skipping.")
|
||||
continue
|
||||
|
||||
copied_files.append(src_sub_path.name)
|
||||
|
||||
if not dest_file_path.parent.exists():
|
||||
dest_file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
shutil.copy2(src_sub_path, dest_file_path)
|
||||
return copied_files, count_files
|
||||
|
||||
if copied_files == 0:
|
||||
|
||||
def ai_setup_command(
|
||||
ide: TSupportedIde,
|
||||
location: str,
|
||||
branch: Union[str, None] = None,
|
||||
hide_warnings: bool = False,
|
||||
) -> None:
|
||||
"""Get AI rules files into your local project for the selected IDE.
|
||||
|
||||
Get the source and destination directories for the rules files.
|
||||
Files found in the source directory will be copied into the destination directory.
|
||||
"""
|
||||
# where dlt-hub/verified-sources is cloned
|
||||
fmt.echo("Looking up IDE rules and configuration %s..." % fmt.bold(location))
|
||||
src_storage = git.get_fresh_repo_files(location, get_dlt_repos_dir(), branch=branch)
|
||||
if not src_storage.has_folder(VERIFIED_SOURCES_AI_BASE_DIR):
|
||||
fmt.warning(
|
||||
"Support for ai command not found in repo %s branch %s"
|
||||
% (fmt.bold(location), fmt.bold(branch or "<default>"))
|
||||
)
|
||||
return
|
||||
src_dir = Path(src_storage.make_full_path(VERIFIED_SOURCES_AI_BASE_DIR)) / ide
|
||||
|
||||
# where the command is ran, i.e., project root
|
||||
dest_dir = Path(run_context.active().run_dir)
|
||||
copied_files, count_files = _copy_repo_files(src_dir, dest_dir, not hide_warnings)
|
||||
if count_files == 0:
|
||||
fmt.echo(
|
||||
"%s%s is not yet supported. No files were found."
|
||||
% (fmt.bold(ide), fmt.style("", bold=False))
|
||||
)
|
||||
else:
|
||||
fmt.echo(
|
||||
"%s file(s) supporting %s were copied." % (fmt.bold(str(copied_files)), fmt.bold(ide))
|
||||
if copied_files:
|
||||
fmt.echo(
|
||||
"%s file(s) supporting %s were copied."
|
||||
% (fmt.bold(str(len(copied_files))), fmt.bold(ide))
|
||||
)
|
||||
|
||||
if not hide_warnings:
|
||||
# refer to contribute README in the repo
|
||||
ai_contribute_url = (
|
||||
os.path.splitext(location)[0] + "/tree/master/" + VERIFIED_SOURCES_AI_BASE_DIR
|
||||
)
|
||||
fmt.note(
|
||||
"Help us to build better support for %s by contributing better rules, prompts or configs"
|
||||
" in %s" % (ide, AI_CONTRIBUTE_URL)
|
||||
)
|
||||
fmt.note(
|
||||
"Help us to build better support for %s by contributing better rules, prompts or"
|
||||
" configs in %s" % (ide, ai_contribute_url)
|
||||
)
|
||||
|
||||
|
||||
def vibe_source_setup(
    source: str,
    location: str,
    branch: Union[str, None] = None,
) -> None:
    """Copies files from vibe sources repo into the current working folder"""

    fmt.echo("Looking up in dltHub for rules, docs and snippets for %s..." % fmt.bold(source))
    # clone/refresh the vibe sources repo into the shared dlt repos dir
    repo_storage = git.get_fresh_repo_files(location, get_dlt_repos_dir(), branch=branch)
    if not repo_storage.has_folder(source):
        fmt.warning("We have nothing for %s at dltHub yet." % fmt.bold(source))
        return

    source_root = Path(repo_storage.make_full_path(source))
    # where the command is ran, i.e., project root
    project_root = Path(run_context.active().run_dir)

    file_names, total_copied = _copy_repo_files(source_root, project_root)
    if total_copied == 0:
        fmt.warning("We have nothing for %s at dltHub yet." % fmt.bold(source))
        return

    fmt.echo(
        "%s file(s) supporting %s were copied:" % (fmt.bold(str(total_copied)), fmt.bold(source))
    )
    for file_name in file_names:
        fmt.echo(fmt.bold(file_name))
|
||||
|
||||
|
||||
# TODO create a command to create a copy-pasteable MCP server config
|
||||
|
||||
|
||||
def mcp_command() -> None:
    """Stub for the upcoming MCP server command; intentionally does nothing yet."""
|
||||
|
||||
@@ -168,4 +168,4 @@ def telemetry_change_status_command_wrapper(enabled: bool) -> None:
|
||||
|
||||
@utils.track_command("ai_setup", False)
|
||||
def ai_setup_command_wrapper(ide: TSupportedIde, branch: Union[str, None], repo: str) -> None:
|
||||
ai_setup_command(ide, branch=branch, repo=repo)
|
||||
ai_setup_command(ide, location=repo, branch=branch)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -203,10 +203,10 @@ def _get_docstring_for_module(sources_storage: FileStorage, source_name: str) ->
|
||||
|
||||
|
||||
def get_template_configuration(
|
||||
sources_storage: FileStorage, source_name: str
|
||||
sources_storage: FileStorage, source_name: str, display_source_name: str
|
||||
) -> SourceConfiguration:
|
||||
destination_pipeline_file_name = source_name + PIPELINE_FILE_SUFFIX
|
||||
source_pipeline_file_name = destination_pipeline_file_name
|
||||
destination_pipeline_file_name = display_source_name + PIPELINE_FILE_SUFFIX
|
||||
source_pipeline_file_name = source_name + PIPELINE_FILE_SUFFIX
|
||||
|
||||
if not sources_storage.has_file(source_pipeline_file_name):
|
||||
source_pipeline_file_name = DEFAULT_PIPELINE_TEMPLATE
|
||||
|
||||
@@ -5,10 +5,7 @@ import dlt.cli.echo as fmt
|
||||
|
||||
|
||||
from dlt.common.configuration import plugins
|
||||
from dlt.cli import SupportsCliCommand
|
||||
from dlt.cli.init_command import (
|
||||
DEFAULT_VERIFIED_SOURCES_REPO,
|
||||
)
|
||||
from dlt.cli import SupportsCliCommand, DEFAULT_VERIFIED_SOURCES_REPO
|
||||
from dlt.cli.exceptions import CliCommandException
|
||||
from dlt.cli.command_wrappers import (
|
||||
init_command_wrapper,
|
||||
|
||||
46
dlt/sources/_single_file_templates/vibe_rest_api_pipeline.py
Normal file
46
dlt/sources/_single_file_templates/vibe_rest_api_pipeline.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""A template that is a good start for vibe coding REST API Source. Works best with `dlt ai` command cursor rules"""
|
||||
|
||||
import dlt
|
||||
from dlt.sources.rest_api import (
|
||||
RESTAPIConfig,
|
||||
rest_api_resources,
|
||||
)
|
||||
|
||||
|
||||
@dlt.source
def source(access_token=dlt.secrets.value):
    """Skeleton REST API source: fill in the base url, auth and resource list below."""
    # TODO: configure the right auth or remove if api does not need authentication
    # NOTE: pass secrets and other configuration in source function signature
    auth_config = {
        "type": "bearer",
        "token": access_token,
    }
    client_config = {
        # TODO: place valid base url here
        "base_url": "https://example.com/v1/",
        "auth": auth_config,
    }
    config: RESTAPIConfig = {
        "client": client_config,
        "resources": [
            # TODO: add resource definitions here
        ],
    }

    yield from rest_api_resources(config)
|
||||
|
||||
|
||||
def get_data() -> None:
    """Run the template source into a local DuckDB destination and print the load info."""
    # TODO: during debugging feel free to pass access token explicitly
    # NOTE: use `secrets.toml` or env variables to pass configuration in production
    access_token = "my_access_token"

    pipeline = dlt.pipeline(
        pipeline_name="rest_api_github",
        destination="duckdb",
        dataset_name="rest_api_data",
    )
    print(pipeline.run(source(access_token)))  # noqa


if __name__ == "__main__":
    get_data()
|
||||
3
mypy.ini
3
mypy.ini
@@ -17,6 +17,9 @@ exclude=docs/examples/archive/*|tests/reflection/module_cases/*|tests/common/ref
|
||||
disallow_untyped_defs=false
|
||||
warn_return_any=false
|
||||
|
||||
[mypy-dlt.sources._single_file_templates.*]
|
||||
disallow_untyped_defs=false
|
||||
|
||||
[mypy-docs.*]
|
||||
disallow_untyped_defs=false
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ from dlt.common.utils import custom_environ, set_working_dir
|
||||
from dlt.common.pipeline import get_dlt_pipelines_dir
|
||||
|
||||
from tests.cli.utils import echo_default_choice, repo_dir, cloned_init_repo
|
||||
from tests.utils import TEST_STORAGE_ROOT, patch_home_dir
|
||||
from tests.utils import TEST_STORAGE_ROOT
|
||||
|
||||
BASE_COMMANDS = ["init", "deploy", "pipeline", "telemetry", "schema"]
|
||||
|
||||
|
||||
@@ -136,13 +136,13 @@ def test_command_instrumentation() -> None:
|
||||
|
||||
|
||||
def test_instrumentation_wrappers() -> None:
|
||||
from dlt.cli import (
|
||||
DEFAULT_VERIFIED_SOURCES_REPO,
|
||||
)
|
||||
from dlt.cli.deploy_command import (
|
||||
DeploymentMethods,
|
||||
COMMAND_DEPLOY_REPO_LOCATION,
|
||||
)
|
||||
from dlt.cli.init_command import (
|
||||
DEFAULT_VERIFIED_SOURCES_REPO,
|
||||
)
|
||||
from dlt.cli.command_wrappers import (
|
||||
init_command_wrapper,
|
||||
deploy_command_wrapper,
|
||||
|
||||
@@ -1 +1,7 @@
|
||||
from tests.utils import preserve_environ, autouse_test_storage, unload_modules, wipe_pipeline
|
||||
from tests.utils import (
|
||||
preserve_environ,
|
||||
autouse_test_storage,
|
||||
unload_modules,
|
||||
wipe_pipeline,
|
||||
patch_home_dir,
|
||||
)
|
||||
|
||||
@@ -25,10 +25,9 @@ from dlt.common.storages.file_storage import FileStorage
|
||||
from dlt.common.utils import set_working_dir
|
||||
|
||||
|
||||
from dlt.cli import init_command, echo, utils
|
||||
from dlt.cli import init_command, echo, utils, DEFAULT_VERIFIED_SOURCES_REPO
|
||||
from dlt.cli.init_command import (
|
||||
SOURCES_MODULE_NAME,
|
||||
DEFAULT_VERIFIED_SOURCES_REPO,
|
||||
SourceConfiguration,
|
||||
utils as cli_utils,
|
||||
files_ops,
|
||||
@@ -64,7 +63,16 @@ CORE_SOURCES_CONFIG = {
|
||||
CORE_SOURCES = list(CORE_SOURCES_CONFIG.keys())
|
||||
|
||||
# we also hardcode all the templates here for testing
|
||||
TEMPLATES = ["debug", "default", "arrow", "requests", "dataframe", "fruitshop", "github_api"]
|
||||
TEMPLATES = [
|
||||
"debug",
|
||||
"default",
|
||||
"arrow",
|
||||
"requests",
|
||||
"dataframe",
|
||||
"fruitshop",
|
||||
"github_api",
|
||||
"vibe_rest_api",
|
||||
]
|
||||
|
||||
# a few verified sources we know to exist
|
||||
SOME_KNOWN_VERIFIED_SOURCES = ["chess", "google_sheets", "pipedrive"]
|
||||
@@ -94,15 +102,22 @@ def test_default_source_file_selection() -> None:
|
||||
templates_storage = init_command._get_templates_storage()
|
||||
|
||||
# try a known source, it will take the known pipeline script
|
||||
tconf = files_ops.get_template_configuration(templates_storage, "debug")
|
||||
tconf = files_ops.get_template_configuration(templates_storage, "debug", "debug")
|
||||
assert tconf.dest_pipeline_script == "debug_pipeline.py"
|
||||
assert tconf.src_pipeline_script == "debug_pipeline.py"
|
||||
|
||||
# random name will select the default script
|
||||
tconf = files_ops.get_template_configuration(templates_storage, "very_nice_name")
|
||||
tconf = files_ops.get_template_configuration(
|
||||
templates_storage, "very_nice_name", "very_nice_name"
|
||||
)
|
||||
assert tconf.dest_pipeline_script == "very_nice_name_pipeline.py"
|
||||
assert tconf.src_pipeline_script == "default_pipeline.py"
|
||||
|
||||
# you can set the dest script name for existing scripts
|
||||
tconf = files_ops.get_template_configuration(templates_storage, "vibe_rest_api", "notion")
|
||||
assert tconf.dest_pipeline_script == "notion_pipeline.py"
|
||||
assert tconf.src_pipeline_script == "vibe_rest_api_pipeline.py"
|
||||
|
||||
|
||||
def test_init_command_new_pipeline_same_name(repo_dir: str, project_files: FileStorage) -> None:
|
||||
init_command.init_command("debug_pipeline", "bigquery", repo_dir)
|
||||
@@ -287,7 +302,7 @@ def test_init_core_sources_ejected(cloned_init_repo: FileStorage) -> None:
|
||||
assert files.has_folder(candidate)
|
||||
|
||||
|
||||
def test_init_writes_example_config_placeholders(repo_dir: str) -> None:
|
||||
def test_init_writes_example_config_placeholders(repo_dir: str, project_files: FileStorage) -> None:
|
||||
init_command.init_command("filesystem", "bigquery", repo_dir)
|
||||
# check that written secret of type string was replaced with correct placeholder value
|
||||
secrets = SecretsTomlProvider(settings_dir=dlt.current.run_context().settings_dir)
|
||||
|
||||
@@ -10,8 +10,7 @@ from dlt.common.utils import set_working_dir, uniq_id
|
||||
|
||||
from dlt.sources import SourceReference
|
||||
|
||||
from dlt.cli import echo
|
||||
from dlt.cli.init_command import DEFAULT_VERIFIED_SOURCES_REPO
|
||||
from dlt.cli import echo, DEFAULT_VERIFIED_SOURCES_REPO
|
||||
|
||||
from tests.utils import TEST_STORAGE_ROOT
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ from dlt.extract.exceptions import (
|
||||
from dlt.extract.items import TableNameMeta
|
||||
|
||||
from tests.common.utils import load_yml_case
|
||||
from tests.utils import MockableRunContext, unload_modules
|
||||
from tests.utils import unload_modules
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="function")
|
||||
|
||||
@@ -209,8 +209,15 @@ def _patch_home_dir() -> Iterator[None]:
|
||||
mock._global_dir = mock._data_dir = os.path.join(mock._local_dir, DOT_DLT)
|
||||
ctx.context = mock
|
||||
|
||||
with Container().injectable_context(ctx):
|
||||
yield
|
||||
# also emit corresponding env variables so pipelines in env work like that
|
||||
with custom_environ(
|
||||
{
|
||||
known_env.DLT_LOCAL_DIR: mock.local_dir,
|
||||
known_env.DLT_DATA_DIR: mock.data_dir,
|
||||
}
|
||||
):
|
||||
with Container().injectable_context(ctx):
|
||||
yield
|
||||
|
||||
|
||||
def _preserve_environ() -> Iterator[None]:
|
||||
|
||||
Reference in New Issue
Block a user