mirror of
https://github.com/dlt-hub/dlt.git
synced 2025-12-17 19:31:30 +00:00
reimplement, add tests (#3418)
Co-authored-by: ivasio <ivan@dlthub.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
from typing import Iterator, Optional, List
|
||||
from typing import Iterable, Iterator, Optional, List, Tuple
|
||||
from pathlib import Path
|
||||
from pathspec import PathSpec
|
||||
from pathspec.util import iter_tree_files
|
||||
@@ -6,7 +6,16 @@ from pathspec.util import iter_tree_files
|
||||
from dlt._workspace._workspace_context import WorkspaceRunContext
|
||||
|
||||
|
||||
class WorkspaceFileSelector:
|
||||
class BaseFileSelector(Iterable[Tuple[Path, Path]]):
    """Base class for file selectors.

    Iterating a selector yields one pair of paths per selected file: the absolute
    path of the file in the filesystem and the relative path of the file in the
    resulting tarball.

    The class adds no behavior of its own; subclasses provide `__iter__`.
    """
|
||||
|
||||
|
||||
class WorkspaceFileSelector(BaseFileSelector):
|
||||
"""Iterates files in workspace respecting ignore patterns and excluding workspace internals.
|
||||
|
||||
Uses gitignore-style patterns from a configurable ignore file (default .gitignore). Additional
|
||||
@@ -22,7 +31,7 @@ class WorkspaceFileSelector:
|
||||
self.root_path: Path = Path(context.run_dir).resolve()
|
||||
self.settings_dir: Path = Path(context.settings_dir).resolve()
|
||||
self.ignore_file: str = ignore_file
|
||||
self.spec: PathSpec = self._build_pathspec(additional_excludes or [])
|
||||
self.ignore_spec: PathSpec = self._build_pathspec(additional_excludes or [])
|
||||
|
||||
def _build_pathspec(self, additional_excludes: List[str]) -> PathSpec:
|
||||
"""Build PathSpec from ignore file + defaults + additional excludes"""
|
||||
@@ -39,8 +48,25 @@ class WorkspaceFileSelector:
|
||||
|
||||
return PathSpec.from_lines("gitwildmatch", patterns)
|
||||
|
||||
def __iter__(self) -> Iterator[Path]:
|
||||
def __iter__(self) -> Iterator[Tuple[Path, Path]]:
|
||||
"""Yield paths of files eligible for deployment"""
|
||||
root_path = Path(self.root_path)
|
||||
for file_path in iter_tree_files(self.root_path):
|
||||
if not self.spec.match_file(file_path):
|
||||
yield Path(file_path)
|
||||
if not self.ignore_spec.match_file(file_path):
|
||||
yield root_path / file_path, Path(file_path)
|
||||
|
||||
|
||||
class ConfigurationFileSelector(BaseFileSelector):
    """Selects config and secrets files found in the workspace settings directory."""

    def __init__(self, context: WorkspaceRunContext) -> None:
        # Keep the settings directory resolved so yielded locations are absolute
        settings_location = Path(context.settings_dir)
        self.settings_dir: Path = settings_location.resolve()

    def __iter__(self) -> Iterator[Tuple[Path, Path]]:
        """Yield (path under the settings dir, relative path) for config and secrets files.

        A file is selected when its path, as produced by walking the settings
        directory tree, ends with "config.toml" or "secrets.toml".
        """
        wanted_endings = ("config.toml", "secrets.toml")
        for relative_name in iter_tree_files(self.settings_dir):
            if not relative_name.endswith(wanted_endings):
                continue
            yield self.settings_dir / relative_name, Path(relative_name)
|
||||
|
||||
@@ -7,7 +7,7 @@ import yaml
|
||||
from dlt.common.time import precise_time
|
||||
from dlt.common.utils import digest256_tar_stream
|
||||
|
||||
from dlt._workspace.deployment.file_selector import WorkspaceFileSelector
|
||||
from dlt._workspace.deployment.file_selector import BaseFileSelector, WorkspaceFileSelector
|
||||
from dlt._workspace.deployment.manifest import (
|
||||
TDeploymentFileItem,
|
||||
TDeploymentManifest,
|
||||
@@ -22,33 +22,32 @@ DEFAULT_MANIFEST_FILE_NAME = "manifest.yaml"
|
||||
DEFAULT_DEPLOYMENT_PACKAGE_LAYOUT = "deployment-{timestamp}.tar.gz"
|
||||
|
||||
|
||||
class DeploymentPackageBuilder:
|
||||
class PackageBuilder:
|
||||
"""Builds gzipped deployment package from file selectors"""
|
||||
|
||||
def __init__(self, context: WorkspaceRunContext):
|
||||
self.run_context: WorkspaceRunContext = context
|
||||
|
||||
def write_package_to_stream(
|
||||
self, file_selector: WorkspaceFileSelector, output_stream: BinaryIO
|
||||
self, file_selector: BaseFileSelector, output_stream: BinaryIO
|
||||
) -> str:
|
||||
"""Write deployment package to output stream, return content hash"""
|
||||
manifest_files: List[TDeploymentFileItem] = []
|
||||
|
||||
# Add files to the archive
|
||||
with tarfile.open(fileobj=output_stream, mode="w|gz") as tar:
|
||||
for file_path in file_selector:
|
||||
full_path = self.run_context.run_dir / file_path
|
||||
for abs_path, rel_path in file_selector:
|
||||
# Use POSIX paths for tar archives (cross-platform compatibility)
|
||||
posix_path = file_path.as_posix()
|
||||
posix_path = rel_path.as_posix()
|
||||
tar.add(
|
||||
full_path,
|
||||
abs_path,
|
||||
arcname=f"{DEFAULT_DEPLOYMENT_FILES_FOLDER}/{posix_path}",
|
||||
recursive=False,
|
||||
)
|
||||
manifest_files.append(
|
||||
{
|
||||
"relative_path": posix_path,
|
||||
"size_in_bytes": full_path.stat().st_size,
|
||||
"size_in_bytes": abs_path.stat().st_size,
|
||||
}
|
||||
)
|
||||
# Create and add manifest with file metadata at the end
|
||||
@@ -67,7 +66,7 @@ class DeploymentPackageBuilder:
|
||||
|
||||
return digest256_tar_stream(output_stream)
|
||||
|
||||
def build_package(self, file_selector: WorkspaceFileSelector) -> Tuple[Path, str]:
|
||||
def build_package(self, file_selector: BaseFileSelector) -> Tuple[Path, str]:
|
||||
"""Create deployment package file, return (path, content_hash)"""
|
||||
package_name = DEFAULT_DEPLOYMENT_PACKAGE_LAYOUT.format(timestamp=str(precise_time()))
|
||||
package_path = Path(self.run_context.get_data_entity(package_name))
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from dlt._workspace.deployment.file_selector import WorkspaceFileSelector
|
||||
from dlt._workspace.deployment.file_selector import ConfigurationFileSelector, WorkspaceFileSelector
|
||||
|
||||
from tests.workspace.utils import isolated_workspace
|
||||
|
||||
@@ -27,5 +27,14 @@ def test_file_selector_respects_gitignore(with_additional_exclude: bool) -> None
|
||||
selector = WorkspaceFileSelector(
|
||||
ctx, additional_excludes=additional_excludes, ignore_file=".ignorefile"
|
||||
)
|
||||
files = set([f.as_posix() for f in selector])
|
||||
files = set([rel.as_posix() for _, rel in selector])
|
||||
assert files == expected_files
|
||||
|
||||
|
||||
def test_configuration_file_selector() -> None:
    """Check that ConfigurationFileSelector yields only config/secrets files from settings dir."""
    with isolated_workspace("configured_workspace") as ctx:
        config_selector = ConfigurationFileSelector(ctx)
        # Compare on the relative (in-archive) paths only; absolute paths vary per run
        relative_paths = {rel_path.as_posix() for _, rel_path in config_selector}
        # The "configured_workspace" case contains only *.config.toml files
        assert relative_paths == {"config.toml", "dev.config.toml"}
|
||||
|
||||
@@ -2,11 +2,10 @@ import os
|
||||
import tarfile
|
||||
import yaml
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
from dlt._workspace.deployment.package_builder import (
|
||||
DeploymentPackageBuilder,
|
||||
PackageBuilder,
|
||||
DEFAULT_DEPLOYMENT_FILES_FOLDER,
|
||||
DEFAULT_MANIFEST_FILE_NAME,
|
||||
)
|
||||
@@ -20,7 +19,7 @@ def test_write_package_to_stream() -> None:
|
||||
"""Test building deployment package to a stream and verify structure."""
|
||||
|
||||
with isolated_workspace("default") as ctx:
|
||||
builder = DeploymentPackageBuilder(ctx)
|
||||
builder = PackageBuilder(ctx)
|
||||
selector = WorkspaceFileSelector(ctx, ignore_file=".ignorefile")
|
||||
|
||||
stream = BytesIO()
|
||||
@@ -66,7 +65,7 @@ def test_build_package() -> None:
|
||||
"""Test that deployment packages are content-addressable with reproducible hashes."""
|
||||
|
||||
with isolated_workspace("default") as ctx:
|
||||
builder = DeploymentPackageBuilder(ctx)
|
||||
builder = PackageBuilder(ctx)
|
||||
selector = WorkspaceFileSelector(ctx)
|
||||
|
||||
package_path, content_hash = builder.build_package(selector)
|
||||
@@ -86,7 +85,7 @@ def test_build_package() -> None:
|
||||
def test_manifest_files_are_sorted() -> None:
|
||||
"""Test that hash is independent of file iteration order."""
|
||||
with isolated_workspace("default") as ctx:
|
||||
builder = DeploymentPackageBuilder(ctx)
|
||||
builder = PackageBuilder(ctx)
|
||||
selector = WorkspaceFileSelector(ctx)
|
||||
|
||||
hash1 = builder.write_package_to_stream(selector, BytesIO())
|
||||
|
||||
Reference in New Issue
Block a user