commit 9e0bf8bb4f5cf55f0996a5d2ffeb222a21f313c2 Author: Ytbarek Hailu Date: Wed Jan 8 14:30:44 2025 -0800 Open source intitial commit diff --git a/.github/repo_meta.yaml b/.github/repo_meta.yaml new file mode 100644 index 0000000..792e4a2 --- /dev/null +++ b/.github/repo_meta.yaml @@ -0,0 +1,33 @@ +# point_of_contact: the owner of this repository, can be a GitHub user or GitHub team +point_of_contact: sfc-gh-yhailu + +# production: whether this repository meets the criteria for being "production", see https://snowflakecomputing.atlassian.net/wiki/spaces/CLO/pages/2239988967/Production+Repository+Criteria for criteria +production: true + +# distributed: whether any source code in this repository is distributed directly to customers (e.g. driver and frontend software) +distributed: false + +# modified: whether any open source dependencies in this repository have been modified +modified: false + +# release_branches: list of release branch patterns, exact matches or regex is acceptable +release_branches: + - main + - release.* + + +# code_owners_file_present: whether there is a CODEOWNERS file in this repository +code_owners_file_present: true + +# jira_project_issue_type: the jira issuetype used to raise issues related to this repository in the SNOW Jira project +jira_project_issue_type: Bug + + + +# jira_area: the jira area that raised issues should use +jira_area: Orphaned + + +# audit_in_scope: whether this repository is included in scope for audits or certifications (SOX, SOC, ISO, Fedramp etc.) 
+audit_in_scope: false + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..909d2f3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +.idea/ +.DS_Store +.gradle +build +out +*.iml +*.iws +log/ +target/ +.env +Jenkinsfile-app +out/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4fcfc32 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,7 @@ +repos: + - repo: git@github.com:GitGuardian/ggshield.git + rev: v1.28.0 + hooks: + - id: ggshield + language_version: python3 + stages: [commit] diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e69de29 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..ba12d12 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM adoptopenjdk:11-jre-hotspot + +# Update and install dependencies +RUN apt update && \ + apt-get -y install coreutils python3-venv jq + +# Install AWS CLI +RUN python3 -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" +RUN pip install awscli + +# DlSync app +RUN mkdir /opt/app +WORKDIR /opt/app +COPY build/libs/dlsync-*.jar dlsync.jar \ No newline at end of file diff --git a/LEGAL.md b/LEGAL.md new file mode 100644 index 0000000..41b774a --- /dev/null +++ b/LEGAL.md @@ -0,0 +1,3 @@ + +#### This application is not part of the Snowflake Service and is governed by the terms in LICENSE, unless expressly agreed to in writing. You use this application at your own risk, and Snowflake has no obligation to support your use of this application. + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..f49a4e1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..66d51f2 --- /dev/null +++ b/README.md @@ -0,0 +1,294 @@ +# DLSync + +logo + +--- + +DLSync is a database change management that deploys database changes to our database. +Each object(view, table, udf ...) in our database will +have a corresponding SQL script file where every change to this object is tracked in this file only. DLSync keeps track of what changes have been deployed to database +by using hash. Hence DLSync is capable of identifying what scripts have changed in the current deployment. +Using this DLSync only deploys changed script to database objects. +DLSync also understands interdependency between different scripts, thus applies these changes +according their dependency. 
+Based on how we define the changes to database objects, DLSync divides database object scripts to 2 types, State and migration scripts. +## Key Features +- It combines state based and migration based change management to manage database changes +- Each object will have it's corresponding unique Script file where we can define the change to the object +- It can detect change between previous deployment and current script state. +- It can reorder scripts based on their dependency before deploying to database. +- It supports parametrization of scripts where we can define variables that change between different database instances. +- It supports parameter config file where each parameter config file corresponds to an instance +- It supports rollback to previous deployment state. +- Rollback is very simple and intuitive. Only one needs to rollback git repository of the script and triggering rollback module. +- It supports verify module where each database object is checked with current script to check for deployment verification or tracking out of sync database changes. +- It supports create script where we can create script file for each database objects. + +## Project structure +To use this tool first create your script root directory. +This directory will contain all scripts and configurations. +Inside this directory create a directory structure like: +``` +/script-root # Root directory for the scripts +├── /main # Main scripts for deployment +│ ├── /database_name_1 # Database name +│ │ ├── /schema_name_1 # database Schema name +│ │ │ ├── /[object_type]_1 # Database Object type like (VIEWS, FUNCTIONS, TABLES ...) +│ │ │ │ ├── object_name_1.sql # The database object name(table name, view name, function name ...) +│ │ │ │ ├── object_name_2.sql # The database object name(table name, view name, function name ...) +│ │ │ ├── /[object_type]_2 # Database Object type like (VIEWS, FUNCTIONS, TABLES ...) 
+│ │ │ │ ├── object_name_3.sql # The database object name(table name, view name, function name ...) +│ │ │ │ ├── object_name_4.sql # The database object name(table name, view name, function name ...) +│ │ ├── /schema_name_2 # database Schema name +│ │ │ ├── /[object_type]_1 # Database Object type like (VIEWS, FUNCTIONS, TABLES ...) +│ │ │ │ ├── object_name_5.sql # The database object name(table name, view name, function name ...) +│ │ │ │ ├── object_name_6.sql # The database object name(table name, view name, function name ...) +│ │ │ ├── /[object_type]_2 # Database Object type like (VIEWS, FUNCTIONS, TABLES ...) +│ │ │ │ ├── object_name_7.sql # The database object name(table name, view name, function name ...) +│ │ │ │ ├── object_name_8.sql # The database object name(table name, view name, function name ...) +├── /tests # SQL unit test scripts +├── config.yml # configuration file +├── parameter-[profile-1].properties # parameter property file +├── parameter-[profile-2].properties # parameter property file +└── parameter-[profile-3].properties # parameter property file +``` + +Where +- **database_name_*:** is the database name of your project, +- **schema_name_*:** are schemas inside the database, +- **object_type:** is type of the object only 1 of the following (VIEWS, FUNCTIONS, PROCEDURES, FILE_FORMATS, TABLES, SEQUENCES, STAGES, STREAMS, TASKS) +- **object_name_*.sql:** are individual database object scripts. +- **config.yml:** is a configuration file used to configure DLSync behavior. +- **parameter-[profile-*].properties:** is parameter to value map file. This is going to be used by corresponding individual instances of your database. +This property files will help you parametrize changing parameters and their value. For each deployment instance of your database(project) you should create a separate parameter profile property. +These property files should have names in the above format by replacing "format" by your deployment instance name. 
+where profile is the instance name of your database. You will provide the profile name in an environment variable while running this tool. + +### Script content +Each object will have a single SQL to track the changes applied to the given object. The SQL file is named using the object's name. +For example if you have a view named `SAMPLE_VIEW` in schema `MY_SCHEMA` in database `MY_DATABASE`, then the script file should be named `SAMPLE_VIEW.SQL` and should be placed in the directory `[scripts_root]/main/MY_DATABASE/MY_SCHEMA/VIEWS/SAMPLE_VIEW.SQL`. +The structure and content of the scripts will differ based on the type of script. This tool categorizes scripts into 2 types named State script and Migration script. +#### 1. State Script +This type of script is used for object types of Views, UDF, Stored Procedure and File formats. +In this type of script you define the current state(desired state) of the object. +When a change is made to the script, DLSync replaces the current object with the updated definition. +These types of scripts must always have a `create or replace` statement. Every time you make a change to the script DLSync will replace the object with the new definition. + +The sql file should be named with the database object name. +The State script file should adhere to the following rules +1. The file name should match the database object name referenced by the `create or replace` statement. +2. The file should contain only one SQL DDL script that creates and replaces the specified object. +3. The create script should refer to the object by its fully qualified name (database.schema.object_name) + +eg: view named SAMPLE_VIEW can have the following SQL statement in the `SAMPLE_VIEW.SQL` file. +``` +create or replace view ${MY_DB}.${MY_SCHEMA}.SAMPLE_VIEW as select * from ${MY_DB}.${MY_SECOND_SCHEMA}.MY_TABLE; +``` +#### 2. Migration Script +This type of script is used for object types of TABLES, SEQUENCES, STAGES, STREAMS and TASKS.
+Here the script is treated as migration that will be applied to the object sequentially based on the version number. +This type of script contains 1 or more migration versions. One migration versions contains version number, author(optional), content (DDL or DML SQL statement) , rollback statement(optional) and verify statement(optional). +Each migration version is immutable i.e Once the version is deployed you can not change the code of this version. Only you can add new versions. + +eg: for the table named `SAMPLE_TABLE` you can have the following SQL statement in the `SAMPLE_TABLE.SQL` file.: +``` +---version: 0, author: user1 +create or replace table ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE(id varchar, my_column varchar); +---rollback: drop table if exists ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE; +---verify: select * from ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE limit 1; + +---version: 1, author: user1 +insert into ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE values('1', 'value'); +---rollback: delete from ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE where id = '1'; +---verify: select 1/count(*) from ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE where id = '1'; + +---version: 2, author: user2 +alter table ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE add column my_new_column varchar; +---rollback: alter table ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE drop column my_new_column; +---verify: select my_new_column from ${MY_DB}.{MY_SCHEMA}.SAMPLE_TABLE limit 1; +``` + +The migration script will have the following format: +``` +---version: VERSION_NUMBER, author: NAME +CONTENT; +---rollback: ROLLBACK_CONTENT; +---verify: VERIFY_CONTENT; +``` +where +- ```VERSION_NUMBER``` is the version number of the migration script +- ```NAME``` is the author of the script, +- ```CONTENT``` is the DDL or DML script that changes the object, +- ```ROLLBACK_CONTENT``` is the script that rolls back the changes made by the migration script +- ```VERIFY_CONTENT``` is the script that verifies the changes made by the migration script. 
+ +The migration script should adhere to the following rules: +1. Each change to database object should be wrapped in a migration format specified above. +2. Each migration version should contain migration header (version and author) and the content of the migration(single DDL or DML script), rollback(optional) and verify (optional). +3. migration header should start in a new line with three hyphens(---) and can contain only version and author. +4. Version should be unique number per each script file and should be in incremental order. And it is used to order the scripts migration sequence for that object. +5. author is optional alphanumeric characters used for informational purpose only to track who added the changes. +6. Content of the change (migration) should be specified after migration header in a new line. And it can span multiple lines. +7. Content should always be terminated by semi-colon (`;`). +8. Rollback if specified should start in a new line with `---rollback: `. The rollback script should be on a single line and must be terminated with semi-colon (;); +9. Verify if specified should start in a new line with `---verify:`. The verify script should be on a single line and must be terminated with semi-colon (;); +10. Migration versions are immutable. Once a version is deployed, it cannot be changed. Only new versions can be added or existing versions can be rolled back. + +### Configurations +#### Parameter profile +Parameter files help you define parameters that change between different database instances. This is helpful if you have variables that change between different instances (like dev, staging and prod). +Parameter files are defined per each instance. Parameter file are basically property files where you define parameter and their values. +the parameter files should be placed in the root script directory and should be named in the following format: +``` +parameter-[profile].property +``` +where `[profile]` is the instance name of your database. 
you will provide the profile name in the command line option or environment variable while running this tool. +Eg. if you have a dev instance of your database, then you should create a parameter file named `parameter-dev.property` in the root script directory. And the content of this file can be: +``` +MY_DB=my_database_dev +MY_SCHEMA=my_schema_dev +other_param=other_value +``` +And you can use these parameters in your script files. The format for using parameters is: +``` +${parameter_name} +``` +where parameter_name is the name of parameter defined in the parameter-[profile].property file with its value. + +For example, +``` +create or replace view ${MY_DB}.${MY_SCHEMA}.my_view as select * from ${MY_DB}.${MY_SCHEMA}.my_table; +``` +#### config file +The config file is used to configure the behavior of this tool. The config file should be named `config.yml` and placed in the root script directory. The config file should have the following format: +``` +version: #version of the config file +configTables: # List of configuration tables, only used for create script module +scriptExclusion: # List of script files to be excluded from deploy, verify, rollback and create script module +dependencyOverride: # List of additional dependencies for the scripts + - script: # script file name to override the dependencies + dependencies: List of dependencies to override + ``` +The `configTables` is used by create script module to add the data of the tables to the script file. +The `scriptExclusion` is used to exclude the script files from being processed by this tool. +The `dependencyOverride` is used to override the dependencies of the script files. This can be used to add additional dependencies to the script files. +### How to use this tool +In order to run the application you need to provide the snowflake connection parameters in environment variables. 
The following environment variables are required to run the application: +``` +account=my_account #account used for connection +db=database #your database +schema=dl_sync #your dl_sync schema. It will use this schema to store neccessary tables for this tool +user=user_name #user name of the database +password=password #password for the connection (optional) +authenticator=externalbrowser #authenticator used for the connection (optional) +warehouse=my_warehouse #warehouse to be used by the connection +role=my_role #role used by this tool +``` + +You also need to provide the script root directory and which profile to use. This can be provided in the command line argument or in the environment variable. +Providing in the command line argument will override the environment variable. +you can provide command line option using the following: +``` +dlsync deploy --script-root path/to/db_scripts --profile dev +``` +or +``` +dlsync deploy -s path/to/db_scripts -p dev +``` +or you can provide in environment variable as: +``` +script_root=path/to/db_scripts +profile=dev +``` +There are 4 main modules (commands). Each module of the tool can be triggered from the command line argument. +#### Deploy +This module is used to deploy the changes to the database. It will deploy the changes to the database objects based on the script files. +First DLSync will identify the changed scripts based on the hash of the script file and the hash stored in the database(`dl_sync_script_history` table). For migration scripts each migration version will have it's hash stored in the script history. Thus only newly added versions will be picked up for the changed scripts. After identifying the changes, it will order the scripts based on their dependency. Then it will deploy the changes to the database objects sequentially. 
+The deploy module can be triggered using the following command: +``` +dlsync deploy -s path/to/db_scripts -p dev +``` +If you have already deployed the changes manually or through other tools, you can mark the scripts as deployed without deploying the changes. This will only add the hashes to the script history table(`dl_sync_script_history`) without affecting the current database state. This can be very helpful while migrating from other tools. +You can use the following command to mark the scripts as deployed without deploying the changes: +``` +dlsync deploy --only-hashes -s path/to/db_scripts -p dev +``` +or +``` +dlsync deploy -o -s path/to/db_scripts -p dev +``` + +#### Rollback +This module is used to roll back changes to the previous deployment. It will roll back the changes to the database objects based on the script files. This should be triggered after you have rolled back the git repository of the script files. +The rollback works first by identifying the changes between the current deployment and the previous deployment. For state scripts (views, udf, stored procedures and file formats) it will replace them with the current script (i.e. the previous version, as you have already made the git rollback). +For migration scripts it will identify the versions that need to be rolled back by checking for versions missing from the current scripts but deployed previously. Then it will use the rollback script specified in the migration version to roll back the changes. +This will be stored in the script history table. +To rollback the changes use the following command: +``` +dlsync rollback -script-root path/to/db_scripts -profile dev + +``` +#### Verify +This module is used to verify the database scripts are in sync with the current database objects. For state scripts it will compare the content of the script with the DDL of the database object. +For Migration scripts it uses the verify script provided in the migration version.
if the verify script throws error, then it will mark the migration version as out of sync. Since latest migration versions can change previous versions results, it only checks the latest migration version of each script for verification. +To verify the changes use the following command: +``` +dlsync verify --script-root path/to/db_scripts --profile qa +``` +#### Create script +This module is used to create script files for each database object. This can be used to create script files for the existing database objects. This might be helpful when you are migrating from other tools to DLSync. To achieve it first identifies the schemas inside the current database. Then for each schema retrieves the ddl of each object. Then based on the parameter profile provided it will replace the static values with the parameter keys. Then it will create the script file for each object. +If you have configuration tables where you want the data also to be included in the script file, you can provide the list of table names in the config file. +``` +dlsync create_script --script-root path/to/db_scripts --profile uat +``` + +## Tables used by this tool +DLSync stores script meta data, deployment history and logs in the database. +DLSync will depend on these tables to track the changes and deployment history. If these tables are missing from the schema and database provided in the connection parameter, then DLSync will create these tables. +Please make sure the role provided in the connection has the necessary privileges to create tables in the schema. + +**_N.B: Since DLSync uses these tables to track the changes, it is recommended not to delete or change these tables. It is also import not change the schema of the connection. 
If DLSync is not able to find these tables in the schema, it will create them and assume it is running for the first time._** + +This tool uses the following tables to store important information: +### dl_sync_script_history +This table stores the metadata for script files. It contains the following columns: +``` +script_id: # for state script the script name, for migration script script name plus the version number +object_name: the object name of the script +object_type: the type of the object (VIEWS, FUNCTIONS, PROCEDURES, FILE_FORMATS, TABLES, SEQUENCES, STAGES, STREAMS, TASKS) +rollback_script: the rollback script for the migration version +script_hash: the hash of the script file +deployed_hash: the hash of the script file that has been deployed +change_sync_id: the id of the change sync +created_by: the db user who added this change +created_ts: the timestamp when this change was added +updated_by: the db user who updated this change +updated_ts: the timestamp when this change was updated + +``` +### dl_sync_change_sync +This table stores the deployment history of the scripts. It contains the following columns: +``` +id: the id of the change sync +change_type: the type of the change (DEPLOY, ROLLBACK, VERIFY) +status: the status of the change (SUCCESS, FAILED) +log: the log of the change +change_count: the number of changes in this sync +start_time: the start time of the change +end_time: the end time of the change +``` +### dl_sync_script_event +This table stores the logs of each script activity.
It contains the following columns: +``` +id: the id of the script event +script_id: the id of the script +object_name: the object name of the script +script_hash: the hash of the script +status: the status of the script (SUCCESS, FAILED) +log: the log of the script +changeSyncId: the id of the change sync +created_by: the db user who added this change +created_ts: the timestamp when was this change added +``` +## Example scripts +To explore the tool you can use the example scripts provided in the directory `example_scripts` . diff --git a/backlog.md b/backlog.md new file mode 100644 index 0000000..9faf84c --- /dev/null +++ b/backlog.md @@ -0,0 +1,8 @@ +# DLSync Backlog +- [x] Rollback for migration +- [x] Verify module State Script +- [x] create script to capture config tables +- [x] Script hierarchy design +- [x] Verify module for migration Script +- [ ] Migration Script parsing using ATLR +- [ ] Support for different DB \ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..719baa8 --- /dev/null +++ b/build.gradle @@ -0,0 +1,51 @@ +/* + * This file was generated by the Gradle 'init' task. 
+ */ + +plugins { + id 'java' +} + +repositories { + mavenLocal() + maven { + url = uri('https://repo.maven.apache.org/maven2/') + } +} + +dependencies { + implementation 'org.apache.commons:commons-text:1.10.0' + implementation 'net.snowflake:snowflake-jdbc:3.20.0' + implementation 'ch.qos.logback:logback-core:1.5.12' + implementation 'ch.qos.logback:logback-classic:1.5.12' + implementation 'org.slf4j:slf4j-api:2.0.4' + implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.18.2' + implementation 'commons-cli:commons-cli:1.9.0' + + compileOnly 'org.projectlombok:lombok:1.18.24' + annotationProcessor 'org.projectlombok:lombok:1.18.24' + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1' + testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1' +} + +group = 'com.snowflake' +version = '1.0-SNAPSHOT' +description = 'dlsync' +java.sourceCompatibility = JavaVersion.VERSION_11 + +jar { + dependsOn 'test' + manifest { + attributes( + 'Main-Class': 'com.snowflake.dlsync.Main' + ) + } + duplicatesStrategy = DuplicatesStrategy.EXCLUDE + from { + configurations.runtimeClasspath.collect { it.isDirectory() ? 
it : zipTree(it) } + } +} + +test { + useJUnitPlatform() +} diff --git a/example_scripts/config.yaml b/example_scripts/config.yaml new file mode 100644 index 0000000..a246599 --- /dev/null +++ b/example_scripts/config.yaml @@ -0,0 +1,8 @@ +version: 1 +configTables: +scriptExclusion: + - ${EXAMPLE_DB}.${AUDIT_SCHEMA}.AUDIT_SEQ +dependencyOverride: + - script: ${EXAMPLE_DB}.${MAIN_SCHEMA}.UPDATE_STOCK + dependencies: + - ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS diff --git a/example_scripts/main/example_db/AUDIT_SCHEMA/SEQUENCES/AUDIT_SEQ.SQL b/example_scripts/main/example_db/AUDIT_SCHEMA/SEQUENCES/AUDIT_SEQ.SQL new file mode 100644 index 0000000..f5c5bf2 --- /dev/null +++ b/example_scripts/main/example_db/AUDIT_SCHEMA/SEQUENCES/AUDIT_SEQ.SQL @@ -0,0 +1,6 @@ +---version: 0, author: DlSync +CREATE SEQUENCE ${EXAMPLE_DB}.${AUDIT_SCHEMA}.AUDIT_SEQ + START WITH 1 + INCREMENT BY 1; +---rollback: DROP SEQUENCE IF EXISTS ${EXAMPLE_DB}.${AUDIT_SCHEMA}.AUDIT_SEQ; +---verify: SHOW SEQUENCES LIKE 'AUDIT_SEQ' IN ${EXAMPLE_DB}.${AUDIT_SCHEMA}; \ No newline at end of file diff --git a/example_scripts/main/example_db/AUDIT_SCHEMA/TABLES/PRODUCT_AUDIT.SQL b/example_scripts/main/example_db/AUDIT_SCHEMA/TABLES/PRODUCT_AUDIT.SQL new file mode 100644 index 0000000..97aa202 --- /dev/null +++ b/example_scripts/main/example_db/AUDIT_SCHEMA/TABLES/PRODUCT_AUDIT.SQL @@ -0,0 +1,9 @@ +---version: 0, author: DLSync +CREATE TABLE ${EXAMPLE_DB}.${AUDIT_SCHEMA}.PRODUCT_AUDIT ( + AUDIT_ID INT AUTOINCREMENT PRIMARY KEY, + PRODUCT_ID INT, + ACTION STRING NOT NULL, + ACTION_TIMESTAMP TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +---rollback: DROP TABLE IF EXISTS ${EXAMPLE_DB}.${AUDIT_SCHEMA}.PRODUCT_AUDIT; +---verify: SELECT * FROM ${EXAMPLE_DB}.${AUDIT_SCHEMA}.PRODUCT_AUDIT LIMIT 1; diff --git a/example_scripts/main/example_db/AUDIT_SCHEMA/TABLES/USER_AUDIT.SQL b/example_scripts/main/example_db/AUDIT_SCHEMA/TABLES/USER_AUDIT.SQL new file mode 100644 index 0000000..8fc50c0 --- /dev/null +++ 
b/example_scripts/main/example_db/AUDIT_SCHEMA/TABLES/USER_AUDIT.SQL @@ -0,0 +1,9 @@ +---version: 0, author: DLSync +CREATE TABLE ${EXAMPLE_DB}.${AUDIT_SCHEMA}.USER_AUDIT ( + AUDIT_ID INT AUTOINCREMENT PRIMARY KEY, + USER_ID INT, + ACTION STRING NOT NULL, + ACTION_TIMESTAMP TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +---rollback: DROP TABLE IF EXISTS ${EXAMPLE_DB}.${AUDIT_SCHEMA}.USER_AUDIT; +---verify: SELECT * FROM ${EXAMPLE_DB}.${AUDIT_SCHEMA}.USER_AUDIT LIMIT 1; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/FILE_FORMATS/PRODUCT_CSV_FILE_FORMAT.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/FILE_FORMATS/PRODUCT_CSV_FILE_FORMAT.SQL new file mode 100644 index 0000000..7e1af6b --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/FILE_FORMATS/PRODUCT_CSV_FILE_FORMAT.SQL @@ -0,0 +1,7 @@ +CREATE OR REPLACE FILE FORMAT ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_CSV_FILE_FORMAT + TYPE = CSV + FIELD_DELIMITER = '|' + SKIP_HEADER = 1 + NULL_IF = ('NULL', 'null') + EMPTY_FIELD_AS_NULL = true + COMPRESSION = gzip; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/FUNCTIONS/CALCULATE_DISCOUNT.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/FUNCTIONS/CALCULATE_DISCOUNT.SQL new file mode 100644 index 0000000..7218b48 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/FUNCTIONS/CALCULATE_DISCOUNT.SQL @@ -0,0 +1,7 @@ +CREATE OR REPLACE FUNCTION ${EXAMPLE_DB}.${MAIN_SCHEMA}.CALCULATE_DISCOUNT(P_PRICE NUMERIC(10, 2), P_DISCOUNT_RATE NUMERIC(5, 2)) +RETURNS NUMERIC(10, 2) +LANGUAGE SQL +AS +$$ + P_PRICE * (1 - P_DISCOUNT_RATE / 100) +$$; diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/FUNCTIONS/CALCULATE_ORDER_TOTAL.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/FUNCTIONS/CALCULATE_ORDER_TOTAL.SQL new file mode 100644 index 0000000..77fa730 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/FUNCTIONS/CALCULATE_ORDER_TOTAL.SQL @@ -0,0 +1,7 @@ +CREATE OR REPLACE 
FUNCTION ${EXAMPLE_DB}.${MAIN_SCHEMA}.CALCULATE_ORDER_TOTAL(P_QUANTITY INT, P_PRICE NUMERIC(10, 2)) +RETURNS NUMERIC(10, 2) +LANGUAGE SQL +AS +$$ + P_QUANTITY * P_PRICE +$$; diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/PROCEDURES/UPDATE_ORDER_SUMMARY.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/PROCEDURES/UPDATE_ORDER_SUMMARY.SQL new file mode 100644 index 0000000..382cf14 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/PROCEDURES/UPDATE_ORDER_SUMMARY.SQL @@ -0,0 +1,20 @@ +CREATE OR REPLACE PROCEDURE ${EXAMPLE_DB}.${MAIN_SCHEMA}.UPDATE_ORDER_SUMMARY() + returns string not null + language python + runtime_version = '3.8' + packages = ('snowflake-snowpark-python') + handler = 'main' + execute as caller +as +$$ +def main(snowpark_session): + + ## Read the command into a Snowflake dataframe + results_df = snowpark_session.sql("select * from ${EXAMPLE_DB}.${MAIN_SCHEMA}.USER_ORDER_SUMMARY") + + ## Write the results of the dataframe into a target table + results_df.write.mode("overwrite").save_as_table("${EXAMPLE_DB}.${MAIN_SCHEMA}.MATERILIZED_ORDER_SUMMARY") + + return f"Succeeded: Results inserted into table ${EXAMPLE_DB}.${MAIN_SCHEMA}.MATERILIZED_ORDER_SUMMARY" +$$ +; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/PROCEDURES/UPDATE_STOCK.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/PROCEDURES/UPDATE_STOCK.SQL new file mode 100644 index 0000000..86d29f8 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/PROCEDURES/UPDATE_STOCK.SQL @@ -0,0 +1,13 @@ +CREATE OR REPLACE PROCEDURE ${EXAMPLE_DB}.${MAIN_SCHEMA}.UPDATE_STOCK(P_PRODUCT_ID INT, P_QUANTITY INT) +RETURNS STRING +LANGUAGE SQL +AS +$$ +BEGIN + UPDATE ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS + SET STOCK = STOCK - P_QUANTITY + WHERE PRODUCT_ID = P_PRODUCT_ID; + + RETURN 'STOCK UPDATED SUCCESSFULLY'; +END; +$$; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/SEQUENCES/ORDER_SEQ.SQL 
b/example_scripts/main/example_db/MAIN_SCHEMA/SEQUENCES/ORDER_SEQ.SQL new file mode 100644 index 0000000..76f3ca8 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/SEQUENCES/ORDER_SEQ.SQL @@ -0,0 +1,4 @@ +---version: 0, author: DlSync +create or replace sequence ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDER_SEQ start with 1 increment by 1; +---rollback: DROP SEQUENCE IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDER_SEQ; +---verify: SHOW SEQUENCES LIKE 'ORDER_SEQ' IN ${EXAMPLE_DB}.${MAIN_SCHEMA}; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/STAGES/PRODUCT_DATA_STAGE.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/STAGES/PRODUCT_DATA_STAGE.SQL new file mode 100644 index 0000000..0cf4fa7 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/STAGES/PRODUCT_DATA_STAGE.SQL @@ -0,0 +1,13 @@ +---version: 0, author: DlSync +CREATE OR REPLACE STAGE ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_DATA_STAGE + FILE_FORMAT = (TYPE = 'CSV') + COMMENT = 'Stage for uploading product data files'; +---rollback: DROP STAGE IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_DATA_STAGE; +---verify: SHOW STAGES LIKE 'PRODUCT_DATA_STAGE' IN ${EXAMPLE_DB}.${MAIN_SCHEMA}; + +---version: 1, author: DlSync +CREATE OR REPLACE STAGE ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_DATA_STAGE +FILE_FORMAT = ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_CSV_FILE_FORMAT +COMMENT = 'Stage for uploading product data files'; +---rollback: DROP STAGE IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_DATA_STAGE; +---verify: SHOW STAGES LIKE 'PRODUCT_DATA_STAGE' IN ${EXAMPLE_DB}.${MAIN_SCHEMA}; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/STREAMS/ORDER_STREAM.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/STREAMS/ORDER_STREAM.SQL new file mode 100644 index 0000000..527f7c9 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/STREAMS/ORDER_STREAM.SQL @@ -0,0 +1,5 @@ +---version: 0, author: DlSync +CREATE OR REPLACE STREAM 
${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDER_STREAM +ON TABLE ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS; +---rollback: DROP STREAM IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDER_STREAM; +---verify: SHOW STREAMS LIKE 'ORDER_STREAM' IN ${EXAMPLE_DB}.${MAIN_SCHEMA}; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/DISCOUNTS.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/DISCOUNTS.SQL new file mode 100644 index 0000000..351e140 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/DISCOUNTS.SQL @@ -0,0 +1,10 @@ +---version: 0, author: DlSync +CREATE TABLE ${EXAMPLE_DB}.${MAIN_SCHEMA}.DISCOUNTS ( + ID INT AUTOINCREMENT PRIMARY KEY, + PRODUCT_ID INT REFERENCES ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS(ID), + DISCOUNT_RATE DECIMAL, + VALID_FROM TIMESTAMP, + VALID_UNTIL TIMESTAMP +); +---rollback: DROP TABLE IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.DISCOUNTS; +---verify: SELECT * FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.DISCOUNTS LIMIT 1; diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/ORDERS.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/ORDERS.SQL new file mode 100644 index 0000000..1627afb --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/ORDERS.SQL @@ -0,0 +1,14 @@ +---version: 0, author: DlSync +CREATE TABLE ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS ( + ID INT AUTOINCREMENT PRIMARY KEY, + USER_ID INT REFERENCES ${EXAMPLE_DB}.${MAIN_SCHEMA}.USERS(ID), + PRODUCT_ID INT REFERENCES ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS(ID), + QUANTITY INT NOT NULL, + ORDER_DATE TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +---rollback: DROP TABLE IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS; +---verify: SELECT * FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS LIMIT 1; + +---version: 1, author: DlSync +ALTER TABLE ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS ADD COLUMN PAYMENT VARCHAR; +---verify: SELECT PAYMENT FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS; \ No newline at end of file diff --git 
a/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/PRODUCTS.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/PRODUCTS.SQL new file mode 100644 index 0000000..fd8fab5 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/PRODUCTS.SQL @@ -0,0 +1,15 @@ +---version: 0, author: DlSync +CREATE TABLE ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS ( + ID INT AUTOINCREMENT PRIMARY KEY, + PRODUCT_NAME STRING NOT NULL, + PRICE NUMERIC(10, 2) NOT NULL, + STOCK INT NOT NULL, + CREATED_DATE TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +---rollback: DROP TABLE IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS; +---verify: SELECT * FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS LIMIT 1; + +---version: 1, author: DlSync +INSERT INTO ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS values('1', 'MY_VALUE', 25, 10, CURRENT_TIMESTAMP); +---rollback: DELETE FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS WHERE ID = '1'; +---verify: SELECT 1/count(*) FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS WHERE ID = '1'; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/USERS.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/USERS.SQL new file mode 100644 index 0000000..1bd7241 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/TABLES/USERS.SQL @@ -0,0 +1,9 @@ +---version: 0, author: DLSync +CREATE TABLE ${EXAMPLE_DB}.${MAIN_SCHEMA}.USERS ( + ID INT AUTOINCREMENT PRIMARY KEY, + USER_NAME STRING NOT NULL, + EMAIL STRING UNIQUE NOT NULL, + CREATED_DATE TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +---rollback: DROP TABLE IF EXISTS ${EXAMPLE_DB}.${MAIN_SCHEMA}.USERS; +---verify: SELECT * FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.USERS LIMIT 1; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/DATA_SUMMARY.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/DATA_SUMMARY.SQL new file mode 100644 index 0000000..d17bc1e --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/DATA_SUMMARY.SQL @@ -0,0 +1,11 @@ 
+CREATE OR REPLACE VIEW ${EXAMPLE_DB}.${MAIN_SCHEMA}.DATA_SUMMARY AS + SELECT ORD_SMY.ORDER_ID, STK_SMY.PRODUCT_ID, USR_ORD_SMY.USER_ID +FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDER_SUMMARY ORD_SMY +LEFT OUTER JOIN ${EXAMPLE_DB}.${MAIN_SCHEMA}.STOCK_SUMMARY STK_SMY + ON ORD_SMY.PRODUCT_ID = STK_SMY.PRODUCT_ID +LEFT OUTER JOIN ${EXAMPLE_DB}.${MAIN_SCHEMA}.USER_ORDER_SUMMARY USR_ORD_SMY + ON ORD_SMY.USER_ID = USR_ORD_SMY.USER_ID +LEFT OUTER JOIN ${EXAMPLE_DB}.${AUDIT_SCHEMA}.PRODUCT_AUDIT PRD_ADT + ON PRD_ADT.PRODUCT_ID = ORD_SMY.PRODUCT_ID +LEFT OUTER JOIN ${EXAMPLE_DB}.${AUDIT_SCHEMA}.USER_AUDIT USR_ADT + ON USR_ADT.USER_ID = ORD_SMY.USER_ID; diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/ORDER_SUMMARY.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/ORDER_SUMMARY.SQL new file mode 100644 index 0000000..ce4df23 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/ORDER_SUMMARY.SQL @@ -0,0 +1,13 @@ +CREATE OR REPLACE VIEW ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDER_SUMMARY AS +SELECT + ORD.ID AS ORDER_ID, + ORD.USER_ID, + ORD.PRODUCT_ID, + ORD.QUANTITY, + ORD.ORDER_DATE, + ${EXAMPLE_DB}.${MAIN_SCHEMA}.CALCULATE_ORDER_TOTAL(ORD.QUANTITY, PRD.PRICE) AS TOTAL_PRICE, + ${EXAMPLE_DB}.${MAIN_SCHEMA}.CALCULATE_DISCOUNT(TOTAL_PRICE, DIS.DISCOUNT_RATE) AS DISCOUNTED_TOTAL_PRICE +FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS ORD +LEFT OUTER JOIN ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS PRD +LEFT OUTER JOIN ${EXAMPLE_DB}.${MAIN_SCHEMA}.DISCOUNTS DIS +ON ORD.PRODUCT_ID = DIS.PRODUCT_ID; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/PRODUCT_STAGE_VIEW.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/PRODUCT_STAGE_VIEW.SQL new file mode 100644 index 0000000..735de36 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/PRODUCT_STAGE_VIEW.SQL @@ -0,0 +1,3 @@ +CREATE OR REPLACE VIEW ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_STAGE_VIEW +AS +SELECT t.$1, t.$2 FROM 
@${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_DATA_STAGE(file_format => '${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCT_CSV_FILE_FORMAT') t; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/STOCK_SUMMARY.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/STOCK_SUMMARY.SQL new file mode 100644 index 0000000..ba29aa8 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/STOCK_SUMMARY.SQL @@ -0,0 +1,10 @@ +CREATE OR REPLACE VIEW ${EXAMPLE_DB}.${MAIN_SCHEMA}.STOCK_SUMMARY AS +SELECT + ID AS PRODUCT_ID, + PRODUCT_NAME, + STOCK, + CASE + WHEN STOCK < 10 THEN 'LOW STOCK' + ELSE 'SUFFICIENT STOCK' + END AS STOCK_STATUS +FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS; \ No newline at end of file diff --git a/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/USER_ORDER_SUMMARY.SQL b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/USER_ORDER_SUMMARY.SQL new file mode 100644 index 0000000..6f5f724 --- /dev/null +++ b/example_scripts/main/example_db/MAIN_SCHEMA/VIEWS/USER_ORDER_SUMMARY.SQL @@ -0,0 +1,11 @@ +CREATE OR REPLACE VIEW ${EXAMPLE_DB}.${MAIN_SCHEMA}.USER_ORDER_SUMMARY AS +SELECT + U.ID AS USER_ID, + U.USER_NAME, + COUNT(O.ID) AS TOTAL_ORDERS, + SUM(P.PRICE * O.QUANTITY) AS TOTAL_SPENT +FROM ${EXAMPLE_DB}.${MAIN_SCHEMA}.USERS U +LEFT JOIN ${EXAMPLE_DB}.${MAIN_SCHEMA}.ORDERS O ON U.ID = O.USER_ID +LEFT JOIN ${EXAMPLE_DB}.${MAIN_SCHEMA}.PRODUCTS P ON O.PRODUCT_ID = P.ID +GROUP BY + U.ID, U.USER_NAME; \ No newline at end of file diff --git a/example_scripts/parameter-dev.properties b/example_scripts/parameter-dev.properties new file mode 100644 index 0000000..d0ac723 --- /dev/null +++ b/example_scripts/parameter-dev.properties @@ -0,0 +1,4 @@ +#Script Parameters +EXAMPLE_DB=EXAMPLE_DEV +MAIN_SCHEMA=MAIN_SCHEMA +AUDIT_SCHEMA=DEV_AUDIT_SCHEMA diff --git a/example_scripts/parameter-qa.properties b/example_scripts/parameter-qa.properties new file mode 100644 index 0000000..f425594 --- /dev/null +++ 
b/example_scripts/parameter-qa.properties @@ -0,0 +1,4 @@ +#Script Parameters +EXAMPLE_DB=EXAMPLE_QA +MAIN_SCHEMA=MAIN_SCHEMA +AUDIT_SCHEMA=QA_AUDIT_SCHEMA \ No newline at end of file diff --git a/example_scripts/parameter-uat.properties b/example_scripts/parameter-uat.properties new file mode 100644 index 0000000..6759d57 --- /dev/null +++ b/example_scripts/parameter-uat.properties @@ -0,0 +1,4 @@ +#Script Parameters +EXAMPLE_DB=EXAMPLE_UAT +MAIN_SCHEMA=MAIN_SCHEMA +AUDIT_SCHEMA=UAT_AUDIT_SCHEMA \ No newline at end of file diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..1e77ac6 --- /dev/null +++ b/gradle.properties @@ -0,0 +1 @@ +releaseVersion=1.5.0 \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..033e24c Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..9f4197d --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.2.1-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..fcb6fca --- /dev/null +++ b/gradlew @@ -0,0 +1,248 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. 
+# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." 
+ fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. 
+ # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. 
+# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..6689b85 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. 
+echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..12a0934 --- /dev/null +++ b/pom.xml @@ -0,0 +1,110 @@ + + + 4.0.0 + + com.snowflake + dlsync + 1.0-SNAPSHOT + + + 11 + 11 + + + + org.apache.commons + commons-text + 1.10.0 + + + + net.snowflake + snowflake-jdbc + 3.20.0 + + + ch.qos.logback + logback-core + 1.5.12 + + + + ch.qos.logback + logback-classic + 1.5.12 + + + org.slf4j + slf4j-api + 2.0.4 + + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + 2.18.2 + + + + org.projectlombok + lombok + 1.18.24 + provided + + + commons-cli + commons-cli + 1.9.0 + + + + org.junit.jupiter + junit-jupiter-api + 5.8.1 + test + + + org.junit.jupiter + junit-jupiter-engine + 5.8.1 + test + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + 
org.apache.maven.plugins + maven-assembly-plugin + + + package + + single + + + + + + com.snowflake.dlsync.Main + + + + + jar-with-dependencies + + + + + + + + + \ No newline at end of file diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..74c4998 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,5 @@ +/* + * This file was generated by the Gradle 'init' task. + */ + +rootProject.name = 'dlsync' diff --git a/src/main/java/com/snowflake/dlsync/ChangeManager.java b/src/main/java/com/snowflake/dlsync/ChangeManager.java new file mode 100644 index 0000000..42dd536 --- /dev/null +++ b/src/main/java/com/snowflake/dlsync/ChangeManager.java @@ -0,0 +1,223 @@ +package com.snowflake.dlsync; + +import com.snowflake.dlsync.dependency.DependencyGraph; +import com.snowflake.dlsync.doa.ScriptRepo; +import com.snowflake.dlsync.doa.ScriptSource; +import com.snowflake.dlsync.models.*; +import com.snowflake.dlsync.parser.ParameterInjector; +import lombok.extern.slf4j.Slf4j; + +import java.io.*; +import java.security.NoSuchAlgorithmException; +import java.sql.SQLException; +import java.util.*; +import java.util.stream.Collectors; + +@Slf4j +public class ChangeManager { + private Config config; + private ScriptSource scriptSource; + private ScriptRepo scriptRepo; + private DependencyGraph dependencyGraph; + private ParameterInjector parameterInjector; + + public ChangeManager(Config config, ScriptSource scriptSource, ScriptRepo scriptRepo, DependencyGraph dependencyGraph, ParameterInjector parameterInjector) { + this.config= config; + this.scriptSource = scriptSource; + this.scriptRepo = scriptRepo; + this.dependencyGraph = dependencyGraph; + this.parameterInjector = parameterInjector; + } + + private void validateScript(Script script) { + if(script instanceof MigrationScript && scriptRepo.isScriptVersionDeployed(script)) { + log.error("Migration type script changed. 
Script for the object {} has changed from previous deployments.", script.getId()); + throw new RuntimeException("Migration type scripts should not change."); + } + } + public void deploy(boolean onlyHashes) throws SQLException, IOException, NoSuchAlgorithmException{ + log.info("Started Deploying {}", onlyHashes?"Only Hashes":"scripts"); + startSync(ChangeType.DEPLOY); + scriptRepo.loadScriptHash(); + List