mirror of
https://github.com/bitol-io/open-data-contract-standard.git
synced 2025-05-28 19:34:02 +00:00
2348 lines
67 KiB
JSON
2348 lines
67 KiB
JSON
{
|
||
"$schema": "https://json-schema.org/draft/2019-09/schema",
|
||
"title": "Open Data Contract Standard (ODCS)",
|
||
"description": "An open data contract specification to establish agreement between data producers and consumers.",
|
||
"type": "object",
|
||
"properties": {
|
||
"version": {
|
||
"type": "string",
|
||
"description": "Current version of the data contract."
|
||
},
|
||
"kind": {
|
||
"type": "string",
|
||
"default": "DataContract",
|
||
"description": "The kind of file this is. Valid value is `DataContract`.",
|
||
"enum": ["DataContract"]
|
||
},
|
||
"apiVersion": {
|
||
"type": "string",
|
||
"default": "v3.0.0",
|
||
"description": "Version of the standard used to build the data contract. Default value is v3.0.0.",
|
||
"enum": ["v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"]
|
||
},
|
||
"id": {
|
||
"type": "string",
|
||
"description": "A unique identifier used to reduce the risk of dataset name collisions, such as a UUID."
|
||
},
|
||
"name": {
|
||
"type": "string",
|
||
"description": "Name of the data contract."
|
||
},
|
||
"tenant": {
|
||
"type": "string",
|
||
"description": "Indicates the property the data is primarily associated with. Value is case insensitive."
|
||
},
|
||
"tags": {
|
||
"$ref": "#/$defs/Tags"
|
||
},
|
||
"status": {
|
||
"type": "string",
|
||
"description": "Current status of the dataset. Valid values are `production`, `test`, or `development`.",
|
||
"examples": ["production", "test", "development"]
|
||
},
|
||
"servers": {
|
||
"type": "array",
|
||
"description": "List of servers where the datasets reside.",
|
||
"items": {
|
||
"$ref": "#/$defs/Server"
|
||
}
|
||
},
|
||
"dataProduct": {
|
||
"type": "string",
|
||
"description": "The name of the data product."
|
||
},
|
||
"description": {
|
||
"type": "object",
|
||
"description": "High level description of the dataset.",
|
||
"properties": {
|
||
"usage": {
|
||
"type": "string",
|
||
"description": "Intended usage of the dataset."
|
||
},
|
||
"purpose": {
|
||
"type": "string",
|
||
"description": "Purpose of the dataset."
|
||
},
|
||
"limitations": {
|
||
"type": "string",
|
||
"description": "Limitations of the dataset."
|
||
}
|
||
}
|
||
},
|
||
"domain": {
|
||
"type": "string",
|
||
"description": "Name of the logical data domain.",
|
||
"examples": ["imdb_ds_aggregate", "receiver_profile_out", "transaction_profile_out"]
|
||
},
|
||
"schema": {
|
||
"type": "array",
|
||
"description": "A list of elements within the schema to be cataloged.",
|
||
"items": {
|
||
"$ref": "#/$defs/SchemaObject"
|
||
}
|
||
},
|
||
"support": {
|
||
"$ref": "#/$defs/Support"
|
||
},
|
||
"price": {
|
||
"$ref": "#/$defs/Pricing"
|
||
},
|
||
"team": {
|
||
"type": "array",
|
||
"items": {
|
||
"$ref": "#/$defs/Team"
|
||
}
|
||
},
|
||
"roles": {
|
||
"type": "array",
|
||
"description": "A list of roles that will provide user access to the dataset.",
|
||
"items": {
|
||
"$ref": "#/$defs/Role"
|
||
}
|
||
},
|
||
"slaDefaultElement": {
|
||
"type": "string",
|
||
"description": "Element (using the element path notation) to do the checks on."
|
||
},
|
||
"slaProperties": {
|
||
"type": "array",
|
||
"description": "A list of key/value pairs for SLA specific properties. There is no limit on the type of properties (more details to come).",
|
||
"items": {
|
||
"$ref": "#/$defs/ServiceLevelAgreementProperty"
|
||
}
|
||
},
|
||
"customProperties": {
|
||
"$ref": "#/$defs/CustomProperties"
|
||
},
|
||
"contractCreatedTs": {
|
||
"type": "string",
|
||
"format": "date-time",
|
||
"description": "Timestamp in UTC of when the data contract was created."
|
||
}
|
||
},
|
||
"required": ["version", "apiVersion", "kind", "id", "status"],
|
||
"additionalProperties": false,
|
||
"$defs": {
|
||
"Server": {
|
||
"type": "object",
|
||
"description": "Data source details of where data is physically stored.",
|
||
"properties": {
|
||
"server": {
|
||
"type": "string",
|
||
"description": "Identifier of the server."
|
||
},
|
||
"type": {
|
||
"type": "string",
|
||
"description": "Type of the server.",
|
||
"enum": [
|
||
"api", "athena", "azure", "bigquery", "clickhouse", "databricks", "denodo", "dremio",
|
||
"duckdb", "glue", "cloudsql", "db2", "informix", "kafka", "kinesis", "local",
|
||
"mysql", "oracle", "postgresql", "postgres", "presto", "pubsub",
|
||
"redshift", "s3", "sftp", "snowflake", "sqlserver", "synapse", "trino", "vertica", "custom"
|
||
]
|
||
},
|
||
"description": {
|
||
"type": "string",
|
||
"description": "Description of the server."
|
||
},
|
||
"environment": {
|
||
"type": "string",
|
||
"description": "Environment of the server.",
|
||
"examples": ["prod", "preprod", "dev", "uat"]
|
||
},
|
||
"roles": {
|
||
"type": "array",
|
||
"description": "List of roles that have access to the server.",
|
||
"items": {
|
||
"$ref": "#/$defs/Role"
|
||
}
|
||
},
|
||
"customProperties": {
|
||
"$ref": "#/$defs/CustomProperties"
|
||
}
|
||
},
|
||
"allOf": [
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "api"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/ApiServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "athena"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/AthenaServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "azure"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/AzureServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "bigquery"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/BigQueryServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "clickhouse"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/ClickHouseServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "databricks"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/DatabricksServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "denodo"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/DenodoServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "dremio"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/DremioServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "duckdb"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/DuckdbServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "glue"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/GlueServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "cloudsql"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/GoogleCloudSqlServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "db2"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/IBMDB2Server"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "informix"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/InformixServer"
|
||
}
|
||
},
|
||
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "custom"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/CustomServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "kafka"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/KafkaServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "kinesis"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/KinesisServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "local"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/LocalServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "mysql"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/MySqlServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "oracle"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/OracleServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "postgresql"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/PostgresServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "postgres"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/PostgresServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "presto"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/PrestoServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "pubsub"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/PubSubServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "redshift"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/RedshiftServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "s3"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/S3Server"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "sftp"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/SftpServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "snowflake"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/SnowflakeServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "sqlserver"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/SqlserverServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "synapse"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/SynapseServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "trino"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/TrinoServer"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "vertica"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/ServerSource/VerticaServer"
|
||
}
|
||
}
|
||
],
|
||
"required": ["server", "type"]
|
||
},
|
||
"ServerSource": {
|
||
"ApiServer": {
|
||
"type": "object",
|
||
"title": "ApiServer",
|
||
"properties": {
|
||
"location": {
|
||
"type": "string",
|
||
"format": "uri",
|
||
"description": "The url to the API.",
|
||
"examples": [
|
||
"https://api.example.com/v1"
|
||
]
|
||
}
|
||
},
|
||
"required": [
|
||
"location"
|
||
]
|
||
},
|
||
"AthenaServer": {
|
||
"type": "object",
|
||
"title": "AthenaServer",
|
||
"properties": {
|
||
"stagingDir": {
|
||
"type": "string",
|
||
"format": "uri",
|
||
"description": "Amazon Athena automatically stores query results and metadata information for each query that runs in a query result location that you can specify in Amazon S3.",
|
||
"examples": [
|
||
"s3://my_storage_account_name/my_container/path"
|
||
]
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "Identify the schema in the data source in which your tables exist."
|
||
},
|
||
"catalog": {
|
||
"type": "string",
|
||
"description": "Identify the name of the Data Source, also referred to as a Catalog.",
|
||
"default": "awsdatacatalog"
|
||
},
|
||
"regionName": {
|
||
"type": "string",
|
||
"description": "The region your AWS account uses.",
|
||
"examples": ["eu-west-1"]
|
||
}
|
||
},
|
||
"required": [
|
||
"stagingDir",
|
||
"schema"
|
||
]
|
||
},
|
||
"AzureServer": {
|
||
"type": "object",
|
||
"title": "AzureServer",
|
||
"properties": {
|
||
"location": {
|
||
"type": "string",
|
||
"format": "uri",
|
||
"description": "Fully qualified path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs.",
|
||
"examples": [
|
||
"az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet",
|
||
"abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet"
|
||
]
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"enum": [
|
||
"parquet",
|
||
"delta",
|
||
"json",
|
||
"csv"
|
||
],
|
||
"description": "File format."
|
||
},
|
||
"delimiter": {
|
||
"type": "string",
|
||
"enum": [
|
||
"new_line",
|
||
"array"
|
||
],
|
||
"description": "Only for format = json. How multiple json documents are delimited within one file"
|
||
}
|
||
},
|
||
"required": [
|
||
"location",
|
||
"format"
|
||
]
|
||
},
|
||
"BigQueryServer": {
|
||
"type": "object",
|
||
"title": "BigQueryServer",
|
||
"properties": {
|
||
"project": {
|
||
"type": "string",
|
||
"description": "The GCP project name."
|
||
},
|
||
"dataset": {
|
||
"type": "string",
|
||
"description": "The GCP dataset name."
|
||
}
|
||
},
|
||
"required": [
|
||
"project",
|
||
"dataset"
|
||
]
|
||
},
|
||
"ClickHouseServer": {
|
||
"type": "object",
|
||
"title": "ClickHouseServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the ClickHouse server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port to the ClickHouse server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"database"
|
||
]
|
||
},
|
||
"DatabricksServer": {
|
||
"type": "object",
|
||
"title": "DatabricksServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The Databricks host",
|
||
"examples": [
|
||
"dbc-abcdefgh-1234.cloud.databricks.com"
|
||
]
|
||
},
|
||
"catalog": {
|
||
"type": "string",
|
||
"description": "The name of the Hive or Unity catalog"
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The schema name in the catalog"
|
||
}
|
||
},
|
||
"required": [
|
||
"catalog",
|
||
"schema"
|
||
]
|
||
},
|
||
"DenodoServer": {
|
||
"type": "object",
|
||
"title": "DenodoServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the Denodo server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port of the Denodo server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port"
|
||
]
|
||
},
|
||
"DremioServer": {
|
||
"type": "object",
|
||
"title": "DremioServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the Dremio server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port of the Dremio server."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port"
|
||
]
|
||
},
|
||
"DuckdbServer": {
|
||
"type": "object",
|
||
"title": "DuckdbServer",
|
||
"properties": {
|
||
"database": {
|
||
"type": "string",
|
||
"description": "Path to duckdb database file."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema."
|
||
}
|
||
},
|
||
"required": [
|
||
"database"
|
||
]
|
||
},
|
||
"GlueServer": {
|
||
"type": "object",
|
||
"title": "GlueServer",
|
||
"properties": {
|
||
"account": {
|
||
"type": "string",
|
||
"description": "The AWS Glue account",
|
||
"examples": [
|
||
"1234-5678-9012"
|
||
]
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The AWS Glue database name",
|
||
"examples": [
|
||
"my_database"
|
||
]
|
||
},
|
||
"location": {
|
||
"type": "string",
|
||
"format": "uri",
|
||
"description": "The AWS S3 path. Must be in the form of a URL.",
|
||
"examples": [
|
||
"s3://datacontract-example-orders-latest/data/{model}"
|
||
]
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"description": "The format of the files",
|
||
"examples": [
|
||
"parquet",
|
||
"csv",
|
||
"json",
|
||
"delta"
|
||
]
|
||
}
|
||
},
|
||
"required": [
|
||
"account",
|
||
"database"
|
||
]
|
||
},
|
||
"GoogleCloudSqlServer": {
|
||
"type": "object",
|
||
"title": "GoogleCloudSqlServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the Google Cloud Sql server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port of the Google Cloud Sql server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"database",
|
||
"schema"
|
||
]
|
||
},
|
||
"IBMDB2Server": {
|
||
"type": "object",
|
||
"title": "IBMDB2Server",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the IBM DB2 server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port of the IBM DB2 server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"database"
|
||
]
|
||
},
|
||
"InformixServer": {
|
||
"type": "object",
|
||
"title": "InformixServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host to the Informix server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port to the Informix server. Defaults to 9088."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"database"
|
||
]
|
||
},
|
||
"CustomServer": {
|
||
"type": "object",
|
||
"title": "CustomServer",
|
||
"properties": {
|
||
"account": {
|
||
"type": "string",
|
||
"description": "Account used by the server."
|
||
},
|
||
"catalog": {
|
||
"type": "string",
|
||
"description": "Name of the catalog."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "Name of the database."
|
||
},
|
||
"dataset": {
|
||
"type": "string",
|
||
"description": "Name of the dataset."
|
||
},
|
||
"delimiter": {
|
||
"type": "string",
|
||
"description": "Delimiter."
|
||
},
|
||
"endpointUrl": {
|
||
"type": "string",
|
||
"description": "Server endpoint.",
|
||
"format": "uri"
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"description": "File format."
|
||
},
|
||
"host": {
|
||
"type": "string",
|
||
"description": "Host name or IP address."
|
||
},
|
||
"location": {
|
||
"type": "string",
|
||
"description": "A URL to a location.",
|
||
"format": "uri"
|
||
},
|
||
"path": {
|
||
"type": "string",
|
||
"description": "Relative or absolute path to the data file(s)."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "Port to the server. No default value is assumed for custom servers."
|
||
},
|
||
"project": {
|
||
"type": "string",
|
||
"description": "Project name."
|
||
},
|
||
"region": {
|
||
"type": "string",
|
||
"description": "Cloud region."
|
||
},
|
||
"regionName": {
|
||
"type": "string",
|
||
"description": "Region name."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "Name of the schema."
|
||
},
|
||
"serviceName": {
|
||
"type": "string",
|
||
"description": "Name of the service."
|
||
},
|
||
"stagingDir": {
|
||
"type": "string",
|
||
"description": "Staging directory."
|
||
},
|
||
"warehouse": {
|
||
"type": "string",
|
||
"description": "Name of the cluster or warehouse."
|
||
}
|
||
}
|
||
},
|
||
"KafkaServer": {
|
||
"type": "object",
|
||
"title": "KafkaServer",
|
||
"description": "Kafka Server",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The bootstrap server of the kafka cluster."
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"description": "The format of the messages.",
|
||
"examples": ["json", "avro", "protobuf", "xml"],
|
||
"default": "json"
|
||
}
|
||
},
|
||
"required": [
|
||
"host"
|
||
]
|
||
},
|
||
"KinesisServer": {
|
||
"type": "object",
|
||
"title": "KinesisDataStreamsServer",
|
||
"description": "Kinesis Data Streams Server",
|
||
"properties": {
|
||
"region": {
|
||
"type": "string",
|
||
"description": "AWS region.",
|
||
"examples": [
|
||
"eu-west-1"
|
||
]
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"description": "The format of the record",
|
||
"examples": [
|
||
"json",
|
||
"avro",
|
||
"protobuf"
|
||
]
|
||
}
|
||
}
|
||
},
|
||
"LocalServer": {
|
||
"type": "object",
|
||
"title": "LocalServer",
|
||
"properties": {
|
||
"path": {
|
||
"type": "string",
|
||
"description": "The relative or absolute path to the data file(s).",
|
||
"examples": [
|
||
"./folder/data.parquet",
|
||
"./folder/*.parquet"
|
||
]
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"description": "The format of the file(s)",
|
||
"examples": [
|
||
"json",
|
||
"parquet",
|
||
"delta",
|
||
"csv"
|
||
]
|
||
}
|
||
},
|
||
"required": [
|
||
"path",
|
||
"format"
|
||
]
|
||
},
|
||
"MySqlServer": {
|
||
"type": "object",
|
||
"title": "MySqlServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the MySql server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port of the MySql server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"database"
|
||
]
|
||
},
|
||
"OracleServer": {
|
||
"type": "object",
|
||
"title": "OracleServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host to the oracle server",
|
||
"examples": [
|
||
"localhost"
|
||
]
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port to the oracle server.",
|
||
"examples": [
|
||
1523
|
||
]
|
||
},
|
||
"serviceName": {
|
||
"type": "string",
|
||
"description": "The name of the service.",
|
||
"examples": [
|
||
"service"
|
||
]
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"serviceName"
|
||
]
|
||
},
|
||
"PostgresServer": {
|
||
"type": "object",
|
||
"title": "PostgresServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host to the Postgres server"
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port to the Postgres server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema in the database."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"database",
|
||
"schema"
|
||
]
|
||
},
|
||
"PrestoServer": {
|
||
"type": "object",
|
||
"title": "PrestoServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host to the Presto server",
|
||
"examples": [
|
||
"localhost:8080"
|
||
]
|
||
},
|
||
"catalog": {
|
||
"type": "string",
|
||
"description": "The name of the catalog.",
|
||
"examples": [
|
||
"postgres"
|
||
]
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema.",
|
||
"examples": [
|
||
"public"
|
||
]
|
||
}
|
||
},
|
||
"required": [
|
||
"host"
|
||
]
|
||
},
|
||
"PubSubServer": {
|
||
"type": "object",
|
||
"title": "PubSubServer",
|
||
"properties": {
|
||
"project": {
|
||
"type": "string",
|
||
"description": "The GCP project name."
|
||
}
|
||
},
|
||
"required": [
|
||
"project"
|
||
]
|
||
},
|
||
"RedshiftServer": {
|
||
"type": "object",
|
||
"title": "RedshiftServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the Redshift server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema."
|
||
},
|
||
"region": {
|
||
"type": "string",
|
||
"description": "AWS region of Redshift server.",
|
||
"examples": ["us-east-1"]
|
||
},
|
||
"account": {
|
||
"type": "string",
|
||
"description": "The account used by the server."
|
||
}
|
||
},
|
||
"required": [
|
||
"database",
|
||
"schema"
|
||
]
|
||
},
|
||
"S3Server": {
|
||
"type": "object",
|
||
"title": "S3Server",
|
||
"properties": {
|
||
"location": {
|
||
"type": "string",
|
||
"format": "uri",
|
||
"description": "S3 URL, starting with `s3://`",
|
||
"examples": [
|
||
"s3://datacontract-example-orders-latest/data/{model}/*.json"
|
||
]
|
||
},
|
||
"endpointUrl": {
|
||
"type": "string",
|
||
"format": "uri",
|
||
"description": "The server endpoint for S3-compatible servers.",
|
||
"examples": ["https://minio.example.com"]
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"enum": [
|
||
"parquet",
|
||
"delta",
|
||
"json",
|
||
"csv"
|
||
],
|
||
"description": "File format."
|
||
},
|
||
"delimiter": {
|
||
"type": "string",
|
||
"enum": [
|
||
"new_line",
|
||
"array"
|
||
],
|
||
"description": "Only for format = json. How multiple json documents are delimited within one file"
|
||
}
|
||
},
|
||
"required": [
|
||
"location"
|
||
]
|
||
},
|
||
"SftpServer": {
|
||
"type": "object",
|
||
"title": "SftpServer",
|
||
"properties": {
|
||
"location": {
|
||
"type": "string",
|
||
"format": "uri",
|
||
"pattern": "^sftp://.*",
|
||
"description": "SFTP URL, starting with `sftp://`",
|
||
"examples": [
|
||
"sftp://123.123.12.123/{model}/*.json"
|
||
]
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"enum": [
|
||
"parquet",
|
||
"delta",
|
||
"json",
|
||
"csv"
|
||
],
|
||
"description": "File format."
|
||
},
|
||
"delimiter": {
|
||
"type": "string",
|
||
"enum": [
|
||
"new_line",
|
||
"array"
|
||
],
|
||
"description": "Only for format = json. How multiple json documents are delimited within one file"
|
||
}
|
||
},
|
||
"required": [
|
||
"location"
|
||
]
|
||
},
|
||
"SnowflakeServer": {
|
||
"type": "object",
|
||
"title": "SnowflakeServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host to the Snowflake server"
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port to the Snowflake server."
|
||
},
|
||
"account": {
|
||
"type": "string",
|
||
"description": "The Snowflake account used by the server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema."
|
||
},
|
||
"warehouse": {
|
||
"type": "string",
|
||
"description": "The name of the cluster of resources that is a Snowflake virtual warehouse."
|
||
}
|
||
},
|
||
"required": [
|
||
"account",
|
||
"database",
|
||
"schema"
|
||
]
|
||
},
|
||
"SqlserverServer": {
|
||
"type": "object",
|
||
"title": "SqlserverServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host to the database server",
|
||
"examples": [
|
||
"localhost"
|
||
]
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port to the database server.",
|
||
"default": 1433,
|
||
"examples": [
|
||
1433
|
||
]
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database.",
|
||
"examples": [
|
||
"database"
|
||
]
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema in the database.",
|
||
"examples": [
|
||
"dbo"
|
||
]
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"database",
|
||
"schema"
|
||
]
|
||
},
|
||
"SynapseServer": {
|
||
"type": "object",
|
||
"title": "SynapseServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the Synapse server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port of the Synapse server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"database"
|
||
]
|
||
},
|
||
"TrinoServer": {
|
||
"type": "object",
|
||
"title": "TrinoServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The Trino host URL.",
|
||
"examples": [
|
||
"localhost"
|
||
]
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The Trino port."
|
||
},
|
||
"catalog": {
|
||
"type": "string",
|
||
"description": "The name of the catalog.",
|
||
"examples": [
|
||
"hive"
|
||
]
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema in the database.",
|
||
"examples": [
|
||
"my_schema"
|
||
]
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"catalog",
|
||
"schema"
|
||
]
|
||
},
|
||
"VerticaServer": {
|
||
"type": "object",
|
||
"title": "VerticaServer",
|
||
"properties": {
|
||
"host": {
|
||
"type": "string",
|
||
"description": "The host of the Vertica server."
|
||
},
|
||
"port": {
|
||
"type": "integer",
|
||
"description": "The port of the Vertica server."
|
||
},
|
||
"database": {
|
||
"type": "string",
|
||
"description": "The name of the database."
|
||
},
|
||
"schema": {
|
||
"type": "string",
|
||
"description": "The name of the schema."
|
||
}
|
||
},
|
||
"required": [
|
||
"host",
|
||
"port",
|
||
"database",
|
||
"schema"
|
||
]
|
||
}
|
||
},
|
||
"SchemaElement": {
|
||
"type": "object",
|
||
"properties": {
|
||
"name": {
|
||
"type": "string",
|
||
"description": "Name of the element."
|
||
},
|
||
"physicalType": {
|
||
"type": "string",
|
||
"description": "The physical element data type in the data source.",
|
||
"examples": ["table", "view", "topic", "file"]
|
||
},
|
||
"description": {
|
||
"type": "string",
|
||
"description": "Description of the element."
|
||
},
|
||
"businessName": {
|
||
"type": "string",
|
||
"description": "The business name of the element."
|
||
},
|
||
"authoritativeDefinitions": {
|
||
"$ref": "#/$defs/AuthoritativeDefinitions"
|
||
},
|
||
"tags": {
|
||
"$ref": "#/$defs/Tags"
|
||
},
|
||
"customProperties": {
|
||
"$ref": "#/$defs/CustomProperties"
|
||
}
|
||
}
|
||
},
|
||
"SchemaObject": {
|
||
"type": "object",
|
||
"properties": {
|
||
"logicalType": {
|
||
"type": "string",
|
||
"description": "The logical element data type.",
|
||
"enum": ["object"]
|
||
},
|
||
"physicalName": {
|
||
"type": "string",
|
||
"description": "Physical name.",
|
||
"examples": ["table_1_2_0"]
|
||
},
|
||
"dataGranularityDescription": {
|
||
"type": "string",
|
||
"description": "Granular level of the data in the object.",
|
||
"examples": ["Aggregation by country"]
|
||
},
|
||
"properties": {
|
||
"type": "array",
|
||
"description": "A list of properties for the object.",
|
||
"items": {
|
||
"$ref": "#/$defs/SchemaProperty"
|
||
}
|
||
},
|
||
"quality": {
|
||
"$ref": "#/$defs/DataQualityChecks"
|
||
}
|
||
},
|
||
"allOf": [
|
||
{
|
||
"$ref": "#/$defs/SchemaElement"
|
||
}
|
||
],
|
||
"required": ["name"],
|
||
"unevaluatedProperties": false
|
||
},
|
||
"SchemaBaseProperty": {
|
||
"type": "object",
|
||
"properties": {
|
||
"primaryKey": {
|
||
"type": "boolean",
|
||
"description": "Boolean value specifying whether the element is a primary key or not. Default is false."
|
||
},
|
||
"primaryKeyPosition": {
|
||
"type": "integer",
|
||
"default": -1,
|
||
"description": "If element is a primary key, the position of the primary key element. Starts from 1. Example of `account_id, name` being primary key columns, `account_id` has primaryKeyPosition 1 and `name` primaryKeyPosition 2. Defaults to -1."
|
||
},
|
||
"logicalType": {
|
||
"type": "string",
|
||
"description": "The logical element data type.",
|
||
"enum": ["string", "date", "number", "integer", "object", "array", "boolean"]
|
||
},
|
||
"logicalTypeOptions": {
|
||
"type": "object",
|
||
"description": "Additional optional metadata to describe the logical type."
|
||
},
|
||
"physicalType": {
|
||
"type": "string",
|
||
"description": "The physical element data type in the data source. For example, VARCHAR(2), DOUBLE, INT."
|
||
},
|
||
"required": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "Indicates if the element is required, i.e. must not contain Null values; possible values are true and false. Default is false."
|
||
},
|
||
"unique": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "Indicates if the element contains unique values; possible values are true and false. Default is false."
|
||
},
|
||
"partitioned": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "Indicates if the element is partitioned; possible values are true and false."
|
||
},
|
||
"partitionKeyPosition": {
|
||
"type": "integer",
|
||
"default": -1,
|
||
"description": "If element is used for partitioning, the position of the partition element. Starts from 1. Example of `country, year` being partition columns, `country` has partitionKeyPosition 1 and `year` partitionKeyPosition 2. Defaults to -1."
|
||
},
|
||
"classification": {
|
||
"type": "string",
|
||
"description": "Can be anything, from simple labels like confidential, restricted, and public to more advanced categorization. Some companies, like PayPal, use a data classification indicating the class of data in the element; expected values are 1, 2, 3, 4, or 5.",
|
||
"examples": ["confidential", "restricted", "public"]
|
||
},
|
||
"encryptedName": {
|
||
"type": "string",
|
||
"description": "The element name within the dataset that contains the encrypted element value. For example, unencrypted element `email_address` might have an encryptedName of `email_address_encrypt`."
|
||
},
|
||
"transformSourceObjects": {
|
||
"type": "array",
|
||
"description": "List of objects in the data source used in the transformation.",
|
||
"items": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"transformLogic": {
|
||
"type": "string",
|
||
"description": "Logic used in the element transformation."
|
||
},
|
||
"transformDescription": {
|
||
"type": "string",
|
||
"description": "Describes the transform logic in very simple terms."
|
||
},
|
||
"examples": {
|
||
"type": "array",
|
||
"description": "List of sample element values.",
|
||
"items": {
|
||
"$ref": "#/$defs/AnyType"
|
||
}
|
||
},
|
||
"criticalDataElement": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "True or false indicator; true if the element is considered a critical data element (CDE), false otherwise."
|
||
},
|
||
"quality": {
|
||
"$ref": "#/$defs/DataQualityChecks"
|
||
}
|
||
},
|
||
"allOf": [
|
||
{
|
||
"$ref": "#/$defs/SchemaElement"
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"logicalType": {
|
||
"const": "string"
|
||
}
|
||
}
|
||
},
|
||
"then": {
|
||
"properties": {
|
||
"logicalTypeOptions": {
|
||
"type": "object",
|
||
"properties": {
|
||
"minLength": {
|
||
"type": "integer",
|
||
"minimum": 0,
|
||
"description": "Minimum length of the string."
|
||
},
|
||
"maxLength": {
|
||
"type": "integer",
|
||
"minimum": 0,
|
||
"description": "Maximum length of the string."
|
||
},
|
||
"pattern": {
|
||
"type": "string",
|
||
"description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)."
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"],
|
||
"description": "Provides extra context about what format the string follows."
|
||
}
|
||
},
|
||
"additionalProperties": false
|
||
}
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"logicalType": {
|
||
"const": "date"
|
||
}
|
||
}
|
||
},
|
||
"then": {
|
||
"properties": {
|
||
"logicalTypeOptions": {
|
||
"type": "object",
|
||
"properties": {
|
||
"format": {
|
||
"type": "string",
|
||
"examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"],
|
||
"description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'."
|
||
},
|
||
"exclusiveMaximum": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||
},
|
||
"maximum": {
|
||
"type": "string",
|
||
"description": "All date values are less than or equal to this value (values <= maximum)."
|
||
},
|
||
"exclusiveMinimum": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||
},
|
||
"minimum": {
|
||
"type": "string",
|
||
"description": "All date values are greater than or equal to this value (values >= minimum)."
|
||
}
|
||
},
|
||
"additionalProperties": false
|
||
}
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"anyOf": [
|
||
{
|
||
"properties": {
|
||
"logicalType": {
|
||
"const": "integer"
|
||
}
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"then": {
|
||
"properties": {
|
||
"logicalTypeOptions": {
|
||
"type": "object",
|
||
"properties": {
|
||
"multipleOf": {
|
||
"type": "number",
|
||
"exclusiveMinimum": 0,
|
||
"description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
|
||
},
|
||
"maximum": {
|
||
"type": "number",
|
||
"description": "All values are less than or equal to this value (values <= maximum)."
|
||
},
|
||
"exclusiveMaximum": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||
},
|
||
"minimum": {
|
||
"type": "number",
|
||
"description": "All values are greater than or equal to this value (values >= minimum)."
|
||
},
|
||
"exclusiveMinimum": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"default": "i32",
|
||
"description": "Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types).",
|
||
"enum": ["i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128"]
|
||
}
|
||
},
|
||
"additionalProperties": false
|
||
}
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"anyOf": [
|
||
{
|
||
"properties": {
|
||
"logicalType": {
|
||
"const": "number"
|
||
}
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"then": {
|
||
"properties": {
|
||
"logicalTypeOptions": {
|
||
"type": "object",
|
||
"properties": {
|
||
"multipleOf": {
|
||
"type": "number",
|
||
"exclusiveMinimum": 0,
|
||
"description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
|
||
},
|
||
"maximum": {
|
||
"type": "number",
|
||
"description": "All values are less than or equal to this value (values <= maximum)."
|
||
},
|
||
"exclusiveMaximum": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||
},
|
||
"minimum": {
|
||
"type": "number",
|
||
"description": "All values are greater than or equal to this value (values >= minimum)."
|
||
},
|
||
"exclusiveMinimum": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||
},
|
||
"format": {
|
||
"type": "string",
|
||
"default": "i32",
|
||
"description": "Format of the value in terms of how many bits of space it can use (follows the Rust float types).",
|
||
"enum": ["f32", "f64"]
|
||
}
|
||
},
|
||
"additionalProperties": false
|
||
}
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"logicalType": {
|
||
"const": "object"
|
||
}
|
||
}
|
||
},
|
||
"then": {
|
||
"properties": {
|
||
"logicalTypeOptions": {
|
||
"type": "object",
|
||
"properties": {
|
||
"maxProperties": {
|
||
"type": "integer",
|
||
"minimum": 0,
|
||
"description": "Maximum number of properties."
|
||
},
|
||
"minProperties": {
|
||
"type": "integer",
|
||
"minimum": 0,
|
||
"default": 0,
|
||
"description": "Minimum number of properties."
|
||
},
|
||
"required": {
|
||
"type": "array",
|
||
"items": {
|
||
"type": "string"
|
||
},
|
||
"minItems": 1,
|
||
"uniqueItems": true,
|
||
"description": "Property names that are required to exist in the object."
|
||
}
|
||
},
|
||
"additionalProperties": false
|
||
},
|
||
"properties": {
|
||
"type": "array",
|
||
"description": "A list of properties for the object.",
|
||
"items": {
|
||
"$ref": "#/$defs/SchemaProperty"
|
||
}
|
||
}
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"logicalType": {
|
||
"const": "array"
|
||
}
|
||
}
|
||
},
|
||
"then": {
|
||
"properties": {
|
||
"logicalTypeOptions": {
|
||
"type": "object",
|
||
"properties": {
|
||
"maxItems": {
|
||
"type": "integer",
|
||
"minimum": 0,
|
||
"description": "Maximum number of items."
|
||
},
|
||
"minItems": {
|
||
"type": "integer",
|
||
"minimum": 0,
|
||
"default": 0,
|
||
"description": "Minimum number of items."
|
||
},
|
||
"uniqueItems": {
|
||
"type": "boolean",
|
||
"default": false,
|
||
"description": "If set to true, all items in the array are unique."
|
||
}
|
||
},
|
||
"additionalProperties": false
|
||
},
|
||
"items": {
|
||
"$ref": "#/$defs/SchemaItemProperty",
|
||
"description": "List of items in an array (only applicable when `logicalType: array`)."
|
||
}
|
||
}
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"SchemaProperty": {
|
||
"type": "object",
|
||
"$ref": "#/$defs/SchemaBaseProperty",
|
||
"required": ["name"],
|
||
"unevaluatedProperties": false
|
||
},
|
||
"SchemaItemProperty": {
|
||
"type": "object",
|
||
"$ref": "#/$defs/SchemaBaseProperty",
|
||
"properties": {
|
||
"properties": {
|
||
"type": "array",
|
||
"description": "A list of properties for the object.",
|
||
"items": {
|
||
"$ref": "#/$defs/SchemaProperty"
|
||
}
|
||
}
|
||
},
|
||
"unevaluatedProperties": false
|
||
},
|
||
"Tags": {
|
||
"type": "array",
|
||
"description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level.",
|
||
"items": {
|
||
"type": "string"
|
||
}
|
||
},
|
||
"DataQuality": {
|
||
"type": "object",
|
||
"properties": {
|
||
"authoritativeDefinitions": {
|
||
"$ref": "#/$defs/AuthoritativeDefinitions"
|
||
},
|
||
"businessImpact": {
|
||
"type": "string",
|
||
"description": "Consequences of the rule failure.",
|
||
"examples": ["operational", "regulatory"]
|
||
},
|
||
"customProperties": {
|
||
"type": "array",
|
||
"description": "Additional properties required for rule execution.",
|
||
"items": {
|
||
"$ref": "#/$defs/CustomProperty"
|
||
}
|
||
},
|
||
"description": {
|
||
"type": "string",
|
||
"description": "Describe the quality check to be completed."
|
||
},
|
||
"dimension": {
|
||
"type": "string",
|
||
"description": "The key performance indicator (KPI) or dimension for data quality.",
|
||
"enum": ["accuracy", "completeness", "conformity", "consistency", "coverage", "timeliness", "uniqueness"]
|
||
},
|
||
"method": {
|
||
"type": "string",
|
||
"examples": ["reconciliation"]
|
||
},
|
||
"name": {
|
||
"type": "string",
|
||
"description": "Name of the data quality check."
|
||
},
|
||
"schedule": {
|
||
"type": "string",
|
||
"description": "Rule execution schedule details.",
|
||
"examples": ["0 20 * * *"]
|
||
},
|
||
"scheduler": {
|
||
"type": "string",
|
||
"description": "The name or type of scheduler used to start the data quality check.",
|
||
"examples": ["cron"]
|
||
},
|
||
"severity": {
|
||
"type": "string",
|
||
"description": "The severity of the quality rule.",
|
||
"examples": ["info", "warning", "error"]
|
||
},
|
||
"tags": {
|
||
"$ref": "#/$defs/Tags"
|
||
},
|
||
"type": {
|
||
"type": "string",
|
||
"description": "The type of quality check. 'text' is human-readable text that describes the quality of the data. 'library' is a set of maintained predefined quality attributes such as row count or unique. 'sql' is an individual SQL query that returns a value that can be compared. 'custom' is quality attributes that are vendor-specific, such as Soda or Great Expectations.",
|
||
"enum": ["text", "library", "sql", "custom"],
|
||
"default": "library"
|
||
},
|
||
"unit": {
|
||
"type": "string",
|
||
"description": "Unit the rule is using, popular values are `rows` or `percent`, but any value is allowed.",
|
||
"examples": ["rows", "percent"]
|
||
}
|
||
},
|
||
"allOf": [
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "library"
|
||
}
|
||
}
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/DataQualityLibrary"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "sql"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/DataQualitySql"
|
||
}
|
||
},
|
||
{
|
||
"if": {
|
||
"properties": {
|
||
"type": {
|
||
"const": "custom"
|
||
}
|
||
},
|
||
"required": ["type"]
|
||
},
|
||
"then": {
|
||
"$ref": "#/$defs/DataQualityCustom"
|
||
}
|
||
}
|
||
]
|
||
},
|
||
"DataQualityChecks": {
|
||
"type": "array",
|
||
"description": "Data quality rules with all the relevant information for rule setup and execution.",
|
||
"items": {
|
||
"$ref": "#/$defs/DataQuality"
|
||
}
|
||
},
|
||
"DataQualityLibrary": {
|
||
"type": "object",
|
||
"properties": {
|
||
"rule": {
|
||
"type": "string",
|
||
"description": "Define a data quality check based on the predefined rules as per ODCS.",
|
||
"examples": ["duplicateCount", "validValues", "rowCount"]
|
||
},
|
||
"mustBe": {
|
||
"description": "Must be equal to the value to be valid. When using numbers, it is equivalent to '='."
|
||
},
|
||
"mustNotBe": {
|
||
"description": "Must not be equal to the value to be valid. When using numbers, it is equivalent to '!='."
|
||
},
|
||
"mustBeGreaterThan": {
|
||
"type": "number",
|
||
"description": "Must be greater than the value to be valid. It is equivalent to '>'."
|
||
},
|
||
"mustBeGreaterOrEqualTo": {
|
||
"type": "number",
|
||
"description": "Must be greater than or equal to the value to be valid. It is equivalent to '>='."
|
||
},
|
||
"mustBeLessThan": {
|
||
"type": "number",
|
||
"description": "Must be less than the value to be valid. It is equivalent to '<'."
|
||
},
|
||
"mustBeLessOrEqualTo": {
|
||
"type": "number",
|
||
"description": "Must be less than or equal to the value to be valid. It is equivalent to '<='."
|
||
},
|
||
"mustBeBetween": {
|
||
"type": "array",
|
||
"description": "Must be between the two numbers to be valid. Smallest number first in the array.",
|
||
"minItems": 2,
|
||
"maxItems": 2,
|
||
"uniqueItems": true,
|
||
"items": {
|
||
"type": "number"
|
||
}
|
||
},
|
||
"mustNotBeBetween": {
|
||
"type": "array",
|
||
"description": "Must not be between the two numbers to be valid. Smallest number first in the array.",
|
||
"minItems": 2,
|
||
"maxItems": 2,
|
||
"uniqueItems": true,
|
||
"items": {
|
||
"type": "number"
|
||
}
|
||
}
|
||
},
|
||
"required": ["rule"]
|
||
},
|
||
"DataQualitySql": {
|
||
"type": "object",
|
||
"properties": {
|
||
"query": {
|
||
"type": "string",
|
||
"description": "Query string that adheres to the dialect of the provided server.",
|
||
"examples": ["SELECT COUNT(*) FROM ${table} WHERE ${column} IS NOT NULL"]
|
||
}
|
||
},
|
||
"required": ["query"]
|
||
},
|
||
"DataQualityCustom": {
|
||
"type": "object",
|
||
"properties": {
|
||
"engine": {
|
||
"type": "string",
|
||
"description": "Name of the engine which executes the data quality checks.",
|
||
"examples": ["soda", "great-expectations", "monte-carlo", "dbt"]
|
||
},
|
||
"implementation": {
|
||
"oneOf": [
|
||
{
|
||
"type": "string"
|
||
},
|
||
{
|
||
"type": "object"
|
||
}
|
||
]
|
||
}
|
||
},
|
||
"required": ["engine", "implementation"]
|
||
},
|
||
"AuthoritativeDefinitions": {
|
||
"type": "array",
|
||
"description": "List of links to sources that provide more details on the dataset; examples would be a link to an external definition, a training video, a GitHub repo, Collibra, or another tool. Authoritative definitions follow the same structure in the standard.",
|
||
"items": {
|
||
"type": "object",
|
||
"properties": {
|
||
"url": {
|
||
"type": "string",
|
||
"description": "URL to the authority."
|
||
},
|
||
"type": {
|
||
"type": "string",
|
||
"description": "Type of definition for authority: v2.3 adds standard values: `businessDefinition`, `transformationImplementation`, `videoTutorial`, `tutorial`, and `implementation`.",
|
||
"examples": ["businessDefinition", "transformationImplementation", "videoTutorial", "tutorial", "implementation"]
|
||
}
|
||
},
|
||
"required": ["url", "type"]
|
||
}
|
||
},
|
||
"Support": {
|
||
"type": "array",
|
||
"description": "Top level for support channels.",
|
||
"items": {
|
||
"$ref": "#/$defs/SupportItem"
|
||
}
|
||
},
|
||
"SupportItem": {
|
||
"type": "object",
|
||
"properties": {
|
||
"channel": {
|
||
"type": "string",
|
||
"description": "Channel name or identifier."
|
||
},
|
||
"url": {
|
||
"type": "string",
|
||
"description": "Access URL using normal [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax) (https, mailto, etc.)."
|
||
},
|
||
"description": {
|
||
"type": "string",
|
||
"description": "Description of the channel, free text."
|
||
},
|
||
"tool": {
|
||
"type": "string",
|
||
"description": "Name of the tool, value can be `email`, `slack`, `teams`, `discord`, `ticket`, or `other`.",
|
||
"examples": ["email", "slack", "teams", "discord", "ticket", "other"]
|
||
},
|
||
"scope": {
|
||
"type": "string",
|
||
"description": "Scope can be: `interactive`, `announcements`, `issues`.",
|
||
"examples": ["interactive", "announcements", "issues"]
|
||
},
|
||
"invitationUrl": {
|
||
"type": "string",
|
||
"description": "Some tools use an invitation URL for requesting or subscribing. Follows the [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax)."
|
||
}
|
||
},
|
||
"required": ["channel", "url"]
|
||
},
|
||
"Pricing": {
|
||
"type": "object",
|
||
"properties": {
|
||
"priceAmount": {
|
||
"type": "number",
|
||
"description": "Subscription price per unit of measure in `priceUnit`."
|
||
},
|
||
"priceCurrency": {
|
||
"type": "string",
|
||
"description": "Currency of the subscription price in `price.priceAmount`."
|
||
},
|
||
"priceUnit": {
|
||
"type": "string",
|
||
"description": "The unit of measure for calculating cost. Examples: megabyte, gigabyte."
|
||
}
|
||
}
|
||
},
|
||
"Team": {
|
||
"type": "object",
|
||
"properties": {
|
||
"username": {
|
||
"type": "string",
|
||
"description": "The user's username or email."
|
||
},
|
||
"role": {
|
||
"type": "string",
|
||
"description": "The user's job role; examples might be owner, data steward. There is no limit on the role."
|
||
},
|
||
"dateIn": {
|
||
"type": "string",
|
||
"format": "date",
|
||
"description": "The date when the user joined the team."
|
||
},
|
||
"dateOut": {
|
||
"type": "string",
|
||
"format": "date",
|
||
"description": "The date when the user ceased to be part of the team."
|
||
},
|
||
"replacedByUsername": {
|
||
"type": "string",
|
||
"description": "The username of the user who replaced the previous user."
|
||
}
|
||
}
|
||
},
|
||
"Role": {
|
||
"type": "object",
|
||
"properties": {
|
||
"role": {
|
||
"type": "string",
|
||
"description": "Name of the IAM role that provides access to the dataset."
|
||
},
|
||
"description": {
|
||
"type": "string",
|
||
"description": "Description of the IAM role and its permissions."
|
||
},
|
||
"access": {
|
||
"type": "string",
|
||
"description": "The type of access provided by the IAM role."
|
||
},
|
||
"firstLevelApprovers": {
|
||
"type": "string",
|
||
"description": "The name(s) of the first-level approver(s) of the role."
|
||
},
|
||
"secondLevelApprovers": {
|
||
"type": "string",
|
||
"description": "The name(s) of the second-level approver(s) of the role."
|
||
}
|
||
},
|
||
"required": ["role"]
|
||
},
|
||
"ServiceLevelAgreementProperty": {
|
||
"type": "object",
|
||
"properties": {
|
||
"property": {
|
||
"type": "string",
|
||
"description": "Specific property in SLA, check the periodic table. May require units (more details to come)."
|
||
},
|
||
"value": {
|
||
"anyOf": [
|
||
{
|
||
"type": "string"
|
||
},
|
||
{
|
||
"type": "number"
|
||
},
|
||
{
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"type": "boolean"
|
||
},
|
||
{
|
||
"type": "null"
|
||
}
|
||
],
|
||
"description": "Agreement value. The label will change based on the property itself."
|
||
},
|
||
"valueExt": {
|
||
"$ref": "#/$defs/AnyNonCollectionType",
|
||
"description": "Extended agreement value. The label will change based on the property itself."
|
||
},
|
||
"unit": {
|
||
"type": "string",
|
||
"description": "**d**, day, days for days; **y**, yr, years for years, etc. Units use the ISO standard."
|
||
},
|
||
"element": {
|
||
"type": "string",
|
||
"description": "Element(s) to check on. Multiple elements should be extremely rare and, if so, separated by commas."
|
||
},
|
||
"driver": {
|
||
"type": "string",
|
||
"description": "Describes the importance of the SLA from the list of: `regulatory`, `analytics`, or `operational`.",
|
||
"examples": ["regulatory", "analytics", "operational"]
|
||
}
|
||
},
|
||
"required": ["property", "value"]
|
||
},
|
||
"CustomProperties": {
|
||
"type": "array",
|
||
"description": "A list of key/value pairs for custom properties.",
|
||
"items": {
|
||
"$ref": "#/$defs/CustomProperty"
|
||
}
|
||
},
|
||
"CustomProperty": {
|
||
"type": "object",
|
||
"properties": {
|
||
"property": {
|
||
"type": "string",
|
||
"description": "The name of the key. Names should be in camel case–the same as if they were permanent properties in the contract."
|
||
},
|
||
"value": {
|
||
"$ref": "#/$defs/AnyType",
|
||
"description": "The value of the key."
|
||
}
|
||
}
|
||
},
|
||
"AnyType": {
|
||
"anyOf": [
|
||
{
|
||
"type": "string"
|
||
},
|
||
{
|
||
"type": "number"
|
||
},
|
||
{
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"type": "boolean"
|
||
},
|
||
{
|
||
"type": "null"
|
||
},
|
||
{
|
||
"type": "array"
|
||
},
|
||
{
|
||
"type": "object"
|
||
}
|
||
]
|
||
},
|
||
"AnyNonCollectionType": {
|
||
"anyOf": [
|
||
{
|
||
"type": "string"
|
||
},
|
||
{
|
||
"type": "number"
|
||
},
|
||
{
|
||
"type": "integer"
|
||
},
|
||
{
|
||
"type": "boolean"
|
||
},
|
||
{
|
||
"type": "null"
|
||
}
|
||
]
|
||
}
|
||
}
|
||
}
|