Files
open-data-contract-standard/schema/odcs-json-schema-v3.0.1.json

2363 lines
68 KiB
JSON
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"title": "Open Data Contract Standard (ODCS)",
"description": "An open data contract specification to establish agreement between data producers and consumers.",
"type": "object",
"properties": {
"version": {
"type": "string",
"description": "Current version of the data contract."
},
"kind": {
"type": "string",
"default": "DataContract",
"description": "The kind of file this is. Valid value is `DataContract`.",
"enum": ["DataContract"]
},
"apiVersion": {
"type": "string",
"default": "v3.0.1",
"description": "Version of the standard used to build data contract. Default value is v3.0.1.",
"enum": ["v3.0.1", "v3.0.0", "v2.2.2", "v2.2.1", "v2.2.0"]
},
"id": {
"type": "string",
"description": "A unique identifier used to reduce the risk of dataset name collisions, such as a UUID."
},
"name": {
"type": "string",
"description": "Name of the data contract."
},
"tenant": {
"type": "string",
"description": "Indicates the property the data is primarily associated with. Value is case insensitive."
},
"tags": {
"$ref": "#/$defs/Tags"
},
"status": {
"type": "string",
"description": "Current status of the dataset.",
"examples": [
"proposed", "draft", "active", "deprecated", "retired"
]
},
"servers": {
"type": "array",
"description": "List of servers where the datasets reside.",
"items": {
"$ref": "#/$defs/Server"
}
},
"dataProduct": {
"type": "string",
"description": "The name of the data product."
},
"description": {
"type": "object",
"description": "High level description of the dataset.",
"properties": {
"usage": {
"type": "string",
"description": "Intended usage of the dataset."
},
"purpose": {
"type": "string",
"description": "Purpose of the dataset."
},
"limitations": {
"type": "string",
"description": "Limitations of the dataset."
},
"authoritativeDefinitions": {
"$ref": "#/$defs/AuthoritativeDefinitions"
},
"customProperties": {
"$ref": "#/$defs/CustomProperties"
}
}
},
"domain": {
"type": "string",
"description": "Name of the logical data domain.",
"examples": ["imdb_ds_aggregate", "receiver_profile_out", "transaction_profile_out"]
},
"schema": {
"type": "array",
"description": "A list of elements within the schema to be cataloged.",
"items": {
"$ref": "#/$defs/SchemaObject"
}
},
"support": {
"$ref": "#/$defs/Support"
},
"price": {
"$ref": "#/$defs/Pricing"
},
"team": {
"type": "array",
"items": {
"$ref": "#/$defs/Team"
}
},
"roles": {
"type": "array",
"description": "A list of roles that will provide user access to the dataset.",
"items": {
"$ref": "#/$defs/Role"
}
},
"slaDefaultElement": {
"type": "string",
"description": "Element (using the element path notation) to do the checks on."
},
"slaProperties": {
"type": "array",
"description": "A list of key/value pairs for SLA specific properties. There is no limit on the type of properties (more details to come).",
"items": {
"$ref": "#/$defs/ServiceLevelAgreementProperty"
}
},
"authoritativeDefinitions": {
"$ref": "#/$defs/AuthoritativeDefinitions"
},
"customProperties": {
"$ref": "#/$defs/CustomProperties"
},
"contractCreatedTs": {
"type": "string",
"format": "date-time",
"description": "Timestamp in UTC of when the data contract was created."
}
},
"required": ["version", "apiVersion", "kind", "id", "status"],
"additionalProperties": false,
"$defs": {
"Server": {
"type": "object",
"description": "Data source details of where data is physically stored.",
"properties": {
"server": {
"type": "string",
"description": "Identifier of the server."
},
"type": {
"type": "string",
"description": "Type of the server.",
"enum": [
"api", "athena", "azure", "bigquery", "clickhouse", "databricks", "denodo", "dremio",
"duckdb", "glue", "cloudsql", "db2", "informix", "kafka", "kinesis", "local",
"mysql", "oracle", "postgresql", "postgres", "presto", "pubsub",
"redshift", "s3", "sftp", "snowflake", "sqlserver", "synapse", "trino", "vertica", "custom"
]
},
"description": {
"type": "string",
"description": "Description of the server."
},
"environment": {
"type": "string",
"description": "Environment of the server.",
"examples": ["prod", "preprod", "dev", "uat"]
},
"roles": {
"type": "array",
"description": "List of roles that have access to the server.",
"items": {
"$ref": "#/$defs/Role"
}
},
"customProperties": {
"$ref": "#/$defs/CustomProperties"
}
},
"allOf": [
{
"if": {
"properties": {
"type": {
"const": "api"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/ApiServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "athena"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/AthenaServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "azure"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/AzureServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "bigquery"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/BigQueryServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "clickhouse"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/ClickHouseServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "databricks"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/DatabricksServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "denodo"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/DenodoServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "dremio"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/DremioServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "duckdb"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/DuckdbServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "glue"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/GlueServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "cloudsql"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/GoogleCloudSqlServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "db2"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/IBMDB2Server"
}
},
{
"if": {
"properties": {
"type": {
"const": "informix"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/InformixServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "custom"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/CustomServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "kafka"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/KafkaServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "kinesis"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/KinesisServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "local"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/LocalServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "mysql"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/MySqlServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "oracle"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/OracleServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "postgresql"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/PostgresServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "postgres"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/PostgresServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "presto"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/PrestoServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "pubsub"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/PubSubServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "redshift"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/RedshiftServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "s3"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/S3Server"
}
},
{
"if": {
"properties": {
"type": {
"const": "sftp"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/SftpServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "snowflake"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/SnowflakeServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "sqlserver"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/SqlserverServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "synapse"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/SynapseServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "trino"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/TrinoServer"
}
},
{
"if": {
"properties": {
"type": {
"const": "vertica"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/ServerSource/VerticaServer"
}
}
],
"required": ["server", "type"]
},
"ServerSource": {
"ApiServer": {
"type": "object",
"title": "ApiServer",
"properties": {
"location": {
"type": "string",
"format": "uri",
"description": "The url to the API.",
"examples": [
"https://api.example.com/v1"
]
}
},
"required": [
"location"
]
},
"AthenaServer": {
"type": "object",
"title": "AthenaServer",
"properties": {
"stagingDir": {
"type": "string",
"format": "uri",
"description": "Amazon Athena automatically stores query results and metadata information for each query that runs in a query result location that you can specify in Amazon S3.",
"examples": [
"s3://my_storage_account_name/my_container/path"
]
},
"schema": {
"type": "string",
"description": "Identify the schema in the data source in which your tables exist."
},
"catalog": {
"type": "string",
"description": "Identify the name of the Data Source, also referred to as a Catalog.",
"default": "awsdatacatalog"
},
"regionName": {
"type": "string",
"description": "The region your AWS account uses.",
"examples": ["eu-west-1"]
}
},
"required": [
"stagingDir",
"schema"
]
},
"AzureServer": {
"type": "object",
"title": "AzureServer",
"properties": {
"location": {
"type": "string",
"format": "uri",
"description": "Fully qualified path to Azure Blob Storage or Azure Data Lake Storage (ADLS), supports globs.",
"examples": [
"az://my_storage_account_name.blob.core.windows.net/my_container/path/*.parquet",
"abfss://my_storage_account_name.dfs.core.windows.net/my_container_name/path/*.parquet"
]
},
"format": {
"type": "string",
"enum": [
"parquet",
"delta",
"json",
"csv"
],
"description": "File format."
},
"delimiter": {
"type": "string",
"enum": [
"new_line",
"array"
],
"description": "Only for format = json. How multiple json documents are delimited within one file"
}
},
"required": [
"location",
"format"
]
},
"BigQueryServer": {
"type": "object",
"title": "BigQueryServer",
"properties": {
"project": {
"type": "string",
"description": "The GCP project name."
},
"dataset": {
"type": "string",
"description": "The GCP dataset name."
}
},
"required": [
"project",
"dataset"
]
},
"ClickHouseServer": {
"type": "object",
"title": "ClickHouseServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the ClickHouse server."
},
"port": {
"type": "integer",
"description": "The port to the ClickHouse server."
},
"database": {
"type": "string",
"description": "The name of the database."
}
},
"required": [
"host",
"port",
"database"
]
},
"DatabricksServer": {
"type": "object",
"title": "DatabricksServer",
"properties": {
"host": {
"type": "string",
"description": "The Databricks host",
"examples": [
"dbc-abcdefgh-1234.cloud.databricks.com"
]
},
"catalog": {
"type": "string",
"description": "The name of the Hive or Unity catalog"
},
"schema": {
"type": "string",
"description": "The schema name in the catalog"
}
},
"required": [
"catalog",
"schema"
]
},
"DenodoServer": {
"type": "object",
"title": "DenodoServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the Denodo server."
},
"port": {
"type": "integer",
"description": "The port of the Denodo server."
},
"database": {
"type": "string",
"description": "The name of the database."
}
},
"required": [
"host",
"port"
]
},
"DremioServer": {
"type": "object",
"title": "DremioServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the Dremio server."
},
"port": {
"type": "integer",
"description": "The port of the Dremio server."
},
"schema": {
"type": "string",
"description": "The name of the schema."
}
},
"required": [
"host",
"port"
]
},
"DuckdbServer": {
"type": "object",
"title": "DuckdbServer",
"properties": {
"database": {
"type": "string",
"description": "Path to duckdb database file."
},
"schema": {
"type": "string",
"description": "The name of the schema."
}
},
"required": [
"database"
]
},
"GlueServer": {
"type": "object",
"title": "GlueServer",
"properties": {
"account": {
"type": "string",
"description": "The AWS Glue account",
"examples": [
"1234-5678-9012"
]
},
"database": {
"type": "string",
"description": "The AWS Glue database name",
"examples": [
"my_database"
]
},
"location": {
"type": "string",
"format": "uri",
"description": "The AWS S3 path. Must be in the form of a URL.",
"examples": [
"s3://datacontract-example-orders-latest/data/{model}"
]
},
"format": {
"type": "string",
"description": "The format of the files",
"examples": [
"parquet",
"csv",
"json",
"delta"
]
}
},
"required": [
"account",
"database"
]
},
"GoogleCloudSqlServer": {
"type": "object",
"title": "GoogleCloudSqlServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the Google Cloud Sql server."
},
"port": {
"type": "integer",
"description": "The port of the Google Cloud Sql server."
},
"database": {
"type": "string",
"description": "The name of the database."
},
"schema": {
"type": "string",
"description": "The name of the schema."
}
},
"required": [
"host",
"port",
"database",
"schema"
]
},
"IBMDB2Server": {
"type": "object",
"title": "IBMDB2Server",
"properties": {
"host": {
"type": "string",
"description": "The host of the IBM DB2 server."
},
"port": {
"type": "integer",
"description": "The port of the IBM DB2 server."
},
"database": {
"type": "string",
"description": "The name of the database."
},
"schema": {
"type": "string",
"description": "The name of the schema."
}
},
"required": [
"host",
"port",
"database"
]
},
"InformixServer": {
"type": "object",
"title": "InformixServer",
"properties": {
"host": {
"type": "string",
"description": "The host to the Informix server."
},
"port": {
"type": "integer",
"description": "The port to the Informix server. Defaults to 9088."
},
"database": {
"type": "string",
"description": "The name of the database."
}
},
"required": [
"host",
"database"
]
},
"CustomServer": {
"type": "object",
"title": "CustomServer",
"properties": {
"account": {
"type": "string",
"description": "Account used by the server."
},
"catalog": {
"type": "string",
"description": "Name of the catalog."
},
"database": {
"type": "string",
"description": "Name of the database."
},
"dataset": {
"type": "string",
"description": "Name of the dataset."
},
"delimiter": {
"type": "string",
"description": "Delimiter."
},
"endpointUrl": {
"type": "string",
"description": "Server endpoint.",
"format": "uri"
},
"format": {
"type": "string",
"description": "File format."
},
"host": {
"type": "string",
"description": "Host name or IP address."
},
"location": {
"type": "string",
"description": "A URL to a location.",
"format": "uri"
},
"path": {
"type": "string",
"description": "Relative or absolute path to the data file(s)."
},
"port": {
"type": "integer",
"description": "Port to the server. No default value is assumed for custom servers."
},
"project": {
"type": "string",
"description": "Project name."
},
"region": {
"type": "string",
"description": "Cloud region."
},
"regionName": {
"type": "string",
"description": "Region name."
},
"schema": {
"type": "string",
"description": "Name of the schema."
},
"serviceName": {
"type": "string",
"description": "Name of the service."
},
"stagingDir": {
"type": "string",
"description": "Staging directory."
},
"warehouse": {
"type": "string",
"description": "Name of the cluster or warehouse."
}
}
},
"KafkaServer": {
"type": "object",
"title": "KafkaServer",
"description": "Kafka Server",
"properties": {
"host": {
"type": "string",
"description": "The bootstrap server of the kafka cluster."
},
"format": {
"type": "string",
"description": "The format of the messages.",
"examples": ["json", "avro", "protobuf", "xml"],
"default": "json"
}
},
"required": [
"host"
]
},
"KinesisServer": {
"type": "object",
"title": "KinesisDataStreamsServer",
"description": "Kinesis Data Streams Server",
"properties": {
"region": {
"type": "string",
"description": "AWS region.",
"examples": [
"eu-west-1"
]
},
"format": {
"type": "string",
"description": "The format of the record",
"examples": [
"json",
"avro",
"protobuf"
]
}
}
},
"LocalServer": {
"type": "object",
"title": "LocalServer",
"properties": {
"path": {
"type": "string",
"description": "The relative or absolute path to the data file(s).",
"examples": [
"./folder/data.parquet",
"./folder/*.parquet"
]
},
"format": {
"type": "string",
"description": "The format of the file(s)",
"examples": [
"json",
"parquet",
"delta",
"csv"
]
}
},
"required": [
"path",
"format"
]
},
"MySqlServer": {
"type": "object",
"title": "MySqlServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the MySql server."
},
"port": {
"type": "integer",
"description": "The port of the MySql server."
},
"database": {
"type": "string",
"description": "The name of the database."
}
},
"required": [
"host",
"port",
"database"
]
},
"OracleServer": {
"type": "object",
"title": "OracleServer",
"properties": {
"host": {
"type": "string",
"description": "The host to the oracle server",
"examples": [
"localhost"
]
},
"port": {
"type": "integer",
"description": "The port to the oracle server.",
"examples": [
1523
]
},
"serviceName": {
"type": "string",
"description": "The name of the service.",
"examples": [
"service"
]
}
},
"required": [
"host",
"port",
"serviceName"
]
},
"PostgresServer": {
"type": "object",
"title": "PostgresServer",
"properties": {
"host": {
"type": "string",
"description": "The host to the Postgres server"
},
"port": {
"type": "integer",
"description": "The port to the Postgres server."
},
"database": {
"type": "string",
"description": "The name of the database."
},
"schema": {
"type": "string",
"description": "The name of the schema in the database."
}
},
"required": [
"host",
"port",
"database",
"schema"
]
},
"PrestoServer": {
"type": "object",
"title": "PrestoServer",
"properties": {
"host": {
"type": "string",
"description": "The host to the Presto server",
"examples": [
"localhost:8080"
]
},
"catalog": {
"type": "string",
"description": "The name of the catalog.",
"examples": [
"postgres"
]
},
"schema": {
"type": "string",
"description": "The name of the schema.",
"examples": [
"public"
]
}
},
"required": [
"host"
]
},
"PubSubServer": {
"type": "object",
"title": "PubSubServer",
"properties": {
"project": {
"type": "string",
"description": "The GCP project name."
}
},
"required": [
"project"
]
},
"RedshiftServer": {
"type": "object",
"title": "RedshiftServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the Redshift server."
},
"database": {
"type": "string",
"description": "The name of the database."
},
"schema": {
"type": "string",
"description": "The name of the schema."
},
"region": {
"type": "string",
"description": "AWS region of Redshift server.",
"examples": ["us-east-1"]
},
"account": {
"type": "string",
"description": "The account used by the server."
}
},
"required": [
"database",
"schema"
]
},
"S3Server": {
"type": "object",
"title": "S3Server",
"properties": {
"location": {
"type": "string",
"format": "uri",
"description": "S3 URL, starting with `s3://`",
"examples": [
"s3://datacontract-example-orders-latest/data/{model}/*.json"
]
},
"endpointUrl": {
"type": "string",
"format": "uri",
"description": "The server endpoint for S3-compatible servers.",
"examples": ["https://minio.example.com"]
},
"format": {
"type": "string",
"enum": [
"parquet",
"delta",
"json",
"csv"
],
"description": "File format."
},
"delimiter": {
"type": "string",
"enum": [
"new_line",
"array"
],
"description": "Only for format = json. How multiple json documents are delimited within one file"
}
},
"required": [
"location"
]
},
"SftpServer": {
"type": "object",
"title": "SftpServer",
"properties": {
"location": {
"type": "string",
"format": "uri",
"pattern": "^sftp://.*",
"description": "SFTP URL, starting with `sftp://`",
"examples": [
"sftp://123.123.12.123/{model}/*.json"
]
},
"format": {
"type": "string",
"enum": [
"parquet",
"delta",
"json",
"csv"
],
"description": "File format."
},
"delimiter": {
"type": "string",
"enum": [
"new_line",
"array"
],
"description": "Only for format = json. How multiple json documents are delimited within one file"
}
},
"required": [
"location"
]
},
"SnowflakeServer": {
"type": "object",
"title": "SnowflakeServer",
"properties": {
"host": {
"type": "string",
"description": "The host to the Snowflake server"
},
"port": {
"type": "integer",
"description": "The port to the Snowflake server."
},
"account": {
"type": "string",
"description": "The Snowflake account used by the server."
},
"database": {
"type": "string",
"description": "The name of the database."
},
"schema": {
"type": "string",
"description": "The name of the schema."
},
"warehouse": {
"type": "string",
"description": "The name of the cluster of resources that is a Snowflake virtual warehouse."
}
},
"required": [
"account",
"database",
"schema"
]
},
"SqlserverServer": {
"type": "object",
"title": "SqlserverServer",
"properties": {
"host": {
"type": "string",
"description": "The host to the database server",
"examples": [
"localhost"
]
},
"port": {
"type": "integer",
"description": "The port to the database server.",
"default": 1433,
"examples": [
1433
]
},
"database": {
"type": "string",
"description": "The name of the database.",
"examples": [
"database"
]
},
"schema": {
"type": "string",
"description": "The name of the schema in the database.",
"examples": [
"dbo"
]
}
},
"required": [
"host",
"database",
"schema"
]
},
"SynapseServer": {
"type": "object",
"title": "SynapseServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the Synapse server."
},
"port": {
"type": "integer",
"description": "The port of the Synapse server."
},
"database": {
"type": "string",
"description": "The name of the database."
}
},
"required": [
"host",
"port",
"database"
]
},
"TrinoServer": {
"type": "object",
"title": "TrinoServer",
"properties": {
"host": {
"type": "string",
"description": "The Trino host URL.",
"examples": [
"localhost"
]
},
"port": {
"type": "integer",
"description": "The Trino port."
},
"catalog": {
"type": "string",
"description": "The name of the catalog.",
"examples": [
"hive"
]
},
"schema": {
"type": "string",
"description": "The name of the schema in the database.",
"examples": [
"my_schema"
]
}
},
"required": [
"host",
"port",
"catalog",
"schema"
]
},
"VerticaServer": {
"type": "object",
"title": "VerticaServer",
"properties": {
"host": {
"type": "string",
"description": "The host of the Vertica server."
},
"port": {
"type": "integer",
"description": "The port of the Vertica server."
},
"database": {
"type": "string",
"description": "The name of the database."
},
"schema": {
"type": "string",
"description": "The name of the schema."
}
},
"required": [
"host",
"port",
"database",
"schema"
]
}
},
"SchemaElement": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Name of the element."
},
"physicalType": {
"type": "string",
"description": "The physical element data type in the data source.",
"examples": ["table", "view", "topic", "file"]
},
"description": {
"type": "string",
"description": "Description of the element."
},
"businessName": {
"type": "string",
"description": "The business name of the element."
},
"authoritativeDefinitions": {
"$ref": "#/$defs/AuthoritativeDefinitions"
},
"tags": {
"$ref": "#/$defs/Tags"
},
"customProperties": {
"$ref": "#/$defs/CustomProperties"
}
}
},
"SchemaObject": {
"type": "object",
"properties": {
"logicalType": {
"type": "string",
"description": "The logical element data type.",
"enum": ["object"]
},
"physicalName": {
"type": "string",
"description": "Physical name.",
"examples": ["table_1_2_0"]
},
"dataGranularityDescription": {
"type": "string",
"description": "Granular level of the data in the object.",
"examples": ["Aggregation by country"]
},
"properties": {
"type": "array",
"description": "A list of properties for the object.",
"items": {
"$ref": "#/$defs/SchemaProperty"
}
},
"quality": {
"$ref": "#/$defs/DataQualityChecks"
}
},
"allOf": [
{
"$ref": "#/$defs/SchemaElement"
}
],
"required": ["name"],
"unevaluatedProperties": false
},
"SchemaBaseProperty": {
"type": "object",
"properties": {
"primaryKey": {
"type": "boolean",
"description": "Boolean value specifying whether the element is primary or not. Default is false."
},
"primaryKeyPosition": {
"type": "integer",
"default": -1,
"description": "If element is a primary key, the position of the primary key element. Starts from 1. Example of `account_id, name` being primary key columns, `account_id` has primaryKeyPosition 1 and `name` primaryKeyPosition 2. Default to -1."
},
"logicalType": {
"type": "string",
"description": "The logical element data type.",
"enum": ["string", "date", "number", "integer", "object", "array", "boolean"]
},
"logicalTypeOptions": {
"type": "object",
"description": "Additional optional metadata to describe the logical type."
},
"physicalType": {
"type": "string",
"description": "The physical element data type in the data source. For example, VARCHAR(2), DOUBLE, INT."
},
"required": {
"type": "boolean",
"default": false,
"description": "Indicates if the element is required, i.e. whether it must not contain Null values; possible values are true and false. Default is false."
},
"unique": {
"type": "boolean",
"default": false,
"description": "Indicates if the element contains unique values; possible values are true and false. Default is false."
},
"partitioned": {
"type": "boolean",
"default": false,
"description": "Indicates if the element is partitioned; possible values are true and false."
},
"partitionKeyPosition": {
"type": "integer",
"default": -1,
"description": "If element is used for partitioning, the position of the partition element. Starts from 1. Example of `country, year` being partition columns, `country` has partitionKeyPosition 1 and `year` partitionKeyPosition 2. Default to -1."
},
"classification": {
"type": "string",
"description": "Can be anything, from simple labels like confidential, restricted, and public to more advanced categorization. Some companies, like PayPal, use a data classification indicating the class of data in the element; expected values are 1, 2, 3, 4, or 5.",
"examples": ["confidential", "restricted", "public"]
},
"encryptedName": {
"type": "string",
"description": "The element name within the dataset that contains the encrypted element value. For example, unencrypted element `email_address` might have an encryptedName of `email_address_encrypt`."
},
"transformSourceObjects": {
"type": "array",
"description": "List of objects in the data source used in the transformation.",
"items": {
"type": "string"
}
},
"transformLogic": {
"type": "string",
"description": "Logic used in the element transformation."
},
"transformDescription": {
"type": "string",
"description": "Describes the transform logic in very simple terms."
},
"examples": {
"type": "array",
"description": "List of sample element values.",
"items": {
"$ref": "#/$defs/AnyType"
}
},
"criticalDataElement": {
"type": "boolean",
"default": false,
"description": "True or false indicator; If element is considered a critical data element (CDE) then true else false."
},
"quality": {
"$ref": "#/$defs/DataQualityChecks"
}
},
"allOf": [
{
"$ref": "#/$defs/SchemaElement"
},
{
"if": {
"properties": {
"logicalType": {
"const": "string"
}
}
},
"then": {
"properties": {
"logicalTypeOptions": {
"type": "object",
"properties": {
"minLength": {
"type": "integer",
"minimum": 0,
"description": "Minimum length of the string."
},
"maxLength": {
"type": "integer",
"minimum": 0,
"description": "Maximum length of the string."
},
"pattern": {
"type": "string",
"description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)."
},
"format": {
"type": "string",
"examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"],
"description": "Provides extra context about what format the string follows."
}
},
"additionalProperties": false
}
}
}
},
{
"if": {
"properties": {
"logicalType": {
"const": "date"
}
}
},
"then": {
"properties": {
"logicalTypeOptions": {
"type": "object",
"properties": {
"format": {
"type": "string",
"examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"],
"description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'."
},
"exclusiveMaximum": {
"type": "boolean",
"default": false,
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
},
"maximum": {
"type": "string",
"description": "All date values are less than or equal to this value (values <= maximum)."
},
"exclusiveMinimum": {
"type": "boolean",
"default": false,
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
},
"minimum": {
"type": "string",
"description": "All date values are greater than or equal to this value (values >= minimum)."
}
},
"additionalProperties": false
}
}
}
},
{
"if": {
"anyOf": [
{
"properties": {
"logicalType": {
"const": "integer"
}
}
}
]
},
"then": {
"properties": {
"logicalTypeOptions": {
"type": "object",
"properties": {
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0,
"description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
},
"maximum": {
"type": "number",
"description": "All values are less than or equal to this value (values <= maximum)."
},
"exclusiveMaximum": {
"type": "boolean",
"default": false,
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
},
"minimum": {
"type": "number",
"description": "All values are greater than or equal to this value (values >= minimum)."
},
"exclusiveMinimum": {
"type": "boolean",
"default": false,
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
},
"format": {
"type": "string",
"default": "i32",
"description": "Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types).",
"enum": ["i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128"]
}
},
"additionalProperties": false
}
}
}
},
{
"if": {
"anyOf": [
{
"properties": {
"logicalType": {
"const": "number"
}
}
}
]
},
"then": {
"properties": {
"logicalTypeOptions": {
"type": "object",
"properties": {
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0,
"description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
},
"maximum": {
"type": "number",
"description": "All values are less than or equal to this value (values <= maximum)."
},
"exclusiveMaximum": {
"type": "boolean",
"default": false,
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
},
"minimum": {
"type": "number",
"description": "All values are greater than or equal to this value (values >= minimum)."
},
"exclusiveMinimum": {
"type": "boolean",
"default": false,
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
},
"format": {
"type": "string",
"default": "i32",
"description": "Format of the value in terms of how many bits of space it can use (follows the Rust float types).",
"enum": ["f32", "f64"]
}
},
"additionalProperties": false
}
}
}
},
{
"if": {
"properties": {
"logicalType": {
"const": "object"
}
}
},
"then": {
"properties": {
"logicalTypeOptions": {
"type": "object",
"properties": {
"maxProperties": {
"type": "integer",
"minimum": 0,
"description": "Maximum number of properties."
},
"minProperties": {
"type": "integer",
"minimum": 0,
"default": 0,
"description": "Minimum number of properties."
},
"required": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": true,
"description": "Property names that are required to exist in the object."
}
},
"additionalProperties": false
},
"properties": {
"type": "array",
"description": "A list of properties for the object.",
"items": {
"$ref": "#/$defs/SchemaProperty"
}
}
}
}
},
{
"if": {
"properties": {
"logicalType": {
"const": "array"
}
}
},
"then": {
"properties": {
"logicalTypeOptions": {
"type": "object",
"properties": {
"maxItems": {
"type": "integer",
"minimum": 0,
"description": "Maximum number of items."
},
"minItems": {
"type": "integer",
"minimum": 0,
"default": 0,
"description": "Minimum number of items"
},
"uniqueItems": {
"type": "boolean",
"default": false,
"description": "If set to true, all items in the array are unique."
}
},
"additionalProperties": false
},
"items": {
"$ref": "#/$defs/SchemaItemProperty",
"description": "List of items in an array (only applicable when `logicalType: array`)."
}
}
}
}
]
},
"SchemaProperty": {
"type": "object",
"$ref": "#/$defs/SchemaBaseProperty",
"required": ["name"],
"unevaluatedProperties": false
},
"SchemaItemProperty": {
"type": "object",
"$ref": "#/$defs/SchemaBaseProperty",
"properties": {
"properties": {
"type": "array",
"description": "A list of properties for the object.",
"items": {
"$ref": "#/$defs/SchemaProperty"
}
}
},
"unevaluatedProperties": false
},
"Tags": {
"type": "array",
"description": "A list of tags that may be assigned to the elements (object or property); the tags keyword may appear at any level. Tags may be used to better categorize an element. For example, `finance`, `sensitive`, `employee_record`.",
"examples": ["finance", "sensitive", "employee_record"],
"items": {
"type": "string"
}
},
"DataQuality": {
"type": "object",
"properties": {
"authoritativeDefinitions": {
"$ref": "#/$defs/AuthoritativeDefinitions"
},
"businessImpact": {
"type": "string",
"description": "Consequences of the rule failure.",
"examples": ["operational", "regulatory"]
},
"customProperties": {
"type": "array",
"description": "Additional properties required for rule execution.",
"items": {
"$ref": "#/$defs/CustomProperty"
}
},
"description": {
"type": "string",
"description": "Describe the quality check to be completed."
},
"dimension": {
"type": "string",
"description": "The key performance indicator (KPI) or dimension for data quality.",
"enum": ["accuracy", "completeness", "conformity", "consistency", "coverage", "timeliness", "uniqueness"]
},
"method": {
"type": "string",
"examples": ["reconciliation"]
},
"name": {
"type": "string",
"description": "Name of the data quality check."
},
"schedule": {
"type": "string",
"description": "Rule execution schedule details.",
"examples": ["0 20 * * *"]
},
"scheduler": {
"type": "string",
"description": "The name or type of scheduler used to start the data quality check.",
"examples": ["cron"]
},
"severity": {
"type": "string",
"description": "The severance of the quality rule.",
"examples": ["info", "warning", "error"]
},
"tags": {
"$ref": "#/$defs/Tags"
},
"type": {
"type": "string",
"description": "The type of quality check. 'text' is human-readable text that describes the quality of the data. 'library' is a set of maintained predefined quality attributes such as row count or unique. 'sql' is an individual SQL query that returns a value that can be compared. 'custom' is quality attributes that are vendor-specific, such as Soda or Great Expectations.",
"enum": ["text", "library", "sql", "custom"],
"default": "library"
},
"unit": {
"type": "string",
"description": "Unit the rule is using, popular values are `rows` or `percent`, but any value is allowed.",
"examples": ["rows", "percent"]
}
},
"allOf": [
{
"if": {
"properties": {
"type": {
"const": "library"
}
}
},
"then": {
"$ref": "#/$defs/DataQualityLibrary"
}
},
{
"if": {
"properties": {
"type": {
"const": "sql"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/DataQualitySql"
}
},
{
"if": {
"properties": {
"type": {
"const": "custom"
}
},
"required": ["type"]
},
"then": {
"$ref": "#/$defs/DataQualityCustom"
}
}
]
},
"DataQualityChecks": {
"type": "array",
"description": "Data quality rules with all the relevant information for rule setup and execution.",
"items": {
"$ref": "#/$defs/DataQuality"
}
},
"DataQualityLibrary": {
"type": "object",
"properties": {
"rule": {
"type": "string",
"description": "Define a data quality check based on the predefined rules as per ODCS.",
"examples": ["duplicateCount", "validValues", "rowCount"]
},
"mustBe": {
"description": "Must be equal to the value to be valid. When using numbers, it is equivalent to '='."
},
"mustNotBe": {
"description": "Must not be equal to the value to be valid. When using numbers, it is equivalent to '!='."
},
"mustBeGreaterThan": {
"type": "number",
"description": "Must be greater than the value to be valid. It is equivalent to '>'."
},
"mustBeGreaterOrEqualTo": {
"type": "number",
"description": "Must be greater than or equal to the value to be valid. It is equivalent to '>='."
},
"mustBeLessThan": {
"type": "number",
"description": "Must be less than the value to be valid. It is equivalent to '<'."
},
"mustBeLessOrEqualTo": {
"type": "number",
"description": "Must be less than or equal to the value to be valid. It is equivalent to '<='."
},
"mustBeBetween": {
"type": "array",
"description": "Must be between the two numbers to be valid. Smallest number first in the array.",
"minItems": 2,
"maxItems": 2,
"uniqueItems": true,
"items": {
"type": "number"
}
},
"mustNotBeBetween": {
"type": "array",
"description": "Must not be between the two numbers to be valid. Smallest number first in the array.",
"minItems": 2,
"maxItems": 2,
"uniqueItems": true,
"items": {
"type": "number"
}
}
},
"required": ["rule"]
},
"DataQualitySql": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Query string that adheres to the dialect of the provided server.",
"examples": ["SELECT COUNT(*) FROM ${table} WHERE ${column} IS NOT NULL"]
}
},
"required": ["query"]
},
"DataQualityCustom": {
"type": "object",
"properties": {
"engine": {
"type": "string",
"description": "Name of the engine which executes the data quality checks.",
"examples": ["soda", "great-expectations", "monte-carlo", "dbt"]
},
"implementation": {
"oneOf": [
{
"type": "string"
},
{
"type": "object"
}
]
}
},
"required": ["engine", "implementation"]
},
"AuthoritativeDefinitions": {
"type": "array",
"description": "List of links to sources that provide more details on the dataset; examples would be a link to an external definition, a training video, a git repo, data catalog, or another tool. Authoritative definitions follow the same structure in the standard.",
"items": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "URL to the authority."
},
"type": {
"type": "string",
"description": "Type of definition for authority: v2.3 adds standard values: `businessDefinition`, `transformationImplementation`, `videoTutorial`, `tutorial`, and `implementation`.",
"examples": ["businessDefinition", "transformationImplementation", "videoTutorial", "tutorial", "implementation"]
}
},
"required": ["url", "type"]
}
},
"Support": {
"type": "array",
"description": "Top level for support channels.",
"items": {
"$ref": "#/$defs/SupportItem"
}
},
"SupportItem": {
"type": "object",
"properties": {
"channel": {
"type": "string",
"description": "Channel name or identifier."
},
"url": {
"type": "string",
"description": "Access URL using normal [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax) (https, mailto, etc.)."
},
"description": {
"type": "string",
"description": "Description of the channel, free text."
},
"tool": {
"type": "string",
"description": "Name of the tool, value can be `email`, `slack`, `teams`, `discord`, `ticket`, or `other`.",
"examples": ["email", "slack", "teams", "discord", "ticket", "other"]
},
"scope": {
"type": "string",
"description": "Scope can be: `interactive`, `announcements`, `issues`.",
"examples": ["interactive", "announcements", "issues"]
},
"invitationUrl": {
"type": "string",
"description": "Some tools uses invitation URL for requesting or subscribing. Follows the [URL scheme](https://en.wikipedia.org/wiki/URL#Syntax)."
}
},
"required": ["channel", "url"]
},
"Pricing": {
"type": "object",
"properties": {
"priceAmount": {
"type": "number",
"description": "Subscription price per unit of measure in `priceUnit`."
},
"priceCurrency": {
"type": "string",
"description": "Currency of the subscription price in `price.priceAmount`."
},
"priceUnit": {
"type": "string",
"description": "The unit of measure for calculating cost. Examples megabyte, gigabyte."
}
}
},
"Team": {
"type": "object",
"properties": {
"username": {
"type": "string",
"description": "The user's username or email."
},
"role": {
"type": "string",
"description": "The user's job role; Examples might be owner, data steward. There is no limit on the role."
},
"dateIn": {
"type": "string",
"format": "date",
"description": "The date when the user joined the team."
},
"dateOut": {
"type": "string",
"format": "date",
"description": "The date when the user ceased to be part of the team."
},
"replacedByUsername": {
"type": "string",
"description": "The username of the user who replaced the previous user."
}
}
},
"Role": {
"type": "object",
"properties": {
"role": {
"type": "string",
"description": "Name of the IAM role that provides access to the dataset."
},
"description": {
"type": "string",
"description": "Description of the IAM role and its permissions."
},
"access": {
"type": "string",
"description": "The type of access provided by the IAM role."
},
"firstLevelApprovers": {
"type": "string",
"description": "The name(s) of the first-level approver(s) of the role."
},
"secondLevelApprovers": {
"type": "string",
"description": "The name(s) of the second-level approver(s) of the role."
},
"customProperties": {
"$ref": "#/$defs/CustomProperties"
}
},
"required": ["role"]
},
"ServiceLevelAgreementProperty": {
"type": "object",
"properties": {
"property": {
"type": "string",
"description": "Specific property in SLA, check the periodic table. May requires units (more details to come)."
},
"value": {
"anyOf": [
{
"type": "string"
},
{
"type": "number"
},
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "null"
}
],
"description": "Agreement value. The label will change based on the property itself."
},
"valueExt": {
"$ref": "#/$defs/AnyNonCollectionType",
"description": "Extended agreement value. The label will change based on the property itself."
},
"unit": {
"type": "string",
"description": "**d**, day, days for days; **y**, yr, years for years, etc. Units use the ISO standard."
},
"element": {
"type": "string",
"description": "Element(s) to check on. Multiple elements should be extremely rare and, if so, separated by commas."
},
"driver": {
"type": "string",
"description": "Describes the importance of the SLA from the list of: `regulatory`, `analytics`, or `operational`.",
"examples": ["regulatory", "analytics", "operational"]
}
},
"required": ["property", "value"]
},
"CustomProperties": {
"type": "array",
"description": "A list of key/value pairs for custom properties.",
"items": {
"$ref": "#/$defs/CustomProperty"
}
},
"CustomProperty": {
"type": "object",
"properties": {
"property": {
"type": "string",
"description": "The name of the key. Names should be in camel casethe same as if they were permanent properties in the contract."
},
"value": {
"$ref": "#/$defs/AnyType",
"description": "The value of the key."
}
}
},
"AnyType": {
"anyOf": [
{
"type": "string"
},
{
"type": "number"
},
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "null"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"AnyNonCollectionType": {
"anyOf": [
{
"type": "string"
},
{
"type": "number"
},
{
"type": "integer"
},
{
"type": "boolean"
},
{
"type": "null"
}
]
}
}
}