mirror of
https://github.com/bitol-io/open-data-contract-standard.git
synced 2025-05-28 19:34:02 +00:00
Add in integer/number format to follow Rust integer format, add in v3.0.0 JSON schema, update latest schema, add in entry to changelog
This commit is contained in:
@@ -6,6 +6,14 @@ image: "https://raw.githubusercontent.com/bitol-io/artwork/main/horizontal/color
|
||||
|
||||
This document tracks the history and evolution of the **Open Data Contract Standard**.
|
||||
|
||||
# v3.0.0 - 2024-07-12 - PROPOSED
|
||||
|
||||
* In JSON schema:
|
||||
* Restrict `dataset.table.columns.column.logicalType` to be one of `string, date, number, integer, object, array, boolean`
|
||||
* Add `dataset.table.columns.column.logicalTypeOptions`
|
||||
* Add [all data types example](docs/examples/data-types/all-data-types.odcs.yaml)
|
||||
* Add [logical type options to standard](docs/standard.md#logical-type-options)
|
||||
|
||||
# v2.2.2 - 2024-05-23 - APPROVED
|
||||
|
||||
* In JSON schema validation:
|
||||
|
||||
@@ -21,15 +21,15 @@ dataset:
|
||||
physicalType: date
|
||||
logicalType: date
|
||||
logicalTypeOptions:
|
||||
minLength: 10
|
||||
maxLength: 10
|
||||
minimum: 2020-01-01
|
||||
maximum: 2021-01-01
|
||||
format: yyyy-MM-dd
|
||||
- column: txn_timestamp
|
||||
physicalType: timestamp
|
||||
logicalType: date
|
||||
logicalTypeOptions:
|
||||
minLength: 19
|
||||
maxLength: 19
|
||||
minimum: 2020-01-01 00:00:00
|
||||
maximum: 2021-01-01 00:00:00
|
||||
format: "yyyy-MM-dd HH:mm:ss"
|
||||
- column: amount
|
||||
physicalType: double
|
||||
@@ -43,6 +43,7 @@ dataset:
|
||||
minimum: 18
|
||||
maximum: 100
|
||||
exclusiveMaximum: true
|
||||
format: i64
|
||||
- column: is_open
|
||||
physicalType: bool
|
||||
logicalType: boolean
|
||||
@@ -27,6 +27,7 @@ This folder contains mainly excerpt of data contracts to illustrate specific sec
|
||||
|
||||
- [Table with single column](schema/table-column.odcs.yaml)
|
||||
- [Table with columns and partitioning](schema/table-columns-with-partition.odcs.yaml)
|
||||
- [Data types](data-types/all-data-types.odcs.yaml)
|
||||
|
||||
## Data quality
|
||||
|
||||
|
||||
@@ -249,6 +249,7 @@ Additional metadata options to more accurately define the data type.
|
||||
| date | minimum | Minimum | No | All date values are greater than or equal to this value (values >= minimum). |
|
||||
| integer/number | exclusiveMaximum | Exclusive Maximum | No | If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum). |
|
||||
| integer/number | exclusiveMinimum | Exclusive Minimum | No | If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum). |
|
||||
| integer/number | format | Format | No | Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types). |
|
||||
| integer/number | maximum | Maximum | No | All values are less than or equal to this value (values <= maximum). |
|
||||
| integer/number | minimum | Minimum | No | All values are greater than or equal to this value (values >= minimum). |
|
||||
| integer/number | multipleOf | Multiple Of | No | Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5. |
|
||||
|
||||
@@ -19,7 +19,7 @@ following section:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "Open Data Contract Standard (ODCS))",
|
||||
"name": "Open Data Contract Standard (ODCS)",
|
||||
...,
|
||||
"versions": {
|
||||
"<new_version>": "https://github.com/bitol-io/open-data-contract-standard/blob/main/schema/odcs-json-schema-<new_version>.json",
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
},
|
||||
"apiVersion": {
|
||||
"type": "string",
|
||||
"default": "v2.2.2",
|
||||
"description": "Version of the standard used to build data contract. Default value is v2.2.2.",
|
||||
"default": "v3.0.0",
|
||||
"description": "Version of the standard used to build data contract. Default value is v3.0.0.",
|
||||
"pattern": "^v[0-9]+\\.[0-9]+\\.[0-9]+"
|
||||
},
|
||||
"uuid": {
|
||||
@@ -244,196 +244,7 @@
|
||||
},
|
||||
"logicalTypeOptions": {
|
||||
"type": "object",
|
||||
"description": "Additional optional metadata to describe the logical type.",
|
||||
"properties": {
|
||||
"enum": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
},
|
||||
"minItems": 1,
|
||||
"uniqueItems": false,
|
||||
"description": "Set of possible values."
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"minLength": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Minimum length of the string."
|
||||
},
|
||||
"maxLength": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum length of the string."
|
||||
},
|
||||
"pattern": {
|
||||
"type": "string",
|
||||
"format": "regex",
|
||||
"description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)."
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"],
|
||||
"description": "Provides extra context about what format the string follows."
|
||||
}
|
||||
}
|
||||
}
|
||||
},{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "date"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"format": {
|
||||
"type": "string",
|
||||
"examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"],
|
||||
"description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'."
|
||||
},
|
||||
"exclusiveMaximum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||||
},
|
||||
"maximum": {
|
||||
"type": "string",
|
||||
"description": "All date values are less than or equal to this value (values <= maximum)."
|
||||
},
|
||||
"exclusiveMinimum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||||
},
|
||||
"minimum": {
|
||||
"type": "string",
|
||||
"description": "All date values are greater than or equal to this value (values >= minimum)."
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"multipleOf": {
|
||||
"type": "number",
|
||||
"exclusiveMinimum": 0,
|
||||
"description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
|
||||
},
|
||||
"maximum": {
|
||||
"type": "number",
|
||||
"description": "All values are less than or equal to this value (values <= maximum)."
|
||||
},
|
||||
"exclusiveMaximum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||||
},
|
||||
"minimum": {
|
||||
"type": "number",
|
||||
"description": "All values are greater than or equal to this value (values >= minimum)."
|
||||
},
|
||||
"exclusiveMinimum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"maxProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum number of properties."
|
||||
},
|
||||
"minProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Minimum number of properties."
|
||||
},
|
||||
"required": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 1,
|
||||
"uniqueItems": true,
|
||||
"description": "Property names that are required to exist in the object."
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "array"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"maxItems": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum number of items."
|
||||
},
|
||||
"minItems": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Minimum number of items"
|
||||
},
|
||||
"uniqueItems": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all items in the array are unique."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"description": "Additional optional metadata to describe the logical type."
|
||||
},
|
||||
"physicalType": {
|
||||
"type": "string",
|
||||
@@ -520,6 +331,216 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"minLength": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Minimum length of the string."
|
||||
},
|
||||
"maxLength": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum length of the string."
|
||||
},
|
||||
"pattern": {
|
||||
"type": "string",
|
||||
"format": "regex",
|
||||
"description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)."
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"],
|
||||
"description": "Provides extra context about what format the string follows."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}, {
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "date"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"format": {
|
||||
"type": "string",
|
||||
"examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"],
|
||||
"description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'."
|
||||
},
|
||||
"exclusiveMaximum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||||
},
|
||||
"maximum": {
|
||||
"type": "string",
|
||||
"description": "All date values are less than or equal to this value (values <= maximum)."
|
||||
},
|
||||
"exclusiveMinimum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||||
},
|
||||
"minimum": {
|
||||
"type": "string",
|
||||
"description": "All date values are greater than or equal to this value (values >= minimum)."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"multipleOf": {
|
||||
"type": "number",
|
||||
"exclusiveMinimum": 0,
|
||||
"description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
|
||||
},
|
||||
"maximum": {
|
||||
"type": "number",
|
||||
"description": "All values are less than or equal to this value (values <= maximum)."
|
||||
},
|
||||
"exclusiveMaximum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||||
},
|
||||
"minimum": {
|
||||
"type": "number",
|
||||
"description": "All values are greater than or equal to this value (values >= minimum)."
|
||||
},
|
||||
"exclusiveMinimum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"default": "i32",
|
||||
"description": "Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types).",
|
||||
"enum": ["i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128"]
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"maxProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum number of properties."
|
||||
},
|
||||
"minProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Minimum number of properties."
|
||||
},
|
||||
"required": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 1,
|
||||
"uniqueItems": true,
|
||||
"description": "Property names that are required to exist in the object."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "array"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"maxItems": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum number of items."
|
||||
},
|
||||
"minItems": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Minimum number of items"
|
||||
},
|
||||
"uniqueItems": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all items in the array are unique."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"required": ["column", "logicalType", "physicalType"]
|
||||
},
|
||||
"DataQuality": {
|
||||
|
||||
749
schema/odcs-json-schema-v3.0.0.json
Normal file
749
schema/odcs-json-schema-v3.0.0.json
Normal file
@@ -0,0 +1,749 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2019-09/schema",
|
||||
"title": "Open Data Contract Standard (OCDS)",
|
||||
"description": "An open data contract specification to establish agreement between data producers and consumers.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Current version of the data contract."
|
||||
},
|
||||
"kind": {
|
||||
"type": "string",
|
||||
"default": "DataContract",
|
||||
"description": "The kind of file this is. Valid value is `DataContract`.",
|
||||
"enum": ["DataContract"]
|
||||
},
|
||||
"apiVersion": {
|
||||
"type": "string",
|
||||
"default": "v3.0.0",
|
||||
"description": "Version of the standard used to build data contract. Default value is v3.0.0.",
|
||||
"pattern": "^v[0-9]+\\.[0-9]+\\.[0-9]+"
|
||||
},
|
||||
"uuid": {
|
||||
"type": "string",
|
||||
"description": "A unique identifier used to reduce the risk of dataset name collisions; initially the UUID will be created using a UUID generator tool ([example](https://www.uuidgenerator.net/)). However, we may want to develop a method that accepts a seed value using a combination of fields–such as name, kind and source–to create a repeatable value."
|
||||
},
|
||||
"datasetKind": {
|
||||
"type": "string",
|
||||
"description": "The kind of dataset being cataloged; Expected values are `virtualDataset` or `managedDataset`.",
|
||||
"examples": ["virtualDataset", "managedDataset"]
|
||||
},
|
||||
"userConsumptionMode": {
|
||||
"type": "string",
|
||||
"description": "List of data modes for which the dataset may be used. Expected sample values might be `analytical` or `operational`. <br/>Note: in the future, this will probably be replaced by ports.",
|
||||
"examples": ["analytical", "operational"]
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Identifies the types of objects in the dataset. For BigQuery or any other database, the expected value would be Tables.",
|
||||
"examples": ["Tables"]
|
||||
},
|
||||
"tenant": {
|
||||
"type": "string",
|
||||
"description": "Indicates the property the data is primarily associated with. Value is case insensitive."
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"description": "A list of tags that may be assigned to the dataset, table or column; the `tags` keyword may appear at any level.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"description": "Current status of the dataset. Valid values are `production`, `test`, or `development`.",
|
||||
"examples": ["production", "test", "development"]
|
||||
},
|
||||
"sourceSystem": {
|
||||
"type": "string",
|
||||
"description": "The system where the dataset resides. Expected value can be BigQuery.",
|
||||
"examples": ["BigQuery"]
|
||||
},
|
||||
"sourcePlatform": {
|
||||
"type": "string",
|
||||
"description": "The platform where the dataset resides. Expected value is GoogleCloudPlatform, IBMCloud, Azure...",
|
||||
"examples": ["GoogleCloudPlatform", "IBMCloud", "Azure", "AWS"]
|
||||
},
|
||||
"server": {
|
||||
"type": "string",
|
||||
"description": "The server where the dataset resides."
|
||||
},
|
||||
"quantumName": {
|
||||
"type": "string",
|
||||
"description": "The name of the data quantum or data product."
|
||||
},
|
||||
"productSlackChannel": {
|
||||
"type": "string",
|
||||
"description": "Slack channel of the team responsible for maintaining the dataset."
|
||||
},
|
||||
"productFeedbackUrl": {
|
||||
"type": "string",
|
||||
"description": "The URL for submitting feedback to the team responsible for maintaining the dataset."
|
||||
},
|
||||
"productDl": {
|
||||
"type": "string",
|
||||
"description": "The email distribution list (DL) of the persons or team responsible for maintaining the dataset."
|
||||
},
|
||||
"username": {
|
||||
"type": "string",
|
||||
"description": "User credentials for connecting to the dataset; how the credentials will be stored/passed is outside of the scope of the contract."
|
||||
},
|
||||
"password": {
|
||||
"type": "string",
|
||||
"description": "User credentials for connecting to the dataset; how the credentials will be stored/passed is out of the scope of this contract."
|
||||
},
|
||||
"driverVersion": {
|
||||
"type": "string",
|
||||
"description": "The version of the connection driver to be used to connect to the dataset."
|
||||
},
|
||||
"driver": {
|
||||
"type": "string",
|
||||
"description": "The connection driver required to connect to the dataset."
|
||||
},
|
||||
"description": {
|
||||
"type": "object",
|
||||
"description": "High level description of the dataset.",
|
||||
"properties": {
|
||||
"usage": {
|
||||
"type": "string",
|
||||
"description": "Intended usage of the dataset."
|
||||
},
|
||||
"purpose": {
|
||||
"type": "string",
|
||||
"description": "Purpose of the dataset."
|
||||
},
|
||||
"limitations": {
|
||||
"type": "string",
|
||||
"description": "Limitations of the dataset."
|
||||
}
|
||||
}
|
||||
},
|
||||
"project": {
|
||||
"type": "string",
|
||||
"description": "Associated project name, can be used for billing or administrative purpose. Used to be datasetProject."
|
||||
},
|
||||
"datasetName": {
|
||||
"type": "string",
|
||||
"description": "May be required in cloud instance like GCP BigQuery dataset name."
|
||||
},
|
||||
"datasetDomain": {
|
||||
"type": "string",
|
||||
"description": "Name of the logical domain dataset the contract describes. This field is only required for output data contracts.",
|
||||
"examples": ["imdb_ds_aggregate", "receiver_profile_out", "transaction_profile_out"]
|
||||
},
|
||||
"database": {
|
||||
"type": "string",
|
||||
"description": "The database where the dataset resides."
|
||||
},
|
||||
"dataset": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/Dataset"
|
||||
}
|
||||
},
|
||||
"price": {
|
||||
"$ref": "#/$defs/Pricing"
|
||||
},
|
||||
"stakeholders": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/$defs/Stakeholder"
|
||||
}
|
||||
},
|
||||
"roles": {
|
||||
"type": "array",
|
||||
"description": "A list of roles that will provide user access to the dataset.",
|
||||
"items": {
|
||||
"$ref": "#/$defs/Role"
|
||||
}
|
||||
},
|
||||
"slaDefaultColumn": {
|
||||
"type": "string",
|
||||
"description": "Columns (using the Table.Column notation) to do the checks on. By default, it is the partition column."
|
||||
},
|
||||
"slaProperties": {
|
||||
"type": "array",
|
||||
"description": "A list of key/value pairs for SLA specific properties. There is no limit on the type of properties (more details to come).",
|
||||
"items": {
|
||||
"$ref": "#/$defs/ServiceLevelAgreementProperty"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["version", "kind", "uuid", "type", "status", "dataset", "datasetName", "quantumName"],
|
||||
"$defs": {
|
||||
"Dataset": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"table": {
|
||||
"type": "string",
|
||||
"description": "Name of the table being cataloged; the value should only contain the table name. Do not include the project or dataset name in the value."
|
||||
},
|
||||
"physicalName": {
|
||||
"type": "string",
|
||||
"description": "Physical name of the table, default value is table name + version separated by underscores, as `table_1_2_0`.",
|
||||
"examples": ["table_1_2_0"]
|
||||
},
|
||||
"priorTableName": {
|
||||
"type": "string",
|
||||
"description": "Name of the previous version of the dataset, if applicable."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Description of the dataset."
|
||||
},
|
||||
"authoritativeDefinitions": {
|
||||
"$ref": "#/$defs/AuthoritativeDefinitions"
|
||||
},
|
||||
"dataGranularity": {
|
||||
"type": "string",
|
||||
"description": "Granular level of the data in the table. Example would be `pmt_txn_id`.",
|
||||
"examples": ["pmt_txn_id"]
|
||||
},
|
||||
"columns": {
|
||||
"type": "array",
|
||||
"description": "Array. A list of columns in the table.",
|
||||
"items": {
|
||||
"$ref": "#/$defs/Column"
|
||||
}
|
||||
},
|
||||
"quality": {
|
||||
"type": "array",
|
||||
"description": "Data quality rules with all the relevant information for rule setup and execution.",
|
||||
"items": {
|
||||
"$ref": "#/$defs/DataQuality"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["table"]
|
||||
},
|
||||
"Column": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"column": {
|
||||
"type": "string",
|
||||
"description": "The name of the column."
|
||||
},
|
||||
"isPrimaryKey": {
|
||||
"type": "boolean",
|
||||
"description": "Boolean value specifying whether the column is primary or not. Default is false."
|
||||
},
|
||||
"primaryKeyPosition": {
|
||||
"type": "integer",
|
||||
"default": -1,
|
||||
"description": "If column is a primary key, the position of the primary key column. Starts from 1. Example of `account_id, name` being primary key columns, `account_id` has primaryKeyPosition 1 and `name` primaryKeyPosition 2. Default to -1."
|
||||
},
|
||||
"businessName": {
|
||||
"type": "string",
|
||||
"description": "The business name of the column."
|
||||
},
|
||||
"logicalType": {
|
||||
"type": "string",
|
||||
"description": "The logical column data type.",
|
||||
"enum": ["string", "date", "number", "integer", "object", "array", "boolean"]
|
||||
},
|
||||
"logicalTypeOptions": {
|
||||
"type": "object",
|
||||
"description": "Additional optional metadata to describe the logical type."
|
||||
},
|
||||
"physicalType": {
|
||||
"type": "string",
|
||||
"description": "The physical column data type in the data source. For example, VARCHAR(2), DOUBLE, INT."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Description of the column."
|
||||
},
|
||||
"isNullable": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Indicates if the column may contain Null values; possible values are true and false. Default is false."
|
||||
},
|
||||
"isUnique": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Indicates if the column contains unique values; possible values are true and false. Default is false."
|
||||
},
|
||||
"partitionStatus": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Indicates if the column is partitioned; possible values are true and false."
|
||||
},
|
||||
"partitionKeyPosition": {
|
||||
"type": "integer",
|
||||
"default": -1,
|
||||
"description": "If column is used for partitioning, the position of the partition column. Starts from 1. Example of `country, year` being partition columns, `country` has partitionKeyPosition 1 and `year` partitionKeyPosition 2. Default to -1."
|
||||
},
|
||||
"clusterStatus": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Indicates of the column is clustered; possible values are true and false."
|
||||
},
|
||||
"clusterKeyPosition": {
|
||||
"type": "integer",
|
||||
"default": -1,
|
||||
"description": "If column is used for clustering, the position of the cluster column. Starts from 1. Example of `year, date` being cluster columns, `year` has clusterKeyPosition 1 and `date` clusterKeyPosition 2. Default to -1."
|
||||
},
|
||||
"classification": {
|
||||
"type": "string",
|
||||
"description": "Can be anything, like confidential, restricted, and public to more advanced categorization. Some companies like PayPal, use data classification indicating the class of data in the column; expected values are 1, 2, 3, 4, or 5.",
|
||||
"examples": ["confidential", "restricted", "public"]
|
||||
},
|
||||
"authoritativeDefinitions": {
|
||||
"$ref": "#/$defs/AuthoritativeDefinitions"
|
||||
},
|
||||
"encryptedColumnName": {
|
||||
"type": "string",
|
||||
"description": "The column name within the table that contains the encrypted column value. For example, unencrypted column `email_address` might have an encryptedColumnName of `email_address_encrypt`."
|
||||
},
|
||||
"transformSourceTables": {
|
||||
"type": "array",
|
||||
"description": "List of sources used in column transformation.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"transformLogic": {
|
||||
"type": "string",
|
||||
"description": "Logic used in the column transformation."
|
||||
},
|
||||
"transformDescription": {
|
||||
"type": "string",
|
||||
"description": "Describes the transform logic in very simple terms."
|
||||
},
|
||||
"sampleValues": {
|
||||
"type": "array",
|
||||
"description": "List of sample column values.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"criticalDataElementStatus": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "True or false indicator; If element is considered a critical data element (CDE) then true else false."
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"description": "A list of tags that may be assigned to the dataset, table or column; the tags keyword may appear at any level.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"allOf": [
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"minLength": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Minimum length of the string."
|
||||
},
|
||||
"maxLength": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum length of the string."
|
||||
},
|
||||
"pattern": {
|
||||
"type": "string",
|
||||
"format": "regex",
|
||||
"description": "Regular expression pattern to define valid value. Follows regular expression syntax from ECMA-262 (https://262.ecma-international.org/5.1/#sec-15.10.1)."
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"examples": ["password", "byte", "binary", "email", "uuid", "uri", "hostname", "ipv4", "ipv6"],
|
||||
"description": "Provides extra context about what format the string follows."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}, {
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "date"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"format": {
|
||||
"type": "string",
|
||||
"examples": ["yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "HH:mm:ss"],
|
||||
"description": "Format of the date. Follows the format as prescribed by [JDK DateTimeFormatter](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html). For example, format 'yyyy-MM-dd'."
|
||||
},
|
||||
"exclusiveMaximum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||||
},
|
||||
"maximum": {
|
||||
"type": "string",
|
||||
"description": "All date values are less than or equal to this value (values <= maximum)."
|
||||
},
|
||||
"exclusiveMinimum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||||
},
|
||||
"minimum": {
|
||||
"type": "string",
|
||||
"description": "All date values are greater than or equal to this value (values >= minimum)."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"anyOf": [
|
||||
{
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"multipleOf": {
|
||||
"type": "number",
|
||||
"exclusiveMinimum": 0,
|
||||
"description": "Values must be multiples of this number. For example, multiple of 5 has valid values 0, 5, 10, -5."
|
||||
},
|
||||
"maximum": {
|
||||
"type": "number",
|
||||
"description": "All values are less than or equal to this value (values <= maximum)."
|
||||
},
|
||||
"exclusiveMaximum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly less than the maximum value (values < maximum). Otherwise, less than or equal to the maximum value (values <= maximum)."
|
||||
},
|
||||
"minimum": {
|
||||
"type": "number",
|
||||
"description": "All values are greater than or equal to this value (values >= minimum)."
|
||||
},
|
||||
"exclusiveMinimum": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all values are strictly greater than the minimum value (values > minimum). Otherwise, greater than or equal to the minimum value (values >= minimum)."
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"default": "i32",
|
||||
"description": "Format of the value in terms of how many bits of space it can use and whether it is signed or unsigned (follows the Rust integer types).",
|
||||
"enum": ["i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128"]
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"maxProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum number of properties."
|
||||
},
|
||||
"minProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Minimum number of properties."
|
||||
},
|
||||
"required": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"minItems": 1,
|
||||
"uniqueItems": true,
|
||||
"description": "Property names that are required to exist in the object."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"if": {
|
||||
"properties": {
|
||||
"logicalType": {
|
||||
"const": "array"
|
||||
}
|
||||
}
|
||||
},
|
||||
"then": {
|
||||
"properties": {
|
||||
"logicalTypeOptions": {
|
||||
"properties": {
|
||||
"maxItems": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Maximum number of items."
|
||||
},
|
||||
"minItems": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"default": 0,
|
||||
"description": "Minimum number of items"
|
||||
},
|
||||
"uniqueItems": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "If set to true, all items in the array are unique."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"required": ["column", "logicalType", "physicalType"]
|
||||
},
|
||||
"DataQuality": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string",
|
||||
"description": "The Rosewall data quality code(s) indicating which quality checks need to be performed at the dataset, table or column level; The quality keyword may appear at any level; Some quality checks require parameters such so the check can be completed (eg, list of fields used to identify a distinct row) therefore some quality checks may be followed by a single value or an array; See appendix for link to quality checks."
|
||||
},
|
||||
"templateName": {
|
||||
"type": "string",
|
||||
"description": "The template name which indicates what is the equivalent template from the tool."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Describe the quality check to be completed."
|
||||
},
|
||||
"toolName": {
|
||||
"type": "string",
|
||||
"description": "Name of the tool used to complete the quality check; Most will be Elevate initially."
|
||||
},
|
||||
"toolRuleName": {
|
||||
"type": "string",
|
||||
"description": "Name of the quality tool's rule created to complete the quality check."
|
||||
},
|
||||
"dimension": {
|
||||
"type": "string",
|
||||
"description": "The key performance indicator (KPI) or dimension for data quality."
|
||||
},
|
||||
"columns": {
|
||||
"type": "string",
|
||||
"description": "List of columns to be used in the quality check."
|
||||
},
|
||||
"column": {
|
||||
"type": "string",
|
||||
"description": "To be used in lieu of quality.columns when only a single column is required for the quality check."
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The type of quality check."
|
||||
},
|
||||
"severity": {
|
||||
"type": "string",
|
||||
"description": "The severance of the quality rule."
|
||||
},
|
||||
"businessImpact": {
|
||||
"type": "string",
|
||||
"description": "Consequences of the rule failure."
|
||||
},
|
||||
"scheduleCronExpression": {
|
||||
"type": "string",
|
||||
"description": "Rule execution schedule details."
|
||||
},
|
||||
"customProperties": {
|
||||
"type": "array",
|
||||
"description": "Additional properties required for rule execution.",
|
||||
"items": {
|
||||
"$ref": "#/$defs/CustomProperty"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["templateName", "toolName"]
|
||||
},
|
||||
"AuthoritativeDefinitions": {
|
||||
"type": "array",
|
||||
"description": "List of links to sources that provide more details on the table; examples would be a link to an external definition, a training video, a GitHub repo, Collibra, or another tool. Authoritative definitions follow the same structure in the standard.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string",
|
||||
"description": "URL to the authority."
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type of definition for authority: v2.3 adds standard values: `businessDefinition`, `transformationImplementation`, `videoTutorial`, `tutorial`, and `implementation`.",
|
||||
"examples": ["businessDefinition", "transformationImplementation", "videoTutorial", "tutorial", "implementation"]
|
||||
}
|
||||
},
|
||||
"required": ["url", "type"]
|
||||
}
|
||||
},
|
||||
"Pricing": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"priceAmount": {
|
||||
"type": "number",
|
||||
"description": "Subscription price per unit of measure in `priceUnit`."
|
||||
},
|
||||
"priceCurrency": {
|
||||
"type": "string",
|
||||
"description": "Currency of the subscription price in `price.priceAmount`."
|
||||
},
|
||||
"priceUnit": {
|
||||
"type": "string",
|
||||
"description": "The unit of measure for calculating cost. Examples megabyte, gigabyte."
|
||||
}
|
||||
}
|
||||
},
|
||||
"Stakeholder": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"username": {
|
||||
"type": "string",
|
||||
"description": "The stakeholder's username or email."
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "The stakeholder's job role; Examples might be owner, data steward. There is no limit on the role."
|
||||
},
|
||||
"dateIn": {
|
||||
"type": "string",
|
||||
"description": "The date when the user became a stakeholder."
|
||||
},
|
||||
"dateOut": {
|
||||
"type": "string",
|
||||
"description": "The date when the user ceased to be a stakeholder"
|
||||
},
|
||||
"replacedByUsername": {
|
||||
"type": "string",
|
||||
"description": "The username of the user who replaced the stakeholder"
|
||||
}
|
||||
}
|
||||
},
|
||||
"Role": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "Name of the IAM role that provides access to the dataset; the value will generally come directly from the \"BQ dataset to IAM roles mapping\" document."
|
||||
},
|
||||
"access": {
|
||||
"type": "string",
|
||||
"description": "The type of access provided by the IAM role; the value will generally come directly from the \"BQ dataset to IAM roles mapping\" document."
|
||||
},
|
||||
"firstLevelApprovers": {
|
||||
"type": "string",
|
||||
"description": "The name(s) of the first level approver(s) of the role; the value will generally come directly from the \"BQ dataset to IAM roles mapping\" document."
|
||||
},
|
||||
"secondLevelApprovers": {
|
||||
"type": "string",
|
||||
"description": "The name(s) of the second level approver(s) of the role; the value will generally come directly from the \"BQ dataset to IAM roles mapping\" document."
|
||||
}
|
||||
},
|
||||
"required": ["role", "access"]
|
||||
},
|
||||
"ServiceLevelAgreementProperty": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"property": {
|
||||
"type": "string",
|
||||
"description": "Specific property in SLA, check the periodic table. May requires units (more details to come)."
|
||||
},
|
||||
"value": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
}
|
||||
],
|
||||
"description": "Agreement value. The label will change based on the property itself."
|
||||
},
|
||||
"valueExt": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
}
|
||||
],
|
||||
"description": "Extended agreement value. The label will change based on the property itself."
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"description": "**d**, day, days for days; **y**, yr, years for years, etc. Units use the ISO standard."
|
||||
},
|
||||
"column": {
|
||||
"type": "string",
|
||||
"description": "Column(s) to check on. Multiple columns should be extremely rare and, if so, separated by commas."
|
||||
},
|
||||
"driver": {
|
||||
"type": "string",
|
||||
"description": "Describes the importance of the SLA from the list of: `regulatory`, `analytics`, or `operational`.",
|
||||
"examples": ["regulatory", "analytics", "operational"]
|
||||
}
|
||||
},
|
||||
"required": ["property", "value"]
|
||||
},
|
||||
"CustomProperty": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"property": {
|
||||
"type": "string",
|
||||
"description": "The name of the key. Names should be in camel case–the same as if they were permanent properties in the contract."
|
||||
},
|
||||
"value": {
|
||||
"type": "object",
|
||||
"description": "The value of the key."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user