WOVIBHHYIGB6XCWIYLKGF5R4GX7UJJUTSDJVPWDR3ZHCWZUZSRDAC use kdl_schema::SCHEMA_SCHEMA;use kdl_schema_check::CheckExt;use miette::Result;
/// Checks that the file `tests/kdl-schema.kdl` matches `SCHEMA_SCHEMA`.
///
/// Any failure reported by `check_file_matches` is propagated with `?`, so the
/// test fails with that error as its result.
#[test]fn schema_valid() -> Result<()> {SCHEMA_SCHEMA.check_file_matches("tests/kdl-schema.kdl")?;Ok(())}
document {info {title "KDL Schema" lang="en"description "KDL Schema KDL schema in KDL" lang="en"author "Kat Marchán" {link "https://github.com/zkat" rel="self"}contributor "Lars Willighagen" {link "https://github.com/larsgw" rel="self"}link "https://github.com/zkat/kdl" rel="documentation"license "Creative Commons Attribution-ShareAlike 4.0 International License" spdx="CC-BY-SA-4.0" {link "https://creativecommons.org/licenses/by-sa/4.0/" lang="en"}published "2021-08-31"modified "2021-09-01"}node "document" {min 1max 1children id="node-children" {node "node-names" id="node-names-node" description="Validations to apply specifically to arbitrary node names" {children ref=r#"[id="validations"]"#}node "other-nodes-allowed" id="other-nodes-allowed-node" description="Whether to allow child nodes other than the ones explicitly listed. Defaults to 'false'." {max 1value {min 1max 1type "boolean"}}node "tag-names" description="Validations to apply specifically to arbitrary type tag names" {children ref=r#"[id="validations"]"#}node "other-tags-allowed" description="Whether to allow child node tags other than the ones explicitly listed. Defaults to 'false'." {max 1value {min 1max 1type "boolean"}}node "info" description="A child node that describes the schema itself." 
{children {node "title" description="The title of the schema or the format it describes" {value description="The title text" {type "string"min 1max 1}prop "lang" id="info-lang" description="The language of the text" {type "string"}}node "description" description="A description of the schema or the format it describes" {value description="The description text" {type "string"min 1max 1}prop ref=r#"[id="info-lang"]"#}node "author" description="Author of the schema" {value id="info-person-name" description="Person name" {type "string"min 1max 1}prop "orcid" id="info-orcid" description="The ORCID of the person" {type "string"pattern r"\d{4}-\d{4}-\d{4}-\d{4}"}children {node ref=r#"[id="info-link"]"#}}node "contributor" description="Contributor to the schema" {value ref=r#"[id="info-person-name"]"#prop ref=r#"[id="info-orcid"]"#children {node ref=r#"[id="info-link"]"#}}node "link" id="info-link" description="Links to itself, and to sources describing it" {value description="A URL that the link points to" {type "string"format "url" "irl"min 1max 1}prop "rel" description="The relation between the current entity and the URL" {type "string"enum "self" "documentation"}prop ref=r#"[id="info-lang"]"#}node "license" description="The license(s) that the schema is licensed under" {value description="Name of the used license" {type "string"min 1max 1}prop "spdx" description="An SPDX license identifier" {type "string"}children {node ref=r#"[id="info-link"]"#}}node "published" description="When the schema was published" {value description="Publication date" {type "string"format "date"min 1max 1}prop "time" id="info-time" description="A time to accompany the date" {type "string"format "time"}}node "modified" description="When the schema was last modified" {value description="Modification date" {type "string"format "date"min 1max 1}prop ref=r#"[id="info-time"]"#}node "version" description="The version number of this version of the schema" {value description="Semver version number" 
{type "string"pattern r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"min 1max 1}}}}node "tag" id="tag-node" description="A tag belonging to a child node of `document` or another node." {value description="The name of the tag. If a tag name is not supplied, the node rules apply to _all_ nodes belonging to the parent." {type "string"max 1}prop "description" description="A description of this node's purpose." {type "string"}prop "id" description="A globally-unique ID for this node." {type "string"}prop "ref" description="A globally unique reference to another node." {type "string"format "kdl-query"}children {node ref=r#"[id="node-names-node"]"#node ref=r#"[id="other-nodes-allowed-node"]"#node ref=r#"[id="node-node"]"#}}node "node" id="node-node" description="A child node belonging either to `document` or to another `node`. Nodes may be anonymous." {value description="The name of the node. If a node name is not supplied, the node rules apply to _all_ nodes belonging to the parent." {type "string"max 1}prop "description" description="A description of this node's purpose." {type "string"}prop "id" description="A globally-unique ID for this node." {type "string"}prop "ref" description="A globally unique reference to another node." {type "string"format "kdl-query"}children {node "prop-names" description="Validations to apply specifically to arbitrary property names" {children ref=r#"[id="validations"]"#}node "other-props-allowed" description="Whether to allow properties other than the ones explicitly listed. Defaults to 'false'." {max 1value {min 1max 1type "boolean"}}node "min" description="minimum number of instances of this node in its parent's children." {max 1value {min 1max 1type "number"}}node "max" description="maximum number of instances of this node in its parent's children." 
{max 1value {min 1max 1type "number"}}node ref=r#"[id="value-tag-node"]"#node "prop" id="prop-node" description="A node property key/value pair." {value description="The property key." {type "string"}prop "id" description="A globally-unique ID of this property." {type "string"}prop "ref" description="A globally unique reference to another property node." {type "string"format "kdl-query"}prop "description" description="A description of this property's purpose." {type "string"}children description="Property-specific validations." {node "required" description="Whether this property is required if its parent is present." {max 1value {min 1max 1type "boolean"}}}children id="validations" description="General value validations." {node "tag" id="value-tag-node" description="The tags associated with this value" {max 1children ref=r#"[id="validations"]"#}node "type" description="The type for this prop's value." {max 1value {min 1type "string"}}node "enum" description="An enumeration of possible values" {max 1value description="Enumeration choices" {min 1}}node "pattern" description="PCRE (Regex) pattern or patterns to test prop values against." {value {min 1type "string"}}node "min-length" description="Minimum length of prop value, if it's a string." {max 1value {min 1type "number"}}node "max-length" description="Maximum length of prop value, if it's a string." {max 1value {min 1type "number"}}node "format" description="Intended data format." {max 1value {min 1type "string"// https://json-schema.org/understanding-json-schema/reference/string.html#formatenum "date-time" "date" "time" "duration" "decimal" "currency" "country-2" "country-3" "country-subdivision" "email" "idn-email" "hostname" "idn-hostname" "ipv4" "ipv6" "url" "url-reference" "irl" "irl-reference" "url-template" "regex" "uuid" "kdl-query" "i8" "i16" "i32" "i64" "u8" "u16" "u32" "u64" "isize" "usize" "f32" "f64" "decimal64" "decimal128"}}node "%" description="Only used for numeric values. 
Constrains them to be multiples of the given number(s)" {max 1value {min 1type "number"}}node ">" description="Only used for numeric values. Constrains them to be greater than the given number(s)" {max 1value {min 1max 1type "number"}}node ">=" description="Only used for numeric values. Constrains them to be greater than or equal to the given number(s)" {max 1value {min 1max 1type "number"}}node "<" description="Only used for numeric values. Constrains them to be less than the given number(s)" {max 1value {min 1max 1type "number"}}node "<=" description="Only used for numeric values. Constrains them to be less than or equal to the given number(s)" {max 1value {min 1max 1type "number"}}}}node "value" id="value-node" description="one or more direct node values" {prop "id" description="A globally-unique ID of this value." {type "string"}prop "ref" description="A globally unique reference to another value node." {type "string"format "kdl-query"}prop "description" description="A description of this property's purpose." {type "string"}children ref=r#"[id="validations"]"#children description="Node value-specific validations" {node "min" description="minimum number of values for this node." {max 1value {min 1max 1type "number"}}node "max" description="maximum number of values for this node." {max 1value {min 1max 1type "number"}}}}node "children" id="children-node" {prop "id" description="A globally-unique ID of this children node." {type "string"}prop "ref" description="A globally unique reference to another children node." {type "string"format "kdl-query"}prop "description" description="A description of this these children's purpose." {type "string"}children ref=r#"[id="node-children"]"#}}}node "definitions" description="Definitions to reference in parts of the top-level nodes" {children {node ref=r#"[id="node-node"]"#node ref=r#"[id="value-node"]"#node ref=r#"[id="prop-node"]"#node ref=r#"[id="children-node"]"#node ref=r#"[id="tag-node"]"#}}}}}
ParseError(#[from] knuffel::Error<LineSpan>),}pub trait ParseToDocumentAst {fn parse(self) -> Result<DocumentAst, IoOrParseError>;}impl ParseToDocumentAst for DocumentAst {fn parse(self) -> Result<DocumentAst, IoOrParseError> {Ok(self)}}impl ParseToDocumentAst for &str {fn parse(self) -> Result<DocumentAst, IoOrParseError> {let result = knuffel::parse_ast("<text input>", self)?;Ok(result)}
#[diagnostic(transparent)]ParseError(#[from] knuffel::Error<Span>),#[error(transparent)]#[diagnostic(transparent)]ValidationFailure(#[from] check::CheckFailure),#[error("{failure}")]#[diagnostic(forward(failure))]SourcedValidationFailure {#[source_code]source: NamedSource,#[source]failure: check::CheckFailure,},
impl ParseToDocumentAst for &Path {fn parse(self) -> Result<DocumentAst, IoOrParseError> {let file_name = self.display().to_string();let file_contents = std::fs::read_to_string(self)?;let result = knuffel::parse_ast(&file_name, &file_contents)?;Ok(result)
impl CheckFailure {fn with_named_source(self, name: impl AsRef<str>, source: String) -> Self {match self {Self::ValidationFailure(failure) => Self::SourcedValidationFailure {source: NamedSource::new(name, source),failure,},_ => self,}
/// Checks `document` against `self`.
///
/// `document` may be anything implementing `ParseToDocumentAst`; the
/// implementation visible in this file first calls `document.parse()` and then
/// runs the check on the resulting AST.
///
/// # Errors
///
/// Returns a `CheckFailure` when parsing or checking fails.
fn check_matches<D: ParseToDocumentAst>(&self, document: D) -> Result<(), CheckFailure>;
/// Checks the document stored at `file_path` against `self`.
///
/// The implementation visible in this file reads the file to a string, parses
/// it with `knuffel::parse_ast` (using the displayed path as the document
/// name) and passes the AST to `check_ast_matches`.
///
/// # Errors
///
/// Returns a `CheckFailure` if reading, parsing or checking fails. A failure
/// coming out of the check is passed through `with_named_source` together with
/// the file name and file text.
fn check_file_matches(&self, file_path: impl AsRef<Path>) -> Result<(), CheckFailure>;
/// Checks `document_text` against `self`; `document_name` is the name given to
/// the parser and to the source attached to a check failure.
///
/// # Errors
///
/// Returns a `CheckFailure` if parsing or checking fails.
fn check_text_matches(&self,document_name: &str,document_text: &str,) -> Result<(), CheckFailure>;
/// Checks an already parsed `document_ast` against `self`.
///
/// # Errors
///
/// Returns a `CheckFailure` if the check fails.
fn check_ast_matches(&self, document_ast: DocumentAst) -> Result<(), CheckFailure>;
fn check_matches<D: ParseToDocumentAst>(&self, document: D) -> Result<(), CheckFailure> {let document = document.parse()?;let _ = check::check(document, self)?;
fn check_file_matches(&self, file_path: impl AsRef<Path>) -> Result<(), CheckFailure> {let file_path = file_path.as_ref();let file_name = file_path.display().to_string();let file_text = std::fs::read_to_string(file_path)?;let ast = knuffel::parse_ast(&file_name, &file_text)?;self.check_ast_matches(ast).map_err(|err| err.with_named_source(file_name, file_text))}fn check_text_matches(&self,document_name: &str,document_text: &str,) -> Result<(), CheckFailure> {let ast = knuffel::parse_ast(document_name, document_text)?;self.check_ast_matches(ast).map_err(|err| err.with_named_source(document_name, document_text.to_string()))}fn check_ast_matches(&self, document_ast: DocumentAst) -> Result<(), CheckFailure> {check::check(document_ast, self)?;
use kdl_schema::Schema;
use std::borrow::{Borrow, Cow};use std::collections::HashSet;use std::str::FromStr;use kdl_schema::{Children as ChildrenSchema, Format, Node as NodeSchema, Prop as PropSchema, Schema, Validation,Value as ValueSchema,};use knuffel::ast::Literal;use knuffel::span::Span;
/// Error type for a failed check.
///
/// This definition has no variants, so no value of this type can be
/// constructed from it.
pub enum CheckFailure {}
pub enum CheckFailure {#[error("schema error: node ref `{0}` could not be resolved")]MissingNodeRef(String),#[error("schema error: prop ref `{0}` could not be resolved")]MissingPropRef(String),#[error("schema error: value ref `{0}` could not be resolved")]MissingValueRef(String),#[error("schema error: children ref `{0}` could not be resolved")]MissingChildrenRef(String),#[error("wrong number of {expected_type}: expected {expected_number_range} but got {actual_number}")]MinMaxViolation {expected_type: String,expected_number_range: String,actual_number: usize,},#[error("unexpected node {name}")]#[diagnostic()]UnexpectedNode {#[label]span: Span,name: String,},#[error("unexpected prop {key}")]#[diagnostic()]UnexpectedProp {#[label]span: Span,key: String,},#[error("prop {key} missing")]MissingProp { key: String },#[error("value {actual} not in enum list {expected:?}")]EnumViolation {actual: String,expected: Vec<String>,},#[error("value {value:?} does not have type {expected_type}")]#[diagnostic()]IncorrectType {#[label]span: Span,value: Value, // TODO display correctlyexpected_type: String,},#[error("value {value} not of any formats {formats:?}")]#[diagnostic()]IncorrectStringFormat {#[label]span: Span,value: String,formats: Vec<Format>,// TODO pass details upwards somehow},}pub type Result<T = ()> = std::result::Result<T, CheckFailure>;type Name = knuffel::ast::SpannedName<Span>;type Node = knuffel::ast::SpannedNode<Span>;type PropsMap = std::collections::BTreeMap<Name, Value>;type Value = knuffel::ast::Value<Span>;fn check_min_max(actual: usize,min: Option<usize>,max: Option<usize>,get_expected_type: impl Fn() -> String,) -> Result {let below_min = min.map_or(false, |min| actual < min);let above_max = max.map_or(false, |max| actual > max);if below_min || above_max {let what_was_expected = get_expected_type();let how_many_were_expected = match (min, max) {(Some(min), Some(max)) => {if min == max {format!("exactly {}", min)} else {format!("between {} and {}", min, 
max)}}(Some(min), None) => format!("at least {}", min),(None, Some(max)) => format!("no more than {}", max),(None, None) => unreachable!(),};Err(CheckFailure::MinMaxViolation {expected_type: what_was_expected,expected_number_range: how_many_were_expected,actual_number: actual,})} else {Ok(())}}fn resolve_node_refs<'a>(schema: &'a Schema,nodes: &'a [NodeSchema],) -> Result<Vec<&'a NodeSchema>> {nodes.iter().map(|node| match &node.ref_ {None => Ok(node),Some(r#ref) => schema.resolve_node_ref(r#ref).ok_or_else(|| CheckFailure::MissingNodeRef(r#ref.clone())),}).collect()}fn resolve_prop_refs<'a>(schema: &'a Schema,props: &'a [PropSchema],) -> Result<Vec<&'a PropSchema>> {props.iter().map(|prop| match &prop.ref_ {None => Ok(prop),Some(r#ref) => schema.resolve_prop_ref(r#ref).ok_or_else(|| CheckFailure::MissingPropRef(r#ref.clone())),}).collect()}fn resolve_value_refs<'a>(schema: &'a Schema,values: &'a [ValueSchema],) -> Result<Vec<&'a ValueSchema>> {values.iter().map(|value| match &value.ref_ {None => Ok(value),Some(r#ref) => schema.resolve_value_ref(r#ref).ok_or_else(|| CheckFailure::MissingValueRef(r#ref.clone())),}).collect()}fn resolve_children_refs<'a>(schema: &'a Schema,children: &'a [ChildrenSchema],) -> Result<Vec<&'a ChildrenSchema>> {children.iter().map(|children| match &children.ref_ {None => Ok(children),Some(r#ref) => schema.resolve_children_ref(r#ref).ok_or_else(|| CheckFailure::MissingChildrenRef(r#ref.clone())),}).collect()}pub(crate) fn check(document: DocumentAst, schema: &Schema) -> Result {check_nodes(&document.nodes,schema,&resolve_node_refs(schema, &schema.document.nodes)?,)}
pub(crate) fn check(document: DocumentAst, schema: &Schema) -> Result<(), CheckFailure> {todo!()
fn check_nodes(nodes: &[Node], schema: &Schema, nodes_schema: &[&NodeSchema]) -> Result {let nodes: Vec<(usize, &Node)> = nodes.iter().enumerate().collect();// TODO check node-names// TODO check other-nodes-allowed// TODO check tags, tag-names, other-tags-allowedlet mut nodes_pending_validation: HashSet<usize> = nodes.iter().map(|(i, _)| *i).collect();for node_schema in nodes_schema {let applicable_nodes: Vec<(usize, &Node)> = nodes.iter().filter(|(_, node)| match &node_schema.name {Some(schema_name) => schema_name.as_str() == node.node_name.as_ref(),None => true,}).copied().collect();check_min_max(applicable_nodes.len(),node_schema.min,node_schema.max,|| match &node_schema.name {Some(schema_name) => format!("`{}` nodes", schema_name),None => format!("nodes"),},)?;for (index, node) in applicable_nodes {check_node(node, schema, node_schema)?;nodes_pending_validation.remove(&index);}}let invalid_node = nodes.into_iter().find(|(i, _)| nodes_pending_validation.contains(i));match invalid_node {Some((_, node)) => Err(CheckFailure::UnexpectedNode {span: node.span().clone(),name: node.node_name.to_string(),}),None => Ok(()),}}fn check_node(node: &Node, schema: &Schema, node_schema: &NodeSchema) -> Result {let node_name = &node.node_name;// schema is relevant if either its name matches this node's name or it has no namelet schema_applies = match &node_schema.name {Some(schema_name) => schema_name == node_name.as_ref(),None => true,};if !schema_applies {return Ok(());}// TODO check prop-names// TODO check other-props-allowed// TODO check tagcheck_props(&node.properties,// TODO only do this once per node_schema&resolve_prop_refs(schema, &node_schema.props)?,)?;check_values(&node.arguments,// TODO only do this once per node_schema&resolve_value_refs(schema, &node_schema.values)?,)?;if let Some(children) = &node.children {check_children(children,schema,// TODO only do this once per node_schema&resolve_children_refs(schema, &node_schema.children)?,)?;}Ok(())}fn 
check_props(props: &PropsMap, props_schema: &[&PropSchema]) -> Result {let props: Vec<(usize, (&Name, &Value))> = props.iter().enumerate().collect();let mut props_pending_validation: HashSet<usize> = props.iter().map(|(i, _)| *i).collect();for prop_schema in props_schema {let applicable_props: Vec<(usize, (&Name, &Value))> = props.iter().filter(|(_, (key, _))| match &prop_schema.key {Some(schema_key) => schema_key.as_str() == key.as_ref(),None => true,}).copied().collect();if prop_schema.required && applicable_props.is_empty() {// TODO preserve node span// TODO reasonably handle non-keyed requiredreturn Err(CheckFailure::MissingProp {key: prop_schema.key.clone().unwrap(),});}for (index, (_, value)) in applicable_props {check_prop(value, prop_schema)?;props_pending_validation.remove(&index);}}let invalid_prop = props.into_iter().find(|(i, _)| props_pending_validation.contains(i));match invalid_prop {Some((_, (name, _))) => Err(CheckFailure::UnexpectedProp {span: name.span().clone(),key: name.to_string(),}),None => Ok(()),}
fn check_prop(prop_value: &Value, prop_schema: &PropSchema) -> Result {for validation in &prop_schema.validations {check_validation(prop_value, validation)?;}Ok(())}fn check_values(values: &[Value], values_schemas: &[&ValueSchema]) -> Result {// Cases that are currently handled are// - one schema that applies to all values// - no schemas, no values// TODO allow several required (min 1; max 1;) values// TODO allow several optional (max 1;) values// TODO error properly when not thatif values.len() == 0 && values_schemas.len() == 0 {return Ok(());}let schemas_coherent = values_schemas.len() == 1;assert!(schemas_coherent, "values schemas confusing");let values_schema = values_schemas[0];check_min_max(values.len(), values_schema.min, values_schema.max, || {format!("values")})?;for value in values {check_value(value, values_schema)?;}Ok(())}fn check_value(value: &Value, values_schema: &ValueSchema) -> Result {for validation in &values_schema.validations {check_validation(value, validation)?;}Ok(())}fn check_children(children: &[Node],schema: &Schema,children_schema: &[&ChildrenSchema],) -> Result {let node_schemas: Vec<&NodeSchema> = children_schema.iter().map(|children| resolve_node_refs(schema, &children.nodes))// TODO don't allocate twice here.collect::<Result<Vec<Vec<&NodeSchema>>>>()?.into_iter().flatten().collect();check_nodes(children, schema, &node_schemas)}fn check_validation(value: &Value, validation: &Validation) -> Result {match validation {// TODO tagValidation::Type(r#type) if r#type == "string" => {let _ = get_string_value(value)?;}Validation::Type(r#type) if r#type == "number" => match value.literal.borrow() {Literal::Int(_) | Literal::Decimal(_) => {}_ => {return Err(CheckFailure::IncorrectType {span: value.literal.span().clone(),value: value.clone(),expected_type: r#type.clone(),});}},Validation::Type(r#type) => todo!("validate type {}", r#type),Validation::Enum(enum_values) => {let search_target = match value.literal.borrow() {Literal::Null => 
Cow::Borrowed("enum"),Literal::Bool(x) => {if *x {Cow::Borrowed("true")} else {Cow::Borrowed("false")}}Literal::Int(x) => match i64::try_from(x) {Ok(x) => Cow::Owned(x.to_string()),_ => todo!("get value from Integer"),},Literal::String(x) => Cow::Borrowed(x.as_ref()),Literal::Decimal(_) => todo!("get value from Decimal"),};if !enum_values.iter().any(|enum_value| enum_value == search_target.as_ref()){return Err(CheckFailure::EnumViolation {actual: search_target.into_owned(),expected: enum_values.clone(),});}}Validation::Format(valid_formats) => {for format in valid_formats {match format {Format::Date => {if let Ok(value) = get_string_value(value) {if chrono::NaiveDate::from_str(value).is_ok() {return Ok(());}}}Format::Url => {if let Ok(value) = get_string_value(value) {if Url::parse(value).is_ok() {return Ok(());}}}Format::Regex => {if let Ok(value) = get_string_value(value) {if Regex::new(value).is_ok() {return Ok(());}}}Format::KdlQuery => {if let Ok(_value) = get_string_value(value) {// TODO validate KDL queriesreturn Ok(());}}format => todo!("validate format {:?}", format),}}return match value.literal.borrow() {Literal::String(string_value) => Err(CheckFailure::IncorrectStringFormat {span: value.literal.span().clone(),value: string_value.to_string(),formats: valid_formats.clone(),}),value => todo!("no valid format for {:?}", value),};}Validation::Pattern(regex) => {// TODO should that be a full-string match or just a partial-string matchlet regex = Regex::new(&format!("^{}$", regex)).expect("invalid regex in schema");let value = if let Literal::String(value) = value.literal.borrow() {value} else {todo!("error for can't regex a non-string")};if !regex.is_match(value) {todo!("error for regex failure")}}}Ok(())}fn get_string_value(value: &Value) -> Result<&Box<str>> {match value.literal.borrow() {Literal::String(x) => Ok(x),_ => Err(CheckFailure::IncorrectType {span: value.literal.span().clone(),value: value.clone(),expected_type: "string".to_string(),}),}}
"autocfg",][[package]]name = "num-integer"version = "0.1.44"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"dependencies = ["autocfg","num-traits",][[package]]name = "num-traits"version = "0.2.14"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"dependencies = [
][[package]]name = "time"version = "0.1.44"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"dependencies = ["libc","wasi","winapi",][[package]]name = "tinyvec"version = "1.5.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2"dependencies = ["tinyvec_macros",