V4EAN7NNXKHQFA3HDHRREMWNWRB3XSHIOTE2G4VLHTRMTFD7NP4QC
[package]
name = "zhedds"
description = "zhed data structures"
version = "0.0.0"
edition = "2018"
license = "Apache-2.0"
publish = false
[dependencies]
[dependencies.chrono]
version = "0.4"
default-features = false
features = [ "serde" ]
[dependencies.serde]
version = "1.0"
features = [ "derive" ]
[dependencies.uuid]
version = "0.8"
features = [ "serde", "v5" ]
use serde::{Deserialize, Serialize};
pub use uuid::Uuid;
/// UUID namespace for ZHED.
///
/// Fixed 16-byte value, `2304c398-3027-4494-bd67-f843d5630646` in the usual
/// hyphenated notation.
// NOTE(review): presumably the namespace for name-based (v5) UUIDs, since the
// manifest enables uuid's `v5` feature — confirm against the call sites.
pub const NAMESPACE_ZHED: Uuid = Uuid::from_bytes([
0x23, 0x04, 0xc3, 0x98, 0x30, 0x27, 0x44, 0x94, 0xbd, 0x67, 0xf8, 0x43, 0xd5, 0x63, 0x06, 0x46,
]);
/// Document identity: a UUID plus a structural epoch.
///
/// Used solely for references, which are resolved via the workspace.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize)]
pub struct Ident {
/// UUID of the referenced document.
pub id: Uuid,
/// A counter, starting at 0, which should be increased every time the document
/// structure changes drastically. It lets tools warn users who combine it with
/// documents that have not seen this epoch yet, as links might be broken.
pub epoch: u32,
}
/// Date type used for references: a `chrono` date fixed to the UTC time zone.
// NOTE(review): newer chrono 0.4.x releases deprecate `chrono::Date`; confirm
// which replacement (e.g. `NaiveDate`) fits before bumping the dependency.
pub type RefDate = chrono::Date<chrono::Utc>;
/** Document header.

We omit the document type tags of older formats: differentiating by tag would
make parsing cumbersome, so documents are differentiated directly via indexes
instead. Historically, type tags differed vastly between formats anyway, which
made them slightly incompatible.
**/
#[derive(Clone, Deserialize, Serialize)]
pub struct Header {
// `flatten` makes serde read/write the `Ident` fields (`id`, `epoch`) directly
// at the header's own level instead of under an `ident` key.
#[serde(flatten)]
ident: Ident,
}
use std::{collections::BTreeMap, fmt};
use crate::{PackList, Section, ItemData};
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum Error {
#[error("unexpected data after range specification in header")]
DataAfterRangeSpec,
#[error("section is defined multiple times")]
SectionOverwritten(String),
#[error("invalid item: {0}")]
InvalidItem(String),
}
impl std::str::FromStr for PackList {
    type Err = Error;

    /// Parses the textual pack list format.
    ///
    /// The first line is the header: the list name, optionally followed by a
    /// range in parentheses and/or a trailing `:`. Every following non-blank
    /// line is an item (see `parse_item`). An item whose description starts
    /// with `#` opens a new section named after the rest of the description;
    /// items before the first such line land in the section with the empty name.
    ///
    /// An empty input yields a list with an empty name, an empty range and no
    /// sections.
    ///
    /// # Errors
    ///
    /// * `Error::DataAfterRangeSpec` if the header has text after the closing
    ///   parenthesis of the range.
    /// * `Error::SectionOverwritten` if a section name occurs more than once.
    /// * `Error::InvalidItem` if an item line is malformed.
    fn from_str(s: &str) -> Result<Self, Error> {
        let mut it = s.lines();

        // 1st line: packlist name and range.
        // `lines()` yields nothing for an empty input; fall back to an empty
        // header instead of panicking on user-supplied text.
        let mut fst = it.next().unwrap_or("").trim();
        if let Some(x) = fst.strip_suffix(':') {
            fst = x;
        }

        let (name, range) = match (fst.find('('), fst.rfind(')')) {
            (Some(rstart), Some(rend)) if rstart < rend => {
                // the range must be the last thing in the header
                if rend != fst.len() - 1 {
                    return Err(Error::DataAfterRangeSpec);
                }
                let mut name = fst[..rstart].trim();
                if let Some(x) = name.strip_suffix(':') {
                    name = x.trim();
                }
                (name, &fst[rstart + 1..rend])
            }
            // no (well-formed) parenthesised range: the whole header is the name
            _ => (fst, ""),
        };

        // to-pack items
        let mut sections = BTreeMap::new();
        // section currently being filled: (name, content)
        let mut section = (String::new(), Section::default());

        // Moves the current section into `sections` and leaves an empty one
        // behind; bails out of `from_str` if the name is already taken.
        macro_rules! commit {
            () => {{
                if sections.contains_key(&section.0) {
                    return Err(Error::SectionOverwritten(section.0));
                }
                sections.insert(
                    std::mem::take(&mut section.0),
                    std::mem::take(&mut section.1),
                );
            }};
        }

        for line in it {
            if line.trim().is_empty() {
                continue;
            }
            let (itemdata, desc) = parse_item(line)?;
            if let Some(x) = desc.strip_prefix('#') {
                // section header: finish the previous section, start a new one
                commit!();
                section.0 = x.trim().to_string();
                section.1.summary = itemdata;
            } else {
                section.1.items.push((itemdata, desc));
            }
        }

        // commit the trailing section unless it is the untouched initial one
        if !section.0.is_empty() || !section.1.items.is_empty() {
            commit!();
        }

        Ok(PackList {
            name: name.to_string(),
            range: range.to_string(),
            sections,
        })
    }
}
fn parse_item(line: &str) -> Result<(ItemData, String), Error> {
if line.len() < 5 {
return Err(Error::InvalidItem(line.to_string()));
}
let mut it = line.chars();
let c1 = it.next().unwrap();
let premarker = it.next().unwrap();
let _ = it.next().unwrap();
let c2 = it.next().unwrap();
if c1 != '[' || c2 != ']' {
return Err(Error::InvalidItem(line.to_string()));
}
let mut it = it.peekable();
while it.next_if(|i| i.is_whitespace()).is_some() {}
// check for multiplier
// btp : backtrack point if no valid multiplier is found
let btp = it.clone();
let mut multiplier = None;
while let Some(x) = it.next_if(|i| i.is_ascii_digit()) {
let m = multiplier.get_or_insert(0);
*m *= 10;
*m += u32::from((x as u8) - b'0');
}
if it.next_if(|&i| i == 'x').is_none() {
multiplier = None;
}
while it.next_if(|i| i.is_whitespace()).is_some() {}
if multiplier.is_none() {
it = btp;
}
Ok((ItemData {
premarker,
multiplier: multiplier.unwrap_or(1),
}, it.collect()))
}
impl fmt::Display for PackList {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.name)?;
if !self.range.is_empty() {
write!(f, " ({})", self.range)?;
}
writeln!(f)?;
for (section, sdata) in &self.sections {
if !section.is_empty() {
writeln!(f)?;
write!(f, "[{} ] ", sdata.summary.premarker)?;
if sdata.summary.multiplier != 1 {
write!(f, "{}x ", sdata.summary.multiplier)?;
}
writeln!(f, "# {}", section)?;
}
for item in &sdata.items {
write!(f, "[{} ] ", item.0.premarker)?;
if item.0.multiplier != 1 {
write!(f, "{}x ", item.0.multiplier)?;
}
writeln!(f, "{}", item.1)?;
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Round trip: two header spellings (with and without trailing `:`) parse
    /// to the same `PackList`, and formatting it reproduces the colon-less
    /// input byte for byte.
    ///
    /// The fixtures use the four-character item marker (`[`, premarker, one
    /// filler character, `]`) that `parse_item` requires and `Display` emits,
    /// and contain the blank line `Display` writes before a section header.
    #[test]
    fn ex0() {
        let inp1 = r#"Packliste Ölland (80 Tage, 50 Nächtsle):
[  ] 15 Ruß
[  ] 132x Wattestäbe
[# ] 0x Handtuch

[- ] 5000x # Wachsbaum ...
[  ] Ranke
[* ] 5 x Metrik + BNaumbnd
"#;
        let inp2 = r#"Packliste Ölland (80 Tage, 50 Nächtsle)
[  ] 15 Ruß
[  ] 132x Wattestäbe
[# ] 0x Handtuch

[- ] 5000x # Wachsbaum ...
[  ] Ranke
[* ] 5 x Metrik + BNaumbnd
"#;
        let mut sections = BTreeMap::new();
        // items before the first `#` header belong to the unnamed section
        sections.insert(String::new(), Section {
            summary: ItemData::default(),
            items: vec![
                // "15" is not followed by `x`, so it is part of the description
                (ItemData { premarker: ' ', multiplier: 1 }, "15 Ruß".to_string()),
                (ItemData { premarker: ' ', multiplier: 132 }, "Wattestäbe".to_string()),
                (ItemData { premarker: '#', multiplier: 0 }, "Handtuch".to_string()),
            ],
        });
        sections.insert("Wachsbaum ...".to_string(), Section {
            summary: ItemData {
                premarker: '-',
                multiplier: 5000,
            },
            items: vec![
                (ItemData { premarker: ' ', multiplier: 1 }, "Ranke".to_string()),
                // "5 x": the `x` must directly follow the digits to count
                (ItemData { premarker: '*', multiplier: 1 }, "5 x Metrik + BNaumbnd".to_string()),
            ],
        });
        let res = PackList {
            name: "Packliste Ölland".to_string(),
            range: "80 Tage, 50 Nächtsle".to_string(),
            sections,
        };
        assert_eq!(inp1.parse::<PackList>().unwrap(), res);
        assert_eq!(inp2.parse::<PackList>().unwrap(), res);
        assert_eq!(res.to_string(), inp2);
    }

    /// Each error variant is reachable from malformed input.
    #[test]
    fn fail0() {
        assert_eq!("Packl (xyz) ...\n".parse::<PackList>(), Err(Error::DataAfterRangeSpec));
        assert_eq!(
            "P\n[  ] # 1\n[  ] # 1\n".parse::<PackList>(),
            Err(Error::SectionOverwritten("1".to_string()))
        );
        assert_eq!("P\n[ ]".parse::<PackList>(), Err(Error::InvalidItem("[ ]".to_string())));
    }
}
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::Ident;
/// Spacing element that can appear as a `Value`.
#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq)]
pub enum Space {
// NOTE(review): presumably a paragraph break, going by the name — confirm
// against the formatters that consume it.
ParaBreak,
}
/// Kind of a `Value::List`.
// NOTE(review): unlike `Space`, this field-less enum does not derive `Copy`.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
pub enum ListType {
/// List without a numbering of its entries (presumably rendered as bullets).
Unordered,
/// List whose entries are ordered (presumably rendered numbered).
Ordered,
}
/// A node of the document value tree.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
pub enum Value {
/// Unbound variable, referenced by UUID to make renaming easier.
UnboundVar(Uuid),
/// Bound variable, referenced using
/// [De Bruijn indices](https://en.wikipedia.org/wiki/De_Bruijn_index),
/// starting at 0.
BoundVar(usize),
/// Tagged element: the tag's UUID and its arguments. No order of evaluation
/// is guaranteed; arguments are lazily evaluated.
Tag(Uuid, Vec<Value>),
/// Spacing element.
Space(Space),
/// List of the given type with its entries.
List(ListType, Vec<Value>),
}
/** Document data structure.

## Header

We omit the document type tags of older formats: differentiating by tag would
make parsing cumbersome, so documents are differentiated directly via indexes
instead. Historically, type tags differed vastly between formats anyway, which
made them slightly incompatible.
**/
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Document {
// `flatten` makes serde read/write the `Ident` fields directly at the
// document's own level instead of under an `ident` key.
#[serde(flatten)]
ident: Ident,
// String stored for each unbound variable UUID; this is what
// `Context::lookup_unbound_var` hands out.
unbound_vars: HashMap<Uuid, String>,
// Top-level values of the document.
toplevel: Vec<Value>,
}
/// Variable lookup state: the borrowed table of unbound variables plus a stack
/// of currently bound variable names.
pub struct Context<'a> {
// Borrowed table of unbound variables (UUID -> string); `Document::ctx`
// points this at the document's own table.
unbound_vars: &'a HashMap<Uuid, String>,
// Stack of bound variable names; the innermost binding is the last element.
bound_vars: Vec<String>,
}
impl Document {
    /// Creates a fresh lookup context for this document: it borrows the
    /// document's unbound variable table and starts with no bound variables.
    pub fn ctx(&self) -> Context<'_> {
        let unbound_vars = &self.unbound_vars;
        let bound_vars = Vec::new();
        Context {
            unbound_vars,
            bound_vars,
        }
    }
}
/// Errors raised by variable lookups on a `Context`.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// No entry exists for this UUID in the unbound variable table.
#[error("unbound variable not found: {0}")]
UnboundVarNotFound(Uuid),
/// The requested bound variable index does not refer to an active binding.
#[error("bound variable not found")]
BoundVarNotFound,
}
impl<'a> Context<'a> {
pub fn lookup_unbound_var(&self, id: &Uuid) -> Result<&'a str, Error> {
self.unbound_vars.get(id).map(|i| i.as_str()).ok_or_else(|| Error::UnboundVarNotFound(id.clone()))
}
pub fn on_bound_var<F, R>(&mut self, name: String, f: F) -> R
where
F: FnOnce(&mut Self) -> R,
{
use std::panic::{catch_unwind, AssertUnwindSafe, resume_unwind};
#[cfg(debug_assertions)]
let name2 = name.clone();
self.bound_vars.push(name);
let tmp = catch_unwind(AssertUnwindSafe(|| f(self)));
#[allow(unused_variables)]
let tmp2 = self.bound_vars.pop();
match tmp {
Ok(x) => {
#[cfg(debug_assertions)]
if tmp2 != Some(name2) {
panic!("mismatching bound vars");
}
x
},
Err(y) => resume_unwind(y),
}
}
pub fn lookup_bound_var(&self, id: usize) -> Result<&str, Error> {
self.bound_vars.get(self.bound_vars.len() - 1 - id).map(|i| i.as_str()).ok_or_else(|| Error::BoundVarNotFound)
}
}
/// Formatter-oriented serialization, because we usually define that per formatter,
/// and the structures which are serialized stay mostly the same.
///
/// Both methods take `self` by value, so one serializer value handles one call.
pub trait Serializer: Sized {
/// Value produced by a successful serialization.
type Ok;
/// Error type of the formatter; lookup errors (`Error`) must convert into it.
type Error: From<Error> + std::error::Error;
/// Serializes a single value, using `ctx` for variable lookups and bindings.
fn serialize_value(self, v: &Value, ctx: &mut Context<'_>) -> Result<Self::Ok, Self::Error>;
/// Serializes a whole document.
fn serialize_document(self, d: &Document) -> Result<Self::Ok, Self::Error>;
}
use uuid::Uuid;
/// Main UUID namespace: `2304c398-3027-4494-bd67-f843d5630646`.
pub const ZHED: Uuid = Uuid::from_bytes([
0x23, 0x04, 0xc3, 0x98, 0x30, 0x27, 0x44, 0x94, 0xbd, 0x67, 0xf8, 0x43, 0xd5, 0x63, 0x06, 0x46,
]);
/// UUID `cae113ee-08c2-543e-8a5c-e43fc1de6be7`.
// NOTE(review): the version nibble is 5 (name-based), so this was presumably
// derived from the `ZHED` namespace; the name used is not visible here.
// Presumably identifies unbound variables — confirm against the users.
pub const UNBOUND_VAR: Uuid = Uuid::from_bytes([
0xca, 0xe1, 0x13, 0xee, 0x08, 0xc2, 0x54, 0x3e, 0x8a, 0x5c, 0xe4, 0x3f, 0xc1, 0xde, 0x6b, 0xe7,
]);
/// UUID `c46a9c81-6b75-54f6-b4a5-b6f148676041`.
// NOTE(review): the version nibble is 5 (name-based), so this was presumably
// derived from the `ZHED` namespace; the name used is not visible here.
// Presumably identifies functions — confirm against the users.
pub const FUNCTION: Uuid = Uuid::from_bytes([
0xc4, 0x6a, 0x9c, 0x81, 0x6b, 0x75, 0x54, 0xf6, 0xb4, 0xa5, 0xb6, 0xf1, 0x48, 0x67, 0x60, 0x41,
]);
/// UUID `58508b2e-898e-5b19-8bc1-da766b489919`.
// NOTE(review): the version nibble is 5 (name-based), so this was presumably
// derived from the `ZHED` namespace; the name used is not visible here.
// Presumably identifies markup tags — confirm against the users.
pub const MARKUP_TAG: Uuid = Uuid::from_bytes([
0x58, 0x50, 0x8b, 0x2e, 0x89, 0x8e, 0x5b, 0x19, 0x8b, 0xc1, 0xda, 0x76, 0x6b, 0x48, 0x99, 0x19,
]);
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
pub use uuid::Uuid;
pub mod uuid_ns;
pub mod valuemodel;
/// Document identity: a UUID plus a structural epoch.
///
/// Used solely for references, which are resolved via the workspace.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize)]
pub struct Ident {
/// UUID of the referenced document.
pub id: Uuid,
/// A counter, starting at 0, which should be increased every time the document
/// structure changes drastically. It lets tools warn users who combine it with
/// documents that have not seen this epoch yet, as links might be broken.
pub epoch: u32,
}
/// An export format, able to turn values of type `V` into strings.
pub trait Strifier<V>: Sized {
/// Consumes the format and renders `v`; the result is either a `'static`
/// string slice or an owned `String`.
fn strify(self, v: V) -> Cow<'static, str>;
}
[package]
name = "zhed-core"
description = "zhed core data structures"
version = "0.0.0"
edition = "2018"
license = "Apache-2.0"
[dependencies]
thiserror = "1.0"
[dependencies.chrono]
version = "0.4"
default-features = false
features = [ "serde" ]
[dependencies.serde]
version = "1.0"
features = [ "derive" ]
[dependencies.uuid]
version = "0.8"
features = [ "serde", "v5" ]