VLUIVUU5SQKL6NPXBHT75UHM23UJ34IX24HSGXIBKFKRIFJIFBJAC
# notes about dependencies
* `zhedds` and `zhed-misc-parsers` should not depend on each other.
use serde::{Deserialize, Serialize};
pub use uuid::Uuid;
/// UUID namespace for ZHED.
///
/// A fixed 16-byte namespace identifier, constructed as a constant via
/// `Uuid::from_bytes`.
///
/// NOTE(review): presumably meant to be passed as the namespace when deriving
/// name-based (v5) UUIDs -- confirm against the callers.
pub const NAMESPACE_ZHED: Uuid = Uuid::from_bytes([
0x23, 0x04, 0xc3, 0x98, 0x30, 0x27, 0x44, 0x94, 0xbd, 0x67, 0xf8, 0x43, 0xd5, 0x63, 0x06, 0x46,
]);
/// Document identifier.
///
/// Used solely for references, which are resolved via the workspace.
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize)]
pub struct Ident {
/// The UUID part of the identifier.
pub id: Uuid,
/// A counter, starting at 0, which should be increased every time the document
/// structure changes drastically. It allows warning users when the document is
/// used together with documents which have not yet seen that epoch, as links
/// might be broken in that case.
pub epoch: u32,
}
/// Reference date: a calendar date in the UTC time zone (`chrono::Date<Utc>`).
pub type RefDate = chrono::Date<chrono::Utc>;
/** Document header.

Document type tags of older formats are deliberately omitted: differentiating
between them here would make parsing cumbersome, so documents are
differentiated directly via indexes instead. Historically, type tags were
vastly different per format anyway, which made them slightly incompatible.
**/
#[derive(Clone, Deserialize, Serialize)]
pub struct Header {
/// Identifier of the document. Because of `#[serde(flatten)]`, the fields of
/// `Ident` are (de)serialized inline, as if they were fields of the header itself.
#[serde(flatten)]
ident: Ident,
}
[package]
name = "zhedds"
description = "zhed data structures"
version = "0.0.0"
edition = "2018"
license = "Apache-2.0"
publish = false
[dependencies]
[dependencies.chrono]
version = "0.4"
default-features = false
features = [ "serde" ]
[dependencies.serde]
version = "1.0"
features = [ "derive" ]
[dependencies.uuid]
version = "0.8"
features = [ "serde", "v5" ]
// Diary date parsing; everything public in `date` is re-exported at this level.
mod date;
pub use crate::date::*;
// dbtin dump parsing; `parse` and `Node` are re-exported under dbtin-specific
// names (`parse_dbtin`, `DbtinNode`).
mod dbtin;
pub use crate::dbtin::{parse as parse_dbtin, Node as DbtinNode};
use std::collections::HashMap;
/// A node of the tree produced by parsing a dbtin dump.
///
/// All strings are borrowed (lifetime `'a`); nothing is copied into the tree.
#[derive(Clone, Debug, PartialEq)]
pub enum Node<'a> {
/// An inner node, mapping a name (object path component or key) to its child node.
Branch(HashMap<&'a str, Node<'a>>),
/// A terminal node holding a value string.
Leaf(&'a str),
}
/** parser for the dbtin dump format, does not allow any escape symbols.
```text
:PATH/TO/OBJECT
KEY1: VALUE1
KEY2: VALUE2
```
**/
pub fn parse(s: &str) -> Option<Node<'_>> {
let mut ret = Node::Branch(HashMap::new());
let mut selobj = None;
macro_rules! finish_selobj {
() => {{
if let Some((oname, okvm)) = selobj.take() {
if !ret.push_to_leaves(oname, okvm) {
return None;
}
}
}};
}
for i in s.lines() {
if i.trim().is_empty() {
continue;
}
let dcl = i.find(':')?;
if dcl == 0 {
// object name
finish_selobj!();
selobj = Some((&i[1..], HashMap::new()));
} else if let Some((_, ref mut kvm)) = &mut selobj {
// key-value pair
kvm.insert(&i[..dcl], i[dcl + 1..].trim_start());
} else {
return None;
}
}
finish_selobj!();
Some(ret)
}
impl<'a> Node<'a> {
    /// Merges the key-value pairs `kvm` of one object into the tree below `self`.
    ///
    /// `obj` is the `/`-separated path of the object; empty path components are
    /// ignored and missing intermediate branches are created on the way. Every
    /// pair of `kvm` becomes a [`Node::Leaf`] inside the branch the path leads to.
    ///
    /// Returns `false` if `self` or any node along the path is a leaf, or if
    /// one of the keys already exists in the target branch. In the latter case
    /// pairs handled before the conflicting one stay inserted.
    fn push_to_leaves(&mut self, obj: &'a str, kvm: HashMap<&'a str, &'a str>) -> bool {
        use std::collections::hash_map::Entry;

        let mut cursor: &mut HashMap<&'a str, Node<'a>> = match self {
            Node::Branch(children) => children,
            Node::Leaf(_) => return false,
        };

        // descend along the path, creating branches where necessary
        for segment in obj.split('/') {
            if segment.is_empty() {
                continue;
            }
            let child = cursor
                .entry(segment)
                .or_insert_with(|| Node::Branch(HashMap::new()));
            match child {
                Node::Branch(children) => cursor = children,
                Node::Leaf(_) => return false,
            }
        }

        // attach the values; existing entries are never overwritten
        for (key, value) in kvm {
            match cursor.entry(key) {
                Entry::Occupied(_) => return false,
                Entry::Vacant(slot) => {
                    slot.insert(Node::Leaf(value));
                }
            }
        }
        true
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Node::Branch` from a list of `(name, child)` pairs.
    fn branch<'a>(children: Vec<(&'a str, Node<'a>)>) -> Node<'a> {
        Node::Branch(children.into_iter().collect())
    }

    /// Parses a dump with three objects (two of them nested below the third)
    /// and compares the result against the expected tree.
    #[test]
    fn ex_ok() {
        // The input is spelled out line by line with explicit `\n` so that
        // significant whitespace (the trailing space of "Ja " and the space
        // before the colon of "locan ") cannot be stripped by editors or formatters.
        let input = concat!(
            "\n",
            ":Schule/Orga/Fach/ETH\n",
            "trfull: Ethik\n",
            "tanken: Ja \n",
            ":Schule/Orga/Fach/RUF\n",
            "trfull: Rufen\n",
            "tanken: Nein\n",
            ":Schule/Orga\n",
            "locan : bl uüpp\n",
        );

        let eth = branch(vec![
            ("trfull", Node::Leaf("Ethik")),
            // values only have leading whitespace removed, trailing whitespace is kept
            ("tanken", Node::Leaf("Ja ")),
        ]);
        let ruf = branch(vec![
            ("trfull", Node::Leaf("Rufen")),
            ("tanken", Node::Leaf("Nein")),
        ]);
        let orga = branch(vec![
            // keys are taken verbatim, including the space before the colon
            ("locan ", Node::Leaf("bl uüpp")),
            ("Fach", branch(vec![("ETH", eth), ("RUF", ruf)])),
        ]);
        let expected = branch(vec![("Schule", branch(vec![("Orga", orga)]))]);

        assert_eq!(parse(input), Some(expected));
    }
}
use chrono::NaiveDate;
use std::path::Path;
/// Returns `true` if `s` starts with (at least) two ASCII digits.
fn fti_digits(s: &str) -> bool {
    // ASCII digits are single bytes, so inspecting the first two bytes suffices
    match s.as_bytes() {
        [first, second, ..] => first.is_ascii_digit() && second.is_ascii_digit(),
        _ => false,
    }
}
pub fn parse_diary_date(par: &str, fin: &str) -> Option<NaiveDate> {
if !fti_digits(fin) || par.len() < 4 {
return None;
}
let y = par.parse::<i32>().ok()?;
let m = fin[..2].parse::<u32>().unwrap();
// verify the day info
let mut dayinf = &fin[2..];
if dayinf.starts_with(|i| matches!(i, '-' | '_')) {
dayinf = &dayinf[1..];
}
if !fti_digits(dayinf) {
return None;
}
let d = dayinf[..2].parse::<u32>().unwrap();
chrono::NaiveDate::from_ymd_opt(y, m, d)
}
/// Tries to extract the reference date from the path of a diary entry.
///
/// The date component (month and day, see [`parse_diary_date`]) is expected to
/// be either the last path component or the one directly before it; the
/// component preceding the date component is taken as the year. The path
/// should therefore usually contain at least 2 components.
///
/// Returns `None` if a required component is missing, is not valid UTF-8, or
/// does not form a valid date.
pub fn parse_diary_date_from_path(x: &Path) -> Option<NaiveDate> {
    let leaf = x.file_name()?.to_str()?;

    // one additional path component is allowed after the date part
    let (date_path, date_name) = if fti_digits(leaf) {
        (x, leaf)
    } else {
        let up = x.parent()?;
        let up_name = up.file_name()?.to_str()?;
        if !fti_digits(up_name) {
            return None;
        }
        (up, up_name)
    };

    let year_name = date_path.parent()?.file_name()?.to_str()?;
    parse_diary_date(year_name, date_name)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks date extraction for a set of typical and malformed diary paths.
    #[test]
    fn diary_standard() {
        // (path, expected (year, month, day) or `None` if the path must be rejected)
        let cases: [(&str, Option<(i32, u32, u32)>); 6] = [
            // year component too short
            ("201/01_01", None),
            // date component too short
            ("2016/0", None),
            ("2016/08_28", Some((2016, 8, 28))),
            ("teller/2016/08_28", Some((2016, 8, 28))),
            // one extra component after the date part, trailing text after the day
            ("teller/2016/08_28nox/fluppig.jpg", Some((2016, 8, 28))),
            // no separator between month and day
            ("/blog/2017/1124y_vf.html", Some((2017, 11, 24))),
        ];

        for &(path, ymd) in cases.iter() {
            let expected = ymd.map(|(y, m, d)| NaiveDate::from_ymd(y, m, d));
            assert_eq!(
                parse_diary_date_from_path(Path::new(path)),
                expected,
                "path: {}",
                path
            );
        }
    }
}
[package]
name = "zhed-misc-parsers"
description = "zhed miscellaneous parsers"
version = "0.0.0"
edition = "2018"
license = "Apache-2.0"
publish = false
[dependencies]
[dependencies.chrono]
version = "0.4"
default-features = false
[workspace]
members = ["crates/*"]
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"num-integer",
"num-traits",
"serde",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "proc-macro2"
version = "1.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7ed8b8c7b886ea3ed7dde405212185f423ab44682667c8c6dd14aa1d9f6612"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.129"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1f72836d2aa753853178eda473a3b9d8e4eefdaf20523b919677e6de489f8f1"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.129"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e57ae87ad533d9a56427558b516d0adac283614e347abf85b0dc0cbbf0a249f3"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sha1"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d"
[[package]]
name = "syn"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7f58f7e8eaa0009c5fec437aabf511bd9933e4b2d7407bd05273c01a8906ea7"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "uuid"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
dependencies = [
"serde",
"sha1",
]
[[package]]
name = "zhed-misc-parsers"
version = "0.0.0"
dependencies = [
"chrono",
]
[[package]]
name = "zhedds"
version = "0.0.0"
dependencies = [
"chrono",
"serde",
"uuid",
]
.git
.DS_Store
result
result-*
/target