use arbitrary::Arbitrary;
use heck::ToTrainCase;
use regex::Regex;
use serde::de::Visitor;
use std::{
collections::{HashMap, HashSet},
fmt,
sync::{Arc, Mutex, OnceLock},
};
/// A normalised set of subtype words together with the text it was built from.
///
/// Field `0` holds the individual member words and is the only field compared by
/// `PartialEq`; field `1` holds the processed text as produced by `Subtype::new`
/// and is what the `Display`, `Debug` and `Serialize` impls read.
#[derive(Clone)]
pub struct Subtype(HashSet<Arc<str>>, Arc<str>);
impl PartialEq for Subtype {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
// Marker impl: declares the hand-written `PartialEq` above to be a full equivalence relation.
impl Eq for Subtype {}
impl serde::Serialize for Subtype {
    /// Serialises the subtype as a single string: its stored text (second field).
    ///
    /// The text is handed to the serializer directly instead of being rendered through
    /// `format!("{self:?}")`. The non-alternate `Debug` output is exactly that text, so
    /// the emitted value is unchanged, but no temporary `String` is allocated and the
    /// serialised form no longer depends on the `Debug` implementation.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(&self.1)
    }
}
/// Serde visitor that accepts a string and hands it back as an owned `Box<str>`.
pub(crate) struct SubtypeVisitor;
impl<'de> Visitor<'de> for SubtypeVisitor {
    /// The visitor yields the visited text as an owned boxed string.
    type Value = Box<str>;

    /// Describes the expected input for serde error messages.
    fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "a string of space separated subtypes")
    }

    /// Copies the visited string slice into a `Box<str>`; never fails.
    fn visit_str<E>(self, text: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let owned: Box<str> = text.into();
        Ok(owned)
    }
}
impl<'de> serde::Deserialize<'de> for Subtype {
    /// Reads a string via `SubtypeVisitor` and builds the subtype with `Subtype::new`,
    /// so deserialised values go through the same processing as freshly built ones.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let raw = deserializer.deserialize_str(SubtypeVisitor)?;
        Ok(Subtype::new(raw))
    }
}
impl fmt::Display for Subtype {
    /// Writes every whitespace-separated word of the stored text converted with
    /// `to_train_case`, with single spaces between words.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for (position, word) in self.1.split_whitespace().enumerate() {
            // The separator is keyed on the word index rather than on what has been
            // written so far: a word may convert to an empty string and must still be
            // followed by its space.
            if position != 0 {
                f.write_str(" ")?;
            }
            f.write_str(&word.to_train_case())?;
        }
        Ok(())
    }
}
impl fmt::Debug for Subtype {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if f.alternate() {
f.debug_tuple("Subtype")
.field(&self.0)
.field(&self.1)
.finish()
} else {
write!(f, "{}", self.1)
}
}
}
impl<'a> Arbitrary<'a> for Subtype {
    /// Draws an arbitrary `String` from the unstructured input and builds a subtype
    /// from it with `Subtype::new`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = String::arbitrary(u)?;
        Ok(Self::new(text))
    }
}
impl Subtype {
    /// Builds a `Subtype` from free-form text.
    ///
    /// The text is trimmed and lowercased, and every separator character (`-`, `+`, `/`,
    /// `\`, `:`) that stands alone between whitespace or the string boundaries is replaced
    /// by a space. The whitespace-separated words of the result form the member set, and
    /// the processed text is stored next to it.
    pub fn new<S: AsRef<str>>(subtype: S) -> Self {
        static REMOVE_SEPARATORS: OnceLock<Regex> = OnceLock::new();
        let remove_seps =
            REMOVE_SEPARATORS.get_or_init(|| Regex::new(r"(\A|\s)[-+/\\:](\z|\s)").unwrap());
        let subtype = subtype.as_ref().trim().to_lowercase();
        let subtype = remove_seps.replace_all(&subtype, " ");
        Self(
            subtype
                .split_whitespace()
                // Regex matches cannot overlap, so in a run such as "a - - b" the second
                // separator survives the replacement above; keep such stragglers out of
                // the member set.
                .filter(|s| !["-", "+", "/", r"\", ":"].contains(s))
                .map(Arc::from)
                .collect(),
            Arc::from(subtype),
        )
    }

    /// Iterates over the member words, in the set's (unspecified) iteration order.
    pub fn subtypes(&self) -> impl ExactSizeIterator<Item = &str> {
        self.0.iter().map(|s| s.as_ref())
    }

    /// Returns `true` when the lowercased `member` is exactly one of the member words.
    pub fn is_member<S: AsRef<str>>(&self, member: S) -> bool {
        let member = member.as_ref().to_lowercase();
        // `Arc<str>` borrows as `str`, so the set can be probed by hash instead of
        // comparing against every element.
        self.0.contains(member.as_str())
    }

    /// Returns `true` when the lowercased `quasimember` occurs inside some member word,
    /// delimited on each side by the word boundary or by one of `-`, `+`, `/`, `\`, `:`.
    ///
    /// Compiled patterns are cached per lowercased query in a process-wide map.
    ///
    /// # Panics
    ///
    /// Panics if the pattern for `quasimember` cannot be compiled (for example when a
    /// very long query exceeds the `regex` crate's compiled-size limit). Such a panic
    /// does not disable the cache for later calls.
    pub fn is_quasimember<S: AsRef<str>>(&self, quasimember: S) -> bool {
        static SUBTYPE_REGEXES: OnceLock<Mutex<HashMap<Box<str>, Regex>>> = OnceLock::new();
        // Lowercase once; the result serves as both cache key and pattern source.
        let needle = quasimember.as_ref().to_lowercase();
        let subtype_regex = {
            let mut cache = SUBTYPE_REGEXES
                .get_or_init(|| Mutex::new(HashMap::new()))
                .lock()
                // The map is only touched by `get` and `insert`, so it is still
                // consistent after a panic in another caller; recover it instead of
                // propagating the poison to every later call.
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let cached = cache.get(needle.as_str()).cloned();
            match cached {
                Some(compiled) => compiled,
                None => {
                    let compiled = Regex::new(&format!(
                        r"(\A|[-+/\\:]){}(\z|[-+/\\:])",
                        regex::escape(&needle)
                    ))
                    .expect("escaped literal between fixed delimiters should compile");
                    cache.insert(needle.into_boxed_str(), compiled.clone());
                    compiled
                }
            }
            // The guard is dropped here, so the scan below runs without the global lock.
        };
        self.0.iter().any(|sm| subtype_regex.is_match(sm))
    }
}
#[cfg(test)]
mod tests {
    use super::Subtype;
    use arbtest::{
        arbitrary::{self, Arbitrary},
        arbtest,
    };
    use bincode::{deserialize, serialize};

    /// `is_member` only accepts complete member words.
    #[test]
    fn membership_will_only_match_whole_subtypes() {
        let subtype = Subtype::new("Mad Relic");
        let subtype2 = Subtype::new("Mad-Devouring Dragon");
        assert!(subtype.is_member("Mad"));
        assert!(!subtype2.is_member("Mad"));
    }

    /// `is_quasimember` accepts separator-delimited parts of a member word, but not
    /// arbitrary substrings.
    #[test]
    fn quasimembership_matches_words_in_subtypes() {
        let subtype = Subtype::new("Mad Relic");
        let subtype2 = Subtype::new("Mad-Devouring Dragon");
        assert!(subtype.is_quasimember("Mad"));
        assert!(subtype2.is_quasimember("Mad"));
        assert!(!subtype2.is_quasimember("evo"));
    }

    /// Queries and construction both ignore letter case.
    #[test]
    fn membership_and_quasimembership_are_caseinsensitive() {
        let subtype = Subtype::new("Magic-Spellcaster Ruler");
        assert!(subtype.is_member("MAGIC-SPELLCASTER"));
        assert!(subtype.is_quasimember("SPELLcAsTeR"));
        let subtype2 = Subtype::new("MAGIC-SPELLcaster RULer");
        assert_eq!(subtype, subtype2);
    }

    /// The amount and kind of whitespace between words does not affect equality.
    #[test]
    fn whitespace_doesnt_affect_subtype() {
        let subtype = Subtype::new("Magic Ruler");
        let subtype2 = Subtype::new("Magic \n\t\t Ruler");
        assert_eq!(subtype, subtype2);
    }

    /// Punctuation and non-ASCII characters are kept inside member words.
    #[test]
    fn valid_unicode_subtypes() {
        let subtype = Subtype::new("Hailstone (-_-/ 😻-Ruler");
        assert!(subtype.is_member("(-_-/"));
        assert!(subtype.is_member("😻-ruler"));
        assert!(subtype.is_quasimember("(-_-/"));
        assert!(subtype.is_quasimember("("));
        assert!(subtype.is_quasimember("_"));
        assert!(subtype.is_quasimember("/"));
        assert!(subtype.is_quasimember("😻"));
        assert!(subtype.is_quasimember("Ruler"));
    }

    /// Stand-alone separators are dropped, embedded ones are kept; the serialised form
    /// is pinned by a snapshot.
    #[test]
    fn test_serde() {
        let subtype_a = Subtype::new("Mad:Dot / Dasher");
        let subtype_b = Subtype::new("Mad-dot - Dasher");
        let subtype_c = Subtype::new("Mad:Dot - Dasher");
        assert_eq!(subtype_a, subtype_c);
        assert_ne!(subtype_a, subtype_b);
        assert_ne!(subtype_c, subtype_b);
        insta::assert_ron_snapshot!([subtype_a, subtype_b, subtype_c]);
    }

    /// Property: an arbitrary subtype survives a bincode serialise/deserialise cycle.
    fn arb_serde_cycle(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<()> {
        let subtype = Subtype::arbitrary(u)?;
        let serialized = serialize(&subtype).unwrap();
        let deserialized: Subtype = deserialize(&serialized).unwrap();
        // `assert_eq!` puts both values into the panic message on failure.
        assert_eq!(subtype, deserialized);
        Ok(())
    }

    #[test]
    fn subtypes_cycle() {
        arbtest(arb_serde_cycle);
    }
}