//! Time-series Abstraction
#![feature(min_const_generics)]
// Implement traits on DataPoint
use serde::{Serialize};
use peroxide::numerical::spline::CubicSpline;
use std::convert::{TryFrom, TryInto};
use std::fmt;
use std::fs;
use std::fmt::Display;
use std::cmp::Ordering;
use std::marker::Copy;
use std::ops::Add;
use std::ops::Sub;
/// A date with a granularity of one month or larger.
///
/// The wrapped `usize` is a total number of months (`MonthlyDate::ym` stores
/// `year * 12 + month`). The same type also acts as a duration: values can be
/// added and subtracted through the `Add` and `Sub` implementations.
#[derive(Clone, Copy, Debug, Eq, Serialize)]
pub struct MonthlyDate(pub usize);
impl MonthlyDate {
/// Return the year of a date.
pub fn year(&self) -> usize {
self.0 / 12
}
/// Return the month of a date.
pub fn month(&self) -> usize {
self.0 % 12
}
/// Return the inner value of a date. This value is an integer representing
/// the total number of months.
pub fn inner(&self) -> usize {
self.0
}
/// Create a monthly date from a year and month.
pub fn ym(year: usize, month: usize) -> Self {
MonthlyDate(year * 12 + month)
}
}
/// Partial ordering for `MonthlyDate`; always `Some`, because it simply
/// delegates to the total order defined by the `Ord` implementation.
impl PartialOrd for MonthlyDate {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for MonthlyDate {
fn cmp(&self, other: &Self) -> Ordering {
self.0.cmp(&other.0)
}
}
impl PartialEq for MonthlyDate {
fn eq(&self, other: &Self) -> bool { self.0 == other.0 }
}
impl Add for MonthlyDate {
type Output = MonthlyDate;
fn add(self, other: Self) -> Self {
MonthlyDate(self.inner() + other.inner())
}
}
impl Sub for MonthlyDate {
type Output = MonthlyDate;
fn sub(self, other: Self) -> Self {
MonthlyDate(self.inner() - other.inner())
}
}
/// Errors reported by the time-series types in this file.
///
/// TODO: Should be moved to its own module.
#[derive(Debug)]
pub enum Error {
    /// Tried to construct a `RegularTimeSeries` from an irregular `TimeSeries`.
    NotRegular,
    /// The `TimeSeries` is empty (it has no datapoints).
    NoPoint1s,
    /// The `TimeSeries` has only one datapoint.
    OnePoint1,
    /// Could not parse date.
    // NOTE(review): no code visible in this file returns this variant.
    ParseErr,
}
impl Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:?}", self)
}
}
/// A date of monthly granularity associated with a fixed-size array of `N`
/// `f32` values.
#[derive(Clone, Copy, Debug, Serialize)]
pub struct DatePoint<const N: usize> {
    /// The date the values belong to.
    date: MonthlyDate,
    /// The `N` values recorded for `date`; serialized through the local
    /// `arrays` module.
    #[serde(with = "arrays")]
    value: [f32; N],
}
impl<const N: usize> DatePoint<N> {
    /// The date this datepoint is attached to.
    pub fn date(&self) -> MonthlyDate {
        let DatePoint { date, .. } = *self;
        date
    }

    /// Build a datepoint from a date and its `N` values.
    pub fn new(date: MonthlyDate, value: [f32; N]) -> DatePoint<N> {
        Self { date, value }
    }

    /// The value stored at index `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is not a valid index into the `N` values.
    pub fn value(&self, n: usize) -> f32 {
        let values = &self.value;
        values[n]
    }

    /// A single-valued datepoint with the same date, holding only the value
    /// at index `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is not a valid index into the `N` values.
    pub fn date_map(&self, n: usize) -> DatePoint<1> {
        let selected = self.value[n];
        DatePoint::<1> {
            date: self.date,
            value: [selected],
        }
    }
}
/// `TimeSeries` is a collection of `DatePoint`s, stored as a `Vec` in the
/// order they were supplied. There are no guarantees of ordering by date.
#[derive(Debug, Serialize)]
pub struct TimeSeries<const N: usize>(Vec<DatePoint<N>>);
impl<const N: usize> TimeSeries<N> {
/// Construct a `TimeSeries` from a `Vec` of `DatePoints`.
pub fn new(v: Vec<DatePoint<N>>) -> TimeSeries<N> {
TimeSeries(v)
}
/// From CSV file with format '2017-01-01, 4.725'.
pub fn from_csv(path: &str) -> TimeSeries<1> {
let s = fs::read_to_string(path).unwrap();
let v = s.lines().map(|line| {
DatePoint::<1>::new(
MonthlyDate::ym(
line[..4].parse().unwrap(),
line[5..7].parse().unwrap(),
),
[line[12..].parse::<f32>().unwrap()],
)
}).collect::<Vec<DatePoint::<1>>>();
TimeSeries::new(v)
}
/// Return true if the durations between Points are all equal.
pub fn is_regular(&self, duration: &MonthlyDate) -> bool {
self.0.as_slice().windows(2).all(|datapoint_pair| {
datapoint_pair[1].date() - datapoint_pair[0].date() == *duration
})
}
/// Return the duration between the first and second points.
pub fn first_duration(&self) -> Result<MonthlyDate, Error> {
if self.0.is_empty() { return Err(Error::NoPoint1s) };
if self.0.len() == 1 { return Err(Error::OnePoint1) };
Ok((self.0)[1].date() - (self.0)[0].date())
}
/// Return the maximum of all values at index `n`.
pub fn max(&self, n: usize) -> f32 {
self.0.iter()
.map(|dp| dp.value(n))
.fold(f32::NEG_INFINITY, |a, b| a.max(b))
}
/// Return the minimum of all values at index `n`.
pub fn min(&self, n: usize) -> f32 {
self.0.iter()
.map(|dp| dp.value(n))
.fold(f32::INFINITY, |a, b| a.min(b))
}
}
/// A `RegularTimeSeries` has one additional requirement over a `TimeSeries`:
/// the time interval between successive `DatePoints` is always the same
/// duration. This is also intended to ensure ordering.
///
/// The requirement is checked when converting from a `TimeSeries` with
/// `TryFrom`.
#[derive(Debug, Serialize)]
pub struct RegularTimeSeries<const N: usize> {
    /// The common interval between successive points.
    duration: MonthlyDate,
    /// The underlying points.
    ts: TimeSeries<N>,
}
impl<const N: usize> RegularTimeSeries<N> {
/// Return the duration between points.
pub fn duration(&self) -> MonthlyDate {
self.duration
}
/// Return the first point.
pub fn first(&self) -> Option<DatePoint<N>> {
Some(*self.ts.0.first()?)
}
/// Returns 'None' if 'RegularTimeSeries' is empty.
pub fn first_date(&self) -> MonthlyDate {
self.ts.0.first().unwrap().date()
}
/// Return the last date.
pub fn last_date(&self) -> MonthlyDate {
self.ts.0.last().unwrap().date()
}
/// Return the last point.
pub fn last(&self) -> Option<DatePoint<N>> {
Some(*self.ts.0.last()?)
}
/// Take the data at index `n`, and use it to construct a monthly
/// time-series from a quarterly time-series, using splines.
pub fn to_monthly(&self, n: usize) -> RegularTimeSeries<1> {
let x = self.ts.0.iter().map(|dp| dp.date().inner() as f64).collect::<Vec<f64>>();
let y = self.ts.0.iter().map(|dp| dp.value(n) as f64).collect::<Vec<f64>>();
let spline = CubicSpline::from_nodes(x, y);
let mut v = Vec::new();
for i in self.first_date().inner()..=self.last_date().inner() {
let dp = DatePoint::<1>::new(MonthlyDate(i), [spline.eval(i as f64) as f32]);
v.push(dp)
};
TimeSeries::new(v).try_into().unwrap()
}
}
impl<const N: usize> RegularTimeSeries<N> {
/// Return the maximum of all values at index `n`.
pub fn max(&self, n: usize) -> f32 {
self.ts.max(n)
}
/// Return the minimum of all values.
pub fn min(&self, n: usize) -> f32 {
self.ts.min(n)
}
/// Return an iterator over points.
pub fn iter(&self, dr: DateRange) -> impl Iterator + '_ {
self.ts.0.iter()
.skip_while(move |p1| {
if let Some(range_date) = dr.first_date() {
return p1.date() < range_date
} else {
false
}
})
.take_while(move |p1| {
if let Some(range_date) = dr.last_date() {
return p1.date() <= range_date
} else {
true
}
})
}
}
/// A range of dates with monthly granularity.
///
/// Either bound may be `None`; `RegularTimeSeries::iter` treats a `None`
/// bound as leaving that side of the range open, and a present bound as
/// inclusive.
#[derive(Clone, Copy)]
pub struct DateRange {
    /// First date of the range, if bounded below.
    start_date: Option<MonthlyDate>,
    /// Last date of the range, if bounded above.
    end_date: Option<MonthlyDate>,
}
impl DateRange {
/// Return the first date.
pub fn first_date(&self) -> Option<MonthlyDate> {
self.start_date
}
/// Return the last date.
pub fn last_date(&self) -> Option<MonthlyDate> {
self.end_date
}
}
// pub struct Range {
// ts: &RegularTimeSeries<N>,
// start_date: MonthlyDate,
// end_date: MonthlyDate,
// }
impl<const N: usize> TryFrom<TimeSeries<N>> for RegularTimeSeries<N> {
type Error = Error;
fn try_from(ts: TimeSeries<N>) -> Result<Self, Self::Error> {
let duration = ts.first_duration()?;
match ts.is_regular(&duration) {
true => {
Ok(RegularTimeSeries::<N> {
duration: duration.clone(),
ts: ts,
})
},
false => Err(Error::NotRegular),
}
}
}
// https://github.com/serde-rs/serde/issues/1937
mod arrays {
use std::{convert::TryInto, marker::PhantomData};
use serde::{
de::{SeqAccess, Visitor},
ser::SerializeTuple,
Deserialize, Deserializer, Serialize, Serializer,
};
pub fn serialize<S: Serializer, T: Serialize, const N: usize>(
data: &[T; N],
ser: S,
) -> Result<S::Ok, S::Error> {
let mut s = ser.serialize_tuple(N)?;
for item in data {
s.serialize_element(item)?;
}
s.end()
}
struct ArrayVisitor<T, const N: usize>(PhantomData<T>);
impl<'de, T, const N: usize> Visitor<'de> for ArrayVisitor<T, N>
where
T: Deserialize<'de>,
{
type Value = [T; N];
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str(&format!("an array of length {}", N))
}
#[inline]
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
// can be optimized using MaybeUninit
let mut data = Vec::with_capacity(N);
for _ in 0..N {
match (seq.next_element())? {
Some(val) => data.push(val),
None => return Err(serde::de::Error::invalid_length(N, &self)),
}
}
match data.try_into() {
Ok(arr) => Ok(arr),
Err(_) => unreachable!(),
}
}
}
pub fn deserialize<'de, D, T, const N: usize>(deserializer: D) -> Result<[T; N], D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
{
deserializer.deserialize_tuple(N, ArrayVisitor::<T, N>(PhantomData))
}
}