synthphonia/text/parsing/
date.rs

1use std::collections::HashSet;
2
3use chrono::{NaiveDate, Datelike, Month};
4use itertools::Itertools;
5use regex::Regex;
6
7use crate::{galloc::AllocForExactSizeIter, expr::{Expr, ops}, impl_basic, impl_op1_opt, new_op1_opt, value::{ConstValue, Value}};
8
9use crate::galloc::TryAllocForExactSizeIter;
10use super::ParsingOp;
11
12
13impl_basic!(ParseDate, "date.parse");
14impl crate::forward::enumeration::Enumerator1 for ParseDate {
15    fn enumerate(&self, this: &'static ops::Op1Enum, exec: &'static crate::forward::executor::Executor, opnt: [usize; 1]) -> Result<(), ()> { Ok(())}
16}
17
18impl crate::expr::ops::Op1 for ParseDate {
19    fn cost(&self) -> usize {
20        self.0
21    }
22    fn try_eval(&self, a1: crate::value::Value) -> (bool, crate::value::Value) {
23        match a1 {
24            crate::value::Value::Str(s1) => {
25                let a = s1
26                    .iter()
27                    .map(|s1| {
28                        let mut res = self.parse_into(s1);
29                        res.sort_by_key(|(a,b)| -(a.len() as isize));
30                        res.first().map(|(s, c)| c.as_i64().unwrap()).unwrap_or(0_i64)
31                    }).galloc_scollect();
32                (true, a.into())
33            }
34            _ => (false, Value::Null),
35        }
36    }
37}
38
39lazy_static::lazy_static!{
40    static ref REGEXES : [Regex; 5] = {
41        let month_literal = "(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?)";
42        let month = r"((?<m>\d{1,2})|(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?))";
43        let day = r"((?<d>\d{1,2})(st|nd|rd|th)?)";
44        let year = r"(?<y>\d{2,4})";
45        let regex1 = Regex::new(format!(r"{month}[\- /.,]*{day}?[\- /.,]*{year}?").as_str()).unwrap();
46        let regex2 = Regex::new(format!(r"{year}[ \-/.,]+{month}[\- /.,]*{day}?").as_str()).unwrap();
47        let regex3 = Regex::new(format!(r"{day}[ \-/.,]*{month}[\- /.,]*{year}?").as_str()).unwrap();
48        let regex4 = Regex::new(format!(r"{month}[\- /.,]+{year}?").as_str()).unwrap();
49        let regex5 = Regex::new(month_literal.to_string().as_str()).unwrap();
50        [regex1, regex2, regex3, regex4, regex5]
51    };
52}
53
54impl ParsingOp for ParseDate {
55
56    fn parse_into(&self, input: &'static str) -> std::vec::Vec<(&'static str, ConstValue)> {
57        let months = [ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
58        let mut result: Vec<(&'static str, ConstValue)> = Vec::new();
59        let [regex1, regex2, regex3, regex4, regex5] = &*REGEXES;
60        let iter = regex1.captures_iter(input).chain(regex2.captures_iter(input)).chain(regex3.captures_iter(input)).chain(regex4.captures_iter(input)).chain(regex5.captures_iter(input));
61        for m in iter {
62            let mut year = if m.name("y").is_none() { 2000 } else { m.name("y").unwrap().as_str().parse::<i32>().unwrap()};
63            if m.name("m").is_some() || m.name("month").is_some() {
64                let month = if m.name("m").is_some() {
65                    m.name("m").unwrap().as_str().parse::<u32>().unwrap()
66                } else {
67                    months.iter().enumerate().find(|(i, s)| ***s == m.name("month").unwrap().as_str()[0..3]).unwrap().0 as u32 + 1
68                };
69                let day = if m.name("d").is_none() { 1 } else { m.name("d").unwrap().as_str().parse::<u32>().unwrap()};
70                if m.name("d").is_none() && m.name("y").is_none() { continue; }
71                if let Some(d) = NaiveDate::from_ymd_opt(year, month, day) {
72                    result.push((m.get(0).unwrap().as_str(), (d.num_days_from_ce() as i64).into() ));
73                }
74            }
75        }
76        result
77    }
78
79}
80
81pub fn detector(input: &str) -> bool {
82    let month_literal = "(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?)";
83    let month = r"((?<m>\d{1,2})|(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?))";
84    let day = r"((?<d>\d{1,2})(st|nd|rd|th)?)";
85    let year = r"(?<y>\d{2,4})";
86    let month_lit = Regex::new(month_literal).unwrap();
87    let regex1 = Regex::new(format!(r"{month}[\-/.,]+{day}?[\-/.,]+{year}?").as_str()).unwrap();
88    let regex2 = Regex::new(format!(r"{year}[\-/.,]+{month}[\-/.,]+{day}?").as_str()).unwrap();
89    let regex3 = Regex::new(format!(r"{day}[\-/.,]+{month}[\-/.,]+{year}?").as_str()).unwrap();
90    month_lit.is_match(input) || regex1.is_match(input) || regex2.is_match(input) || regex3.is_match(input)
91}
92
#[cfg(test)]
mod tests {
    use super::detector;
    use crate::text::parsing::{ParseDate, ParsingOp};

    /// Inputs printed by `test1`, in the order they are parsed.
    const SAMPLES: &[&str] = &[
        "Jan",
        "Jan 1st, 2034",
        "03042241",
        "10/6/2143",
        "06-Oct-2143",
        "Mar 30 2002",
        "01311846",
        "22 Apr 1953",
        "03302241",
        "02-Aug-2160",
        "23 May 1984",
        "15 August 1740",
        "Jul 08 2237",
        "3 Nov 1904",
        "5 April 2088",
        "05302131",
        "May 25 1817",
        "31 May 1963",
        "24-Nov-2098",
        "22 Oct 1815",
        "26 May 2155",
        "26-Mar-1816",
        "26 Apr 2090",
        "14-Aug-2089",
        "Apr 20 1957",
        "11 Sep 1952",
        "03-Nov-2114",
        "21 June 2059",
        "21-Jan-1818",
        "16 Sep 2075",
        "Oct 2 2204",
        "02 Sep 1747",
        "29 Jan 2218",
        "03 Apr 2008",
    ];

    /// Smoke test: parses every sample and prints the result. Nothing is asserted,
    /// so it only fails if parsing panics.
    #[test]
    fn test1() {
        let scanner = ParseDate(1);
        for &sample in SAMPLES.iter() {
            println!("{:?}", scanner.parse_into(sample));
        }
    }

    /// The detector accepts month names and separator-delimited dates, and rejects
    /// a bare run of digits.
    #[test]
    fn test_detector() {
        assert!(detector("Jan"));
        assert!(!detector("01012001"));
        assert!(detector("03-Nov-2114"));
        assert!(detector("5 April 2088"));
    }
}
145