synthphonia/text/parsing/
date.rs1use std::collections::HashSet;
2
3use chrono::{NaiveDate, Datelike, Month};
4use itertools::Itertools;
5use regex::Regex;
6
7use crate::{galloc::AllocForExactSizeIter, expr::{Expr, ops}, impl_basic, impl_op1_opt, new_op1_opt, value::{ConstValue, Value}};
8
9use crate::galloc::TryAllocForExactSizeIter;
10use super::ParsingOp;
11
12
13impl_basic!(ParseDate, "date.parse");
14impl crate::forward::enumeration::Enumerator1 for ParseDate {
15 fn enumerate(&self, this: &'static ops::Op1Enum, exec: &'static crate::forward::executor::Executor, opnt: [usize; 1]) -> Result<(), ()> { Ok(())}
16}
17
18impl crate::expr::ops::Op1 for ParseDate {
19 fn cost(&self) -> usize {
20 self.0
21 }
22 fn try_eval(&self, a1: crate::value::Value) -> (bool, crate::value::Value) {
23 match a1 {
24 crate::value::Value::Str(s1) => {
25 let a = s1
26 .iter()
27 .map(|s1| {
28 let mut res = self.parse_into(s1);
29 res.sort_by_key(|(a,b)| -(a.len() as isize));
30 res.first().map(|(s, c)| c.as_i64().unwrap()).unwrap_or(0_i64)
31 }).galloc_scollect();
32 (true, a.into())
33 }
34 _ => (false, Value::Null),
35 }
36 }
37}
38
39lazy_static::lazy_static!{
40 static ref REGEXES : [Regex; 5] = {
41 let month_literal = "(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?)";
42 let month = r"((?<m>\d{1,2})|(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?))";
43 let day = r"((?<d>\d{1,2})(st|nd|rd|th)?)";
44 let year = r"(?<y>\d{2,4})";
45 let regex1 = Regex::new(format!(r"{month}[\- /.,]*{day}?[\- /.,]*{year}?").as_str()).unwrap();
46 let regex2 = Regex::new(format!(r"{year}[ \-/.,]+{month}[\- /.,]*{day}?").as_str()).unwrap();
47 let regex3 = Regex::new(format!(r"{day}[ \-/.,]*{month}[\- /.,]*{year}?").as_str()).unwrap();
48 let regex4 = Regex::new(format!(r"{month}[\- /.,]+{year}?").as_str()).unwrap();
49 let regex5 = Regex::new(month_literal.to_string().as_str()).unwrap();
50 [regex1, regex2, regex3, regex4, regex5]
51 };
52}
53
54impl ParsingOp for ParseDate {
55
56 fn parse_into(&self, input: &'static str) -> std::vec::Vec<(&'static str, ConstValue)> {
57 let months = [ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];
58 let mut result: Vec<(&'static str, ConstValue)> = Vec::new();
59 let [regex1, regex2, regex3, regex4, regex5] = &*REGEXES;
60 let iter = regex1.captures_iter(input).chain(regex2.captures_iter(input)).chain(regex3.captures_iter(input)).chain(regex4.captures_iter(input)).chain(regex5.captures_iter(input));
61 for m in iter {
62 let mut year = if m.name("y").is_none() { 2000 } else { m.name("y").unwrap().as_str().parse::<i32>().unwrap()};
63 if m.name("m").is_some() || m.name("month").is_some() {
64 let month = if m.name("m").is_some() {
65 m.name("m").unwrap().as_str().parse::<u32>().unwrap()
66 } else {
67 months.iter().enumerate().find(|(i, s)| ***s == m.name("month").unwrap().as_str()[0..3]).unwrap().0 as u32 + 1
68 };
69 let day = if m.name("d").is_none() { 1 } else { m.name("d").unwrap().as_str().parse::<u32>().unwrap()};
70 if m.name("d").is_none() && m.name("y").is_none() { continue; }
71 if let Some(d) = NaiveDate::from_ymd_opt(year, month, day) {
72 result.push((m.get(0).unwrap().as_str(), (d.num_days_from_ce() as i64).into() ));
73 }
74 }
75 }
76 result
77 }
78
79}
80
81pub fn detector(input: &str) -> bool {
82 let month_literal = "(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?)";
83 let month = r"((?<m>\d{1,2})|(?<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|(Nov|Dec)(?:ember)?))";
84 let day = r"((?<d>\d{1,2})(st|nd|rd|th)?)";
85 let year = r"(?<y>\d{2,4})";
86 let month_lit = Regex::new(month_literal).unwrap();
87 let regex1 = Regex::new(format!(r"{month}[\-/.,]+{day}?[\-/.,]+{year}?").as_str()).unwrap();
88 let regex2 = Regex::new(format!(r"{year}[\-/.,]+{month}[\-/.,]+{day}?").as_str()).unwrap();
89 let regex3 = Regex::new(format!(r"{day}[\-/.,]+{month}[\-/.,]+{year}?").as_str()).unwrap();
90 month_lit.is_match(input) || regex1.is_match(input) || regex2.is_match(input) || regex3.is_match(input)
91}
92
#[cfg(test)]
mod tests {
    use super::detector;
    use crate::text::parsing::{ParseDate, ParsingOp};

    /// Smoke test: prints what `parse_into` finds in a range of sample
    /// inputs. It makes no assertions and only fails if parsing panics.
    #[test]
    fn test1() {
        let scanner = ParseDate(1);
        let samples: &[&'static str] = &[
            "Jan",
            "Jan 1st, 2034",
            "03042241",
            "10/6/2143",
            "06-Oct-2143",
            "Mar 30 2002",
            "01311846",
            "22 Apr 1953",
            "03302241",
            "02-Aug-2160",
            "23 May 1984",
            "15 August 1740",
            "Jul 08 2237",
            "3 Nov 1904",
            "5 April 2088",
            "05302131",
            "May 25 1817",
            "31 May 1963",
            "24-Nov-2098",
            "22 Oct 1815",
            "26 May 2155",
            "26-Mar-1816",
            "26 Apr 2090",
            "14-Aug-2089",
            "Apr 20 1957",
            "11 Sep 1952",
            "03-Nov-2114",
            "21 June 2059",
            "21-Jan-1818",
            "16 Sep 2075",
            "Oct 2 2204",
            "02 Sep 1747",
            "29 Jan 2218",
            "03 Apr 2008",
        ];
        for &sample in samples {
            println!("{:?}", scanner.parse_into(sample));
        }
    }

    /// `detector` accepts month names and separated dates, and rejects a bare
    /// run of digits.
    #[test]
    fn test_detector() {
        let cases: &[(&str, bool)] = &[
            ("Jan", true),
            ("01012001", false),
            ("03-Nov-2114", true),
            ("5 April 2088", true),
        ];
        for &(input, expected) in cases {
            assert!(detector(input) == expected);
        }
    }
}
145