// synthphonia/text/parsing/mod.rs
2use std::{collections::{HashMap, HashSet}, cell::UnsafeCell};
3
4use itertools::Itertools;
5use kv_trie_rs::{Trie, TrieBuilder};
6use derive_more::From;
7
8use crate::{debg, expr::{cfg::ProdRule, context::Context, ops::{Op1, Op1Enum}, Expr}, forward::executor::Executor, utils::UnsafeCellExt, value::{consts_to_value, ConstValue, Value}};
9
10pub struct TextObjData {
11 trie: UnsafeCell<Vec<(&'static Op1Enum, usize, Trie<u8, ConstValue>)>>,
12 future_exprs: UnsafeCell<Vec<Vec<(Expr, Value)>>>,
13}
14
15impl TextObjData {
16 pub fn trie(&self) -> &mut Vec<(&'static Op1Enum, usize, Trie<u8, ConstValue>)> {
17 unsafe { self.trie.as_mut() }
18 }
19 pub fn future_exprs(&self) -> &mut Vec<Vec<(Expr, Value)>> {
20 unsafe { self.future_exprs.as_mut() }
21 }
22 pub fn enumerate(&self, exec: &'static Executor) -> Result<(), ()> {
23 if exec.size() >= self.future_exprs().len() { return Ok(()); }
24 for (e, v) in self.future_exprs()[exec.size()].drain(0..) {
25 exec.enum_expr(e, v)?;
26 }
27 Ok(())
28 }
29 pub fn build_trie(exec: &Executor) {
30 for (nt, ntdata) in exec.cfg.iter().enumerate() {
31 for rule in &ntdata.rules {
32 if let ProdRule::Op1(op1, from_nt) = rule {
33 let vec = op1.parse_all(&exec.ctx);
34 if vec.is_empty() { continue; }
35 let mut triebuilder = TrieBuilder::new();
36 for (k,v) in vec {
37 debg!("Found TextObj {} -> {} {}", k, op1.name(), v);
38 triebuilder.push(k.as_bytes(), v);
39 }
40 let mut trie = triebuilder.build();
41 exec.data[*from_nt].to.trie().push((op1, nt, trie));
42 }
43 }
44 }
45
46 }
47 pub fn new() -> Self {
48 Self {
49 trie: Vec::new().into(),
50 future_exprs: Vec::new().into(),
51 }
52 }
53 pub fn update(&self, exec: &'static Executor, e: &'static Expr, v: Value) {
54 if let Value::Str(inner) = v {
55 for (scan, nt, v) in self.read_to(inner) {
56 let expr = Expr::Op1(scan, e);
57 let value = consts_to_value(v);
58 let target = exec.data[nt].to.future_exprs();
59 let size = exec.size() + scan.cost();
60 while target.len() <= size {
61 target.push(Vec::new());
62 }
63 target[size].push((expr, value));
64 }
65 }
66 }
67 pub fn read_to(&self, input: &'static [&'static str]) -> impl Iterator<Item= (&'static Op1Enum, usize, Vec<ConstValue>)> + '_ {
68 self.trie().iter().flat_map(|(scan, nt, trie)| {
69 if trie.exact_match(input[0].as_bytes()) {
70 let mut value = vec![*trie.get(input[0].as_bytes()).unwrap()];
71
72 let r = input[1..].iter().find_map(|inp| {
73 if trie.exact_match(inp.as_bytes()) {
74 let v = trie.get(inp.as_bytes()).unwrap();
75 value.push(*v);
76 None
77 } else { Some(()) }
78 });
79 if r.is_none() {
80 return Some((*scan, *nt, value));
81 }
82 }
83 None
84 })
85 }
86}
87
88
89pub trait ParsingOp {
90 fn parse_all(&self, ctx: &Context) -> Vec<(&'static str, ConstValue)> {
91 let mut result = Vec::new();
92 for v in ctx.iter() {
93 if let Value::Str(a) = v {
94 for input in a {
95 let mut res = self.parse_into(input);
96 res.sort_by_key(|(a,b)| -(a.len() as isize));
97 let mut a = HashSet::new();
98 result.append(&mut res.into_iter().filter(|(s, _)| {
99 if a.contains(&s.as_ptr()) { false } else { a.insert(s.as_ptr()); true}
100 }).collect_vec());
101 }
102 }
103 }
104 result
105 }
106 fn parse_into(&self, input: &'static str) -> Vec<(&'static str, ConstValue)>;
107}
108
109pub fn detector(ctx: &Context) -> bool {
110 for v in ctx.iter().chain(ctx.outputs()) {
111 if let Value::Str(a) = v {
112 for input in a {
113 if float::detector(input) || date::detector(input) || time::detector(input) {
114 return true;
115 }
116 }
117 }
118 }
119 false
120}
121
122pub mod date;
123pub use date::*;
124pub mod int;
125pub use int::*;
126mod month;
127pub use month::*;
128mod weekday;
129pub use weekday::*;
130mod time;
131pub use time::*;
132mod float;
133pub use float::*;
134
135impl ParsingOp for Op1Enum {
136 fn parse_into(&self, input: &'static str) -> Vec<(&'static str, ConstValue)> {
137 match self {
138 Op1Enum::ParseTime(p) => p.parse_into(input),
139 Op1Enum::ParseDate(p) => p.parse_into(input),
140 Op1Enum::ParseMonth(p) => p.parse_into(input),
141 Op1Enum::ParseInt(p) => p.parse_into(input),
142 Op1Enum::ParseWeekday(p) => p.parse_into(input),
143 _ => Vec::new(),
144 }
145 }
146}
147
148
149