synthphonia/text/parsing/
mod.rs

1
2use std::{collections::{HashMap, HashSet}, cell::UnsafeCell};
3
4use itertools::Itertools;
5use kv_trie_rs::{Trie, TrieBuilder};
6use derive_more::From;
7
8use crate::{debg, expr::{cfg::ProdRule, context::Context, ops::{Op1, Op1Enum}, Expr}, forward::executor::Executor, utils::UnsafeCellExt, value::{consts_to_value, ConstValue, Value}};
9
10pub struct TextObjData {
11    trie: UnsafeCell<Vec<(&'static Op1Enum, usize, Trie<u8, ConstValue>)>>,
12    future_exprs: UnsafeCell<Vec<Vec<(Expr, Value)>>>,
13}
14
15impl TextObjData {
16    pub fn trie(&self) -> &mut Vec<(&'static Op1Enum, usize, Trie<u8, ConstValue>)> {
17        unsafe { self.trie.as_mut() }
18    }
19    pub fn future_exprs(&self) -> &mut Vec<Vec<(Expr, Value)>> {
20        unsafe { self.future_exprs.as_mut() }
21    }
22    pub fn enumerate(&self, exec: &'static Executor) -> Result<(), ()> {
23        if exec.size() >= self.future_exprs().len() { return Ok(()); }
24        for (e, v) in self.future_exprs()[exec.size()].drain(0..) {
25            exec.enum_expr(e, v)?;
26        }
27        Ok(())
28    }
29    pub fn build_trie(exec: &Executor) {
30        for (nt, ntdata) in exec.cfg.iter().enumerate() {
31            for rule in &ntdata.rules {
32                if let ProdRule::Op1(op1, from_nt) = rule {
33                    let vec = op1.parse_all(&exec.ctx);
34                    if vec.is_empty() { continue; }
35                    let mut triebuilder = TrieBuilder::new();
36                    for (k,v) in vec {
37                        debg!("Found TextObj {} -> {} {}", k, op1.name(), v);
38                        triebuilder.push(k.as_bytes(), v);
39                    }
40                    let mut trie = triebuilder.build();
41                    exec.data[*from_nt].to.trie().push((op1, nt, trie));
42                }
43            }
44        }
45
46    }
47    pub fn new() -> Self {
48        Self {
49            trie: Vec::new().into(),
50            future_exprs: Vec::new().into(),
51        }
52    }
53    pub fn update(&self, exec: &'static Executor, e: &'static Expr, v: Value) {
54        if let Value::Str(inner) = v {
55            for (scan, nt,  v) in self.read_to(inner) {
56                let expr = Expr::Op1(scan, e);
57                let value = consts_to_value(v);
58                let target = exec.data[nt].to.future_exprs();
59                let size = exec.size() + scan.cost();
60                while target.len() <= size {
61                    target.push(Vec::new());
62                }
63                target[size].push((expr, value));
64            }
65        }
66    }
67    pub fn read_to(&self, input: &'static [&'static str]) -> impl Iterator<Item= (&'static Op1Enum, usize, Vec<ConstValue>)> + '_ {
68        self.trie().iter().flat_map(|(scan, nt, trie)| {
69            if trie.exact_match(input[0].as_bytes()) {
70                let mut value = vec![*trie.get(input[0].as_bytes()).unwrap()];
71                
72                let r = input[1..].iter().find_map(|inp| {
73                    if trie.exact_match(inp.as_bytes()) {
74                        let v = trie.get(inp.as_bytes()).unwrap();
75                        value.push(*v);
76                        None
77                    } else { Some(()) }
78                });
79                if r.is_none() {
80                    return Some((*scan, *nt, value));
81                }
82            }
83            None
84        })
85    }
86}
87
88
89pub trait ParsingOp {
90    fn parse_all(&self, ctx: &Context) -> Vec<(&'static str, ConstValue)> {
91        let mut result = Vec::new();
92        for v in ctx.iter() {
93            if let Value::Str(a) = v {
94                for input in a {
95                    let mut res = self.parse_into(input);
96                    res.sort_by_key(|(a,b)| -(a.len() as isize));
97                    let mut a = HashSet::new();
98                    result.append(&mut res.into_iter().filter(|(s, _)| {
99                        if a.contains(&s.as_ptr()) { false } else { a.insert(s.as_ptr()); true}
100                    }).collect_vec());
101                }
102            }
103        }
104        result
105    }
106    fn parse_into(&self, input: &'static str) -> Vec<(&'static str, ConstValue)>;
107}
108
109pub fn detector(ctx: &Context) -> bool {
110    for v in ctx.iter().chain(ctx.outputs()) {
111        if let Value::Str(a) = v {
112            for input in a {
113                if float::detector(input) || date::detector(input) || time::detector(input) {
114                    return true;
115                }
116            }
117        }
118    }
119    false
120}
121
122pub mod date;
123pub use date::*;
124pub mod int;
125pub use int::*;
126mod month;
127pub use month::*;
128mod weekday;
129pub use weekday::*;
130mod time;
131pub use time::*;
132mod float;
133pub use float::*;
134
135impl ParsingOp for Op1Enum {
136    fn parse_into(&self, input: &'static str) -> Vec<(&'static str, ConstValue)> {
137        match self {
138            Op1Enum::ParseTime(p) => p.parse_into(input),
139            Op1Enum::ParseDate(p) => p.parse_into(input),
140            Op1Enum::ParseMonth(p) => p.parse_into(input),
141            Op1Enum::ParseInt(p) => p.parse_into(input),
142            Op1Enum::ParseWeekday(p) => p.parse_into(input),
143            _ => Vec::new(),
144        }
145    }
146}
147
148
149