synthphonia/expr/
cfg.rs

1use std::{collections::HashMap, cmp::min};
2
3use crate::{
4    expr::ops::{Op1Enum, Op2Enum, Op3Enum}, galloc::AllocForAny, parser::{
5        self,
6        problem::{self, Error, PBEProblem, SynthFun},
7        prod, config::Config,
8    }, value::{ConstValue, Type}
9};
10use derive_more::{DebugCustom, Deref, DerefMut, From, Into, Index, IndexMut};
11use itertools::Itertools;
12use joinery::prelude::*;
13
14// use super::{Expr, context::Context, Op1, Op3, Op2};
15
16#[derive(DebugCustom, Clone)]
17/// An enum representing production rules for expressions in the synthesis problem framework. 
18/// 
19/// This variant can encompass constants, variables, and non-terminal symbols along with unary, binary, and ternary operations. 
20/// Each variant includes a formatting directive, used for debugging purposes, to provide a human-readable description of its content. 
21/// The `Const` variant holds a constant value, `Var` represents a variable identified by an integer, and `Nt` refers to a non-terminal symbol using an index. 
22/// The `Op1`, `Op2`, and `Op3` variants represent unary, binary, and ternary operations, respectively, each associated with operator enumerations and indices to expressions they relate to. 
23/// This structure facilitates both the expression construction and the debugging process in the synthesis tasks.
24/// 
25pub enum ProdRule {
26    #[debug(fmt = "{:?}", _0)]
27    Const(ConstValue),
28    #[debug(fmt = "v{:?}", _0)]
29    Var(i64),
30    #[debug(fmt = "nt{:?}", _0)]
31    Nt(usize),
32    #[debug(fmt = "({} {:?})", _0, _1)]
33    Op1(&'static Op1Enum, usize),
34    #[debug(fmt = "({} {:?} {:?})", _0, _1, _2)]
35    Op2(&'static Op2Enum, usize, usize),
36    #[debug(fmt = "({} {:?} {:?} {:?})", _0, _1, _2, _3)]
37    Op3(&'static Op3Enum, usize, usize, usize),
38}
39
40impl ProdRule {
41    /// Creates a new instance from a raw production rule and a synthesis function problem context. 
42    /// 
43    /// It matches various kinds of production rules such as variables, constants, and operations, transforming them into corresponding variants. 
44    /// For variables, it checks if the variable corresponds to an argument in the synthesis function, returning either a variable or a nonterminal rule. 
45    /// The constant variant simply maps to its equivalent, maintaining its value. 
46    /// For operations, it maps the operation names to their respective enums with memory allocation and resolves nonterminals using the synthesis function's context, ensuring that each element corresponds to a valid component in the synthesis task. 
47    /// Any unrecognized variables or nonterminals lead to a panic.
48    /// 
49    pub fn new(raw: &prod::ProdRule, problem: &SynthFun) -> Self {
50        match raw {
51            prod::ProdRule::Var(s, config) => {
52                if let Some(a) = problem.lookup_arg(s.as_str()) {
53                    Self::Var(a as i64)
54                } else if let Some((a, _)) = problem.cfg.inner.iter().enumerate().find(|(a,b)| &b.0 == s) {
55                    Self::Nt(a)
56                } else { panic!("Unrecongized Variable / Nonterminal") }
57            },
58            prod::ProdRule::Const(v, config) => Self::Const(*v),
59            prod::ProdRule::Op1(a, b, config) => Self::Op1(Op1Enum::from_name(a.as_str(), config).galloc(), problem.lookup_nt(b).expect("Unknow non terminal")),
60            prod::ProdRule::Op2(a, b, c, config) => Self::Op2(
61                Op2Enum::from_name(a.as_str(), config).galloc(),
62                problem.lookup_nt(b).expect("Unknow non terminal"),
63                problem.lookup_nt(c).expect("Unknow non terminal"),
64            ),
65            prod::ProdRule::Op3(a, b, c, d, config) => Self::Op3(
66                Op3Enum::from_name(a.as_str(), config).galloc(),
67                problem.lookup_nt(b).expect("Unknow non terminal"),
68                problem.lookup_nt(c).expect("Unknow non terminal"),
69                problem.lookup_nt(d).expect("Unknow non terminal"),
70            ),
71        }
72    }
73    
74}
75
76#[derive(DebugCustom, Clone)]
77#[debug(fmt = "({}: {:?}) -> {:?}", name, ty, rules)]
78/// A struct representing a grammar non-terminal. 
79/// 
80/// This construct includes several fields essential for defining a non-terminal within a string synthesis problem. 
81/// The `name` field holds the identifier for the non-terminal, while `ty` specifies the associated type. 
82/// The `rules` field is a collection of production rules that describe how this non-terminal can be expanded. 
83/// The `start` field is a boolean flag indicating whether this non-terminal serves as the starting point in the grammar. 
84/// Lastly, `config` encompasses additional settings or parameters that may influence the synthesis process for this non-terminal.
85/// 
86pub struct NonTerminal {
87    pub name: String,
88    pub ty: Type,
89    pub rules: Vec<ProdRule>,
90    pub config: Config,
91}
92
93impl NonTerminal {
94    /// Retrieves a unary operation production rule by name. 
95    /// 
96    /// This function iterates through the list of production rules associated with a non-terminal to find a unary operation rule (`Op1`) matching the specified operation name. 
97    /// If a matching `Op1` operation is found, it returns the corresponding production rule; otherwise, it returns `None`. 
98    /// This functionality enables specific lookups for unary operations within a non-terminal's rule set.
99    /// 
100    pub fn get_op1(&self, op1: &str) -> Option<ProdRule>{
101        for rule in self.rules.iter() {
102            if let ProdRule::Op1(r, _) = rule {
103                if r.name() == op1 {
104                    return Some(rule.clone());
105                }
106            }
107        }
108        None
109    }
110    /// Retrieves an `Op2` production rule matching a given operation name. 
111    /// 
112    /// This method iterates over the set of production rules associated with a non-terminal. 
113    /// For each rule, if the rule is an `Op2` operation and its name matches the specified `op2` string, the method returns a cloned instance of that rule. 
114    /// If no matching rule is found, it returns `None`. 
115    /// This allows for extracting specific binary operations from the rule set, aiding in identifying or constructing expressions based on these operations.
116    /// 
117    pub fn get_op2(&self, op2: &str) -> Option<ProdRule>{
118        for rule in self.rules.iter() {
119            if let ProdRule::Op2(r, _, _) = rule {
120                if r.name() == op2 {
121                    return Some(rule.clone());
122                }
123            }
124        }
125        None
126    }
127    /// Retrieves a `ProdRule` of type `Op3` from the `NonTerminal` if it matches a specific operation name. 
128    /// 
129    /// This method iterates over the `rules` vector within a `NonTerminal` instance and checks if each rule is an `Op3` operation. 
130    /// If an `Op3` operation matches the given `op3` string name, it returns a clone of that `ProdRule`. 
131    /// If no matching operation is found, the method returns `None`.
132    /// 
133    pub fn get_op3(&self, op3: &str) -> Option<ProdRule>{
134        for rule in self.rules.iter() {
135            if let ProdRule::Op3(r, _, _, _) = rule {
136                if r.name() == op3 {
137                    return Some(rule.clone());
138                }
139            }
140        }
141        None
142    }
143    
144    /// Retrieves a vector of all one-operand operations that are formatting operations from the production rules associated with the non-terminal. 
145    /// 
146    /// This method iterates through the list of production rules, checking each rule to see if it is a unary operation (`Op1`). 
147    /// If the operation is identified as a formatting operation via `is_formatting_op`, it adds it to the results along with its associated non-terminal index. 
148    /// The method compiles these into a vector of tuples containing the operation enum and the index, which is then returned.
149    /// 
150    pub fn get_all_formatter(&self) -> Vec<(Op1Enum, usize)> {
151        let mut result = Vec::new();
152        for rule in self.rules.iter() {
153            if let ProdRule::Op1(r, nt) = rule {
154                if r.is_formatting_op() {
155                    result.push(((*r).clone(), *nt));
156                }
157            }
158        }
159        result
160    }
161    pub fn map_nt_number(&mut self, mut f: impl FnMut(usize) -> usize) {
162        for prod in self.rules.iter_mut() {
163            match prod {
164                ProdRule::Nt(n) => {
165                    *n = f(*n);
166                }
167                ProdRule::Op1(_, n) => {
168                    *n = f(*n);
169                }
170                ProdRule::Op2(_, n1, n2) => {
171                    *n1 = f(*n1);
172                    *n2 = f(*n2);
173                }
174                ProdRule::Op3(_, n1, n2, n3) => {
175                    *n1 = f(*n1);
176                    *n2 = f(*n2);
177                    *n3 = f(*n3);
178                }
179                _ => {}
180            }
181        }
182    }
183}
184#[derive(Clone)]
185/// A configuration structure for controlling various parameters related to the synthesis problem-solving process. 
186/// 
187/// This structure contains fields that define limits and options for resource consumption and operational modes during synthesis. 
188/// These fields include limits on size, time, and substring occurrences, as well as sample sizes for list subsequences. 
189/// It also includes options for controlling conditional searching, deduction skipping, iteration limit rates, and special tree hole conditions. 
190/// This configuration enables users to tailor the synthesis process by adjusting these parameters to suit different problem constraints and computational scenarios.
191/// 
192pub struct CfgConfig {
193    pub size_limit: usize,
194    pub time_limit: usize,
195    pub substr_limit: usize,
196    pub listsubseq_samples: usize,
197    pub increase_cost_limit: usize,
198    pub cond_search: bool,
199    pub no_deduction: bool,
200    pub ite_limit_rate: usize,
201    pub ite_limit_giveup: usize,
202    pub tree_hole: bool,
203}
204
205impl From<Config> for CfgConfig {
206    /// Creates a new instance by converting from a `Config` object. 
207    /// 
208    /// This method initializes each field of the struct with corresponding values fetched from the `Config` object, using specified keys. 
209    /// If a key does not exist in the `Config`, a default value is assigned. 
210    /// For `size_limit` and `time_limit`, the size defaults to `usize::MAX`. 
211    /// The `substr_limit` defaults to `4`, `listsubseq_samples` to `0`, `increase_cost_limit` to `2000`, `ite_limit_rate` to `1000`, and `ite_limit_giveup` to `40`. 
212    /// The boolean fields `cond_search`, `no_deduction`, and `tree_hole` are initialized as `false`. 
213    /// This method is essential for transforming configuration data into a structured format used for synthesis constraints.
214    /// 
215    fn from(value: Config) -> Self {
216        Self {
217            size_limit: value.get_usize("size_limit").unwrap_or(usize::MAX),
218            time_limit: value.get_usize("time_limit").unwrap_or(usize::MAX),
219            substr_limit: value.get_i64("data.substr.limit").unwrap_or(4) as usize,
220            listsubseq_samples: value.get_i64("data.listsubseq.sample").unwrap_or(0) as usize,
221            increase_cost_limit: value.get_i64("increase_cost_limit").unwrap_or(2000) as usize,
222            cond_search: false,
223            no_deduction: false,
224            ite_limit_rate: value.get_i64("ite_limit_rate").unwrap_or(1000) as usize,
225            ite_limit_giveup: value.get_i64("ite_limit_giveup").unwrap_or(40) as usize,
226            tree_hole: false,
227        }
228    }
229}
230
231#[derive(Deref, DerefMut, Into, Index, IndexMut, Clone)]
232/// Context-free grammar representation
233/// 
234/// This data structure embeds a vector of `NonTerminal` elements, facilitating operations that require dereferencing and indexing, both mutable and immutable. 
235/// These operations are streamlined through attributes such as `#[deref]`, `#[deref_mut]`, `#[index]`, and `#[index_mut]`, enabling direct access and manipulation of the `inner` vector's elements. 
236/// Additionally, it includes a public configuration field, `config`, of type `CfgConfig`, which likely governs specific settings or parameters associated with the non-terminal sequence within the larger framework of the synthesis process.
237/// 
238pub struct Cfg{
239    #[deref]
240    #[deref_mut]
241    #[index]
242    #[index_mut]
243    inner: Vec<NonTerminal>,
244    pub config: CfgConfig
245}
246
247impl std::fmt::Debug for Cfg {
248    /// Formats the configuration and writes it into the provided formatter. 
249    /// 
250    /// This method iterates over the collection of non-terminals within the internal vector, indexes them, and prints each accompanied by its index to the given formatter. 
251    /// The output format aligns with Rust's debug formatting conventions, ensuring the representation is both informative and concise, and returning a result indicates whether formatting was successful or encountered an error.
252    /// 
253    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254        for (i, nt) in self.inner.iter().enumerate() {
255            writeln!(f, "{}: {:?}", i, nt)?;
256        }
257        Ok(())
258    }
259}
260
261impl Cfg {
262    /// Constructs an instance from a provided `SynthFun` problem. 
263    /// 
264    /// This function extracts the configuration for constructing a context-free grammar (CFG) as represented by the `SynthFun` and populates the `Cfg` structure. 
265    /// It iterates over the non-terminal definitions within the given problem, mapping them to `NonTerminal` structures with relevant details such as name, type, production rules, and configuration. 
266    /// The first non-terminal is designated as the starting point. 
267    /// The overall configuration for the CFG is cloned and assigned, ensuring the new `Cfg` instance accurately embodies the grammar and constraints defined in the `SynthFun` problem.
268    /// 
269    pub fn from_synthfun(problem: &SynthFun) -> Self {
270        Self {
271            inner: problem.cfg.inner.iter().enumerate().map(|(i, nt)| NonTerminal {
272                name: nt.0.clone(),
273                ty: nt.1,
274                rules: nt.2.iter().map(|p| ProdRule::new(p, problem)).collect(), 
275                config: nt.3.clone(),
276            }).collect_vec(),
277            config: problem.cfg.config.clone().into(),
278        }
279    }
280    /// Find and return the index of the first `NonTerminal` in the collection with a specified type. 
281    /// 
282    /// The method iterates over the internal `Vec<NonTerminal>`, checking each element's type against the given `ty`. 
283    /// If a matching type is found, it returns the index as an `Option<usize>`, otherwise, it returns `None`. 
284    /// This function facilitates the retrieval of non-terminal symbols of a certain type, enhancing the ability to programmatically manipulate and access parts of the grammar defined within the `Cfg`.
285    /// 
286    pub fn find_by_type(&self, ty: Type) -> Option<usize> {
287        self.iter().enumerate().find(|x| x.1.ty == ty).map(|(i, _)| i)
288    }
289    pub fn change_start(&self, nstart: usize) -> Self {
290        let mut new = self.clone();
291        for nt in new.iter_mut() {
292            nt.map_nt_number(|i| if i == 0 { nstart } else if i == nstart { 0 } else { i });
293        }
294        new.swap(0, nstart);
295        new
296    }
297}
298
299#[cfg(test)]
300mod tests {
301    use std::fs;
302
303    use crate::{parser::problem::PBEProblem, log};
304
305    use super::Cfg;
306
307    #[test]
308    fn test_cfg() {
309        log::set_log_level(5);
310        let s = fs::read_to_string("test/test.sl").unwrap();
311        let problem = PBEProblem::parse(s.as_str()).unwrap();
312        let cfg = Cfg::from_synthfun(problem.synthfun());
313        println!("{:?}", cfg);
314    }
315}