synthphonia/expr/cfg.rs
1use std::{collections::HashMap, cmp::min};
2
3use crate::{
4 expr::ops::{Op1Enum, Op2Enum, Op3Enum}, galloc::AllocForAny, parser::{
5 self,
6 problem::{self, Error, PBEProblem, SynthFun},
7 prod, config::Config,
8 }, value::{ConstValue, Type}
9};
10use derive_more::{DebugCustom, Deref, DerefMut, From, Into, Index, IndexMut};
11use itertools::Itertools;
12use joinery::prelude::*;
13
14// use super::{Expr, context::Context, Op1, Op3, Op2};
15
16#[derive(DebugCustom, Clone)]
17/// An enum representing production rules for expressions in the synthesis problem framework.
18///
19/// This variant can encompass constants, variables, and non-terminal symbols along with unary, binary, and ternary operations.
20/// Each variant includes a formatting directive, used for debugging purposes, to provide a human-readable description of its content.
21/// The `Const` variant holds a constant value, `Var` represents a variable identified by an integer, and `Nt` refers to a non-terminal symbol using an index.
22/// The `Op1`, `Op2`, and `Op3` variants represent unary, binary, and ternary operations, respectively, each associated with operator enumerations and indices to expressions they relate to.
23/// This structure facilitates both the expression construction and the debugging process in the synthesis tasks.
24///
25pub enum ProdRule {
26 #[debug(fmt = "{:?}", _0)]
27 Const(ConstValue),
28 #[debug(fmt = "v{:?}", _0)]
29 Var(i64),
30 #[debug(fmt = "nt{:?}", _0)]
31 Nt(usize),
32 #[debug(fmt = "({} {:?})", _0, _1)]
33 Op1(&'static Op1Enum, usize),
34 #[debug(fmt = "({} {:?} {:?})", _0, _1, _2)]
35 Op2(&'static Op2Enum, usize, usize),
36 #[debug(fmt = "({} {:?} {:?} {:?})", _0, _1, _2, _3)]
37 Op3(&'static Op3Enum, usize, usize, usize),
38}
39
40impl ProdRule {
41 /// Creates a new instance from a raw production rule and a synthesis function problem context.
42 ///
43 /// It matches various kinds of production rules such as variables, constants, and operations, transforming them into corresponding variants.
44 /// For variables, it checks if the variable corresponds to an argument in the synthesis function, returning either a variable or a nonterminal rule.
45 /// The constant variant simply maps to its equivalent, maintaining its value.
46 /// For operations, it maps the operation names to their respective enums with memory allocation and resolves nonterminals using the synthesis function's context, ensuring that each element corresponds to a valid component in the synthesis task.
47 /// Any unrecognized variables or nonterminals lead to a panic.
48 ///
49 pub fn new(raw: &prod::ProdRule, problem: &SynthFun) -> Self {
50 match raw {
51 prod::ProdRule::Var(s, config) => {
52 if let Some(a) = problem.lookup_arg(s.as_str()) {
53 Self::Var(a as i64)
54 } else if let Some((a, _)) = problem.cfg.inner.iter().enumerate().find(|(a,b)| &b.0 == s) {
55 Self::Nt(a)
56 } else { panic!("Unrecongized Variable / Nonterminal") }
57 },
58 prod::ProdRule::Const(v, config) => Self::Const(*v),
59 prod::ProdRule::Op1(a, b, config) => Self::Op1(Op1Enum::from_name(a.as_str(), config).galloc(), problem.lookup_nt(b).expect("Unknow non terminal")),
60 prod::ProdRule::Op2(a, b, c, config) => Self::Op2(
61 Op2Enum::from_name(a.as_str(), config).galloc(),
62 problem.lookup_nt(b).expect("Unknow non terminal"),
63 problem.lookup_nt(c).expect("Unknow non terminal"),
64 ),
65 prod::ProdRule::Op3(a, b, c, d, config) => Self::Op3(
66 Op3Enum::from_name(a.as_str(), config).galloc(),
67 problem.lookup_nt(b).expect("Unknow non terminal"),
68 problem.lookup_nt(c).expect("Unknow non terminal"),
69 problem.lookup_nt(d).expect("Unknow non terminal"),
70 ),
71 }
72 }
73
74}
75
76#[derive(DebugCustom, Clone)]
77#[debug(fmt = "({}: {:?}) -> {:?}", name, ty, rules)]
78/// A struct representing a grammar non-terminal.
79///
80/// This construct includes several fields essential for defining a non-terminal within a string synthesis problem.
81/// The `name` field holds the identifier for the non-terminal, while `ty` specifies the associated type.
82/// The `rules` field is a collection of production rules that describe how this non-terminal can be expanded.
83/// The `start` field is a boolean flag indicating whether this non-terminal serves as the starting point in the grammar.
84/// Lastly, `config` encompasses additional settings or parameters that may influence the synthesis process for this non-terminal.
85///
86pub struct NonTerminal {
87 pub name: String,
88 pub ty: Type,
89 pub rules: Vec<ProdRule>,
90 pub config: Config,
91}
92
93impl NonTerminal {
94 /// Retrieves a unary operation production rule by name.
95 ///
96 /// This function iterates through the list of production rules associated with a non-terminal to find a unary operation rule (`Op1`) matching the specified operation name.
97 /// If a matching `Op1` operation is found, it returns the corresponding production rule; otherwise, it returns `None`.
98 /// This functionality enables specific lookups for unary operations within a non-terminal's rule set.
99 ///
100 pub fn get_op1(&self, op1: &str) -> Option<ProdRule>{
101 for rule in self.rules.iter() {
102 if let ProdRule::Op1(r, _) = rule {
103 if r.name() == op1 {
104 return Some(rule.clone());
105 }
106 }
107 }
108 None
109 }
110 /// Retrieves an `Op2` production rule matching a given operation name.
111 ///
112 /// This method iterates over the set of production rules associated with a non-terminal.
113 /// For each rule, if the rule is an `Op2` operation and its name matches the specified `op2` string, the method returns a cloned instance of that rule.
114 /// If no matching rule is found, it returns `None`.
115 /// This allows for extracting specific binary operations from the rule set, aiding in identifying or constructing expressions based on these operations.
116 ///
117 pub fn get_op2(&self, op2: &str) -> Option<ProdRule>{
118 for rule in self.rules.iter() {
119 if let ProdRule::Op2(r, _, _) = rule {
120 if r.name() == op2 {
121 return Some(rule.clone());
122 }
123 }
124 }
125 None
126 }
127 /// Retrieves a `ProdRule` of type `Op3` from the `NonTerminal` if it matches a specific operation name.
128 ///
129 /// This method iterates over the `rules` vector within a `NonTerminal` instance and checks if each rule is an `Op3` operation.
130 /// If an `Op3` operation matches the given `op3` string name, it returns a clone of that `ProdRule`.
131 /// If no matching operation is found, the method returns `None`.
132 ///
133 pub fn get_op3(&self, op3: &str) -> Option<ProdRule>{
134 for rule in self.rules.iter() {
135 if let ProdRule::Op3(r, _, _, _) = rule {
136 if r.name() == op3 {
137 return Some(rule.clone());
138 }
139 }
140 }
141 None
142 }
143
144 /// Retrieves a vector of all one-operand operations that are formatting operations from the production rules associated with the non-terminal.
145 ///
146 /// This method iterates through the list of production rules, checking each rule to see if it is a unary operation (`Op1`).
147 /// If the operation is identified as a formatting operation via `is_formatting_op`, it adds it to the results along with its associated non-terminal index.
148 /// The method compiles these into a vector of tuples containing the operation enum and the index, which is then returned.
149 ///
150 pub fn get_all_formatter(&self) -> Vec<(Op1Enum, usize)> {
151 let mut result = Vec::new();
152 for rule in self.rules.iter() {
153 if let ProdRule::Op1(r, nt) = rule {
154 if r.is_formatting_op() {
155 result.push(((*r).clone(), *nt));
156 }
157 }
158 }
159 result
160 }
161 pub fn map_nt_number(&mut self, mut f: impl FnMut(usize) -> usize) {
162 for prod in self.rules.iter_mut() {
163 match prod {
164 ProdRule::Nt(n) => {
165 *n = f(*n);
166 }
167 ProdRule::Op1(_, n) => {
168 *n = f(*n);
169 }
170 ProdRule::Op2(_, n1, n2) => {
171 *n1 = f(*n1);
172 *n2 = f(*n2);
173 }
174 ProdRule::Op3(_, n1, n2, n3) => {
175 *n1 = f(*n1);
176 *n2 = f(*n2);
177 *n3 = f(*n3);
178 }
179 _ => {}
180 }
181 }
182 }
183}
184#[derive(Clone)]
185/// A configuration structure for controlling various parameters related to the synthesis problem-solving process.
186///
187/// This structure contains fields that define limits and options for resource consumption and operational modes during synthesis.
188/// These fields include limits on size, time, and substring occurrences, as well as sample sizes for list subsequences.
189/// It also includes options for controlling conditional searching, deduction skipping, iteration limit rates, and special tree hole conditions.
190/// This configuration enables users to tailor the synthesis process by adjusting these parameters to suit different problem constraints and computational scenarios.
191///
192pub struct CfgConfig {
193 pub size_limit: usize,
194 pub time_limit: usize,
195 pub substr_limit: usize,
196 pub listsubseq_samples: usize,
197 pub increase_cost_limit: usize,
198 pub cond_search: bool,
199 pub no_deduction: bool,
200 pub ite_limit_rate: usize,
201 pub ite_limit_giveup: usize,
202 pub tree_hole: bool,
203}
204
205impl From<Config> for CfgConfig {
206 /// Creates a new instance by converting from a `Config` object.
207 ///
208 /// This method initializes each field of the struct with corresponding values fetched from the `Config` object, using specified keys.
209 /// If a key does not exist in the `Config`, a default value is assigned.
210 /// For `size_limit` and `time_limit`, the size defaults to `usize::MAX`.
211 /// The `substr_limit` defaults to `4`, `listsubseq_samples` to `0`, `increase_cost_limit` to `2000`, `ite_limit_rate` to `1000`, and `ite_limit_giveup` to `40`.
212 /// The boolean fields `cond_search`, `no_deduction`, and `tree_hole` are initialized as `false`.
213 /// This method is essential for transforming configuration data into a structured format used for synthesis constraints.
214 ///
215 fn from(value: Config) -> Self {
216 Self {
217 size_limit: value.get_usize("size_limit").unwrap_or(usize::MAX),
218 time_limit: value.get_usize("time_limit").unwrap_or(usize::MAX),
219 substr_limit: value.get_i64("data.substr.limit").unwrap_or(4) as usize,
220 listsubseq_samples: value.get_i64("data.listsubseq.sample").unwrap_or(0) as usize,
221 increase_cost_limit: value.get_i64("increase_cost_limit").unwrap_or(2000) as usize,
222 cond_search: false,
223 no_deduction: false,
224 ite_limit_rate: value.get_i64("ite_limit_rate").unwrap_or(1000) as usize,
225 ite_limit_giveup: value.get_i64("ite_limit_giveup").unwrap_or(40) as usize,
226 tree_hole: false,
227 }
228 }
229}
230
231#[derive(Deref, DerefMut, Into, Index, IndexMut, Clone)]
232/// Context-free grammar representation
233///
234/// This data structure embeds a vector of `NonTerminal` elements, facilitating operations that require dereferencing and indexing, both mutable and immutable.
235/// These operations are streamlined through attributes such as `#[deref]`, `#[deref_mut]`, `#[index]`, and `#[index_mut]`, enabling direct access and manipulation of the `inner` vector's elements.
236/// Additionally, it includes a public configuration field, `config`, of type `CfgConfig`, which likely governs specific settings or parameters associated with the non-terminal sequence within the larger framework of the synthesis process.
237///
238pub struct Cfg{
239 #[deref]
240 #[deref_mut]
241 #[index]
242 #[index_mut]
243 inner: Vec<NonTerminal>,
244 pub config: CfgConfig
245}
246
247impl std::fmt::Debug for Cfg {
248 /// Formats the configuration and writes it into the provided formatter.
249 ///
250 /// This method iterates over the collection of non-terminals within the internal vector, indexes them, and prints each accompanied by its index to the given formatter.
251 /// The output format aligns with Rust's debug formatting conventions, ensuring the representation is both informative and concise, and returning a result indicates whether formatting was successful or encountered an error.
252 ///
253 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254 for (i, nt) in self.inner.iter().enumerate() {
255 writeln!(f, "{}: {:?}", i, nt)?;
256 }
257 Ok(())
258 }
259}
260
261impl Cfg {
262 /// Constructs an instance from a provided `SynthFun` problem.
263 ///
264 /// This function extracts the configuration for constructing a context-free grammar (CFG) as represented by the `SynthFun` and populates the `Cfg` structure.
265 /// It iterates over the non-terminal definitions within the given problem, mapping them to `NonTerminal` structures with relevant details such as name, type, production rules, and configuration.
266 /// The first non-terminal is designated as the starting point.
267 /// The overall configuration for the CFG is cloned and assigned, ensuring the new `Cfg` instance accurately embodies the grammar and constraints defined in the `SynthFun` problem.
268 ///
269 pub fn from_synthfun(problem: &SynthFun) -> Self {
270 Self {
271 inner: problem.cfg.inner.iter().enumerate().map(|(i, nt)| NonTerminal {
272 name: nt.0.clone(),
273 ty: nt.1,
274 rules: nt.2.iter().map(|p| ProdRule::new(p, problem)).collect(),
275 config: nt.3.clone(),
276 }).collect_vec(),
277 config: problem.cfg.config.clone().into(),
278 }
279 }
280 /// Find and return the index of the first `NonTerminal` in the collection with a specified type.
281 ///
282 /// The method iterates over the internal `Vec<NonTerminal>`, checking each element's type against the given `ty`.
283 /// If a matching type is found, it returns the index as an `Option<usize>`, otherwise, it returns `None`.
284 /// This function facilitates the retrieval of non-terminal symbols of a certain type, enhancing the ability to programmatically manipulate and access parts of the grammar defined within the `Cfg`.
285 ///
286 pub fn find_by_type(&self, ty: Type) -> Option<usize> {
287 self.iter().enumerate().find(|x| x.1.ty == ty).map(|(i, _)| i)
288 }
289 pub fn change_start(&self, nstart: usize) -> Self {
290 let mut new = self.clone();
291 for nt in new.iter_mut() {
292 nt.map_nt_number(|i| if i == 0 { nstart } else if i == nstart { 0 } else { i });
293 }
294 new.swap(0, nstart);
295 new
296 }
297}
298
299#[cfg(test)]
300mod tests {
301 use std::fs;
302
303 use crate::{parser::problem::PBEProblem, log};
304
305 use super::Cfg;
306
307 #[test]
308 fn test_cfg() {
309 log::set_log_level(5);
310 let s = fs::read_to_string("test/test.sl").unwrap();
311 let problem = PBEProblem::parse(s.as_str()).unwrap();
312 let cfg = Cfg::from_synthfun(problem.synthfun());
313 println!("{:?}", cfg);
314 }
315}