synthphonia/parser/
problem.rs

1use derive_more::Display;
2use itertools::Itertools;
3use pest::{
4    iterators::{Pair, Pairs},
5    Parser,
6};
7
8pub use pest::Position;
9pub use pest::Span;
10
11use crate::{
12    galloc::{self},
13    value::Type,
14};
15
16use super::{ioexamples::IOExamples, prod::ProdRule, config::{Config, self}};
17use derive_more::DebugCustom;
18
19pub type Error = pest::error::Error<Rule>;
20
21/// Creates a new custom error instance associated with a specific span. 
22
23pub fn new_custom_error_span<'i>(msg: String, span: Span<'i>) -> Error { Error::new_from_span(pest::error::ErrorVariant::CustomError { message: msg }, span) }
24/// Constructs and returns an error with a custom error message and position. 
25
26pub fn new_costom_error_pos<'i>(msg: String, pos: Position<'i>) -> Error { Error::new_from_pos(pest::error::ErrorVariant::CustomError { message: msg }, pos) }
27
28#[derive(DebugCustom, PartialEq, Eq, Hash, Clone)]
29#[debug(fmt = "{} : {:?} -> {:?}", _0, _1, _2)]
30/// A struct that encapsulates a non-terminal symbol in the context of string synthesis. 
31/// 
32/// This struct includes a `String` representing the non-terminal's identifier, a `Type` indicating the expected data type or category the non-terminal belongs to, and a `Vec<ProdRule>` which comprises a collection of production rules associated with this non-terminal. 
33/// Additionally, it contains a `Config` to store any configuration settings specific to the non-terminal's behavior or rule application within the parsing process. 
34/// Together, these components define the operational and structural context for a non-terminal during synthesis. 
35/// 
36/// 
37pub struct NonTerminal(pub String, pub Type, pub Vec<ProdRule>, pub Config);
38
39impl NonTerminal {
40    /// Parses a `Pair` into a `NonTerminal`. 
41
42    pub fn parse(pair: Pair<'_, Rule>) -> Result<NonTerminal, Error> {
43        let mut vec = pair.into_inner().collect_vec();
44        let config = vec.last().unwrap().clone();
45        let config = if config.as_rule() == Rule::config {
46            vec.pop();
47            Config::parse(config.clone())?
48        } else {
49            Config::new()
50        };
51        let [symbol, typ, prods]: [_; 3] = vec.try_into().unwrap();
52        let prods: Vec<_> = prods.into_inner().map(|x| ProdRule::parse(x)).try_collect()?;
53        Ok(NonTerminal(symbol.as_str().into(), Type::parse(typ)?, prods, config))
54    }
55}
56
57#[derive(DebugCustom, Clone)]
58#[debug(fmt = "{:?} [{:?}]", "self.inner", "self.config")]
59/// A struct that represents a grammar configuration for the synthesis problem. 
60/// 
61/// It contains three fields: `start`, `inner`, and `config`. 
62/// The `start` field is a `String` representing the initial non-terminal symbol of the grammar. 
63/// The `inner` field is a vector of `NonTerminal`, detailing the additional non-terminals involved in the synthesis process. 
64/// The `config` field is an instance of `Config`, which provides specific settings or parameters that modify or define the grammar's behavior within the synthesis context. 
65/// Together, these fields collectively describe the essential components and settings required for configuring the grammar used in string synthesis tasks. 
66/// 
67/// 
68pub struct Cfg {
69    pub start: String,
70    pub inner: Vec<NonTerminal>, 
71    pub config: Config
72}
73
74impl Cfg {
75    /// Parses a `Pair` using a context-free grammar (Cfg) representation and returns a `Cfg`. 
76    /// 
77    /// The function takes a `Pair` object that adheres to an outlined grammatical rule (`Rule`), transforming it into a collection of `NonTerminal` elements while also handling configuration through optional `Config` parsing. 
78    /// 
79    pub fn parse(pair: Pair<'_, Rule>) -> Result<Self, Error> {
80        let mut cfgvec = pair.into_inner().collect_vec();
81        let config = cfgvec.last().unwrap().clone();
82        let config = if config.as_rule() == Rule::config {
83            cfgvec.pop();
84            Config::parse(config.clone())?
85        } else {
86            Config::new()
87        };
88        let mut cfgiter = cfgvec.into_iter().peekable();
89        let start = NonTerminal::parse(cfgiter.peek().unwrap().clone())?;
90        let start = if let [ProdRule::Var(s, _)] =  start.2.as_slice() { cfgiter.next(); s } else { &start.0 };
91        let start = start.clone();
92        let mut inner: Vec<_> = cfgiter.map(|x| NonTerminal::parse(x)).try_collect()?;
93        let mut cfg = Cfg{start, inner, config};
94        cfg.reset_start();
95        Ok(cfg)
96    }
97    /// This function resets the position of the start non-terminal in the control flow graph. 
98
99    pub fn reset_start(&mut self) {
100        let start_index = self.inner.iter().position(|x| x.0 == self.start).unwrap();
101        let start_nt = self.inner.remove(start_index);
102        self.inner.insert(0, start_nt);
103    }
104    /// Retrieves the name of the non-terminal by type. 
105    /// 
106
107    pub fn get_nt_by_type(&self, ty: &Type) -> String {
108        self.inner.iter().find_map(|x| (x.1 == *ty).then_some(x.0.clone())).unwrap()
109    }
110    // pub fn sort(&mut self) {
111    //     let mut sort = topological_sort::TopologicalSort::<NonTerminal>::new();
112    //     for nt in self.inner.iter() {
113    //         for rule in nt.2.iter() {
114    //             if let ProdRule::Var(name, _) = rule {
115    //                 if let Some(r) = self.inner.iter().find(|a| &a.0 == name) {
116    //                     sort.add_dependency(*r, *nt);
117    //                 }
118    //             }
119    //         }
120    //     }
121    //     let mut v = Vec::new();
122    //     loop {
123    //         let mut a = sort.pop_all();
124    //         if a.is_empty() { break; }
125    //         v.append(&mut a);
126    //     }
127    //     self.inner = v;
128    // }
129}
130
131#[derive(Debug, Display, Clone)]
132#[display(fmt = "{} ({}) {:?}", "self.name", r#"self.args.iter().map(|(s, t)| format!("({} {:?})", s, t)).collect_vec().join(" ")"#, "self.rettype")]
133/// A struct that represents a function signature. 
134/// 
135/// This structure stores a function's name alongside its argument list and return type. 
136/// The `name` field holds the function's name as a string. 
137/// The `args` field is a vector of tuples, each containing a string representing an argument's name and a `Type` specifying the argument's type. 
138/// The `rettype` field indicates the return type of the function. 
139/// This struct provides a way to comprehensively define and store the components of a function's signature, facilitating operations that involve introspection or manipulation of function definitions within the context of string synthesis problems.
140/// 
141pub struct FunSig {
142    pub name: String,
143    pub args: Vec<(String, Type)>,
144    pub rettype: Type,
145}
146
147impl FunSig {
148    /// Returns the index of a named argument within the function signature's argument list. 
149
150    pub fn index(&self, argname: &str) -> Option<usize> {
151        self.args.iter().position(|x| x.0 == argname)
152    }
153}
154
155#[derive(Debug, Clone)]
156/// A struct that encapsulates a synthesis function's core attributes. 
157/// 
158/// It contains the `sig`, which holds the function's signature, defining the input and output types and any constraints applicable to the function. 
159/// The `cfg` field represents the configuration grammar, likely detailing specific rules or patterns that govern how the function operates or is derived during synthesis. 
160/// The `subproblem` field is a boolean flag indicating whether this function represents a subproblem within a larger synthesis task, potentially distinguishing it from primary synthesis functions or marking it for special processing.
161/// 
162pub struct SynthFun {
163    pub sig: FunSig,
164    pub cfg: Cfg,
165    pub subproblem: bool
166}
167
168impl SynthFun {
169    /// Parses a `synthfun` rule from a given input and constructs a `SynthFun` instance. 
170
171    pub fn parse(synthfun: Pair<'_, Rule>) -> Result<Self, Error> {
172        let subproblem = synthfun.as_rule() == Rule::synthsubproblem;
173        let [name, arglist, typ, cfg]: [_; 4] = synthfun.into_inner().collect_vec().try_into().unwrap();
174        let args: Vec<(String, Type)> = arglist
175            .into_inner()
176            .map(|x| {
177                let [name, typ]: [_; 2] = x.into_inner().collect_vec().try_into().unwrap();
178                Ok((name.as_str().to_owned(), Type::parse(typ)?))
179            })
180            .try_collect()?;
181        let rettype = Type::parse(typ)?;
182        let cfg = Cfg::parse(cfg)?;
183        Ok(Self{sig: FunSig{name: name.as_str().into(), args, rettype}, cfg, subproblem})
184    }
185    /// Lookup for a non-terminal within the synthesis function's configuration. 
186
187    pub fn lookup_nt(&self, nt: &str) -> Option<usize> {
188        self.cfg.inner.iter().find_position(|x| x.0.as_str() == nt).map(|x| x.0)
189    }
190    /// Searches for an argument name within the function signature and returns its index if found. 
191    pub fn lookup_arg(&self, arg: &str) -> Option<usize> {
192        self.sig.args.iter().find_position(|x| x.0.as_str() == arg).map(|x| x.0)
193    }
194}
195
196
197
198
199impl Type {
200    /// Parses a type from a given token pair. 
201    /// 
202    /// This function starts by processing the provided `Pair` to extract a contained symbol. 
203    /// It then matches this symbol against several predefined strings representing basic types, converting it into the corresponding `Type` variant. 
204    /// These types include `Int`, `String`, `Bool`, and `Float`. 
205    /// If the string representation of the pair contains "List", the function attempts to convert the basic type into a list type using `to_list()`, returning an error if this is unsupported. 
206    /// The function returns the parsed `Type` or an error if an unknown type is encountered.
207    /// 
208    pub fn parse(pair: Pair<'_, Rule>) -> Result<Self, Error> {
209        let [symbol]: [_; 1] = pair.clone().into_inner().collect_vec().try_into().unwrap();
210        if pair.as_str().contains("BitVec") {
211            let b = symbol.as_str().parse::<usize>().map_err(|_| new_custom_error_span("Can not parse BitVec".into(), pair.as_span()))?;
212            return Ok(Self::BitVector(b));
213        }
214        let basic = match symbol.as_str() {
215            "Int" => Self::Int,
216            "String" => Self::Str,
217            "Bool" => Self::Bool,
218            "Float" => Self::Float,
219            _ => panic!("Unknown Type {}", symbol.as_str()),
220        };
221        if pair.as_str().contains("List") {
222            basic.to_list().ok_or(new_custom_error_span("Unsupported list type".into(), pair.as_span()))
223        } else {
224            Ok(basic)
225        }
226    }
227}
228#[derive(Debug)]
229/// A struct representing a synthesis problem to be solved. 
230/// This structure contains four fields: `logic`, a string specifying the logic or domain in which the synthesis problem is defined; `synthfuns`, a vector of `SynthFun` instances representing the functions to be synthesized as part of solving the problem; `problem_index`, a usize value denoting the particular index or identifier of this problem within a broader set of problems; and `examples`, an `IOExamples` instance that holds input-output exemplars relevant to the synthesis task, to ground the problem solution in practical demonstrations of expected behavior.
231pub struct PBEProblem {
232    pub logic: String,
233    pub synthfuns: Vec<SynthFun>,
234    pub problem_index: usize,
235    pub examples: IOExamples,
236}
237
238impl PBEProblem {
239    /// Provides access to a specific `SynthFun` based on the current `problem_index`. 
240    /// 
241    /// This function retrieves a reference to the `SynthFun` within the `synthfuns` vector of the `PBEProblem` instance. 
242    /// The `problem_index` specifies which `SynthFun` to access, allowing direct retrieval of the active synthesis function's details, such as its signature and configuration, from the list of available functions.
243    /// 
244    pub fn synthfun(&self) -> &SynthFun {
245        &self.synthfuns[self.problem_index]
246    } 
247    
248    /// Parses a string input to create an instance of `PBEProblem`. 
249    /// 
250    /// This method uses the `ProblemParser` to initially parse the input string according to predefined grammar rules, extracting relevant components for logic, synthesis functions, examples, and checks. 
251    /// It specifically targets obtaining the logic definition, synthesizing problem configurations, and IO examples used in problem-solving. 
252    /// The method processes these components to extract the inner details of logic and synthesis, where synthesis functions are parsed individually. 
253    /// A verification step ensures that only one main synthesis function (`synth-fun`) is designated as the primary problem by filtering out those marked as subproblems. 
254    /// The synthesis examples are parsed to ensure they match the signature of the main synthesis function. 
255    /// It constructs and returns a `PBEProblem` comprising the logic, a vector of synthesis functions, the index of the main problem, and the parsed examples. 
256    /// The method will fail if the input does not conform to expected structures or logic, returning an error.
257    /// 
258    pub fn parse(input: &str) -> Result<PBEProblem, Error> {
259        let [file]: [_; 1] = ProblemParser::parse(Rule::file, input)?.collect_vec().try_into().unwrap();
260        let [_, logic, synthproblem, examples, checksynth]: [_; 5] = file.into_inner().collect_vec().try_into().unwrap();
261        let [logic]: [_; 1] = logic.into_inner().collect_vec().try_into().unwrap();
262        let synthfuns: Vec<_> = synthproblem.into_inner().enumerate().map(|(i, pair)| SynthFun::parse(pair)).collect::<Result<Vec<_>, _>>()?;
263        let vec = synthfuns.iter().enumerate().filter(|x| !x.1.subproblem).map(|i|i.0).collect_vec();
264        let problem_index = if let [a] = vec.as_slice() {*a} else { panic!("There should be only one synth-fun."); };
265        let examples = IOExamples::parse(examples, &synthfuns[problem_index].sig, true)?;
266
267        Ok(PBEProblem {
268            logic: logic.as_str().to_owned(),
269            synthfuns,
270            problem_index,
271            examples,
272        })
273    }
274}
275
276#[derive(pest_derive::Parser)]
277#[grammar = "src/parser/problem.pest"]
278/// A unit struct that serves as a parser for synthesis problems. 
279/// 
280/// This struct, given the absence of any fields or methods, functions as a marker or indicator within the module, potentially signifying a namespace or a type dedicated to parsing tasks related to synthesis problems. 
281/// It's expected to be expanded or referenced in functions or traits that implement its parsing functionality.
282/// 
283pub struct ProblemParser;
284
285#[cfg(test)]
286mod tests {
287    use std::fs;
288
289    use super::PBEProblem;
290
291    #[test]
292    fn parse_test() {
293        let s = fs::read_to_string("test/test.sl").unwrap();
294        let result = PBEProblem::parse(s.as_str());
295        println!("{:?}", result.map(|x| x.synthfun().cfg.clone()));
296    }
297}