synthphonia/utils/
nested.rs

1use std::{iter, ops::Range};
2
3use iset::IntervalMap;
4use itertools::{Itertools};
5use radix_trie::{Trie, TrieCommon};
6
7use crate::closure;
8use ahash::AHashMap as HashMap;
9/// An enumeration representing a recursive interval tree that can contain either an inner node with associated intervals or a terminal leaf with a stored value. 
10/// 
11/// 
12/// The inner node variant encapsulates an interval map associating ranges (identified by usize keys) to boxed nested trees, enabling hierarchical and ordered organization of intervals. 
13/// In contrast, the leaf variant holds a direct value, serving as the base case in this nested tree structure.
14pub enum NestedIntervalTree<T>{
15    Node(IntervalMap<usize, Box<NestedIntervalTree<T>>>),
16    Leaf(T)
17}
18
19impl<T: Clone> NestedIntervalTree<T> {
20    /// Builds a nested interval tree by recursively processing an iterator of range iterators and embedding a terminal value when no more ranges remain. 
21    /// 
22    /// 
23    /// Recursively processes the outer iterator such that if a current iterator exists, it iterates over its ranges and creates subtrees for each by cloning the remaining iterators and value; otherwise, it produces a leaf containing the provided value. 
24    /// This design constructs a hierarchical structure where each node associates intervals with nested subtrees, culminating in leaves holding the synthesized value.
25    pub fn build(mut ranges: impl Iterator<Item=impl Iterator<Item=Range<usize>>> + Clone, value: T) -> Self {
26        if let Some(head) = ranges.next() {
27            let mut maps = IntervalMap::new();
28            for range in head {
29                let inner = Self::build(ranges.clone(), value.clone());
30                maps.insert(range, inner.into());
31            }
32            Self::Node(maps)
33        } else {
34            Self::Leaf(value)
35        }
36    }
37    /// Inserts elements into a nested interval structure by recursively traversing an iterator over iterators of range indices to either update existing leaf values or create new branches with default values.
38    /// 
39    /// The function operates by taking an iterator of iterators of ranges, a function to update leaf values, and a default value. 
40    /// For an internal node, it iterates over each range from the provided iterator: if a corresponding child exists, it recurses into that child with cloned iterators; if not, it creates a new branch using the default value. 
41    /// When a leaf node is reached and there are no more ranges, it applies the update function to modify the leaf's contained value. 
42    /// The function will panic if the provided range structure does not match the depth of the tree.
43    pub fn insert_using_iter<'a: 'b, 'b>(&'a mut self, mut ranges: impl Iterator<Item=impl Iterator<Item=Range<usize>> + 'b> + Clone + 'b, update: &impl Fn(&mut T), default: T) {
44        let head = ranges.next();
45        match (self, head) {
46            (NestedIntervalTree::Node(maps), Some(head)) => {
47                for range in head {
48                    if let Some(r) = maps.get_mut(range.clone()) {
49                        r.insert_using_iter(ranges.clone(), update, default.clone());
50                    } else {
51                        maps.insert(range, Self::build(ranges.clone(), default.clone()).into());
52                    }
53                }
54            }
55            (NestedIntervalTree::Leaf(v), None) => {
56                update(v);
57            }
58            _ => panic!("DeepIntervalTree have a different number of ranges indices."),
59        }
60    }
61    /// Inserts a value at a nested tree leaf corresponding to a multi-dimensional sequence of interval ranges. 
62    /// 
63    /// 
64    /// Uses a vector of interval range vectors to represent the nesting levels, delegating recursive insertion to a helper that traverses and builds intermediate nodes as needed, while using a no-op update function for branch nodes.
65    pub fn insert_multiple<'a: 'b, 'b>(&'a mut self, ranges: &Vec<Vec<Range<usize>>>, value: T) {
66        self.insert_using_iter(ranges.iter().map(|x| x.iter().cloned()), &|_| (), value)
67    }
68    /// Inserts a value into the nested interval tree using a provided slice of ranges. 
69    /// This function transforms the slice of ranges into an iterator of one-item iterators and then delegates to the internal insertion procedure that supports multi-level range specifications.
70    pub fn insert<'a: 'b, 'b>(&'a mut self, ranges: &[Range<usize>], value: T) {
71        self.insert_using_iter(ranges.iter().map(|x| iter::once(x.clone())), &|_| (), value)
72    }
73}
74
75impl<T> Default for NestedIntervalTree<T> {
76    /// Returns a default instance by invoking the constructor for a new instance. 
77    /// This implementation enables the use of default() to create an object with initial empty configuration without the need for manual initialization.
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl<T> NestedIntervalTree<T> {
84    /// Creates an empty nested interval tree by initializing it as a node containing an empty interval map. 
85    /// This function serves as a constructor for the tree structure, providing a starting point for inserting ranges and associated values in later operations.
86    pub fn new() -> Self {
87        Self::Node(IntervalMap::new())
88    }
89    /// Retrieves a stored value by following a sequence of interval ranges across a nested data structure. 
90    /// 
91    /// This function accepts a slice of range indices that represent a traversal path through nested intervals. 
92    /// It navigates the structure by, for non-empty range sequences, using the first range to determine which subtree to search and recursing with the remaining ranges, while returning the leaf’s value if there are no ranges left. 
93    /// If the provided path does not match the structure, it results in a None.
94    /// 
95    pub fn get(&self, ranges: &[Range<usize>]) -> Option<&T> {
96        match self {
97            NestedIntervalTree::Node(maps) if !ranges.is_empty() => {
98                let (head, tail) = (&ranges[0], &ranges[1..]);
99                maps.get(head.clone()).and_then(|x| x.get(tail))
100            }
101            NestedIntervalTree::Leaf(v) if ranges.is_empty() => Some(v),
102            _ => None,
103        }
104    }
105    /// Returns an iterator over all values in the tree that are reachable via a chain of intervals where each enclosing interval is a superrange of the corresponding query range. 
106    /// 
107    /// This method recursively traverses a nested interval structure using an iterator of iterators of ranges; in internal nodes it filters subintervals that fully contain the query interval and descends recursively, while at a leaf node with no remaining query ranges it yields the stored value. 
108    /// It panics if the depth of provided range sequences does not match the tree structure.
109    /// 
110    pub fn superrange_using_iter<'a: 'b, 'b>(&'a self, mut ranges: impl Iterator<Item=impl Iterator<Item=Range<usize>> + 'b> + Clone + 'b) -> Box<dyn Iterator<Item=&'b T> + 'b> {
111        let head = ranges.next();
112        match (self, head) {
113            (NestedIntervalTree::Node(maps), Some(head)) => {
114                let it = head.flat_map(move |head| {
115                    maps.iter(head.clone())
116                        .filter(move |(r, _)| head.start >= r.start && r.end >= head.end) 
117                        .flat_map(closure![clone ranges; move |(_, t)| t.superrange_using_iter(ranges.clone())])
118                });
119                Box::new(it)
120            }
121            (NestedIntervalTree::Leaf(v), None) => Box::new(Some(v).into_iter()),
122            _ => panic!("DeepIntervalTree have a different number of ranges indices."),
123        }
124    }
125    /// Returns an iterator over subrange-matching values from a nested interval tree structure based on a sequence of range iterators. 
126    /// 
127    /// 
128    /// Processes a cloned iterator yielding iterators over range indices, where at each node it filters for entries whose ranges are completely contained within the provided range, and recursively applies the same filtering on the resulting subtrees. 
129    /// In the leaf case when no further range iterator is provided, it yields the leaf value, while differing iterator depths result in a runtime panic.
130    pub fn subrange_using_iter<'a: 'b, 'b>(&'a self, mut ranges: impl Iterator<Item=impl Iterator<Item=Range<usize>> + 'b> + Clone + 'b) -> Box<dyn Iterator<Item=&'b T> + 'b> {
131        let head = ranges.next();
132        match (self, head) {
133            (NestedIntervalTree::Node(maps), Some(head)) => {
134                let it = head.flat_map(move |head| {
135                    maps.iter(head.clone())
136                        .filter(move |(r, _)| r.start >= head.start && head.end >= r.end) 
137                        .flat_map(closure![clone ranges; move |(_, t)| t.subrange_using_iter(ranges.clone())])
138                });
139                Box::new(it)
140            }
141            (NestedIntervalTree::Leaf(v), None) => Box::new(Some(v).into_iter()),
142            _ => panic!("DeepIntervalTree have a different number of ranges indices."),
143        }
144    }
145    /// Returns an iterator over all values whose associated intervals contain the specified super ranges.
146    /// 
147    /// Delegates to a lower-level iterator implementation by mapping the provided vector of range vectors into an appropriate iterator form for traversing the nested interval structure, ultimately yielding references to values that satisfy the super-range condition.
148    pub fn superrange_multiple<'a: 'b, 'b>(&'a self, ranges: &'b Vec<Vec<Range<usize>>>) -> Box<dyn Iterator<Item=&'b T> + 'b> {
149        self.superrange_using_iter(ranges.iter().map(|x| x.iter().cloned()))
150    }
151    /// Returns an iterator over elements from nested intervals that are enclosed by the specified multiple subrange lists provided as a vector of vectors. 
152    /// 
153    /// 
154    /// Invokes an internal subrange retrieval function by mapping the provided vector of range lists into an iterator of cloned ranges, thereby allowing the caller to iterate over all matching elements in the tree that fall within the prescribed nested subranges.
155    pub fn subrange_multiple<'a: 'b, 'b>(&'a self, ranges: &'b Vec<Vec<Range<usize>>>) -> Box<dyn Iterator<Item=&'b T> + 'b> {
156        self.subrange_using_iter(ranges.iter().map(|x| x.iter().cloned()))
157    }
158    /// Return an iterator yielding references to values whose associated intervals form a superrange of the provided sequence of index ranges. 
159    /// This function converts a vector of ranges into the expected single-element iterators and delegates the lookup to the underlying superrange retrieval logic, thereby providing access to all elements that satisfy the defined superrange condition.
160    pub fn superrange<'a: 'b, 'b>(&'a self, ranges: Vec<Range<usize>>) -> Box<dyn Iterator<Item=&'b T> + 'b> {
161        self.superrange_using_iter(ranges.into_iter().map(std::iter::once))
162    }
163    /// Returns a boxed iterator over references to values matching the provided subrange specification. 
164    /// 
165    /// 
166    /// Maps the given vector of ranges into the expected iterator form and delegates to an underlying iterator-based subrange retrieval method to extract matching elements from the nested structure.
167    pub fn subrange<'a: 'b, 'b>(&'a self, ranges: Vec<Range<usize>>) -> Box<dyn Iterator<Item=&'b T> + 'b> {
168        self.subrange_using_iter(ranges.into_iter().map(std::iter::once))
169    }
170}
171
172/// This structure represents an encoder that maps unique keys to integer codes while storing associated values. 
173/// It encapsulates a vector of key-value pairs alongside a hash map that records the corresponding code for each key.
174/// 
175/// The interface supports operations to insert new key-value associations, retrieve a code given a key, and decode keys and values based on their assigned codes, ensuring efficient bidirectional lookup between keys and integer representations.
176pub struct Encoder<K: std::hash::Hash, V> {
177    values: Vec<(K, V)>,
178    codes: HashMap<K, u32>
179}
180
181impl<K: std::hash::Hash + Clone + std::cmp::Eq, V> Default for Encoder<K, V> {
182    /// Returns a new instance with default settings by invoking the standard constructor. 
183    /// This method provides the default initialization behavior by delegating its work to the dedicated creation function, ensuring consistent instantiation for the type.
184    fn default() -> Self {
185        Self::new()
186    }
187}
188
189impl<K: std::hash::Hash + Clone + std::cmp::Eq, V> Encoder<K, V> {
190    /// Creates a new encoder instance with no stored key-value pairs. 
191    /// 
192    /// This constructor initializes the internal storage to be empty, allowing subsequent insertions to assign unique numerical codes to keys while associating corresponding values. 
193    /// 
194    /// 
195    pub fn new() -> Self {
196        Encoder { values: Vec::new(), codes: HashMap::new() }
197    }
198    /// Inserts a new key-value pair into the encoder, assigning and returning a unique numeric code for the key. 
199    /// 
200    /// 
201    /// Generates a code based on the current state of internal storage, clones the key, and stores the pair in parallel structures that maintain both an ordered list of values and a mapping from keys to their corresponding codes, enabling efficient retrieval and translation between keys and their numeric identifiers.
202    pub fn insert(&mut self, k: K, v: V) -> u32 {
203        let code = self.values.len() as u32;
204        self.values.push((k.clone(), v));
205        self.codes.insert(k, code);
206        code
207    }
208    /// Retrieves a numeric code corresponding to a given key from the internal mapping. 
209    /// 
210    /// 
211    /// Returns an optional value representing the unique encoding if the key exists, or None otherwise.
212    pub fn encode(&self, t: &K) -> Option<u32> {
213        self.codes.get(t).cloned()
214    }
215    /// Retrieves a reference to the key corresponding to the provided encoded numeric value. 
216    /// 
217    /// 
218    /// Returns the key by interpreting the given integer as an index into an internal collection, enabling reverse mapping from a numeric code back to its associated key.
219    pub fn decode(&self, t: u32) -> &K {
220        &self.values[t as usize].0
221    }
222    /// Returns a reference to the value associated with the supplied numerical code from the internal storage. 
223    /// This method converts the provided u32 code into an index and accesses the corresponding entry, retrieving the second element of the stored key–value pair.
224    pub fn value(&self, t: u32) -> &V {
225        &self.values[t as usize].1
226    }
227}
228
229/// A container wrapping a collection of radix tries that map static string slices to lists of string slice arrays. 
230/// This structure is designed to group multiple tries, enabling efficient organization and retrieval of sequences of static strings by leveraging trie-based lookups for operations such as prefix search and related string queries.
231pub struct RadixTrieN(Vec<Trie<&'static str, Vec<&'static [&'static str]>>>);
232
233impl RadixTrieN {
234    /// Creates a new instance of the specialized trie collection with a specified number of inner trie elements.
235    /// 
236    /// Initializes the generator by taking a length parameter and generating a vector with that many default trie structures. 
237    /// Each inner trie is created using its default constructor, and the resulting collection is encapsulated within the instance.
238    pub fn new(len: usize) -> Self {
239        Self( (0..len).map(|_| Trie::new()).collect_vec() )
240    }
241    /// Inserts a provided key composed of static string slices into a multi-layer trie structure. 
242    /// This function iterates over the key’s components paired with corresponding trie elements and updates each trie by appending the key to an existing entry or inserting a new entry when none exists.
243    pub fn insert(&mut self, key: &'static [&'static str]) {
244        for (s, v) in key.iter().cloned().zip(self.0.iter_mut()) {
245            if let Some(a) = v.get_mut(s) {
246                a.push(key);
247            } else {
248                v.insert(s, vec![key]);
249            }
250        }
251    }
252    #[inline]
253    /// Returns an iterator over candidate key slices that are recognized as superfixes relative to the provided key.
254    /// 
255    /// Determines the most significant component of the key based on length and queries the corresponding subtrie, then filters and flattens the resulting values to yield only those keys that satisfy the superfix property with respect to the original key.
256    pub fn superfixes(&self, key: &'static [&'static str]) -> impl Iterator<Item=&'static [&'static str]> + '_ {
257        let (i, _) = key.iter().cloned().enumerate().max_by_key(|(i, x)| x.len()).unwrap();
258        self.0[i].subtrie(key[i]).map(|x| x.values().flat_map(|v| v.iter().cloned().filter(|x| is_prefix(key, x))) ).into_iter().flatten()
259    }
260    #[inline]
261    /// Returns an iterator that produces all stored string sequences from the underlying trie that qualify as prefixes of the provided key sequence.
262    /// 
263    /// Operates by selecting the key element with the smallest length to determine the appropriate subtrie, then iterates over candidate values from that subtrie. 
264    /// The iterator is filtered to include only those sequence elements that satisfy the prefix relationship with the full key as determined by a predicate.
265    pub fn prefixes(&self, key: &'static [&'static str]) -> impl Iterator<Item=&'static [&'static str]> + '_ {
266        let (i, _) = key.iter().cloned().enumerate().min_by_key(|(i, x)| x.len()).unwrap();
267        PrefixIter{ trie: &self.0[i], key: Some(key[i])}.flat_map(|x|  x.iter().cloned().filter(|x| is_prefix(x, key)) )
268    }
269}
270
271/// Checks whether each element in the first slice is a prefix of its corresponding element in the second slice. 
272/// This function iterates over paired elements from both slices and returns true only if every element from the first slice is a starting substring of the corresponding element in the second slice, ensuring a complete prefix match across the pair of slices.
273fn is_prefix(x: &[&str], k: &[&str]) -> bool {
274    x.iter().cloned().zip(k.iter().cloned()).all(|(a,b)| b.starts_with(a))
275}
276
277/// A structure representing an iterator that traverses ancestors in a trie based on a specific key. 
278/// It maintains a reference to a trie mapping static string keys to associated values and an optional key used to initiate and guide the iteration process.
279/// 
280/// This iterator retrieves entries from the trie by progressively reducing the key, enabling prefix-based lookup. 
281/// It encapsulates the necessary state to iterate over trie elements whose keys are prefixes, offering a convenient abstraction for navigating hierarchical or nested string data.
282pub struct PrefixIter<'a, T>{
283    trie: &'a Trie<&'static str, T>,
284    key: Option<&'static str>,
285}
286
287impl<'a, T> Iterator for PrefixIter<'a, T> {
288    type Item=&'a T;
289
290    /// Advances the iterator by retrieving the next element associated with the current prefix state. 
291    /// This method queries the underlying trie for an ancestor of the current key and, if found, updates the internal key by trimming its last element or marks the iteration as complete when empty, finally returning the corresponding value.
292    fn next(&mut self) -> Option<Self::Item> {
293        if let Some(key) = self.key {
294            self.trie.get_ancestor(key).and_then(|x| {
295                x.key().map(|k| {
296                    if k.is_empty() {
297                        self.key = None;    
298                    } else {
299                        self.key = Some(&k[0..k.len()-1]);
300                    }
301                    x.value().unwrap()
302                })
303            })
304        } else { None }
305    }
306}
307
308/// A structure encapsulating an interval-based mapping designed to index and query segments of expected string data. 
309/// 
310/// 
311/// It maintains a reference to an array of constant strings along with a vector of interval maps that associate ranges of indices with collections of string slice arrays. 
312/// This design facilitates efficient operations for matching, extracting, and navigating through substrings or superstrings based on predefined expectations.
313pub struct IntervalTreeN {
314    expected: &'static [&'static str],
315    maps: Vec<IntervalMap<usize, Vec<&'static [&'static str]>>>
316}
317
318impl IntervalTreeN {
319    /// Creates a new instance using the provided expected strings and initializes an internal collection of interval maps. 
320    /// 
321    /// 
322    /// Constructs the instance by assigning the given expected strings and generating one interval map per expected element, ensuring a corresponding structure is available for each.
323    pub fn new(expected: &'static [&'static str]) -> Self {
324        Self { expected, maps: (0..expected.len()).map(|_| IntervalMap::new()).collect_vec()}
325    }
326    /// Inserts a key into the interval-based mapping structure by iterating over corresponding expected string slices and mutable interval maps. 
327    /// 
328    /// The method scans each component of the key against its paired expected string to identify all matching substrings through pattern matching. 
329    /// For each match, it computes a range based on the match index and the length of the key component. 
330    /// If an existing entry for the range is found, it appends the key; otherwise, it creates a new entry with the key.
331    /// 
332    /// 
333    pub fn insert(&mut self, key: &'static [&'static str]) {
334        for (k, (e, v)) in key.iter().cloned().zip(self.expected.iter().cloned().zip(self.maps.iter_mut())) {
335            for (i, _) in e.match_indices(k) {
336                let range = i..(i + k.len());
337                if let Some(l) = v.get_mut(range.clone()) {
338                    l.push(key);
339                } else {
340                    v.insert(range, vec![key]);
341                }
342            }
343        }
344    }
345    /// Inserts the first occurrence of each element of the provided key into the interval tree by mapping the range of its first match in the corresponding expected string. 
346    /// 
347    /// 
348    /// Iterates over each element of the key array alongside the expected strings and mutable maps. 
349    /// For each pair, it identifies the first matching index in the expected string and constructs a range covering that occurrence. 
350    /// It then updates the map at that range by appending the key if the range already exists or inserting a new entry if it does not.
351    pub fn insert_first_occur(&mut self, key: &'static [&'static str]) {
352        for (k, (e, v)) in key.iter().cloned().zip(self.expected.iter().cloned().zip(self.maps.iter_mut())) {
353            if let Some((i, _)) = e.match_indices(k).next() {
354                let range = i..(i + k.len());
355                if let Some(l) = v.get_mut(range.clone()) {
356                    l.push(key);
357                } else {
358                    v.insert(range, vec![key]);
359                }
360            }
361        }
362    }
363    #[inline]
364    /// Returns an iterator over stored string sequences that are valid superstrings of the provided key. 
365    /// 
366    /// 
367    /// Selects a dimension based on the longest string in the key and then retrieves intervals from the corresponding expected string. 
368    /// These intervals are used to access candidate sequences, which are filtered to retain only those sequences that have the supplied key as a substring, yielding an iterator over matching sequences.
369    pub fn superstrings(&self, key: &'static [&'static str]) -> impl Iterator<Item=&'static [&'static str]> + '_ {
370        let (i, _) = key.iter().cloned().enumerate().max_by_key(|(_, x)| x.len()).unwrap();
371        self.expected[i].match_indices(key[i]).map(move |(k, _)| k..(k+key[i].len())).flat_map(move |range| {
372            self.maps[i].iter(range).flat_map(move |(_, v)| {
373                v.iter().cloned().filter(|x| is_substring(key, x))
374            })
375        })
376    }
377    #[inline]
378    /// Returns an iterator over elements from the interval tree whose stored string slices satisfy a substring inclusion condition with respect to the provided key. 
379    /// 
380    /// 
381    /// Selects the key element with the minimum length to guide the search for matching intervals. 
382    /// Then, for each occurrence of that element within the expected string, it gathers a range corresponding to that match and scans the associated interval map entries. 
383    /// Finally, it filters and returns only those entries whose string slices fulfill the substring checking predicate relative to the entire key.
384    pub fn substrings(&self, key: &'static [&'static str]) -> impl Iterator<Item=&'static [&'static str]> + '_ {
385        let (i, _) = key.iter().cloned().enumerate().min_by_key(|(_, x)| x.len()).unwrap();
386        self.expected[i].match_indices(key[i]).map(move |(k, _)| k..(k+key[i].len())).flat_map(move |range| {
387            self.maps[i].iter(range).flat_map(move |(_, v)| {
388                v.iter().cloned().filter(|x| is_substring(x, key))
389            })
390        })
391    }
392}
393
394/// Determines if each string in one slice occurs as a substring within the corresponding string in another slice. 
395/// 
396/// 
397/// Compares elements from two slices pairwise, returning true only if every element from the first slice is contained within its matching element in the second slice. 
398/// The function uses an iterator to zip through both slices and applies the substring check to each pair, ensuring that the condition holds across all compared pairs.
399fn is_substring(x: &[&str], k: &[&str]) -> bool {
400    x.iter().cloned().zip(k.iter().cloned()).all(|(a,b)| b.contains(a))
401}
402
403
404#[cfg(test)]
405mod test {
406    use itertools::Itertools;
407    use radix_trie::Trie;
408
409    use crate::galloc::AllocForAny;
410
411    use super::RadixTrieN;
412
413    #[test]
414    fn test() {
415        let mut trie = RadixTrieN::new(2);
416        trie.insert(["", "a"].galloc());
417        trie.insert(["al", ""].galloc());
418        trie.insert(["alpha", "alp"].galloc());
419        trie.insert(["", "alp"].galloc());
420        trie.insert(["", "alpha"].galloc());
421        trie.insert(["", "alphb"].galloc());
422        trie.insert(["alpha", "alpha"].galloc());
423        
424        for k in trie.prefixes(["alpha", "alpha"].galloc()) {
425            println!("{:?}", k);
426        }
427
428    }
429}