synthphonia/expr/ops/
str.rs

1use std::cmp::min;
2use std::ops::Not;
3
4use bumpalo::collections::CollectIn;
5use derive_more::DebugCustom;
6use crate::galloc::{AllocForStr, AllocForExactSizeIter, TryAllocForExactSizeIter, AllocForIter, AllocForCharIter};
7use crate::utils::F64;
8use crate::{new_op1, new_op2, new_op3, new_op3_opt, new_op2_opt};
9use itertools::izip;
10
11
12
13use super::list::to_index;
14use super::{Op1, Op3, Op2};
15
16
17new_op2!(Concat, "str.++",
18    (Str, Str) -> Str { |(s1, s2)| {
19        (s1.galloc_owned_str() + s2).into_bump_str()
20    }}
21);
22
23mod replace;
24pub use replace::*;
25
26
27new_op3!(SubStr, "str.substr",
28    (Str, Int, Int) -> Str { |(s1, s2, s3)| {
29        if s1.is_empty() { return ""; }
30        if *s2 >= 0 && (*s2 as usize) < s1.len() && *s3 >= 0 {
31            let i = *s2 as usize;
32            let j = std::cmp::min(i + *s3 as usize, s1.len());
33            s1[i..j].galloc_str()
34        } else { "" }
35    }}
36);
37
38new_op2_opt!(Head, "str.head",
39    (Str, Int) -> Str { |(s1, s2)| {
40        if s1.len() <= 1 { return None; }
41        let i = to_index(s1.len(), *s2);
42        if i == 0 || i == s1.len() { return None; }
43        Some(s1[0..i].galloc_str())
44    }},
45    (Str, Float) -> Str { |(s1, s2)| {
46        if s1.len() <= 1 { return None; }
47        let i = to_index(s1.len(), **s2 as i64);
48        if i == 0 || i == s1.len() { return None; }
49        Some(s1[0..i].galloc_str())
50    }}
51);
52
53new_op2_opt!(Tail, "str.tail",
54    (Str, Int) -> Str { |(s1, s2)| {
55        if s1.len() <= 1 { return None; }
56        let i = to_index(s1.len(), *s2);
57        if i == 0 || i == s1.len() { return None; }
58        Some(s1[i..].galloc_str())
59    }},
60    (Str, Float) -> Str { |(s1, s2)| {
61        if s1.len() <= 1 { return None; }
62        let i = to_index(s1.len(), **s2 as i64);
63        if i == 0 || i == s1.len() { return None; }
64        Some(s1[i..].galloc_str())
65    }}
66);
67
68new_op1!(ToInt, "str.to.int",
69    Str -> Int { |s1| {
70        s1.parse::<i64>().unwrap_or(0)
71    }}
72);
73
/// Finds the byte index of an occurrence of `s2` in `s1`, counting from the front.
///
/// `s3` is the zero-based occurrence number: `0` selects the first occurrence,
/// `1` the second, and so on. After each hit the search resumes one character
/// past the start of that hit, so overlapping occurrences are counted
/// (`"aaa"` contains `"aa"` at indices `0` and `1`).
///
/// Returns `-1` when `s1` has fewer than `s3 + 1` occurrences of `s2`, or when
/// the search position reaches the end of `s1` (which includes the case of an
/// empty `s1`).
pub fn str_index_of_f(s1: &str, s2: &str, s3: usize) -> i64 {
    // Byte offset at which the next search begins.
    let mut search_from: usize = 0;
    // Byte offset of the most recent hit.
    let mut hit: usize = 0;
    for _ in 0..=s3 {
        if search_from >= s1.len() { return -1; }
        hit = match s1[search_from..].find(s2) {
            Some(offset) => search_from + offset,
            None => return -1,
        };
        // Step over one whole character rather than one byte: a hit always
        // starts on a character boundary, but `hit + 1` is not a boundary when
        // that character is multi-byte, and slicing there would panic.
        let step = s1[hit..].chars().next().map_or(1, char::len_utf8);
        search_from = hit + step;
    }
    hit as i64
}
90
/// Finds the byte index of an occurrence of `s2` in `s1`, counting from the back.
///
/// `s3` is the one-based occurrence number from the end: `1` selects the last
/// occurrence, `2` the one before it, and so on. Each search is limited to the
/// part of `s1` that lies before the previous hit, so counted occurrences never
/// overlap.
///
/// Returns `-1` when `s2` cannot be found in the remaining window, or when the
/// window has shrunk to nothing while occurrences are still requested. With
/// `s3 == 0` no search is performed and `s1.len()` is returned.
pub fn str_index_of_b(s1: &str, s2: &str, s3: usize) -> i64 {
    // Exclusive upper bound of the window still to be searched.
    let mut window_end = s1.len();
    let mut remaining = s3;
    while remaining > 0 {
        if window_end == 0 {
            return -1;
        }
        match s1[..window_end].rfind(s2) {
            Some(position) => window_end = position,
            None => return -1,
        }
        remaining -= 1;
    }
    window_end as i64
}
109
110new_op3!(IndexOf, "str.indexof",
111    (Str, Str, Int) -> Int { |(s1, s2, s3)| {
112        if *s3 < 0 || *s3 as usize > s1.len() { return -1i64; }
113        if let Some(r) = s1[*s3 as usize..].find(s2) {
114            *s3 + r as i64
115        } else { -1i64 }
116    }}
117);
118
119new_op2!(PrefixOf, "str.prefixof",
120    (Str, Str) -> Bool { |(s1, s2)| {
121        s2.starts_with(s1)
122    }}
123);
124new_op2!(SuffixOf, "str.suffixof",
125    (Str, Str) -> Bool { |(s1, s2)| {
126        s2.ends_with(s1)
127    }}
128);
129new_op2!(Contains, "str.contains",
130    (Str, Str) -> Bool { |(s1, s2)| {
131        s1.contains(s2)
132    }}
133);
134
135
136new_op2!(Split, "str.split",
137    (Str, Str) -> ListStr { |(s1, s2)| {
138        s1.split(s2).galloc_collect()
139    }}
140);
141
142new_op2!(Join, "str.join",
143    (ListStr, Str) -> Str { |(s1, s2)| {
144        s1.join(s2).galloc_str()
145    }}
146);
147
148new_op2!(Count, "str.count",
149    (Str, Str) -> Int { |(s1, s2)| {
150        s1.matches(s2).count() as i64
151    }}
152);
153
154new_op2!(FCount, "str.fcount",
155    (Str, Str) -> Float { |(s1, s2)| {
156        F64::from_usize(s1.matches(s2).count())
157    }}
158);
159
160new_op1!(RetainLl, "str.retainLl",
161    Str -> Str { |s1| {
162        s1.chars().filter(|s| s.is_lowercase()).galloc_collect_str()
163    }}
164);
165
166new_op1!(RetainLc, "str.retainLc",
167    Str -> Str { |s1| {
168        s1.chars().filter(|s| s.is_uppercase()).galloc_collect_str()
169    }}
170);
171
172new_op1!(RetainN, "str.retainN",
173    Str -> Str { |s1| {
174        s1.chars().filter(|s| s.is_ascii_digit()).galloc_collect_str()
175    }}
176);
177
178new_op1!(RetainL, "str.retainL",
179    Str -> Str { |s1| {
180        s1.chars().filter(|s| s.is_alphabetic()).galloc_collect_str()
181    }}
182);
183
184new_op1!(RetainLN, "str.retainLN",
185    Str -> Str { |s1| {
186        s1.chars().filter(|s| s.is_alphanumeric()).galloc_collect_str()
187    }}
188);
189
190new_op1!(Uppercase, "str.uppercase",
191    Str -> Str { |s1| {
192        s1.to_uppercase().galloc_str()
193    }}
194);
195
196new_op1!(Lowercase, "str.lowercase",
197    Str -> Str { |s1| {
198        s1.to_lowercase().galloc_str()
199    }}
200);
201
#[cfg(test)]
mod tests {
    use crate::expr::ops::str::{str_index_of_f, str_index_of_b};

    /// Checks the forward and backward occurrence searches against fixed
    /// `(haystack, needle, occurrence, expected index)` tables.
    #[test]
    fn test_str_index_of_inner() {
        // Forward search: the occurrence number is zero-based.
        let forward: &[(&str, &str, usize, i64)] = &[
            ("s1asdf", "s1", 0, 0),
            ("a s1s1s1", "s1", 0, 2),
            ("a s1s1s1", "s1", 1, 4),
            ("a s1s1s1", "s1", 2, 6),
            ("a s1s1s1 s1", "s1", 3, 9),
            ("a s1s1s1 s1", "s1", 4, -1),
            ("s", "s", 0, 0),
            ("s1asdf", "s", 0, 0),
            ("a s1s1s1", "s", 0, 2),
            ("a s1s1s1", "s", 1, 4),
            ("a s1s1s1", "s", 2, 6),
            ("a s1s1s1 s", "s", 3, 9),
            ("a s1s1s1 s", "s", 4, -1),
        ];
        for &(haystack, needle, nth, expected) in forward {
            assert_eq!(str_index_of_f(haystack, needle, nth), expected);
        }

        // Backward search: the occurrence number is one-based from the end.
        let backward: &[(&str, &str, usize, i64)] = &[
            ("s1asdf", "s1", 1, 0),
            ("a s1s1s1", "s1", 3, 2),
            ("a s1s1s1", "s1", 2, 4),
            ("a s1s1s1", "s1", 1, 6),
            ("a s1s1s1 s1", "s1", 1, 9),
            ("a s1s1s1 s1", "s1", 5, -1),
            ("s", "s", 1, 0),
            ("s1asdf", "s", 2, 0),
            ("a s1s1s1", "s", 3, 2),
            ("a s1s1s1", "s", 2, 4),
            ("a s1s1s1", "s", 1, 6),
            ("a s1s1s1 s", "s", 1, 9),
            ("a s1s1s1 s", "s", 5, -1),
        ];
        for &(haystack, needle, nth, expected) in backward {
            assert_eq!(str_index_of_b(haystack, needle, nth), expected);
        }
    }
}
239