kv_trie_rs/lib.rs
1//! KV capable prefix trie library based on LOUDS.
2//!
3//! This is a fork of https://laysakura.github.io/trie-rs/trie_rs.
4//!
5//! # Quickstart
6//!
7//! To use trie-rs, add the following to your `Cargo.toml` file:
8//!
9//! ```toml
10//! [dependencies]
11//! trie-rs = "0.1" # NOTE: Replace to latest minor version.
12//! ```
13//!
14//! ## Usage Overview
15//! ```rust
16//! use std::str;
17//! use kv_trie_rs::TrieBuilder;
18//!
19//! let mut builder = TrieBuilder::new(); // Inferred `TrieBuilder<u8>` automatically
20//! builder.push("すし", 1);
21//! builder.push("すしや", 2);
22//! builder.push("すしだね", 3);
23//! builder.push("すしづめ", 4);
24//! builder.push("すしめし", 5);
25//! builder.push("すしをにぎる", 6);
26//! builder.push("すし", 7); // Word `push`ed twice is over-written.
27//! builder.push("🍣", 8);
28//!
29//! let mut trie = builder.build();
30//!
31//! // exact_match(): Find a word exactly match to query.
32//! assert_eq!(trie.exact_match("すし"), true);
33//! assert_eq!(trie.exact_match("🍣"), true);
34//! assert_eq!(trie.exact_match("🍜"), false);
35//!
36//! // predictive_search(): Find words which include `query` as their prefix.
37//! let results_in_u8s: Vec<Vec<u8>> = trie.predictive_search("すし");
38//! let results_in_str: Vec<&str> = results_in_u8s
39//! .iter()
40//! .map(|u8s| str::from_utf8(u8s).unwrap())
41//! .collect();
42//! assert_eq!(
43//! results_in_str,
44//! vec![
45//! "すし",
46//! "すしだね",
47//! "すしづめ",
48//! "すしめし",
49//! "すしや",
50//! "すしをにぎる"
51//! ] // Sorted by `Vec<u8>`'s order
52//! );
53//!
54//! // common_prefix_search(): Find words which is included in `query`'s prefix.
55//! let results_in_u8s: Vec<Vec<u8>> = trie.common_prefix_search("すしや");
56//! let results_in_str: Vec<&str> = results_in_u8s
57//! .iter()
58//! .map(|u8s| str::from_utf8(u8s).unwrap())
59//! .collect();
60//! assert_eq!(
61//! results_in_str,
62//! vec![
63//! "すし",
64//! "すしや",
65//! ] // Sorted by `Vec<u8>`'s order
66//! );
67//!
68//! // common_prefix_search_with_value(): Find words which is included in `query`'s prefix and return their values.
69//! let results_in_u8s: Vec<(Vec<u8>, u8)> = trie.common_prefix_search_with_values("すしや");
70//! let results_in_str: Vec<(&str, u8)> = results_in_u8s
71//! .iter()
72//! .map(|(u8s, v)| (str::from_utf8(u8s).unwrap(), *v))
73//! .collect();
74//!
75//! assert_eq!(
76//! results_in_str,
77//! vec![
78//! ("すし", 7),
79//! ("すしや", 2),
80//! ] // Sorted by `Vec<u8>`'s order
81//! );
82//!
83//! // get_value(): Get value of a word.
84//! assert_eq!(trie.get("すし"), Some(&7));
85//!
86//! // set value in a built trie.
87//! trie.set("すし", 9);
88//! assert_eq!(trie.get("すし"), Some(&9));
89//!
90//! ```
91//!
92//! ## Using with Various Data Types
93//! `TrieBuilder` is implemented using generic type like following:
94//!
95//! ```text
96//! impl<Label: Ord + Clone> TrieBuilder<Label> {
97//! ...
98//! pub fn push<Arr: AsRef<[Label]>>(&mut self, word: Arr) { ... }
99//! ...
100//! }
101//! ```
102//!
103//! In the above `Usage Overview` example, we used `Label=u8, Arr=&str`.
104//!
105//! Here shows other `Label` and `Arr` type examples.
106//!
107//! ### `Label=&str, Arr=Vec<&str>`
108//! Say `Label` is English words and `Arr` is English phrases.
109//!
110//! ```rust
111//! use kv_trie_rs::TrieBuilder;
112//!
113//! let mut builder = TrieBuilder::new();
114//! builder.push(vec!["a", "woman"], 0 );
115//! builder.push(vec!["a", "woman", "on", "the", "beach"], 1);
116//! builder.push(vec!["a", "woman", "on", "the", "run"], 2);
117//!
118//! let trie = builder.build();
119//!
120//! assert_eq!(
121//! trie.exact_match(vec!["a", "woman", "on", "the", "beach"]),
122//! true
123//! );
124//! assert_eq!(
125//! trie.predictive_search(vec!["a", "woman", "on"]),
126//! vec![
127//! ["a", "woman", "on", "the", "beach"],
128//! ["a", "woman", "on", "the", "run"],
129//! ],
130//! );
131//! assert_eq!(
132//! trie.common_prefix_search(vec!["a", "woman", "on", "the", "beach"]),
133//! vec![vec!["a", "woman"], vec!["a", "woman", "on", "the", "beach"]],
134//! );
135//! ```
136//!
137//! ### `Label=u8, Arr=[u8; n]`
138//! Say `Label` is a digit in Pi (= 3.14...) and Arr is a window to separate pi's digit by 10.
139//!
140//! ```rust
141//! use kv_trie_rs::TrieBuilder;
142//!
143//! let mut builder = TrieBuilder::<u8, u8>::new(); // Pi = 3.14...
144//!
145//! builder.push([1, 4, 1, 5, 9, 2, 6, 5, 3, 5], 1);
146//! builder.push([8, 9, 7, 9, 3, 2, 3, 8, 4, 6], 2);
147//! builder.push([2, 6, 4, 3, 3, 8, 3, 2, 7, 9], 3);
148//! builder.push([6, 9, 3, 9, 9, 3, 7, 5, 1, 0], 4);
149//! builder.push([5, 8, 2, 0, 9, 7, 4, 9, 4, 4], 5);
150//! builder.push([5, 9, 2, 3, 0, 7, 8, 1, 6, 4], 6);
151//! builder.push([0, 6, 2, 8, 6, 2, 0, 8, 9, 9], 7);
152//! builder.push([8, 6, 2, 8, 0, 3, 4, 8, 2, 5], 8);
153//! builder.push([3, 4, 2, 1, 1, 7, 0, 6, 7, 9], 9);
154//! builder.push([8, 2, 1, 4, 8, 0, 8, 6, 5, 1], 10);
155//! builder.push([3, 2, 8, 2, 3, 0, 6, 6, 4, 7], 11);
156//! builder.push([0, 9, 3, 8, 4, 4, 6, 0, 9, 5], 12);
157//! builder.push([5, 0, 5, 8, 2, 2, 3, 1, 7, 2], 13);
158//! builder.push([5, 3, 5, 9, 4, 0, 8, 1, 2, 8], 14);
159//!
160//! let trie = builder.build();
161//!
162//! assert_eq!(trie.exact_match([5, 3, 5, 9, 4, 0, 8, 1, 2, 8]), true);
163//! assert_eq!(
164//! trie.predictive_search([3]),
165//! vec![
166//! [3, 2, 8, 2, 3, 0, 6, 6, 4, 7],
167//! [3, 4, 2, 1, 1, 7, 0, 6, 7, 9],
168//! ],
169//! );
170//! assert_eq!(
171//! trie.common_prefix_search([1, 4, 1, 5, 9, 2, 6, 5, 3, 5]),
172//! vec![[1, 4, 1, 5, 9, 2, 6, 5, 3, 5]],
173//! );
174//! ```
175//!
176//! # Features
177//! - **Generic type support**: As the above examples show, trie-rs can be used for searching not only UTF-8 string but also other data types.
178//! - **Based on [louds-rs](https://crates.io/crates/louds-rs)**, which is fast, parallelized, and memory efficient.
179//! - **Latest benchmark results are always accessible**: trie-rs is continuously benchmarked in Travis CI using [Criterion.rs](https://crates.io/crates/criterion). Graphical benchmark results are published [here](https://laysakura.github.io/trie-rs/criterion/report/).
180//!
181//! # Acknowledgments
182//! [`edict.furigana`](https://github.com/laysakura/trie-rs/blob/master/benches/edict.furigana) is used for benchmark.
183//! This file is constructed in the following step:
184//!
185//! 1. Download `edict.gz` from [EDICT](http://www.edrdg.org/jmdict/edict.html).
186//! 2. Convert it from original EUC into UTF-8.
187//! 3. Translate it into CSV file with [edict-to-csv](https://pypi.org/project/edict-to-csv/).
188//! 4. Extract field $1 for Hiragana/Katakana words, and field $3 for other (like Kanji) words.
189//! 5. Translate Katakana into Hiragana with [kana2hira](https://github.com/ShellShoccar-jpn/misc-tools/blob/master/kata2hira).
190//!
191//! Many thanks for these dictionaries and tools.
192
193pub use trie::Trie;
194pub use trie::TrieBuilder;
195
196mod internal_data_structure;
197pub mod trie;