clippy_utils/
str_utils.rs

/// A position inside a string, given both as a character offset and as a byte offset.
///
/// Dealing with string indices can be hard: slicing a `&str` needs a byte offset, while
/// positions are often counted in characters. Keeping both values together ensures that the
/// right one is available for correct indexing.
///
/// The type is a plain pair of `usize` values, so it is `Copy` and can be reused freely
/// after being passed by value.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct StrIndex {
    /// Offset counted in `char`s.
    pub char_index: usize,
    /// Offset counted in bytes.
    pub byte_index: usize,
}

impl StrIndex {
    /// Creates a `StrIndex` from a character offset and a byte offset.
    ///
    /// The two values are stored as given; no check is made that they describe the same
    /// position of any particular string.
    #[must_use]
    pub const fn new(char_index: usize, byte_index: usize) -> Self {
        Self { char_index, byte_index }
    }
}
14
15/// Returns the index of the character after the first camel-case component of `s`.
16///
17/// ```no_run
18/// # use clippy_utils::str_utils::{camel_case_until, StrIndex};
19/// assert_eq!(camel_case_until("AbcDef"), StrIndex::new(6, 6));
20/// assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0));
21/// assert_eq!(camel_case_until("AbcDD"), StrIndex::new(3, 3));
22/// assert_eq!(camel_case_until("Abc\u{f6}\u{f6}DD"), StrIndex::new(5, 7));
23/// ```
24#[must_use]
25pub fn camel_case_until(s: &str) -> StrIndex {
26    let mut iter = s.char_indices().enumerate();
27    if let Some((_char_index, (_, first))) = iter.next() {
28        if !first.is_uppercase() {
29            return StrIndex::new(0, 0);
30        }
31    } else {
32        return StrIndex::new(0, 0);
33    }
34    let mut up = true;
35    let mut last_index = StrIndex::new(0, 0);
36    for (char_index, (byte_index, c)) in iter {
37        if up {
38            if c.is_lowercase() {
39                up = false;
40            } else {
41                return last_index;
42            }
43        } else if c.is_uppercase() {
44            up = true;
45            last_index.byte_index = byte_index;
46            last_index.char_index = char_index;
47        } else if !c.is_lowercase() {
48            return StrIndex::new(char_index, byte_index);
49        }
50    }
51
52    if up {
53        last_index
54    } else {
55        StrIndex::new(s.chars().count(), s.len())
56    }
57}
58
59/// Returns index of the first camel-case component of `s`.
60///
61/// ```no_run
62/// # use clippy_utils::str_utils::{camel_case_start, StrIndex};
63/// assert_eq!(camel_case_start("AbcDef"), StrIndex::new(0, 0));
64/// assert_eq!(camel_case_start("abcDef"), StrIndex::new(3, 3));
65/// assert_eq!(camel_case_start("ABCD"), StrIndex::new(4, 4));
66/// assert_eq!(camel_case_start("abcd"), StrIndex::new(4, 4));
67/// assert_eq!(camel_case_start("\u{f6}\u{f6}cd"), StrIndex::new(4, 6));
68/// ```
69#[must_use]
70pub fn camel_case_start(s: &str) -> StrIndex {
71    camel_case_start_from_idx(s, 0)
72}
73
74/// Returns `StrIndex` of the last camel-case component of `s[idx..]`.
75///
76/// ```no_run
77/// # use clippy_utils::str_utils::{camel_case_start_from_idx, StrIndex};
78/// assert_eq!(camel_case_start_from_idx("AbcDef", 0), StrIndex::new(0, 0));
79/// assert_eq!(camel_case_start_from_idx("AbcDef", 1), StrIndex::new(3, 3));
80/// assert_eq!(camel_case_start_from_idx("AbcDefGhi", 0), StrIndex::new(0, 0));
81/// assert_eq!(camel_case_start_from_idx("AbcDefGhi", 1), StrIndex::new(3, 3));
82/// assert_eq!(camel_case_start_from_idx("Abcdefg", 1), StrIndex::new(7, 7));
83/// ```
84pub fn camel_case_start_from_idx(s: &str, start_idx: usize) -> StrIndex {
85    let char_count = s.chars().count();
86    let range = 0..char_count;
87    let mut iter = range.rev().zip(s.char_indices().rev());
88    if let Some((_, (_, first))) = iter.next() {
89        if !first.is_lowercase() {
90            return StrIndex::new(char_count, s.len());
91        }
92    } else {
93        return StrIndex::new(char_count, s.len());
94    }
95
96    let mut down = true;
97    let mut last_index = StrIndex::new(char_count, s.len());
98    for (char_index, (byte_index, c)) in iter {
99        if byte_index < start_idx {
100            break;
101        }
102        if down {
103            if c.is_uppercase() {
104                down = false;
105                last_index.byte_index = byte_index;
106                last_index.char_index = char_index;
107            } else if !c.is_lowercase() {
108                return last_index;
109            }
110        } else if c.is_lowercase() {
111            down = true;
112        } else if c.is_uppercase() {
113            last_index.byte_index = byte_index;
114            last_index.char_index = char_index;
115        } else {
116            return last_index;
117        }
118    }
119
120    last_index
121}
122
123/// Get the indexes of camel case components of a string `s`
124///
125/// ```no_run
126/// # use clippy_utils::str_utils::{camel_case_indices, StrIndex};
127/// assert_eq!(
128///     camel_case_indices("AbcDef"),
129///     vec![StrIndex::new(0, 0), StrIndex::new(3, 3), StrIndex::new(6, 6)]
130/// );
131/// assert_eq!(
132///     camel_case_indices("abcDef"),
133///     vec![StrIndex::new(3, 3), StrIndex::new(6, 6)]
134/// );
135/// ```
136pub fn camel_case_indices(s: &str) -> Vec<StrIndex> {
137    let mut result = Vec::new();
138    let mut str_idx = camel_case_start(s);
139
140    while str_idx.byte_index < s.len() {
141        let next_idx = str_idx.byte_index + 1;
142        result.push(str_idx);
143        str_idx = camel_case_start_from_idx(s, next_idx);
144    }
145    result.push(str_idx);
146
147    result
148}
149
150/// Split camel case string into a vector of its components
151///
152/// ```no_run
153/// # use clippy_utils::str_utils::{camel_case_split, StrIndex};
154/// assert_eq!(camel_case_split("AbcDef"), vec!["Abc", "Def"]);
155/// ```
156pub fn camel_case_split(s: &str) -> Vec<&str> {
157    let mut offsets = camel_case_indices(s)
158        .iter()
159        .map(|e| e.byte_index)
160        .collect::<Vec<usize>>();
161    if offsets[0] != 0 {
162        offsets.insert(0, 0);
163    }
164
165    offsets.windows(2).map(|w| &s[w[0]..w[1]]).collect()
166}
167
/// A length inside a string, given both as a number of characters and as a number of bytes.
///
/// Dealing with string comparison can be complicated: the same run of text has a different
/// length depending on whether characters or bytes are counted. Keeping both counts together
/// ensures that the right one is available for correct indexing.
///
/// The type is a plain pair of `usize` values, so it is `Copy` and can be reused freely
/// after being passed by value.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct StrCount {
    /// Length counted in `char`s.
    pub char_count: usize,
    /// Length counted in bytes.
    pub byte_count: usize,
}

impl StrCount {
    /// Creates a `StrCount` from a character count and a byte count.
    ///
    /// The two values are stored as given; no check is made that they describe the same
    /// piece of text.
    #[must_use]
    pub const fn new(char_count: usize, byte_count: usize) -> Self {
        Self { char_count, byte_count }
    }
}
181
182/// Returns the number of chars that match from the start
183///
184/// ```no_run
185/// # use clippy_utils::str_utils::{count_match_start, StrCount};
186/// assert_eq!(count_match_start("hello_mouse", "hello_penguin"), StrCount::new(6, 6));
187/// assert_eq!(count_match_start("hello_clippy", "bye_bugs"), StrCount::new(0, 0));
188/// assert_eq!(count_match_start("hello_world", "hello_world"), StrCount::new(11, 11));
189/// assert_eq!(count_match_start("T\u{f6}ffT\u{f6}ff", "T\u{f6}ff"), StrCount::new(4, 5));
190/// ```
191#[must_use]
192pub fn count_match_start(str1: &str, str2: &str) -> StrCount {
193    // (char_index, char1)
194    let char_count = str1.chars().count();
195    let iter1 = (0..=char_count).zip(str1.chars());
196    // (byte_index, char2)
197    let iter2 = str2.char_indices();
198
199    iter1
200        .zip(iter2)
201        .take_while(|((_, c1), (_, c2))| c1 == c2)
202        .last()
203        .map_or_else(StrCount::default, |((char_index, _), (byte_index, character))| {
204            StrCount::new(char_index + 1, byte_index + character.len_utf8())
205        })
206}
207
208/// Returns the number of chars and bytes that match from the end
209///
210/// ```no_run
211/// # use clippy_utils::str_utils::{count_match_end, StrCount};
212/// assert_eq!(count_match_end("hello_cat", "bye_cat"), StrCount::new(4, 4));
213/// assert_eq!(count_match_end("if_item_thing", "enum_value"), StrCount::new(0, 0));
214/// assert_eq!(count_match_end("Clippy", "Clippy"), StrCount::new(6, 6));
215/// assert_eq!(count_match_end("MyT\u{f6}ff", "YourT\u{f6}ff"), StrCount::new(4, 5));
216/// ```
217#[must_use]
218pub fn count_match_end(str1: &str, str2: &str) -> StrCount {
219    let char_count = str1.chars().count();
220    if char_count == 0 {
221        return StrCount::default();
222    }
223
224    // (char_index, char1)
225    let iter1 = (0..char_count).rev().zip(str1.chars().rev());
226    // (byte_index, char2)
227    let byte_count = str2.len();
228    let iter2 = str2.char_indices().rev();
229
230    iter1
231        .zip(iter2)
232        .take_while(|((_, c1), (_, c2))| c1 == c2)
233        .last()
234        .map_or_else(StrCount::default, |((char_index, _), (byte_index, _))| {
235            StrCount::new(char_count - char_index, byte_count - byte_index)
236        })
237}
238
/// Returns a `snake_case` version of the input.
///
/// Every uppercase character is replaced by its lowercase form and, unless it is the very
/// first character of `name`, preceded by an underscore. All other characters are copied
/// unchanged.
///
/// ```no_run
/// use clippy_utils::str_utils::to_snake_case;
/// assert_eq!(to_snake_case("AbcDef"), "abc_def");
/// assert_eq!(to_snake_case("ABCD"), "a_b_c_d");
/// assert_eq!(to_snake_case("AbcDD"), "abc_d_d");
/// assert_eq!(to_snake_case("Abc1DD"), "abc1_d_d");
/// ```
pub fn to_snake_case(name: &str) -> String {
    let mut snake = String::new();
    let mut at_start = true;

    for ch in name.chars() {
        if !ch.is_uppercase() {
            // Characters without capitalization are treated like lowercase ones.
            snake.push(ch);
        } else {
            if !at_start {
                snake.push('_');
            }
            snake.extend(ch.to_lowercase());
        }
        at_start = false;
    }

    snake
}
/// Returns a `CamelCase` version of the input.
///
/// Underscores are removed and the character following each of them, as well as the very
/// first character, is converted to uppercase. Input that already contains an uppercase
/// character is not considered snake case and is returned unchanged.
///
/// ```no_run
/// use clippy_utils::str_utils::to_camel_case;
/// assert_eq!(to_camel_case("abc_def"), "AbcDef");
/// assert_eq!(to_camel_case("a_b_c_d"), "ABCD");
/// assert_eq!(to_camel_case("abc_d_d"), "AbcDD");
/// assert_eq!(to_camel_case("abc1_d_d"), "Abc1DD");
/// ```
pub fn to_camel_case(item_name: &str) -> String {
    // we only turn snake case text into CamelCase
    if item_name.chars().any(char::is_uppercase) {
        return item_name.to_string();
    }

    let mut camel = String::new();
    let mut capitalize_next = true;
    for ch in item_name.chars() {
        match ch {
            '_' => capitalize_next = true,
            _ if capitalize_next => {
                capitalize_next = false;
                camel.extend(ch.to_uppercase());
            },
            _ => camel.push(ch),
        }
    }
    camel
}
291
#[cfg(test)]
mod test {
    use super::*;

    /// Checks `camel_case_start` against `(input, (char_index, byte_index))` pairs.
    fn check_start(cases: &[(&str, (usize, usize))]) {
        for &(input, (chars, bytes)) in cases {
            assert_eq!(camel_case_start(input), StrIndex::new(chars, bytes));
        }
    }

    /// Checks `camel_case_until` against `(input, (char_index, byte_index))` pairs.
    fn check_until(cases: &[(&str, (usize, usize))]) {
        for &(input, (chars, bytes)) in cases {
            assert_eq!(camel_case_until(input), StrIndex::new(chars, bytes));
        }
    }

    #[test]
    fn camel_case_start_full() {
        check_start(&[
            ("AbcDef", (0, 0)),
            ("Abc", (0, 0)),
            ("ABcd", (0, 0)),
            ("ABcdEf", (0, 0)),
            ("AabABcd", (0, 0)),
        ]);
    }

    #[test]
    fn camel_case_start_partial() {
        check_start(&[
            ("abcDef", (3, 3)),
            ("aDbc", (1, 1)),
            ("aabABcd", (3, 3)),
            ("\u{f6}\u{f6}AabABcd", (2, 4)),
        ]);
    }

    #[test]
    fn camel_case_start_not() {
        check_start(&[
            ("AbcDef_", (7, 7)),
            ("AbcDD", (5, 5)),
            ("all_small", (9, 9)),
            ("\u{f6}_all_small", (11, 12)),
        ]);
    }

    #[test]
    fn camel_case_start_caps() {
        check_start(&[("ABCD", (4, 4))]);
    }

    #[test]
    fn camel_case_until_full() {
        check_until(&[
            ("AbcDef", (6, 6)),
            ("Abc", (3, 3)),
            ("Abc\u{f6}\u{f6}\u{f6}", (6, 9)),
        ]);
    }

    #[test]
    fn camel_case_until_not() {
        check_until(&[("abcDef", (0, 0)), ("aDbc", (0, 0))]);
    }

    #[test]
    fn camel_case_until_partial() {
        check_until(&[
            ("AbcDef_", (6, 6)),
            ("CallTypeC", (8, 8)),
            ("AbcDD", (3, 3)),
            ("Abc\u{f6}\u{f6}DD", (5, 7)),
        ]);
    }

    #[test]
    fn until_caps() {
        check_until(&[("ABCD", (0, 0))]);
    }

    #[test]
    fn camel_case_start_from_idx_full() {
        // (input, start offset, (char_index, byte_index))
        let cases = [
            ("AbcDef", 0, (0, 0)),
            ("AbcDef", 1, (3, 3)),
            ("AbcDef", 4, (6, 6)),
            ("AbcDefGhi", 0, (0, 0)),
            ("AbcDefGhi", 1, (3, 3)),
            ("Abcdefg", 1, (7, 7)),
        ];
        for (input, from, (chars, bytes)) in cases {
            assert_eq!(camel_case_start_from_idx(input, from), StrIndex::new(chars, bytes));
        }
    }

    #[test]
    fn camel_case_indices_full() {
        let expected = vec![StrIndex::new(7, 9)];
        assert_eq!(camel_case_indices("Abc\u{f6}\u{f6}DD"), expected);
    }

    #[test]
    fn camel_case_split_full() {
        let cases: [(&str, &[&str]); 5] = [
            ("A", &["A"]),
            ("AbcDef", &["Abc", "Def"]),
            ("Abc", &["Abc"]),
            ("abcDef", &["abc", "Def"]),
            ("\u{f6}\u{f6}AabABcd", &["\u{f6}\u{f6}", "Aab", "A", "Bcd"]),
        ];
        for (input, expected) in cases {
            assert_eq!(camel_case_split(input), expected.to_vec());
        }
    }
}