rustc_span/
analyze_source_file.rs1use super::*;
2
3#[cfg(test)]
4mod tests;
5
6pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>) {
12 let mut lines = vec![RelativeBytePos::from_u32(0)];
13 let mut multi_byte_chars = vec![];
14
15 analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars);
17
18 if let Some(&last_line_start) = lines.last() {
22 let source_file_end = RelativeBytePos::from_usize(src.len());
23 assert!(source_file_end >= last_line_start);
24 if last_line_start == source_file_end {
25 lines.pop();
26 }
27 }
28
29 (lines, multi_byte_chars)
30}
31
32cfg_select! {
33 any(target_arch = "x86", target_arch = "x86_64") => {
34 fn analyze_source_file_dispatch(
35 src: &str,
36 lines: &mut Vec<RelativeBytePos>,
37 multi_byte_chars: &mut Vec<MultiByteChar>,
38 ) {
39 if is_x86_feature_detected!("sse2") {
40 unsafe {
41 analyze_source_file_sse2(src, lines, multi_byte_chars);
42 }
43 } else {
44 analyze_source_file_generic(
45 src,
46 src.len(),
47 RelativeBytePos::from_u32(0),
48 lines,
49 multi_byte_chars,
50 );
51 }
52 }
53
54 #[target_feature(enable = "sse2")]
59 unsafe fn analyze_source_file_sse2(
60 src: &str,
61 lines: &mut Vec<RelativeBytePos>,
62 multi_byte_chars: &mut Vec<MultiByteChar>,
63 ) {
64 #[cfg(target_arch = "x86")]
65 use std::arch::x86::*;
66 #[cfg(target_arch = "x86_64")]
67 use std::arch::x86_64::*;
68
69 const CHUNK_SIZE: usize = 16;
70
71 let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
72
73 let mut intra_chunk_offset = 0;
78
79 for (chunk_index, chunk) in chunks.iter().enumerate() {
80 let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
83
84 let multibyte_test = _mm_cmplt_epi8(chunk, _mm_set1_epi8(0));
87 let multibyte_mask = _mm_movemask_epi8(multibyte_test);
89
90 if multibyte_mask == 0 {
92 assert!(intra_chunk_offset == 0);
93
94 let newlines_test = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8));
96 let mut newlines_mask = _mm_movemask_epi8(newlines_test);
97
98 let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
99
100 while newlines_mask != 0 {
101 let index = newlines_mask.trailing_zeros();
102
103 lines.push(RelativeBytePos(index) + output_offset);
104
105 newlines_mask &= newlines_mask - 1;
107 }
108 } else {
109 let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
112 intra_chunk_offset = analyze_source_file_generic(
113 &src[scan_start..],
114 CHUNK_SIZE - intra_chunk_offset,
115 RelativeBytePos::from_usize(scan_start),
116 lines,
117 multi_byte_chars,
118 );
119 }
120 }
121
122 let tail_start = src.len() - tail.len() + intra_chunk_offset;
124 if tail_start < src.len() {
125 analyze_source_file_generic(
126 &src[tail_start..],
127 src.len() - tail_start,
128 RelativeBytePos::from_usize(tail_start),
129 lines,
130 multi_byte_chars,
131 );
132 }
133 }
134 }
135 target_arch = "loongarch64" => {
136 fn analyze_source_file_dispatch(
137 src: &str,
138 lines: &mut Vec<RelativeBytePos>,
139 multi_byte_chars: &mut Vec<MultiByteChar>,
140 ) {
141 use std::arch::is_loongarch_feature_detected;
142
143 if is_loongarch_feature_detected!("lsx") {
144 unsafe {
145 analyze_source_file_lsx(src, lines, multi_byte_chars);
146 }
147 } else {
148 analyze_source_file_generic(
149 src,
150 src.len(),
151 RelativeBytePos::from_u32(0),
152 lines,
153 multi_byte_chars,
154 );
155 }
156 }
157
158 #[target_feature(enable = "lsx")]
163 unsafe fn analyze_source_file_lsx(
164 src: &str,
165 lines: &mut Vec<RelativeBytePos>,
166 multi_byte_chars: &mut Vec<MultiByteChar>,
167 ) {
168 use std::arch::loongarch64::*;
169
170 const CHUNK_SIZE: usize = 16;
171
172 let (chunks, tail) = src.as_bytes().as_chunks::<CHUNK_SIZE>();
173
174 let mut intra_chunk_offset = 0;
179
180 for (chunk_index, chunk) in chunks.iter().enumerate() {
181 let chunk = unsafe { lsx_vld::<0>(chunk.as_ptr() as *const i8) };
184
185 let multibyte_mask = lsx_vmskltz_b(chunk);
188 let multibyte_mask = lsx_vpickve2gr_w::<0>(multibyte_mask);
190
191 if multibyte_mask == 0 {
193 assert!(intra_chunk_offset == 0);
194
195 let newlines_test = lsx_vseqi_b::<{b'\n' as i32}>(chunk);
197 let newlines_mask = lsx_vmskltz_b(newlines_test);
198 let mut newlines_mask = lsx_vpickve2gr_w::<0>(newlines_mask);
199
200 let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
201
202 while newlines_mask != 0 {
203 let index = newlines_mask.trailing_zeros();
204
205 lines.push(RelativeBytePos(index) + output_offset);
206
207 newlines_mask &= newlines_mask - 1;
209 }
210 } else {
211 let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
214 intra_chunk_offset = analyze_source_file_generic(
215 &src[scan_start..],
216 CHUNK_SIZE - intra_chunk_offset,
217 RelativeBytePos::from_usize(scan_start),
218 lines,
219 multi_byte_chars,
220 );
221 }
222 }
223
224 let tail_start = src.len() - tail.len() + intra_chunk_offset;
226 if tail_start < src.len() {
227 analyze_source_file_generic(
228 &src[tail_start..],
229 src.len() - tail_start,
230 RelativeBytePos::from_usize(tail_start),
231 lines,
232 multi_byte_chars,
233 );
234 }
235 }
236 }
237 _ => {
238 fn analyze_source_file_dispatch(
241 src: &str,
242 lines: &mut Vec<RelativeBytePos>,
243 multi_byte_chars: &mut Vec<MultiByteChar>,
244 ) {
245 analyze_source_file_generic(
246 src,
247 src.len(),
248 RelativeBytePos::from_u32(0),
249 lines,
250 multi_byte_chars,
251 );
252 }
253 }
254}
255
256fn analyze_source_file_generic(
260 src: &str,
261 scan_len: usize,
262 output_offset: RelativeBytePos,
263 lines: &mut Vec<RelativeBytePos>,
264 multi_byte_chars: &mut Vec<MultiByteChar>,
265) -> usize {
266 assert!(src.len() >= scan_len);
267 let mut i = 0;
268 let src_bytes = src.as_bytes();
269
270 while i < scan_len {
271 let byte = unsafe {
272 *src_bytes.get_unchecked(i)
274 };
275
276 let mut char_len = 1;
279
280 if byte == b'\n' {
281 let pos = RelativeBytePos::from_usize(i) + output_offset;
282 lines.push(pos + RelativeBytePos(1));
283 } else if byte >= 128 {
284 let c = src[i..].chars().next().unwrap();
286 char_len = c.len_utf8();
287
288 let pos = RelativeBytePos::from_usize(i) + output_offset;
289 assert!((2..=4).contains(&char_len));
290 let mbc = MultiByteChar { pos, bytes: char_len as u8 };
291 multi_byte_chars.push(mbc);
292 }
293
294 i += char_len;
295 }
296
297 i - scan_len
298}