rustc_span/
analyze_source_file.rs1use super::*;
2
3#[cfg(test)]
4mod tests;
5
6pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<MultiByteChar>) {
12 let mut lines = vec![RelativeBytePos::from_u32(0)];
13 let mut multi_byte_chars = vec![];
14
15 analyze_source_file_dispatch(src, &mut lines, &mut multi_byte_chars);
17
18 if let Some(&last_line_start) = lines.last() {
22 let source_file_end = RelativeBytePos::from_usize(src.len());
23 assert!(source_file_end >= last_line_start);
24 if last_line_start == source_file_end {
25 lines.pop();
26 }
27 }
28
29 (lines, multi_byte_chars)
30}
31
// Architecture-specific selection of `analyze_source_file_dispatch`, compiled only when the
// `bootstrap` cfg is set. The arms here use the `cfg(...) =>` predicate form of `cfg_match!`.
#[cfg(bootstrap)]
cfg_match! {
    cfg(any(target_arch = "x86", target_arch = "x86_64")) => {
        /// Runs the SSE2 scanner when the CPU reports SSE2 support at runtime, and the
        /// byte-at-a-time scanner over the whole of `src` otherwise.
        fn analyze_source_file_dispatch(
            src: &str,
            lines: &mut Vec<RelativeBytePos>,
            multi_byte_chars: &mut Vec<MultiByteChar>,
        ) {
            if !is_x86_feature_detected!("sse2") {
                analyze_source_file_generic(
                    src,
                    src.len(),
                    RelativeBytePos::from_u32(0),
                    lines,
                    multi_byte_chars,
                );
                return;
            }

            // SAFETY: SSE2 availability was verified by the runtime check just above.
            unsafe {
                analyze_source_file_sse2(src, lines, multi_byte_chars);
            }
        }

        /// Scans `src` in 16-byte chunks. A chunk that contains only ASCII bytes has its
        /// newlines located with SSE2 compare/movemask instructions; a chunk containing any
        /// byte `>= 128` is handed to `analyze_source_file_generic`. Bytes left over after
        /// the last full chunk are also handled by the generic scanner.
        ///
        /// # Safety
        ///
        /// The caller must ensure that the CPU supports SSE2.
        #[target_feature(enable = "sse2")]
        unsafe fn analyze_source_file_sse2(
            src: &str,
            lines: &mut Vec<RelativeBytePos>,
            multi_byte_chars: &mut Vec<MultiByteChar>,
        ) {
            #[cfg(target_arch = "x86")]
            use std::arch::x86::*;
            #[cfg(target_arch = "x86_64")]
            use std::arch::x86_64::*;

            const CHUNK_SIZE: usize = 16;

            let chunk_count = src.len() / CHUNK_SIZE;
            // View of the source bytes as a sequence of 16-byte vectors.
            let chunks = src.as_bytes().as_ptr() as *const __m128i;

            // Number of bytes at the start of the upcoming chunk that were already consumed
            // because a multi-byte character begun in the previous chunk extends into it.
            let mut intra_chunk_offset = 0;

            for chunk_index in 0..chunk_count {
                let chunk_start = chunk_index * CHUNK_SIZE;

                // SAFETY: `chunk_index < src.len() / CHUNK_SIZE`, so all 16 bytes read lie
                // inside `src`. The unaligned load is used because nothing guarantees that
                // the string data is 16-byte aligned.
                let chunk = unsafe { _mm_loadu_si128(chunks.add(chunk_index)) };

                // Interpreted as signed, a byte is negative exactly when it is `>= 128`,
                // i.e. when it belongs to a multi-byte UTF-8 sequence.
                // SAFETY: SSE2 is enabled for this function.
                let zero = unsafe { _mm_set1_epi8(0) };
                let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, zero) };
                // One bit per byte of the chunk.
                let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };

                if multibyte_mask != 0 {
                    // Slow path: non-ASCII bytes are present, so decode this chunk with the
                    // generic scanner, skipping whatever part of it was already consumed.
                    let scan_start = chunk_start + intra_chunk_offset;
                    intra_chunk_offset = analyze_source_file_generic(
                        &src[scan_start..],
                        CHUNK_SIZE - intra_chunk_offset,
                        RelativeBytePos::from_usize(scan_start),
                        lines,
                        multi_byte_chars,
                    );
                    continue;
                }

                // A pure-ASCII chunk cannot start in the middle of a character.
                assert!(intra_chunk_offset == 0);

                // SAFETY: SSE2 is enabled for this function.
                let newline = unsafe { _mm_set1_epi8(b'\n' as i8) };
                let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, newline) };
                let mut newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };

                // The `+ 1` makes each recorded position point at the byte after the `\n`.
                let output_offset = RelativeBytePos::from_usize(chunk_start + 1);

                while newlines_mask != 0 {
                    // Position of the lowest set bit = index of the next newline in the chunk.
                    let bit = newlines_mask.trailing_zeros();
                    lines.push(RelativeBytePos(bit) + output_offset);

                    // Clear that lowest set bit and look for the next one.
                    newlines_mask &= newlines_mask - 1;
                }
            }

            // Whatever follows the last full chunk (minus bytes already consumed).
            let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
            if tail_start < src.len() {
                let tail = &src[tail_start..];
                analyze_source_file_generic(
                    tail,
                    tail.len(),
                    RelativeBytePos::from_usize(tail_start),
                    lines,
                    multi_byte_chars,
                );
            }
        }
    }
    _ => {
        /// On every other architecture the byte-at-a-time scanner processes all of `src`.
        fn analyze_source_file_dispatch(
            src: &str,
            lines: &mut Vec<RelativeBytePos>,
            multi_byte_chars: &mut Vec<MultiByteChar>,
        ) {
            let whole_len = src.len();
            analyze_source_file_generic(
                src,
                whole_len,
                RelativeBytePos::from_u32(0),
                lines,
                multi_byte_chars,
            );
        }
    }
}
156
157#[cfg(not(bootstrap))]
158cfg_match! {
159 any(target_arch = "x86", target_arch = "x86_64") => {
160 fn analyze_source_file_dispatch(
161 src: &str,
162 lines: &mut Vec<RelativeBytePos>,
163 multi_byte_chars: &mut Vec<MultiByteChar>,
164 ) {
165 if is_x86_feature_detected!("sse2") {
166 unsafe {
167 analyze_source_file_sse2(src, lines, multi_byte_chars);
168 }
169 } else {
170 analyze_source_file_generic(
171 src,
172 src.len(),
173 RelativeBytePos::from_u32(0),
174 lines,
175 multi_byte_chars,
176 );
177 }
178 }
179
180 #[target_feature(enable = "sse2")]
185 unsafe fn analyze_source_file_sse2(
186 src: &str,
187 lines: &mut Vec<RelativeBytePos>,
188 multi_byte_chars: &mut Vec<MultiByteChar>,
189 ) {
190 #[cfg(target_arch = "x86")]
191 use std::arch::x86::*;
192 #[cfg(target_arch = "x86_64")]
193 use std::arch::x86_64::*;
194
195 const CHUNK_SIZE: usize = 16;
196
197 let src_bytes = src.as_bytes();
198
199 let chunk_count = src.len() / CHUNK_SIZE;
200
201 let mut intra_chunk_offset = 0;
206
207 for chunk_index in 0..chunk_count {
208 let ptr = src_bytes.as_ptr() as *const __m128i;
209 let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
212
213 let multibyte_test = unsafe { _mm_cmplt_epi8(chunk, _mm_set1_epi8(0)) };
216 let multibyte_mask = unsafe { _mm_movemask_epi8(multibyte_test) };
218
219 if multibyte_mask == 0 {
221 assert!(intra_chunk_offset == 0);
222
223 let newlines_test = unsafe { _mm_cmpeq_epi8(chunk, _mm_set1_epi8(b'\n' as i8)) };
225 let mut newlines_mask = unsafe { _mm_movemask_epi8(newlines_test) };
226
227 let output_offset = RelativeBytePos::from_usize(chunk_index * CHUNK_SIZE + 1);
228
229 while newlines_mask != 0 {
230 let index = newlines_mask.trailing_zeros();
231
232 lines.push(RelativeBytePos(index) + output_offset);
233
234 newlines_mask &= newlines_mask - 1;
236 }
237 } else {
238 let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
241 intra_chunk_offset = analyze_source_file_generic(
242 &src[scan_start..],
243 CHUNK_SIZE - intra_chunk_offset,
244 RelativeBytePos::from_usize(scan_start),
245 lines,
246 multi_byte_chars,
247 );
248 }
249 }
250
251 let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
253 if tail_start < src.len() {
254 analyze_source_file_generic(
255 &src[tail_start..],
256 src.len() - tail_start,
257 RelativeBytePos::from_usize(tail_start),
258 lines,
259 multi_byte_chars,
260 );
261 }
262 }
263 }
264 _ => {
265 fn analyze_source_file_dispatch(
267 src: &str,
268 lines: &mut Vec<RelativeBytePos>,
269 multi_byte_chars: &mut Vec<MultiByteChar>,
270 ) {
271 analyze_source_file_generic(
272 src,
273 src.len(),
274 RelativeBytePos::from_u32(0),
275 lines,
276 multi_byte_chars,
277 );
278 }
279 }
280}
281
282fn analyze_source_file_generic(
286 src: &str,
287 scan_len: usize,
288 output_offset: RelativeBytePos,
289 lines: &mut Vec<RelativeBytePos>,
290 multi_byte_chars: &mut Vec<MultiByteChar>,
291) -> usize {
292 assert!(src.len() >= scan_len);
293 let mut i = 0;
294 let src_bytes = src.as_bytes();
295
296 while i < scan_len {
297 let byte = unsafe {
298 *src_bytes.get_unchecked(i)
300 };
301
302 let mut char_len = 1;
305
306 if byte == b'\n' {
307 let pos = RelativeBytePos::from_usize(i) + output_offset;
308 lines.push(pos + RelativeBytePos(1));
309 } else if byte >= 128 {
310 let c = src[i..].chars().next().unwrap();
312 char_len = c.len_utf8();
313
314 let pos = RelativeBytePos::from_usize(i) + output_offset;
315 assert!((2..=4).contains(&char_len));
316 let mbc = MultiByteChar { pos, bytes: char_len as u8 };
317 multi_byte_chars.push(mbc);
318 }
319
320 i += char_len;
321 }
322
323 i - scan_len
324}