1#![allow(non_camel_case_types)]
4
5#[rustfmt::skip]
6mod generated;
7#[rustfmt::skip]
8#[stable(feature = "neon_intrinsics", since = "1.59.0")]
9pub use self::generated::*;
10
11use crate::{
14 core_arch::{arm_shared::*, simd::*},
15 hint::unreachable_unchecked,
16 intrinsics::{simd::*, *},
17 mem::transmute,
18};
19#[cfg(test)]
20use stdarch_test::assert_instr;
21
22types! {
23 #![stable(feature = "neon_intrinsics", since = "1.59.0")]
24
25 pub struct float64x1_t(1 x f64); pub struct float64x2_t(2 x f64);
29}
30
31#[repr(C)]
33#[derive(Copy, Clone, Debug)]
34#[stable(feature = "neon_intrinsics", since = "1.59.0")]
35pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
36#[repr(C)]
38#[derive(Copy, Clone, Debug)]
39#[stable(feature = "neon_intrinsics", since = "1.59.0")]
40pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
41#[repr(C)]
43#[derive(Copy, Clone, Debug)]
44#[stable(feature = "neon_intrinsics", since = "1.59.0")]
45pub struct float64x1x4_t(
46 pub float64x1_t,
47 pub float64x1_t,
48 pub float64x1_t,
49 pub float64x1_t,
50);
51
52#[repr(C)]
54#[derive(Copy, Clone, Debug)]
55#[stable(feature = "neon_intrinsics", since = "1.59.0")]
56pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
57#[repr(C)]
59#[derive(Copy, Clone, Debug)]
60#[stable(feature = "neon_intrinsics", since = "1.59.0")]
61pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
62#[repr(C)]
64#[derive(Copy, Clone, Debug)]
65#[stable(feature = "neon_intrinsics", since = "1.59.0")]
66pub struct float64x2x4_t(
67 pub float64x2_t,
68 pub float64x2_t,
69 pub float64x2_t,
70 pub float64x2_t,
71);
72
/// Expands to an expression that merges `$a` and `$b` lane by lane.
///
/// Both operands are viewed as `Simd<$ty, $width>`. Each lane of `$b` is shifted right by
/// `$N` with `simd_shr` and OR-ed with the bits of `$a` selected by `!(<$ty>::MAX >> $N)`.
/// When `$N` equals the lane width (`<$ty>::BITS`) the expansion is just `$a`.
///
/// The expansion calls `transmute` and SIMD intrinsics, so it has to appear in an `unsafe`
/// context. `$N` is substituted several times and is expected to be a constant expression.
macro_rules! shift_right_and_insert {
    ($ty:ty, $width:literal, $N:expr, $a:expr, $b:expr) => {{
        type V = Simd<$ty, $width>;

        if $N as u32 != <$ty>::BITS {
            let dst: V = transmute($a);
            let src: V = transmute($b);

            // Bits of `dst` that survive the insert.
            let keep_mask = V::splat(!(<$ty>::MAX >> $N));
            let preserved: V = simd_and(dst, keep_mask);

            let inserted = simd_shr(src, V::splat($N as $ty));

            transmute(simd_or(preserved, inserted))
        } else {
            // A shift by the full lane width is special-cased: the result is `$a` itself.
            $a
        }
    }};
}
94
95pub(crate) use shift_right_and_insert;
96
97#[inline]
99#[target_feature(enable = "neon")]
100#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
101#[rustc_legacy_const_generics(1, 3)]
102#[stable(feature = "neon_intrinsics", since = "1.59.0")]
103pub fn vcopy_lane_s64<const N1: i32, const N2: i32>(_a: int64x1_t, b: int64x1_t) -> int64x1_t {
104 static_assert!(N1 == 0);
105 static_assert!(N2 == 0);
106 b
107}
108
109#[inline]
111#[target_feature(enable = "neon")]
112#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
113#[rustc_legacy_const_generics(1, 3)]
114#[stable(feature = "neon_intrinsics", since = "1.59.0")]
115pub fn vcopy_lane_u64<const N1: i32, const N2: i32>(_a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
116 static_assert!(N1 == 0);
117 static_assert!(N2 == 0);
118 b
119}
120
121#[inline]
123#[target_feature(enable = "neon")]
124#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
125#[rustc_legacy_const_generics(1, 3)]
126#[stable(feature = "neon_intrinsics", since = "1.59.0")]
127pub fn vcopy_lane_p64<const N1: i32, const N2: i32>(_a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
128 static_assert!(N1 == 0);
129 static_assert!(N2 == 0);
130 b
131}
132
133#[inline]
135#[target_feature(enable = "neon")]
136#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
137#[rustc_legacy_const_generics(1, 3)]
138#[stable(feature = "neon_intrinsics", since = "1.59.0")]
139pub fn vcopy_lane_f64<const N1: i32, const N2: i32>(
140 _a: float64x1_t,
141 b: float64x1_t,
142) -> float64x1_t {
143 static_assert!(N1 == 0);
144 static_assert!(N2 == 0);
145 b
146}
147
148#[inline]
150#[target_feature(enable = "neon")]
151#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
152#[rustc_legacy_const_generics(1, 3)]
153#[stable(feature = "neon_intrinsics", since = "1.59.0")]
154pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
155 _a: int64x1_t,
156 b: int64x2_t,
157) -> int64x1_t {
158 static_assert!(LANE1 == 0);
159 static_assert_uimm_bits!(LANE2, 1);
160 unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
161}
162
163#[inline]
165#[target_feature(enable = "neon")]
166#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
167#[rustc_legacy_const_generics(1, 3)]
168#[stable(feature = "neon_intrinsics", since = "1.59.0")]
169pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
170 _a: uint64x1_t,
171 b: uint64x2_t,
172) -> uint64x1_t {
173 static_assert!(LANE1 == 0);
174 static_assert_uimm_bits!(LANE2, 1);
175 unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
176}
177
178#[inline]
180#[target_feature(enable = "neon")]
181#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
182#[rustc_legacy_const_generics(1, 3)]
183#[stable(feature = "neon_intrinsics", since = "1.59.0")]
184pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
185 _a: poly64x1_t,
186 b: poly64x2_t,
187) -> poly64x1_t {
188 static_assert!(LANE1 == 0);
189 static_assert_uimm_bits!(LANE2, 1);
190 unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
191}
192
193#[inline]
195#[target_feature(enable = "neon")]
196#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
197#[rustc_legacy_const_generics(1, 3)]
198#[stable(feature = "neon_intrinsics", since = "1.59.0")]
199pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
200 _a: float64x1_t,
201 b: float64x2_t,
202) -> float64x1_t {
203 static_assert!(LANE1 == 0);
204 static_assert_uimm_bits!(LANE2, 1);
205 unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
206}
207
208#[inline]
210#[target_feature(enable = "neon")]
211#[cfg_attr(test, assert_instr(ldr))]
212#[stable(feature = "neon_intrinsics", since = "1.59.0")]
213pub unsafe fn vld1_dup_f64(ptr: *const f64) -> float64x1_t {
214 vld1_f64(ptr)
215}
216
217#[inline]
219#[target_feature(enable = "neon")]
220#[cfg_attr(test, assert_instr(ld1r))]
221#[stable(feature = "neon_intrinsics", since = "1.59.0")]
222pub unsafe fn vld1q_dup_f64(ptr: *const f64) -> float64x2_t {
223 let x = vld1q_lane_f64::<0>(ptr, transmute(f64x2::splat(0.)));
224 simd_shuffle!(x, x, [0, 0])
225}
226
227#[inline]
229#[target_feature(enable = "neon")]
230#[rustc_legacy_const_generics(2)]
231#[cfg_attr(test, assert_instr(ldr, LANE = 0))]
232#[stable(feature = "neon_intrinsics", since = "1.59.0")]
233pub unsafe fn vld1_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x1_t) -> float64x1_t {
234 static_assert!(LANE == 0);
235 simd_insert!(src, LANE as u32, *ptr)
236}
237
238#[inline]
240#[target_feature(enable = "neon")]
241#[rustc_legacy_const_generics(2)]
242#[cfg_attr(test, assert_instr(ld1, LANE = 1))]
243#[stable(feature = "neon_intrinsics", since = "1.59.0")]
244pub unsafe fn vld1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
245 static_assert_uimm_bits!(LANE, 1);
246 simd_insert!(src, LANE as u32, *ptr)
247}
248
249#[inline]
253#[target_feature(enable = "neon")]
254#[cfg_attr(test, assert_instr(bsl))]
255#[stable(feature = "neon_intrinsics", since = "1.59.0")]
256pub fn vbsl_f64(a: uint64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
257 let not = int64x1_t::splat(-1);
258 unsafe {
259 transmute(simd_or(
260 simd_and(a, transmute(b)),
261 simd_and(simd_xor(a, transmute(not)), transmute(c)),
262 ))
263 }
264}
265#[inline]
267#[target_feature(enable = "neon")]
268#[cfg_attr(test, assert_instr(bsl))]
269#[stable(feature = "neon_intrinsics", since = "1.59.0")]
270pub fn vbsl_p64(a: poly64x1_t, b: poly64x1_t, c: poly64x1_t) -> poly64x1_t {
271 let not = int64x1_t::splat(-1);
272 unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
273}
274#[inline]
276#[target_feature(enable = "neon")]
277#[cfg_attr(test, assert_instr(bsl))]
278#[stable(feature = "neon_intrinsics", since = "1.59.0")]
279pub fn vbslq_f64(a: uint64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t {
280 let not = int64x2_t::splat(-1);
281 unsafe {
282 transmute(simd_or(
283 simd_and(a, transmute(b)),
284 simd_and(simd_xor(a, transmute(not)), transmute(c)),
285 ))
286 }
287}
288#[inline]
290#[target_feature(enable = "neon")]
291#[cfg_attr(test, assert_instr(bsl))]
292#[stable(feature = "neon_intrinsics", since = "1.59.0")]
293pub fn vbslq_p64(a: poly64x2_t, b: poly64x2_t, c: poly64x2_t) -> poly64x2_t {
294 let not = int64x2_t::splat(-1);
295 unsafe { simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c)) }
296}
297
298#[inline]
300#[target_feature(enable = "neon")]
301#[cfg_attr(test, assert_instr(fadd))]
302#[stable(feature = "neon_intrinsics", since = "1.59.0")]
303pub fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
304 unsafe { simd_add(a, b) }
305}
306
307#[inline]
309#[target_feature(enable = "neon")]
310#[cfg_attr(test, assert_instr(fadd))]
311#[stable(feature = "neon_intrinsics", since = "1.59.0")]
312pub fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
313 unsafe { simd_add(a, b) }
314}
315
316#[inline]
318#[target_feature(enable = "neon")]
319#[cfg_attr(test, assert_instr(add))]
320#[stable(feature = "neon_intrinsics", since = "1.59.0")]
321pub fn vadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
322 unsafe { simd_add(a, b) }
323}
324
325#[inline]
327#[target_feature(enable = "neon")]
328#[cfg_attr(test, assert_instr(add))]
329#[stable(feature = "neon_intrinsics", since = "1.59.0")]
330pub fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
331 unsafe { simd_add(a, b) }
332}
333
334#[inline]
336#[target_feature(enable = "neon")]
337#[cfg_attr(test, assert_instr(add))]
338#[stable(feature = "neon_intrinsics", since = "1.59.0")]
339pub fn vaddd_s64(a: i64, b: i64) -> i64 {
340 a.wrapping_add(b)
341}
342
343#[inline]
345#[target_feature(enable = "neon")]
346#[cfg_attr(test, assert_instr(add))]
347#[stable(feature = "neon_intrinsics", since = "1.59.0")]
348pub fn vaddd_u64(a: u64, b: u64) -> u64 {
349 a.wrapping_add(b)
350}
351
/// Single-lane extract: returns `a` unchanged and ignores `_b`.
///
/// `N` must be 0; this is checked at compile time with `static_assert!`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_p64<const N: i32>(a: poly64x1_t, _b: poly64x1_t) -> poly64x1_t {
    static_assert!(N == 0);
    a
}
362
/// Single-lane extract: returns `a` unchanged and ignores `_b`.
///
/// `N` must be 0; this is checked at compile time with `static_assert!`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vext_f64<const N: i32>(a: float64x1_t, _b: float64x1_t) -> float64x1_t {
    static_assert!(N == 0);
    a
}
373
374#[inline]
376#[target_feature(enable = "neon")]
377#[cfg_attr(test, assert_instr(fmov))]
378#[stable(feature = "neon_intrinsics", since = "1.59.0")]
379pub fn vdup_n_p64(value: p64) -> poly64x1_t {
380 unsafe { transmute(u64x1::new(value)) }
381}
382
383#[inline]
385#[target_feature(enable = "neon")]
386#[cfg_attr(test, assert_instr(nop))]
387#[stable(feature = "neon_intrinsics", since = "1.59.0")]
388pub fn vdup_n_f64(value: f64) -> float64x1_t {
389 float64x1_t::splat(value)
390}
391
392#[inline]
394#[target_feature(enable = "neon")]
395#[cfg_attr(test, assert_instr(dup))]
396#[stable(feature = "neon_intrinsics", since = "1.59.0")]
397pub fn vdupq_n_p64(value: p64) -> poly64x2_t {
398 unsafe { transmute(u64x2::new(value, value)) }
399}
400
401#[inline]
403#[target_feature(enable = "neon")]
404#[cfg_attr(test, assert_instr(dup))]
405#[stable(feature = "neon_intrinsics", since = "1.59.0")]
406pub fn vdupq_n_f64(value: f64) -> float64x2_t {
407 float64x2_t::splat(value)
408}
409
/// Alias of `vdup_n_p64`: forwards `value` and returns that function's result.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_p64(value: p64) -> poly64x1_t {
    vdup_n_p64(value)
}
418
/// Alias of `vdup_n_f64`: forwards `value` and returns that function's result.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmov_n_f64(value: f64) -> float64x1_t {
    vdup_n_f64(value)
}
427
/// Alias of `vdupq_n_p64`: forwards `value` and returns that function's result.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_p64(value: p64) -> poly64x2_t {
    vdupq_n_p64(value)
}
436
/// Alias of `vdupq_n_f64`: forwards `value` and returns that function's result.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(dup))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vmovq_n_f64(value: f64) -> float64x2_t {
    vdupq_n_f64(value)
}
445
446#[inline]
448#[target_feature(enable = "neon")]
449#[cfg_attr(test, assert_instr(nop))]
450#[stable(feature = "neon_intrinsics", since = "1.59.0")]
451pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
452 unsafe { float64x1_t([simd_extract!(a, 1)]) }
453}
454
455#[inline]
457#[target_feature(enable = "neon")]
458#[cfg_attr(test, assert_instr(ext))]
459#[stable(feature = "neon_intrinsics", since = "1.59.0")]
460pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
461 unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
462}
463
464#[inline]
466#[target_feature(enable = "neon")]
467#[cfg_attr(test, assert_instr(nop))]
468#[stable(feature = "neon_intrinsics", since = "1.59.0")]
469pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
470 unsafe { float64x1_t([simd_extract!(a, 0)]) }
471}
472
473#[inline]
475#[target_feature(enable = "neon")]
476#[cfg_attr(test, assert_instr(nop))]
477#[stable(feature = "neon_intrinsics", since = "1.59.0")]
478pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
479 unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
480}
481
/// Returns lane `IMM5` of the one-lane vector `v` as a scalar `f64`.
///
/// `IMM5` must be 0; this is checked at compile time with `static_assert!`.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(nop, IMM5 = 0)
)]
pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
    static_assert!(IMM5 == 0);
    // SAFETY: `IMM5` was validated above.
    unsafe { simd_extract!(v, IMM5 as u32) }
}
495
/// Returns lane `IMM5` of the two-lane vector `v` as a scalar `f64`.
///
/// `IMM5` must pass `static_assert_uimm_bits!(IMM5, 1)` (presumably: be 0 or 1), checked at
/// compile time.
#[inline]
#[target_feature(enable = "neon")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
#[cfg_attr(
    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
    assert_instr(nop, IMM5 = 0)
)]
pub fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
    static_assert_uimm_bits!(IMM5, 1);
    // SAFETY: `IMM5` was validated above.
    unsafe { simd_extract!(v, IMM5 as u32) }
}
509
/// Joins two one-lane vectors into one two-lane vector.
///
/// Implemented as `simd_shuffle!(low, high, [0, 1])`; presumably the indices address the
/// concatenation of both inputs, so lane 0 comes from `low` and lane 1 from `high`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(mov))]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
    unsafe { simd_shuffle!(low, high, [0, 1]) }
}
518
/// Shifts the scalar `a` left by `N` bits.
///
/// `N` must pass `static_assert_uimm_bits!(N, 6)` (presumably: fit in six unsigned bits,
/// i.e. 0..=63), checked at compile time.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}
529
/// Shifts the scalar `a` left by `N` bits.
///
/// `N` must pass `static_assert_uimm_bits!(N, 6)` (presumably: fit in six unsigned bits,
/// i.e. 0..=63), checked at compile time.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert_uimm_bits!(N, 6);
    a << N
}
540
541#[inline]
543#[target_feature(enable = "neon")]
544#[cfg_attr(test, assert_instr(nop, N = 2))]
545#[rustc_legacy_const_generics(1)]
546#[stable(feature = "neon_intrinsics", since = "1.59.0")]
547pub fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
548 static_assert!(N >= 1 && N <= 64);
549 let n: i32 = if N == 64 { 63 } else { N };
550 a >> n
551}
552
553#[inline]
555#[target_feature(enable = "neon")]
556#[cfg_attr(test, assert_instr(nop, N = 2))]
557#[rustc_legacy_const_generics(1)]
558#[stable(feature = "neon_intrinsics", since = "1.59.0")]
559pub fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
560 static_assert!(N >= 1 && N <= 64);
561 let n: i32 = if N == 64 {
562 return 0;
563 } else {
564 N
565 };
566 a >> n
567}
568
569#[inline]
571#[target_feature(enable = "neon")]
572#[cfg_attr(test, assert_instr(nop, N = 2))]
573#[rustc_legacy_const_generics(2)]
574#[stable(feature = "neon_intrinsics", since = "1.59.0")]
575pub fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
576 static_assert!(N >= 1 && N <= 64);
577 a.wrapping_add(vshrd_n_s64::<N>(b))
578}
579
580#[inline]
582#[target_feature(enable = "neon")]
583#[cfg_attr(test, assert_instr(nop, N = 2))]
584#[rustc_legacy_const_generics(2)]
585#[stable(feature = "neon_intrinsics", since = "1.59.0")]
586pub fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
587 static_assert!(N >= 1 && N <= 64);
588 a.wrapping_add(vshrd_n_u64::<N>(b))
589}
590
591#[cfg(test)]
592mod tests {
593 use crate::core_arch::aarch64::test_support::*;
594 use crate::core_arch::arm_shared::test_support::*;
595 use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
596 use stdarch_test::simd_test;
597
598 #[simd_test(enable = "neon")]
599 fn test_vadd_f64() {
600 let a = f64x1::from_array([1.]);
601 let b = f64x1::from_array([8.]);
602 let e = f64x1::from_array([9.]);
603 let r = f64x1::from(vadd_f64(a.into(), b.into()));
604 assert_eq!(r, e);
605 }
606
607 #[simd_test(enable = "neon")]
608 fn test_vaddq_f64() {
609 let a = f64x2::new(1., 2.);
610 let b = f64x2::new(8., 7.);
611 let e = f64x2::new(9., 9.);
612 let r = f64x2::from(vaddq_f64(a.into(), b.into()));
613 assert_eq!(r, e);
614 }
615
616 #[simd_test(enable = "neon")]
617 fn test_vadd_s64() {
618 let a = i64x1::from_array([1]);
619 let b = i64x1::from_array([8]);
620 let e = i64x1::from_array([9]);
621 let r = i64x1::from(vadd_s64(a.into(), b.into()));
622 assert_eq!(r, e);
623 }
624
625 #[simd_test(enable = "neon")]
626 fn test_vadd_u64() {
627 let a = u64x1::from_array([1]);
628 let b = u64x1::from_array([8]);
629 let e = u64x1::from_array([9]);
630 let r = u64x1::from(vadd_u64(a.into(), b.into()));
631 assert_eq!(r, e);
632 }
633
634 #[simd_test(enable = "neon")]
635 fn test_vaddd_s64() {
636 let a = 1_i64;
637 let b = 8_i64;
638 let e = 9_i64;
639 let r: i64 = vaddd_s64(a, b);
640 assert_eq!(r, e);
641 }
642
643 #[simd_test(enable = "neon")]
644 fn test_vaddd_u64() {
645 let a = 1_u64;
646 let b = 8_u64;
647 let e = 9_u64;
648 let r: u64 = vaddd_u64(a, b);
649 assert_eq!(r, e);
650 }
651
652 #[simd_test(enable = "neon")]
653 fn test_vext_p64() {
654 let a = u64x1::new(0);
655 let b = u64x1::new(1);
656 let e = u64x1::new(0);
657 let r = u64x1::from(vext_p64::<0>(a.into(), b.into()));
658 assert_eq!(r, e);
659 }
660
661 #[simd_test(enable = "neon")]
662 fn test_vext_f64() {
663 let a = f64x1::new(0.);
664 let b = f64x1::new(1.);
665 let e = f64x1::new(0.);
666 let r = f64x1::from(vext_f64::<0>(a.into(), b.into()));
667 assert_eq!(r, e);
668 }
669
670 #[simd_test(enable = "neon")]
671 fn test_vshld_n_s64() {
672 let a: i64 = 1;
673 let e: i64 = 4;
674 let r: i64 = vshld_n_s64::<2>(a);
675 assert_eq!(r, e);
676 }
677
678 #[simd_test(enable = "neon")]
679 fn test_vshld_n_u64() {
680 let a: u64 = 1;
681 let e: u64 = 4;
682 let r: u64 = vshld_n_u64::<2>(a);
683 assert_eq!(r, e);
684 }
685
686 #[simd_test(enable = "neon")]
687 fn test_vshrd_n_s64() {
688 let a: i64 = 4;
689 let e: i64 = 1;
690 let r: i64 = vshrd_n_s64::<2>(a);
691 assert_eq!(r, e);
692 }
693
694 #[simd_test(enable = "neon")]
695 fn test_vshrd_n_u64() {
696 let a: u64 = 4;
697 let e: u64 = 1;
698 let r: u64 = vshrd_n_u64::<2>(a);
699 assert_eq!(r, e);
700 }
701
702 #[simd_test(enable = "neon")]
703 fn test_vsrad_n_s64() {
704 let a: i64 = 1;
705 let b: i64 = 4;
706 let e: i64 = 2;
707 let r: i64 = vsrad_n_s64::<2>(a, b);
708 assert_eq!(r, e);
709 }
710
711 #[simd_test(enable = "neon")]
712 fn test_vsrad_n_u64() {
713 let a: u64 = 1;
714 let b: u64 = 4;
715 let e: u64 = 2;
716 let r: u64 = vsrad_n_u64::<2>(a, b);
717 assert_eq!(r, e);
718 }
719
720 #[simd_test(enable = "neon")]
721 fn test_vdup_n_f64() {
722 let a: f64 = 3.3;
723 let e = f64x1::new(3.3);
724 let r = f64x1::from(vdup_n_f64(a));
725 assert_eq!(r, e);
726 }
727
728 #[simd_test(enable = "neon")]
729 fn test_vdup_n_p64() {
730 let a: u64 = 3;
731 let e = u64x1::new(3);
732 let r = u64x1::from(vdup_n_p64(a));
733 assert_eq!(r, e);
734 }
735
736 #[simd_test(enable = "neon")]
737 fn test_vdupq_n_f64() {
738 let a: f64 = 3.3;
739 let e = f64x2::new(3.3, 3.3);
740 let r = f64x2::from(vdupq_n_f64(a));
741 assert_eq!(r, e);
742 }
743
744 #[simd_test(enable = "neon")]
745 fn test_vdupq_n_p64() {
746 let a: u64 = 3;
747 let e = u64x2::new(3, 3);
748 let r = u64x2::from(vdupq_n_p64(a));
749 assert_eq!(r, e);
750 }
751
752 #[simd_test(enable = "neon")]
753 fn test_vmov_n_p64() {
754 let a: u64 = 3;
755 let e = u64x1::new(3);
756 let r = u64x1::from(vmov_n_p64(a));
757 assert_eq!(r, e);
758 }
759
760 #[simd_test(enable = "neon")]
761 fn test_vmov_n_f64() {
762 let a: f64 = 3.3;
763 let e = f64x1::new(3.3);
764 let r = f64x1::from(vmov_n_f64(a));
765 assert_eq!(r, e);
766 }
767
768 #[simd_test(enable = "neon")]
769 fn test_vmovq_n_p64() {
770 let a: u64 = 3;
771 let e = u64x2::new(3, 3);
772 let r = u64x2::from(vmovq_n_p64(a));
773 assert_eq!(r, e);
774 }
775
776 #[simd_test(enable = "neon")]
777 fn test_vmovq_n_f64() {
778 let a: f64 = 3.3;
779 let e = f64x2::new(3.3, 3.3);
780 let r = f64x2::from(vmovq_n_f64(a));
781 assert_eq!(r, e);
782 }
783
784 #[simd_test(enable = "neon")]
785 fn test_vget_high_f64() {
786 let a = f64x2::new(1.0, 2.0);
787 let e = f64x1::new(2.0);
788 let r = f64x1::from(vget_high_f64(a.into()));
789 assert_eq!(r, e);
790 }
791
792 #[simd_test(enable = "neon")]
793 fn test_vget_high_p64() {
794 let a = u64x2::new(1, 2);
795 let e = u64x1::new(2);
796 let r = u64x1::from(vget_high_p64(a.into()));
797 assert_eq!(r, e);
798 }
799
800 #[simd_test(enable = "neon")]
801 fn test_vget_low_f64() {
802 let a = f64x2::new(1.0, 2.0);
803 let e = f64x1::new(1.0);
804 let r = f64x1::from(vget_low_f64(a.into()));
805 assert_eq!(r, e);
806 }
807
808 #[simd_test(enable = "neon")]
809 fn test_vget_low_p64() {
810 let a = u64x2::new(1, 2);
811 let e = u64x1::new(1);
812 let r = u64x1::from(vget_low_p64(a.into()));
813 assert_eq!(r, e);
814 }
815
816 #[simd_test(enable = "neon")]
817 fn test_vget_lane_f64() {
818 let v = f64x1::new(1.0);
819 let r = vget_lane_f64::<0>(v.into());
820 assert_eq!(r, 1.0);
821 }
822
823 #[simd_test(enable = "neon")]
824 fn test_vgetq_lane_f64() {
825 let v = f64x2::new(0.0, 1.0);
826 let r = vgetq_lane_f64::<1>(v.into());
827 assert_eq!(r, 1.0);
828 let r = vgetq_lane_f64::<0>(v.into());
829 assert_eq!(r, 0.0);
830 }
831
832 #[simd_test(enable = "neon")]
833 fn test_vcopy_lane_s64() {
834 let a = i64x1::new(1);
835 let b = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
836 let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
837 let r = i64x1::from(vcopy_lane_s64::<0, 0>(a.into(), b.into()));
838 assert_eq!(r, e);
839 }
840
841 #[simd_test(enable = "neon")]
842 fn test_vcopy_lane_u64() {
843 let a = u64x1::new(1);
844 let b = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
845 let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
846 let r = u64x1::from(vcopy_lane_u64::<0, 0>(a.into(), b.into()));
847 assert_eq!(r, e);
848 }
849
850 #[simd_test(enable = "neon")]
851 fn test_vcopy_lane_p64() {
852 let a = u64x1::new(1);
853 let b = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
854 let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
855 let r = u64x1::from(vcopy_lane_p64::<0, 0>(a.into(), b.into()));
856 assert_eq!(r, e);
857 }
858
859 #[simd_test(enable = "neon")]
860 fn test_vcopy_lane_f64() {
861 let a = f64x1::from_array([1.]);
862 let b = f64x1::from_array([0.]);
863 let e = f64x1::from_array([0.]);
864 let r = f64x1::from(vcopy_lane_f64::<0, 0>(a.into(), b.into()));
865 assert_eq!(r, e);
866 }
867
868 #[simd_test(enable = "neon")]
869 fn test_vcopy_laneq_s64() {
870 let a = i64x1::new(1);
871 let b = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
872 let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
873 let r = i64x1::from(vcopy_laneq_s64::<0, 1>(a.into(), b.into()));
874 assert_eq!(r, e);
875 }
876
877 #[simd_test(enable = "neon")]
878 fn test_vcopy_laneq_u64() {
879 let a = u64x1::new(1);
880 let b = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
881 let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
882 let r = u64x1::from(vcopy_laneq_u64::<0, 1>(a.into(), b.into()));
883 assert_eq!(r, e);
884 }
885
886 #[simd_test(enable = "neon")]
887 fn test_vcopy_laneq_p64() {
888 let a = u64x1::new(1);
889 let b = u64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
890 let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
891 let r = u64x1::from(vcopy_laneq_p64::<0, 1>(a.into(), b.into()));
892 assert_eq!(r, e);
893 }
894
895 #[simd_test(enable = "neon")]
896 fn test_vcopy_laneq_f64() {
897 let a = f64x1::from_array([1.]);
898 let b = f64x2::from_array([0., 0.5]);
899 let e = f64x1::from_array([0.5]);
900 let r = f64x1::from(vcopy_laneq_f64::<0, 1>(a.into(), b.into()));
901 assert_eq!(r, e);
902 }
903
904 #[simd_test(enable = "neon")]
905 fn test_vbsl_f64() {
906 let a = u64x1::new(0x8000000000000000);
907 let b = f64x1::new(-1.23f64);
908 let c = f64x1::new(2.34f64);
909 let e = f64x1::new(-2.34f64);
910 let r = f64x1::from(vbsl_f64(a.into(), b.into(), c.into()));
911 assert_eq!(r, e);
912 }
913
914 #[simd_test(enable = "neon")]
915 fn test_vbsl_p64() {
916 let a = u64x1::new(1);
917 let b = u64x1::new(u64::MAX);
918 let c = u64x1::new(u64::MIN);
919 let e = u64x1::new(1);
920 let r = u64x1::from(vbsl_p64(a.into(), b.into(), c.into()));
921 assert_eq!(r, e);
922 }
923
924 #[simd_test(enable = "neon")]
925 fn test_vbslq_f64() {
926 let a = u64x2::new(1, 0x8000000000000000);
927 let b = f64x2::new(f64::MAX, -1.23f64);
928 let c = f64x2::new(f64::MIN, 2.34f64);
929 let e = f64x2::new(f64::MIN, -2.34f64);
930 let r = f64x2::from(vbslq_f64(a.into(), b.into(), c.into()));
931 assert_eq!(r, e);
932 }
933
934 #[simd_test(enable = "neon")]
935 fn test_vbslq_p64() {
936 let a = u64x2::new(u64::MAX, 1);
937 let b = u64x2::new(u64::MAX, u64::MAX);
938 let c = u64x2::new(u64::MIN, u64::MIN);
939 let e = u64x2::new(u64::MAX, 1);
940 let r = u64x2::from(vbslq_p64(a.into(), b.into(), c.into()));
941 assert_eq!(r, e);
942 }
943
944 #[simd_test(enable = "neon")]
945 fn test_vld1_f64() {
946 let a: [f64; 2] = [0., 1.];
947 let e = f64x1::new(1.);
948 let r = unsafe { f64x1::from(vld1_f64(a[1..].as_ptr())) };
949 assert_eq!(r, e)
950 }
951
952 #[simd_test(enable = "neon")]
953 fn test_vld1q_f64() {
954 let a: [f64; 3] = [0., 1., 2.];
955 let e = f64x2::new(1., 2.);
956 let r = unsafe { f64x2::from(vld1q_f64(a[1..].as_ptr())) };
957 assert_eq!(r, e)
958 }
959
960 #[simd_test(enable = "neon")]
961 fn test_vld1_dup_f64() {
962 let a: [f64; 2] = [1., 42.];
963 let e = f64x1::new(42.);
964 let r = unsafe { f64x1::from(vld1_dup_f64(a[1..].as_ptr())) };
965 assert_eq!(r, e)
966 }
967
968 #[simd_test(enable = "neon")]
969 fn test_vld1q_dup_f64() {
970 let elem: f64 = 42.;
971 let e = f64x2::new(42., 42.);
972 let r = unsafe { f64x2::from(vld1q_dup_f64(&elem)) };
973 assert_eq!(r, e)
974 }
975
976 #[simd_test(enable = "neon")]
977 fn test_vld1_lane_f64() {
978 let a = f64x1::new(0.);
979 let elem: f64 = 42.;
980 let e = f64x1::new(42.);
981 let r = unsafe { f64x1::from(vld1_lane_f64::<0>(&elem, a.into())) };
982 assert_eq!(r, e)
983 }
984
985 #[simd_test(enable = "neon")]
986 fn test_vld1q_lane_f64() {
987 let a = f64x2::new(0., 1.);
988 let elem: f64 = 42.;
989 let e = f64x2::new(0., 42.);
990 let r = unsafe { f64x2::from(vld1q_lane_f64::<1>(&elem, a.into())) };
991 assert_eq!(r, e)
992 }
993
994 #[simd_test(enable = "neon")]
995 fn test_vst1_f64() {
996 let mut vals = [0_f64; 2];
997 let a = f64x1::new(1.);
998
999 unsafe {
1000 vst1_f64(vals[1..].as_mut_ptr(), a.into());
1001 }
1002
1003 assert_eq!(vals[0], 0.);
1004 assert_eq!(vals[1], 1.);
1005 }
1006
1007 #[simd_test(enable = "neon")]
1008 fn test_vst1q_f64() {
1009 let mut vals = [0_f64; 3];
1010 let a = f64x2::new(1., 2.);
1011
1012 unsafe {
1013 vst1q_f64(vals[1..].as_mut_ptr(), a.into());
1014 }
1015
1016 assert_eq!(vals[0], 0.);
1017 assert_eq!(vals[1], 1.);
1018 assert_eq!(vals[2], 2.);
1019 }
1020
1021 macro_rules! wide_store_load_roundtrip {
1022 ($elem_ty:ty, $len:expr, $vec_ty:ty, $store:expr, $load:expr) => {
1023 let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
1024 let a: $vec_ty = transmute(vals);
1025 let mut tmp = core::mem::MaybeUninit::<[$elem_ty; $len]>::uninit();
1026 $store(tmp.as_mut_ptr().cast(), a);
1027
1028 let tmp = tmp.assume_init();
1030
1031 let r: $vec_ty = $load(tmp.as_ptr().cast());
1032 let out: [$elem_ty; $len] = transmute(r);
1033 assert_eq!(out, vals);
1034 };
1035 }
1036
    // Declares one `unsafe` test function per `name(args);` entry. Each function expands
    // `wide_store_load_roundtrip!` with the entry's parenthesized arguments.
    //
    // Generated tests carry `#[simd_test(enable = "neon,fp16")]`, are ignored under Miri,
    // and are compiled out when `target_arch = "arm64ec"`.
    macro_rules! wide_store_load_roundtrip_fp16 {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[cfg_attr(miri, ignore)]
                #[simd_test(enable = "neon,fp16")]
                #[cfg(not(target_arch = "arm64ec"))]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }
1049
    // `f16` round-trip tests. Each entry reads
    // `test_name(element type, element count, vector tuple type, store fn, load fn);`.
    wide_store_load_roundtrip_fp16! {
        test_vld1_f16_x2(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2);
        test_vld1_f16_x3(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3);
        test_vld1_f16_x4(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4);

        test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
        test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
        test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);

        test_vld2_f16(f16, 8, float16x4x2_t, vst2_f16, vld2_f16);
        test_vld3_f16(f16, 12, float16x4x3_t, vst3_f16, vld3_f16);
        test_vld4_f16(f16, 16, float16x4x4_t, vst4_f16, vld4_f16);

        test_vld2q_f16(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16);
        test_vld3q_f16(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16);
        test_vld4q_f16(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16);
    }
1067
    // Declares one `unsafe` test function per `name(args);` entry. Each function expands
    // `wide_store_load_roundtrip!` with the entry's parenthesized arguments.
    //
    // Generated tests carry `#[simd_test(enable = "neon,aes")]`.
    macro_rules! wide_store_load_roundtrip_aes {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon,aes")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }
1078
    // `p64` round-trip tests. Each entry reads
    // `test_name(element type, element count, vector tuple type, store fn, load fn);`.
    wide_store_load_roundtrip_aes! {
        test_vld1_p64_x2(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
        test_vld1_p64_x3(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
        test_vld1_p64_x4(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);

        test_vld1q_p64_x2(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
        test_vld1q_p64_x3(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
        test_vld1q_p64_x4(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
    }
1088
    // Declares one `unsafe` test function per `name(args);` entry. Each function expands
    // `wide_store_load_roundtrip!` with the entry's parenthesized arguments.
    //
    // Generated tests carry `#[simd_test(enable = "neon")]`.
    macro_rules! wide_store_load_roundtrip_neon {
        ($( $name:ident $args:tt);* $(;)?) => {
            $(
                #[simd_test(enable = "neon")]
                unsafe fn $name() {
                    wide_store_load_roundtrip! $args;
                }
            )*
        };
    }
1099
// Store/load round-trip tests for the multi-vector `vst1*_x{2,3,4}` /
// `vld1*_x{2,3,4}` pairs, one group per element type and vector width.
// Each entry is `test_name(args)`; the arguments are forwarded verbatim to
// `wide_store_load_roundtrip!` — presumably (element type, element count,
// vector tuple type, store fn, load fn); confirm against its definition.
// In every entry the count equals lanes-per-vector times vectors-per-tuple.
wide_store_load_roundtrip_neon! {
    test_vld1_f32_x2(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
    test_vld1_f32_x3(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
    test_vld1_f32_x4(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);

    test_vld1q_f32_x2(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
    test_vld1q_f32_x3(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
    test_vld1q_f32_x4(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);

    test_vld1_f64_x2(f64, 2, float64x1x2_t, vst1_f64_x2, vld1_f64_x2);
    test_vld1_f64_x3(f64, 3, float64x1x3_t, vst1_f64_x3, vld1_f64_x3);
    test_vld1_f64_x4(f64, 4, float64x1x4_t, vst1_f64_x4, vld1_f64_x4);

    test_vld1q_f64_x2(f64, 4, float64x2x2_t, vst1q_f64_x2, vld1q_f64_x2);
    test_vld1q_f64_x3(f64, 6, float64x2x3_t, vst1q_f64_x3, vld1q_f64_x3);
    test_vld1q_f64_x4(f64, 8, float64x2x4_t, vst1q_f64_x4, vld1q_f64_x4);

    test_vld1_s8_x2(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
    test_vld1_s8_x3(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
    test_vld1_s8_x4(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);

    test_vld1q_s8_x2(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
    test_vld1q_s8_x3(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
    test_vld1q_s8_x4(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);

    test_vld1_s16_x2(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
    test_vld1_s16_x3(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
    test_vld1_s16_x4(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);

    test_vld1q_s16_x2(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
    test_vld1q_s16_x3(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
    test_vld1q_s16_x4(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);

    test_vld1_s32_x2(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
    test_vld1_s32_x3(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
    test_vld1_s32_x4(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);

    test_vld1q_s32_x2(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
    test_vld1q_s32_x3(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
    test_vld1q_s32_x4(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);

    test_vld1_s64_x2(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
    test_vld1_s64_x3(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
    test_vld1_s64_x4(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);

    test_vld1q_s64_x2(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
    test_vld1q_s64_x3(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
    test_vld1q_s64_x4(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);

    test_vld1_u8_x2(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
    test_vld1_u8_x3(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
    test_vld1_u8_x4(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);

    test_vld1q_u8_x2(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
    test_vld1q_u8_x3(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
    test_vld1q_u8_x4(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);

    test_vld1_u16_x2(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
    test_vld1_u16_x3(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
    test_vld1_u16_x4(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);

    test_vld1q_u16_x2(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
    test_vld1q_u16_x3(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
    test_vld1q_u16_x4(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);

    test_vld1_u32_x2(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
    test_vld1_u32_x3(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
    test_vld1_u32_x4(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);

    test_vld1q_u32_x2(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
    test_vld1q_u32_x3(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
    test_vld1q_u32_x4(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);

    test_vld1_u64_x2(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
    test_vld1_u64_x3(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
    test_vld1_u64_x4(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);

    test_vld1q_u64_x2(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
    test_vld1q_u64_x3(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
    test_vld1q_u64_x4(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);

    test_vld1_p8_x2(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
    test_vld1_p8_x3(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
    test_vld1_p8_x4(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);

    test_vld1q_p8_x2(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
    test_vld1q_p8_x3(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
    test_vld1q_p8_x4(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);

    test_vld1_p16_x2(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
    test_vld1_p16_x3(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
    test_vld1_p16_x4(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);

    test_vld1q_p16_x2(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
    test_vld1q_p16_x3(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
    test_vld1q_p16_x4(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
}
1197
// Store/load round-trip tests pairing `vst2`/`vst3`/`vst4` with the matching
// `vld2`/`vld3`/`vld4`, one group per element type and vector width.
// Each entry is `test_name(args)`; the arguments are forwarded verbatim to
// `wide_store_load_roundtrip!` — presumably (element type, element count,
// vector tuple type, store fn, load fn); confirm against its definition.
// In every entry the count equals lanes-per-vector times vectors-per-tuple.
wide_store_load_roundtrip_neon! {
    test_vld2_f32(f32, 4, float32x2x2_t, vst2_f32, vld2_f32);
    test_vld3_f32(f32, 6, float32x2x3_t, vst3_f32, vld3_f32);
    test_vld4_f32(f32, 8, float32x2x4_t, vst4_f32, vld4_f32);

    test_vld2q_f32(f32, 8, float32x4x2_t, vst2q_f32, vld2q_f32);
    test_vld3q_f32(f32, 12, float32x4x3_t, vst3q_f32, vld3q_f32);
    test_vld4q_f32(f32, 16, float32x4x4_t, vst4q_f32, vld4q_f32);

    test_vld2_f64(f64, 2, float64x1x2_t, vst2_f64, vld2_f64);
    test_vld3_f64(f64, 3, float64x1x3_t, vst3_f64, vld3_f64);
    test_vld4_f64(f64, 4, float64x1x4_t, vst4_f64, vld4_f64);

    test_vld2q_f64(f64, 4, float64x2x2_t, vst2q_f64, vld2q_f64);
    test_vld3q_f64(f64, 6, float64x2x3_t, vst3q_f64, vld3q_f64);
    test_vld4q_f64(f64, 8, float64x2x4_t, vst4q_f64, vld4q_f64);

    test_vld2_s8(i8, 16, int8x8x2_t, vst2_s8, vld2_s8);
    test_vld3_s8(i8, 24, int8x8x3_t, vst3_s8, vld3_s8);
    test_vld4_s8(i8, 32, int8x8x4_t, vst4_s8, vld4_s8);

    test_vld2q_s8(i8, 32, int8x16x2_t, vst2q_s8, vld2q_s8);
    test_vld3q_s8(i8, 48, int8x16x3_t, vst3q_s8, vld3q_s8);
    test_vld4q_s8(i8, 64, int8x16x4_t, vst4q_s8, vld4q_s8);

    test_vld2_s16(i16, 8, int16x4x2_t, vst2_s16, vld2_s16);
    test_vld3_s16(i16, 12, int16x4x3_t, vst3_s16, vld3_s16);
    test_vld4_s16(i16, 16, int16x4x4_t, vst4_s16, vld4_s16);

    test_vld2q_s16(i16, 16, int16x8x2_t, vst2q_s16, vld2q_s16);
    test_vld3q_s16(i16, 24, int16x8x3_t, vst3q_s16, vld3q_s16);
    test_vld4q_s16(i16, 32, int16x8x4_t, vst4q_s16, vld4q_s16);

    test_vld2_s32(i32, 4, int32x2x2_t, vst2_s32, vld2_s32);
    test_vld3_s32(i32, 6, int32x2x3_t, vst3_s32, vld3_s32);
    test_vld4_s32(i32, 8, int32x2x4_t, vst4_s32, vld4_s32);

    test_vld2q_s32(i32, 8, int32x4x2_t, vst2q_s32, vld2q_s32);
    test_vld3q_s32(i32, 12, int32x4x3_t, vst3q_s32, vld3q_s32);
    test_vld4q_s32(i32, 16, int32x4x4_t, vst4q_s32, vld4q_s32);

    test_vld2_s64(i64, 2, int64x1x2_t, vst2_s64, vld2_s64);
    test_vld3_s64(i64, 3, int64x1x3_t, vst3_s64, vld3_s64);
    test_vld4_s64(i64, 4, int64x1x4_t, vst4_s64, vld4_s64);

    test_vld2q_s64(i64, 4, int64x2x2_t, vst2q_s64, vld2q_s64);
    test_vld3q_s64(i64, 6, int64x2x3_t, vst3q_s64, vld3q_s64);
    test_vld4q_s64(i64, 8, int64x2x4_t, vst4q_s64, vld4q_s64);

    test_vld2_u8(u8, 16, uint8x8x2_t, vst2_u8, vld2_u8);
    test_vld3_u8(u8, 24, uint8x8x3_t, vst3_u8, vld3_u8);
    test_vld4_u8(u8, 32, uint8x8x4_t, vst4_u8, vld4_u8);

    test_vld2q_u8(u8, 32, uint8x16x2_t, vst2q_u8, vld2q_u8);
    test_vld3q_u8(u8, 48, uint8x16x3_t, vst3q_u8, vld3q_u8);
    test_vld4q_u8(u8, 64, uint8x16x4_t, vst4q_u8, vld4q_u8);

    test_vld2_u16(u16, 8, uint16x4x2_t, vst2_u16, vld2_u16);
    test_vld3_u16(u16, 12, uint16x4x3_t, vst3_u16, vld3_u16);
    test_vld4_u16(u16, 16, uint16x4x4_t, vst4_u16, vld4_u16);

    test_vld2q_u16(u16, 16, uint16x8x2_t, vst2q_u16, vld2q_u16);
    test_vld3q_u16(u16, 24, uint16x8x3_t, vst3q_u16, vld3q_u16);
    test_vld4q_u16(u16, 32, uint16x8x4_t, vst4q_u16, vld4q_u16);

    test_vld2_u32(u32, 4, uint32x2x2_t, vst2_u32, vld2_u32);
    test_vld3_u32(u32, 6, uint32x2x3_t, vst3_u32, vld3_u32);
    test_vld4_u32(u32, 8, uint32x2x4_t, vst4_u32, vld4_u32);

    test_vld2q_u32(u32, 8, uint32x4x2_t, vst2q_u32, vld2q_u32);
    test_vld3q_u32(u32, 12, uint32x4x3_t, vst3q_u32, vld3q_u32);
    test_vld4q_u32(u32, 16, uint32x4x4_t, vst4q_u32, vld4q_u32);

    test_vld2_u64(u64, 2, uint64x1x2_t, vst2_u64, vld2_u64);
    test_vld3_u64(u64, 3, uint64x1x3_t, vst3_u64, vld3_u64);
    test_vld4_u64(u64, 4, uint64x1x4_t, vst4_u64, vld4_u64);

    test_vld2q_u64(u64, 4, uint64x2x2_t, vst2q_u64, vld2q_u64);
    test_vld3q_u64(u64, 6, uint64x2x3_t, vst3q_u64, vld3q_u64);
    test_vld4q_u64(u64, 8, uint64x2x4_t, vst4q_u64, vld4q_u64);

    test_vld2_p8(p8, 16, poly8x8x2_t, vst2_p8, vld2_p8);
    test_vld3_p8(p8, 24, poly8x8x3_t, vst3_p8, vld3_p8);
    test_vld4_p8(p8, 32, poly8x8x4_t, vst4_p8, vld4_p8);

    test_vld2q_p8(p8, 32, poly8x16x2_t, vst2q_p8, vld2q_p8);
    test_vld3q_p8(p8, 48, poly8x16x3_t, vst3q_p8, vld3q_p8);
    test_vld4q_p8(p8, 64, poly8x16x4_t, vst4q_p8, vld4q_p8);

    test_vld2_p16(p16, 8, poly16x4x2_t, vst2_p16, vld2_p16);
    test_vld3_p16(p16, 12, poly16x4x3_t, vst3_p16, vld3_p16);
    test_vld4_p16(p16, 16, poly16x4x4_t, vst4_p16, vld4_p16);

    test_vld2q_p16(p16, 16, poly16x8x2_t, vst2q_p16, vld2q_p16);
    test_vld3q_p16(p16, 24, poly16x8x3_t, vst3q_p16, vld3q_p16);
    test_vld4q_p16(p16, 32, poly16x8x4_t, vst4q_p16, vld4q_p16);
}
1295
// Body of a single-lane store/load round-trip check.
//
// Arguments: element type, number of elements in the whole vector tuple, lane
// index (passed to both functions as a const generic argument), the vector
// tuple type, the lane-store function and the lane-load function.
//
// Expands to statements that use `transmute`, so it must be invoked from an
// unsafe context.
macro_rules! lane_wide_store_load_roundtrip {
    ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => {
        // Element `k` holds the value `k`, so every position is distinguishable.
        let expected: [$elem_ty; $len] = crate::array::from_fn(|pos| pos as $elem_ty);
        let source: $vec_ty = transmute(expected);
        // Scratch memory the store writes to and the load reads back from; it
        // is fixed at four elements regardless of `$len`.
        let mut scratch = [0 as $elem_ty; 4];
        $store::<$idx>(scratch.as_mut_ptr().cast(), source);
        // The load is handed `source` as its second argument, so its result is
        // expected to compare equal to the untouched input.
        let reloaded: $vec_ty = $load::<$idx>(scratch.as_ptr().cast(), source);
        let actual: [$elem_ty; $len] = transmute(reloaded);
        assert_eq!(actual, expected);
    };
}
1307
// Expands a `;`-separated list of `test_name(args...)` entries (a trailing `;`
// is accepted) into one `#[simd_test(enable = "neon")]` test function per
// entry. The generated tests are marked `ignore` when `cfg(miri)` is set, and
// each one forwards its argument group to `lane_wide_store_load_roundtrip!`.
macro_rules! lane_wide_store_load_roundtrip_neon {
    ($( $test_fn:ident $lane_args:tt);* $(;)?) => {
        $(
            #[cfg_attr(miri, ignore)]
            #[simd_test(enable = "neon")]
            unsafe fn $test_fn() {
                // The parenthesised argument list was captured as a single
                // token tree, so it is passed through untouched.
                lane_wide_store_load_roundtrip! $lane_args;
            }
        )*
    };
}
1319
// Single-lane store/load round-trip tests. Each entry is
// `test_name(element type, element count, lane index, vector tuple type,
// store fn, load fn)`, matching the parameter order of
// `lane_wide_store_load_roundtrip!`. Every entry uses the highest lane index
// of its vector type: 15 for 16-lane, 1 for 2-lane, 0 for 1-lane vectors.
lane_wide_store_load_roundtrip_neon! {
    test_vld2q_lane_s8(i8, 32, 15, int8x16x2_t, vst2q_lane_s8, vld2q_lane_s8);
    test_vld3q_lane_s8(i8, 48, 15, int8x16x3_t, vst3q_lane_s8, vld3q_lane_s8);
    test_vld4q_lane_s8(i8, 64, 15, int8x16x4_t, vst4q_lane_s8, vld4q_lane_s8);

    test_vld2q_lane_u8(u8, 32, 15, uint8x16x2_t, vst2q_lane_u8, vld2q_lane_u8);
    test_vld3q_lane_u8(u8, 48, 15, uint8x16x3_t, vst3q_lane_u8, vld3q_lane_u8);
    test_vld4q_lane_u8(u8, 64, 15, uint8x16x4_t, vst4q_lane_u8, vld4q_lane_u8);

    test_vld2_lane_s64(i64, 2, 0, int64x1x2_t, vst2_lane_s64, vld2_lane_s64);
    test_vld3_lane_s64(i64, 3, 0, int64x1x3_t, vst3_lane_s64, vld3_lane_s64);
    test_vld4_lane_s64(i64, 4, 0, int64x1x4_t, vst4_lane_s64, vld4_lane_s64);
    test_vld2q_lane_s64(i64, 4, 1, int64x2x2_t, vst2q_lane_s64, vld2q_lane_s64);
    test_vld3q_lane_s64(i64, 6, 1, int64x2x3_t, vst3q_lane_s64, vld3q_lane_s64);
    test_vld4q_lane_s64(i64, 8, 1, int64x2x4_t, vst4q_lane_s64, vld4q_lane_s64);

    test_vld2_lane_u64(u64, 2, 0, uint64x1x2_t, vst2_lane_u64, vld2_lane_u64);
    test_vld3_lane_u64(u64, 3, 0, uint64x1x3_t, vst3_lane_u64, vld3_lane_u64);
    test_vld4_lane_u64(u64, 4, 0, uint64x1x4_t, vst4_lane_u64, vld4_lane_u64);
    test_vld2q_lane_u64(u64, 4, 1, uint64x2x2_t, vst2q_lane_u64, vld2q_lane_u64);
    test_vld3q_lane_u64(u64, 6, 1, uint64x2x3_t, vst3q_lane_u64, vld3q_lane_u64);
    test_vld4q_lane_u64(u64, 8, 1, uint64x2x4_t, vst4q_lane_u64, vld4q_lane_u64);
}
1343}
1344
// Test-only submodules. Each one is compiled only under `cfg(test)` and its
// source is taken from the `arm_shared/neon` directory named in its `#[path]`
// attribute rather than from a file next to this module.

// Source file: arm_shared/neon/table_lookup_tests.rs
#[cfg(test)]
#[path = "../../arm_shared/neon/table_lookup_tests.rs"]
mod table_lookup_tests;

// Source file: arm_shared/neon/shift_and_insert_tests.rs
#[cfg(test)]
#[path = "../../arm_shared/neon/shift_and_insert_tests.rs"]
mod shift_and_insert_tests;

// Source file: arm_shared/neon/load_tests.rs
#[cfg(test)]
#[path = "../../arm_shared/neon/load_tests.rs"]
mod load_tests;

// Source file: arm_shared/neon/store_tests.rs
#[cfg(test)]
#[path = "../../arm_shared/neon/store_tests.rs"]
mod store_tests;