core/stdarch/crates/core_arch/src/riscv_shared/zk.rs
1#[cfg(test)]
2use stdarch_test::assert_instr;
3
4unsafe extern "unadjusted" {
5 #[link_name = "llvm.riscv.sm4ed"]
6 fn _sm4ed(rs1: i32, rs2: i32, bs: i32) -> i32;
7
8 #[link_name = "llvm.riscv.sm4ks"]
9 fn _sm4ks(rs1: i32, rs2: i32, bs: i32) -> i32;
10
11 #[link_name = "llvm.riscv.sm3p0"]
12 fn _sm3p0(rs1: i32) -> i32;
13
14 #[link_name = "llvm.riscv.sm3p1"]
15 fn _sm3p1(rs1: i32) -> i32;
16
17 #[link_name = "llvm.riscv.sha256sig0"]
18 fn _sha256sig0(rs1: i32) -> i32;
19
20 #[link_name = "llvm.riscv.sha256sig1"]
21 fn _sha256sig1(rs1: i32) -> i32;
22
23 #[link_name = "llvm.riscv.sha256sum0"]
24 fn _sha256sum0(rs1: i32) -> i32;
25
26 #[link_name = "llvm.riscv.sha256sum1"]
27 fn _sha256sum1(rs1: i32) -> i32;
28}
29
// XLEN-wide permutation intrinsics, 32-bit flavour (only declared on RV32).
#[cfg(target_arch = "riscv32")]
unsafe extern "unadjusted" {
    // Nibble-wise lookup.
    #[link_name = "llvm.riscv.xperm4.i32"]
    fn _xperm4_32(rs1: i32, rs2: i32) -> i32;

    // Byte-wise lookup.
    #[link_name = "llvm.riscv.xperm8.i32"]
    fn _xperm8_32(rs1: i32, rs2: i32) -> i32;
}
38
// XLEN-wide permutation intrinsics, 64-bit flavour (only declared on RV64).
#[cfg(target_arch = "riscv64")]
unsafe extern "unadjusted" {
    // Nibble-wise lookup.
    #[link_name = "llvm.riscv.xperm4.i64"]
    fn _xperm4_64(rs1: i64, rs2: i64) -> i64;

    // Byte-wise lookup.
    #[link_name = "llvm.riscv.xperm8.i64"]
    fn _xperm8_64(rs1: i64, rs2: i64) -> i64;
}
47
48/// Byte-wise lookup of indicies into a vector in registers.
49///
50/// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8
51/// 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is
52/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2
53/// is out of bounds.
54///
55/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
56///
57/// Version: v1.0.1
58///
59/// Section: 3.47
60#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
61#[target_feature(enable = "zbkx")]
62#[cfg_attr(test, assert_instr(xperm8))]
63#[inline]
64pub fn xperm8(rs1: usize, rs2: usize) -> usize {
65 #[cfg(target_arch = "riscv32")]
66 unsafe {
67 _xperm8_32(rs1 as i32, rs2 as i32) as usize
68 }
69
70 #[cfg(target_arch = "riscv64")]
71 unsafe {
72 _xperm8_64(rs1 as i64, rs2 as i64) as usize
73 }
74}
75
76/// Nibble-wise lookup of indicies into a vector.
77///
78/// The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4
79/// 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is
80/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2
81/// is out of bounds.
82///
83/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
84///
85/// Version: v1.0.1
86///
87/// Section: 3.48
88#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
89#[target_feature(enable = "zbkx")]
90#[cfg_attr(test, assert_instr(xperm4))]
91#[inline]
92pub fn xperm4(rs1: usize, rs2: usize) -> usize {
93 #[cfg(target_arch = "riscv32")]
94 unsafe {
95 _xperm4_32(rs1 as i32, rs2 as i32) as usize
96 }
97
98 #[cfg(target_arch = "riscv64")]
99 unsafe {
100 _xperm4_64(rs1 as i64, rs2 as i64) as usize
101 }
102}
103
104/// Implements the Sigma0 transformation function as used in the SHA2-256 hash function \[49\]
105/// (Section 4.1.2).
106///
107/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
108/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
109/// register are operated on, and the result sign extended to XLEN bits. Though named for
110/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
111/// described in \[49\]. This instruction must always be implemented such that its execution
112/// latency does not depend on the data being operated on.
113///
114/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
115///
116/// Version: v1.0.1
117///
118/// Section: 3.27
119#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
120#[target_feature(enable = "zknh")]
121#[cfg_attr(test, assert_instr(sha256sig0))]
122#[inline]
123pub fn sha256sig0(rs1: u32) -> u32 {
124 unsafe { _sha256sig0(rs1 as i32) as u32 }
125}
126
127/// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\]
128/// (Section 4.1.2).
129///
130/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
131/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
132/// register are operated on, and the result sign extended to XLEN bits. Though named for
133/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
134/// described in \[49\]. This instruction must always be implemented such that its execution
135/// latency does not depend on the data being operated on.
136///
137/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
138///
139/// Version: v1.0.1
140///
141/// Section: 3.28
142#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
143#[target_feature(enable = "zknh")]
144#[cfg_attr(test, assert_instr(sha256sig1))]
145#[inline]
146pub fn sha256sig1(rs1: u32) -> u32 {
147 unsafe { _sha256sig1(rs1 as i32) as u32 }
148}
149
150/// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\]
151/// (Section 4.1.2).
152///
153/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
154/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
155/// register are operated on, and the result sign extended to XLEN bits. Though named for
156/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
157/// described in \[49\]. This instruction must always be implemented such that its execution
158/// latency does not depend on the data being operated on.
159///
160/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
161///
162/// Version: v1.0.1
163///
164/// Section: 3.29
165#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
166#[target_feature(enable = "zknh")]
167#[cfg_attr(test, assert_instr(sha256sum0))]
168#[inline]
169pub fn sha256sum0(rs1: u32) -> u32 {
170 unsafe { _sha256sum0(rs1 as i32) as u32 }
171}
172
173/// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\]
174/// (Section 4.1.2).
175///
176/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the
177/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source
178/// register are operated on, and the result sign extended to XLEN bits. Though named for
179/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as
180/// described in \[49\]. This instruction must always be implemented such that its execution
181/// latency does not depend on the data being operated on.
182///
183/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
184///
185/// Version: v1.0.1
186///
187/// Section: 3.30
188#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
189#[target_feature(enable = "zknh")]
190#[cfg_attr(test, assert_instr(sha256sum1))]
191#[inline]
192pub fn sha256sum1(rs1: u32) -> u32 {
193 unsafe { _sha256sum1(rs1 as i32) as u32 }
194}
195
196/// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\].
197///
198/// Implements a T-tables in hardware style approach to accelerating the SM4 round function. A
199/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are
200/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction
201/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to
202/// XLEN bits. This instruction must always be implemented such that its execution latency does
203/// not depend on the data being operated on.
204///
205/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
206///
207/// Version: v1.0.1
208///
209/// Section: 3.43
210///
211/// # Note
212///
213/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
214/// used.
215///
216/// # Details
217///
218/// Accelerates the round function `F` in the SM4 block cipher algorithm
219///
220/// This instruction is included in extension `Zksed`. It's defined as:
221///
222/// ```text
223/// SM4ED(x, a, BS) = x ⊕ T(ai)
224/// ... where
225/// ai = a.bytes[BS]
226/// T(ai) = L(τ(ai))
227/// bi = τ(ai) = SM4-S-Box(ai)
228/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24)
229/// SM4ED = (ci ≪ (BS * 8)) ⊕ x
230/// ```
231///
232/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
233/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ`
234/// and linear layer transform `L`.
235///
236/// In the SM4 algorithm, the round function `F` is defined as:
237///
238/// ```text
239/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk)
240/// ... where
241/// T(A) = L(τ(A))
242/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3))
243/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24)
244/// ```
245///
246/// It can be implemented by `sm4ed` instruction like:
247///
248/// ```no_run
249/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
250/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
251/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
252/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
253/// let a = x1 ^ x2 ^ x3 ^ rk;
254/// let c0 = sm4ed(x0, a, 0);
255/// let c1 = sm4ed(c0, a, 1); // c1 represents c[0..=1], etc.
256/// let c2 = sm4ed(c1, a, 2);
257/// let c3 = sm4ed(c2, a, 3);
258/// return c3; // c3 represents c[0..=3]
259/// # }
260/// ```
261#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
262#[target_feature(enable = "zksed")]
263#[rustc_legacy_const_generics(2)]
264#[cfg_attr(test, assert_instr(sm4ed, BS = 0))]
265#[inline]
266pub fn sm4ed<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
267 static_assert!(BS < 4);
268
269 unsafe { _sm4ed(rs1 as i32, rs2 as i32, BS as i32) as u32 }
270}
271
272/// Accelerates the Key Schedule operation of the SM4 block cipher \[5, 31\] with `bs=0`.
273///
274/// Implements a T-tables in hardware style approach to accelerating the SM4 Key Schedule. A
275/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are
276/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction
277/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to
278/// XLEN bits. This instruction must always be implemented such that its execution latency does
279/// not depend on the data being operated on.
280///
281/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
282///
283/// Version: v1.0.1
284///
285/// Section: 3.44
286///
287/// # Note
288///
289/// The `BS` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
290/// used.
291///
292/// # Details
293///
294/// Accelerates the round function `F` in the SM4 block cipher algorithm
295///
296/// This instruction is included in extension `Zksed`. It's defined as:
297///
298/// ```text
299/// SM4ED(x, a, BS) = x ⊕ T(ai)
300/// ... where
301/// ai = a.bytes[BS]
302/// T(ai) = L(τ(ai))
303/// bi = τ(ai) = SM4-S-Box(ai)
304/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24)
305/// SM4ED = (ci ≪ (BS * 8)) ⊕ x
306/// ```
307///
308/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
309/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ`
310/// and linear layer transform `L`.
311///
312/// In the SM4 algorithm, the round function `F` is defined as:
313///
314/// ```text
315/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk)
316/// ... where
317/// T(A) = L(τ(A))
318/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3))
319/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24)
320/// ```
321///
322/// It can be implemented by `sm4ed` instruction like:
323///
324/// ```no_run
325/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
326/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
327/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
328/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
329/// let a = x1 ^ x2 ^ x3 ^ rk;
330/// let c0 = sm4ed(x0, a, 0);
331/// let c1 = sm4ed(c0, a, 1); // c1 represents c[0..=1], etc.
332/// let c2 = sm4ed(c1, a, 2);
333/// let c3 = sm4ed(c2, a, 3);
334/// return c3; // c3 represents c[0..=3]
335/// # }
336/// ```
337#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
338#[target_feature(enable = "zksed")]
339#[rustc_legacy_const_generics(2)]
340#[cfg_attr(test, assert_instr(sm4ks, BS = 0))]
341#[inline]
342pub fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
343 static_assert!(BS < 4);
344
345 unsafe { _sm4ks(rs1 as i32, rs2 as i32, BS as i32) as u32 }
346}
347
348/// Implements the P0 transformation function as used in the SM3 hash function [4, 30].
349///
350/// This instruction is supported for the RV32 and RV64 base architectures. It implements the
351/// P0 transform of the SM3 hash function [4, 30]. This instruction must always be implemented
352/// such that its execution latency does not depend on the data being operated on.
353///
354/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
355///
356/// Version: v1.0.1
357///
358/// Section: 3.41
359///
360/// # Details
361///
362/// `P0` transformation function as is used in the SM3 hash algorithm
363///
364/// This function is included in `Zksh` extension. It's defined as:
365///
366/// ```text
367/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17)
368/// ```
369///
370/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
371///
372/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the
373/// compression function `CF` uses the intermediate value `TT2` to calculate
374/// the variable `E` in one iteration for subsequent processes.
375#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
376#[target_feature(enable = "zksh")]
377#[cfg_attr(test, assert_instr(sm3p0))]
378#[inline]
379pub fn sm3p0(rs1: u32) -> u32 {
380 unsafe { _sm3p0(rs1 as i32) as u32 }
381}
382
383/// Implements the P1 transformation function as used in the SM3 hash function [4, 30].
384///
385/// This instruction is supported for the RV32 and RV64 base architectures. It implements the
386/// P1 transform of the SM3 hash function [4, 30]. This instruction must always be implemented
387/// such that its execution latency does not depend on the data being operated on.
388///
389/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
390///
391/// Version: v1.0.1
392///
393/// Section: 3.42
394///
395/// # Details
396///
397/// `P1` transformation function as is used in the SM3 hash algorithm
398///
399/// This function is included in `Zksh` extension. It's defined as:
400///
401/// ```text
402/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23)
403/// ```
404///
405/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits.
406///
407/// In the SM3 algorithm, the `P1` transformation is used to expand message,
408/// where expanded word `Wj` can be generated from the previous words.
409/// The whole process can be described as the following pseudocode:
410///
411/// ```text
412/// FOR j=16 TO 67
413/// Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6
414/// ENDFOR
415/// ```
416#[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
417#[target_feature(enable = "zksh")]
418#[cfg_attr(test, assert_instr(sm3p1))]
419#[inline]
420pub fn sm3p1(rs1: u32) -> u32 {
421 unsafe { _sm3p1(rs1 as i32) as u32 }
422}