~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,11 +1,8 @@
	1	+/* SPDX-License-Identifier: GPL-2.0-only */
1	2	/*
2	3	* sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions
3	4	*
4	5	* Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
5		- *
6		- * This program is free software; you can redistribute it and/or modify
7		- * it under the terms of the GNU General Public License version 2 as
8		- * published by the Free Software Foundation.
9	6	*/
10	7
11	8	#include <linux/linkage.h>
..	..	@@ -78,37 +75,31 @@
78	75	* int blocks)
79	76	*/
80	77	.text
81		-ENTRY(sha2_ce_transform)
82		- frame_push 3
83		-
84		- mov x19, x0
85		- mov x20, x1
86		- mov x21, x2
87		-
	78	+SYM_FUNC_START(sha2_ce_transform)
88	79	/* load round constants */
89		-0: adr_l x8, .Lsha2_rcon
	80	+ adr_l x8, .Lsha2_rcon
90	81	ld1 { v0.4s- v3.4s}, [x8], #64
91	82	ld1 { v4.4s- v7.4s}, [x8], #64
92	83	ld1 { v8.4s-v11.4s}, [x8], #64
93	84	ld1 {v12.4s-v15.4s}, [x8]
94	85
95	86	/* load state */
96		- ld1 {dgav.4s, dgbv.4s}, [x19]
	87	+ ld1 {dgav.4s, dgbv.4s}, [x0]
97	88
98	89	/* load sha256_ce_state::finalize */
99	90	ldr_l w4, sha256_ce_offsetof_finalize, x4
100		- ldr w4, [x19, x4]
	91	+ ldr w4, [x0, x4]
101	92
102	93	/* load input */
103		-1: ld1 {v16.4s-v19.4s}, [x20], #64
104		- sub w21, w21, #1
	94	+0: ld1 {v16.4s-v19.4s}, [x1], #64
	95	+ sub w2, w2, #1
105	96
106	97	CPU_LE( rev32 v16.16b, v16.16b )
107	98	CPU_LE( rev32 v17.16b, v17.16b )
108	99	CPU_LE( rev32 v18.16b, v18.16b )
109	100	CPU_LE( rev32 v19.16b, v19.16b )
110	101
111		-2: add t0.4s, v16.4s, v0.4s
	102	+1: add t0.4s, v16.4s, v0.4s
112	103	mov dg0v.16b, dgav.16b
113	104	mov dg1v.16b, dgbv.16b
114	105
..	..	@@ -137,24 +128,18 @@
137	128	add dgbv.4s, dgbv.4s, dg1v.4s
138	129
139	130	/* handled all input blocks? */
140		- cbz w21, 3f
141		-
142		- if_will_cond_yield_neon
143		- st1 {dgav.4s, dgbv.4s}, [x19]
144		- do_cond_yield_neon
	131	+ cbz w2, 2f
	132	+ cond_yield 3f, x5, x6
145	133	b 0b
146		- endif_yield_neon
147		-
148		- b 1b
149	134
150	135	/*
151	136	* Final block: add padding and total bit count.
152	137	* Skip if the input size was not a round multiple of the block size;
153	138	* the padding is handled by the C code in that case.
154	139	*/
155		-3: cbz x4, 4f
	140	+2: cbz x4, 3f
156	141	ldr_l w4, sha256_ce_offsetof_count, x4
157		- ldr x4, [x19, x4]
	142	+ ldr x4, [x0, x4]
158	143	movi v17.2d, #0
159	144	mov x8, #0x80000000
160	145	movi v18.2d, #0
..	..	@@ -163,10 +148,10 @@
163	148	mov x4, #0
164	149	mov v19.d[0], xzr
165	150	mov v19.d[1], x7
166		- b 2b
	151	+ b 1b
167	152
168	153	/* store new state */
169		-4: st1 {dgav.4s, dgbv.4s}, [x19]
170		- frame_pop
	154	+3: st1 {dgav.4s, dgbv.4s}, [x0]
	155	+ mov w0, w2
171	156	ret
172		-ENDPROC(sha2_ce_transform)
	157	+SYM_FUNC_END(sha2_ce_transform)