~hc/RK356X_SDK_RELEASE.git

..	..	@@ -1,6 +1,6 @@
1	1	/*
2		- * Copyright (c) 2013, Kenneth MacKay
3		- * All rights reserved.
	2	+ * Copyright (c) 2013, 2014 Kenneth MacKay. All rights reserved.
	3	+ * Copyright (c) 2019 Vitaly Chikunov <vt@altlinux.org>
4	4	*
5	5	* Redistribution and use in source and binary forms, with or without
6	6	* modification, are permitted provided that the following conditions are
..	..	@@ -24,12 +24,15 @@
24	24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	25	*/
26	26
	27	+#include <linux/module.h>
27	28	#include <linux/random.h>
28	29	#include <linux/slab.h>
29	30	#include <linux/swab.h>
30	31	#include <linux/fips.h>
31	32	#include <crypto/ecdh.h>
32	33	#include <crypto/rng.h>
	34	+#include <asm/unaligned.h>
	35	+#include <linux/ratelimit.h>
33	36
34	37	#include "ecc.h"
35	38	#include "ecc_curve_defs.h"
..	..	@@ -64,7 +67,7 @@
64	67
65	68	static void ecc_free_digits_space(u64 *space)
66	69	{
67		- kzfree(space);
	70	+ kfree_sensitive(space);
68	71	}
69	72
70	73	static struct ecc_point *ecc_alloc_point(unsigned int ndigits)
..	..	@@ -98,9 +101,9 @@
98	101	if (!p)
99	102	return;
100	103
101		- kzfree(p->x);
102		- kzfree(p->y);
103		- kzfree(p);
	104	+ kfree_sensitive(p->x);
	105	+ kfree_sensitive(p->y);
	106	+ kfree_sensitive(p);
104	107	}
105	108
106	109	static void vli_clear(u64 *vli, unsigned int ndigits)
..	..	@@ -112,7 +115,7 @@
112	115	}
113	116
114	117	/* Returns true if vli == 0, false otherwise. */
115		-static bool vli_is_zero(const u64 *vli, unsigned int ndigits)
	118	+bool vli_is_zero(const u64 *vli, unsigned int ndigits)
116	119	{
117	120	int i;
118	121
..	..	@@ -123,11 +126,17 @@
123	126
124	127	return true;
125	128	}
	129	+EXPORT_SYMBOL(vli_is_zero);
126	130
127	131	/* Returns nonzero if bit number 'bit' of vli is set. */
128	132	static u64 vli_test_bit(const u64 *vli, unsigned int bit)
129	133	{
130	134	return (vli[bit / 64] & ((u64)1 << (bit % 64)));
	135	+}
	136	+
	137	+static bool vli_is_negative(const u64 *vli, unsigned int ndigits)
	138	+{
	139	+ return vli_test_bit(vli, ndigits * 64 - 1);
131	140	}
132	141
133	142	/* Counts the number of 64-bit "digits" in vli. */
..	..	@@ -161,6 +170,27 @@
161	170	return ((num_digits - 1) * 64 + i);
162	171	}
163	172
	173	+/* Set dest from unaligned bit string src. */
	174	+void vli_from_be64(u64 *dest, const void *src, unsigned int ndigits)
	175	+{
	176	+ int i;
	177	+ const u64 *from = src;
	178	+
	179	+ for (i = 0; i < ndigits; i++)
	180	+ dest[i] = get_unaligned_be64(&from[ndigits - 1 - i]);
	181	+}
	182	+EXPORT_SYMBOL(vli_from_be64);
	183	+
	184	+void vli_from_le64(u64 *dest, const void *src, unsigned int ndigits)
	185	+{
	186	+ int i;
	187	+ const u64 *from = src;
	188	+
	189	+ for (i = 0; i < ndigits; i++)
	190	+ dest[i] = get_unaligned_le64(&from[i]);
	191	+}
	192	+EXPORT_SYMBOL(vli_from_le64);
	193	+
164	194	/* Sets dest = src. */
165	195	static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
166	196	{
..	..	@@ -171,7 +201,7 @@
171	201	}
172	202
173	203	/* Returns sign of left - right. */
174		-static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
	204	+int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
175	205	{
176	206	int i;
177	207
..	..	@@ -184,6 +214,7 @@
184	214
185	215	return 0;
186	216	}
	217	+EXPORT_SYMBOL(vli_cmp);
187	218
188	219	/* Computes result = in << c, returning carry. Can modify in place
189	220	* (if result == in). 0 < shift < 64.
..	..	@@ -239,8 +270,30 @@
239	270	return carry;
240	271	}
241	272
	273	+/* Computes result = left + right, returning carry. Can modify in place. */
	274	+static u64 vli_uadd(u64 *result, const u64 *left, u64 right,
	275	+ unsigned int ndigits)
	276	+{
	277	+ u64 carry = right;
	278	+ int i;
	279	+
	280	+ for (i = 0; i < ndigits; i++) {
	281	+ u64 sum;
	282	+
	283	+ sum = left[i] + carry;
	284	+ if (sum != left[i])
	285	+ carry = (sum < left[i]);
	286	+ else
	287	+ carry = !!carry;
	288	+
	289	+ result[i] = sum;
	290	+ }
	291	+
	292	+ return carry;
	293	+}
	294	+
242	295	/* Computes result = left - right, returning borrow. Can modify in place. */
243		-static u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
	296	+u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
244	297	unsigned int ndigits)
245	298	{
246	299	u64 borrow = 0;
..	..	@@ -258,9 +311,37 @@
258	311
259	312	return borrow;
260	313	}
	314	+EXPORT_SYMBOL(vli_sub);
	315	+
	316	+/* Computes result = left - right, returning borrow. Can modify in place. */
	317	+static u64 vli_usub(u64 *result, const u64 *left, u64 right,
	318	+ unsigned int ndigits)
	319	+{
	320	+ u64 borrow = right;
	321	+ int i;
	322	+
	323	+ for (i = 0; i < ndigits; i++) {
	324	+ u64 diff;
	325	+
	326	+ diff = left[i] - borrow;
	327	+ if (diff != left[i])
	328	+ borrow = (diff > left[i]);
	329	+
	330	+ result[i] = diff;
	331	+ }
	332	+
	333	+ return borrow;
	334	+}
261	335
262	336	static uint128_t mul_64_64(u64 left, u64 right)
263	337	{
	338	+ uint128_t result;
	339	+#if defined(CONFIG_ARCH_SUPPORTS_INT128)
	340	+ unsigned __int128 m = (unsigned __int128)left * right;
	341	+
	342	+ result.m_low = m;
	343	+ result.m_high = m >> 64;
	344	+#else
264	345	u64 a0 = left & 0xffffffffull;
265	346	u64 a1 = left >> 32;
266	347	u64 b0 = right & 0xffffffffull;
..	..	@@ -269,7 +350,6 @@
269	350	u64 m1 = a0 * b1;
270	351	u64 m2 = a1 * b0;
271	352	u64 m3 = a1 * b1;
272		- uint128_t result;
273	353
274	354	m2 += (m0 >> 32);
275	355	m2 += m1;
..	..	@@ -280,7 +360,7 @@
280	360
281	361	result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
282	362	result.m_high = m3 + (m2 >> 32);
283		-
	363	+#endif
284	364	return result;
285	365	}
286	366
..	..	@@ -328,6 +408,28 @@
328	408	}
329	409
330	410	result[ndigits * 2 - 1] = r01.m_low;
	411	+}
	412	+
	413	+/* Compute product = left * right, for a small right value. */
	414	+static void vli_umult(u64 *result, const u64 *left, u32 right,
	415	+ unsigned int ndigits)
	416	+{
	417	+ uint128_t r01 = { 0 };
	418	+ unsigned int k;
	419	+
	420	+ for (k = 0; k < ndigits; k++) {
	421	+ uint128_t product;
	422	+
	423	+ product = mul_64_64(left[k], right);
	424	+ r01 = add_128_128(r01, product);
	425	+ /* no carry */
	426	+ result[k] = r01.m_low;
	427	+ r01.m_low = r01.m_high;
	428	+ r01.m_high = 0;
	429	+ }
	430	+ result[k] = r01.m_low;
	431	+ for (++k; k < ndigits * 2; k++)
	432	+ result[k] = 0;
331	433	}
332	434
333	435	static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
..	..	@@ -400,6 +502,170 @@
400	502	*/
401	503	if (borrow)
402	504	vli_add(result, result, mod, ndigits);
	505	+}
	506	+
	507	+/*
	508	+ * Computes result = product % mod
	509	+ * for special form moduli: p = 2^k-c, for small c (note the minus sign)
	510	+ *
	511	+ * References:
	512	+ * R. Crandall, C. Pomerance. Prime Numbers: A Computational Perspective.
	513	+ * 9 Fast Algorithms for Large-Integer Arithmetic. 9.2.3 Moduli of special form
	514	+ * Algorithm 9.2.13 (Fast mod operation for special-form moduli).
	515	+ */
	516	+static void vli_mmod_special(u64 *result, const u64 *product,
	517	+ const u64 *mod, unsigned int ndigits)
	518	+{
	519	+ u64 c = -mod[0];
	520	+ u64 t[ECC_MAX_DIGITS * 2];
	521	+ u64 r[ECC_MAX_DIGITS * 2];
	522	+
	523	+ vli_set(r, product, ndigits * 2);
	524	+ while (!vli_is_zero(r + ndigits, ndigits)) {
	525	+ vli_umult(t, r + ndigits, c, ndigits);
	526	+ vli_clear(r + ndigits, ndigits);
	527	+ vli_add(r, r, t, ndigits * 2);
	528	+ }
	529	+ vli_set(t, mod, ndigits);
	530	+ vli_clear(t + ndigits, ndigits);
	531	+ while (vli_cmp(r, t, ndigits * 2) >= 0)
	532	+ vli_sub(r, r, t, ndigits * 2);
	533	+ vli_set(result, r, ndigits);
	534	+}
	535	+
	536	+/*
	537	+ * Computes result = product % mod
	538	+ * for special form moduli: p = 2^{k-1}+c, for small c (note the plus sign)
	539	+ * where k-1 does not fit into qword boundary by -1 bit (such as 255).
	540	+
	541	+ * References (loosely based on):
	542	+ * A. Menezes, P. van Oorschot, S. Vanstone. Handbook of Applied Cryptography.
	543	+ * 14.3.4 Reduction methods for moduli of special form. Algorithm 14.47.
	544	+ * URL: http://cacr.uwaterloo.ca/hac/about/chap14.pdf
	545	+ *
	546	+ * H. Cohen, G. Frey, R. Avanzi, C. Doche, T. Lange, K. Nguyen, F. Vercauteren.
	547	+ * Handbook of Elliptic and Hyperelliptic Curve Cryptography.
	548	+ * Algorithm 10.25 Fast reduction for special form moduli
	549	+ */
	550	+static void vli_mmod_special2(u64 *result, const u64 *product,
	551	+ const u64 *mod, unsigned int ndigits)
	552	+{
	553	+ u64 c2 = mod[0] * 2;
	554	+ u64 q[ECC_MAX_DIGITS];
	555	+ u64 r[ECC_MAX_DIGITS * 2];
	556	+ u64 m[ECC_MAX_DIGITS * 2]; /* expanded mod */
	557	+ int carry; /* last bit that doesn't fit into q */
	558	+ int i;
	559	+
	560	+ vli_set(m, mod, ndigits);
	561	+ vli_clear(m + ndigits, ndigits);
	562	+
	563	+ vli_set(r, product, ndigits);
	564	+ /* q and carry are top bits */
	565	+ vli_set(q, product + ndigits, ndigits);
	566	+ vli_clear(r + ndigits, ndigits);
	567	+ carry = vli_is_negative(r, ndigits);
	568	+ if (carry)
	569	+ r[ndigits - 1] &= (1ull << 63) - 1;
	570	+ for (i = 1; carry || !vli_is_zero(q, ndigits); i++) {
	571	+ u64 qc[ECC_MAX_DIGITS * 2];
	572	+
	573	+ vli_umult(qc, q, c2, ndigits);
	574	+ if (carry)
	575	+ vli_uadd(qc, qc, mod[0], ndigits * 2);
	576	+ vli_set(q, qc + ndigits, ndigits);
	577	+ vli_clear(qc + ndigits, ndigits);
	578	+ carry = vli_is_negative(qc, ndigits);
	579	+ if (carry)
	580	+ qc[ndigits - 1] &= (1ull << 63) - 1;
	581	+ if (i & 1)
	582	+ vli_sub(r, r, qc, ndigits * 2);
	583	+ else
	584	+ vli_add(r, r, qc, ndigits * 2);
	585	+ }
	586	+ while (vli_is_negative(r, ndigits * 2))
	587	+ vli_add(r, r, m, ndigits * 2);
	588	+ while (vli_cmp(r, m, ndigits * 2) >= 0)
	589	+ vli_sub(r, r, m, ndigits * 2);
	590	+
	591	+ vli_set(result, r, ndigits);
	592	+}
	593	+
	594	+/*
	595	+ * Computes result = product % mod, where product is 2N words long.
	596	+ * Reference: Ken MacKay's micro-ecc.
	597	+ * Currently only designed to work for curve_p or curve_n.
	598	+ */
	599	+static void vli_mmod_slow(u64 *result, u64 *product, const u64 *mod,
	600	+ unsigned int ndigits)
	601	+{
	602	+ u64 mod_m[2 * ECC_MAX_DIGITS];
	603	+ u64 tmp[2 * ECC_MAX_DIGITS];
	604	+ u64 *v[2] = { tmp, product };
	605	+ u64 carry = 0;
	606	+ unsigned int i;
	607	+ /* Shift mod so its highest set bit is at the maximum position. */
	608	+ int shift = (ndigits * 2 * 64) - vli_num_bits(mod, ndigits);
	609	+ int word_shift = shift / 64;
	610	+ int bit_shift = shift % 64;
	611	+
	612	+ vli_clear(mod_m, word_shift);
	613	+ if (bit_shift > 0) {
	614	+ for (i = 0; i < ndigits; ++i) {
	615	+ mod_m[word_shift + i] = (mod[i] << bit_shift) | carry;
	616	+ carry = mod[i] >> (64 - bit_shift);
	617	+ }
	618	+ } else
	619	+ vli_set(mod_m + word_shift, mod, ndigits);
	620	+
	621	+ for (i = 1; shift >= 0; --shift) {
	622	+ u64 borrow = 0;
	623	+ unsigned int j;
	624	+
	625	+ for (j = 0; j < ndigits * 2; ++j) {
	626	+ u64 diff = v[i][j] - mod_m[j] - borrow;
	627	+
	628	+ if (diff != v[i][j])
	629	+ borrow = (diff > v[i][j]);
	630	+ v[1 - i][j] = diff;
	631	+ }
	632	+ i = !(i ^ borrow); /* Swap the index if there was no borrow */
	633	+ vli_rshift1(mod_m, ndigits);
	634	+ mod_m[ndigits - 1] |= mod_m[ndigits] << (64 - 1);
	635	+ vli_rshift1(mod_m + ndigits, ndigits);
	636	+ }
	637	+ vli_set(result, v[i], ndigits);
	638	+}
	639	+
	640	+/* Computes result = product % mod using Barrett's reduction with precomputed
	641	+ * value mu appended to the mod after ndigits, mu = (2^{2w} / mod) and has
	642	+ * length ndigits + 1, where mu * (2^w - 1) should not overflow ndigits
	643	+ * boundary.
	644	+ *
	645	+ * Reference:
	646	+ * R. Brent, P. Zimmermann. Modern Computer Arithmetic. 2010.
	647	+ * 2.4.1 Barrett's algorithm. Algorithm 2.5.
	648	+ */
	649	+static void vli_mmod_barrett(u64 *result, u64 *product, const u64 *mod,
	650	+ unsigned int ndigits)
	651	+{
	652	+ u64 q[ECC_MAX_DIGITS * 2];
	653	+ u64 r[ECC_MAX_DIGITS * 2];
	654	+ const u64 *mu = mod + ndigits;
	655	+
	656	+ vli_mult(q, product + ndigits, mu, ndigits);
	657	+ if (mu[ndigits])
	658	+ vli_add(q + ndigits, q + ndigits, product + ndigits, ndigits);
	659	+ vli_mult(r, mod, q + ndigits, ndigits);
	660	+ vli_sub(r, product, r, ndigits * 2);
	661	+ while (!vli_is_zero(r + ndigits, ndigits) ||
	662	+ vli_cmp(r, mod, ndigits) != -1) {
	663	+ u64 carry;
	664	+
	665	+ carry = vli_sub(r, r, mod, ndigits);
	666	+ vli_usub(r + ndigits, r + ndigits, carry, ndigits);
	667	+ }
	668	+ vli_set(result, r, ndigits);
403	669	}
404	670
405	671	/* Computes p_result = p_product % curve_p.
..	..	@@ -509,13 +775,32 @@
509	775	}
510	776	}
511	777
512		-/* Computes result = product % curve_prime
513		- * from http://www.nsa.gov/ia/_files/nist-routines.pdf
514		-*/
	778	+/* Computes result = product % curve_prime for different curve_primes.
	779	+ *
	780	+ * Note that curve_primes are distinguished just by heuristic check and
	781	+ * not by complete conformance check.
	782	+ */
515	783	static bool vli_mmod_fast(u64 *result, u64 *product,
516	784	const u64 *curve_prime, unsigned int ndigits)
517	785	{
518	786	u64 tmp[2 * ECC_MAX_DIGITS];
	787	+
	788	+ /* Currently, both NIST primes have -1 in lowest qword. */
	789	+ if (curve_prime[0] != -1ull) {
	790	+ /* Try to handle Pseudo-Mersenne primes. */
	791	+ if (curve_prime[ndigits - 1] == -1ull) {
	792	+ vli_mmod_special(result, product, curve_prime,
	793	+ ndigits);
	794	+ return true;
	795	+ } else if (curve_prime[ndigits - 1] == 1ull << 63 &&
	796	+ curve_prime[ndigits - 2] == 0) {
	797	+ vli_mmod_special2(result, product, curve_prime,
	798	+ ndigits);
	799	+ return true;
	800	+ }
	801	+ vli_mmod_barrett(result, product, curve_prime, ndigits);
	802	+ return true;
	803	+ }
519	804
520	805	switch (ndigits) {
521	806	case 3:
..	..	@@ -525,12 +810,25 @@
525	810	vli_mmod_fast_256(result, product, curve_prime, tmp);
526	811	break;
527	812	default:
528		- pr_err("unsupports digits size!\n");
	813	+ pr_err_ratelimited("ecc: unsupported digits size!\n");
529	814	return false;
530	815	}
531	816
532	817	return true;
533	818	}
	819	+
	820	+/* Computes result = (left * right) % mod.
	821	+ * Assumes that mod is a big enough curve order.
	822	+ */
	823	+void vli_mod_mult_slow(u64 *result, const u64 *left, const u64 *right,
	824	+ const u64 *mod, unsigned int ndigits)
	825	+{
	826	+ u64 product[ECC_MAX_DIGITS * 2];
	827	+
	828	+ vli_mult(product, left, right, ndigits);
	829	+ vli_mmod_slow(result, product, mod, ndigits);
	830	+}
	831	+EXPORT_SYMBOL(vli_mod_mult_slow);
534	832
535	833	/* Computes result = (left * right) % curve_prime. */
536	834	static void vli_mod_mult_fast(u64 result, const u64 left, const u64 *right,
..	..	@@ -557,7 +855,7 @@
557	855	* See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
558	856	* https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
559	857	*/
560		-static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
	858	+void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
561	859	unsigned int ndigits)
562	860	{
563	861	u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
..	..	@@ -630,6 +928,7 @@
630	928
631	929	vli_set(result, u, ndigits);
632	930	}
	931	+EXPORT_SYMBOL(vli_mod_inv);
633	932
634	933	/* ------ Point operations ------ */
635	934
..	..	@@ -641,7 +940,7 @@
641	940	}
642	941
643	942	/* Point multiplication algorithm using Montgomery's ladder with co-Z
644		- * coordinates. From http://eprint.iacr.org/2011/338.pdf
	943	+ * coordinates. From https://eprint.iacr.org/2011/338.pdf
645	944	*/
646	945
647	946	/* Double in place */
..	..	@@ -903,6 +1202,85 @@
903	1202	vli_set(result->y, ry[0], ndigits);
904	1203	}
905	1204
	1205	+/* Computes R = P + Q mod p */
	1206	+static void ecc_point_add(const struct ecc_point *result,
	1207	+ const struct ecc_point *p, const struct ecc_point *q,
	1208	+ const struct ecc_curve *curve)
	1209	+{
	1210	+ u64 z[ECC_MAX_DIGITS];
	1211	+ u64 px[ECC_MAX_DIGITS];
	1212	+ u64 py[ECC_MAX_DIGITS];
	1213	+ unsigned int ndigits = curve->g.ndigits;
	1214	+
	1215	+ vli_set(result->x, q->x, ndigits);
	1216	+ vli_set(result->y, q->y, ndigits);
	1217	+ vli_mod_sub(z, result->x, p->x, curve->p, ndigits);
	1218	+ vli_set(px, p->x, ndigits);
	1219	+ vli_set(py, p->y, ndigits);
	1220	+ xycz_add(px, py, result->x, result->y, curve->p, ndigits);
	1221	+ vli_mod_inv(z, z, curve->p, ndigits);
	1222	+ apply_z(result->x, result->y, z, curve->p, ndigits);
	1223	+}
	1224	+
	1225	+/* Computes R = u1P + u2Q mod p using Shamir's trick.
	1226	+ * Based on: Kenneth MacKay's micro-ecc (2014).
	1227	+ */
	1228	+void ecc_point_mult_shamir(const struct ecc_point *result,
	1229	+ const u64 *u1, const struct ecc_point *p,
	1230	+ const u64 *u2, const struct ecc_point *q,
	1231	+ const struct ecc_curve *curve)
	1232	+{
	1233	+ u64 z[ECC_MAX_DIGITS];
	1234	+ u64 sump[2][ECC_MAX_DIGITS];
	1235	+ u64 *rx = result->x;
	1236	+ u64 *ry = result->y;
	1237	+ unsigned int ndigits = curve->g.ndigits;
	1238	+ unsigned int num_bits;
	1239	+ struct ecc_point sum = ECC_POINT_INIT(sump[0], sump[1], ndigits);
	1240	+ const struct ecc_point *points[4];
	1241	+ const struct ecc_point *point;
	1242	+ unsigned int idx;
	1243	+ int i;
	1244	+
	1245	+ ecc_point_add(&sum, p, q, curve);
	1246	+ points[0] = NULL;
	1247	+ points[1] = p;
	1248	+ points[2] = q;
	1249	+ points[3] = &sum;
	1250	+
	1251	+ num_bits = max(vli_num_bits(u1, ndigits),
	1252	+ vli_num_bits(u2, ndigits));
	1253	+ i = num_bits - 1;
	1254	+ idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1);
	1255	+ point = points[idx];
	1256	+
	1257	+ vli_set(rx, point->x, ndigits);
	1258	+ vli_set(ry, point->y, ndigits);
	1259	+ vli_clear(z + 1, ndigits - 1);
	1260	+ z[0] = 1;
	1261	+
	1262	+ for (--i; i >= 0; i--) {
	1263	+ ecc_point_double_jacobian(rx, ry, z, curve->p, ndigits);
	1264	+ idx = (!!vli_test_bit(u1, i)) | ((!!vli_test_bit(u2, i)) << 1);
	1265	+ point = points[idx];
	1266	+ if (point) {
	1267	+ u64 tx[ECC_MAX_DIGITS];
	1268	+ u64 ty[ECC_MAX_DIGITS];
	1269	+ u64 tz[ECC_MAX_DIGITS];
	1270	+
	1271	+ vli_set(tx, point->x, ndigits);
	1272	+ vli_set(ty, point->y, ndigits);
	1273	+ apply_z(tx, ty, z, curve->p, ndigits);
	1274	+ vli_mod_sub(tz, rx, tx, curve->p, ndigits);
	1275	+ xycz_add(tx, ty, rx, ry, curve->p, ndigits);
	1276	+ vli_mod_mult_fast(z, z, tz, curve->p, ndigits);
	1277	+ }
	1278	+ }
	1279	+ vli_mod_inv(z, z, curve->p, ndigits);
	1280	+ apply_z(rx, ry, z, curve->p, ndigits);
	1281	+}
	1282	+EXPORT_SYMBOL(ecc_point_mult_shamir);
	1283	+
906	1284	static inline void ecc_swap_digits(const u64 *in, u64 *out,
907	1285	unsigned int ndigits)
908	1286	{
..	..	@@ -949,6 +1327,7 @@
949	1327
950	1328	return __ecc_is_key_valid(curve, private_key, ndigits);
951	1329	}
	1330	+EXPORT_SYMBOL(ecc_is_key_valid);
952	1331
953	1332	/*
954	1333	* ECC private keys are generated using the method of extra random bits,
..	..	@@ -1001,6 +1380,7 @@
1001	1380
1002	1381	return 0;
1003	1382	}
	1383	+EXPORT_SYMBOL(ecc_gen_privkey);
1004	1384
1005	1385	int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
1006	1386	const u64 *private_key, u64 *public_key)
..	..	@@ -1024,7 +1404,9 @@
1024	1404	}
1025	1405
1026	1406	ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits);
1027		- if (ecc_point_is_zero(pk)) {
	1407	+
	1408	+ /* SP800-56A rev 3 5.6.2.1.3 key check */
	1409	+ if (ecc_is_pubkey_valid_full(curve, pk)) {
1028	1410	ret = -EAGAIN;
1029	1411	goto err_free_point;
1030	1412	}
..	..	@@ -1037,12 +1419,16 @@
1037	1419	out:
1038	1420	return ret;
1039	1421	}
	1422	+EXPORT_SYMBOL(ecc_make_pub_key);
1040	1423
1041	1424	/* SP800-56A section 5.6.2.3.4 partial verification: ephemeral keys only */
1042		-static int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
1043		- struct ecc_point *pk)
	1425	+int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
	1426	+ struct ecc_point *pk)
1044	1427	{
1045	1428	u64 yy[ECC_MAX_DIGITS], xxx[ECC_MAX_DIGITS], w[ECC_MAX_DIGITS];
	1429	+
	1430	+ if (WARN_ON(pk->ndigits != curve->g.ndigits))
	1431	+ return -EINVAL;
1046	1432
1047	1433	/* Check 1: Verify key is not the zero point. */
1048	1434	if (ecc_point_is_zero(pk))
..	..	@@ -1065,8 +1451,35 @@
1065	1451	return -EINVAL;
1066	1452
1067	1453	return 0;
1068		-
1069	1454	}
	1455	+EXPORT_SYMBOL(ecc_is_pubkey_valid_partial);
	1456	+
	1457	+/* SP800-56A section 5.6.2.3.3 full verification */
	1458	+int ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
	1459	+ struct ecc_point *pk)
	1460	+{
	1461	+ struct ecc_point *nQ;
	1462	+
	1463	+ /* Checks 1 through 3 */
	1464	+ int ret = ecc_is_pubkey_valid_partial(curve, pk);
	1465	+
	1466	+ if (ret)
	1467	+ return ret;
	1468	+
	1469	+ /* Check 4: Verify that nQ is the zero point. */
	1470	+ nQ = ecc_alloc_point(pk->ndigits);
	1471	+ if (!nQ)
	1472	+ return -ENOMEM;
	1473	+
	1474	+ ecc_point_mult(nQ, pk, curve->n, NULL, curve, pk->ndigits);
	1475	+ if (!ecc_point_is_zero(nQ))
	1476	+ ret = -EINVAL;
	1477	+
	1478	+ ecc_free_point(nQ);
	1479	+
	1480	+ return ret;
	1481	+}
	1482	+EXPORT_SYMBOL(ecc_is_pubkey_valid_full);
1070	1483
1071	1484	int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
1072	1485	const u64 *private_key, const u64 *public_key,
..	..	@@ -1111,14 +1524,22 @@
1111	1524
1112	1525	ecc_point_mult(product, pk, priv, rand_z, curve, ndigits);
1113	1526
	1527	+ if (ecc_point_is_zero(product)) {
	1528	+ ret = -EFAULT;
	1529	+ goto err_validity;
	1530	+ }
	1531	+
1114	1532	ecc_swap_digits(product->x, secret, ndigits);
1115	1533
1116		- if (ecc_point_is_zero(product))
1117		- ret = -EFAULT;
1118		-
	1534	+err_validity:
	1535	+ memzero_explicit(priv, sizeof(priv));
	1536	+ memzero_explicit(rand_z, sizeof(rand_z));
1119	1537	ecc_free_point(product);
1120	1538	err_alloc_product:
1121	1539	ecc_free_point(pk);
1122	1540	out:
1123	1541	return ret;
1124	1542	}
	1543	+EXPORT_SYMBOL(crypto_ecdh_shared_secret);
	1544	+
	1545	+MODULE_LICENSE("Dual BSD/GPL");