Skip to content

Commit b61b08d

Browse files
committed
chacha20: extend ppc64le support to ppc64
This requires fixing an incorrect save of the counter. The counter is a word value. It happens to work on LE because length is limited to u32. Refactor the constant table to load correctly independent of byte ordering. Add byte order swapping where output needs to be converted to LE ordering for storage. Change-Id: Ic7e09bd1c769bb77dd6e817f5a8639ba765f4c0f Reviewed-on: https://go-review.googlesource.com/c/crypto/+/614297 Reviewed-by: Cherry Mui <[email protected]> Reviewed-by: Michael Knyszek <[email protected]> Reviewed-by: Archana Ravindar <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 6c21748 commit b61b08d

File tree

3 files changed

+88
-30
lines changed

3 files changed

+88
-30
lines changed

Diff for: chacha20/chacha_noasm.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build (!arm64 && !s390x && !ppc64le) || !gc || purego
5+
//go:build (!arm64 && !s390x && !ppc64 && !ppc64le) || !gc || purego
66

77
package chacha20
88

Diff for: chacha20/chacha_ppc64le.go renamed to chacha20/chacha_ppc64x.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// Use of this source code is governed by a BSD-style
33
// license that can be found in the LICENSE file.
44

5-
//go:build gc && !purego
5+
//go:build gc && !purego && (ppc64 || ppc64le)
66

77
package chacha20
88

Diff for: chacha20/chacha_ppc64le.s renamed to chacha20/chacha_ppc64x.s

+86-28
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
// The differences in this and the original implementation are
2020
// due to the calling conventions and initialization of constants.
2121

22-
//go:build gc && !purego
22+
//go:build gc && !purego && (ppc64 || ppc64le)
2323

2424
#include "textflag.h"
2525

@@ -36,32 +36,68 @@
3636
// for VPERMXOR
3737
#define MASK R18
3838

39-
DATA consts<>+0x00(SB)/8, $0x3320646e61707865
40-
DATA consts<>+0x08(SB)/8, $0x6b20657479622d32
41-
DATA consts<>+0x10(SB)/8, $0x0000000000000001
42-
DATA consts<>+0x18(SB)/8, $0x0000000000000000
43-
DATA consts<>+0x20(SB)/8, $0x0000000000000004
44-
DATA consts<>+0x28(SB)/8, $0x0000000000000000
45-
DATA consts<>+0x30(SB)/8, $0x0a0b08090e0f0c0d
46-
DATA consts<>+0x38(SB)/8, $0x0203000106070405
47-
DATA consts<>+0x40(SB)/8, $0x090a0b080d0e0f0c
48-
DATA consts<>+0x48(SB)/8, $0x0102030005060704
49-
DATA consts<>+0x50(SB)/8, $0x6170786561707865
50-
DATA consts<>+0x58(SB)/8, $0x6170786561707865
51-
DATA consts<>+0x60(SB)/8, $0x3320646e3320646e
52-
DATA consts<>+0x68(SB)/8, $0x3320646e3320646e
53-
DATA consts<>+0x70(SB)/8, $0x79622d3279622d32
54-
DATA consts<>+0x78(SB)/8, $0x79622d3279622d32
55-
DATA consts<>+0x80(SB)/8, $0x6b2065746b206574
56-
DATA consts<>+0x88(SB)/8, $0x6b2065746b206574
57-
DATA consts<>+0x90(SB)/8, $0x0000000100000000
58-
DATA consts<>+0x98(SB)/8, $0x0000000300000002
59-
DATA consts<>+0xa0(SB)/8, $0x5566774411223300
60-
DATA consts<>+0xa8(SB)/8, $0xddeeffcc99aabb88
61-
DATA consts<>+0xb0(SB)/8, $0x6677445522330011
62-
DATA consts<>+0xb8(SB)/8, $0xeeffccddaabb8899
39+
DATA consts<>+0x00(SB)/4, $0x61707865
40+
DATA consts<>+0x04(SB)/4, $0x3320646e
41+
DATA consts<>+0x08(SB)/4, $0x79622d32
42+
DATA consts<>+0x0c(SB)/4, $0x6b206574
43+
DATA consts<>+0x10(SB)/4, $0x00000001
44+
DATA consts<>+0x14(SB)/4, $0x00000000
45+
DATA consts<>+0x18(SB)/4, $0x00000000
46+
DATA consts<>+0x1c(SB)/4, $0x00000000
47+
DATA consts<>+0x20(SB)/4, $0x00000004
48+
DATA consts<>+0x24(SB)/4, $0x00000000
49+
DATA consts<>+0x28(SB)/4, $0x00000000
50+
DATA consts<>+0x2c(SB)/4, $0x00000000
51+
DATA consts<>+0x30(SB)/4, $0x0e0f0c0d
52+
DATA consts<>+0x34(SB)/4, $0x0a0b0809
53+
DATA consts<>+0x38(SB)/4, $0x06070405
54+
DATA consts<>+0x3c(SB)/4, $0x02030001
55+
DATA consts<>+0x40(SB)/4, $0x0d0e0f0c
56+
DATA consts<>+0x44(SB)/4, $0x090a0b08
57+
DATA consts<>+0x48(SB)/4, $0x05060704
58+
DATA consts<>+0x4c(SB)/4, $0x01020300
59+
DATA consts<>+0x50(SB)/4, $0x61707865
60+
DATA consts<>+0x54(SB)/4, $0x61707865
61+
DATA consts<>+0x58(SB)/4, $0x61707865
62+
DATA consts<>+0x5c(SB)/4, $0x61707865
63+
DATA consts<>+0x60(SB)/4, $0x3320646e
64+
DATA consts<>+0x64(SB)/4, $0x3320646e
65+
DATA consts<>+0x68(SB)/4, $0x3320646e
66+
DATA consts<>+0x6c(SB)/4, $0x3320646e
67+
DATA consts<>+0x70(SB)/4, $0x79622d32
68+
DATA consts<>+0x74(SB)/4, $0x79622d32
69+
DATA consts<>+0x78(SB)/4, $0x79622d32
70+
DATA consts<>+0x7c(SB)/4, $0x79622d32
71+
DATA consts<>+0x80(SB)/4, $0x6b206574
72+
DATA consts<>+0x84(SB)/4, $0x6b206574
73+
DATA consts<>+0x88(SB)/4, $0x6b206574
74+
DATA consts<>+0x8c(SB)/4, $0x6b206574
75+
DATA consts<>+0x90(SB)/4, $0x00000000
76+
DATA consts<>+0x94(SB)/4, $0x00000001
77+
DATA consts<>+0x98(SB)/4, $0x00000002
78+
DATA consts<>+0x9c(SB)/4, $0x00000003
79+
DATA consts<>+0xa0(SB)/4, $0x11223300
80+
DATA consts<>+0xa4(SB)/4, $0x55667744
81+
DATA consts<>+0xa8(SB)/4, $0x99aabb88
82+
DATA consts<>+0xac(SB)/4, $0xddeeffcc
83+
DATA consts<>+0xb0(SB)/4, $0x22330011
84+
DATA consts<>+0xb4(SB)/4, $0x66774455
85+
DATA consts<>+0xb8(SB)/4, $0xaabb8899
86+
DATA consts<>+0xbc(SB)/4, $0xeeffccdd
6387
GLOBL consts<>(SB), RODATA, $0xc0
6488

89+
#ifdef GOARCH_ppc64
90+
#define BE_XXBRW_INIT() \
91+
LVSL (R0)(R0), V24 \
92+
VSPLTISB $3, V25 \
93+
VXOR V24, V25, V24 \
94+
95+
#define BE_XXBRW(vr) VPERM vr, vr, V24, vr
96+
#else
97+
#define BE_XXBRW_INIT()
98+
#define BE_XXBRW(vr)
99+
#endif
100+
65101
//func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
66102
TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
67103
MOVD out+0(FP), OUT
@@ -94,6 +130,8 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
94130
// Clear V27
95131
VXOR V27, V27, V27
96132

133+
BE_XXBRW_INIT()
134+
97135
// V28
98136
LXVW4X (CONSTBASE)(R11), VS60
99137

@@ -299,6 +337,11 @@ loop_vsx:
299337
VADDUWM V8, V18, V8
300338
VADDUWM V12, V19, V12
301339

340+
BE_XXBRW(V0)
341+
BE_XXBRW(V4)
342+
BE_XXBRW(V8)
343+
BE_XXBRW(V12)
344+
302345
CMPU LEN, $64
303346
BLT tail_vsx
304347

@@ -327,15 +370,20 @@ loop_vsx:
327370
VADDUWM V9, V18, V8
328371
VADDUWM V13, V19, V12
329372

373+
BE_XXBRW(V0)
374+
BE_XXBRW(V4)
375+
BE_XXBRW(V8)
376+
BE_XXBRW(V12)
377+
330378
CMPU LEN, $64
331379
BLT tail_vsx
332380

333381
LXVW4X (INP)(R0), VS59
334382
LXVW4X (INP)(R8), VS60
335383
LXVW4X (INP)(R9), VS61
336384
LXVW4X (INP)(R10), VS62
337-
VXOR V27, V0, V27
338385

386+
VXOR V27, V0, V27
339387
VXOR V28, V4, V28
340388
VXOR V29, V8, V29
341389
VXOR V30, V12, V30
@@ -354,6 +402,11 @@ loop_vsx:
354402
VADDUWM V10, V18, V8
355403
VADDUWM V14, V19, V12
356404

405+
BE_XXBRW(V0)
406+
BE_XXBRW(V4)
407+
BE_XXBRW(V8)
408+
BE_XXBRW(V12)
409+
357410
CMPU LEN, $64
358411
BLT tail_vsx
359412

@@ -381,6 +434,11 @@ loop_vsx:
381434
VADDUWM V11, V18, V8
382435
VADDUWM V15, V19, V12
383436

437+
BE_XXBRW(V0)
438+
BE_XXBRW(V4)
439+
BE_XXBRW(V8)
440+
BE_XXBRW(V12)
441+
384442
CMPU LEN, $64
385443
BLT tail_vsx
386444

@@ -408,9 +466,9 @@ loop_vsx:
408466

409467
done_vsx:
410468
// Increment counter by number of 64 byte blocks
411-
MOVD (CNT), R14
469+
MOVWZ (CNT), R14
412470
ADD BLOCKS, R14
413-
MOVD R14, (CNT)
471+
MOVWZ R14, (CNT)
414472
RET
415473

416474
tail_vsx:

0 commit comments

Comments
 (0)