19
19
// The differences between this and the original implementation are
20
20
// due to the calling conventions and initialization of constants.
21
21
22
- //go:build gc && !purego
22
+ //go:build gc && !purego && (ppc64 || ppc64le)
23
23
24
24
#include "textflag.h"
25
25
36
36
// for VPERMXOR
37
37
#define MASK R18
38
38
39
// Superseded layout of the consts<> table (the '-' side of this change):
// 0xc0 bytes expressed as 24 entries of 8 bytes each.
// Each 8-byte value packs two 32-bit words, with the word for the lower
// address in the low half (e.g. 0x3320646e61707865 = 0x61707865 then
// 0x3320646e when stored little-endian).
// NOTE(review): presumably the in-memory word order of these entries depends
// on the target's byte order, which is why the replacement uses 4-byte
// entries -- confirm against the assembler's DATA semantics.
- DATA consts<>+0x00 (SB)/8 , $0x3320646e61707865
40
- DATA consts<>+0x08 (SB)/8 , $0x6b20657479622d32
41
- DATA consts<>+0x10 (SB)/8 , $0x0000000000000001
42
- DATA consts<>+0x18 (SB)/8 , $0x0000000000000000
43
- DATA consts<>+0x20 (SB)/8 , $0x0000000000000004
44
- DATA consts<>+0x28 (SB)/8 , $0x0000000000000000
45
- DATA consts<>+0x30 (SB)/8 , $0x0a0b08090e0f0c0d
46
- DATA consts<>+0x38 (SB)/8 , $0x0203000106070405
47
- DATA consts<>+0x40 (SB)/8 , $0x090a0b080d0e0f0c
48
- DATA consts<>+0x48 (SB)/8 , $0x0102030005060704
49
- DATA consts<>+0x50 (SB)/8 , $0x6170786561707865
50
- DATA consts<>+0x58 (SB)/8 , $0x6170786561707865
51
- DATA consts<>+0x60 (SB)/8 , $0x3320646e3320646e
52
- DATA consts<>+0x68 (SB)/8 , $0x3320646e3320646e
53
- DATA consts<>+0x70 (SB)/8 , $0x79622d3279622d32
54
- DATA consts<>+0x78 (SB)/8 , $0x79622d3279622d32
55
- DATA consts<>+0x80 (SB)/8 , $0x6b2065746b206574
56
- DATA consts<>+0x88 (SB)/8 , $0x6b2065746b206574
57
- DATA consts<>+0x90 (SB)/8 , $0x0000000100000000
58
- DATA consts<>+0x98 (SB)/8 , $0x0000000300000002
59
- DATA consts<>+0xa0 (SB)/8 , $0x5566774411223300
60
- DATA consts<>+0xa8 (SB)/8 , $0xddeeffcc99aabb88
61
- DATA consts<>+0xb0 (SB)/8 , $0x6677445522330011
62
- DATA consts<>+0xb8 (SB)/8 , $0xeeffccddaabb8899
39
// consts<>: file-local read-only table of 0xc0 (192) bytes, laid out as
// 48 entries of 4 bytes each (offsets 0x00 through 0xbc).
//
// 0x00-0x0f: the four words 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574.
// Read as little-endian bytes they spell the ASCII string "expand 32-byte k".
+ DATA consts<>+0x00 (SB)/4 , $0x61707865
40
+ DATA consts<>+0x04 (SB)/4 , $0x3320646e
41
+ DATA consts<>+0x08 (SB)/4 , $0x79622d32
42
+ DATA consts<>+0x0c (SB)/4 , $0x6b206574
43
// 0x10-0x1f: the words {1, 0, 0, 0}.
// NOTE(review): presumably a block-counter increment of one -- confirm at the use site.
+ DATA consts<>+0x10 (SB)/4 , $0x00000001
44
+ DATA consts<>+0x14 (SB)/4 , $0x00000000
45
+ DATA consts<>+0x18 (SB)/4 , $0x00000000
46
+ DATA consts<>+0x1c (SB)/4 , $0x00000000
47
// 0x20-0x2f: the words {4, 0, 0, 0}.
// NOTE(review): presumably a block-counter increment of four -- confirm at the use site.
+ DATA consts<>+0x20 (SB)/4 , $0x00000004
48
+ DATA consts<>+0x24 (SB)/4 , $0x00000000
49
+ DATA consts<>+0x28 (SB)/4 , $0x00000000
50
+ DATA consts<>+0x2c (SB)/4 , $0x00000000
51
// 0x30-0x3f and 0x40-0x4f: two 16-byte vectors whose bytes are a
// rearrangement of the indices 0x00..0x0f.
// NOTE(review): these look like byte-permute selectors for the word rotations
// -- confirm against the loads that reference these offsets.
+ DATA consts<>+0x30 (SB)/4 , $0x0e0f0c0d
52
+ DATA consts<>+0x34 (SB)/4 , $0x0a0b0809
53
+ DATA consts<>+0x38 (SB)/4 , $0x06070405
54
+ DATA consts<>+0x3c (SB)/4 , $0x02030001
55
+ DATA consts<>+0x40 (SB)/4 , $0x0d0e0f0c
56
+ DATA consts<>+0x44 (SB)/4 , $0x090a0b08
57
+ DATA consts<>+0x48 (SB)/4 , $0x05060704
58
+ DATA consts<>+0x4c (SB)/4 , $0x01020300
59
// 0x50-0x8f: each of the four words from 0x00-0x0f repeated four times,
// one 16-byte vector per word.
+ DATA consts<>+0x50 (SB)/4 , $0x61707865
60
+ DATA consts<>+0x54 (SB)/4 , $0x61707865
61
+ DATA consts<>+0x58 (SB)/4 , $0x61707865
62
+ DATA consts<>+0x5c (SB)/4 , $0x61707865
63
+ DATA consts<>+0x60 (SB)/4 , $0x3320646e
64
+ DATA consts<>+0x64 (SB)/4 , $0x3320646e
65
+ DATA consts<>+0x68 (SB)/4 , $0x3320646e
66
+ DATA consts<>+0x6c (SB)/4 , $0x3320646e
67
+ DATA consts<>+0x70 (SB)/4 , $0x79622d32
68
+ DATA consts<>+0x74 (SB)/4 , $0x79622d32
69
+ DATA consts<>+0x78 (SB)/4 , $0x79622d32
70
+ DATA consts<>+0x7c (SB)/4 , $0x79622d32
71
+ DATA consts<>+0x80 (SB)/4 , $0x6b206574
72
+ DATA consts<>+0x84 (SB)/4 , $0x6b206574
73
+ DATA consts<>+0x88 (SB)/4 , $0x6b206574
74
+ DATA consts<>+0x8c (SB)/4 , $0x6b206574
75
// 0x90-0x9f: the words {0, 1, 2, 3}.
// NOTE(review): presumably per-lane counter offsets for four parallel blocks
// -- confirm at the use site.
+ DATA consts<>+0x90 (SB)/4 , $0x00000000
76
+ DATA consts<>+0x94 (SB)/4 , $0x00000001
77
+ DATA consts<>+0x98 (SB)/4 , $0x00000002
78
+ DATA consts<>+0x9c (SB)/4 , $0x00000003
79
// 0xa0-0xaf and 0xb0-0xbf: two 16-byte vectors of nibble-pair byte patterns.
// NOTE(review): probably selector masks for the VPERMXOR-based rotations
// (see the MASK register definition) -- confirm against the loads.
+ DATA consts<>+0xa0 (SB)/4 , $0x11223300
80
+ DATA consts<>+0xa4 (SB)/4 , $0x55667744
81
+ DATA consts<>+0xa8 (SB)/4 , $0x99aabb88
82
+ DATA consts<>+0xac (SB)/4 , $0xddeeffcc
83
+ DATA consts<>+0xb0 (SB)/4 , $0x22330011
84
+ DATA consts<>+0xb4 (SB)/4 , $0x66774455
85
+ DATA consts<>+0xb8 (SB)/4 , $0xaabb8899
86
+ DATA consts<>+0xbc (SB)/4 , $0xeeffccdd
63
87
// Declare the table as a read-only symbol of 0xc0 bytes, matching the
// last DATA entry above (0xbc + 4).
GLOBL consts<>(SB), RODATA, $0xc0
64
88
89
// BE_XXBRW_INIT / BE_XXBRW: helper macros whose expansion depends on the target.
//
// When GOARCH_ppc64 is defined:
//   BE_XXBRW_INIT() builds a permute-control vector in V24, using V25 as
//   scratch: LVSL from (R0)(R0) into V24, VSPLTISB of 3 into V25, then
//   V24 = V24 XOR V25. Both V24 and V25 are overwritten.
//   BE_XXBRW(vr) rewrites vr in place with VPERM vr, vr, V24, vr, so V24 must
//   still hold the value set by BE_XXBRW_INIT() at every use.
//   NOTE(review): assuming LVSL at address 0 yields bytes 0x00..0x0f, XORing
//   each byte with 3 gives 03 02 01 00 07 06 05 04 ..., i.e. a byte reversal
//   within each 32-bit word, as the name XXBRW suggests -- confirm against
//   the Power ISA.
+ #ifdef GOARCH_ppc64
90
+ #define BE_XXBRW_INIT() \
91
+ LVSL (R0)(R0), V24 \
92
+ VSPLTISB $3 , V25 \
93
+ VXOR V24, V25, V24 \
94
+
95
+ #define BE_XXBRW(vr) VPERM vr, vr, V24, vr
96
// On any other target both macros expand to nothing, so their call sites
// emit no instructions.
+ #else
97
+ #define BE_XXBRW_INIT()
98
+ #define BE_XXBRW(vr)
99
+ #endif
100
+
65
101
//func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
66
102
TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64 -40
67
103
MOVD out +0 (FP), OUT
@@ -94,6 +130,8 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
94
130
// Clear V27
95
131
VXOR V27, V27, V27
96
132
133
+ BE_XXBRW_INIT()
134
+
97
135
// V28
98
136
LXVW4X (CONSTBASE)(R11), VS60
99
137
@@ -299,6 +337,11 @@ loop_vsx:
299
337
VADDUWM V8, V18, V8
300
338
VADDUWM V12, V19, V12
301
339
340
+ BE_XXBRW(V0)
341
+ BE_XXBRW(V4)
342
+ BE_XXBRW(V8)
343
+ BE_XXBRW(V12)
344
+
302
345
CMPU LEN, $64
303
346
BLT tail_vsx
304
347
@@ -327,15 +370,20 @@ loop_vsx:
327
370
VADDUWM V9, V18, V8
328
371
VADDUWM V13, V19, V12
329
372
373
+ BE_XXBRW(V0)
374
+ BE_XXBRW(V4)
375
+ BE_XXBRW(V8)
376
+ BE_XXBRW(V12)
377
+
330
378
CMPU LEN, $64
331
379
BLT tail_vsx
332
380
333
381
LXVW4X (INP)(R0), VS59
334
382
LXVW4X (INP)(R8), VS60
335
383
LXVW4X (INP)(R9), VS61
336
384
LXVW4X (INP)(R10), VS62
337
- VXOR V27, V0, V27
338
385
386
+ VXOR V27, V0, V27
339
387
VXOR V28, V4, V28
340
388
VXOR V29, V8, V29
341
389
VXOR V30, V12, V30
@@ -354,6 +402,11 @@ loop_vsx:
354
402
VADDUWM V10, V18, V8
355
403
VADDUWM V14, V19, V12
356
404
405
+ BE_XXBRW(V0)
406
+ BE_XXBRW(V4)
407
+ BE_XXBRW(V8)
408
+ BE_XXBRW(V12)
409
+
357
410
CMPU LEN, $64
358
411
BLT tail_vsx
359
412
@@ -381,6 +434,11 @@ loop_vsx:
381
434
VADDUWM V11, V18, V8
382
435
VADDUWM V15, V19, V12
383
436
437
+ BE_XXBRW(V0)
438
+ BE_XXBRW(V4)
439
+ BE_XXBRW(V8)
440
+ BE_XXBRW(V12)
441
+
384
442
CMPU LEN, $64
385
443
BLT tail_vsx
386
444
@@ -408,9 +466,9 @@ loop_vsx:
408
466
409
467
done_vsx:
410
468
// Increment counter by number of 64 byte blocks
411
- MOVD (CNT), R14
469
+ MOVWZ (CNT), R14
412
470
ADD BLOCKS, R14
413
- MOVD R14, (CNT)
471
+ MOVWZ R14, (CNT)
414
472
RET
415
473
416
474
tail_vsx:
0 commit comments