8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
+ use std:: uint;
12
+
11
13
use cryptoutil:: { write_u64_be, write_u32_be, read_u64v_be, read_u32v_be, FixedBuffer ,
12
14
FixedBuffer128 , FixedBuffer64 , StandardPadding } ;
13
15
use digest:: Digest ;
14
16
15
17
18
+ // SHA-512 and SHA-256 use essentially the same calculations, which are implemented by these macros.
19
+ // Inlining the calculations seems to result in better generated code.
20
// schedule_round!(t): computes one entry of the message schedule array `W`.
//
// Expands to a single assignment that sets W[t] to
//     sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) + W[t - 16]
// so it reads entries t-2, t-7, t-15 and t-16, which must already be filled in.
//
// The macro takes only the index expression `$t`. `W`, `sigma0` and `sigma1`
// are not parameters; they are picked up by name at the expansion site, so the
// macro can only be invoked where all three are in scope.
+ macro_rules! schedule_round( ( $t: expr) => (
21
+ W [ $t] = sigma1( W [ $t - 2 ] ) + W [ $t - 7 ] + sigma0( W [ $t - 15 ] ) + W [ $t - 16 ] ;
22
+ )
23
+ )
24
+
25
// sha2_round!(A, B, C, D, E, F, G, H, K, t): performs one round of the
// compression loop on eight mutable variables named by the caller.
//
// Parameters:
//   $A..$H - identifiers of the eight state variables; only $D and $H are
//            written, the other six are read.
//   $K     - identifier of a table indexed by `$t` (call sites in this file
//            pass K64 or K32).
//   $t     - index expression used for both `$K[$t]` and `W[$t]`.
//
// Steps, in order (the order matters because $D uses the intermediate $H):
//   1. $H += sum1($E) + ch($E, $F, $G) + $K[$t] + W[$t]
//   2. $D += $H
//   3. $H += sum0($A) + maj($A, $B, $C)
//
// `W`, `sum0`, `sum1`, `ch` and `maj` are not parameters; they are picked up
// by name at the expansion site and must be in scope there.
//
// Call sites rotate the argument list by one position per round rather than
// moving values between variables.
+ macro_rules! sha2_round(
26
+ ( $A: ident, $B: ident, $C: ident, $D: ident,
27
+ $E: ident, $F: ident, $G: ident, $H: ident, $K: ident, $t: expr) => (
28
+ {
29
+ $H += sum1( $E) + ch( $E, $F, $G) + $K[ $t] + W [ $t] ;
30
+ $D += $H;
31
+ $H += sum0( $A) + maj( $A, $B, $C) ;
32
+ }
33
+ )
34
+ )
35
+
36
+
16
37
// BitCounter is a specialized structure intended simply for counting the
17
38
// number of bits that have been processed by the SHA-2 512 family of functions.
18
39
// It does very little overflow checking since such checking is not necessary
@@ -117,15 +138,6 @@ impl Engine512State {
117
138
( ( x << 45 ) | ( x >> 19 ) ) ^ ( ( x << 3 ) | ( x >> 61 ) ) ^ ( x >> 6 )
118
139
}
119
140
120
- let mut W = [ 0u64 , ..80 ] ;
121
-
122
- read_u64v_be( W . mut_slice( 0 , 16 ) , data) ;
123
-
124
- foreach t in range( 16 u, 80 ) {
125
- W [ t] = sigma1( W [ t - 2 ] ) + W [ t - 7 ] + sigma0( W [ t - 15 ] ) +
126
- W [ t - 16 ] ;
127
- }
128
-
129
141
let mut a = self . H0 ;
130
142
let mut b = self . H1 ;
131
143
let mut c = self . H2 ;
@@ -135,48 +147,41 @@ impl Engine512State {
135
147
let mut g = self . H6 ;
136
148
let mut h = self . H7 ;
137
149
138
- let mut t = 0 ;
139
-
140
- foreach _ in range( 0 u, 10 ) {
141
- h += sum1( e) + ch( e, f, g) + K64 [ t] + W [ t] ;
142
- d += h;
143
- h += sum0( a) + maj( a, b, c) ;
144
- t += 1 ;
145
-
146
- g += sum1( d) + ch( d, e, f) + K64 [ t] + W [ t] ;
147
- c += g;
148
- g += sum0( h) + maj( h, a, b) ;
149
- t += 1 ;
150
-
151
- f += sum1( c) + ch( c, d, e) + K64 [ t] + W [ t] ;
152
- b += f;
153
- f += sum0( g) + maj( g, h, a) ;
154
- t += 1 ;
155
-
156
- e += sum1( b) + ch( b, c, d) + K64 [ t] + W [ t] ;
157
- a += e;
158
- e += sum0( f) + maj( f, g, h) ;
159
- t += 1 ;
160
-
161
- d += sum1( a) + ch( a, b, c) + K64 [ t] + W [ t] ;
162
- h += d;
163
- d += sum0( e) + maj( e, f, g) ;
164
- t += 1 ;
165
-
166
- c += sum1( h) + ch( h, a, b) + K64 [ t] + W [ t] ;
167
- g += c;
168
- c += sum0( d) + maj( d, e, f) ;
169
- t += 1 ;
170
-
171
- b += sum1( g) + ch( g, h, a) + K64 [ t] + W [ t] ;
172
- f += b;
173
- b += sum0( c) + maj( c, d, e) ;
174
- t += 1 ;
175
-
176
- a += sum1( f) + ch( f, g, h) + K64 [ t] + W [ t] ;
177
- e += a;
178
- a += sum0( b) + maj( b, c, d) ;
179
- t += 1 ;
150
+ let mut W = [ 0u64 , ..80 ] ;
151
+
152
+ read_u64v_be( W . mut_slice( 0 , 16 ) , data) ;
153
+
154
+ // Putting the message schedule inside the same loop as the round calculations allows
155
+ // the compiler to generate better code.
156
+ for uint:: range_step( 0 , 64 , 8 ) |t| {
157
+ schedule_round ! ( t + 16 ) ;
158
+ schedule_round ! ( t + 17 ) ;
159
+ schedule_round ! ( t + 18 ) ;
160
+ schedule_round ! ( t + 19 ) ;
161
+ schedule_round ! ( t + 20 ) ;
162
+ schedule_round ! ( t + 21 ) ;
163
+ schedule_round ! ( t + 22 ) ;
164
+ schedule_round ! ( t + 23 ) ;
165
+
166
+ sha2_round ! ( a, b, c, d, e, f, g, h, K64 , t) ;
167
+ sha2_round ! ( h, a, b, c, d, e, f, g, K64 , t + 1 ) ;
168
+ sha2_round ! ( g, h, a, b, c, d, e, f, K64 , t + 2 ) ;
169
+ sha2_round ! ( f, g, h, a, b, c, d, e, K64 , t + 3 ) ;
170
+ sha2_round ! ( e, f, g, h, a, b, c, d, K64 , t + 4 ) ;
171
+ sha2_round ! ( d, e, f, g, h, a, b, c, K64 , t + 5 ) ;
172
+ sha2_round ! ( c, d, e, f, g, h, a, b, K64 , t + 6 ) ;
173
+ sha2_round ! ( b, c, d, e, f, g, h, a, K64 , t + 7 ) ;
174
+ }
175
+
176
+ for uint:: range_step( 64 , 80 , 8 ) |t| {
177
+ sha2_round ! ( a, b, c, d, e, f, g, h, K64 , t) ;
178
+ sha2_round ! ( h, a, b, c, d, e, f, g, K64 , t + 1 ) ;
179
+ sha2_round ! ( g, h, a, b, c, d, e, f, K64 , t + 2 ) ;
180
+ sha2_round ! ( f, g, h, a, b, c, d, e, K64 , t + 3 ) ;
181
+ sha2_round ! ( e, f, g, h, a, b, c, d, K64 , t + 4 ) ;
182
+ sha2_round ! ( d, e, f, g, h, a, b, c, K64 , t + 5 ) ;
183
+ sha2_round ! ( c, d, e, f, g, h, a, b, K64 , t + 6 ) ;
184
+ sha2_round ! ( b, c, d, e, f, g, h, a, K64 , t + 7 ) ;
180
185
}
181
186
182
187
self . H0 += a ;
@@ -523,15 +528,6 @@ impl Engine256State {
523
528
( ( x >> 17 ) | ( x << 15 ) ) ^ ( ( x >> 19 ) | ( x << 13 ) ) ^ ( x >> 10 )
524
529
}
525
530
526
- let mut W = [ 0u32 , ..80 ] ;
527
-
528
- read_u32v_be( W . mut_slice( 0 , 16 ) , data) ;
529
-
530
- foreach t in range( 16 u, 64 ) {
531
- W [ t] = sigma1( W [ t - 2 ] ) + W [ t - 7 ] + sigma0( W [ t - 15 ] ) +
532
- W [ t - 16 ] ;
533
- }
534
-
535
531
let mut a = self . H0 ;
536
532
let mut b = self . H1 ;
537
533
let mut c = self . H2 ;
@@ -541,48 +537,41 @@ impl Engine256State {
541
537
let mut g = self . H6 ;
542
538
let mut h = self . H7 ;
543
539
544
- let mut t = 0 ;
545
-
546
- foreach _ in range( 0 u, 8 ) {
547
- h += sum1( e) + ch( e, f, g) + K32 [ t] + W [ t] ;
548
- d += h;
549
- h += sum0( a) + maj( a, b, c) ;
550
- t += 1 ;
551
-
552
- g += sum1( d) + ch( d, e, f) + K32 [ t] + W [ t] ;
553
- c += g;
554
- g += sum0( h) + maj( h, a, b) ;
555
- t += 1 ;
556
-
557
- f += sum1( c) + ch( c, d, e) + K32 [ t] + W [ t] ;
558
- b += f;
559
- f += sum0( g) + maj( g, h, a) ;
560
- t += 1 ;
561
-
562
- e += sum1( b) + ch( b, c, d) + K32 [ t] + W [ t] ;
563
- a += e;
564
- e += sum0( f) + maj( f, g, h) ;
565
- t += 1 ;
566
-
567
- d += sum1( a) + ch( a, b, c) + K32 [ t] + W [ t] ;
568
- h += d;
569
- d += sum0( e) + maj( e, f, g) ;
570
- t += 1 ;
571
-
572
- c += sum1( h) + ch( h, a, b) + K32 [ t] + W [ t] ;
573
- g += c;
574
- c += sum0( d) + maj( d, e, f) ;
575
- t += 1 ;
576
-
577
- b += sum1( g) + ch( g, h, a) + K32 [ t] + W [ t] ;
578
- f += b;
579
- b += sum0( c) + maj( c, d, e) ;
580
- t += 1 ;
581
-
582
- a += sum1( f) + ch( f, g, h) + K32 [ t] + W [ t] ;
583
- e += a;
584
- a += sum0( b) + maj( b, c, d) ;
585
- t += 1 ;
540
+ let mut W = [ 0u32 , ..64 ] ;
541
+
542
+ read_u32v_be( W . mut_slice( 0 , 16 ) , data) ;
543
+
544
+ // Putting the message schedule inside the same loop as the round calculations allows
545
+ // the compiler to generate better code.
546
+ for uint:: range_step( 0 , 48 , 8 ) |t| {
547
+ schedule_round!( t + 16 ) ;
548
+ schedule_round!( t + 17 ) ;
549
+ schedule_round!( t + 18 ) ;
550
+ schedule_round!( t + 19 ) ;
551
+ schedule_round!( t + 20 ) ;
552
+ schedule_round!( t + 21 ) ;
553
+ schedule_round!( t + 22 ) ;
554
+ schedule_round!( t + 23 ) ;
555
+
556
+ sha2_round!( a, b, c, d, e, f, g, h, K32 , t) ;
557
+ sha2_round!( h, a, b, c, d, e, f, g, K32 , t + 1 ) ;
558
+ sha2_round!( g, h, a, b, c, d, e, f, K32 , t + 2 ) ;
559
+ sha2_round!( f, g, h, a, b, c, d, e, K32 , t + 3 ) ;
560
+ sha2_round!( e, f, g, h, a, b, c, d, K32 , t + 4 ) ;
561
+ sha2_round!( d, e, f, g, h, a, b, c, K32 , t + 5 ) ;
562
+ sha2_round!( c, d, e, f, g, h, a, b, K32 , t + 6 ) ;
563
+ sha2_round!( b, c, d, e, f, g, h, a, K32 , t + 7 ) ;
564
+ }
565
+
566
+ for uint:: range_step( 48 , 64 , 8 ) |t| {
567
+ sha2_round!( a, b, c, d, e, f, g, h, K32 , t) ;
568
+ sha2_round!( h, a, b, c, d, e, f, g, K32 , t + 1 ) ;
569
+ sha2_round!( g, h, a, b, c, d, e, f, K32 , t + 2 ) ;
570
+ sha2_round!( f, g, h, a, b, c, d, e, K32 , t + 3 ) ;
571
+ sha2_round!( e, f, g, h, a, b, c, d, K32 , t + 4 ) ;
572
+ sha2_round!( d, e, f, g, h, a, b, c, K32 , t + 5 ) ;
573
+ sha2_round!( c, d, e, f, g, h, a, b, K32 , t + 6 ) ;
574
+ sha2_round!( b, c, d, e, f, g, h, a, K32 , t + 7 ) ;
586
575
}
587
576
588
577
self . H0 += a;
0 commit comments