Skip to content

Commit ee3f753

Browse files
author
Palmer Cox
committed
Sha2: Re-write the Sha2 compression functions to improve performance.
The Sha2 compression functions were re-written to execute the message scheduling calculations in the same loop as the rest of the compression function, which allows the compiler to generate much better code. Additionally, the innermost parts of the compression functions were turned into macros to reduce code duplication and to make the functions more concise.
1 parent 654c536 commit ee3f753

File tree

1 file changed

+91
-102
lines changed

1 file changed

+91
-102
lines changed

src/libextra/crypto/sha2.rs

Lines changed: 91 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,32 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
use std::uint;
12+
1113
use cryptoutil::{write_u64_be, write_u32_be, read_u64v_be, read_u32v_be, FixedBuffer,
1214
FixedBuffer128, FixedBuffer64, StandardPadding};
1315
use digest::Digest;
1416

1517

18+
// Sha-512 and Sha-256 use basically the same calculations which are implemented by these macros.
19+
// Inlining the calculations seems to result in better generated code.
20+
macro_rules! schedule_round( ($t:expr) => (
21+
W[$t] = sigma1(W[$t - 2]) + W[$t - 7] + sigma0(W[$t - 15]) + W[$t - 16];
22+
)
23+
)
24+
25+
macro_rules! sha2_round(
26+
($A:ident, $B:ident, $C:ident, $D:ident,
27+
$E:ident, $F:ident, $G:ident, $H:ident, $K:ident, $t:expr) => (
28+
{
29+
$H += sum1($E) + ch($E, $F, $G) + $K[$t] + W[$t];
30+
$D += $H;
31+
$H += sum0($A) + maj($A, $B, $C);
32+
}
33+
)
34+
)
35+
36+
1637
// BitCounter is a specialized structure intended simply for counting the
1738
// number of bits that have been processed by the SHA-2 512 family of functions.
1839
// It does very little overflow checking since such checking is not necessary
@@ -117,15 +138,6 @@ impl Engine512State {
117138
((x << 45) | (x >> 19)) ^ ((x << 3) | (x >> 61)) ^ (x >> 6)
118139
}
119140

120-
let mut W = [0u64, ..80];
121-
122-
read_u64v_be(W.mut_slice(0, 16), data);
123-
124-
foreach t in range(16u, 80) {
125-
W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) +
126-
W[t - 16];
127-
}
128-
129141
let mut a = self.H0;
130142
let mut b = self.H1;
131143
let mut c = self.H2;
@@ -135,48 +147,41 @@ impl Engine512State {
135147
let mut g = self.H6;
136148
let mut h = self.H7;
137149

138-
let mut t = 0;
139-
140-
foreach _ in range(0u, 10) {
141-
h += sum1(e) + ch(e, f, g) + K64[t] + W[t];
142-
d += h;
143-
h += sum0(a) + maj(a, b, c);
144-
t += 1;
145-
146-
g += sum1(d) + ch(d, e, f) + K64[t] + W[t];
147-
c += g;
148-
g += sum0(h) + maj(h, a, b);
149-
t += 1;
150-
151-
f += sum1(c) + ch(c, d, e) + K64[t] + W[t];
152-
b += f;
153-
f += sum0(g) + maj(g, h, a);
154-
t += 1;
155-
156-
e += sum1(b) + ch(b, c, d) + K64[t] + W[t];
157-
a += e;
158-
e += sum0(f) + maj(f, g, h);
159-
t += 1;
160-
161-
d += sum1(a) + ch(a, b, c) + K64[t] + W[t];
162-
h += d;
163-
d += sum0(e) + maj(e, f, g);
164-
t += 1;
165-
166-
c += sum1(h) + ch(h, a, b) + K64[t] + W[t];
167-
g += c;
168-
c += sum0(d) + maj(d, e, f);
169-
t += 1;
170-
171-
b += sum1(g) + ch(g, h, a) + K64[t] + W[t];
172-
f += b;
173-
b += sum0(c) + maj(c, d, e);
174-
t += 1;
175-
176-
a += sum1(f) + ch(f, g, h) + K64[t] + W[t];
177-
e += a;
178-
a += sum0(b) + maj(b, c, d);
179-
t += 1;
150+
let mut W = [0u64, ..80];
151+
152+
read_u64v_be(W.mut_slice(0, 16), data);
153+
154+
// Putting the message schedule inside the same loop as the round calculations allows for
155+
// the compiler to generate better code.
156+
for uint::range_step(0, 64, 8) |t| {
157+
schedule_round!(t + 16);
158+
schedule_round!(t + 17);
159+
schedule_round!(t + 18);
160+
schedule_round!(t + 19);
161+
schedule_round!(t + 20);
162+
schedule_round!(t + 21);
163+
schedule_round!(t + 22);
164+
schedule_round!(t + 23);
165+
166+
sha2_round!(a, b, c, d, e, f, g, h, K64, t);
167+
sha2_round!(h, a, b, c, d, e, f, g, K64, t + 1);
168+
sha2_round!(g, h, a, b, c, d, e, f, K64, t + 2);
169+
sha2_round!(f, g, h, a, b, c, d, e, K64, t + 3);
170+
sha2_round!(e, f, g, h, a, b, c, d, K64, t + 4);
171+
sha2_round!(d, e, f, g, h, a, b, c, K64, t + 5);
172+
sha2_round!(c, d, e, f, g, h, a, b, K64, t + 6);
173+
sha2_round!(b, c, d, e, f, g, h, a, K64, t + 7);
174+
}
175+
176+
for uint::range_step(64, 80, 8) |t| {
177+
sha2_round!(a, b, c, d, e, f, g, h, K64, t);
178+
sha2_round!(h, a, b, c, d, e, f, g, K64, t + 1);
179+
sha2_round!(g, h, a, b, c, d, e, f, K64, t + 2);
180+
sha2_round!(f, g, h, a, b, c, d, e, K64, t + 3);
181+
sha2_round!(e, f, g, h, a, b, c, d, K64, t + 4);
182+
sha2_round!(d, e, f, g, h, a, b, c, K64, t + 5);
183+
sha2_round!(c, d, e, f, g, h, a, b, K64, t + 6);
184+
sha2_round!(b, c, d, e, f, g, h, a, K64, t + 7);
180185
}
181186

182187
self.H0 += a;
@@ -523,15 +528,6 @@ impl Engine256State {
523528
((x >> 17) | (x << 15)) ^ ((x >> 19) | (x << 13)) ^ (x >> 10)
524529
}
525530

526-
let mut W = [0u32, ..80];
527-
528-
read_u32v_be(W.mut_slice(0, 16), data);
529-
530-
foreach t in range(16u, 64) {
531-
W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) +
532-
W[t - 16];
533-
}
534-
535531
let mut a = self.H0;
536532
let mut b = self.H1;
537533
let mut c = self.H2;
@@ -541,48 +537,41 @@ impl Engine256State {
541537
let mut g = self.H6;
542538
let mut h = self.H7;
543539

544-
let mut t = 0;
545-
546-
foreach _ in range(0u, 8) {
547-
h += sum1(e) + ch(e, f, g) + K32[t] + W[t];
548-
d += h;
549-
h += sum0(a) + maj(a, b, c);
550-
t += 1;
551-
552-
g += sum1(d) + ch(d, e, f) + K32[t] + W[t];
553-
c += g;
554-
g += sum0(h) + maj(h, a, b);
555-
t += 1;
556-
557-
f += sum1(c) + ch(c, d, e) + K32[t] + W[t];
558-
b += f;
559-
f += sum0(g) + maj(g, h, a);
560-
t += 1;
561-
562-
e += sum1(b) + ch(b, c, d) + K32[t] + W[t];
563-
a += e;
564-
e += sum0(f) + maj(f, g, h);
565-
t += 1;
566-
567-
d += sum1(a) + ch(a, b, c) + K32[t] + W[t];
568-
h += d;
569-
d += sum0(e) + maj(e, f, g);
570-
t += 1;
571-
572-
c += sum1(h) + ch(h, a, b) + K32[t] + W[t];
573-
g += c;
574-
c += sum0(d) + maj(d, e, f);
575-
t += 1;
576-
577-
b += sum1(g) + ch(g, h, a) + K32[t] + W[t];
578-
f += b;
579-
b += sum0(c) + maj(c, d, e);
580-
t += 1;
581-
582-
a += sum1(f) + ch(f, g, h) + K32[t] + W[t];
583-
e += a;
584-
a += sum0(b) + maj(b, c, d);
585-
t += 1;
540+
let mut W = [0u32, ..64];
541+
542+
read_u32v_be(W.mut_slice(0, 16), data);
543+
544+
// Putting the message schedule inside the same loop as the round calculations allows for
545+
// the compiler to generate better code.
546+
for uint::range_step(0, 48, 8) |t| {
547+
schedule_round!(t + 16);
548+
schedule_round!(t + 17);
549+
schedule_round!(t + 18);
550+
schedule_round!(t + 19);
551+
schedule_round!(t + 20);
552+
schedule_round!(t + 21);
553+
schedule_round!(t + 22);
554+
schedule_round!(t + 23);
555+
556+
sha2_round!(a, b, c, d, e, f, g, h, K32, t);
557+
sha2_round!(h, a, b, c, d, e, f, g, K32, t + 1);
558+
sha2_round!(g, h, a, b, c, d, e, f, K32, t + 2);
559+
sha2_round!(f, g, h, a, b, c, d, e, K32, t + 3);
560+
sha2_round!(e, f, g, h, a, b, c, d, K32, t + 4);
561+
sha2_round!(d, e, f, g, h, a, b, c, K32, t + 5);
562+
sha2_round!(c, d, e, f, g, h, a, b, K32, t + 6);
563+
sha2_round!(b, c, d, e, f, g, h, a, K32, t + 7);
564+
}
565+
566+
for uint::range_step(48, 64, 8) |t| {
567+
sha2_round!(a, b, c, d, e, f, g, h, K32, t);
568+
sha2_round!(h, a, b, c, d, e, f, g, K32, t + 1);
569+
sha2_round!(g, h, a, b, c, d, e, f, K32, t + 2);
570+
sha2_round!(f, g, h, a, b, c, d, e, K32, t + 3);
571+
sha2_round!(e, f, g, h, a, b, c, d, K32, t + 4);
572+
sha2_round!(d, e, f, g, h, a, b, c, K32, t + 5);
573+
sha2_round!(c, d, e, f, g, h, a, b, K32, t + 6);
574+
sha2_round!(b, c, d, e, f, g, h, a, K32, t + 7);
586575
}
587576

588577
self.H0 += a;

0 commit comments

Comments
 (0)