Skip to content

Commit 1a0ce97

Browse files
committed
Add SHA256 file content hashing functions
1 parent b6c7dee commit 1a0ce97

File tree

4 files changed

+414
-1
lines changed

4 files changed

+414
-1
lines changed

libvtrutil/src/picosha2.h

Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
/*
2+
The MIT License (MIT)
3+
4+
Copyright (C) 2014 okdshin
5+
6+
Permission is hereby granted, free of charge, to any person obtaining a copy
7+
of this software and associated documentation files (the "Software"), to deal
8+
in the Software without restriction, including without limitation the rights
9+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
copies of the Software, and to permit persons to whom the Software is
11+
furnished to do so, subject to the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included in
14+
all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22+
THE SOFTWARE.
23+
*/
24+
#ifndef PICOSHA2_H
25+
#define PICOSHA2_H
26+
//picosha2:20140213
27+
#include <iostream>
28+
#include <vector>
29+
#include <iterator>
30+
#include <cassert>
31+
#include <sstream>
32+
#include <algorithm>
33+
34+
namespace picosha2
{
// Working word of the algorithm. The type is at least 32 bits wide; all
// arithmetic below keeps results inside the low 32 bits by explicit masking,
// so the code also works where unsigned long is 64 bits wide.
typedef unsigned long word_t;
// One octet of message or digest data.
typedef unsigned char byte_t;

namespace detail
{
// Returns the low 8 bits of x.
inline byte_t mask_8bit(byte_t x){
    return x&0xff;
}

// Returns the low 32 bits of x.
inline word_t mask_32bit(word_t x){
    return x&0xffffffff;
}

// SHA-256 round constants, one per compression round.
const word_t add_constant[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

// Initial value of the eight 32-bit chaining words.
const word_t initial_message_digest[8] = {
    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
};

// Bitwise choose: for each bit, y where x is set, z where x is clear.
inline word_t ch(word_t x, word_t y, word_t z){
    return (x&y)^((~x)&z);
}

// Bitwise majority of x, y and z.
inline word_t maj(word_t x, word_t y, word_t z){
    return (x&y)^(x&z)^(y&z);
}

// Rotates the low 32 bits of x right by n positions (n must be < 32).
// Bits of x above bit 31 are discarded first so they cannot leak into the
// result when word_t is wider than 32 bits.
inline word_t rotr(word_t x, std::size_t n){
    assert(n < 32);
    x = mask_32bit(x);
    if(n == 0){
        // Avoids the shift by 32 below, which is undefined for a 32-bit word_t.
        return x;
    }
    return mask_32bit((x>>n)|(x<<(32-n)));
}

inline word_t bsig0(word_t x){
    return rotr(x, 2)^rotr(x, 13)^rotr(x, 22);
}

inline word_t bsig1(word_t x){
    return rotr(x, 6)^rotr(x, 11)^rotr(x, 25);
}

// Logical right shift of the low 32 bits of x by n positions (n must be < 32).
inline word_t shr(word_t x, std::size_t n){
    assert(n < 32);
    return mask_32bit(x) >> n;
}

inline word_t ssig0(word_t x){
    return rotr(x, 7)^rotr(x, 18)^shr(x, 3);
}

inline word_t ssig1(word_t x){
    return rotr(x, 17)^rotr(x, 19)^shr(x, 10);
}

// Runs the compression function over one 64-byte block.
//   message_digest: random-access iterator to the 8 chaining words, updated
//                   in place (each word is left masked to 32 bits).
//   first:          random-access iterator to the first of 64 message bytes.
//   last:           unused; exactly 64 bytes starting at first are read.
template<typename RaIter1, typename RaIter2>
void hash256_block(RaIter1 message_digest, RaIter2 first, RaIter2 /*last*/){
    // Message schedule: the first 16 words are the block read big-endian,
    // the remaining 48 are derived from earlier schedule words.
    word_t w[64];
    for(std::size_t i = 0; i < 16; ++i){
        w[i] = (static_cast<word_t>(mask_8bit(*(first+i*4)))<<24)
            |(static_cast<word_t>(mask_8bit(*(first+i*4+1)))<<16)
            |(static_cast<word_t>(mask_8bit(*(first+i*4+2)))<<8)
            |(static_cast<word_t>(mask_8bit(*(first+i*4+3))));
    }
    for(std::size_t i = 16; i < 64; ++i){
        w[i] = mask_32bit(ssig1(w[i-2])+w[i-7]+ssig0(w[i-15])+w[i-16]);
    }

    word_t a = *message_digest;
    word_t b = *(message_digest+1);
    word_t c = *(message_digest+2);
    word_t d = *(message_digest+3);
    word_t e = *(message_digest+4);
    word_t f = *(message_digest+5);
    word_t g = *(message_digest+6);
    word_t h = *(message_digest+7);

    for(std::size_t i = 0; i < 64; ++i){
        // temp1/temp2 may exceed 32 bits on a wide word_t; they are only
        // consumed through mask_32bit below.
        const word_t temp1 = h+bsig1(e)+ch(e,f,g)+add_constant[i]+w[i];
        const word_t temp2 = bsig0(a)+maj(a,b,c);
        h = g;
        g = f;
        f = e;
        e = mask_32bit(d+temp1);
        d = c;
        c = b;
        b = a;
        a = mask_32bit(temp1+temp2);
    }

    // Fold the working variables back into the chaining value, modulo 2^32.
    const word_t working[8] = {a, b, c, d, e, f, g, h};
    for(std::size_t i = 0; i < 8; ++i){
        *(message_digest+i) = mask_32bit(*(message_digest+i)+working[i]);
    }
}

}//namespace detail

// Writes every element of [first, last) to os as two lowercase-by-default
// hexadecimal digits. Each element is reduced to a single byte first, so a
// negative `char` prints as its byte value (e.g. "ff") rather than as a
// sign-extended integer. The stream's format flags and fill character are
// restored before returning.
template<typename InIter>
void output_hex(InIter first, InIter last, std::ostream& os){
    const std::ios_base::fmtflags saved_flags = os.flags();
    const char saved_fill = os.fill('0');
    os.setf(std::ios::hex, std::ios::basefield);
    for(; first != last; ++first){
        os.width(2); // width is reset by every formatted insertion
        os << static_cast<unsigned int>(static_cast<byte_t>(*first));
    }
    os.fill(saved_fill);
    os.flags(saved_flags);
}

// Stores the hex representation of the bytes in [first, last) into hex_str.
template<typename InIter>
void bytes_to_hex_string(InIter first, InIter last, std::string& hex_str){
    std::ostringstream oss;
    output_hex(first, last, oss);
    hex_str.assign(oss.str());
}

// Stores the hex representation of a byte container into hex_str.
template<typename InContainer>
void bytes_to_hex_string(const InContainer& bytes, std::string& hex_str){
    bytes_to_hex_string(bytes.begin(), bytes.end(), hex_str);
}

// Returns the hex representation of the bytes in [first, last).
template<typename InIter>
std::string bytes_to_hex_string(InIter first, InIter last){
    std::string hex_str;
    bytes_to_hex_string(first, last, hex_str);
    return hex_str;
}

// Returns the hex representation of a byte container.
template<typename InContainer>
std::string bytes_to_hex_string(const InContainer& bytes){
    std::string hex_str;
    bytes_to_hex_string(bytes, hex_str);
    return hex_str;
}

// Incremental SHA-256 hasher.
// Usage: call process() any number of times, then finish() once, then read
// the digest with get_hash_bytes(). init() resets the object for reuse.
class hash256_one_by_one {
public:
    hash256_one_by_one(){
        init();
    }

    // Resets the hasher to its initial state (no data consumed).
    void init(){
        buffer_.clear();
        std::fill(data_length_digits_, data_length_digits_+4, 0);
        std::copy(detail::initial_message_digest, detail::initial_message_digest+8, h_);
    }

    // Feeds the bytes in [first, last) into the hash. Complete 64-byte blocks
    // are compressed immediately; the remainder (< 64 bytes) is kept in
    // buffer_ until more data arrives or finish() is called.
    template<typename RaIter>
    void process(RaIter first, RaIter last){
        add_to_data_length(static_cast<std::size_t>(std::distance(first, last)));
        std::copy(first, last, std::back_inserter(buffer_));
        std::size_t consumed = 0;
        for(; consumed+64 <= buffer_.size(); consumed += 64){
            detail::hash256_block(h_, buffer_.begin()+consumed, buffer_.begin()+consumed+64);
        }
        buffer_.erase(buffer_.begin(), buffer_.begin()+consumed);
    }

    // Appends the padding (0x80, zeros, 64-bit big-endian bit length) and
    // compresses the final block(s). The digest is available afterwards.
    void finish(){
        byte_t block[64];
        std::fill(block, block+64, 0);
        const std::size_t remains = buffer_.size();
        assert(remains < 64); // process() never leaves a full block behind
        std::copy(buffer_.begin(), buffer_.end(), block);
        block[remains] = 0x80;

        if(remains > 55){
            // No room left for the 8-byte length field: flush this block and
            // put the length into an additional all-zero block.
            detail::hash256_block(h_, block, block+64);
            std::fill(block, block+64, 0);
        }

        write_data_bit_length(block+56);
        detail::hash256_block(h_, block, block+64);
    }

    // Writes the chaining words as big-endian bytes into [first, last),
    // stopping early if the output range is shorter than 32 bytes.
    template<typename OutIter>
    void get_hash_bytes(OutIter first, OutIter last)const{
        for(const word_t* iter = h_; iter != h_+8; ++iter){
            for(std::size_t i = 0; i < 4 && first != last; ++i){
                *(first++) = detail::mask_8bit(static_cast<byte_t>((*iter >> (24-8*i))));
            }
        }
    }

private:
    // Adds n bytes to the running message length, which is stored as four
    // 16-bit digits (least significant first). n is consumed 16 bits at a
    // time so no intermediate sum can overflow, whatever the width of word_t.
    void add_to_data_length(std::size_t n) {
        std::size_t carry = n;
        for(std::size_t i = 0; i < 4 && carry != 0; ++i) {
            const std::size_t sum = data_length_digits_[i] + (carry & 65535u);
            data_length_digits_[i] = sum & 65535u;
            carry = (carry >> 16) + (sum >> 16);
        }
    }

    // Writes the message length in bits as 8 big-endian bytes starting at begin.
    void write_data_bit_length(byte_t* begin) {
        // convert byte length to bit length (multiply 8 or shift 3 times left)
        word_t bit_length_digits[4];
        word_t carry = 0;
        for(std::size_t i = 0; i < 4; ++i) {
            const word_t shifted = (data_length_digits_[i] << 3) | carry;
            bit_length_digits[i] = shifted & 65535u;
            carry = shifted >> 16;
        }

        // write data_bit_length, most significant digit first
        for(int i = 3; i >= 0; --i) {
            (*begin++) = static_cast<byte_t>(bit_length_digits[i] >> 8);
            (*begin++) = static_cast<byte_t>(bit_length_digits[i] & 0xff);
        }
    }

    std::vector<byte_t> buffer_;   // bytes not yet forming a complete block
    word_t data_length_digits_[4]; //as 64bit integer (16bit x 4 integer)
    word_t h_[8];                  // current chaining value
};

// Stores the hasher's current digest as a 64-character hex string in hex_str.
inline void get_hash_hex_string(const hash256_one_by_one& hasher, std::string& hex_str){
    byte_t hash[32];
    hasher.get_hash_bytes(hash, hash+32);
    bytes_to_hex_string(hash, hash+32, hex_str);
}

// Returns the hasher's current digest as a 64-character hex string.
inline std::string get_hash_hex_string(const hash256_one_by_one& hasher){
    std::string hex_str;
    get_hash_hex_string(hasher, hex_str);
    return hex_str;
}

// Hashes the bytes in [first, last) and writes the digest bytes to
// [first2, last2) (at most 32 bytes are written).
template<typename RaIter, typename OutIter>
void hash256(RaIter first, RaIter last, OutIter first2, OutIter last2){
    hash256_one_by_one hasher;
    hasher.process(first, last);
    hasher.finish();
    hasher.get_hash_bytes(first2, last2);
}

// Hashes the bytes in [first, last) into the existing elements of dst.
template<typename RaIter, typename OutContainer>
void hash256(RaIter first, RaIter last, OutContainer& dst){
    hash256(first, last, dst.begin(), dst.end());
}

// Hashes the container src and writes the digest bytes to [first, last).
template<typename RaContainer, typename OutIter>
void hash256(const RaContainer& src, OutIter first, OutIter last){
    hash256(src.begin(), src.end(), first, last);
}

// Hashes the container src into the existing elements of dst.
template<typename RaContainer, typename OutContainer>
void hash256(const RaContainer& src, OutContainer& dst){
    hash256(src.begin(), src.end(), dst.begin(), dst.end());
}

// Hashes the bytes in [first, last) and stores the hex digest in hex_str.
template<typename RaIter>
void hash256_hex_string(RaIter first, RaIter last, std::string& hex_str){
    byte_t hashed[32];
    hash256(first, last, hashed, hashed+32);
    bytes_to_hex_string(hashed, hashed+32, hex_str);
}

// Returns the hex digest of the bytes in [first, last).
template<typename RaIter>
std::string hash256_hex_string(RaIter first, RaIter last){
    std::string hex_str;
    hash256_hex_string(first, last, hex_str);
    return hex_str;
}

// Non-template overload so that a (string, string) call is not mistaken for
// the (iterator, iterator) overload above.
inline void hash256_hex_string(const std::string& src, std::string& hex_str){
    hash256_hex_string(src.begin(), src.end(), hex_str);
}

// Stores the hex digest of the container src in hex_str.
template<typename RaContainer>
void hash256_hex_string(const RaContainer& src, std::string& hex_str){
    hash256_hex_string(src.begin(), src.end(), hex_str);
}

// Returns the hex digest of the container src.
template<typename RaContainer>
std::string hash256_hex_string(const RaContainer& src){
    return hash256_hex_string(src.begin(), src.end());
}

}//namespace picosha2
358+
359+
#endif //PICOSHA2_H

libvtrutil/src/vtr_digest.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#include "vtr_digest.h"
2+
#include "vtr_error.h"
3+
4+
#include <iostream>
5+
#include <fstream>
6+
#include <array>
7+
8+
#include "picosha2.h"
9+
10+
namespace vtr {
11+
12+
std::string secure_digest_file(const std::string& filepath) {
13+
std::ifstream is(filepath);
14+
if (!is) {
15+
throw VtrError("Failed to open file", filepath);
16+
}
17+
return secure_digest_stream(is);
18+
}
19+
20+
std::string secure_digest_stream(std::istream& is) {
21+
//Read the stream in chunks and calculate the SHA256 digest
22+
picosha2::hash256_one_by_one hasher;
23+
24+
std::array<char, 1024> buf;
25+
while(!is.eof()) {
26+
//Process a chunk
27+
is.read(buf.data(), buf.size());
28+
hasher.process(buf.begin(), buf.begin() + is.gcount());
29+
}
30+
hasher.finish();
31+
32+
//Return the digest as a hex string
33+
return picosha2::get_hash_hex_string(hasher);
34+
}
35+
36+
} //namespace

0 commit comments

Comments
 (0)