]>
code.octet-stream.net Git - hashgood/blob - src/calculate.rs
2 use md5
::{Digest
, Md5
};
8 use std
::io
::prelude
::*;
10 use std
::sync
::mpsc
::{channel
, Receiver
};
13 use std
::thread
::JoinHandle
;
15 pub type CalculateResult
= Result
<Vec
<(Algorithm
, Vec
<u8>)>, Box
<dyn Error
>>;
/// For a given path to the input (may be "-" for STDIN), try to obtain a reader for the data within it.
///
/// # Errors
///
/// Returns a human-readable message when the path does not exist, when it is not a
/// regular file, or when opening the file fails.
pub fn get_input_reader(input: &Path) -> Result<Box<dyn Read>, String> {
    if input.to_str() == Some("-") {
        // Special case: standard input
        return Ok(Box::new(std::io::stdin()));
    }
    if !input.exists() {
        return Err(format!(
            "The path '{}' does not exist.",
            input.to_string_lossy()
        ));
    }
    if !input.is_file() {
        return Err(format!(
            "The path '{}' is not a regular file.",
            input.to_string_lossy()
        ));
    }
    match File::open(input) {
        Ok(f) => Ok(Box::new(f)),
        Err(e) => Err(format!("File open: {}", e)),
    }
}
41 /// For the given input stream, calculate all requested digest types
42 pub fn create_digests(algorithms
: &[Algorithm
], mut input
: Box
<dyn Read
>) -> CalculateResult
{
43 let mut senders
= vec
![];
44 let mut handles
= vec
![];
46 if algorithms
.contains(&Algorithm
::Md5
) {
47 let (s
, r
) = channel();
49 handles
.push(md5_digest(r
));
51 if algorithms
.contains(&Algorithm
::Sha1
) {
52 let (s
, r
) = channel();
54 handles
.push(sha1_digest(r
));
56 if algorithms
.contains(&Algorithm
::Sha256
) {
57 let (s
, r
) = channel();
59 handles
.push(sha256_digest(r
));
61 if algorithms
.contains(&Algorithm
::Sha512
) {
62 let (s
, r
) = channel();
64 handles
.push(sha512_digest(r
));
67 // 64 KB chunks will be read from the input at 64 KB and supplied to all hashing threads at once
68 // Right now that could be up to three threads. If CPU-bound, the other threads will mostly block while the slowest one finishes
69 const BUF_SIZE
: usize = 1024 * 64;
70 let mut buf
= [0; BUF_SIZE
];
71 while let Ok(size
) = input
.read(&mut buf
) {
75 // Create a shared read-only copy for the hashers to take as input
76 // buf is freed up for more reading
77 let chunk
= Arc
::new(buf
[0..size
].to_vec());
79 s
.send(chunk
.clone())?
;
84 // Once all data has been sent we just have to wait for the digests to fall out
85 Ok(handles
.into
_iter
().map(|h
| h
.join().unwrap
()).collect())
88 /// Calculate the md5 digest of some data on the given channel
89 fn md5_digest(rx
: Receiver
<Arc
<Vec
<u8>>>) -> JoinHandle
<(Algorithm
, Vec
<u8>)> {
90 thread
::spawn(move || {
91 let mut md5
= Md5
::new();
92 while let Ok(chunk
) = rx
.recv() {
95 let result
= md5
.finalize();
96 (Algorithm
::Md5
, result
.to_vec())
100 /// Calculate the sha1 digest of some data on the given channel
101 fn sha1_digest(rx
: Receiver
<Arc
<Vec
<u8>>>) -> JoinHandle
<(Algorithm
, Vec
<u8>)> {
102 thread
::spawn(move || {
103 let mut sha1
= Sha1
::new();
104 while let Ok(chunk
) = rx
.recv() {
105 sha1
.update(&*chunk
);
107 let result
= sha1
.finalize();
108 (Algorithm
::Sha1
, result
.to_vec())
112 /// Calculate the sha256 digest of some data on the given channel
113 fn sha256_digest(rx
: Receiver
<Arc
<Vec
<u8>>>) -> JoinHandle
<(Algorithm
, Vec
<u8>)> {
114 thread
::spawn(move || {
115 let mut sha256
= Sha256
::new();
116 while let Ok(chunk
) = rx
.recv() {
117 sha256
.update(&*chunk
);
119 let result
= sha256
.finalize();
120 (Algorithm
::Sha256
, result
.to_vec())
124 /// Calculate the sha512 digest of some data on the given channel
125 fn sha512_digest(rx
: Receiver
<Arc
<Vec
<u8>>>) -> JoinHandle
<(Algorithm
, Vec
<u8>)> {
126 thread
::spawn(move || {
127 let mut sha512
= Sha512
::new();
128 while let Ok(chunk
) = rx
.recv() {
129 sha512
.update(&*chunk
);
131 let result
= sha512
.finalize();
132 (Algorithm
::Sha512
, result
.to_vec())
// Small fixture: ten bytes of ASCII 'A', together with its expected hex-encoded digests.
static SMALL_DATA: [u8; 10] = [b'A'; 10];
// python3 -c 'print ("A"*10, end="", flush=True)' | md5sum
static SMALL_DATA_MD5: &str = "16c52c6e8326c071da771e66dc6e9e57";
// python3 -c 'print ("A"*10, end="", flush=True)' | sha1sum
static SMALL_DATA_SHA1: &str = "c71613a7386fd67995708464bf0223c0d78225c4";
// python3 -c 'print ("A"*10, end="", flush=True)' | sha256sum
static SMALL_DATA_SHA256: &str =
    "1d65bf29403e4fb1767522a107c827b8884d16640cf0e3b18c4c1dd107e0d49d";
// Large fixture: one million bytes of ASCII 'B', together with its expected hex-encoded digests.
static LARGE_DATA: [u8; 1_000_000] = [b'B'; 1_000_000];
// python3 -c 'print ("B"*1000000, end="", flush=True)' | md5sum
static LARGE_DATA_MD5: &str = "9171f6d67a87ca649a702434a03458a1";
// python3 -c 'print ("B"*1000000, end="", flush=True)' | sha1sum
static LARGE_DATA_SHA1: &str = "cfae4cebfd01884111bdede7cf983626bb249c94";
// python3 -c 'print ("B"*1000000, end="", flush=True)' | sha256sum
static LARGE_DATA_SHA256: &str =
    "b9193853f7798e92e2f6b82eda336fa7d6fc0fa90fdefe665f372b0bad8cdf8c";
159 fn verify_digest(alg
: Algorithm
, data
: &'
static [u8], hash
: &str) {
160 let reader
= Cursor
::new(&*data
);
161 let digests
= create_digests(&[alg
], Box
::new(reader
)).unwrap
();
162 assert_eq
!(digests
.len(), 1);
163 assert_eq
!(digests
[0], (alg
, hex
::decode(hash
).unwrap
()));
/// Assert that the MD5, SHA-1 and SHA-256 digests are calculated correctly for a small
/// piece of test data (single block).
#[test]
fn small_digests() {
    verify_digest(Algorithm::Md5, &SMALL_DATA, SMALL_DATA_MD5);
    verify_digest(Algorithm::Sha1, &SMALL_DATA, SMALL_DATA_SHA1);
    verify_digest(Algorithm::Sha256, &SMALL_DATA, SMALL_DATA_SHA256);
}
/// Assert that the MD5, SHA-1 and SHA-256 digests are calculated correctly for a large
/// piece of test data. For our purposes, "large" means that it spans several of the 64 KB
/// blocks used to break up the input processing. Using one million bytes instead of
/// 1 MiB means that the final block will be slightly smaller than the others.
#[test]
fn large_digests() {
    verify_digest(Algorithm::Md5, &LARGE_DATA, LARGE_DATA_MD5);
    verify_digest(Algorithm::Sha1, &LARGE_DATA, LARGE_DATA_SHA1);
    verify_digest(Algorithm::Sha256, &LARGE_DATA, LARGE_DATA_SHA256);
}