]> code.octet-stream.net Git - hashgood/blob - src/calculate.rs
59395d3c6b5544969c41bab22d67efdeb0fb6b42
[hashgood] / src / calculate.rs
1 use super::Algorithm;
2 use crossbeam_channel::bounded;
3 use crossbeam_channel::Receiver;
4 use crypto::digest::Digest;
5 use crypto::md5::Md5;
6 use crypto::sha1::Sha1;
7 use crypto::sha2::Sha256;
8 use std::error::Error;
9 use std::fs::File;
10 use std::io::prelude::*;
11 use std::path::Path;
12 use std::sync::Arc;
13 use std::thread;
14 use std::thread::JoinHandle;
15
/// Outcome of a digest calculation: on success, a list pairing each computed
/// algorithm with its raw digest bytes; on failure, a boxed error.
pub type CalculateResult = Result<Vec<(Algorithm, Vec<u8>)>, Box<dyn Error>>;
17
/// Open the data source named by `input` and hand back something readable.
///
/// A path of exactly `-` selects standard input. Any other path must exist and
/// refer to a regular file, otherwise a human-readable message describing the
/// problem is returned as the error.
pub fn get_input_reader(input: &Path) -> Result<Box<dyn Read>, String> {
    // "-" is recognised before any filesystem checks are made.
    if let Some("-") = input.to_str() {
        return Ok(Box::new(std::io::stdin()));
    }

    // Lossy rendering of the path, used only for the error messages below.
    let shown = input.to_string_lossy();
    if !input.exists() {
        return Err(format!("The path '{}' does not exist.", shown));
    }
    if !input.is_file() {
        return Err(format!("The path '{}' is not a regular file.", shown));
    }

    File::open(input)
        .map(|file| Box::new(file) as Box<dyn Read>)
        .map_err(|e| format!("File open: {}", e))
}
41
42 /// For the given input stream, calculate all requested digest types
43 pub fn create_digests(algorithms: &[Algorithm], mut input: Box<dyn Read>) -> CalculateResult {
44 let mut senders = vec![];
45 let mut handles = vec![];
46
47 if algorithms.contains(&Algorithm::Md5) {
48 let (s, r) = bounded::<Arc<Vec<u8>>>(1);
49 senders.push(s);
50 handles.push(md5_digest(r));
51 }
52 if algorithms.contains(&Algorithm::Sha1) {
53 let (s, r) = bounded::<Arc<Vec<u8>>>(1);
54 senders.push(s);
55 handles.push(sha1_digest(r));
56 }
57 if algorithms.contains(&Algorithm::Sha256) {
58 let (s, r) = bounded::<Arc<Vec<u8>>>(1);
59 senders.push(s);
60 handles.push(sha256_digest(r));
61 }
62
63 // 64 KB chunks will be read from the input at 64 KB and supplied to all hashing threads at once
64 // Right now that could be up to three threads. If CPU-bound, the other threads will mostly block while the slowest one finishes
65 const BUF_SIZE: usize = 1024 * 64;
66 let mut buf = [0; BUF_SIZE];
67 while let Ok(size) = input.read(&mut buf) {
68 if size == 0 {
69 break;
70 } else {
71 // Create a shared read-only copy for the hashers to take as input
72 // buf is freed up for more reading
73 let chunk = Arc::new(buf[0..size].to_vec());
74 for s in &senders {
75 s.send(chunk.clone())?;
76 }
77 }
78 }
79 drop(senders);
80 // Once all data has been sent we just have to wait for the digests to fall out
81 Ok(handles.into_iter().map(|h| h.join().unwrap()).collect())
82 }
83
84 /// Calculate the md5 digest of some data on the given channel
85 fn md5_digest(rx: Receiver<Arc<Vec<u8>>>) -> JoinHandle<(Algorithm, Vec<u8>)> {
86 thread::spawn(move || {
87 let mut md5 = Md5::new();
88 while let Ok(chunk) = rx.recv() {
89 md5.input(&chunk);
90 }
91 let mut result = [0; 16];
92 md5.result(&mut result);
93 (Algorithm::Md5, result.to_vec())
94 })
95 }
96
97 /// Calculate the sha1 digest of some data on the given channel
98 fn sha1_digest(rx: Receiver<Arc<Vec<u8>>>) -> JoinHandle<(Algorithm, Vec<u8>)> {
99 thread::spawn(move || {
100 let mut sha1 = Sha1::new();
101 while let Ok(chunk) = rx.recv() {
102 sha1.input(&chunk);
103 }
104 let mut result = [0; 20];
105 sha1.result(&mut result);
106 (Algorithm::Sha1, result.to_vec())
107 })
108 }
109
110 /// Calculate the sha256 digest of some data on the given channel
111 fn sha256_digest(rx: Receiver<Arc<Vec<u8>>>) -> JoinHandle<(Algorithm, Vec<u8>)> {
112 thread::spawn(move || {
113 let mut sha256 = Sha256::new();
114 while let Ok(chunk) = rx.recv() {
115 sha256.input(&chunk);
116 }
117 let mut result = [0; 32];
118 sha256.result(&mut result);
119 (Algorithm::Sha256, result.to_vec())
120 })
121 }
122
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    const SMALL_DATA: [u8; 10] = [b'A'; 10];
    // python3 -c 'print ("A"*10, end="", flush=True)' | md5sum
    const SMALL_DATA_MD5: &str = "16c52c6e8326c071da771e66dc6e9e57";
    // python3 -c 'print ("A"*10, end="", flush=True)' | sha1sum
    const SMALL_DATA_SHA1: &str = "c71613a7386fd67995708464bf0223c0d78225c4";
    // python3 -c 'print ("A"*10, end="", flush=True)' | sha256sum
    const SMALL_DATA_SHA256: &str =
        "1d65bf29403e4fb1767522a107c827b8884d16640cf0e3b18c4c1dd107e0d49d";

    // A `static` rather than a `const` so the large array has a single
    // definition instead of being inlined at every use site.
    static LARGE_DATA: [u8; 1_000_000] = [b'B'; 1_000_000];
    // python3 -c 'print ("B"*1000000, end="", flush=True)' | md5sum
    const LARGE_DATA_MD5: &str = "9171f6d67a87ca649a702434a03458a1";
    // python3 -c 'print ("B"*1000000, end="", flush=True)' | sha1sum
    const LARGE_DATA_SHA1: &str = "cfae4cebfd01884111bdede7cf983626bb249c94";
    // python3 -c 'print ("B"*1000000, end="", flush=True)' | sha256sum
    const LARGE_DATA_SHA256: &str =
        "b9193853f7798e92e2f6b82eda336fa7d6fc0fa90fdefe665f372b0bad8cdf8c";

    /// Hash `data` with the single algorithm `alg` and assert that the result
    /// matches the hex-encoded `hash`.
    fn verify_digest(alg: Algorithm, data: &'static [u8], hash: &str) {
        let reader = Cursor::new(data);
        let digests = create_digests(&[alg], Box::new(reader)).unwrap();
        assert_eq!(digests.len(), 1);
        assert_eq!(digests[0], (alg, hex::decode(hash).unwrap()));
    }

    /// Assert that digests for all algorithms are calculated correctly for a small piece
    /// of test data (single block).
    #[test]
    fn small_digests() {
        verify_digest(Algorithm::Md5, &SMALL_DATA, SMALL_DATA_MD5);
        verify_digest(Algorithm::Sha1, &SMALL_DATA, SMALL_DATA_SHA1);
        verify_digest(Algorithm::Sha256, &SMALL_DATA, SMALL_DATA_SHA256);
    }

    /// Assert that digests for all algorithms are calculated correctly for a large piece
    /// of test data. For our purposes, "large" means that it spans several of the 64 KB
    /// blocks used to break up the input processing. Using one million bytes instead of
    /// 1 MiB means that the final block will be slightly smaller than the others.
    #[test]
    fn large_digests() {
        verify_digest(Algorithm::Md5, &LARGE_DATA, LARGE_DATA_MD5);
        verify_digest(Algorithm::Sha1, &LARGE_DATA, LARGE_DATA_SHA1);
        verify_digest(Algorithm::Sha256, &LARGE_DATA, LARGE_DATA_SHA256);
    }

    /// Assert that requesting every algorithm in one call yields all three digests,
    /// in MD5, SHA-1, SHA-256 order, when each chunk is shared between several hashers.
    #[test]
    fn all_digests_at_once() {
        let algorithms = [Algorithm::Md5, Algorithm::Sha1, Algorithm::Sha256];
        let reader = Cursor::new(&LARGE_DATA[..]);
        let digests = create_digests(&algorithms, Box::new(reader)).unwrap();
        let expected = vec![
            (Algorithm::Md5, hex::decode(LARGE_DATA_MD5).unwrap()),
            (Algorithm::Sha1, hex::decode(LARGE_DATA_SHA1).unwrap()),
            (Algorithm::Sha256, hex::decode(LARGE_DATA_SHA256).unwrap()),
        ];
        assert_eq!(digests, expected);
    }
}