X-Git-Url: https://code.octet-stream.net/hashgood/blobdiff_plain/0252a4d67a68e71c811a6f7ec8057b616e65bca7..19eca3a9a82251447f3c86a14ad48c1db3d8bc1a:/src/verify.rs

diff --git a/src/verify.rs b/src/verify.rs
index e519f8a..0f2d1d8 100644
--- a/src/verify.rs
+++ b/src/verify.rs
@@ -3,13 +3,12 @@ use super::{
     VerificationSource,
 };
 #[cfg(feature = "paste")]
-use clipboard::{ClipboardContext, ClipboardProvider};
-use regex::Regex;
+use copypasta::{ClipboardContext, ClipboardProvider};
 use std::fs::File;
 use std::io;
 use std::io::prelude::*;
 use std::io::BufReader;
-use std::path::PathBuf;
+use std::path::Path;
 
 /// Calculate a list of candidate hashes based on the options specified.
 /// If no hash options have been specified returns None.
@@ -45,7 +44,7 @@ fn get_by_parameter(param: &str) -> Result<CandidateHashes, String> {
 fn get_from_clipboard() -> Result<CandidateHashes, String> {
     #[cfg(feature = "paste")]
     {
-        let mut ctx: ClipboardContext = match ClipboardProvider::new() {
+        let mut ctx: ClipboardContext = match ClipboardContext::new() {
             Ok(ctx) => ctx,
             Err(e) => return Err(format!("Error getting system clipboard: {}", e)),
         };
@@ -62,20 +61,20 @@ fn get_from_clipboard() -> Result<CandidateHashes, String> {
             filename: None,
             bytes,
         };
-        return Ok(CandidateHashes {
+        Ok(CandidateHashes {
             alg,
             hashes: vec![candidate],
             source: VerificationSource::Clipboard,
-        });
+        })
     }
     #[cfg(not(feature = "paste"))]
     {
-        return Err("Paste not implemented".to_owned());
+        Err("Paste not implemented".to_owned())
     }
 }
 
 /// Generate a candidate hash from the digests file specified (could be "-" for STDIN), or throw an error.
-fn get_from_file(path: &PathBuf) -> Result<CandidateHashes, String> {
+fn get_from_file(path: &Path) -> Result<CandidateHashes, String> {
     // Get a reader for either standard input or the chosen path
     let reader: Box<dyn Read> = if path.to_str() == Some("-") {
         Box::new(std::io::stdin())
@@ -97,7 +96,7 @@ fn get_from_file(path: &PathBuf) -> Result<CandidateHashes, String> {
     let line = line.trim().to_owned();
 
     // Does our first line look like a raw hash on its own? If so, use that
-    if let Some(candidate) = read_raw_candidate_from_file(&line, &path) {
+    if let Some(candidate) = read_raw_candidate_from_file(&line, path) {
         return Ok(candidate);
     }
 
@@ -106,7 +105,7 @@ fn get_from_file(path: &PathBuf) -> Result<CandidateHashes, String> {
     let full_lines = vec![Ok(line)].into_iter().chain(reader.lines());
 
     // Does the entire file look like a coreutils-style digests file? (SHA1SUMS, etc.)
-    if let Some(candidate) = read_coreutils_digests_from_file(full_lines, &path) {
+    if let Some(candidate) = read_coreutils_digests_from_file(full_lines, path) {
         return Ok(candidate);
     }
 
@@ -118,81 +117,80 @@ fn get_from_file(path: &PathBuf) -> Result<CandidateHashes, String> {
     ))
 }
 
-fn read_raw_candidate_from_file(line: &str, path: &PathBuf) -> Option<CandidateHashes> {
-    // It is a little sad to use a dynamic regex in an otherwise nice Rust program
-    // These deserve to be replaced with a good old fashioned static parser
-    // But let's be honest: the impact is negligible
-    let re = Regex::new(r"^([[:xdigit:]]{32}|[[:xdigit:]]{40}|[[:xdigit:]]{64})$").unwrap();
-    if re.is_match(line) {
-        // These should both always succeed due to the matching
-        let bytes = match hex::decode(line) {
-            Ok(bytes) => bytes,
-            _ => return None,
-        };
-        let alg = match Algorithm::from_len(bytes.len()) {
-            Ok(alg) => alg,
-            _ => return None,
-        };
-        return Some(CandidateHashes {
-            alg,
-            source: VerificationSource::RawFile(path.clone()),
-            hashes: vec![CandidateHash {
-                bytes,
-                filename: None,
-            }],
-        });
-    }
-    None
+/// Decode a hex digest and work out which algorithm it belongs to.
+///
+/// Surrounding whitespace in `s` is ignored. Returns `None` if `s` is not
+/// valid hex or if `Algorithm::from_len` rejects the decoded byte count.
+fn try_parse_hash(s: &str) -> Option<(Algorithm, Vec<u8>)> {
+    // `.ok()?` collapses each failure into `None`: this function reports
+    // only whether the text is a usable digest, not why it was rejected.
+    let bytes = hex::decode(s.trim()).ok()?;
+    let alg = Algorithm::from_len(bytes.len()).ok()?;
+    Some((alg, bytes))
 }
 
-fn read_coreutils_digests_from_file<I>(lines: I, path: &PathBuf) -> Option<CandidateHashes>
+/// Build candidates from `line` if it is a lone hex digest; `path` is recorded as the source.
+fn read_raw_candidate_from_file(line: &str, path: &Path) -> Option<CandidateHashes> {
+    try_parse_hash(line).map(|(alg, bytes)| CandidateHashes {
+        alg,
+        source: VerificationSource::RawFile(path.to_string_lossy().to_string()),
+        hashes: vec![CandidateHash {
+            filename: None,
+            bytes,
+        }],
+    })
+}
+
+fn read_coreutils_digests_from_file<I, S>(lines: I, path: &Path) -> Option<CandidateHashes>
 where
-    I: Iterator<Item = io::Result<String>>,
+    I: Iterator<Item = io::Result<S>>,
+    S: AsRef<str>,
 {
-    let re = Regex::new(
-        r"^(?P<hash>([[:xdigit:]]{32}|[[:xdigit:]]{40}|[[:xdigit:]]{64})) .(?P<filename>.+)$",
-    )
-    .unwrap();
-
     let mut hashes = vec![];
     let mut alg: Option<Algorithm> = None;
-    for l in lines {
-        if let Ok(l) = l {
-            let l = l.trim();
-            // Allow (ignore) blank lines
-            if l.is_empty() {
-                continue;
-            }
-            // If we can capture a valid line, use it
-            if let Some(captures) = re.captures(&l) {
-                let hash = &captures["hash"];
-                let filename = &captures["filename"];
-                // Decode the hex and algorithm for this line
-                let line_bytes = match hex::decode(hash) {
-                    Ok(bytes) => bytes,
-                    _ => return None,
-                };
-                let line_alg = match Algorithm::from_len(line_bytes.len()) {
-                    Ok(alg) => alg,
-                    _ => return None,
-                };
-                if alg.is_some() && alg != Some(line_alg) {
-                    // Different algorithms in the same digest file are not supported
-                    return None;
+    for l in lines.flatten() {
+        let l = l.as_ref().trim();
+        // Allow (ignore) blank lines
+        if l.is_empty() {
+            continue;
+        }
+        // Expected format
+        // <valid-hash><space><space-or-*><filename>
+        let (line_alg, bytes, filename) = match l
+            .find(' ')
+            .and_then(|space_pos| {
+                // Char before filename should be space for text or * for binary
+                match l.chars().nth(space_pos + 1) {
+                    Some(' ') | Some('*') => (l.get(..space_pos)).zip(l.get(space_pos + 2..)),
+                    _ => None,
+                }
+            })
+            .and_then(|(maybe_hash, filename)| {
+                // Filename should be in this position without extra whitespace
+                if filename.trim() == filename {
+                    try_parse_hash(maybe_hash).map(|(alg, bytes)| (alg, bytes, filename))
                 } else {
-                    // If we are the first line, we define the overall algorithm
-                    alg = Some(line_alg);
+                    None
                 }
-                // So far so good - create an entry for this line
-                hashes.push(CandidateHash {
-                    bytes: line_bytes,
-                    filename: Some(filename.to_owned()),
-                });
-            } else {
-                // But if we have a line with content we cannot parse, this is an error
+            }) {
+            Some(t) => t,
+            None => {
+                // if we have a line with content we cannot parse, this is an error
                 return None;
             }
+        };
+        if alg.is_some() && alg != Some(line_alg) {
+            // Different algorithms in the same digest file are not supported
+            return None;
+        } else {
+            // If we are the first line, we define the overall algorithm
+            alg = Some(line_alg);
         }
+        // So far so good - create an entry for this line
+        hashes.push(CandidateHash {
+            bytes,
+            filename: Some(filename.to_owned()),
+        });
     }
 
     // It is a failure if we got zero hashes or we somehow don't know the algorithm
@@ -207,7 +205,7 @@ where
     // Otherwise all is well and we can return our results
     Some(CandidateHashes {
         alg,
-        source: VerificationSource::DigestsFile(path.clone()),
+        source: VerificationSource::DigestsFile(path.to_string_lossy().to_string()),
         hashes,
     })
 }
@@ -281,3 +279,105 @@ pub fn verify_hash<'a>(calculated: &Hash, candidates: &'a CandidateHashes) -> Ve
         messages,
     }
 }
+
+#[cfg(test)]
+mod tests { // Unit tests for the raw-hash and coreutils-digest parsers in this file
+    use super::*;
+
+    #[test]
+    fn test_read_raw_inputs() { // valid digests map to an algorithm, malformed ones to None
+        let example_path = Path::new("some_file"); // only recorded as the source
+        let valid_md5 = "d229da563da18fe5d58cd95a6467d584"; // 32 hex digits
+        let valid_sha1 = "b314c7ebb7d599944981908b7f3ed33a30e78f3a"; // 40 hex digits
+        let valid_sha1_2 = valid_sha1.to_uppercase(); // same digest in upper-case hex
+        let valid_sha256 = "1eb85fc97224598dad1852b5d6483bbcf0aa8608790dcc657a5a2a761ae9c8c6";
+        // Rejected inputs: non-hex, lone digit, 31 digits, 65 digits, space inside the digest
+        let invalid1 = "x";
+        let invalid2 = "a";
+        let invalid3 = "d229da563da18fe5d58cd95a6467d58";
+        let invalid4 = "1eb85fc97224598dad1852b5d6483bbcf0aa8608790dcc657a5a2a761ae9c8c67";
+        let invalid5 = "1eb85fc97224598dad1852b5d 483bbcf0aa8608790dcc657a5a2a761ae9c8c6";
+        // Each valid digest must be recognised with the matching algorithm
+        assert!(matches!(
+            read_raw_candidate_from_file(valid_md5, example_path),
+            Some(CandidateHashes {
+                alg: Algorithm::Md5,
+                ..
+            })
+        ));
+        assert!(matches!(
+            read_raw_candidate_from_file(valid_sha1, example_path),
+            Some(CandidateHashes {
+                alg: Algorithm::Sha1,
+                ..
+            })
+        ));
+        assert!(matches!(
+            read_raw_candidate_from_file(&valid_sha1_2, example_path),
+            Some(CandidateHashes {
+                alg: Algorithm::Sha1,
+                ..
+            })
+        ));
+        assert!(matches!(
+            read_raw_candidate_from_file(valid_sha256, example_path),
+            Some(CandidateHashes {
+                alg: Algorithm::Sha256,
+                ..
+            })
+        ));
+        // None of the malformed inputs may produce a candidate
+        for i in &[invalid1, invalid2, invalid3, invalid4, invalid5] {
+            assert!(read_raw_candidate_from_file(*i, example_path).is_none());
+        }
+    }
+
+    #[test]
+    fn test_read_shasums() { // '*' and ' ' markers, indentation and a blank line all parse
+        let shasums = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b *cp
+        75eb7420a9f5a260b04a3e8ad51e50f2838a17fc  lel.txt
+
+        fe6c26d485a3573a1cb0ad0682f5105325a1905f  shasums";
+        let lines = shasums.lines().map(std::io::Result::Ok); // parser expects io::Result items
+        let path = Path::new("SHASUMS");
+        let candidates = read_coreutils_digests_from_file(lines, path);
+
+        assert_eq!(
+            candidates,
+            Some(CandidateHashes {
+                alg: Algorithm::Sha1,
+                hashes: vec![
+                    CandidateHash {
+                        bytes: hex::decode("4b91f7a387a6edd4a7c0afb2897f1ca968c9695b").unwrap(),
+                        filename: Some("cp".to_owned()),
+                    },
+                    CandidateHash {
+                        bytes: hex::decode("75eb7420a9f5a260b04a3e8ad51e50f2838a17fc").unwrap(),
+                        filename: Some("lel.txt".to_owned()),
+                    },
+                    CandidateHash {
+                        bytes: hex::decode("fe6c26d485a3573a1cb0ad0682f5105325a1905f").unwrap(),
+                        filename: Some("shasums".to_owned()),
+                    }
+                ],
+                source: VerificationSource::DigestsFile(path.to_string_lossy().to_string()),
+            })
+        );
+    }
+
+    #[test]
+    fn test_invalid_shasums() { // lines breaking the "<hash> [ *]<name>" layout are rejected
+        let no_format = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b cp"; // one space, no marker
+        let invalid_format = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b .cp"; // '.' as marker
+        let extra_space = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b   cp"; // three spaces
+
+        for digest in [no_format, invalid_format, extra_space] {
+            let lines = digest.lines().map(std::io::Result::Ok);
+            assert!(
+                read_coreutils_digests_from_file(lines, Path::new("SHASUMS")).is_none(),
+                "Should be invalid digest: {:?}",
+                digest
+            );
+        }
+    }
+}