X-Git-Url: https://code.octet-stream.net/hashgood/blobdiff_plain/2692cd1acb891e7a89041e1095de6df438c0326e..b237f9d12de2062843975400c85dcad05bf4065c:/src/verify.rs?ds=inline

diff --git a/src/verify.rs b/src/verify.rs
index 0811606..25e565f 100644
--- a/src/verify.rs
+++ b/src/verify.rs
@@ -3,13 +3,12 @@ use super::{
     VerificationSource,
 };
 #[cfg(feature = "paste")]
-use clipboard::{ClipboardContext, ClipboardProvider};
-use regex::Regex;
+use copypasta::{ClipboardContext, ClipboardProvider};
 use std::fs::File;
 use std::io;
 use std::io::prelude::*;
 use std::io::BufReader;
-use std::path::PathBuf;
+use std::path::Path;
 
 /// Calculate a list of candidate hashes based on the options specified.
 /// If no hash options have been specified returns None.
@@ -45,7 +44,7 @@ fn get_by_parameter(param: &str) -> Result<CandidateHashes, String> {
 fn get_from_clipboard() -> Result<CandidateHashes, String> {
     #[cfg(feature = "paste")]
     {
-        let mut ctx: ClipboardContext = match ClipboardProvider::new() {
+        let mut ctx: ClipboardContext = match ClipboardContext::new() {
             Ok(ctx) => ctx,
             Err(e) => return Err(format!("Error getting system clipboard: {}", e)),
         };
@@ -75,7 +74,7 @@ fn get_from_clipboard() -> Result<CandidateHashes, String> {
 }
 
 /// Generate a candidate hash from the digests file specified (could be "-" for STDIN), or throw an error.
-fn get_from_file(path: &PathBuf) -> Result<CandidateHashes, String> {
+fn get_from_file(path: &Path) -> Result<CandidateHashes, String> {
     // Get a reader for either standard input or the chosen path
     let reader: Box<dyn Read> = if path.to_str() == Some("-") {
         Box::new(std::io::stdin())
@@ -118,80 +117,81 @@ fn get_from_file(path: &PathBuf) -> Result<CandidateHashes, String> {
     ))
 }
 
-fn read_raw_candidate_from_file(line: &str, path: &PathBuf) -> Option<CandidateHashes> {
-    // It is a little sad to use a dynamic regex in an otherwise nice Rust program
-    // These deserve to be replaced with a good old fashioned static parser
-    // But let's be honest: the impact is negligible
-    let re = Regex::new(r"^([[:xdigit:]]{32}|[[:xdigit:]]{40}|[[:xdigit:]]{64})$").unwrap();
-    if re.is_match(line) {
-        // These should both always succeed due to the matching
-        let bytes = match hex::decode(line) {
-            Ok(bytes) => bytes,
-            _ => return None,
-        };
-        let alg = match Algorithm::from_len(bytes.len()) {
-            Ok(alg) => alg,
-            _ => return None,
-        };
-        return Some(CandidateHashes {
-            alg,
-            source: VerificationSource::RawFile(path.clone()),
-            hashes: vec![CandidateHash {
-                bytes,
-                filename: None,
-            }],
-        });
-    }
-    None
+/// Decode a hex digest and work out which algorithm it belongs to.
+///
+/// Whitespace around `s` is trimmed before decoding. The algorithm is looked up
+/// from the decoded byte count via `Algorithm::from_len`. Returns `None` when
+/// `s` is not valid hex or when `from_len` rejects the length.
+fn try_parse_hash(s: &str) -> Option<(Algorithm, Vec<u8>)> {
+    // The concrete decode/lookup errors are discarded; either one yields `None`
+    let decoded = hex::decode(s.trim()).ok()?;
+    let algorithm = Algorithm::from_len(decoded.len()).ok()?;
+    Some((algorithm, decoded))
+}
+
+/// Treat `line` as one bare hex digest from `path`; `None` if `try_parse_hash` rejects it.
+fn read_raw_candidate_from_file(line: &str, path: &Path) -> Option<CandidateHashes> {
+    try_parse_hash(line).map(|(alg, bytes)| CandidateHashes {
+        alg,
+        source: VerificationSource::RawFile(path.to_string_lossy().into_owned()),
+        hashes: vec![CandidateHash {
+            bytes,
+            filename: None,
+        }],
+    })
 }
 
-fn read_coreutils_digests_from_file<I>(lines: I, path: &PathBuf) -> Option<CandidateHashes>
+fn read_coreutils_digests_from_file<I, S>(lines: I, path: &Path) -> Option<CandidateHashes>
 where
-    I: Iterator<Item = io::Result<String>>,
+    I: Iterator<Item = io::Result<S>>,
+    S: AsRef<str>,
 {
-    let re = Regex::new(
-        r"^(?P<hash>([[:xdigit:]]{32}|[[:xdigit:]]{40}|[[:xdigit:]]{64})) .(?P<filename>.+)$",
-    )
-    .unwrap();
-
     let mut hashes = vec![];
     let mut alg: Option<Algorithm> = None;
     for l in lines {
         if let Ok(l) = l {
-            let l = l.trim();
+            let l = l.as_ref().trim();
             // Allow (ignore) blank lines
             if l.is_empty() {
                 continue;
             }
-            // If we can capture a valid line, use it
-            if let Some(captures) = re.captures(&l) {
-                let hash = &captures["hash"];
-                let filename = &captures["filename"];
-                // Decode the hex and algorithm for this line
-                let line_bytes = match hex::decode(hash) {
-                    Ok(bytes) => bytes,
-                    _ => return None,
-                };
-                let line_alg = match Algorithm::from_len(line_bytes.len()) {
-                    Ok(alg) => alg,
-                    _ => return None,
-                };
-                if alg.is_some() && alg != Some(line_alg) {
-                    // Different algorithms in the same digest file are not supported
+            // Expected format
+            // <valid-hash><space><space-or-*><filename>
+            let (line_alg, bytes, filename) = match l
+                .find(' ')
+                .and_then(|space_pos| {
+                    // Char before filename should be space for text or * for binary
+                    match l.chars().nth(space_pos + 1) {
+                        Some(' ') | Some('*') => (l.get(..space_pos)).zip(l.get(space_pos + 2..)),
+                        _ => None,
+                    }
+                })
+                .and_then(|(maybe_hash, filename)| {
+                    // Filename should be in this position without extra whitespace
+                    if filename.trim() == filename {
+                        try_parse_hash(maybe_hash).map(|(alg, bytes)| (alg, bytes, filename))
+                    } else {
+                        None
+                    }
+                }) {
+                Some(t) => t,
+                None => {
+                    // if we have a line with content we cannot parse, this is an error
                     return None;
-                } else {
-                    // If we are the first line, we define the overall algorithm
-                    alg = Some(line_alg);
                 }
-                // So far so good - create an entry for this line
-                hashes.push(CandidateHash {
-                    bytes: line_bytes,
-                    filename: Some(filename.to_owned()),
-                });
-            } else {
-                // But if we have a line with content we cannot parse, this is an error
+            };
+            if alg.is_some() && alg != Some(line_alg) {
+                // Different algorithms in the same digest file are not supported
                 return None;
+            } else {
+                // If we are the first line, we define the overall algorithm
+                alg = Some(line_alg);
             }
+            // So far so good - create an entry for this line
+            hashes.push(CandidateHash {
+                bytes,
+                filename: Some(filename.to_owned()),
+            });
         }
     }
 
@@ -207,7 +207,7 @@ where
     // Otherwise all is well and we can return our results
     Some(CandidateHashes {
         alg,
-        source: VerificationSource::DigestsFile(path.clone()),
+        source: VerificationSource::DigestsFile(path.to_string_lossy().to_string()),
         hashes,
     })
 }
@@ -288,7 +288,7 @@ mod tests {
 
     #[test]
     fn test_read_raw_inputs() {
-        let example_path: PathBuf = "some_file".into();
+        let example_path = Path::new("some_file");
         let valid_md5 = "d229da563da18fe5d58cd95a6467d584";
         let valid_sha1 = "b314c7ebb7d599944981908b7f3ed33a30e78f3a";
         let valid_sha1_2 = valid_sha1.to_uppercase();
@@ -333,4 +333,53 @@ mod tests {
             assert!(read_raw_candidate_from_file(*i, &example_path).is_none());
         }
     }
+
+    #[test]
+    fn test_read_shasums() {
+        // Binary (`*`) and text (two-space) entries, plus a blank line to skip
+        let shasums = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b *cp
+        75eb7420a9f5a260b04a3e8ad51e50f2838a17fc  lel.txt
+
+        fe6c26d485a3573a1cb0ad0682f5105325a1905f  shasums";
+        let path = Path::new("SHASUMS");
+        let source_lines = shasums.lines().map(|l| std::io::Result::Ok(l));
+        let parsed = read_coreutils_digests_from_file(source_lines, path);
+
+        // Entries are expected in file order, with the mode character stripped
+        let expected_entries = [
+            ("4b91f7a387a6edd4a7c0afb2897f1ca968c9695b", "cp"),
+            ("75eb7420a9f5a260b04a3e8ad51e50f2838a17fc", "lel.txt"),
+            ("fe6c26d485a3573a1cb0ad0682f5105325a1905f", "shasums"),
+        ];
+        let hashes = expected_entries
+            .iter()
+            .map(|&(digest, name)| CandidateHash {
+                bytes: hex::decode(digest).unwrap(),
+                filename: Some(name.to_owned()),
+            })
+            .collect();
+        let expected = CandidateHashes {
+            alg: Algorithm::Sha1,
+            hashes,
+            source: VerificationSource::DigestsFile(path.to_string_lossy().to_string()),
+        };
+
+        assert_eq!(parsed, Some(expected));
+    }
+
+    #[test]
+    fn test_invalid_shasums() {
+        // Every entry breaks the `<hash><space><space-or-*><filename>` layout
+        let rejected = [
+            "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b cp", // no mode character
+            "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b .cp", // mode is neither ' ' nor '*'
+            "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b   cp", // filename starts with a space
+        ];
+
+        for digest in rejected {
+            let lines = digest.lines().map(|l| std::io::Result::Ok(l));
+            let parsed = read_coreutils_digests_from_file(lines, Path::new("SHASUMS"));
+            assert!(parsed.is_none(), "Should be invalid digest: {:?}", digest);
+        }
+    }
 }