Tighten up digests file parsing

[hashgood] / src / verify.rs
diff --git a/src/verify.rs b/src/verify.rs

index 0811606408233d1b3c9bc83838959d03b03a1abb..8f0807444614e8b91ddca1914f3348aebb4116dd 100644 (file)
--- a/src/verify.rs
+++ b/src/verify.rs
@@ -3,8 +3,7 @@ use super::{
      VerificationSource,
  };
  #[cfg(feature = "paste")]
-use clipboard::{ClipboardContext, ClipboardProvider};
-use regex::Regex;
+use copypasta::{ClipboardContext, ClipboardProvider};
  use std::fs::File;
  use std::io;
  use std::io::prelude::*;
@@ -45,7 +44,7 @@ fn get_by_parameter(param: &str) -> Result<CandidateHashes, String> {
  fn get_from_clipboard() -> Result<CandidateHashes, String> {
      #[cfg(feature = "paste")]
      {
-        let mut ctx: ClipboardContext = match ClipboardProvider::new() {
+        let mut ctx: ClipboardContext = match ClipboardContext::new() {
              Ok(ctx) => ctx,
              Err(e) => return Err(format!("Error getting system clipboard: {}", e)),
          };
@@ -118,80 +117,81 @@ fn get_from_file(path: &PathBuf) -> Result<CandidateHashes, String> {
      ))
  }
  
+fn try_parse_hash(s: &str) -> Option<(Algorithm, Vec<u8>)> {
+    let bytes = match hex::decode(s.trim()) {
+        Ok(bytes) => bytes,
+        _ => return None,
+    };
+    let alg = match Algorithm::from_len(bytes.len()) {
+        Ok(alg) => alg,
+        _ => return None,
+    };
+    Some((alg, bytes))
+}
+
  fn read_raw_candidate_from_file(line: &str, path: &PathBuf) -> Option<CandidateHashes> {
-    // It is a little sad to use a dynamic regex in an otherwise nice Rust program
-    // These deserve to be replaced with a good old fashioned static parser
-    // But let's be honest: the impact is negligible
-    let re = Regex::new(r"^([[:xdigit:]]{32}|[[:xdigit:]]{40}|[[:xdigit:]]{64})$").unwrap();
-    if re.is_match(line) {
-        // These should both always succeed due to the matching
-        let bytes = match hex::decode(line) {
-            Ok(bytes) => bytes,
-            _ => return None,
-        };
-        let alg = match Algorithm::from_len(bytes.len()) {
-            Ok(alg) => alg,
-            _ => return None,
-        };
-        return Some(CandidateHashes {
-            alg,
-            source: VerificationSource::RawFile(path.clone()),
-            hashes: vec![CandidateHash {
-                bytes,
-                filename: None,
-            }],
-        });
-    }
-    None
+    let (alg, bytes) = try_parse_hash(line)?;
+    Some(CandidateHashes {
+        alg,
+        source: VerificationSource::RawFile(path.clone()),
+        hashes: vec![CandidateHash {
+            bytes,
+            filename: None,
+        }],
+    })
  }
  
-fn read_coreutils_digests_from_file<I>(lines: I, path: &PathBuf) -> Option<CandidateHashes>
+fn read_coreutils_digests_from_file<I, S>(lines: I, path: &PathBuf) -> Option<CandidateHashes>
  where
-    I: Iterator<Item = io::Result<String>>,
+    I: Iterator<Item = io::Result<S>>,
+    S: AsRef<str>,
  {
-    let re = Regex::new(
-        r"^(?P<hash>([[:xdigit:]]{32}|[[:xdigit:]]{40}|[[:xdigit:]]{64})) .(?P<filename>.+)$",
-    )
-    .unwrap();
-
      let mut hashes = vec![];
      let mut alg: Option<Algorithm> = None;
      for l in lines {
          if let Ok(l) = l {
-            let l = l.trim();
+            let l = l.as_ref().trim();
              // Allow (ignore) blank lines
              if l.is_empty() {
                  continue;
              }
-            // If we can capture a valid line, use it
-            if let Some(captures) = re.captures(&l) {
-                let hash = &captures["hash"];
-                let filename = &captures["filename"];
-                // Decode the hex and algorithm for this line
-                let line_bytes = match hex::decode(hash) {
-                    Ok(bytes) => bytes,
-                    _ => return None,
-                };
-                let line_alg = match Algorithm::from_len(line_bytes.len()) {
-                    Ok(alg) => alg,
-                    _ => return None,
-                };
-                if alg.is_some() && alg != Some(line_alg) {
-                    // Different algorithms in the same digest file are not supported
+            // Expected format
+            // <valid-hash><space><space-or-*><filename>
+            let (line_alg, bytes, filename) = match l
+                .find(' ')
+                .and_then(|space_pos| {
+                    // Char before filename should be space for text or * for binary
+                    match l.chars().nth(space_pos + 1) {
+                        Some(' ') | Some('*') => (l.get(..space_pos)).zip(l.get(space_pos + 2..)),
+                        _ => None,
+                    }
+                })
+                .and_then(|(maybe_hash, filename)| {
+                    // Filename should be in this position without extra whitespace
+                    if filename.trim() == filename {
+                        try_parse_hash(maybe_hash).map(|(alg, bytes)| (alg, bytes, filename))
+                    } else {
+                        None
+                    }
+                }) {
+                Some(t) => t,
+                None => {
+                    // if we have a line with content we cannot parse, this is an error
                      return None;
-                } else {
-                    // If we are the first line, we define the overall algorithm
-                    alg = Some(line_alg);
                  }
-                // So far so good - create an entry for this line
-                hashes.push(CandidateHash {
-                    bytes: line_bytes,
-                    filename: Some(filename.to_owned()),
-                });
-            } else {
-                // But if we have a line with content we cannot parse, this is an error
+            };
+            if alg.is_some() && alg != Some(line_alg) {
+                // Different algorithms in the same digest file are not supported
                  return None;
+            } else {
+                // If we are the first line, we define the overall algorithm
+                alg = Some(line_alg);
              }
+            // So far so good - create an entry for this line
+            hashes.push(CandidateHash {
+                bytes,
+                filename: Some(filename.to_owned()),
+            });
          }
      }
  
@@ -333,4 +333,53 @@ mod tests {
              assert!(read_raw_candidate_from_file(*i, &example_path).is_none());
          }
      }
+
+    #[test]
+    fn test_read_shasums() {
+        let shasums = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b *cp
+        75eb7420a9f5a260b04a3e8ad51e50f2838a17fc  lel.txt
+
+        fe6c26d485a3573a1cb0ad0682f5105325a1905f  shasums";
+        let lines = shasums.lines().map(|l| std::io::Result::Ok(l));
+        let path = PathBuf::from("SHASUMS");
+        let candidates = read_coreutils_digests_from_file(lines, &path);
+
+        assert_eq!(
+            candidates,
+            Some(CandidateHashes {
+                alg: Algorithm::Sha1,
+                hashes: vec![
+                    CandidateHash {
+                        bytes: hex::decode("4b91f7a387a6edd4a7c0afb2897f1ca968c9695b").unwrap(),
+                        filename: Some("cp".to_owned()),
+                    },
+                    CandidateHash {
+                        bytes: hex::decode("75eb7420a9f5a260b04a3e8ad51e50f2838a17fc").unwrap(),
+                        filename: Some("lel.txt".to_owned()),
+                    },
+                    CandidateHash {
+                        bytes: hex::decode("fe6c26d485a3573a1cb0ad0682f5105325a1905f").unwrap(),
+                        filename: Some("shasums".to_owned()),
+                    }
+                ],
+                source: VerificationSource::DigestsFile(path),
+            })
+        );
+    }
+
+    #[test]
+    fn test_invalid_shasums() {
+        let no_format = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b cp";
+        let invalid_format = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b .cp";
+        let extra_space = "4b91f7a387a6edd4a7c0afb2897f1ca968c9695b   cp";
+
+        for digest in [no_format, invalid_format, extra_space] {
+            let lines = digest.lines().map(|l| std::io::Result::Ok(l));
+            assert!(
+                read_coreutils_digests_from_file(lines, &PathBuf::from("SHASUMS")).is_none(),
+                "Should be invalid digest: {:?}",
+                digest
+            );
+        }
+    }
  }