about summary refs log tree commit diff stats
path: root/src/storage/video_database/extractor_hash.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/storage/video_database/extractor_hash.rs')
-rw-r--r--src/storage/video_database/extractor_hash.rs151
1 files changed, 151 insertions, 0 deletions
diff --git a/src/storage/video_database/extractor_hash.rs b/src/storage/video_database/extractor_hash.rs
new file mode 100644
index 0000000..3af4f60
--- /dev/null
+++ b/src/storage/video_database/extractor_hash.rs
@@ -0,0 +1,151 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use std::{collections::HashMap, fmt::Display, str::FromStr};
+
+use anyhow::{bail, Result};
+use blake3::Hash;
+use log::debug;
+use tokio::sync::OnceCell;
+
+use crate::{app::App, storage::video_database::getters::get_all_hashes};
+
+static EXTRACTOR_HASH_LENGTH: OnceCell<usize> = OnceCell::const_new();
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct ExtractorHash {
+    hash: Hash,
+}
+
+#[derive(Debug, Clone)]
+pub struct ShortHash(String);
+
+impl Display for ShortHash {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct LazyExtractorHash {
+    value: ShortHash,
+}
+
+impl FromStr for LazyExtractorHash {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        // perform some cheap validation
+        if s.len() > 64 {
+            bail!("A hash can only contain 64 bytes!");
+        }
+
+        Ok(Self {
+            value: ShortHash(s.to_owned()),
+        })
+    }
+}
+
+impl LazyExtractorHash {
+    /// Turn the [`LazyExtractorHash`] into the [`ExtractorHash`]
+    pub async fn realize(self, app: &App) -> Result<ExtractorHash> {
+        ExtractorHash::from_short_hash(app, &self.value).await
+    }
+}
+
+impl ExtractorHash {
+    pub fn from_hash(hash: Hash) -> Self {
+        Self { hash }
+    }
+    pub async fn from_short_hash(app: &App, s: &ShortHash) -> Result<Self> {
+        Ok(Self {
+            hash: Self::short_hash_to_full_hash(app, s).await?,
+        })
+    }
+
+    pub fn hash(&self) -> &Hash {
+        &self.hash
+    }
+
+    pub async fn into_short_hash(&self, app: &App) -> Result<ShortHash> {
+        let needed_chars = if let Some(needed_chars) = EXTRACTOR_HASH_LENGTH.get() {
+            debug!("Using cached char length: {}", needed_chars);
+            *needed_chars
+        } else {
+            let needed_chars = self.get_needed_char_len(app).await?;
+            debug!("Setting the needed has char lenght.");
+            EXTRACTOR_HASH_LENGTH
+                .set(needed_chars)
+                .expect("This should work at this stage");
+
+            needed_chars
+        };
+
+        debug!("Formatting a hash with char length: {}", needed_chars);
+
+        Ok(ShortHash(
+            self.hash()
+                .to_hex()
+                .chars()
+                .into_iter()
+                .take(needed_chars)
+                .collect::<String>(),
+        ))
+    }
+
+    async fn short_hash_to_full_hash(app: &App, s: &ShortHash) -> Result<Hash> {
+        let all_hashes = get_all_hashes(app).await?;
+
+        let needed_chars = s.0.len();
+
+        for hash in all_hashes {
+            if &hash.to_hex()[..needed_chars] == s.0 {
+                return Ok(hash);
+            }
+        }
+
+        bail!("Your shortend hash, does not match a real hash (this is probably a bug)!");
+    }
+
+    async fn get_needed_char_len(&self, app: &App) -> Result<usize> {
+        debug!("Calculating the needed hash char length");
+        let all_hashes = get_all_hashes(app).await?;
+
+        let all_char_vec_hashes = all_hashes
+            .into_iter()
+            .map(|hash| hash.to_hex().chars().collect::<Vec<char>>())
+            .collect::<Vec<Vec<_>>>();
+
+        // This value should be updated later, if not rust will panic in the assertion.
+        let mut needed_chars: usize = 1000;
+        'outer: for i in 1..64 {
+            let i_chars: Vec<String> = all_char_vec_hashes
+                .iter()
+                .map(|vec| vec.iter().take(i).collect::<String>())
+                .collect();
+
+            let mut uniqnes_hashmap: HashMap<String, ()> = HashMap::new();
+            for ch in i_chars {
+                if let Some(()) = uniqnes_hashmap.insert(ch, ()) {
+                    // The key was already in the hash map, thus we have a duplicated char and need
+                    // at least one char more
+                    continue 'outer;
+                }
+            }
+
+            needed_chars = i;
+            break 'outer;
+        }
+
+        assert!(needed_chars <= 64, "Hashes are only 64 bytes long");
+
+        Ok(needed_chars)
+    }
+}