about summary refs log blame commit diff stats
path: root/src/storage/video_database/extractor_hash.rs
blob: 62a9eda2a05e0b34767b1287c6fff7387776fa16 (plain) (tree)



















                                                                          
                                            






















































                                                                                    

                                                                    





                                                          


                         









                                                                                
                                                     








































                                                                                                   
// yt - A fully featured command line YouTube client
//
// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
// SPDX-License-Identifier: GPL-3.0-or-later
//
// This file is part of Yt.
//
// You should have received a copy of the License along with this program.
// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.

use std::{collections::HashMap, fmt::Display, str::FromStr};

use anyhow::{bail, Result};
use blake3::Hash;
use log::debug;
use tokio::sync::OnceCell;

use crate::{app::App, storage::video_database::getters::get_all_hashes};

/// Process-wide cache of how many leading hex characters a [`ShortHash`] produced by
/// [`ExtractorHash::into_short_hash`] contains.
///
/// It starts out empty and is filled on the first call to
/// [`ExtractorHash::into_short_hash`]; later calls reuse the stored value.
static EXTRACTOR_HASH_LENGTH: OnceCell<usize> = OnceCell::const_new();

/// A full blake3 [`Hash`], wrapped so it can be abbreviated to a [`ShortHash`] and
/// recovered from one again.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtractorHash {
    /// The complete, unabbreviated hash.
    hash: Hash,
}

/// A leading part of a hash's hexadecimal encoding.
///
/// Values are either produced by [`ExtractorHash::into_short_hash`] or wrap the string
/// handed to [`LazyExtractorHash`]'s [`FromStr`] implementation.
#[derive(Debug, Clone)]
pub struct ShortHash(String);

impl Display for ShortHash {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}

/// A short hash that has been parsed from a string but not yet resolved to a full
/// [`ExtractorHash`].
///
/// Parsing (via [`FromStr`]) needs no [`App`]; the actual lookup is deferred until
/// [`LazyExtractorHash::realize`] is called.
#[derive(Debug, Clone)]
pub struct LazyExtractorHash {
    /// The still unresolved short hash.
    value: ShortHash,
}

impl FromStr for LazyExtractorHash {
    type Err = anyhow::Error;

    /// Parse a (possibly abbreviated) hexadecimal hash without consulting the database.
    ///
    /// Only cheap, purely syntactic checks are performed here; whether the value actually
    /// names a stored hash is decided later by [`LazyExtractorHash::realize`].
    /// Upper-case hex digits are accepted and normalised to lower case, because the short
    /// hash is later compared against the lower-case hex encoding of the full hashes.
    ///
    /// # Errors
    /// Returns an error if `s` is empty, longer than 64 bytes, or contains a character
    /// that is not an ASCII hexadecimal digit.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        // perform some cheap validation

        // An empty prefix would be a prefix of *every* hash, and thus resolve to an
        // arbitrary one.
        if s.is_empty() {
            bail!("A hash must contain at least one character!");
        }

        if s.len() > 64 {
            bail!("A hash can only contain 64 bytes!");
        }

        // Anything but a hex digit can never be part of a hex-encoded hash.
        if let Some(invalid) = s.chars().find(|ch| !ch.is_ascii_hexdigit()) {
            bail!(
                "A hash can only contain hexadecimal digits, but found '{}'!",
                invalid
            );
        }

        Ok(Self {
            value: ShortHash(s.to_ascii_lowercase()),
        })
    }
}

impl LazyExtractorHash {
    /// Resolve this [`LazyExtractorHash`] into a full [`ExtractorHash`].
    ///
    /// Consumes `self` and hands the stored short hash to
    /// [`ExtractorHash::from_short_hash`]; any error from that lookup is passed on
    /// unchanged.
    pub async fn realize(self, app: &App) -> Result<ExtractorHash> {
        let Self { value: short_hash } = self;
        ExtractorHash::from_short_hash(app, &short_hash).await
    }
}

impl ExtractorHash {
    /// Wrap an already computed full [`Hash`].
    pub fn from_hash(hash: Hash) -> Self {
        Self { hash }
    }

    /// Look up the full hash that the given [`ShortHash`] abbreviates.
    ///
    /// # Errors
    /// Fails if the known hashes cannot be fetched, or if `s` does not identify exactly
    /// one of them.
    pub async fn from_short_hash(app: &App, s: &ShortHash) -> Result<Self> {
        let hash = Self::short_hash_to_full_hash(app, s).await?;
        Ok(Self { hash })
    }

    /// Borrow the wrapped full [`Hash`].
    pub fn hash(&self) -> &Hash {
        &self.hash
    }

    /// Abbreviate this hash to the shortest hex prefix length that keeps all currently
    /// known hashes distinguishable.
    ///
    /// The prefix length is computed on first use and then cached in
    /// `EXTRACTOR_HASH_LENGTH` for the rest of the process.
    /// NOTE(review): because of that cache, hashes that become known after the first call
    /// are not taken into account when choosing the length.
    ///
    /// # Errors
    /// Fails if the prefix length has to be computed and fetching the known hashes fails.
    pub async fn into_short_hash(&self, app: &App) -> Result<ShortHash> {
        // `get_or_try_init` lets exactly one caller run the initializer while concurrent
        // callers wait for its result. A separate `get()` followed by `set()` can be
        // raced by a second task, whose `set()` then fails.
        let needed_chars = *EXTRACTOR_HASH_LENGTH
            .get_or_try_init(|| self.get_needed_char_len(app))
            .await?;

        Ok(ShortHash(
            self.hash.to_hex().chars().take(needed_chars).collect(),
        ))
    }

    /// Find the one known hash whose hex encoding starts with `s`.
    ///
    /// # Errors
    /// Fails if fetching the known hashes fails, if `s` is empty, if no known hash starts
    /// with `s`, or if several different hashes do.
    async fn short_hash_to_full_hash(app: &App, s: &ShortHash) -> Result<Hash> {
        let prefix = s.0.as_str();

        // The empty string is a prefix of every hash and therefore identifies none.
        if prefix.is_empty() {
            bail!("An empty short hash can not identify a real hash!");
        }

        // `starts_with` (unlike slicing the hex string by `prefix.len()`) can not panic
        // when the prefix is longer than the hex encoding.
        let mut candidates = get_all_hashes(app)
            .await?
            .into_iter()
            .filter(|hash| hash.to_hex().starts_with(prefix));

        match candidates.next() {
            Some(hash) => {
                // Refuse to guess when the prefix fits more than one distinct hash.
                if candidates.any(|other| other != hash) {
                    bail!(
                        "Your shortened hash ('{}') matches more than one real hash!",
                        prefix
                    );
                }
                Ok(hash)
            }
            None => {
                bail!("Your shortend hash, does not match a real hash (this is probably a bug)!")
            }
        }
    }

    /// Compute the smallest number of leading hex characters at which all known hashes
    /// are pairwise different.
    ///
    /// Returns `1` when there are no known hashes. Returns the full length (64) when even
    /// the complete encodings are not pairwise different, i.e. when the fetched list
    /// contains the same hash more than once.
    ///
    /// # Errors
    /// Fails if fetching the known hashes fails.
    async fn get_needed_char_len(&self, app: &App) -> Result<usize> {
        /// Number of characters in the hex encoding of a full hash.
        const HEX_HASH_LEN: usize = 64;

        debug!("Calculating the needed hash char length");

        let hex_hashes: Vec<_> = get_all_hashes(app)
            .await?
            .into_iter()
            .map(|hash| hash.to_hex())
            .collect();

        // The range is inclusive: two hashes may differ only in their very last character.
        let needed_chars = (1..=HEX_HASH_LEN)
            .find(|&len| {
                // Used as a set of the prefixes seen so far; `insert` returning `Some`
                // means the prefix was already present, so `len` is too short.
                let mut seen: HashMap<&str, ()> = HashMap::with_capacity(hex_hashes.len());
                hex_hashes
                    .iter()
                    .all(|hex| seen.insert(&hex[..len], ()).is_none())
            })
            .unwrap_or(HEX_HASH_LEN);

        Ok(needed_chars)
    }
}