From e4d6fc04f60cf7b8173df7f261428b25d009ba39 Mon Sep 17 00:00:00 2001 From: Benedikt Peetz Date: Mon, 14 Jul 2025 16:03:50 +0200 Subject: feat(crates/yt/storage): Migrate inserts to operations and use methods This allows us to re-use the operations and in the future to provide undo-capabilities and a git-reflog like changelog. This commit also fixes some bugs with the old design. --- crates/yt/src/storage/db/extractor_hash.rs | 214 ++++++++ crates/yt/src/storage/db/get/extractor_hash.rs | 58 ++ crates/yt/src/storage/db/get/mod.rs | 4 + crates/yt/src/storage/db/get/playlist.rs | 58 ++ crates/yt/src/storage/db/get/subscription.rs | 39 ++ crates/yt/src/storage/db/get/video/mod.rs | 188 +++++++ crates/yt/src/storage/db/insert/mod.rs | 73 +++ crates/yt/src/storage/db/insert/playlist.rs | 207 +++++++ crates/yt/src/storage/db/insert/subscription.rs | 84 +++ crates/yt/src/storage/db/insert/video/mod.rs | 599 +++++++++++++++++++++ crates/yt/src/storage/db/mod.rs | 7 + crates/yt/src/storage/db/playlist/mod.rs | 49 ++ crates/yt/src/storage/db/subscription.rs | 41 ++ crates/yt/src/storage/db/video.rs | 313 +++++++++++ crates/yt/src/storage/notify.rs | 77 +++ crates/yt/src/storage/subscriptions.rs | 141 ----- crates/yt/src/storage/video_database/downloader.rs | 130 ----- .../src/storage/video_database/extractor_hash.rs | 163 ------ crates/yt/src/storage/video_database/get/mod.rs | 307 ----------- .../video_database/get/playlist/iterator.rs | 101 ---- .../src/storage/video_database/get/playlist/mod.rs | 167 ------ crates/yt/src/storage/video_database/mod.rs | 329 ----------- crates/yt/src/storage/video_database/notify.rs | 77 --- crates/yt/src/storage/video_database/set/mod.rs | 327 ----------- .../yt/src/storage/video_database/set/playlist.rs | 101 ---- 25 files changed, 2011 insertions(+), 1843 deletions(-) create mode 100644 crates/yt/src/storage/db/extractor_hash.rs create mode 100644 crates/yt/src/storage/db/get/extractor_hash.rs create mode 100644 crates/yt/src/storage/db/get/mod.rs create mode 100644 crates/yt/src/storage/db/get/playlist.rs create mode 100644 crates/yt/src/storage/db/get/subscription.rs create mode 100644 crates/yt/src/storage/db/get/video/mod.rs create mode 100644 crates/yt/src/storage/db/insert/mod.rs create mode 100644 crates/yt/src/storage/db/insert/playlist.rs create mode 100644 crates/yt/src/storage/db/insert/subscription.rs create mode 100644 crates/yt/src/storage/db/insert/video/mod.rs create mode 100644 crates/yt/src/storage/db/mod.rs create mode 100644 crates/yt/src/storage/db/playlist/mod.rs create mode 100644 crates/yt/src/storage/db/subscription.rs create mode 100644 crates/yt/src/storage/db/video.rs create mode 100644 crates/yt/src/storage/notify.rs delete mode 100644 crates/yt/src/storage/subscriptions.rs delete mode 100644 crates/yt/src/storage/video_database/downloader.rs delete mode 100644 crates/yt/src/storage/video_database/extractor_hash.rs delete mode 100644 crates/yt/src/storage/video_database/get/mod.rs delete mode 100644 crates/yt/src/storage/video_database/get/playlist/iterator.rs delete mode 100644 crates/yt/src/storage/video_database/get/playlist/mod.rs delete mode 100644 crates/yt/src/storage/video_database/mod.rs delete mode 100644 crates/yt/src/storage/video_database/notify.rs delete mode 100644 crates/yt/src/storage/video_database/set/mod.rs delete mode 100644 crates/yt/src/storage/video_database/set/playlist.rs (limited to 'crates') diff --git a/crates/yt/src/storage/db/extractor_hash.rs b/crates/yt/src/storage/db/extractor_hash.rs new file mode 100644 index 0000000..abe1f0f --- /dev/null +++ b/crates/yt/src/storage/db/extractor_hash.rs @@ -0,0 +1,214 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz +// Copyright (C) 2025 Benedikt Peetz +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see . + +use std::{collections::HashSet, fmt::Display, str::FromStr}; + +use anyhow::{Context, Result, bail}; +use blake3::Hash; +use log::debug; +use tokio::sync::OnceCell; +use yt_dlp::{info_json::InfoJson, json_cast, json_get}; + +use crate::app::App; + +static EXTRACTOR_HASH_LENGTH: OnceCell = OnceCell::const_new(); + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)] +pub(crate) struct ExtractorHash { + hash: Hash, +} + +impl Display for ExtractorHash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.hash.fmt(f) + } +} + +#[derive(Debug, Clone)] +pub(crate) struct ShortHash(String); + +impl Display for ShortHash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +#[derive(Debug, Clone)] +#[allow(clippy::module_name_repetitions)] +pub(crate) struct LazyExtractorHash { + value: ShortHash, +} + +impl FromStr for LazyExtractorHash { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + // perform some cheap validation + if s.len() > 64 { + bail!("A hash can only contain 64 bytes!"); + } + + Ok(Self { + value: ShortHash(s.to_owned()), + }) + } +} + +impl LazyExtractorHash { + /// Turn the [`LazyExtractorHash`] into the [`ExtractorHash`] + pub(crate) async fn realize(self, app: &App) -> Result { + ExtractorHash::from_short_hash(app, &self.value).await + } +} + +impl ExtractorHash { + #[must_use] + pub(crate) fn from_hash(hash: Hash) -> Self { + Self { hash } + } + + pub(crate) async fn from_short_hash(app: &App, s: &ShortHash) -> Result { + Ok(Self { + hash: Self::short_hash_to_full_hash(app, s).await?.hash, + }) + } + + pub(crate) fn from_info_json(entry: &InfoJson) -> Self { + // HACK(@bpeetz): The code that follows is a gross hack. + // One would expect the `id` to be unique _and_ constant for each and every possible info JSON. + // But .. it's just not. The `ARDMediathek` extractor, will sometimes return different `id`s for the same + // video, effectively causing us to insert the same video again into the db (which fails, + // because the URL is still unique). + // + // As such we _should_ probably find a constant value for all extractors, but that just does + // not exist currently, without processing each entry (which is expensive and which I would + // like to avoid). + // + // Therefor, we simply special case the `ARDBetaMediathek` extractor. <2025-07-04> + + // NOTE(@bpeetz): `yt-dlp` apparently uses these two different names for the same thing <2025-07-04> + let ie_key = { + if let Some(val) = entry.get("ie_key") { + json_cast!(val, as_str) + } else if let Some(val) = entry.get("extractor_key") { + json_cast!(val, as_str) + } else { + unreachable!( + "Either `ie_key` or `extractor_key` \ + should be set on every entry info json" + ) + } + }; + + if ie_key == "ARDBetaMediathek" { + // NOTE(@bpeetz): The mediathek is changing their Id scheme, from an `short` old Id to the + // new id. As the new id is too long for some people, yt-dlp will be default return the old + // one (when it is still available!). The new one is called `display_id`. + // Therefore, we simply check if the new one is explicitly returned, and otherwise use the + // normal `id` value, as these are cases where the old one is no longer available. <2025-07-04> + let id = if let Some(val) = entry.get("display_id") { + json_cast!(val, as_str).as_bytes() + } else { + json_get!(entry, "id", as_str).as_bytes() + }; + + Self { + hash: blake3::hash(id), + } + } else { + Self { + hash: blake3::hash(json_get!(entry, "id", as_str).as_bytes()), + } + } + } + + #[must_use] + pub(crate) fn hash(&self) -> &Hash { + &self.hash + } + + pub(crate) async fn into_short_hash(&self, app: &App) -> Result { + let needed_chars = if let Some(needed_chars) = EXTRACTOR_HASH_LENGTH.get() { + *needed_chars + } else { + let needed_chars = self + .get_needed_char_len(app) + .await + .context("Failed to calculate needed char length")?; + EXTRACTOR_HASH_LENGTH + .set(needed_chars) + .expect("This should work at this stage, as we checked above that it is empty."); + + needed_chars + }; + + Ok(ShortHash( + self.hash() + .to_hex() + .chars() + .take(needed_chars) + .collect::(), + )) + } + + async fn short_hash_to_full_hash(app: &App, s: &ShortHash) -> Result { + let all_hashes = Self::get_all(app) + .await + .context("Failed to fetch all extractor -hashesh from database")?; + + let needed_chars = s.0.len(); + + for hash in all_hashes { + if hash.hash().to_hex()[..needed_chars] == s.0 { + return Ok(hash); + } + } + + bail!("Your shortend hash, does not match a real hash (this is probably a bug)!"); + } + + async fn get_needed_char_len(&self, app: &App) -> Result { + debug!("Calculating the needed hash char length"); + let all_hashes = Self::get_all(app) + .await + .context("Failed to fetch all extractor -hashesh from database")?; + + let all_char_vec_hashes = all_hashes + .into_iter() + .map(|hash| hash.hash().to_hex().chars().collect::>()) + .collect::>>(); + + // This value should be updated later, if not rust will panic in the assertion. + let mut needed_chars: usize = 1000; + 'outer: for i in 1..64 { + let i_chars: Vec = all_char_vec_hashes + .iter() + .map(|vec| vec.iter().take(i).collect::()) + .collect(); + + let mut uniqnes_hashmap: HashSet = HashSet::new(); + for ch in i_chars { + if !uniqnes_hashmap.insert(ch) { + // The key was already in the hash map, thus we have a duplicated char and need + // at least one char more + continue 'outer; + } + } + + needed_chars = i; + break 'outer; + } + + assert!(needed_chars <= 64, "Hashes are only 64 bytes long"); + + Ok(needed_chars) + } +} diff --git a/crates/yt/src/storage/db/get/extractor_hash.rs b/crates/yt/src/storage/db/get/extractor_hash.rs new file mode 100644 index 0000000..d10b326 --- /dev/null +++ b/crates/yt/src/storage/db/get/extractor_hash.rs @@ -0,0 +1,58 @@ +use anyhow::Result; +use blake3::Hash; +use sqlx::{SqliteConnection, query}; + +use crate::{ + app::App, + storage::db::{ + extractor_hash::ExtractorHash, + video::{Video, video_from_record}, + }, +}; + +impl ExtractorHash { + pub(crate) async fn get(&self, txn: &mut SqliteConnection) -> Result