diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-07-18 18:10:58 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-07-18 18:10:58 +0200 |
commit | c254ed0c7d098cf3224e004a4b13a11632e432a3 (patch) | |
tree | 0d5b5950ca85397f94ca32bada8e7397a4d8659c /crates | |
parent | fix(crates/yt/config): Ensure that the download_dir is created (diff) | |
download | yt-c254ed0c7d098cf3224e004a4b13a11632e432a3.zip |
perf(crates/yt/db/extractor_hash/realize): Allow passing in `all_hashes`
This avoids having to re-fetch the full list of extractor hashes from the database for each realized short hash, which massively speeds up the `process_line` code while running `select {file,split}`.
Diffstat (limited to 'crates')
-rw-r--r-- | crates/yt/src/storage/db/extractor_hash.rs | 51 |
1 file changed, 28 insertions, 23 deletions
diff --git a/crates/yt/src/storage/db/extractor_hash.rs b/crates/yt/src/storage/db/extractor_hash.rs index abe1f0f..b828348 100644 --- a/crates/yt/src/storage/db/extractor_hash.rs +++ b/crates/yt/src/storage/db/extractor_hash.rs @@ -64,8 +64,12 @@ impl FromStr for LazyExtractorHash { impl LazyExtractorHash { /// Turn the [`LazyExtractorHash`] into the [`ExtractorHash`] - pub(crate) async fn realize(self, app: &App) -> Result<ExtractorHash> { - ExtractorHash::from_short_hash(app, &self.value).await + pub(crate) async fn realize( + self, + app: &App, + all_hashes: Option<&[ExtractorHash]>, + ) -> Result<ExtractorHash> { + ExtractorHash::from_short_hash(app, &self.value, all_hashes).await } } @@ -75,10 +79,27 @@ impl ExtractorHash { Self { hash } } - pub(crate) async fn from_short_hash(app: &App, s: &ShortHash) -> Result<Self> { - Ok(Self { - hash: Self::short_hash_to_full_hash(app, s).await?.hash, - }) + pub(crate) async fn from_short_hash( + app: &App, + s: &ShortHash, + all_hashes: Option<&[Self]>, + ) -> Result<Self> { + let all_hashes = if let Some(all) = all_hashes { + all + } else { + &Self::get_all(app) + .await + .context("Failed to fetch all extractor-hashes from the database")? + }; + let needed_chars = s.0.len(); + for hash in all_hashes { + // PERFORMANCE(@bpeetz): This could avoid the string construction and just use a + // numeric equality check instead. 
<2025-07-15> + if hash.hash().to_hex()[..needed_chars] == s.0 { + return Ok(*hash); + } + } + bail!("Your shortend hash, does not match a real hash (this is probably a bug)!"); } pub(crate) fn from_info_json(entry: &InfoJson) -> Self { @@ -135,7 +156,7 @@ impl ExtractorHash { &self.hash } - pub(crate) async fn into_short_hash(&self, app: &App) -> Result<ShortHash> { + pub(crate) async fn as_short_hash(&self, app: &App) -> Result<ShortHash> { let needed_chars = if let Some(needed_chars) = EXTRACTOR_HASH_LENGTH.get() { *needed_chars } else { @@ -159,22 +180,6 @@ impl ExtractorHash { )) } - async fn short_hash_to_full_hash(app: &App, s: &ShortHash) -> Result<Self> { - let all_hashes = Self::get_all(app) - .await - .context("Failed to fetch all extractor -hashesh from database")?; - - let needed_chars = s.0.len(); - - for hash in all_hashes { - if hash.hash().to_hex()[..needed_chars] == s.0 { - return Ok(hash); - } - } - - bail!("Your shortend hash, does not match a real hash (this is probably a bug)!"); - } - async fn get_needed_char_len(&self, app: &App) -> Result<usize> { debug!("Calculating the needed hash char length"); let all_hashes = Self::get_all(app) |