From c254ed0c7d098cf3224e004a4b13a11632e432a3 Mon Sep 17 00:00:00 2001 From: Benedikt Peetz Date: Fri, 18 Jul 2025 18:10:58 +0200 Subject: perf(crates/yt/db/extractor_hash/realize): Allow passing in an `all_hashes` This avoids having to re-fetch that value for each realized short hash and massively speeds up the `process_line` code while running `select {file,split}`. --- crates/yt/src/storage/db/extractor_hash.rs | 51 ++++++++++++++++-------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/crates/yt/src/storage/db/extractor_hash.rs b/crates/yt/src/storage/db/extractor_hash.rs index abe1f0f..b828348 100644 --- a/crates/yt/src/storage/db/extractor_hash.rs +++ b/crates/yt/src/storage/db/extractor_hash.rs @@ -64,8 +64,12 @@ impl FromStr for LazyExtractorHash { impl LazyExtractorHash { /// Turn the [`LazyExtractorHash`] into the [`ExtractorHash`] - pub(crate) async fn realize(self, app: &App) -> Result { - ExtractorHash::from_short_hash(app, &self.value).await + pub(crate) async fn realize( + self, + app: &App, + all_hashes: Option<&[ExtractorHash]>, + ) -> Result { + ExtractorHash::from_short_hash(app, &self.value, all_hashes).await } } @@ -75,10 +79,27 @@ impl ExtractorHash { Self { hash } } - pub(crate) async fn from_short_hash(app: &App, s: &ShortHash) -> Result { - Ok(Self { - hash: Self::short_hash_to_full_hash(app, s).await?.hash, - }) + pub(crate) async fn from_short_hash( + app: &App, + s: &ShortHash, + all_hashes: Option<&[Self]>, + ) -> Result { + let all_hashes = if let Some(all) = all_hashes { + all + } else { + &Self::get_all(app) + .await + .context("Failed to fetch all extractor-hashes from the database")? + }; + let needed_chars = s.0.len(); + for hash in all_hashes { + // PERFORMANCE(@bpeetz): This could avoid the string construction and just use a + // numeric equality check instead. 
<2025-07-15> + if hash.hash().to_hex()[..needed_chars] == s.0 { + return Ok(*hash); + } + } + bail!("Your shortend hash, does not match a real hash (this is probably a bug)!"); } pub(crate) fn from_info_json(entry: &InfoJson) -> Self { @@ -135,7 +156,7 @@ impl ExtractorHash { &self.hash } - pub(crate) async fn into_short_hash(&self, app: &App) -> Result { + pub(crate) async fn as_short_hash(&self, app: &App) -> Result { let needed_chars = if let Some(needed_chars) = EXTRACTOR_HASH_LENGTH.get() { *needed_chars } else { @@ -159,22 +180,6 @@ impl ExtractorHash { )) } - async fn short_hash_to_full_hash(app: &App, s: &ShortHash) -> Result { - let all_hashes = Self::get_all(app) - .await - .context("Failed to fetch all extractor -hashesh from database")?; - - let needed_chars = s.0.len(); - - for hash in all_hashes { - if hash.hash().to_hex()[..needed_chars] == s.0 { - return Ok(hash); - } - } - - bail!("Your shortend hash, does not match a real hash (this is probably a bug)!"); - } - async fn get_needed_char_len(&self, app: &App) -> Result { debug!("Calculating the needed hash char length"); let all_hashes = Self::get_all(app) -- cgit 1.4.1