diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-06-16 13:58:55 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-06-16 13:58:55 +0200 |
commit | ab61a4e47a955dd4a5dabeef3ade1b85f6576b84 (patch) | |
tree | 4076a7f96ef2a6b6b359eff83bb9b8c8357a03e5 /crates/yt_dlp | |
parent | refactor(yt_dlp/lib): De-duplicate the info json sanitize code (diff) | |
download | yt-ab61a4e47a955dd4a5dabeef3ade1b85f6576b84.zip |
feat(yt_dlp): Support a DeArrow post processor
Diffstat (limited to 'crates/yt_dlp')
-rw-r--r-- | crates/yt_dlp/Cargo.toml | 2 | ||||
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 45 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/dearrow.rs | 108 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/mod.rs | 20 |
4 files changed, 164 insertions, 11 deletions
diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml index 90f2e10..e5d14fd 100644 --- a/crates/yt_dlp/Cargo.toml +++ b/crates/yt_dlp/Cargo.toml @@ -24,7 +24,9 @@ publish = true [dependencies] indexmap = { version = "2.9.0", default-features = false } log.workspace = true +reqwest = { version = "0.12.20", features = ["blocking", "json"] } rustpython = { git = "https://github.com/RustPython/RustPython.git", features = ["threading", "stdlib", "stdio", "importlib", "ssl"], default-features = false } +serde = { workspace = true, features = ["derive"] } serde_json.workspace = true thiserror = "2.0.12" url.workspace = true diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index 0f40f0a..16ec4ca 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -1,10 +1,11 @@ //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. -use std::{self, env, mem, path::PathBuf}; +use std::{self, env, fmt::Display, path::PathBuf}; use indexmap::IndexMap; use log::{Level, debug, error, info, log_enabled}; use logging::setup_logging; +use post_processors::PostProcessor; use rustpython::{ InterpreterConfig, vm::{ @@ -18,6 +19,7 @@ use rustpython::{ use url::Url; mod logging; +pub mod post_processors; pub mod progress_hook; #[macro_export] @@ -61,6 +63,7 @@ pub struct YoutubeDL { youtube_dl_class: PyObjectRef, yt_dlp_module: PyObjectRef, options: serde_json::Map<String, serde_json::Value>, + post_processors: Vec<Box<dyn PostProcessor>>, } impl std::fmt::Debug for YoutubeDL { @@ -209,6 +212,7 @@ impl YoutubeDL { youtube_dl_class, yt_dlp_module, options: output_options, + post_processors: options.post_processors, }) } @@ -399,9 +403,18 @@ impl YoutubeDL { let result = value.downcast::<PyDict>().expect("This should stay a dict"); - let json = json_dumps(result, vm); + let mut json = json_dumps(result, vm); - { + for pp in &self.post_processors { + if pp + .extractors() + .iter() + .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str)) + { + json = pp.process(json)?; + } else { + error!("Extractor not found for {pp:#?}"); + } } Ok(json) @@ -458,6 +471,9 @@ pub mod prepare { pub enum Error { #[error(transparent)] Python(#[from] PythonError), + + #[error("Failed to run a post processor")] + PostProcessorRun(#[from] post_processors::Error), } } @@ -473,15 +489,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); pub struct YoutubeDLOptions { options: serde_json::Map<String, serde_json::Value>, progress_hook: Option<ProgressHookFunction>, + post_processors: Vec<Box<dyn PostProcessor>>, } impl YoutubeDLOptions { #[must_use] pub fn new() -> Self { - Self { + let me = Self { options: serde_json::Map::new(), progress_hook: None, - } + post_processors: vec![], + }; + + me.with_post_processor(post_processors::dearrow::DeArrowPP) } #[must_use] @@ -489,10 +509,7 @@ impl YoutubeDLOptions { let mut options = self.options; options.insert(key.into(), value.into()); - Self { - options, - progress_hook: self.progress_hook, - } + Self { options, ..self } } #[must_use] @@ -501,12 +518,18 @@ impl YoutubeDLOptions { todo!() } else { Self { - options: self.options, progress_hook: Some(progress_hook), + ..self } } } + #[must_use] + pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self { + self.post_processors.push(Box::new(post_processor)); + self + } + /// # Errors /// If the underlying [`YoutubeDL::from_options`] errors. pub fn build(self) -> Result<YoutubeDL, build::Error> { @@ -517,7 +540,7 @@ impl YoutubeDLOptions { pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self { Self { options, - progress_hook: None, + ..Self::new() } } diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs new file mode 100644 index 0000000..110beeb --- /dev/null +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -0,0 +1,108 @@ +use log::{info, warn}; +use serde::{Deserialize, Serialize}; + +use crate::{InfoJson, json_get}; + +use super::PostProcessor; + +#[derive(Debug, Clone, Copy)] +pub struct DeArrowPP; + +impl PostProcessor for DeArrowPP { + fn extractors(&self) -> &'static [&'static str] { + &["Youtube"] + } + + fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> { + let mut output: DeArrowApi = reqwest::blocking::get(format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + json_get!(info, "id", as_str) + ))? + .json()?; + + output.titles.reverse(); + + let title_len = output.titles.len(); + loop { + let Some(title) = output.titles.pop() else { + break; + }; + + if (title.locked || title.votes < 1) && title_len > 1 { + info!( + "Skipping title {:#?}, as it is not good enough", + title.value + ); + // Skip titles that are not “good” enough. + continue; + } + + if let Some(old_title) = info.insert( + "title".to_owned(), + serde_json::Value::String(title.value.clone()), + ) { + warn!("Updating title from {:#?} to {:#?}", old_title, title.value); + info.insert("original_title".to_owned(), old_title); + } else { + warn!("Setting title to {:#?}", title.value); + } + + break; + } + + Ok(info) + } +} + +#[derive(Serialize, Deserialize)] +/// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow> +struct DeArrowApi { + titles: Vec<Title>, + thumbnails: Vec<Thumbnail>, + + #[serde(alias = "randomTime")] + random_time: Option<f64>, + + #[serde(alias = "videoDuration")] + video_duration: Option<f64>, + + #[serde(alias = "casualVotes")] + casual_votes: Vec<String>, +} + +#[derive(Serialize, Deserialize)] +struct Title { + /// Note: Titles will sometimes contain > before a word. + /// This tells the auto-formatter to not format a word. + /// If you have no auto-formatter, you can ignore this and replace it with an empty string + #[serde(alias = "title")] + value: String, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} + +#[derive(Serialize, Deserialize)] +struct Thumbnail { + // null if original is true + timestamp: Option<f64>, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs new file mode 100644 index 0000000..6067c7a --- /dev/null +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -0,0 +1,20 @@ +use crate::InfoJson; + +pub mod dearrow; + +pub trait PostProcessor: std::fmt::Debug + Send { + /// Process a [`InfoJson`] object and return the updated one. + /// + /// # Errors + /// If the processing steps failed. + fn process(&self, info: InfoJson) -> Result<InfoJson, Error>; + + /// The supported extractors for this post processor + fn extractors(&self) -> &'static [&'static str]; +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to access a api: {0}")] + Get(#[from] reqwest::Error), +} |