diff options
Diffstat (limited to '')
-rw-r--r-- | crates/yt_dlp/Cargo.toml | 2 | ||||
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 219 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/dearrow.rs | 108 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/mod.rs | 20 |
4 files changed, 279 insertions, 70 deletions
diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml index 90f2e10..e5d14fd 100644 --- a/crates/yt_dlp/Cargo.toml +++ b/crates/yt_dlp/Cargo.toml @@ -24,7 +24,9 @@ publish = true [dependencies] indexmap = { version = "2.9.0", default-features = false } log.workspace = true +reqwest = { version = "0.12.20", features = ["blocking", "json"] } rustpython = { git = "https://github.com/RustPython/RustPython.git", features = ["threading", "stdlib", "stdio", "importlib", "ssl"], default-features = false } +serde = { workspace = true, features = ["derive"] } serde_json.workspace = true thiserror = "2.0.12" url.workspace = true diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index dd42fc6..16ec4ca 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -1,10 +1,11 @@ //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. -use std::{self, env, mem, path::PathBuf}; +use std::{self, env, fmt::Display, path::PathBuf}; use indexmap::IndexMap; use log::{Level, debug, error, info, log_enabled}; use logging::setup_logging; +use post_processors::PostProcessor; use rustpython::{ InterpreterConfig, vm::{ @@ -18,23 +19,42 @@ use rustpython::{ use url::Url; mod logging; +pub mod post_processors; pub mod progress_hook; #[macro_export] macro_rules! json_get { - ($value:expr, $name:literal, $into:ident) => { - $crate::json_cast!($value.get($name).expect("Should exist"), $into) - }; + ($value:expr, $name:literal, $into:ident) => {{ + match $value.get($name) { + Some(val) => $crate::json_cast!(val, $into), + None => panic!( + concat!( + "Expected '", + $name, + "' to be a key for the'", + stringify!($value), + "' object: {:#?}" + ), + $value + ), + } + }}; } #[macro_export] macro_rules! json_cast { - ($value:expr, $into:ident) => { - $value.$into().expect(concat!( - "Should be able to cast value into ", - stringify!($into) - )) - }; + ($value:expr, $into:ident) => {{ + match $value.$into() { + Some(result) => result, + None => panic!( + concat!( + "Expected to be able to cast value ({:#?}) ", + stringify!($into) + ), + $value + ), + } + }}; } /// The core of the `yt_dlp` interface. @@ -43,6 +63,7 @@ pub struct YoutubeDL { youtube_dl_class: PyObjectRef, yt_dlp_module: PyObjectRef, options: serde_json::Map<String, serde_json::Value>, + post_processors: Vec<Box<dyn PostProcessor>>, } impl std::fmt::Debug for YoutubeDL { @@ -60,7 +81,7 @@ impl YoutubeDL { /// /// # Errors /// If a python call fails. - pub fn from_options(mut options: YoutubeDLOptions) -> Result<Self, build::Error> { + pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> { let mut settings = vm::Settings::default(); if let Ok(python_path) = env::var("PYTHONPATH") { for path in python_path.split(':') { @@ -92,9 +113,8 @@ impl YoutubeDL { let yt_dlp_module = vm.import("yt_dlp", 0)?; let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - let maybe_hook = mem::take(&mut options.progress_hook); - let opts = options.into_py_dict(vm); - if let Some(function) = maybe_hook { + let opts = json_loads(options.options, vm); + if let Some(function) = options.progress_hook { opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); vm.new_pyobj(vec![hook]) @@ -192,6 +212,7 @@ impl YoutubeDL { youtube_dl_class, yt_dlp_module, options: output_options, + post_processors: options.post_processors, }) } @@ -267,7 +288,7 @@ impl YoutubeDL { download: bool, process: bool, ) -> Result<InfoJson, extract_info::Error> { - match self.interpreter.enter(|vm| { + self.interpreter.enter(|vm| { let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]); let kw_args = KwArgs::new({ @@ -279,9 +300,13 @@ impl YoutubeDL { let fun_args = FuncArgs::new(pos_args, kw_args); - let inner = self.youtube_dl_class.get_attr("extract_info", vm)?; + let inner = self + .youtube_dl_class + .get_attr("extract_info", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; let result = inner - .call_with_args(fun_args, vm)? + .call_with_args(fun_args, vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))? .downcast::<PyDict>() .expect("This is a dict"); @@ -295,7 +320,9 @@ impl YoutubeDL { }); let mut out = vec![]; - let next = generator.get_attr("__next__", vm)?; + let next = generator + .get_attr("__next__", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; while let Ok(output) = next.call((), vm) { out.push(output); @@ -303,27 +330,16 @@ impl YoutubeDL { break; } } - result.set_item("entries", vm.new_pyobj(out), vm)?; + result + .set_item("entries", vm.new_pyobj(out), vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; } } - let result = { - let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; - let value = sanitize.call((result,), vm)?; + let result = self.prepare_info_json(result, vm)?; - value.downcast::<PyDict>().expect("This should stay a dict") - }; - - let result_json = json_dumps(result, vm); - - Ok::<_, PyRef<PyBaseException>>(result_json) - }) { - Ok(ok) => Ok(ok), - Err(err) => self.interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(extract_info::Error::Python(buffer)) - }), - } + Ok(result) + }) } /// Take the (potentially modified) result of the information extractor (i.e., @@ -344,7 +360,7 @@ impl YoutubeDL { ie_result: InfoJson, download: bool, ) -> Result<InfoJson, process_ie_result::Error> { - match self.interpreter.enter(|vm| { + self.interpreter.enter(|vm| { let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]); let kw_args = KwArgs::new({ @@ -355,46 +371,109 @@ impl YoutubeDL { let fun_args = FuncArgs::new(pos_args, kw_args); - let inner = self.youtube_dl_class.get_attr("process_ie_result", vm)?; + let inner = self + .youtube_dl_class + .get_attr("process_ie_result", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; let result = inner - .call_with_args(fun_args, vm)? + .call_with_args(fun_args, vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))? .downcast::<PyDict>() .expect("This is a dict"); - let result = { - let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; - let value = sanitize.call((result,), vm)?; + let result = self.prepare_info_json(result, vm)?; - value.downcast::<PyDict>().expect("This should stay a dict") - }; + Ok(result) + }) + } - let result_json = json_dumps(result, vm); + fn prepare_info_json( + &self, + info: PyRef<PyDict>, + vm: &VirtualMachine, + ) -> Result<InfoJson, prepare::Error> { + let sanitize = self + .youtube_dl_class + .get_attr("sanitize_info", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; - Ok::<_, PyRef<PyBaseException>>(result_json) - }) { - Ok(ok) => Ok(ok), - Err(err) => self.interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(process_ie_result::Error::Python(buffer)) - }), + let value = sanitize + .call((info,), vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; + + let result = value.downcast::<PyDict>().expect("This should stay a dict"); + + let mut json = json_dumps(result, vm); + + for pp in &self.post_processors { + if pp + .extractors() + .iter() + .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str)) + { + json = pp.process(json)?; + } else { + error!("Extractor not found for {pp:#?}"); + } } + + Ok(json) + } +} + +#[derive(thiserror::Error, Debug)] +pub struct PythonError(pub String); + +impl Display for PythonError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Python threw an exception: {}", self.0) + } +} + +impl PythonError { + fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self { + let buffer = process_exception(vm, exc); + Self(buffer) } } #[allow(missing_docs)] pub mod process_ie_result { + use crate::{PythonError, prepare}; + #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), + #[error(transparent)] + Python(#[from] PythonError), + + #[error("Failed to prepare the info json")] + InfoJsonPrepare(#[from] prepare::Error), } } #[allow(missing_docs)] pub mod extract_info { + use crate::{PythonError, prepare}; + #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), + #[error(transparent)] + Python(#[from] PythonError), + + #[error("Failed to prepare the info json")] + InfoJsonPrepare(#[from] prepare::Error), + } +} +#[allow(missing_docs)] +pub mod prepare { + use crate::{PythonError, post_processors}; + + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), + + #[error("Failed to run a post processor")] + PostProcessorRun(#[from] post_processors::Error), } } @@ -410,15 +489,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); pub struct YoutubeDLOptions { options: serde_json::Map<String, serde_json::Value>, progress_hook: Option<ProgressHookFunction>, + post_processors: Vec<Box<dyn PostProcessor>>, } impl YoutubeDLOptions { #[must_use] pub fn new() -> Self { - Self { + let me = Self { options: serde_json::Map::new(), progress_hook: None, - } + post_processors: vec![], + }; + + me.with_post_processor(post_processors::dearrow::DeArrowPP) } #[must_use] @@ -426,10 +509,7 @@ impl YoutubeDLOptions { let mut options = self.options; options.insert(key.into(), value.into()); - Self { - options, - progress_hook: self.progress_hook, - } + Self { options, ..self } } #[must_use] @@ -438,12 +518,18 @@ impl YoutubeDLOptions { todo!() } else { Self { - options: self.options, progress_hook: Some(progress_hook), + ..self } } } + #[must_use] + pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self { + self.post_processors.push(Box::new(post_processor)); + self + } + /// # Errors /// If the underlying [`YoutubeDL::from_options`] errors. pub fn build(self) -> Result<YoutubeDL, build::Error> { @@ -454,7 +540,7 @@ impl YoutubeDLOptions { pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self { Self { options, - progress_hook: None, + ..Self::new() } } @@ -462,10 +548,6 @@ impl YoutubeDLOptions { pub fn get(&self, key: &str) -> Option<&serde_json::Value> { self.options.get(key) } - - fn into_py_dict(self, vm: &VirtualMachine) -> PyRef<PyDict> { - json_loads(self.options, vm) - } } #[allow(missing_docs)] @@ -474,9 +556,6 @@ pub mod build { pub enum Error { #[error("Python threw an exception: {0}")] Python(String), - - #[error("Io error: {0}")] - Io(#[from] std::io::Error), } } diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs new file mode 100644 index 0000000..110beeb --- /dev/null +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -0,0 +1,108 @@ +use log::{info, warn}; +use serde::{Deserialize, Serialize}; + +use crate::{InfoJson, json_get}; + +use super::PostProcessor; + +#[derive(Debug, Clone, Copy)] +pub struct DeArrowPP; + +impl PostProcessor for DeArrowPP { + fn extractors(&self) -> &'static [&'static str] { + &["Youtube"] + } + + fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> { + let mut output: DeArrowApi = reqwest::blocking::get(format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + json_get!(info, "id", as_str) + ))? + .json()?; + + output.titles.reverse(); + + let title_len = output.titles.len(); + loop { + let Some(title) = output.titles.pop() else { + break; + }; + + if (title.locked || title.votes < 1) && title_len > 1 { + info!( + "Skipping title {:#?}, as it is not good enough", + title.value + ); + // Skip titles that are not “good” enough. + continue; + } + + if let Some(old_title) = info.insert( + "title".to_owned(), + serde_json::Value::String(title.value.clone()), + ) { + warn!("Updating title from {:#?} to {:#?}", old_title, title.value); + info.insert("original_title".to_owned(), old_title); + } else { + warn!("Setting title to {:#?}", title.value); + } + + break; + } + + Ok(info) + } +} + +#[derive(Serialize, Deserialize)] +/// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow> +struct DeArrowApi { + titles: Vec<Title>, + thumbnails: Vec<Thumbnail>, + + #[serde(alias = "randomTime")] + random_time: Option<f64>, + + #[serde(alias = "videoDuration")] + video_duration: Option<f64>, + + #[serde(alias = "casualVotes")] + casual_votes: Vec<String>, +} + +#[derive(Serialize, Deserialize)] +struct Title { + /// Note: Titles will sometimes contain > before a word. + /// This tells the auto-formatter to not format a word. + /// If you have no auto-formatter, you can ignore this and replace it with an empty string + #[serde(alias = "title")] + value: String, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} + +#[derive(Serialize, Deserialize)] +struct Thumbnail { + // null if original is true + timestamp: Option<f64>, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs new file mode 100644 index 0000000..6067c7a --- /dev/null +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -0,0 +1,20 @@ +use crate::InfoJson; + +pub mod dearrow; + +pub trait PostProcessor: std::fmt::Debug + Send { + /// Process a [`InfoJson`] object and return the updated one. + /// + /// # Errors + /// If the processing steps failed. + fn process(&self, info: InfoJson) -> Result<InfoJson, Error>; + + /// The supported extractors for this post processor + fn extractors(&self) -> &'static [&'static str]; +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to access a api: {0}")] + Get(#[from] reqwest::Error), +} |