diff options
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 88 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/dearrow.rs | 58 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/mod.rs | 120 | ||||
-rw-r--r-- | crates/yt_dlp/src/progress_hook.rs | 4 |
4 files changed, 207 insertions, 63 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index e7b37c6..d0465e1 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -10,16 +10,14 @@ //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. -use std::{self, env, fmt::Display, path::PathBuf}; +use std::{env, fmt::Display, path::PathBuf}; use indexmap::IndexMap; use log::{Level, debug, error, info, log_enabled}; -use logging::setup_logging; -use post_processors::PostProcessor; use rustpython::{ InterpreterConfig, vm::{ - self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, VirtualMachine, + self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, builtins::{PyBaseException, PyBaseExceptionRef, PyDict, PyList, PyStr}, function::{FuncArgs, KwArgs, PosArgs}, py_io::Write, @@ -28,10 +26,13 @@ use rustpython::{ }; use url::Url; -mod logging; +use crate::logging::setup_logging; + pub mod post_processors; pub mod progress_hook; +mod logging; + #[macro_export] macro_rules! json_get { ($value:expr, $name:literal, $into:ident) => {{ @@ -73,7 +74,6 @@ pub struct YoutubeDL { youtube_dl_class: PyObjectRef, yt_dlp_module: PyObjectRef, options: serde_json::Map<String, serde_json::Value>, - post_processors: Vec<Box<dyn PostProcessor>>, } impl std::fmt::Debug for YoutubeDL { @@ -91,6 +91,7 @@ impl YoutubeDL { /// /// # Errors /// If a python call fails. + #[allow(clippy::too_many_lines)] pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> { let mut settings = vm::Settings::default(); if let Ok(python_path) = env::var("PYTHONPATH") { @@ -124,12 +125,16 @@ impl YoutubeDL { let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; let opts = json_loads(options.options, vm); - if let Some(function) = options.progress_hook { - opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { - let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); - vm.new_pyobj(vec![hook]) - }) - .expect("Should work?"); + + { + // Setup the progress hook + if let Some(function) = options.progress_hook { + opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { + let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); + vm.new_pyobj(vec![hook]) + }) + .expect("Should work?"); + } } { @@ -204,6 +209,28 @@ impl YoutubeDL { let youtube_dl_class = class.call((opts,), vm)?; + { + // Setup the post processors + + let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?; + + for pp in options.post_processors { + let args = { + FuncArgs::new( + PosArgs::new(vec![pp(vm)?]), + KwArgs::new({ + let mut map = IndexMap::new(); + // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN + map.insert("when".to_owned(), vm.new_pyobj("pre_process")); + map + }), + ) + }; + + add_post_processor_fun.call(args, vm)?; + } + } + Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class)) }) { Ok(ok) => Ok(ok), @@ -222,7 +249,6 @@ impl YoutubeDL { youtube_dl_class, yt_dlp_module, options: output_options, - post_processors: options.post_processors, }) } @@ -413,21 +439,7 @@ impl YoutubeDL { let result = value.downcast::<PyDict>().expect("This should stay a dict"); - let mut json = json_dumps(result, vm); - - for pp in &self.post_processors { - if pp - .extractors() - .iter() - .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str)) - { - json = pp.process(json)?; - } else { - error!("Extractor not found for {pp:#?}"); - } - } - - Ok(json) + Ok(json_dumps(result, vm)) } } @@ -475,21 +487,21 @@ pub mod extract_info { } #[allow(missing_docs)] pub mod prepare { - use crate::{PythonError, post_processors}; + use crate::PythonError; #[derive(Debug, thiserror::Error)] pub enum Error { #[error(transparent)] Python(#[from] PythonError), - - #[error("Failed to run a post processor")] - PostProcessorRun(#[from] post_processors::Error), } } pub type InfoJson = serde_json::Map<String, serde_json::Value>; +/// Wrap your function with [`mk_python_function`]. pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); +pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult<PyObjectRef>; + /// Options, that are used to customize the download behaviour. /// /// In the future, this might get a Builder api. @@ -499,7 +511,7 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); pub struct YoutubeDLOptions { options: serde_json::Map<String, serde_json::Value>, progress_hook: Option<ProgressHookFunction>, - post_processors: Vec<Box<dyn PostProcessor>>, + post_processors: Vec<PostProcessorFunction>, } impl YoutubeDLOptions { @@ -511,7 +523,7 @@ impl YoutubeDLOptions { post_processors: vec![], }; - me.with_post_processor(post_processors::dearrow::DeArrowPP) + me.with_post_processor(post_processors::dearrow::process) } #[must_use] @@ -535,8 +547,8 @@ impl YoutubeDLOptions { } #[must_use] - pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self { - self.post_processors.push(Box::new(post_processor)); + pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self { + self.post_processors.push(pp); self } @@ -569,7 +581,9 @@ pub mod build { } } -fn json_loads( +/// # Panics +/// If expectation about python operations fail. +pub fn json_loads( input: serde_json::Map<String, serde_json::Value>, vm: &VirtualMachine, ) -> PyRef<PyDict> { diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs index 7dc6bbb..77c7ab9 100644 --- a/crates/yt_dlp/src/post_processors/dearrow.rs +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -16,22 +16,21 @@ use rustpython::vm::{ }; use serde::{Deserialize, Serialize}; -use crate::{InfoJson, json_get}; +use crate::{pydict_cast, pydict_get, wrap_post_processor}; -use super::PostProcessor; +wrap_post_processor!("DeArrow", unwrapped_process, process); -#[derive(Debug, Clone, Copy)] -pub struct DeArrowPP; - -impl PostProcessor for DeArrowPP { - fn extractors(&self) -> &'static [&'static str] { - &["Youtube"] +/// # Errors +/// If the API access fails. +pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyRef<PyDict>, Error> { + if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" { + warn!("DeArrow: Extractor did not match, exiting."); + return Ok(info); } - fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> { - let mut output: DeArrowApi = { - let output_bytes = { - let mut dst = Vec::new(); + let mut output: DeArrowApi = { + let output_bytes = { + let mut dst = Vec::new(); let mut easy = Easy::new(); easy.url( @@ -88,6 +87,41 @@ impl PostProcessor for DeArrowPP { Ok(info) } +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to access the DeArrow api: {0}")] + Get(#[from] curl::Error), + + #[error("Failed to deserialize a api json return object: {0}")] + Deserialize(#[from] serde_json::Error), +} + +fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) { + assert!(!info.contains_key("original_title", vm)); + + if let Ok(old_title) = info.get_item("title", vm) { + warn!( + "DeArrow: Updating title from {:#?} to {:#?}", + pydict_cast!(@ref old_title, PyStr).as_str(), + new_title + ); + + info.set_item("original_title", old_title, vm) + .expect("We checked, it is a new key"); + } else { + warn!("DeArrow: Setting title to {new_title:#?}"); + } + + let cleaned_title = { + // NOTE(@bpeetz): DeArrow uses `>` as a “Don't format the next word” mark. + // They should be removed, if one does not use a auto-formatter. <2025-06-16> + new_title.replace('>', "") + }; + + info.set_item("title", vm.new_pyobj(cleaned_title), vm) + .expect("This should work?"); +} + #[derive(Serialize, Deserialize)] /// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow> struct DeArrowApi { diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs index 65801c2..575dc45 100644 --- a/crates/yt_dlp/src/post_processors/mod.rs +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -8,23 +8,115 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use crate::InfoJson; - pub mod dearrow; -pub trait PostProcessor: std::fmt::Debug + Send { - /// Process a [`InfoJson`] object and return the updated one. - /// - /// # Errors - /// If the processing steps failed. - fn process(&self, info: InfoJson) -> Result<InfoJson, Error>; +#[macro_export] +macro_rules! pydict_get { + (@$vm:expr, $value:expr, $name:literal, $into:ident) => {{ + match $value.get_item($name, $vm) { + Ok(val) => $crate::pydict_cast!(val, $into), + Err(_) => panic!( + concat!( + "Expected '", + $name, + "' to be a key for the'", + stringify!($value), + "' py dictionary: {:#?}" + ), + $value + ), + } + }}; +} - /// The supported extractors for this post processor - fn extractors(&self) -> &'static [&'static str]; +#[macro_export] +macro_rules! pydict_cast { + ($value:expr, $into:ident) => {{ + match $value.downcast::<$into>() { + Ok(result) => result, + Err(val) => panic!( + concat!( + "Expected to be able to downcast value ({:#?}) as ", + stringify!($into) + ), + val + ), + } + }}; + (@ref $value:expr, $into:ident) => {{ + match $value.downcast_ref::<$into>() { + Some(result) => result, + None => panic!( + concat!( + "Expected to be able to downcast value ({:#?}) as ", + stringify!($into) + ), + $value + ), + } + }}; } -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Failed to access a api: {0}")] - Get(#[from] reqwest::Error), +#[macro_export] +macro_rules! wrap_post_processor { + ($name:literal, $unwrap:ident, $wrapped:ident) => { + use $crate::progress_hook::__priv::vm; + + /// # Errors + /// - If the underlying function returns an error. + /// - If python operations fail. + pub fn $wrapped(vm: &vm::VirtualMachine) -> vm::PyResult<vm::PyObjectRef> { + fn actual_processor( + mut input: vm::function::FuncArgs, + vm: &vm::VirtualMachine, + ) -> vm::PyResult<vm::PyRef<vm::builtins::PyDict>> { + let input = input + .args + .remove(0) + .downcast::<vm::builtins::PyDict>() + .expect("Should be a py dict"); + + let output = match unwrapped_process(input, vm) { + Ok(ok) => ok, + Err(err) => { + return Err(vm.new_runtime_error(err.to_string())); + } + }; + + Ok(output) + } + + let scope = vm.new_scope_with_builtins(); + + scope.globals.set_item( + "actual_processor", + vm.new_function("actual_processor", actual_processor).into(), + vm, + )?; + + let local_scope = scope.clone(); + vm.run_code_string( + local_scope, + format!( + " +import yt_dlp + +class {}(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + info = actual_processor(info) + return [], info + +inst = {}() +", + $name, $name + ).as_str(), + "<embedded post processor initializing code>".to_owned(), + )?; + + Ok(scope + .globals + .get_item("inst", vm) + .expect("We just declared it")) + } + }; } diff --git a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs index 43f85e0..4604223 100644 --- a/crates/yt_dlp/src/progress_hook.rs +++ b/crates/yt_dlp/src/progress_hook.rs @@ -49,3 +49,7 @@ macro_rules! mk_python_function { } pub use rustpython; +pub mod __priv { + pub use crate::{json_dumps, json_loads}; + pub use rustpython::vm; +} |