From 8d6eb786ee99e7b0c36736152e30a5f61cd34167 Mon Sep 17 00:00:00 2001 From: Benedikt Peetz Date: Tue, 17 Jun 2025 09:04:12 +0200 Subject: refactor(yt_dlp): Split the big `lib.rs` file up --- crates/yt_dlp/src/lib.rs | 427 ++--------------------------------------------- 1 file changed, 16 insertions(+), 411 deletions(-) (limited to 'crates/yt_dlp/src/lib.rs') diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index 1912831..a1db606 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -10,26 +10,27 @@ //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. -use std::{env, fmt::Display, path::PathBuf}; +use std::path::PathBuf; use indexmap::IndexMap; -use log::{Level, debug, error, info, log_enabled}; -use rustpython::{ - InterpreterConfig, - vm::{ - self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, - builtins::{PyBaseException, PyBaseExceptionRef, PyDict, PyList, PyStr}, - function::{FuncArgs, KwArgs, PosArgs}, - py_io::Write, - suggestion::offer_suggestions, - }, +use log::info; +use rustpython::vm::{ + Interpreter, PyObjectRef, PyRef, VirtualMachine, + builtins::{PyDict, PyList, PyStr}, + function::{FuncArgs, KwArgs, PosArgs}, }; use url::Url; -use crate::logging::setup_logging; +use crate::{ + info_json::{InfoJson, json_dumps, json_loads}, + python_error::PythonError, +}; +pub mod info_json; +pub mod options; pub mod post_processors; pub mod progress_hook; +pub mod python_error; mod logging; @@ -84,174 +85,6 @@ impl std::fmt::Debug for YoutubeDL { } impl YoutubeDL { - /// Construct this instance from options. - /// - /// # Panics - /// If `yt_dlp` changed their interface. - /// - /// # Errors - /// If a python call fails. - #[allow(clippy::too_many_lines)] - pub fn from_options(options: YoutubeDLOptions) -> Result { - let mut settings = vm::Settings::default(); - if let Ok(python_path) = env::var("PYTHONPATH") { - for path in python_path.split(':') { - settings.path_list.push(path.to_owned()); - } - } else { - error!( - "No PYTHONPATH found or invalid utf8. \ - This means, that you probably did not \ - supply a yt_dlp python package!" - ); - } - - settings.install_signal_handlers = false; - - // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> - settings.optimize = 0; - - settings.isolated = true; - - let interpreter = InterpreterConfig::new() - .init_stdlib() - .settings(settings) - .interpreter(); - - let output_options = options.options.clone(); - - let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { - let yt_dlp_module = vm.import("yt_dlp", 0)?; - let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - - let opts = json_loads(options.options, vm); - - { - // Setup the progress hook - if let Some(function) = options.progress_hook { - opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { - let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); - vm.new_pyobj(vec![hook]) - }) - .expect("Should work?"); - } - } - - { - // Unconditionally set a logger. - // Otherwise, yt_dlp will log to stderr. - - /// Is the specified record to be logged? Returns false for no, - /// true for yes. Filters can either modify log records in-place or - /// return a completely different record instance which will replace - /// the original log record in any future processing of the event. - fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { - let record = input.args.remove(0); - - // Filter out all error logs (they are propagated as rust errors) - let levelname: PyRef = record - .get_attr("levelname", vm) - .expect("This should exist") - .downcast() - .expect("This should be a String"); - - let return_value = levelname.as_str() != "ERROR"; - - if log_enabled!(Level::Debug) && !return_value { - let message: String = { - let get_message = record.get_attr("getMessage", vm).expect("Is set"); - let message: PyRef = get_message - .call((), vm) - .expect("Can be called") - .downcast() - .expect("Downcasting works"); - - message.as_str().to_owned() - }; - - debug!("Swollowed error message: '{message}'"); - } - return_value - } - - let logging = setup_logging(vm, "yt_dlp")?; - let ytdl_logger = { - let get_logger = logging.get_item("getLogger", vm)?; - get_logger.call(("yt_dlp",), vm)? - }; - - { - let args = FuncArgs::new( - PosArgs::new(vec![]), - KwArgs::new({ - let mut map = IndexMap::new(); - // Ensure that all events are logged by setting - // the log level to NOTSET (we filter on rust's side) - map.insert("level".to_owned(), vm.new_pyobj(0)); - map - }), - ); - - let basic_config = logging.get_item("basicConfig", vm)?; - basic_config.call(args, vm)?; - } - - { - let add_filter = ytdl_logger.get_attr("addFilter", vm)?; - add_filter.call( - (vm.new_function("yt_dlp_error_filter", filter_error_log),), - vm, - )?; - } - - opts.set_item("logger", ytdl_logger, vm)?; - } - - let youtube_dl_class = class.call((opts,), vm)?; - - { - // Setup the post processors - - let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?; - - for pp in options.post_processors { - let args = { - FuncArgs::new( - PosArgs::new(vec![pp(vm)?]), - KwArgs::new({ - let mut map = IndexMap::new(); - // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN - map.insert("when".to_owned(), vm.new_pyobj("pre_process")); - map - }), - ) - }; - - add_post_processor_fun.call(args, vm)?; - } - } - - Ok::<_, PyRef>((yt_dlp_module, youtube_dl_class)) - }) { - Ok(ok) => Ok(ok), - Err(err) => { - // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14> - // interpreter.finalize(Some(err)); - interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(build::Error::Python(buffer)) - }) - } - }?; - - Ok(Self { - interpreter, - youtube_dl_class, - yt_dlp_module, - options: output_options, - }) - } - /// # Panics /// /// If `yt_dlp` changed their location or type of `__version__`. @@ -443,25 +276,9 @@ impl YoutubeDL { } } -#[derive(thiserror::Error, Debug)] -pub struct PythonError(pub String); - -impl Display for PythonError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Python threw an exception: {}", self.0) - } -} - -impl PythonError { - fn from_exception(vm: &VirtualMachine, exc: &PyRef) -> Self { - let buffer = process_exception(vm, exc); - Self(buffer) - } -} - #[allow(missing_docs)] pub mod process_ie_result { - use crate::{PythonError, prepare}; + use crate::{prepare, python_error::PythonError}; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -474,7 +291,7 @@ pub mod process_ie_result { } #[allow(missing_docs)] pub mod extract_info { - use crate::{PythonError, prepare}; + use crate::{prepare, python_error::PythonError}; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -487,7 +304,7 @@ pub mod extract_info { } #[allow(missing_docs)] pub mod prepare { - use crate::PythonError; + use crate::python_error::PythonError; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -495,215 +312,3 @@ pub mod prepare { Python(#[from] PythonError), } } - -pub type InfoJson = serde_json::Map; -/// Wrap your function with [`mk_python_function`]. -pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); - -pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult; - -/// Options, that are used to customize the download behaviour. -/// -/// In the future, this might get a Builder api. -/// -/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. -#[derive(Default, Debug)] -pub struct YoutubeDLOptions { - options: serde_json::Map, - progress_hook: Option, - post_processors: Vec, -} - -impl YoutubeDLOptions { - #[must_use] - pub fn new() -> Self { - let me = Self { - options: serde_json::Map::new(), - progress_hook: None, - post_processors: vec![], - }; - - me.with_post_processor(post_processors::dearrow::process) - } - - #[must_use] - pub fn set(self, key: impl Into, value: impl Into) -> Self { - let mut options = self.options; - options.insert(key.into(), value.into()); - - Self { options, ..self } - } - - #[must_use] - pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { - if let Some(_previous_hook) = self.progress_hook { - todo!() - } else { - Self { - progress_hook: Some(progress_hook), - ..self - } - } - } - - #[must_use] - pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self { - self.post_processors.push(pp); - self - } - - /// # Errors - /// If the underlying [`YoutubeDL::from_options`] errors. - pub fn build(self) -> Result { - YoutubeDL::from_options(self) - } - - #[must_use] - pub fn from_json_options(options: serde_json::Map) -> Self { - Self { - options, - ..Self::new() - } - } - - #[must_use] - pub fn get(&self, key: &str) -> Option<&serde_json::Value> { - self.options.get(key) - } -} - -#[allow(missing_docs)] -pub mod build { - #[derive(Debug, thiserror::Error)] - pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), - } -} - -/// # Panics -/// If expectation about python operations fail. -pub fn json_loads( - input: serde_json::Map, - vm: &VirtualMachine, -) -> PyRef { - let json = vm.import("json", 0).expect("Module exists"); - let loads = json.get_attr("loads", vm).expect("Method exists"); - let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); - let dict = loads - .call((self_str,), vm) - .expect("Vaild json is always a valid dict"); - - dict.downcast().expect("Should always be a dict") -} - -/// # Panics -/// If expectation about python operations fail. -pub fn json_dumps( - input: PyRef, - vm: &VirtualMachine, -) -> serde_json::Map { - let json = vm.import("json", 0).expect("Module exists"); - let dumps = json.get_attr("dumps", vm).expect("Method exists"); - let dict = dumps - .call((input,), vm) - .map_err(|err| vm.print_exception(err)) - .expect("Might not always work, but for our dicts it works"); - - let string: PyRef = dict.downcast().expect("Should always be a string"); - - let real_string = string.to_str().expect("Should be valid utf8"); - - // { - // let mut file = File::create("debug.dump.json").unwrap(); - // write!(file, "{}", real_string).unwrap(); - // } - - let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); - - match value { - serde_json::Value::Object(map) => map, - _ => unreachable!("These should not be json.dumps output"), - } -} - -// Inlined and changed from `vm.write_exception_inner` -fn write_exception( - vm: &VirtualMachine, - output: &mut W, - exc: &PyBaseExceptionRef, -) -> Result<(), W::Error> { - let varargs = exc.args(); - let args_repr = { - match varargs.len() { - 0 => vec![], - 1 => { - let args0_repr = if true { - varargs[0] - .str(vm) - .unwrap_or_else(|_| PyStr::from("").into_ref(&vm.ctx)) - } else { - varargs[0].repr(vm).unwrap_or_else(|_| { - PyStr::from("").into_ref(&vm.ctx) - }) - }; - vec![args0_repr] - } - _ => varargs - .iter() - .map(|vararg| { - vararg.repr(vm).unwrap_or_else(|_| { - PyStr::from("").into_ref(&vm.ctx) - }) - }) - .collect(), - } - }; - - let exc_class = exc.class(); - - if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) { - unreachable!( - "A syntax error should never be raised, \ - as yt_dlp should not have them and neither our embedded code" - ); - } - - let exc_name = exc_class.name(); - match args_repr.len() { - 0 => write!(output, "{exc_name}"), - 1 => write!(output, "{}: {}", exc_name, args_repr[0]), - _ => write!( - output, - "{}: ({})", - exc_name, - args_repr - .iter() - .map(|val| val.as_str()) - .collect::>() - .join(", "), - ), - }?; - - match offer_suggestions(exc, vm) { - Some(suggestions) => { - write!(output, ". Did you mean: '{suggestions}'?") - } - None => Ok(()), - } -} - -fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String { - let mut buffer = String::new(); - write_exception(vm, &mut buffer, err) - .expect("We are writing into an *in-memory* string, it will always work"); - - if log_enabled!(Level::Debug) { - let mut output = String::new(); - vm.write_exception(&mut output, err) - .expect("We are writing into an *in-memory* string, it will always work"); - debug!("Python threw an exception: {output}"); - } - - buffer -} -- cgit 1.4.1