From 8d6eb786ee99e7b0c36736152e30a5f61cd34167 Mon Sep 17 00:00:00 2001 From: Benedikt Peetz Date: Tue, 17 Jun 2025 09:04:12 +0200 Subject: refactor(yt_dlp): Split the big `lib.rs` file up --- crates/yt/src/comments/description.rs | 4 +- crates/yt/src/comments/mod.rs | 4 +- crates/yt/src/download/download_options.rs | 2 +- crates/yt/src/select/cmds/add.rs | 2 +- crates/yt/src/storage/subscriptions.rs | 2 +- crates/yt/src/storage/video_database/get/mod.rs | 2 +- crates/yt/src/subscribe/mod.rs | 2 +- crates/yt/src/update/mod.rs | 2 +- crates/yt/src/update/updater.rs | 4 +- crates/yt/src/version/mod.rs | 2 +- crates/yt_dlp/src/info_json.rs | 50 +++ crates/yt_dlp/src/lib.rs | 427 +----------------------- crates/yt_dlp/src/options.rs | 270 +++++++++++++++ crates/yt_dlp/src/progress_hook.rs | 2 +- crates/yt_dlp/src/python_error.rs | 106 ++++++ 15 files changed, 457 insertions(+), 424 deletions(-) create mode 100644 crates/yt_dlp/src/info_json.rs create mode 100644 crates/yt_dlp/src/options.rs create mode 100644 crates/yt_dlp/src/python_error.rs (limited to 'crates') diff --git a/crates/yt/src/comments/description.rs b/crates/yt/src/comments/description.rs index e8cb29d..878b573 100644 --- a/crates/yt/src/comments/description.rs +++ b/crates/yt/src/comments/description.rs @@ -17,7 +17,7 @@ use crate::{ }; use anyhow::{Result, bail}; -use yt_dlp::{InfoJson, json_cast}; +use yt_dlp::json_cast; pub async fn description(app: &App) -> Result<()> { let description = get(app).await?; @@ -34,7 +34,7 @@ pub async fn get(app: &App) -> Result { bail!("Could not find a currently playing video!"); }; - let info_json: InfoJson = get::video_info_json(¤tly_playing_video)?.unreachable( + let info_json = get::video_info_json(¤tly_playing_video)?.unreachable( "A currently *playing* must be cached. And thus the info.json should be available", ); diff --git a/crates/yt/src/comments/mod.rs b/crates/yt/src/comments/mod.rs index 876146d..54031a4 100644 --- a/crates/yt/src/comments/mod.rs +++ b/crates/yt/src/comments/mod.rs @@ -15,7 +15,7 @@ use anyhow::{Result, bail}; use comment::{Comment, CommentExt, Comments, Parent}; use output::display_fmt_and_less; use regex::Regex; -use yt_dlp::{InfoJson, json_cast}; +use yt_dlp::json_cast; use crate::{ app::App, @@ -39,7 +39,7 @@ pub async fn get(app: &App) -> Result { bail!("Could not find a currently playing video!"); }; - let info_json: InfoJson = get::video_info_json(¤tly_playing_video)?.unreachable( + let info_json = get::video_info_json(¤tly_playing_video)?.unreachable( "A currently *playing* video must be cached. And thus the info.json should be available", ); diff --git a/crates/yt/src/download/download_options.rs b/crates/yt/src/download/download_options.rs index 03c20ba..558adfd 100644 --- a/crates/yt/src/download/download_options.rs +++ b/crates/yt/src/download/download_options.rs @@ -11,7 +11,7 @@ use anyhow::Context; use serde_json::{Value, json}; -use yt_dlp::{YoutubeDL, YoutubeDLOptions}; +use yt_dlp::{YoutubeDL, options::YoutubeDLOptions}; use crate::{app::App, storage::video_database::YtDlpOptions}; diff --git a/crates/yt/src/select/cmds/add.rs b/crates/yt/src/select/cmds/add.rs index 387b3a1..2fff298 100644 --- a/crates/yt/src/select/cmds/add.rs +++ b/crates/yt/src/select/cmds/add.rs @@ -20,7 +20,7 @@ use crate::{ use anyhow::{Context, Result, bail}; use log::{error, warn}; use url::Url; -use yt_dlp::{InfoJson, YoutubeDL, json_cast, json_get}; +use yt_dlp::{YoutubeDL, info_json::InfoJson, json_cast, json_get}; #[allow(clippy::too_many_lines)] pub(super) async fn add( diff --git a/crates/yt/src/storage/subscriptions.rs b/crates/yt/src/storage/subscriptions.rs index 6c0d08a..1ab0d72 100644 --- a/crates/yt/src/storage/subscriptions.rs +++ b/crates/yt/src/storage/subscriptions.rs @@ -17,7 +17,7 @@ use anyhow::Result; use log::debug; use sqlx::query; use url::Url; -use yt_dlp::YoutubeDLOptions; +use yt_dlp::options::YoutubeDLOptions; use crate::{app::App, unreachable::Unreachable}; diff --git a/crates/yt/src/storage/video_database/get/mod.rs b/crates/yt/src/storage/video_database/get/mod.rs index 0456cd3..e76131e 100644 --- a/crates/yt/src/storage/video_database/get/mod.rs +++ b/crates/yt/src/storage/video_database/get/mod.rs @@ -18,7 +18,7 @@ use anyhow::{Context, Result, bail}; use blake3::Hash; use log::{debug, trace}; use sqlx::query; -use yt_dlp::InfoJson; +use yt_dlp::info_json::InfoJson; use crate::{ app::App, diff --git a/crates/yt/src/subscribe/mod.rs b/crates/yt/src/subscribe/mod.rs index 7ac0be4..a965ac0 100644 --- a/crates/yt/src/subscribe/mod.rs +++ b/crates/yt/src/subscribe/mod.rs @@ -16,7 +16,7 @@ use futures::FutureExt; use log::warn; use tokio::io::{AsyncBufRead, AsyncBufReadExt}; use url::Url; -use yt_dlp::{YoutubeDLOptions, json_get}; +use yt_dlp::{json_get, options::YoutubeDLOptions}; use crate::{ app::App, diff --git a/crates/yt/src/update/mod.rs b/crates/yt/src/update/mod.rs index d866882..7f9bee7 100644 --- a/crates/yt/src/update/mod.rs +++ b/crates/yt/src/update/mod.rs @@ -15,7 +15,7 @@ use anyhow::{Context, Ok, Result}; use chrono::{DateTime, Utc}; use log::warn; use url::Url; -use yt_dlp::{InfoJson, json_cast, json_get}; +use yt_dlp::{info_json::InfoJson, json_cast, json_get}; use crate::{ app::App, diff --git a/crates/yt/src/update/updater.rs b/crates/yt/src/update/updater.rs index 60e9855..27fda4e 100644 --- a/crates/yt/src/update/updater.rs +++ b/crates/yt/src/update/updater.rs @@ -19,7 +19,9 @@ use futures::{StreamExt, future::join_all, stream}; use log::{Level, debug, error, log_enabled}; use serde_json::json; use tokio_util::task::LocalPoolHandle; -use yt_dlp::{InfoJson, PythonError, YoutubeDLOptions, json_cast, json_get, process_ie_result}; +use yt_dlp::{ + info_json::InfoJson, json_cast, json_get, options::YoutubeDLOptions, process_ie_result, python_error::PythonError, +}; use crate::{ ansi_escape_codes::{clear_whole_line, move_to_col}, diff --git a/crates/yt/src/version/mod.rs b/crates/yt/src/version/mod.rs index 9a91f3b..95660c0 100644 --- a/crates/yt/src/version/mod.rs +++ b/crates/yt/src/version/mod.rs @@ -10,7 +10,7 @@ use anyhow::{Context, Result}; use sqlx::{SqlitePool, sqlite::SqliteConnectOptions}; -use yt_dlp::YoutubeDLOptions; +use yt_dlp::options::YoutubeDLOptions; use crate::{config::Config, storage::migrate::get_version_db}; diff --git a/crates/yt_dlp/src/info_json.rs b/crates/yt_dlp/src/info_json.rs new file mode 100644 index 0000000..db4616d --- /dev/null +++ b/crates/yt_dlp/src/info_json.rs @@ -0,0 +1,50 @@ +use rustpython::vm::{ + PyRef, VirtualMachine, + builtins::{PyDict, PyStr}, +}; + +pub type InfoJson = serde_json::Map; + +pub fn json_loads( + input: serde_json::Map, + vm: &VirtualMachine, +) -> PyRef { + let json = vm.import("json", 0).expect("Module exists"); + let loads = json.get_attr("loads", vm).expect("Method exists"); + let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); + let dict = loads + .call((self_str,), vm) + .expect("Vaild json is always a valid dict"); + + dict.downcast().expect("Should always be a dict") +} + +/// # Panics +/// If expectation about python operations fail. +pub fn json_dumps( + input: PyRef, + vm: &VirtualMachine, +) -> serde_json::Map { + let json = vm.import("json", 0).expect("Module exists"); + let dumps = json.get_attr("dumps", vm).expect("Method exists"); + let dict = dumps + .call((input,), vm) + .map_err(|err| vm.print_exception(err)) + .expect("Might not always work, but for our dicts it works"); + + let string: PyRef = dict.downcast().expect("Should always be a string"); + + let real_string = string.to_str().expect("Should be valid utf8"); + + // { + // let mut file = File::create("debug.dump.json").unwrap(); + // write!(file, "{}", real_string).unwrap(); + // } + + let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); + + match value { + serde_json::Value::Object(map) => map, + _ => unreachable!("These should not be json.dumps output"), + } +} diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index 1912831..a1db606 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -10,26 +10,27 @@ //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. -use std::{env, fmt::Display, path::PathBuf}; +use std::path::PathBuf; use indexmap::IndexMap; -use log::{Level, debug, error, info, log_enabled}; -use rustpython::{ - InterpreterConfig, - vm::{ - self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine, - builtins::{PyBaseException, PyBaseExceptionRef, PyDict, PyList, PyStr}, - function::{FuncArgs, KwArgs, PosArgs}, - py_io::Write, - suggestion::offer_suggestions, - }, +use log::info; +use rustpython::vm::{ + Interpreter, PyObjectRef, PyRef, VirtualMachine, + builtins::{PyDict, PyList, PyStr}, + function::{FuncArgs, KwArgs, PosArgs}, }; use url::Url; -use crate::logging::setup_logging; +use crate::{ + info_json::{InfoJson, json_dumps, json_loads}, + python_error::PythonError, +}; +pub mod info_json; +pub mod options; pub mod post_processors; pub mod progress_hook; +pub mod python_error; mod logging; @@ -84,174 +85,6 @@ impl std::fmt::Debug for YoutubeDL { } impl YoutubeDL { - /// Construct this instance from options. - /// - /// # Panics - /// If `yt_dlp` changed their interface. - /// - /// # Errors - /// If a python call fails. - #[allow(clippy::too_many_lines)] - pub fn from_options(options: YoutubeDLOptions) -> Result { - let mut settings = vm::Settings::default(); - if let Ok(python_path) = env::var("PYTHONPATH") { - for path in python_path.split(':') { - settings.path_list.push(path.to_owned()); - } - } else { - error!( - "No PYTHONPATH found or invalid utf8. \ - This means, that you probably did not \ - supply a yt_dlp python package!" - ); - } - - settings.install_signal_handlers = false; - - // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> - settings.optimize = 0; - - settings.isolated = true; - - let interpreter = InterpreterConfig::new() - .init_stdlib() - .settings(settings) - .interpreter(); - - let output_options = options.options.clone(); - - let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { - let yt_dlp_module = vm.import("yt_dlp", 0)?; - let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - - let opts = json_loads(options.options, vm); - - { - // Setup the progress hook - if let Some(function) = options.progress_hook { - opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { - let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); - vm.new_pyobj(vec![hook]) - }) - .expect("Should work?"); - } - } - - { - // Unconditionally set a logger. - // Otherwise, yt_dlp will log to stderr. - - /// Is the specified record to be logged? Returns false for no, - /// true for yes. Filters can either modify log records in-place or - /// return a completely different record instance which will replace - /// the original log record in any future processing of the event. - fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { - let record = input.args.remove(0); - - // Filter out all error logs (they are propagated as rust errors) - let levelname: PyRef = record - .get_attr("levelname", vm) - .expect("This should exist") - .downcast() - .expect("This should be a String"); - - let return_value = levelname.as_str() != "ERROR"; - - if log_enabled!(Level::Debug) && !return_value { - let message: String = { - let get_message = record.get_attr("getMessage", vm).expect("Is set"); - let message: PyRef = get_message - .call((), vm) - .expect("Can be called") - .downcast() - .expect("Downcasting works"); - - message.as_str().to_owned() - }; - - debug!("Swollowed error message: '{message}'"); - } - return_value - } - - let logging = setup_logging(vm, "yt_dlp")?; - let ytdl_logger = { - let get_logger = logging.get_item("getLogger", vm)?; - get_logger.call(("yt_dlp",), vm)? - }; - - { - let args = FuncArgs::new( - PosArgs::new(vec![]), - KwArgs::new({ - let mut map = IndexMap::new(); - // Ensure that all events are logged by setting - // the log level to NOTSET (we filter on rust's side) - map.insert("level".to_owned(), vm.new_pyobj(0)); - map - }), - ); - - let basic_config = logging.get_item("basicConfig", vm)?; - basic_config.call(args, vm)?; - } - - { - let add_filter = ytdl_logger.get_attr("addFilter", vm)?; - add_filter.call( - (vm.new_function("yt_dlp_error_filter", filter_error_log),), - vm, - )?; - } - - opts.set_item("logger", ytdl_logger, vm)?; - } - - let youtube_dl_class = class.call((opts,), vm)?; - - { - // Setup the post processors - - let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?; - - for pp in options.post_processors { - let args = { - FuncArgs::new( - PosArgs::new(vec![pp(vm)?]), - KwArgs::new({ - let mut map = IndexMap::new(); - // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN - map.insert("when".to_owned(), vm.new_pyobj("pre_process")); - map - }), - ) - }; - - add_post_processor_fun.call(args, vm)?; - } - } - - Ok::<_, PyRef>((yt_dlp_module, youtube_dl_class)) - }) { - Ok(ok) => Ok(ok), - Err(err) => { - // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14> - // interpreter.finalize(Some(err)); - interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(build::Error::Python(buffer)) - }) - } - }?; - - Ok(Self { - interpreter, - youtube_dl_class, - yt_dlp_module, - options: output_options, - }) - } - /// # Panics /// /// If `yt_dlp` changed their location or type of `__version__`. @@ -443,25 +276,9 @@ impl YoutubeDL { } } -#[derive(thiserror::Error, Debug)] -pub struct PythonError(pub String); - -impl Display for PythonError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Python threw an exception: {}", self.0) - } -} - -impl PythonError { - fn from_exception(vm: &VirtualMachine, exc: &PyRef) -> Self { - let buffer = process_exception(vm, exc); - Self(buffer) - } -} - #[allow(missing_docs)] pub mod process_ie_result { - use crate::{PythonError, prepare}; + use crate::{prepare, python_error::PythonError}; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -474,7 +291,7 @@ pub mod process_ie_result { } #[allow(missing_docs)] pub mod extract_info { - use crate::{PythonError, prepare}; + use crate::{prepare, python_error::PythonError}; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -487,7 +304,7 @@ pub mod extract_info { } #[allow(missing_docs)] pub mod prepare { - use crate::PythonError; + use crate::python_error::PythonError; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -495,215 +312,3 @@ pub mod prepare { Python(#[from] PythonError), } } - -pub type InfoJson = serde_json::Map; -/// Wrap your function with [`mk_python_function`]. -pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); - -pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult; - -/// Options, that are used to customize the download behaviour. -/// -/// In the future, this might get a Builder api. -/// -/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. -#[derive(Default, Debug)] -pub struct YoutubeDLOptions { - options: serde_json::Map, - progress_hook: Option, - post_processors: Vec, -} - -impl YoutubeDLOptions { - #[must_use] - pub fn new() -> Self { - let me = Self { - options: serde_json::Map::new(), - progress_hook: None, - post_processors: vec![], - }; - - me.with_post_processor(post_processors::dearrow::process) - } - - #[must_use] - pub fn set(self, key: impl Into, value: impl Into) -> Self { - let mut options = self.options; - options.insert(key.into(), value.into()); - - Self { options, ..self } - } - - #[must_use] - pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { - if let Some(_previous_hook) = self.progress_hook { - todo!() - } else { - Self { - progress_hook: Some(progress_hook), - ..self - } - } - } - - #[must_use] - pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self { - self.post_processors.push(pp); - self - } - - /// # Errors - /// If the underlying [`YoutubeDL::from_options`] errors. - pub fn build(self) -> Result { - YoutubeDL::from_options(self) - } - - #[must_use] - pub fn from_json_options(options: serde_json::Map) -> Self { - Self { - options, - ..Self::new() - } - } - - #[must_use] - pub fn get(&self, key: &str) -> Option<&serde_json::Value> { - self.options.get(key) - } -} - -#[allow(missing_docs)] -pub mod build { - #[derive(Debug, thiserror::Error)] - pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), - } -} - -/// # Panics -/// If expectation about python operations fail. -pub fn json_loads( - input: serde_json::Map, - vm: &VirtualMachine, -) -> PyRef { - let json = vm.import("json", 0).expect("Module exists"); - let loads = json.get_attr("loads", vm).expect("Method exists"); - let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); - let dict = loads - .call((self_str,), vm) - .expect("Vaild json is always a valid dict"); - - dict.downcast().expect("Should always be a dict") -} - -/// # Panics -/// If expectation about python operations fail. -pub fn json_dumps( - input: PyRef, - vm: &VirtualMachine, -) -> serde_json::Map { - let json = vm.import("json", 0).expect("Module exists"); - let dumps = json.get_attr("dumps", vm).expect("Method exists"); - let dict = dumps - .call((input,), vm) - .map_err(|err| vm.print_exception(err)) - .expect("Might not always work, but for our dicts it works"); - - let string: PyRef = dict.downcast().expect("Should always be a string"); - - let real_string = string.to_str().expect("Should be valid utf8"); - - // { - // let mut file = File::create("debug.dump.json").unwrap(); - // write!(file, "{}", real_string).unwrap(); - // } - - let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); - - match value { - serde_json::Value::Object(map) => map, - _ => unreachable!("These should not be json.dumps output"), - } -} - -// Inlined and changed from `vm.write_exception_inner` -fn write_exception( - vm: &VirtualMachine, - output: &mut W, - exc: &PyBaseExceptionRef, -) -> Result<(), W::Error> { - let varargs = exc.args(); - let args_repr = { - match varargs.len() { - 0 => vec![], - 1 => { - let args0_repr = if true { - varargs[0] - .str(vm) - .unwrap_or_else(|_| PyStr::from("").into_ref(&vm.ctx)) - } else { - varargs[0].repr(vm).unwrap_or_else(|_| { - PyStr::from("").into_ref(&vm.ctx) - }) - }; - vec![args0_repr] - } - _ => varargs - .iter() - .map(|vararg| { - vararg.repr(vm).unwrap_or_else(|_| { - PyStr::from("").into_ref(&vm.ctx) - }) - }) - .collect(), - } - }; - - let exc_class = exc.class(); - - if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) { - unreachable!( - "A syntax error should never be raised, \ - as yt_dlp should not have them and neither our embedded code" - ); - } - - let exc_name = exc_class.name(); - match args_repr.len() { - 0 => write!(output, "{exc_name}"), - 1 => write!(output, "{}: {}", exc_name, args_repr[0]), - _ => write!( - output, - "{}: ({})", - exc_name, - args_repr - .iter() - .map(|val| val.as_str()) - .collect::>() - .join(", "), - ), - }?; - - match offer_suggestions(exc, vm) { - Some(suggestions) => { - write!(output, ". Did you mean: '{suggestions}'?") - } - None => Ok(()), - } -} - -fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String { - let mut buffer = String::new(); - write_exception(vm, &mut buffer, err) - .expect("We are writing into an *in-memory* string, it will always work"); - - if log_enabled!(Level::Debug) { - let mut output = String::new(); - vm.write_exception(&mut output, err) - .expect("We are writing into an *in-memory* string, it will always work"); - debug!("Python threw an exception: {output}"); - } - - buffer -} diff --git a/crates/yt_dlp/src/options.rs b/crates/yt_dlp/src/options.rs new file mode 100644 index 0000000..34612ac --- /dev/null +++ b/crates/yt_dlp/src/options.rs @@ -0,0 +1,270 @@ +use std::env; + +use indexmap::IndexMap; +use log::{Level, debug, error, log_enabled}; +use rustpython::{ + InterpreterConfig, + vm::{ + self, PyObjectRef, PyRef, PyResult, VirtualMachine, + builtins::{PyBaseException, PyStr}, + function::{FuncArgs, KwArgs, PosArgs}, + }, +}; + +use crate::{ + YoutubeDL, json_loads, logging::setup_logging, post_processors, python_error::process_exception, +}; + +/// Wrap your function with [`mk_python_function`]. +pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); + +pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult; + +/// Options, that are used to customize the download behaviour. +/// +/// In the future, this might get a Builder api. +/// +/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. +#[derive(Default, Debug)] +pub struct YoutubeDLOptions { + options: serde_json::Map, + progress_hook: Option, + post_processors: Vec, +} + +impl YoutubeDLOptions { + #[must_use] + pub fn new() -> Self { + let me = Self { + options: serde_json::Map::new(), + progress_hook: None, + post_processors: vec![], + }; + + me.with_post_processor(post_processors::dearrow::process) + } + + #[must_use] + pub fn set(self, key: impl Into, value: impl Into) -> Self { + let mut options = self.options; + options.insert(key.into(), value.into()); + + Self { options, ..self } + } + + #[must_use] + pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { + if let Some(_previous_hook) = self.progress_hook { + todo!() + } else { + Self { + progress_hook: Some(progress_hook), + ..self + } + } + } + + #[must_use] + pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self { + self.post_processors.push(pp); + self + } + + /// # Errors + /// If the underlying [`YoutubeDL::from_options`] errors. + pub fn build(self) -> Result { + YoutubeDL::from_options(self) + } + + #[must_use] + pub fn from_json_options(options: serde_json::Map) -> Self { + Self { + options, + ..Self::new() + } + } + + #[must_use] + pub fn get(&self, key: &str) -> Option<&serde_json::Value> { + self.options.get(key) + } +} + +impl YoutubeDL { + /// Construct this instance from options. + /// + /// # Panics + /// If `yt_dlp` changed their interface. + /// + /// # Errors + /// If a python call fails. + #[allow(clippy::too_many_lines)] + pub fn from_options(options: YoutubeDLOptions) -> Result { + let mut settings = vm::Settings::default(); + if let Ok(python_path) = env::var("PYTHONPATH") { + for path in python_path.split(':') { + settings.path_list.push(path.to_owned()); + } + } else { + error!( + "No PYTHONPATH found or invalid utf8. \ + This means, that you probably did not \ + supply a yt_dlp python package!" + ); + } + + settings.install_signal_handlers = false; + + // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> + settings.optimize = 0; + + settings.isolated = true; + + let interpreter = InterpreterConfig::new() + .init_stdlib() + .settings(settings) + .interpreter(); + + let output_options = options.options.clone(); + + let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { + let yt_dlp_module = vm.import("yt_dlp", 0)?; + let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; + + let opts = json_loads(options.options, vm); + + { + // Setup the progress hook + if let Some(function) = options.progress_hook { + opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { + let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); + vm.new_pyobj(vec![hook]) + }) + .expect("Should work?"); + } + } + + { + // Unconditionally set a logger. + // Otherwise, yt_dlp will log to stderr. + + /// Is the specified record to be logged? Returns false for no, + /// true for yes. Filters can either modify log records in-place or + /// return a completely different record instance which will replace + /// the original log record in any future processing of the event. + fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { + let record = input.args.remove(0); + + // Filter out all error logs (they are propagated as rust errors) + let levelname: PyRef = record + .get_attr("levelname", vm) + .expect("This should exist") + .downcast() + .expect("This should be a String"); + + let return_value = levelname.as_str() != "ERROR"; + + if log_enabled!(Level::Debug) && !return_value { + let message: String = { + let get_message = record.get_attr("getMessage", vm).expect("Is set"); + let message: PyRef = get_message + .call((), vm) + .expect("Can be called") + .downcast() + .expect("Downcasting works"); + + message.as_str().to_owned() + }; + + debug!("Swollowed error message: '{message}'"); + } + return_value + } + + let logging = setup_logging(vm, "yt_dlp")?; + let ytdl_logger = { + let get_logger = logging.get_item("getLogger", vm)?; + get_logger.call(("yt_dlp",), vm)? + }; + + { + let args = FuncArgs::new( + PosArgs::new(vec![]), + KwArgs::new({ + let mut map = IndexMap::new(); + // Ensure that all events are logged by setting + // the log level to NOTSET (we filter on rust's side) + map.insert("level".to_owned(), vm.new_pyobj(0)); + map + }), + ); + + let basic_config = logging.get_item("basicConfig", vm)?; + basic_config.call(args, vm)?; + } + + { + let add_filter = ytdl_logger.get_attr("addFilter", vm)?; + add_filter.call( + (vm.new_function("yt_dlp_error_filter", filter_error_log),), + vm, + )?; + } + + opts.set_item("logger", ytdl_logger, vm)?; + } + + let youtube_dl_class = class.call((opts,), vm)?; + + { + // Setup the post processors + + let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?; + + for pp in options.post_processors { + let args = { + FuncArgs::new( + PosArgs::new(vec![pp(vm)?]), + KwArgs::new({ + let mut map = IndexMap::new(); + // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN + map.insert("when".to_owned(), vm.new_pyobj("pre_process")); + map + }), + ) + }; + + add_post_processor_fun.call(args, vm)?; + } + } + + Ok::<_, PyRef>((yt_dlp_module, youtube_dl_class)) + }) { + Ok(ok) => Ok(ok), + Err(err) => { + // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14> + // interpreter.finalize(Some(err)); + interpreter.enter(|vm| { + let buffer = process_exception(vm, &err); + Err(build::Error::Python(buffer)) + }) + } + }?; + + Ok(Self { + interpreter, + youtube_dl_class, + yt_dlp_module, + options: output_options, + }) + } +} + +#[allow(missing_docs)] +pub mod build { + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error("Python threw an exception: {0}")] + Python(String), + } +} diff --git a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs index 6d2c9b7..b42ae21 100644 --- a/crates/yt_dlp/src/progress_hook.rs +++ b/crates/yt_dlp/src/progress_hook.rs @@ -49,6 +49,6 @@ macro_rules! mk_python_function { } pub mod __priv { - pub use crate::{json_dumps, json_loads}; + pub use crate::info_json::{json_dumps, json_loads}; pub use rustpython::vm; } diff --git a/crates/yt_dlp/src/python_error.rs b/crates/yt_dlp/src/python_error.rs new file mode 100644 index 0000000..d1d5b2e --- /dev/null +++ b/crates/yt_dlp/src/python_error.rs @@ -0,0 +1,106 @@ +use std::fmt::Display; + +use log::{Level, debug, log_enabled}; +use rustpython::vm::{ + AsObject, PyPayload, PyRef, VirtualMachine, + builtins::{PyBaseException, PyBaseExceptionRef, PyStr}, + py_io::Write, + suggestion::offer_suggestions, +}; + +#[derive(thiserror::Error, Debug)] +pub struct PythonError(pub String); + +impl Display for PythonError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Python threw an exception: {}", self.0) + } +} + +impl PythonError { + pub(super) fn from_exception(vm: &VirtualMachine, exc: &PyRef) -> Self { + let buffer = process_exception(vm, exc); + Self(buffer) + } +} + +pub(super) fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String { + let mut buffer = String::new(); + write_exception(vm, &mut buffer, err) + .expect("We are writing into an *in-memory* string, it will always work"); + + if log_enabled!(Level::Debug) { + let mut output = String::new(); + vm.write_exception(&mut output, err) + .expect("We are writing into an *in-memory* string, it will always work"); + debug!("Python threw an exception: {output}"); + } + + buffer +} + +// Inlined and changed from `vm.write_exception_inner` +fn write_exception( + vm: &VirtualMachine, + output: &mut W, + exc: &PyBaseExceptionRef, +) -> Result<(), W::Error> { + let varargs = exc.args(); + let args_repr = { + match varargs.len() { + 0 => vec![], + 1 => { + let args0_repr = if true { + varargs[0] + .str(vm) + .unwrap_or_else(|_| PyStr::from("").into_ref(&vm.ctx)) + } else { + varargs[0].repr(vm).unwrap_or_else(|_| { + PyStr::from("").into_ref(&vm.ctx) + }) + }; + vec![args0_repr] + } + _ => varargs + .iter() + .map(|vararg| { + vararg.repr(vm).unwrap_or_else(|_| { + PyStr::from("").into_ref(&vm.ctx) + }) + }) + .collect(), + } + }; + + let exc_class = exc.class(); + + if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) { + unreachable!( + "A syntax error should never be raised, \ + as yt_dlp should not have them and neither our embedded code" + ); + } + + let exc_name = exc_class.name(); + match args_repr.len() { + 0 => write!(output, "{exc_name}"), + 1 => write!(output, "{}: {}", exc_name, args_repr[0]), + _ => write!( + output, + "{}: ({})", + exc_name, + args_repr + .iter() + .map(|val| val.as_str()) + .collect::>() + .join(", "), + ), + }?; + + match offer_suggestions(exc, vm) { + Some(suggestions) => { + write!(output, ". Did you mean: '{suggestions}'?") + } + None => Ok(()), + } +} -- cgit 1.4.1