diff options
Diffstat (limited to 'crates/yt_dlp/src')
-rw-r--r-- | crates/yt_dlp/src/info_json.rs | 48 | ||||
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 211 | ||||
-rw-r--r-- | crates/yt_dlp/src/logging.rs | 171 | ||||
-rw-r--r-- | crates/yt_dlp/src/options.rs | 217 | ||||
-rw-r--r-- | crates/yt_dlp/src/package_hacks/mod.rs | 11 | ||||
-rw-r--r-- | crates/yt_dlp/src/package_hacks/urllib3.rs | 35 | ||||
-rw-r--r-- | crates/yt_dlp/src/package_hacks/urllib3_polyfill.py | 13 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/dearrow.rs | 145 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/mod.rs | 91 | ||||
-rw-r--r-- | crates/yt_dlp/src/progress_hook.rs | 83 | ||||
-rw-r--r-- | crates/yt_dlp/src/python_error.rs | 105 |
11 files changed, 376 insertions, 754 deletions
diff --git a/crates/yt_dlp/src/info_json.rs b/crates/yt_dlp/src/info_json.rs index 31f4a69..3ed08ee 100644 --- a/crates/yt_dlp/src/info_json.rs +++ b/crates/yt_dlp/src/info_json.rs @@ -8,50 +8,46 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use rustpython::vm::{ - PyRef, VirtualMachine, - builtins::{PyDict, PyStr}, +use pyo3::{ + Bound, Python, intern, + types::{PyAnyMethods, PyDict}, }; pub type InfoJson = serde_json::Map<String, serde_json::Value>; +/// # Panics +/// If expectation about python operations fail. +#[must_use] pub fn json_loads( input: serde_json::Map<String, serde_json::Value>, - vm: &VirtualMachine, -) -> PyRef<PyDict> { - let json = vm.import("json", 0).expect("Module exists"); - let loads = json.get_attr("loads", vm).expect("Method exists"); + py: Python<'_>, +) -> Bound<'_, PyDict> { + let json = py.import(intern!(py, "json")).expect("Module exists"); + let loads = json.getattr(intern!(py, "loads")).expect("Method exists"); let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); let dict = loads - .call((self_str,), vm) + .call((self_str,), None) .expect("Vaild json is always a valid dict"); - dict.downcast().expect("Should always be a dict") + dict.downcast_into().expect("Should always be a dict") } /// # Panics /// If expectation about python operations fail. -pub fn json_dumps( - input: PyRef<PyDict>, - vm: &VirtualMachine, -) -> serde_json::Map<String, serde_json::Value> { - let json = vm.import("json", 0).expect("Module exists"); - let dumps = json.get_attr("dumps", vm).expect("Method exists"); +#[must_use] +pub fn json_dumps(input: &Bound<'_, PyDict>) -> serde_json::Map<String, serde_json::Value> { + let py = input.py(); + + let json = py.import(intern!(py, "json")).expect("Module exists"); + let dumps = json.getattr(intern!(py, "dumps")).expect("Method exists"); let dict = dumps - .call((input,), vm) - .map_err(|err| vm.print_exception(err)) + .call((input,), None) + .map_err(|err| err.print(py)) .expect("Might not always work, but for our dicts it works"); - let string: PyRef<PyStr> = dict.downcast().expect("Should always be a string"); - - let real_string = string.to_str().expect("Should be valid utf8"); - - // { - // let mut file = File::create("debug.dump.json").unwrap(); - // write!(file, "{}", real_string).unwrap(); - // } + let string: String = dict.extract().expect("Should always be a string"); - let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); + let value: serde_json::Value = serde_json::from_str(&string).expect("Should be valid json"); match value { serde_json::Value::Object(map) => map, diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index a03e444..d0cfbdd 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -12,18 +12,16 @@ use std::path::PathBuf; -use indexmap::IndexMap; use log::info; -use rustpython::vm::{ - Interpreter, PyObjectRef, PyRef, VirtualMachine, - builtins::{PyDict, PyList, PyStr}, - function::{FuncArgs, KwArgs, PosArgs}, +use pyo3::{ + Bound, Py, PyAny, Python, intern, + types::{PyAnyMethods, PyDict, PyIterator, PyList}, }; use url::Url; use crate::{ info_json::{InfoJson, json_dumps, json_loads}, - python_error::PythonError, + python_error::{IntoPythonError, PythonError}, }; pub mod info_json; @@ -32,19 +30,16 @@ pub mod post_processors; pub mod progress_hook; pub mod python_error; -mod logging; -mod package_hacks; - #[macro_export] macro_rules! json_get { ($value:expr, $name:literal, $into:ident) => {{ match $value.get($name) { - Some(val) => $crate::json_cast!(val, $into), + Some(val) => $crate::json_cast!(@log_key $name, val, $into), None => panic!( concat!( "Expected '", $name, - "' to be a key for the'", + "' to be a key for the '", stringify!($value), "' object: {:#?}" ), @@ -57,11 +52,17 @@ macro_rules! json_get { #[macro_export] macro_rules! json_cast { ($value:expr, $into:ident) => {{ + json_cast!(@log_key "<unknown>", $value, $into) + }}; + + (@log_key $name:literal, $value:expr, $into:ident) => {{ match $value.$into() { Some(result) => result, None => panic!( concat!( - "Expected to be able to cast value ({:#?}) ", + "Expected to be able to cast '", + $name, + "' value ({:#?}) ", stringify!($into) ), $value @@ -70,50 +71,50 @@ macro_rules! json_cast { }}; } +macro_rules! py_kw_args { + ($py:expr => $($kw_arg_name:ident = $kw_arg_val:expr),*) => {{ + use $crate::python_error::IntoPythonError; + + let dict = PyDict::new($py); + + $( + dict.set_item(stringify!($kw_arg_name), $kw_arg_val).wrap_exc($py)?; + )* + + Some(dict) + } + .as_ref()}; +} +pub(crate) use py_kw_args; + /// The core of the `yt_dlp` interface. +#[derive(Debug)] pub struct YoutubeDL { - interpreter: Interpreter, - youtube_dl_class: PyObjectRef, - yt_dlp_module: PyObjectRef, + inner: Py<PyAny>, options: serde_json::Map<String, serde_json::Value>, } -impl std::fmt::Debug for YoutubeDL { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO(@bpeetz): Use something useful here. <2025-06-13> - f.write_str("YoutubeDL") - } -} - impl YoutubeDL { /// Fetch the underlying `yt_dlp` and `python` version. /// - /// - /// # Panics - /// - /// If `yt_dlp` changed their location or type of `__version__`. - pub fn version(&self) -> (String, String) { - let yt_dlp: PyRef<PyStr> = self.interpreter.enter_and_expect( - |vm| { - let version_module = self.yt_dlp_module.get_attr("version", vm)?; - let version = version_module.get_attr("__version__", vm)?; - let version = version.downcast().expect("This should always be a string"); - Ok(version) - }, - "yt_dlp version location has changed", - ); - - let python: PyRef<PyStr> = self.interpreter.enter_and_expect( - |vm| { - let version_module = vm.import("sys", 0)?; - let version = version_module.get_attr("version", vm)?; - let version = version.downcast().expect("This should always be a string"); - Ok(version) - }, - "python version location has changed", - ); - - (yt_dlp.to_string(), python.to_string()) + /// # Errors + /// If python attribute access fails. + pub fn version(&self) -> Result<(String, String), PythonError> { + Python::with_gil(|py| { + let yt_dlp = py + .import(intern!(py, "yt_dlp")) + .wrap_exc(py)? + .getattr(intern!(py, "version")) + .wrap_exc(py)? + .getattr(intern!(py, "__version__")) + .wrap_exc(py)? + .extract() + .wrap_exc(py)?; + + let python = py.version(); + + Ok((yt_dlp, python.to_owned())) + }) } /// Download a given list of URLs. @@ -172,55 +173,61 @@ impl YoutubeDL { download: bool, process: bool, ) -> Result<InfoJson, extract_info::Error> { - self.interpreter.enter(|vm| { - let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]); - - let kw_args = KwArgs::new({ - let mut map = IndexMap::new(); - map.insert("download".to_owned(), vm.new_pyobj(download)); - map.insert("process".to_owned(), vm.new_pyobj(process)); - map - }); - - let fun_args = FuncArgs::new(pos_args, kw_args); - + Python::with_gil(|py| { let inner = self - .youtube_dl_class - .get_attr("extract_info", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + .inner + .bind(py) + .getattr(intern!(py, "extract_info")) + .wrap_exc(py)?; + let result = inner - .call_with_args(fun_args, vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))? - .downcast::<PyDict>() + .call( + (url.to_string(),), + py_kw_args!(py => download = download, process = process), + ) + .wrap_exc(py)? + .downcast_into::<PyDict>() .expect("This is a dict"); // Resolve the generator object - if let Ok(generator) = result.get_item("entries", vm) { - if generator.payload_is::<PyList>() { + if let Ok(generator) = result.get_item(intern!(py, "entries")) { + if generator.is_instance_of::<PyList>() { // already resolved. Do nothing - } else { + } else if let Ok(generator) = generator.downcast::<PyIterator>() { + // A python generator object. let max_backlog = self.options.get("playlistend").map_or(10000, |value| { - usize::try_from(value.as_u64().expect("Works")).expect("Should work") + usize::try_from(json_cast!(value, as_u64)).expect("Should work") }); let mut out = vec![]; - let next = generator - .get_attr("__next__", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; - while let Ok(output) = next.call((), vm) { - out.push(output); + for output in generator { + out.push(output.wrap_exc(py)?); if out.len() == max_backlog { break; } } + + result.set_item(intern!(py, "entries"), out).wrap_exc(py)?; + } else { + // Probably some sort of paged list (`OnDemand` or otherwise) + let max_backlog = self.options.get("playlistend").map_or(10000, |value| { + usize::try_from(json_cast!(value, as_u64)).expect("Should work") + }); + + let next = generator.getattr(intern!(py, "getslice")).wrap_exc(py)?; + + let output = next + .call((), py_kw_args!(py => start = 0, end = max_backlog)) + .wrap_exc(py)?; + result - .set_item("entries", vm.new_pyobj(out), vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + .set_item(intern!(py, "entries"), output) + .wrap_exc(py)?; } } - let result = self.prepare_info_json(result, vm)?; + let result = self.prepare_info_json(&result, py)?; Ok(result) }) @@ -244,50 +251,40 @@ impl YoutubeDL { ie_result: InfoJson, download: bool, ) -> Result<InfoJson, process_ie_result::Error> { - self.interpreter.enter(|vm| { - let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]); - - let kw_args = KwArgs::new({ - let mut map = IndexMap::new(); - map.insert("download".to_owned(), vm.new_pyobj(download)); - map - }); - - let fun_args = FuncArgs::new(pos_args, kw_args); - + Python::with_gil(|py| { let inner = self - .youtube_dl_class - .get_attr("process_ie_result", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + .inner + .bind(py) + .getattr(intern!(py, "process_ie_result")) + .wrap_exc(py)?; + let result = inner - .call_with_args(fun_args, vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))? - .downcast::<PyDict>() + .call( + (json_loads(ie_result, py),), + py_kw_args!(py => download = download), + ) + .wrap_exc(py)? + .downcast_into::<PyDict>() .expect("This is a dict"); - let result = self.prepare_info_json(result, vm)?; + let result = self.prepare_info_json(&result, py)?; Ok(result) }) } - fn prepare_info_json( + fn prepare_info_json<'py>( &self, - info: PyRef<PyDict>, - vm: &VirtualMachine, + info: &Bound<'py, PyDict>, + py: Python<'py>, ) -> Result<InfoJson, prepare::Error> { - let sanitize = self - .youtube_dl_class - .get_attr("sanitize_info", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + let sanitize = self.inner.bind(py).getattr(intern!(py, "sanitize_info")).wrap_exc(py)?; - let value = sanitize - .call((info,), vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + let value = sanitize.call((info,), None).wrap_exc(py)?; let result = value.downcast::<PyDict>().expect("This should stay a dict"); - Ok(json_dumps(result, vm)) + Ok(json_dumps(result)) } } diff --git a/crates/yt_dlp/src/logging.rs b/crates/yt_dlp/src/logging.rs deleted file mode 100644 index 112836e..0000000 --- a/crates/yt_dlp/src/logging.rs +++ /dev/null @@ -1,171 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -// This file is taken from: https://github.com/dylanbstorey/pyo3-pylogger/blob/d89e0d6820ebc4f067647e3b74af59dbc4941dd5/src/lib.rs -// It is licensed under the Apache 2.0 License, copyright up to 2024 by Dylan Storey -// It was modified by Benedikt Peetz 2024, 2025 - -use log::{Level, MetadataBuilder, Record, logger}; -use rustpython::vm::{ - PyObjectRef, PyRef, PyResult, VirtualMachine, - builtins::{PyInt, PyStr}, - convert::ToPyObject, - function::FuncArgs, -}; - -/// Consume a Python `logging.LogRecord` and emit a Rust `Log` instead. -fn host_log(mut input: FuncArgs, vm: &VirtualMachine) -> PyResult<()> { - let record = input.args.remove(0); - let rust_target = { - let base: PyRef<PyStr> = input.args.remove(0).downcast().expect("Should be a string"); - base.as_str().to_owned() - }; - - let level = { - let level: PyRef<PyInt> = record - .get_attr("levelno", vm)? - .downcast() - .expect("Should always be an int"); - level.as_u32_mask() - }; - let message = { - let get_message = record.get_attr("getMessage", vm)?; - let message: PyRef<PyStr> = get_message - .call((), vm)? - .downcast() - .expect("Downcasting works"); - - message.as_str().to_owned() - }; - - let pathname = { - let pathname: PyRef<PyStr> = record - .get_attr("pathname", vm)? - .downcast() - .expect("Is a string"); - - pathname.as_str().to_owned() - }; - - let lineno = { - let lineno: PyRef<PyInt> = record - .get_attr("lineno", vm)? - .downcast() - .expect("Is a number"); - - lineno.as_u32_mask() - }; - - let logger_name = { - let name: PyRef<PyStr> = record - .get_attr("name", vm)? - .downcast() - .expect("Should be a string"); - name.as_str().to_owned() - }; - - let full_target: Option<String> = if logger_name.trim().is_empty() || logger_name == "root" { - None - } else { - // Libraries (ex: tracing_subscriber::filter::Directive) expect rust-style targets like foo::bar, - // and may not deal well with "." as a module separator: - let logger_name = logger_name.replace('.', "::"); - Some(format!("{rust_target}::{logger_name}")) - }; - - let target = full_target.as_deref().unwrap_or(&rust_target); - - // error - let error_metadata = if level >= 40 { - MetadataBuilder::new() - .target(target) - .level(Level::Error) - .build() - } else if level >= 30 { - MetadataBuilder::new() - .target(target) - .level(Level::Warn) - .build() - } else if level >= 20 { - MetadataBuilder::new() - .target(target) - .level(Level::Info) - .build() - } else if level >= 10 { - MetadataBuilder::new() - .target(target) - .level(Level::Debug) - .build() - } else { - MetadataBuilder::new() - .target(target) - .level(Level::Trace) - .build() - }; - - logger().log( - &Record::builder() - .metadata(error_metadata) - .args(format_args!("{}", &message)) - .line(Some(lineno)) - .file(None) - .module_path(Some(&pathname)) - .build(), - ); - - Ok(()) -} - -/// Registers the `host_log` function in rust as the event handler for Python's logging logger -/// This function needs to be called from within a pyo3 context as early as possible to ensure logging messages -/// arrive to the rust consumer. -/// -/// # Panics -/// Only if internal assertions fail. -#[allow(clippy::module_name_repetitions)] -pub(super) fn setup_logging(vm: &VirtualMachine, target: &str) -> PyResult<PyObjectRef> { - let logging = vm.import("logging", 0)?; - - let scope = vm.new_scope_with_builtins(); - - for (key, value) in logging.dict().expect("Should be a dict") { - let key: PyRef<PyStr> = key.downcast().expect("Is a string"); - - scope.globals.set_item(key.as_str(), value, vm)?; - } - scope - .globals - .set_item("host_log", vm.new_function("host_log", host_log).into(), vm)?; - - let local_scope = scope.clone(); - vm.run_code_string( - local_scope, - format!( - r#" -class HostHandler(Handler): - def __init__(self, level=0): - super().__init__(level=level) - - def emit(self, record): - host_log(record,"{target}") - -oldBasicConfig = basicConfig -def basicConfig(*pargs, **kwargs): - if "handlers" not in kwargs: - kwargs["handlers"] = [HostHandler()] - return oldBasicConfig(*pargs, **kwargs) -"# - ) - .as_str(), - "<embedded logging inintializing code>".to_owned(), - )?; - - Ok(scope.globals.to_pyobject(vm)) -} diff --git a/crates/yt_dlp/src/options.rs b/crates/yt_dlp/src/options.rs index dc3c154..dedb03c 100644 --- a/crates/yt_dlp/src/options.rs +++ b/crates/yt_dlp/src/options.rs @@ -8,28 +8,21 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use std::env; +use std::sync; -use indexmap::IndexMap; -use log::{Level, debug, error, log_enabled}; -use rustpython::{ - InterpreterConfig, - vm::{ - self, PyObjectRef, PyRef, PyResult, VirtualMachine, - builtins::{PyBaseException, PyStr}, - function::{FuncArgs, KwArgs, PosArgs}, - }, +use pyo3::{ + Bound, IntoPyObjectExt, PyAny, PyResult, Python, intern, + types::{PyAnyMethods, PyCFunction, PyDict, PyTuple}, }; +use pyo3_pylogger::setup_logging; use crate::{ - YoutubeDL, json_loads, logging::setup_logging, package_hacks, post_processors, - python_error::process_exception, + YoutubeDL, json_loads, post_processors, py_kw_args, + python_error::{IntoPythonError, PythonError}, }; -/// Wrap your function with [`mk_python_function`]. -pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); - -pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult<PyObjectRef>; +pub type ProgressHookFunction = fn(py: Python<'_>) -> PyResult<Bound<'_, PyCFunction>>; +pub type PostProcessorFunction = fn(py: Python<'_>) -> PyResult<Bound<'_, PyAny>>; /// Options, that are used to customize the download behaviour. /// @@ -111,52 +104,36 @@ impl YoutubeDL { /// If a python call fails. #[allow(clippy::too_many_lines)] pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> { - let mut settings = vm::Settings::default(); - if let Ok(python_path) = env::var("PYTHONPATH") { - for path in python_path.split(':') { - settings.path_list.push(path.to_owned()); - } - } else { - error!( - "No PYTHONPATH found or invalid utf8. \ - This means, that you probably did not \ - supply a yt_dlp python package!" - ); - } - - settings.install_signal_handlers = false; - - // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> - settings.optimize = 0; - - settings.isolated = true; - - let interpreter = InterpreterConfig::new() - .init_stdlib() - .settings(settings) - .interpreter(); + pyo3::prepare_freethreaded_python(); let output_options = options.options.clone(); - let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { + let yt_dlp_module = Python::with_gil(|py| { + let opts = json_loads(options.options, py); + { - // Add missing (and required) values to the stdlib - package_hacks::urllib3::apply_hacks(vm)?; + static CALL_ONCE: sync::Once = sync::Once::new(); + + CALL_ONCE.call_once(|| { + py.run( + c" +import signal +signal.signal(signal.SIGINT, signal.SIG_DFL) + ", + None, + None, + ) + .unwrap_or_else(|err| { + panic!("Failed to disable python signal handling: {err}") + }); + }); } - let yt_dlp_module = vm.import("yt_dlp", 0)?; - let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - - let opts = json_loads(options.options, vm); - { // Setup the progress hook - if let Some(function) = options.progress_hook { - opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { - let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); - vm.new_pyobj(vec![hook]) - }) - .expect("Should work?"); + if let Some(ph) = options.progress_hook { + opts.set_item(intern!(py, "progress_hooks"), vec![ph(py).wrap_exc(py)?]) + .wrap_exc(py)?; } } @@ -164,113 +141,53 @@ impl YoutubeDL { // Unconditionally set a logger. // Otherwise, yt_dlp will log to stderr. - /// Is the specified record to be logged? Returns false for no, - /// true for yes. Filters can either modify log records in-place or - /// return a completely different record instance which will replace - /// the original log record in any future processing of the event. - fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { - let record = input.args.remove(0); - - // Filter out all error logs (they are propagated as rust errors) - let levelname: PyRef<PyStr> = record - .get_attr("levelname", vm) - .expect("This should exist") - .downcast() - .expect("This should be a String"); - - let return_value = levelname.as_str() != "ERROR"; - - if log_enabled!(Level::Debug) && !return_value { - let message: String = { - let get_message = record.get_attr("getMessage", vm).expect("Is set"); - let message: PyRef<PyStr> = get_message - .call((), vm) - .expect("Can be called") - .downcast() - .expect("Downcasting works"); + let ytdl_logger = setup_logging(py, "yt_dlp").wrap_exc(py)?; - message.as_str().to_owned() - }; - - debug!("Swollowed error message: '{message}'"); - } - return_value - } - - let logging = setup_logging(vm, "yt_dlp")?; - let ytdl_logger = { - let get_logger = logging.get_item("getLogger", vm)?; - get_logger.call(("yt_dlp",), vm)? - }; - - { - let args = FuncArgs::new( - PosArgs::new(vec![]), - KwArgs::new({ - let mut map = IndexMap::new(); - // Ensure that all events are logged by setting - // the log level to NOTSET (we filter on rust's side) - map.insert("level".to_owned(), vm.new_pyobj(0)); - map - }), - ); - - let basic_config = logging.get_item("basicConfig", vm)?; - basic_config.call(args, vm)?; - } - - { - let add_filter = ytdl_logger.get_attr("addFilter", vm)?; - add_filter.call( - (vm.new_function("yt_dlp_error_filter", filter_error_log),), - vm, - )?; - } - - opts.set_item("logger", ytdl_logger, vm)?; + opts.set_item(intern!(py, "logger"), ytdl_logger) + .wrap_exc(py)?; } - let youtube_dl_class = class.call((opts,), vm)?; + let inner = { + let p_params = opts.into_bound_py_any(py).wrap_exc(py)?; + let p_auto_init = true.into_bound_py_any(py).wrap_exc(py)?; + + py.import(intern!(py, "yt_dlp.YoutubeDL")) + .wrap_exc(py)? + .getattr(intern!(py, "YoutubeDL")) + .wrap_exc(py)? + .call1( + PyTuple::new( + py, + [ + p_params.into_bound_py_any(py).wrap_exc(py)?, + p_auto_init.into_bound_py_any(py).wrap_exc(py)?, + ], + ) + .wrap_exc(py)?, + ) + .wrap_exc(py)? + }; { // Setup the post processors - - let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?; + let add_post_processor_fun = inner.getattr(intern!(py, "add_post_processor")).wrap_exc(py)?; for pp in options.post_processors { - let args = { - FuncArgs::new( - PosArgs::new(vec![pp(vm)?]), - KwArgs::new({ - let mut map = IndexMap::new(); - // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN - map.insert("when".to_owned(), vm.new_pyobj("pre_process")); - map - }), + add_post_processor_fun + .call( + (pp(py).wrap_exc(py)?.into_bound_py_any(py).wrap_exc(py)?,), + // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN + py_kw_args!(py => when = "pre_process"), ) - }; - - add_post_processor_fun.call(args, vm)?; + .wrap_exc(py)?; } } - Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class)) - }) { - Ok(ok) => Ok(ok), - Err(err) => { - // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14> - // interpreter.finalize(Some(err)); - interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(build::Error::Python(buffer)) - }) - } - }?; + Ok::<_, PythonError>(inner.unbind()) + })?; Ok(Self { - interpreter, - youtube_dl_class, - yt_dlp_module, + inner: yt_dlp_module, options: output_options, }) } @@ -278,9 +195,11 @@ impl YoutubeDL { #[allow(missing_docs)] pub mod build { + use crate::python_error::PythonError; + #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), + #[error(transparent)] + Python(#[from] PythonError), } } diff --git a/crates/yt_dlp/src/package_hacks/mod.rs b/crates/yt_dlp/src/package_hacks/mod.rs deleted file mode 100644 index 53fe323..0000000 --- a/crates/yt_dlp/src/package_hacks/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -pub(super) mod urllib3; diff --git a/crates/yt_dlp/src/package_hacks/urllib3.rs b/crates/yt_dlp/src/package_hacks/urllib3.rs deleted file mode 100644 index 28ae37a..0000000 --- a/crates/yt_dlp/src/package_hacks/urllib3.rs +++ /dev/null @@ -1,35 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -use rustpython::vm::{PyResult, VirtualMachine}; - -// NOTE(@bpeetz): Remove this, once rust-python supports these features. <2025-06-27> -pub(crate) fn apply_hacks(vm: &VirtualMachine) -> PyResult<()> { - { - // Urllib3 tries to import this value, regardless if it is set. - let ssl_module = vm.import("ssl", 0)?; - ssl_module.set_attr("VERIFY_X509_STRICT", vm.ctx.new_int(0x20), vm)?; - } - - { - // Urllib3 tries to set the SSLContext.verify_flags value, regardless if it exists or not. - // So we need to provide a polyfill. - - let scope = vm.new_scope_with_builtins(); - - vm.run_code_string( - scope, - include_str!("urllib3_polyfill.py"), - "<embedded urllib3 polyfill workaround code>".to_owned(), - )?; - } - - Ok(()) -} diff --git a/crates/yt_dlp/src/package_hacks/urllib3_polyfill.py b/crates/yt_dlp/src/package_hacks/urllib3_polyfill.py deleted file mode 100644 index 610fd99..0000000 --- a/crates/yt_dlp/src/package_hacks/urllib3_polyfill.py +++ /dev/null @@ -1,13 +0,0 @@ -# yt - A fully featured command line YouTube client -# -# Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> -# SPDX-License-Identifier: GPL-3.0-or-later -# -# This file is part of Yt. -# -# You should have received a copy of the License along with this program. -# If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -import ssl - -ssl.SSLContext.verify_flags = 0 diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs index 3cac745..f35f301 100644 --- a/crates/yt_dlp/src/post_processors/dearrow.rs +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -9,50 +9,106 @@ // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. use curl::easy::Easy; -use log::{error, info, warn}; -use rustpython::vm::{ - PyRef, VirtualMachine, - builtins::{PyDict, PyStr}, +use log::{error, info, trace, warn}; +use pyo3::{ + Bound, PyAny, PyErr, PyResult, Python, exceptions, intern, pyfunction, + types::{PyAnyMethods, PyDict, PyModule}, + wrap_pyfunction, }; use serde::{Deserialize, Serialize}; -use crate::{pydict_cast, pydict_get, wrap_post_processor}; +use crate::{ + pydict_cast, pydict_get, + python_error::{IntoPythonError, PythonError}, +}; + +/// # Errors +/// - If the underlying function returns an error. +/// - If python operations fail. +pub fn process(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> { + #[pyfunction] + fn actual_processor(info_json: Bound<'_, PyDict>) -> PyResult<Bound<'_, PyDict>> { + let output = match unwrapped_process(info_json) { + Ok(ok) => ok, + Err(err) => { + return Err(PyErr::new::<exceptions::PyRuntimeError, _>(err.to_string())); + } + }; + Ok(output) + } -wrap_post_processor!("DeArrow", unwrapped_process, process); + let module = PyModule::new(py, "rust_post_processors")?; + let scope = PyDict::new(py); + scope.set_item( + intern!(py, "actual_processor"), + wrap_pyfunction!(actual_processor, module)?, + )?; + py.run( + c" +import yt_dlp + +class DeArrow(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + info = actual_processor(info) + return [], info + +inst = DeArrow() +", + Some(&scope), + None, + )?; + + Ok(scope.get_item(intern!(py, "inst"))?.downcast_into()?) +} /// # Errors /// If the API access fails. -pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyRef<PyDict>, Error> { - if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" { - warn!("DeArrow: Extractor did not match, exiting."); +pub fn unwrapped_process(info: Bound<'_, PyDict>) -> Result<Bound<'_, PyDict>, Error> { + if pydict_get!(info, "extractor_key", String).as_str() != "Youtube" { return Ok(info); } + let mut retry_num = 3; let mut output: DeArrowApi = { - let output_bytes = { - let mut dst = Vec::new(); - - let mut easy = Easy::new(); - easy.url( - format!( - "https://sponsor.ajay.app/api/branding?videoID={}", - pydict_get!(@vm, info, "id", PyStr).as_str() - ) - .as_str(), - )?; - - let mut transfer = easy.transfer(); - transfer.write_function(|data| { - dst.extend_from_slice(data); - Ok(data.len()) - })?; - transfer.perform()?; - drop(transfer); - - dst - }; - - serde_json::from_slice(&output_bytes)? + loop { + let output_bytes = { + let mut dst = Vec::new(); + + let mut easy = Easy::new(); + easy.url( + format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + pydict_get!(info, "id", String) + ) + .as_str(), + )?; + + let mut transfer = easy.transfer(); + transfer.write_function(|data| { + dst.extend_from_slice(data); + Ok(data.len()) + })?; + transfer.perform()?; + drop(transfer); + + dst + }; + + match serde_json::from_slice(&output_bytes) { + Ok(ok) => break ok, + Err(err) => { + if retry_num > 0 { + trace!( + "DeArrow: Api access failed, trying again ({retry_num} retries left)" + ); + retry_num -= 1; + } else { + let err: serde_json::Error = err; + return Err(err.into()); + } + } + } + } }; // We pop the titles, so we need this vector reversed. @@ -74,7 +130,7 @@ pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyR continue; } - update_title(&info, &title.value, vm); + update_title(&info, &title.value).wrap_exc(info.py())?; break true; }; @@ -82,7 +138,7 @@ pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyR if !selected && title_len != 0 { // No title was selected, even though we had some titles. // Just pick the first one in this case. - update_title(&info, &output.titles[0].value, vm); + update_title(&info, &output.titles[0].value).wrap_exc(info.py())?; } Ok(info) @@ -90,6 +146,9 @@ pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyR #[derive(thiserror::Error, Debug)] pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), + #[error("Failed to access the DeArrow api: {0}")] Get(#[from] curl::Error), @@ -97,17 +156,19 @@ pub enum Error { Deserialize(#[from] serde_json::Error), } -fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) { - assert!(!info.contains_key("original_title", vm)); +fn update_title(info: &Bound<'_, PyDict>, new_title: &str) -> PyResult<()> { + let py = info.py(); + + assert!(!info.contains(intern!(py, "original_title"))?); - if let Ok(old_title) = info.get_item("title", vm) { + if let Ok(old_title) = info.get_item(intern!(py, "title")) { warn!( "DeArrow: Updating title from {:#?} to {:#?}", - pydict_cast!(@ref old_title, PyStr).as_str(), + pydict_cast!(old_title, &str), new_title ); - info.set_item("original_title", old_title, vm) + info.set_item(intern!(py, "original_title"), old_title) .expect("We checked, it is a new key"); } else { warn!("DeArrow: Setting title to {new_title:#?}"); @@ -119,8 +180,10 @@ fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) { new_title.replace('>', "") }; - info.set_item("title", vm.new_pyobj(cleaned_title), vm) + info.set_item(intern!(py, "title"), cleaned_title) .expect("This should work?"); + + Ok(()) } #[derive(Serialize, Deserialize)] diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs index 00b0ad5..d9be3f5 100644 --- a/crates/yt_dlp/src/post_processors/mod.rs +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -12,8 +12,9 @@ pub mod dearrow; #[macro_export] macro_rules! pydict_get { - (@$vm:expr, $value:expr, $name:literal, $into:ident) => {{ - match $value.get_item($name, $vm) { + ($value:expr, $name:literal, $into:ty) => {{ + let item = $value.get_item(pyo3::intern!($value.py(), $name)); + match &item { Ok(val) => $crate::pydict_cast!(val, $into), Err(_) => panic!( concat!( @@ -31,93 +32,17 @@ macro_rules! pydict_get { #[macro_export] macro_rules! pydict_cast { - ($value:expr, $into:ident) => {{ - match $value.downcast::<$into>() { + ($value:expr, $into:ty) => {{ + match $value.extract::<$into>() { Ok(result) => result, Err(val) => panic!( concat!( - "Expected to be able to downcast value ({:#?}) as ", - stringify!($into) + "Expected to be able to extract ", + stringify!($into), + " from value ({:#?})." ), val ), } }}; - (@ref $value:expr, $into:ident) => {{ - match $value.downcast_ref::<$into>() { - Some(result) => result, - None => panic!( - concat!( - "Expected to be able to downcast value ({:#?}) as ", - stringify!($into) - ), - $value - ), - } - }}; -} - -#[macro_export] -macro_rules! wrap_post_processor { - ($name:literal, $unwrap:ident, $wrapped:ident) => { - use $crate::progress_hook::__priv::vm; - - /// # Errors - /// - If the underlying function returns an error. - /// - If python operations fail. - pub fn $wrapped(vm: &vm::VirtualMachine) -> vm::PyResult<vm::PyObjectRef> { - fn actual_processor( - mut input: vm::function::FuncArgs, - vm: &vm::VirtualMachine, - ) -> vm::PyResult<vm::PyRef<vm::builtins::PyDict>> { - let input = input - .args - .remove(0) - .downcast::<vm::builtins::PyDict>() - .expect("Should be a py dict"); - - let output = match unwrapped_process(input, vm) { - Ok(ok) => ok, - Err(err) => { - return Err(vm.new_runtime_error(err.to_string())); - } - }; - - Ok(output) - } - - let scope = vm.new_scope_with_builtins(); - - scope.globals.set_item( - "actual_processor", - vm.new_function("actual_processor", actual_processor).into(), - vm, - )?; - - let local_scope = scope.clone(); - vm.run_code_string( - local_scope, - format!( - " -import yt_dlp - -class {}(yt_dlp.postprocessor.PostProcessor): - def run(self, info): - info = actual_processor(info) - return [], info - -inst = {}() -", - $name, $name - ) - .as_str(), - "<embedded post processor initializing code>".to_owned(), - )?; - - Ok(scope - .globals - .get_item("inst", vm) - .expect("We just declared it")) - } - }; } diff --git a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs index b42ae21..7e5f8a5 100644 --- a/crates/yt_dlp/src/progress_hook.rs +++ b/crates/yt_dlp/src/progress_hook.rs @@ -9,46 +9,59 @@ // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. #[macro_export] -macro_rules! mk_python_function { +macro_rules! wrap_progress_hook { ($name:ident, $new_name:ident) => { - pub fn $new_name( - mut args: $crate::progress_hook::__priv::vm::function::FuncArgs, - vm: &$crate::progress_hook::__priv::vm::VirtualMachine, - ) { - use $crate::progress_hook::__priv::vm; - - let input = { - let dict: vm::PyRef<vm::builtins::PyDict> = args - .args - .remove(0) - .downcast() - .expect("The progress hook is always called with these args"); - let new_dict = vm::builtins::PyDict::new_ref(&vm.ctx); - dict.into_iter() - .filter_map(|(name, value)| { - let real_name: vm::PyRefExact<vm::builtins::PyStr> = - name.downcast_exact(vm).expect("Is a string"); - let name_str = real_name.to_str().expect("Is a string"); - if name_str.starts_with('_') { - None - } else { - Some((name_str.to_owned(), value)) - } - }) - .for_each(|(key, value)| { - new_dict - .set_item(&key, value, vm) - .expect("This is a transpositions, should always be valid"); - }); - - $crate::progress_hook::__priv::json_dumps(new_dict, vm) - }; - $name(input).expect("Shall not fail!"); + pub(crate) fn $new_name( + py: yt_dlp::progress_hook::__priv::pyo3::Python<'_>, + ) -> yt_dlp::progress_hook::__priv::pyo3::PyResult< + yt_dlp::progress_hook::__priv::pyo3::Bound< + '_, + yt_dlp::progress_hook::__priv::pyo3::types::PyCFunction, + >, + > { + #[yt_dlp::progress_hook::__priv::pyo3::pyfunction] + #[pyo3(crate = "yt_dlp::progress_hook::__priv::pyo3")] + fn inner( + input: yt_dlp::progress_hook::__priv::pyo3::Bound< + '_, + yt_dlp::progress_hook::__priv::pyo3::types::PyDict, + >, + ) -> yt_dlp::progress_hook::__priv::pyo3::PyResult<()> { + let processed_input = { + let new_dict = yt_dlp::progress_hook::__priv::pyo3::types::PyDict::new(input.py()); + + input + .into_iter() + .filter_map(|(name, value)| { + let real_name = yt_dlp::progress_hook::__priv::pyo3::types::PyAnyMethods::extract::<String>(&name).expect("Should always be a string"); + + if real_name.starts_with('_') { + None + } else { + Some((real_name, value)) + } + }) + .for_each(|(key, value)| { + yt_dlp::progress_hook::__priv::pyo3::types::PyDictMethods::set_item(&new_dict, &key, value) + .expect("This is a transpositions, should always be valid"); + }); + yt_dlp::progress_hook::__priv::json_dumps(&new_dict) + }; + + $name(processed_input)?; + + Ok(()) + } + + let module = yt_dlp::progress_hook::__priv::pyo3::types::PyModule::new(py, "progress_hook")?; + let fun = yt_dlp::progress_hook::__priv::pyo3::wrap_pyfunction!(inner, module)?; + + Ok(fun) } }; } pub mod __priv { pub use crate::info_json::{json_dumps, json_loads}; - pub use rustpython::vm; + pub use pyo3; } diff --git a/crates/yt_dlp/src/python_error.rs b/crates/yt_dlp/src/python_error.rs index 9513956..0c442b3 100644 --- a/crates/yt_dlp/src/python_error.rs +++ b/crates/yt_dlp/src/python_error.rs @@ -8,109 +8,48 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use std::fmt::Display; +use std::fmt::{self, Display}; use log::{Level, debug, log_enabled}; -use rustpython::vm::{ - AsObject, PyPayload, PyRef, VirtualMachine, - builtins::{PyBaseException, PyBaseExceptionRef, PyStr}, - py_io::Write, - suggestion::offer_suggestions, -}; +use pyo3::{PyErr, Python, types::PyTracebackMethods}; #[derive(thiserror::Error, Debug)] pub struct PythonError(pub String); +pub(crate) trait IntoPythonError<T>: Sized { + fn wrap_exc(self, py: Python<'_>) -> Result<T, PythonError>; +} + +impl<T> IntoPythonError<T> for Result<T, PyErr> { + fn wrap_exc(self, py: Python<'_>) -> Result<T, PythonError> { + self.map_err(|exc| PythonError::from_exception(py, &exc)) + } +} + impl Display for PythonError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Python threw an exception: {}", self.0) } } impl PythonError { - pub(super) fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self { - let buffer = process_exception(vm, exc); + pub(super) fn from_exception(py: Python<'_>, exc: &PyErr) -> Self { + let buffer = process_exception(py, exc); Self(buffer) } } -pub(super) fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String { - let mut buffer = String::new(); - write_exception(vm, &mut buffer, err) - .expect("We are writing into an *in-memory* string, it will always work"); - +pub(super) fn process_exception(py: Python<'_>, err: &PyErr) -> String { if log_enabled!(Level::Debug) { - let mut output = String::new(); - vm.write_exception(&mut output, err) - .expect("We are writing into an *in-memory* string, it will always work"); - debug!("Python threw an exception: {output}"); - } + let mut output = err.to_string(); - buffer -} - -// Inlined and changed from `vm.write_exception_inner` -fn write_exception<W: Write>( - vm: &VirtualMachine, - output: &mut W, - exc: &PyBaseExceptionRef, -) -> Result<(), W::Error> { - let varargs = exc.args(); - let args_repr = { - match varargs.len() { - 0 => vec![], - 1 => { - let args0_repr = if true { - varargs[0] - .str(vm) - .unwrap_or_else(|_| PyStr::from("<element str() failed>").into_ref(&vm.ctx)) - } else { - varargs[0].repr(vm).unwrap_or_else(|_| { - PyStr::from("<element repr() failed>").into_ref(&vm.ctx) - }) - }; - vec![args0_repr] - } - _ => varargs - .iter() - .map(|vararg| { - vararg.repr(vm).unwrap_or_else(|_| { - PyStr::from("<element repr() failed>").into_ref(&vm.ctx) - }) - }) - .collect(), + if let Some(tb) = err.traceback(py) { + output.push('\n'); + output.push_str(&tb.format().unwrap()); } - }; - let exc_class = exc.class(); - - if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) { - unreachable!( - "A syntax error should never be raised, \ - as yt_dlp should not have them and neither our embedded code" - ); + debug!("Python threw an exception: {output}"); } - let exc_name = exc_class.name(); - match args_repr.len() { - 0 => write!(output, "{exc_name}"), - 1 => write!(output, "{}: {}", exc_name, args_repr[0]), - _ => write!( - output, - "{}: ({})", - exc_name, - args_repr - .iter() - .map(|val| val.as_str()) - .collect::<Vec<_>>() - .join(", "), - ), - }?; - - match offer_suggestions(exc, vm) { - Some(suggestions) => { - write!(output, ". Did you mean: '{suggestions}'?") - } - None => Ok(()), - } + err.to_string() } |