From 82277ca7513eff82365ed54fe9836aae5bd45fe1 Mon Sep 17 00:00:00 2001 From: Benedikt Peetz Date: Thu, 10 Jul 2025 16:36:42 +0200 Subject: refactor(crates/yt_dlp): Port to `pyo3` again Rustpyton is slower, does not implement everything correctly and worst of all, contains code produced by LLM's. Using the freethreaded mode of pyo3 also works nicely around the GIL, and enables parallel execution. --- crates/yt_dlp/src/info_json.rs | 48 +++-- crates/yt_dlp/src/lib.rs | 211 ++++++++++---------- crates/yt_dlp/src/logging.rs | 171 ---------------- crates/yt_dlp/src/options.rs | 217 +++++++-------------- crates/yt_dlp/src/package_hacks/mod.rs | 11 -- crates/yt_dlp/src/package_hacks/urllib3.rs | 35 ---- .../yt_dlp/src/package_hacks/urllib3_polyfill.py | 13 -- crates/yt_dlp/src/post_processors/dearrow.rs | 145 ++++++++++---- crates/yt_dlp/src/post_processors/mod.rs | 91 +-------- crates/yt_dlp/src/progress_hook.rs | 83 ++++---- crates/yt_dlp/src/python_error.rs | 105 +++------- 11 files changed, 376 insertions(+), 754 deletions(-) delete mode 100644 crates/yt_dlp/src/logging.rs delete mode 100644 crates/yt_dlp/src/package_hacks/mod.rs delete mode 100644 crates/yt_dlp/src/package_hacks/urllib3.rs delete mode 100644 crates/yt_dlp/src/package_hacks/urllib3_polyfill.py (limited to 'crates/yt_dlp/src') diff --git a/crates/yt_dlp/src/info_json.rs b/crates/yt_dlp/src/info_json.rs index 31f4a69..3ed08ee 100644 --- a/crates/yt_dlp/src/info_json.rs +++ b/crates/yt_dlp/src/info_json.rs @@ -8,50 +8,46 @@ // You should have received a copy of the License along with this program. // If not, see . -use rustpython::vm::{ - PyRef, VirtualMachine, - builtins::{PyDict, PyStr}, +use pyo3::{ + Bound, Python, intern, + types::{PyAnyMethods, PyDict}, }; pub type InfoJson = serde_json::Map; +/// # Panics +/// If expectation about python operations fail. +#[must_use] pub fn json_loads( input: serde_json::Map, - vm: &VirtualMachine, -) -> PyRef { - let json = vm.import("json", 0).expect("Module exists"); - let loads = json.get_attr("loads", vm).expect("Method exists"); + py: Python<'_>, +) -> Bound<'_, PyDict> { + let json = py.import(intern!(py, "json")).expect("Module exists"); + let loads = json.getattr(intern!(py, "loads")).expect("Method exists"); let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); let dict = loads - .call((self_str,), vm) + .call((self_str,), None) .expect("Vaild json is always a valid dict"); - dict.downcast().expect("Should always be a dict") + dict.downcast_into().expect("Should always be a dict") } /// # Panics /// If expectation about python operations fail. -pub fn json_dumps( - input: PyRef, - vm: &VirtualMachine, -) -> serde_json::Map { - let json = vm.import("json", 0).expect("Module exists"); - let dumps = json.get_attr("dumps", vm).expect("Method exists"); +#[must_use] +pub fn json_dumps(input: &Bound<'_, PyDict>) -> serde_json::Map { + let py = input.py(); + + let json = py.import(intern!(py, "json")).expect("Module exists"); + let dumps = json.getattr(intern!(py, "dumps")).expect("Method exists"); let dict = dumps - .call((input,), vm) - .map_err(|err| vm.print_exception(err)) + .call((input,), None) + .map_err(|err| err.print(py)) .expect("Might not always work, but for our dicts it works"); - let string: PyRef = dict.downcast().expect("Should always be a string"); - - let real_string = string.to_str().expect("Should be valid utf8"); - - // { - // let mut file = File::create("debug.dump.json").unwrap(); - // write!(file, "{}", real_string).unwrap(); - // } + let string: String = dict.extract().expect("Should always be a string"); - let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); + let value: serde_json::Value = serde_json::from_str(&string).expect("Should be valid json"); match value { serde_json::Value::Object(map) => map, diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index a03e444..d0cfbdd 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -12,18 +12,16 @@ use std::path::PathBuf; -use indexmap::IndexMap; use log::info; -use rustpython::vm::{ - Interpreter, PyObjectRef, PyRef, VirtualMachine, - builtins::{PyDict, PyList, PyStr}, - function::{FuncArgs, KwArgs, PosArgs}, +use pyo3::{ + Bound, Py, PyAny, Python, intern, + types::{PyAnyMethods, PyDict, PyIterator, PyList}, }; use url::Url; use crate::{ info_json::{InfoJson, json_dumps, json_loads}, - python_error::PythonError, + python_error::{IntoPythonError, PythonError}, }; pub mod info_json; @@ -32,19 +30,16 @@ pub mod post_processors; pub mod progress_hook; pub mod python_error; -mod logging; -mod package_hacks; - #[macro_export] macro_rules! json_get { ($value:expr, $name:literal, $into:ident) => {{ match $value.get($name) { - Some(val) => $crate::json_cast!(val, $into), + Some(val) => $crate::json_cast!(@log_key $name, val, $into), None => panic!( concat!( "Expected '", $name, - "' to be a key for the'", + "' to be a key for the '", stringify!($value), "' object: {:#?}" ), @@ -57,11 +52,17 @@ macro_rules! json_get { #[macro_export] macro_rules! json_cast { ($value:expr, $into:ident) => {{ + json_cast!(@log_key "", $value, $into) + }}; + + (@log_key $name:literal, $value:expr, $into:ident) => {{ match $value.$into() { Some(result) => result, None => panic!( concat!( - "Expected to be able to cast value ({:#?}) ", + "Expected to be able to cast '", + $name, + "' value ({:#?}) ", stringify!($into) ), $value @@ -70,50 +71,50 @@ macro_rules! json_cast { }}; } +macro_rules! py_kw_args { + ($py:expr => $($kw_arg_name:ident = $kw_arg_val:expr),*) => {{ + use $crate::python_error::IntoPythonError; + + let dict = PyDict::new($py); + + $( + dict.set_item(stringify!($kw_arg_name), $kw_arg_val).wrap_exc($py)?; + )* + + Some(dict) + } + .as_ref()}; +} +pub(crate) use py_kw_args; + /// The core of the `yt_dlp` interface. +#[derive(Debug)] pub struct YoutubeDL { - interpreter: Interpreter, - youtube_dl_class: PyObjectRef, - yt_dlp_module: PyObjectRef, + inner: Py, options: serde_json::Map, } -impl std::fmt::Debug for YoutubeDL { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // TODO(@bpeetz): Use something useful here. <2025-06-13> - f.write_str("YoutubeDL") - } -} - impl YoutubeDL { /// Fetch the underlying `yt_dlp` and `python` version. /// - /// - /// # Panics - /// - /// If `yt_dlp` changed their location or type of `__version__`. - pub fn version(&self) -> (String, String) { - let yt_dlp: PyRef = self.interpreter.enter_and_expect( - |vm| { - let version_module = self.yt_dlp_module.get_attr("version", vm)?; - let version = version_module.get_attr("__version__", vm)?; - let version = version.downcast().expect("This should always be a string"); - Ok(version) - }, - "yt_dlp version location has changed", - ); - - let python: PyRef = self.interpreter.enter_and_expect( - |vm| { - let version_module = vm.import("sys", 0)?; - let version = version_module.get_attr("version", vm)?; - let version = version.downcast().expect("This should always be a string"); - Ok(version) - }, - "python version location has changed", - ); - - (yt_dlp.to_string(), python.to_string()) + /// # Errors + /// If python attribute access fails. + pub fn version(&self) -> Result<(String, String), PythonError> { + Python::with_gil(|py| { + let yt_dlp = py + .import(intern!(py, "yt_dlp")) + .wrap_exc(py)? + .getattr(intern!(py, "version")) + .wrap_exc(py)? + .getattr(intern!(py, "__version__")) + .wrap_exc(py)? + .extract() + .wrap_exc(py)?; + + let python = py.version(); + + Ok((yt_dlp, python.to_owned())) + }) } /// Download a given list of URLs. @@ -172,55 +173,61 @@ impl YoutubeDL { download: bool, process: bool, ) -> Result { - self.interpreter.enter(|vm| { - let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]); - - let kw_args = KwArgs::new({ - let mut map = IndexMap::new(); - map.insert("download".to_owned(), vm.new_pyobj(download)); - map.insert("process".to_owned(), vm.new_pyobj(process)); - map - }); - - let fun_args = FuncArgs::new(pos_args, kw_args); - + Python::with_gil(|py| { let inner = self - .youtube_dl_class - .get_attr("extract_info", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + .inner + .bind(py) + .getattr(intern!(py, "extract_info")) + .wrap_exc(py)?; + let result = inner - .call_with_args(fun_args, vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))? - .downcast::() + .call( + (url.to_string(),), + py_kw_args!(py => download = download, process = process), + ) + .wrap_exc(py)? + .downcast_into::() .expect("This is a dict"); // Resolve the generator object - if let Ok(generator) = result.get_item("entries", vm) { - if generator.payload_is::() { + if let Ok(generator) = result.get_item(intern!(py, "entries")) { + if generator.is_instance_of::() { // already resolved. Do nothing - } else { + } else if let Ok(generator) = generator.downcast::() { + // A python generator object. let max_backlog = self.options.get("playlistend").map_or(10000, |value| { - usize::try_from(value.as_u64().expect("Works")).expect("Should work") + usize::try_from(json_cast!(value, as_u64)).expect("Should work") }); let mut out = vec![]; - let next = generator - .get_attr("__next__", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; - while let Ok(output) = next.call((), vm) { - out.push(output); + for output in generator { + out.push(output.wrap_exc(py)?); if out.len() == max_backlog { break; } } + + result.set_item(intern!(py, "entries"), out).wrap_exc(py)?; + } else { + // Probably some sort of paged list (`OnDemand` or otherwise) + let max_backlog = self.options.get("playlistend").map_or(10000, |value| { + usize::try_from(json_cast!(value, as_u64)).expect("Should work") + }); + + let next = generator.getattr(intern!(py, "getslice")).wrap_exc(py)?; + + let output = next + .call((), py_kw_args!(py => start = 0, end = max_backlog)) + .wrap_exc(py)?; + result - .set_item("entries", vm.new_pyobj(out), vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + .set_item(intern!(py, "entries"), output) + .wrap_exc(py)?; } } - let result = self.prepare_info_json(result, vm)?; + let result = self.prepare_info_json(&result, py)?; Ok(result) }) @@ -244,50 +251,40 @@ impl YoutubeDL { ie_result: InfoJson, download: bool, ) -> Result { - self.interpreter.enter(|vm| { - let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]); - - let kw_args = KwArgs::new({ - let mut map = IndexMap::new(); - map.insert("download".to_owned(), vm.new_pyobj(download)); - map - }); - - let fun_args = FuncArgs::new(pos_args, kw_args); - + Python::with_gil(|py| { let inner = self - .youtube_dl_class - .get_attr("process_ie_result", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + .inner + .bind(py) + .getattr(intern!(py, "process_ie_result")) + .wrap_exc(py)?; + let result = inner - .call_with_args(fun_args, vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))? - .downcast::() + .call( + (json_loads(ie_result, py),), + py_kw_args!(py => download = download), + ) + .wrap_exc(py)? + .downcast_into::() .expect("This is a dict"); - let result = self.prepare_info_json(result, vm)?; + let result = self.prepare_info_json(&result, py)?; Ok(result) }) } - fn prepare_info_json( + fn prepare_info_json<'py>( &self, - info: PyRef, - vm: &VirtualMachine, + info: &Bound<'py, PyDict>, + py: Python<'py>, ) -> Result { - let sanitize = self - .youtube_dl_class - .get_attr("sanitize_info", vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + let sanitize = self.inner.bind(py).getattr(intern!(py, "sanitize_info")).wrap_exc(py)?; - let value = sanitize - .call((info,), vm) - .map_err(|exc| PythonError::from_exception(vm, &exc))?; + let value = sanitize.call((info,), None).wrap_exc(py)?; let result = value.downcast::().expect("This should stay a dict"); - Ok(json_dumps(result, vm)) + Ok(json_dumps(result)) } } diff --git a/crates/yt_dlp/src/logging.rs b/crates/yt_dlp/src/logging.rs deleted file mode 100644 index 112836e..0000000 --- a/crates/yt_dlp/src/logging.rs +++ /dev/null @@ -1,171 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see . - -// This file is taken from: https://github.com/dylanbstorey/pyo3-pylogger/blob/d89e0d6820ebc4f067647e3b74af59dbc4941dd5/src/lib.rs -// It is licensed under the Apache 2.0 License, copyright up to 2024 by Dylan Storey -// It was modified by Benedikt Peetz 2024, 2025 - -use log::{Level, MetadataBuilder, Record, logger}; -use rustpython::vm::{ - PyObjectRef, PyRef, PyResult, VirtualMachine, - builtins::{PyInt, PyStr}, - convert::ToPyObject, - function::FuncArgs, -}; - -/// Consume a Python `logging.LogRecord` and emit a Rust `Log` instead. -fn host_log(mut input: FuncArgs, vm: &VirtualMachine) -> PyResult<()> { - let record = input.args.remove(0); - let rust_target = { - let base: PyRef = input.args.remove(0).downcast().expect("Should be a string"); - base.as_str().to_owned() - }; - - let level = { - let level: PyRef = record - .get_attr("levelno", vm)? - .downcast() - .expect("Should always be an int"); - level.as_u32_mask() - }; - let message = { - let get_message = record.get_attr("getMessage", vm)?; - let message: PyRef = get_message - .call((), vm)? - .downcast() - .expect("Downcasting works"); - - message.as_str().to_owned() - }; - - let pathname = { - let pathname: PyRef = record - .get_attr("pathname", vm)? - .downcast() - .expect("Is a string"); - - pathname.as_str().to_owned() - }; - - let lineno = { - let lineno: PyRef = record - .get_attr("lineno", vm)? - .downcast() - .expect("Is a number"); - - lineno.as_u32_mask() - }; - - let logger_name = { - let name: PyRef = record - .get_attr("name", vm)? - .downcast() - .expect("Should be a string"); - name.as_str().to_owned() - }; - - let full_target: Option = if logger_name.trim().is_empty() || logger_name == "root" { - None - } else { - // Libraries (ex: tracing_subscriber::filter::Directive) expect rust-style targets like foo::bar, - // and may not deal well with "." as a module separator: - let logger_name = logger_name.replace('.', "::"); - Some(format!("{rust_target}::{logger_name}")) - }; - - let target = full_target.as_deref().unwrap_or(&rust_target); - - // error - let error_metadata = if level >= 40 { - MetadataBuilder::new() - .target(target) - .level(Level::Error) - .build() - } else if level >= 30 { - MetadataBuilder::new() - .target(target) - .level(Level::Warn) - .build() - } else if level >= 20 { - MetadataBuilder::new() - .target(target) - .level(Level::Info) - .build() - } else if level >= 10 { - MetadataBuilder::new() - .target(target) - .level(Level::Debug) - .build() - } else { - MetadataBuilder::new() - .target(target) - .level(Level::Trace) - .build() - }; - - logger().log( - &Record::builder() - .metadata(error_metadata) - .args(format_args!("{}", &message)) - .line(Some(lineno)) - .file(None) - .module_path(Some(&pathname)) - .build(), - ); - - Ok(()) -} - -/// Registers the `host_log` function in rust as the event handler for Python's logging logger -/// This function needs to be called from within a pyo3 context as early as possible to ensure logging messages -/// arrive to the rust consumer. -/// -/// # Panics -/// Only if internal assertions fail. -#[allow(clippy::module_name_repetitions)] -pub(super) fn setup_logging(vm: &VirtualMachine, target: &str) -> PyResult { - let logging = vm.import("logging", 0)?; - - let scope = vm.new_scope_with_builtins(); - - for (key, value) in logging.dict().expect("Should be a dict") { - let key: PyRef = key.downcast().expect("Is a string"); - - scope.globals.set_item(key.as_str(), value, vm)?; - } - scope - .globals - .set_item("host_log", vm.new_function("host_log", host_log).into(), vm)?; - - let local_scope = scope.clone(); - vm.run_code_string( - local_scope, - format!( - r#" -class HostHandler(Handler): - def __init__(self, level=0): - super().__init__(level=level) - - def emit(self, record): - host_log(record,"{target}") - -oldBasicConfig = basicConfig -def basicConfig(*pargs, **kwargs): - if "handlers" not in kwargs: - kwargs["handlers"] = [HostHandler()] - return oldBasicConfig(*pargs, **kwargs) -"# - ) - .as_str(), - "".to_owned(), - )?; - - Ok(scope.globals.to_pyobject(vm)) -} diff --git a/crates/yt_dlp/src/options.rs b/crates/yt_dlp/src/options.rs index dc3c154..dedb03c 100644 --- a/crates/yt_dlp/src/options.rs +++ b/crates/yt_dlp/src/options.rs @@ -8,28 +8,21 @@ // You should have received a copy of the License along with this program. // If not, see . -use std::env; +use std::sync; -use indexmap::IndexMap; -use log::{Level, debug, error, log_enabled}; -use rustpython::{ - InterpreterConfig, - vm::{ - self, PyObjectRef, PyRef, PyResult, VirtualMachine, - builtins::{PyBaseException, PyStr}, - function::{FuncArgs, KwArgs, PosArgs}, - }, +use pyo3::{ + Bound, IntoPyObjectExt, PyAny, PyResult, Python, intern, + types::{PyAnyMethods, PyCFunction, PyDict, PyTuple}, }; +use pyo3_pylogger::setup_logging; use crate::{ - YoutubeDL, json_loads, logging::setup_logging, package_hacks, post_processors, - python_error::process_exception, + YoutubeDL, json_loads, post_processors, py_kw_args, + python_error::{IntoPythonError, PythonError}, }; -/// Wrap your function with [`mk_python_function`]. -pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); - -pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult; +pub type ProgressHookFunction = fn(py: Python<'_>) -> PyResult>; +pub type PostProcessorFunction = fn(py: Python<'_>) -> PyResult>; /// Options, that are used to customize the download behaviour. /// @@ -111,52 +104,36 @@ impl YoutubeDL { /// If a python call fails. #[allow(clippy::too_many_lines)] pub fn from_options(options: YoutubeDLOptions) -> Result { - let mut settings = vm::Settings::default(); - if let Ok(python_path) = env::var("PYTHONPATH") { - for path in python_path.split(':') { - settings.path_list.push(path.to_owned()); - } - } else { - error!( - "No PYTHONPATH found or invalid utf8. \ - This means, that you probably did not \ - supply a yt_dlp python package!" - ); - } - - settings.install_signal_handlers = false; - - // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> - settings.optimize = 0; - - settings.isolated = true; - - let interpreter = InterpreterConfig::new() - .init_stdlib() - .settings(settings) - .interpreter(); + pyo3::prepare_freethreaded_python(); let output_options = options.options.clone(); - let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { + let yt_dlp_module = Python::with_gil(|py| { + let opts = json_loads(options.options, py); + { - // Add missing (and required) values to the stdlib - package_hacks::urllib3::apply_hacks(vm)?; + static CALL_ONCE: sync::Once = sync::Once::new(); + + CALL_ONCE.call_once(|| { + py.run( + c" +import signal +signal.signal(signal.SIGINT, signal.SIG_DFL) + ", + None, + None, + ) + .unwrap_or_else(|err| { + panic!("Failed to disable python signal handling: {err}") + }); + }); } - let yt_dlp_module = vm.import("yt_dlp", 0)?; - let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - - let opts = json_loads(options.options, vm); - { // Setup the progress hook - if let Some(function) = options.progress_hook { - opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { - let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); - vm.new_pyobj(vec![hook]) - }) - .expect("Should work?"); + if let Some(ph) = options.progress_hook { + opts.set_item(intern!(py, "progress_hooks"), vec![ph(py).wrap_exc(py)?]) + .wrap_exc(py)?; } } @@ -164,113 +141,53 @@ impl YoutubeDL { // Unconditionally set a logger. // Otherwise, yt_dlp will log to stderr. - /// Is the specified record to be logged? Returns false for no, - /// true for yes. Filters can either modify log records in-place or - /// return a completely different record instance which will replace - /// the original log record in any future processing of the event. - fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { - let record = input.args.remove(0); - - // Filter out all error logs (they are propagated as rust errors) - let levelname: PyRef = record - .get_attr("levelname", vm) - .expect("This should exist") - .downcast() - .expect("This should be a String"); - - let return_value = levelname.as_str() != "ERROR"; - - if log_enabled!(Level::Debug) && !return_value { - let message: String = { - let get_message = record.get_attr("getMessage", vm).expect("Is set"); - let message: PyRef = get_message - .call((), vm) - .expect("Can be called") - .downcast() - .expect("Downcasting works"); + let ytdl_logger = setup_logging(py, "yt_dlp").wrap_exc(py)?; - message.as_str().to_owned() - }; - - debug!("Swollowed error message: '{message}'"); - } - return_value - } - - let logging = setup_logging(vm, "yt_dlp")?; - let ytdl_logger = { - let get_logger = logging.get_item("getLogger", vm)?; - get_logger.call(("yt_dlp",), vm)? - }; - - { - let args = FuncArgs::new( - PosArgs::new(vec![]), - KwArgs::new({ - let mut map = IndexMap::new(); - // Ensure that all events are logged by setting - // the log level to NOTSET (we filter on rust's side) - map.insert("level".to_owned(), vm.new_pyobj(0)); - map - }), - ); - - let basic_config = logging.get_item("basicConfig", vm)?; - basic_config.call(args, vm)?; - } - - { - let add_filter = ytdl_logger.get_attr("addFilter", vm)?; - add_filter.call( - (vm.new_function("yt_dlp_error_filter", filter_error_log),), - vm, - )?; - } - - opts.set_item("logger", ytdl_logger, vm)?; + opts.set_item(intern!(py, "logger"), ytdl_logger) + .wrap_exc(py)?; } - let youtube_dl_class = class.call((opts,), vm)?; + let inner = { + let p_params = opts.into_bound_py_any(py).wrap_exc(py)?; + let p_auto_init = true.into_bound_py_any(py).wrap_exc(py)?; + + py.import(intern!(py, "yt_dlp.YoutubeDL")) + .wrap_exc(py)? + .getattr(intern!(py, "YoutubeDL")) + .wrap_exc(py)? + .call1( + PyTuple::new( + py, + [ + p_params.into_bound_py_any(py).wrap_exc(py)?, + p_auto_init.into_bound_py_any(py).wrap_exc(py)?, + ], + ) + .wrap_exc(py)?, + ) + .wrap_exc(py)? + }; { // Setup the post processors - - let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?; + let add_post_processor_fun = inner.getattr(intern!(py, "add_post_processor")).wrap_exc(py)?; for pp in options.post_processors { - let args = { - FuncArgs::new( - PosArgs::new(vec![pp(vm)?]), - KwArgs::new({ - let mut map = IndexMap::new(); - // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN - map.insert("when".to_owned(), vm.new_pyobj("pre_process")); - map - }), + add_post_processor_fun + .call( + (pp(py).wrap_exc(py)?.into_bound_py_any(py).wrap_exc(py)?,), + // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN + py_kw_args!(py => when = "pre_process"), ) - }; - - add_post_processor_fun.call(args, vm)?; + .wrap_exc(py)?; } } - Ok::<_, PyRef>((yt_dlp_module, youtube_dl_class)) - }) { - Ok(ok) => Ok(ok), - Err(err) => { - // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14> - // interpreter.finalize(Some(err)); - interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(build::Error::Python(buffer)) - }) - } - }?; + Ok::<_, PythonError>(inner.unbind()) + })?; Ok(Self { - interpreter, - youtube_dl_class, - yt_dlp_module, + inner: yt_dlp_module, options: output_options, }) } @@ -278,9 +195,11 @@ impl YoutubeDL { #[allow(missing_docs)] pub mod build { + use crate::python_error::PythonError; + #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), + #[error(transparent)] + Python(#[from] PythonError), } } diff --git a/crates/yt_dlp/src/package_hacks/mod.rs b/crates/yt_dlp/src/package_hacks/mod.rs deleted file mode 100644 index 53fe323..0000000 --- a/crates/yt_dlp/src/package_hacks/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2025 Benedikt Peetz -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see . - -pub(super) mod urllib3; diff --git a/crates/yt_dlp/src/package_hacks/urllib3.rs b/crates/yt_dlp/src/package_hacks/urllib3.rs deleted file mode 100644 index 28ae37a..0000000 --- a/crates/yt_dlp/src/package_hacks/urllib3.rs +++ /dev/null @@ -1,35 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2025 Benedikt Peetz -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see . - -use rustpython::vm::{PyResult, VirtualMachine}; - -// NOTE(@bpeetz): Remove this, once rust-python supports these features. <2025-06-27> -pub(crate) fn apply_hacks(vm: &VirtualMachine) -> PyResult<()> { - { - // Urllib3 tries to import this value, regardless if it is set. - let ssl_module = vm.import("ssl", 0)?; - ssl_module.set_attr("VERIFY_X509_STRICT", vm.ctx.new_int(0x20), vm)?; - } - - { - // Urllib3 tries to set the SSLContext.verify_flags value, regardless if it exists or not. - // So we need to provide a polyfill. - - let scope = vm.new_scope_with_builtins(); - - vm.run_code_string( - scope, - include_str!("urllib3_polyfill.py"), - "".to_owned(), - )?; - } - - Ok(()) -} diff --git a/crates/yt_dlp/src/package_hacks/urllib3_polyfill.py b/crates/yt_dlp/src/package_hacks/urllib3_polyfill.py deleted file mode 100644 index 610fd99..0000000 --- a/crates/yt_dlp/src/package_hacks/urllib3_polyfill.py +++ /dev/null @@ -1,13 +0,0 @@ -# yt - A fully featured command line YouTube client -# -# Copyright (C) 2025 Benedikt Peetz -# SPDX-License-Identifier: GPL-3.0-or-later -# -# This file is part of Yt. -# -# You should have received a copy of the License along with this program. -# If not, see . - -import ssl - -ssl.SSLContext.verify_flags = 0 diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs index 3cac745..f35f301 100644 --- a/crates/yt_dlp/src/post_processors/dearrow.rs +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -9,50 +9,106 @@ // If not, see . use curl::easy::Easy; -use log::{error, info, warn}; -use rustpython::vm::{ - PyRef, VirtualMachine, - builtins::{PyDict, PyStr}, +use log::{error, info, trace, warn}; +use pyo3::{ + Bound, PyAny, PyErr, PyResult, Python, exceptions, intern, pyfunction, + types::{PyAnyMethods, PyDict, PyModule}, + wrap_pyfunction, }; use serde::{Deserialize, Serialize}; -use crate::{pydict_cast, pydict_get, wrap_post_processor}; +use crate::{ + pydict_cast, pydict_get, + python_error::{IntoPythonError, PythonError}, +}; + +/// # Errors +/// - If the underlying function returns an error. +/// - If python operations fail. +pub fn process(py: Python<'_>) -> PyResult> { + #[pyfunction] + fn actual_processor(info_json: Bound<'_, PyDict>) -> PyResult> { + let output = match unwrapped_process(info_json) { + Ok(ok) => ok, + Err(err) => { + return Err(PyErr::new::(err.to_string())); + } + }; + Ok(output) + } -wrap_post_processor!("DeArrow", unwrapped_process, process); + let module = PyModule::new(py, "rust_post_processors")?; + let scope = PyDict::new(py); + scope.set_item( + intern!(py, "actual_processor"), + wrap_pyfunction!(actual_processor, module)?, + )?; + py.run( + c" +import yt_dlp + +class DeArrow(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + info = actual_processor(info) + return [], info + +inst = DeArrow() +", + Some(&scope), + None, + )?; + + Ok(scope.get_item(intern!(py, "inst"))?.downcast_into()?) +} /// # Errors /// If the API access fails. -pub fn unwrapped_process(info: PyRef, vm: &VirtualMachine) -> Result, Error> { - if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" { - warn!("DeArrow: Extractor did not match, exiting."); +pub fn unwrapped_process(info: Bound<'_, PyDict>) -> Result, Error> { + if pydict_get!(info, "extractor_key", String).as_str() != "Youtube" { return Ok(info); } + let mut retry_num = 3; let mut output: DeArrowApi = { - let output_bytes = { - let mut dst = Vec::new(); - - let mut easy = Easy::new(); - easy.url( - format!( - "https://sponsor.ajay.app/api/branding?videoID={}", - pydict_get!(@vm, info, "id", PyStr).as_str() - ) - .as_str(), - )?; - - let mut transfer = easy.transfer(); - transfer.write_function(|data| { - dst.extend_from_slice(data); - Ok(data.len()) - })?; - transfer.perform()?; - drop(transfer); - - dst - }; - - serde_json::from_slice(&output_bytes)? + loop { + let output_bytes = { + let mut dst = Vec::new(); + + let mut easy = Easy::new(); + easy.url( + format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + pydict_get!(info, "id", String) + ) + .as_str(), + )?; + + let mut transfer = easy.transfer(); + transfer.write_function(|data| { + dst.extend_from_slice(data); + Ok(data.len()) + })?; + transfer.perform()?; + drop(transfer); + + dst + }; + + match serde_json::from_slice(&output_bytes) { + Ok(ok) => break ok, + Err(err) => { + if retry_num > 0 { + trace!( + "DeArrow: Api access failed, trying again ({retry_num} retries left)" + ); + retry_num -= 1; + } else { + let err: serde_json::Error = err; + return Err(err.into()); + } + } + } + } }; // We pop the titles, so we need this vector reversed. @@ -74,7 +130,7 @@ pub fn unwrapped_process(info: PyRef, vm: &VirtualMachine) -> Result, vm: &VirtualMachine) -> Result, vm: &VirtualMachine) -> Result, new_title: &str, vm: &VirtualMachine) { - assert!(!info.contains_key("original_title", vm)); +fn update_title(info: &Bound<'_, PyDict>, new_title: &str) -> PyResult<()> { + let py = info.py(); + + assert!(!info.contains(intern!(py, "original_title"))?); - if let Ok(old_title) = info.get_item("title", vm) { + if let Ok(old_title) = info.get_item(intern!(py, "title")) { warn!( "DeArrow: Updating title from {:#?} to {:#?}", - pydict_cast!(@ref old_title, PyStr).as_str(), + pydict_cast!(old_title, &str), new_title ); - info.set_item("original_title", old_title, vm) + info.set_item(intern!(py, "original_title"), old_title) .expect("We checked, it is a new key"); } else { warn!("DeArrow: Setting title to {new_title:#?}"); @@ -119,8 +180,10 @@ fn update_title(info: &PyRef, new_title: &str, vm: &VirtualMachine) { new_title.replace('>', "") }; - info.set_item("title", vm.new_pyobj(cleaned_title), vm) + info.set_item(intern!(py, "title"), cleaned_title) .expect("This should work?"); + + Ok(()) } #[derive(Serialize, Deserialize)] diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs index 00b0ad5..d9be3f5 100644 --- a/crates/yt_dlp/src/post_processors/mod.rs +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -12,8 +12,9 @@ pub mod dearrow; #[macro_export] macro_rules! pydict_get { - (@$vm:expr, $value:expr, $name:literal, $into:ident) => {{ - match $value.get_item($name, $vm) { + ($value:expr, $name:literal, $into:ty) => {{ + let item = $value.get_item(pyo3::intern!($value.py(), $name)); + match &item { Ok(val) => $crate::pydict_cast!(val, $into), Err(_) => panic!( concat!( @@ -31,93 +32,17 @@ macro_rules! pydict_get { #[macro_export] macro_rules! pydict_cast { - ($value:expr, $into:ident) => {{ - match $value.downcast::<$into>() { + ($value:expr, $into:ty) => {{ + match $value.extract::<$into>() { Ok(result) => result, Err(val) => panic!( concat!( - "Expected to be able to downcast value ({:#?}) as ", - stringify!($into) + "Expected to be able to extract ", + stringify!($into), + " from value ({:#?})." ), val ), } }}; - (@ref $value:expr, $into:ident) => {{ - match $value.downcast_ref::<$into>() { - Some(result) => result, - None => panic!( - concat!( - "Expected to be able to downcast value ({:#?}) as ", - stringify!($into) - ), - $value - ), - } - }}; -} - -#[macro_export] -macro_rules! wrap_post_processor { - ($name:literal, $unwrap:ident, $wrapped:ident) => { - use $crate::progress_hook::__priv::vm; - - /// # Errors - /// - If the underlying function returns an error. - /// - If python operations fail. - pub fn $wrapped(vm: &vm::VirtualMachine) -> vm::PyResult { - fn actual_processor( - mut input: vm::function::FuncArgs, - vm: &vm::VirtualMachine, - ) -> vm::PyResult> { - let input = input - .args - .remove(0) - .downcast::() - .expect("Should be a py dict"); - - let output = match unwrapped_process(input, vm) { - Ok(ok) => ok, - Err(err) => { - return Err(vm.new_runtime_error(err.to_string())); - } - }; - - Ok(output) - } - - let scope = vm.new_scope_with_builtins(); - - scope.globals.set_item( - "actual_processor", - vm.new_function("actual_processor", actual_processor).into(), - vm, - )?; - - let local_scope = scope.clone(); - vm.run_code_string( - local_scope, - format!( - " -import yt_dlp - -class {}(yt_dlp.postprocessor.PostProcessor): - def run(self, info): - info = actual_processor(info) - return [], info - -inst = {}() -", - $name, $name - ) - .as_str(), - "".to_owned(), - )?; - - Ok(scope - .globals - .get_item("inst", vm) - .expect("We just declared it")) - } - }; } diff --git a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs index b42ae21..7e5f8a5 100644 --- a/crates/yt_dlp/src/progress_hook.rs +++ b/crates/yt_dlp/src/progress_hook.rs @@ -9,46 +9,59 @@ // If not, see . #[macro_export] -macro_rules! mk_python_function { +macro_rules! wrap_progress_hook { ($name:ident, $new_name:ident) => { - pub fn $new_name( - mut args: $crate::progress_hook::__priv::vm::function::FuncArgs, - vm: &$crate::progress_hook::__priv::vm::VirtualMachine, - ) { - use $crate::progress_hook::__priv::vm; - - let input = { - let dict: vm::PyRef = args - .args - .remove(0) - .downcast() - .expect("The progress hook is always called with these args"); - let new_dict = vm::builtins::PyDict::new_ref(&vm.ctx); - dict.into_iter() - .filter_map(|(name, value)| { - let real_name: vm::PyRefExact = - name.downcast_exact(vm).expect("Is a string"); - let name_str = real_name.to_str().expect("Is a string"); - if name_str.starts_with('_') { - None - } else { - Some((name_str.to_owned(), value)) - } - }) - .for_each(|(key, value)| { - new_dict - .set_item(&key, value, vm) - .expect("This is a transpositions, should always be valid"); - }); - - $crate::progress_hook::__priv::json_dumps(new_dict, vm) - }; - $name(input).expect("Shall not fail!"); + pub(crate) fn $new_name( + py: yt_dlp::progress_hook::__priv::pyo3::Python<'_>, + ) -> yt_dlp::progress_hook::__priv::pyo3::PyResult< + yt_dlp::progress_hook::__priv::pyo3::Bound< + '_, + yt_dlp::progress_hook::__priv::pyo3::types::PyCFunction, + >, + > { + #[yt_dlp::progress_hook::__priv::pyo3::pyfunction] + #[pyo3(crate = "yt_dlp::progress_hook::__priv::pyo3")] + fn inner( + input: yt_dlp::progress_hook::__priv::pyo3::Bound< + '_, + yt_dlp::progress_hook::__priv::pyo3::types::PyDict, + >, + ) -> yt_dlp::progress_hook::__priv::pyo3::PyResult<()> { + let processed_input = { + let new_dict = yt_dlp::progress_hook::__priv::pyo3::types::PyDict::new(input.py()); + + input + .into_iter() + .filter_map(|(name, value)| { + let real_name = yt_dlp::progress_hook::__priv::pyo3::types::PyAnyMethods::extract::(&name).expect("Should always be a string"); + + if real_name.starts_with('_') { + None + } else { + Some((real_name, value)) + } + }) + .for_each(|(key, value)| { + yt_dlp::progress_hook::__priv::pyo3::types::PyDictMethods::set_item(&new_dict, &key, value) + .expect("This is a transpositions, should always be valid"); + }); + yt_dlp::progress_hook::__priv::json_dumps(&new_dict) + }; + + $name(processed_input)?; + + Ok(()) + } + + let module = yt_dlp::progress_hook::__priv::pyo3::types::PyModule::new(py, "progress_hook")?; + let fun = yt_dlp::progress_hook::__priv::pyo3::wrap_pyfunction!(inner, module)?; + + Ok(fun) } }; } pub mod __priv { pub use crate::info_json::{json_dumps, json_loads}; - pub use rustpython::vm; + pub use pyo3; } diff --git a/crates/yt_dlp/src/python_error.rs b/crates/yt_dlp/src/python_error.rs index 9513956..0c442b3 100644 --- a/crates/yt_dlp/src/python_error.rs +++ b/crates/yt_dlp/src/python_error.rs @@ -8,109 +8,48 @@ // You should have received a copy of the License along with this program. // If not, see . -use std::fmt::Display; +use std::fmt::{self, Display}; use log::{Level, debug, log_enabled}; -use rustpython::vm::{ - AsObject, PyPayload, PyRef, VirtualMachine, - builtins::{PyBaseException, PyBaseExceptionRef, PyStr}, - py_io::Write, - suggestion::offer_suggestions, -}; +use pyo3::{PyErr, Python, types::PyTracebackMethods}; #[derive(thiserror::Error, Debug)] pub struct PythonError(pub String); +pub(crate) trait IntoPythonError: Sized { + fn wrap_exc(self, py: Python<'_>) -> Result; +} + +impl IntoPythonError for Result { + fn wrap_exc(self, py: Python<'_>) -> Result { + self.map_err(|exc| PythonError::from_exception(py, &exc)) + } +} + impl Display for PythonError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Python threw an exception: {}", self.0) } } impl PythonError { - pub(super) fn from_exception(vm: &VirtualMachine, exc: &PyRef) -> Self { - let buffer = process_exception(vm, exc); + pub(super) fn from_exception(py: Python<'_>, exc: &PyErr) -> Self { + let buffer = process_exception(py, exc); Self(buffer) } } -pub(super) fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String { - let mut buffer = String::new(); - write_exception(vm, &mut buffer, err) - .expect("We are writing into an *in-memory* string, it will always work"); - +pub(super) fn process_exception(py: Python<'_>, err: &PyErr) -> String { if log_enabled!(Level::Debug) { - let mut output = String::new(); - vm.write_exception(&mut output, err) - .expect("We are writing into an *in-memory* string, it will always work"); - debug!("Python threw an exception: {output}"); - } + let mut output = err.to_string(); - buffer -} - -// Inlined and changed from `vm.write_exception_inner` -fn write_exception( - vm: &VirtualMachine, - output: &mut W, - exc: &PyBaseExceptionRef, -) -> Result<(), W::Error> { - let varargs = exc.args(); - let args_repr = { - match varargs.len() { - 0 => vec![], - 1 => { - let args0_repr = if true { - varargs[0] - .str(vm) - .unwrap_or_else(|_| PyStr::from("").into_ref(&vm.ctx)) - } else { - varargs[0].repr(vm).unwrap_or_else(|_| { - PyStr::from("").into_ref(&vm.ctx) - }) - }; - vec![args0_repr] - } - _ => varargs - .iter() - .map(|vararg| { - vararg.repr(vm).unwrap_or_else(|_| { - PyStr::from("").into_ref(&vm.ctx) - }) - }) - .collect(), + if let Some(tb) = err.traceback(py) { + output.push('\n'); + output.push_str(&tb.format().unwrap()); } - }; - let exc_class = exc.class(); - - if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) { - unreachable!( - "A syntax error should never be raised, \ - as yt_dlp should not have them and neither our embedded code" - ); + debug!("Python threw an exception: {output}"); } - let exc_name = exc_class.name(); - match args_repr.len() { - 0 => write!(output, "{exc_name}"), - 1 => write!(output, "{}: {}", exc_name, args_repr[0]), - _ => write!( - output, - "{}: ({})", - exc_name, - args_repr - .iter() - .map(|val| val.as_str()) - .collect::>() - .join(", "), - ), - }?; - - match offer_suggestions(exc, vm) { - Some(suggestions) => { - write!(output, ". Did you mean: '{suggestions}'?") - } - None => Ok(()), - } + err.to_string() } -- cgit 1.4.1