about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/options.rs
diff options
context:
space:
mode:
authorBenedikt Peetz <benedikt.peetz@b-peetz.de>2025-07-10 16:36:42 +0200
committerBenedikt Peetz <benedikt.peetz@b-peetz.de>2025-07-10 16:36:42 +0200
commit82277ca7513eff82365ed54fe9836aae5bd45fe1 (patch)
tree3c1ba24fbcb9ef5bb5d7fbeaeea8a46cd7f61ae9 /crates/yt_dlp/src/options.rs
parentrefactor(crates/bytes): Move into yt (diff)
downloadyt-82277ca7513eff82365ed54fe9836aae5bd45fe1.zip
refactor(crates/yt_dlp): Port to `pyo3` again
RustPython is slower, does not implement everything correctly and, worst
of all, contains code produced by LLMs.

Using the free-threaded mode of pyo3 also nicely works around the GIL
and enables parallel execution.
Diffstat (limited to 'crates/yt_dlp/src/options.rs')
-rw-r--r--crates/yt_dlp/src/options.rs217
1 files changed, 68 insertions, 149 deletions
diff --git a/crates/yt_dlp/src/options.rs b/crates/yt_dlp/src/options.rs
index dc3c154..dedb03c 100644
--- a/crates/yt_dlp/src/options.rs
+++ b/crates/yt_dlp/src/options.rs
@@ -8,28 +8,21 @@
 // You should have received a copy of the License along with this program.
 // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
-use std::env;
+use std::sync;
 
-use indexmap::IndexMap;
-use log::{Level, debug, error, log_enabled};
-use rustpython::{
-    InterpreterConfig,
-    vm::{
-        self, PyObjectRef, PyRef, PyResult, VirtualMachine,
-        builtins::{PyBaseException, PyStr},
-        function::{FuncArgs, KwArgs, PosArgs},
-    },
+use pyo3::{
+    Bound, IntoPyObjectExt, PyAny, PyResult, Python, intern,
+    types::{PyAnyMethods, PyCFunction, PyDict, PyTuple},
 };
+use pyo3_pylogger::setup_logging;
 
 use crate::{
-    YoutubeDL, json_loads, logging::setup_logging, package_hacks, post_processors,
-    python_error::process_exception,
+    YoutubeDL, json_loads, post_processors, py_kw_args,
+    python_error::{IntoPythonError, PythonError},
 };
 
-/// Wrap your function with [`mk_python_function`].
-pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
-
-pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult<PyObjectRef>;
+pub type ProgressHookFunction = fn(py: Python<'_>) -> PyResult<Bound<'_, PyCFunction>>;
+pub type PostProcessorFunction = fn(py: Python<'_>) -> PyResult<Bound<'_, PyAny>>;
 
 /// Options, that are used to customize the download behaviour.
 ///
@@ -111,52 +104,36 @@ impl YoutubeDL {
     /// If a python call fails.
     #[allow(clippy::too_many_lines)]
     pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> {
-        let mut settings = vm::Settings::default();
-        if let Ok(python_path) = env::var("PYTHONPATH") {
-            for path in python_path.split(':') {
-                settings.path_list.push(path.to_owned());
-            }
-        } else {
-            error!(
-                "No PYTHONPATH found or invalid utf8. \
-                This means, that you probably did not \
-                supply a yt_dlp python package!"
-            );
-        }
-
-        settings.install_signal_handlers = false;
-
-        // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13>
-        settings.optimize = 0;
-
-        settings.isolated = true;
-
-        let interpreter = InterpreterConfig::new()
-            .init_stdlib()
-            .settings(settings)
-            .interpreter();
+        pyo3::prepare_freethreaded_python();
 
         let output_options = options.options.clone();
 
-        let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| {
+        let yt_dlp_module = Python::with_gil(|py| {
+            let opts = json_loads(options.options, py);
+
             {
-                // Add missing (and required) values to the stdlib
-                package_hacks::urllib3::apply_hacks(vm)?;
+                static CALL_ONCE: sync::Once = sync::Once::new();
+
+                CALL_ONCE.call_once(|| {
+                    py.run(
+                        c"
+import signal
+signal.signal(signal.SIGINT, signal.SIG_DFL)
+              ",
+                        None,
+                        None,
+                    )
+                    .unwrap_or_else(|err| {
+                        panic!("Failed to disable python signal handling: {err}")
+                    });
+                });
             }
 
-            let yt_dlp_module = vm.import("yt_dlp", 0)?;
-            let class = yt_dlp_module.get_attr("YoutubeDL", vm)?;
-
-            let opts = json_loads(options.options, vm);
-
             {
                 // Setup the progress hook
-                if let Some(function) = options.progress_hook {
-                    opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
-                        let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
-                        vm.new_pyobj(vec![hook])
-                    })
-                    .expect("Should work?");
+                if let Some(ph) = options.progress_hook {
+                    opts.set_item(intern!(py, "progress_hooks"), vec![ph(py).wrap_exc(py)?])
+                        .wrap_exc(py)?;
                 }
             }
 
@@ -164,113 +141,53 @@ impl YoutubeDL {
                 // Unconditionally set a logger.
                 // Otherwise, yt_dlp will log to stderr.
 
-                /// Is the specified record to be logged? Returns false for no,
-                /// true for yes. Filters can either modify log records in-place or
-                /// return a completely different record instance which will replace
-                /// the original log record in any future processing of the event.
-                fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool {
-                    let record = input.args.remove(0);
-
-                    // Filter out all error logs (they are propagated as rust errors)
-                    let levelname: PyRef<PyStr> = record
-                        .get_attr("levelname", vm)
-                        .expect("This should exist")
-                        .downcast()
-                        .expect("This should be a String");
-
-                    let return_value = levelname.as_str() != "ERROR";
-
-                    if log_enabled!(Level::Debug) && !return_value {
-                        let message: String = {
-                            let get_message = record.get_attr("getMessage", vm).expect("Is set");
-                            let message: PyRef<PyStr> = get_message
-                                .call((), vm)
-                                .expect("Can be called")
-                                .downcast()
-                                .expect("Downcasting works");
+                let ytdl_logger = setup_logging(py, "yt_dlp").wrap_exc(py)?;
 
-                            message.as_str().to_owned()
-                        };
-
-                        debug!("Swollowed error message: '{message}'");
-                    }
-                    return_value
-                }
-
-                let logging = setup_logging(vm, "yt_dlp")?;
-                let ytdl_logger = {
-                    let get_logger = logging.get_item("getLogger", vm)?;
-                    get_logger.call(("yt_dlp",), vm)?
-                };
-
-                {
-                    let args = FuncArgs::new(
-                        PosArgs::new(vec![]),
-                        KwArgs::new({
-                            let mut map = IndexMap::new();
-                            // Ensure that all events are logged by setting
-                            // the log level to NOTSET (we filter on rust's side)
-                            map.insert("level".to_owned(), vm.new_pyobj(0));
-                            map
-                        }),
-                    );
-
-                    let basic_config = logging.get_item("basicConfig", vm)?;
-                    basic_config.call(args, vm)?;
-                }
-
-                {
-                    let add_filter = ytdl_logger.get_attr("addFilter", vm)?;
-                    add_filter.call(
-                        (vm.new_function("yt_dlp_error_filter", filter_error_log),),
-                        vm,
-                    )?;
-                }
-
-                opts.set_item("logger", ytdl_logger, vm)?;
+                opts.set_item(intern!(py, "logger"), ytdl_logger)
+                    .wrap_exc(py)?;
             }
 
-            let youtube_dl_class = class.call((opts,), vm)?;
+            let inner = {
+                let p_params = opts.into_bound_py_any(py).wrap_exc(py)?;
+                let p_auto_init = true.into_bound_py_any(py).wrap_exc(py)?;
+
+                py.import(intern!(py, "yt_dlp.YoutubeDL"))
+                    .wrap_exc(py)?
+                    .getattr(intern!(py, "YoutubeDL"))
+                    .wrap_exc(py)?
+                    .call1(
+                        PyTuple::new(
+                            py,
+                            [
+                                p_params.into_bound_py_any(py).wrap_exc(py)?,
+                                p_auto_init.into_bound_py_any(py).wrap_exc(py)?,
+                            ],
+                        )
+                        .wrap_exc(py)?,
+                    )
+                    .wrap_exc(py)?
+            };
 
             {
                 // Setup the post processors
-
-                let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?;
+                let add_post_processor_fun = inner.getattr(intern!(py, "add_post_processor")).wrap_exc(py)?;
 
                 for pp in options.post_processors {
-                    let args = {
-                        FuncArgs::new(
-                            PosArgs::new(vec![pp(vm)?]),
-                            KwArgs::new({
-                                let mut map = IndexMap::new();
-                                //  "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
-                                map.insert("when".to_owned(), vm.new_pyobj("pre_process"));
-                                map
-                            }),
+                    add_post_processor_fun
+                        .call(
+                            (pp(py).wrap_exc(py)?.into_bound_py_any(py).wrap_exc(py)?,),
+                            // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
+                            py_kw_args!(py => when = "pre_process"),
                         )
-                    };
-
-                    add_post_processor_fun.call(args, vm)?;
+                        .wrap_exc(py)?;
                 }
             }
 
-            Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class))
-        }) {
-            Ok(ok) => Ok(ok),
-            Err(err) => {
-                // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14>
-                // interpreter.finalize(Some(err));
-                interpreter.enter(|vm| {
-                    let buffer = process_exception(vm, &err);
-                    Err(build::Error::Python(buffer))
-                })
-            }
-        }?;
+            Ok::<_, PythonError>(inner.unbind())
+        })?;
 
         Ok(Self {
-            interpreter,
-            youtube_dl_class,
-            yt_dlp_module,
+            inner: yt_dlp_module,
             options: output_options,
         })
     }
@@ -278,9 +195,11 @@ impl YoutubeDL {
 
 #[allow(missing_docs)]
 pub mod build {
+    use crate::python_error::PythonError;
+
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
-        #[error("Python threw an exception: {0}")]
-        Python(String),
+        #[error(transparent)]
+        Python(#[from] PythonError),
     }
 }