about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/lib.rs
diff options
context:
space:
mode:
author: Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-07-10 16:36:42 +0200
committer: Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-07-10 16:36:42 +0200
commit: 82277ca7513eff82365ed54fe9836aae5bd45fe1 (patch)
tree: 3c1ba24fbcb9ef5bb5d7fbeaeea8a46cd7f61ae9 /crates/yt_dlp/src/lib.rs
parent: refactor(crates/bytes): Move into yt (diff)
download: yt-82277ca7513eff82365ed54fe9836aae5bd45fe1.zip
refactor(crates/yt_dlp): Port to `pyo3` again
RustPython is slower, does not implement everything correctly and, worst
of all, contains code produced by LLMs.

Using the free-threaded mode of pyo3 also nicely works around the GIL
and enables parallel execution.
Diffstat (limited to 'crates/yt_dlp/src/lib.rs')
-rw-r--r-- crates/yt_dlp/src/lib.rs 211
1 files changed, 104 insertions, 107 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index a03e444..d0cfbdd 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -12,18 +12,16 @@
 
 use std::path::PathBuf;
 
-use indexmap::IndexMap;
 use log::info;
-use rustpython::vm::{
-    Interpreter, PyObjectRef, PyRef, VirtualMachine,
-    builtins::{PyDict, PyList, PyStr},
-    function::{FuncArgs, KwArgs, PosArgs},
+use pyo3::{
+    Bound, Py, PyAny, Python, intern,
+    types::{PyAnyMethods, PyDict, PyIterator, PyList},
 };
 use url::Url;
 
 use crate::{
     info_json::{InfoJson, json_dumps, json_loads},
-    python_error::PythonError,
+    python_error::{IntoPythonError, PythonError},
 };
 
 pub mod info_json;
@@ -32,19 +30,16 @@ pub mod post_processors;
 pub mod progress_hook;
 pub mod python_error;
 
-mod logging;
-mod package_hacks;
-
 #[macro_export]
 macro_rules! json_get {
     ($value:expr, $name:literal, $into:ident) => {{
         match $value.get($name) {
-            Some(val) => $crate::json_cast!(val, $into),
+            Some(val) => $crate::json_cast!(@log_key $name, val, $into),
             None => panic!(
                 concat!(
                     "Expected '",
                     $name,
-                    "' to be a key for the'",
+                    "' to be a key for the '",
                     stringify!($value),
                     "' object: {:#?}"
                 ),
@@ -57,11 +52,17 @@ macro_rules! json_get {
 #[macro_export]
 macro_rules! json_cast {
     ($value:expr, $into:ident) => {{
+        json_cast!(@log_key "<unknown>", $value, $into)
+    }};
+
+    (@log_key $name:literal, $value:expr, $into:ident) => {{
         match $value.$into() {
             Some(result) => result,
             None => panic!(
                 concat!(
-                    "Expected to be able to cast value ({:#?}) ",
+                    "Expected to be able to cast '",
+                    $name,
+                    "' value ({:#?}) ",
                     stringify!($into)
                 ),
                 $value
@@ -70,50 +71,50 @@ macro_rules! json_cast {
     }};
 }
 
+macro_rules! py_kw_args {
+    ($py:expr => $($kw_arg_name:ident = $kw_arg_val:expr),*) => {{
+        use $crate::python_error::IntoPythonError;
+
+        let dict = PyDict::new($py);
+
+        $(
+            dict.set_item(stringify!($kw_arg_name), $kw_arg_val).wrap_exc($py)?;
+        )*
+
+        Some(dict)
+    }
+    .as_ref()};
+}
+pub(crate) use py_kw_args;
+
 /// The core of the `yt_dlp` interface.
+#[derive(Debug)]
 pub struct YoutubeDL {
-    interpreter: Interpreter,
-    youtube_dl_class: PyObjectRef,
-    yt_dlp_module: PyObjectRef,
+    inner: Py<PyAny>,
     options: serde_json::Map<String, serde_json::Value>,
 }
 
-impl std::fmt::Debug for YoutubeDL {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        // TODO(@bpeetz): Use something useful here. <2025-06-13>
-        f.write_str("YoutubeDL")
-    }
-}
-
 impl YoutubeDL {
     /// Fetch the underlying `yt_dlp` and `python` version.
     ///
-    ///
-    /// # Panics
-    ///
-    /// If `yt_dlp` changed their location or type of `__version__`.
-    pub fn version(&self) -> (String, String) {
-        let yt_dlp: PyRef<PyStr> = self.interpreter.enter_and_expect(
-            |vm| {
-                let version_module = self.yt_dlp_module.get_attr("version", vm)?;
-                let version = version_module.get_attr("__version__", vm)?;
-                let version = version.downcast().expect("This should always be a string");
-                Ok(version)
-            },
-            "yt_dlp version location has changed",
-        );
-
-        let python: PyRef<PyStr> = self.interpreter.enter_and_expect(
-            |vm| {
-                let version_module = vm.import("sys", 0)?;
-                let version = version_module.get_attr("version", vm)?;
-                let version = version.downcast().expect("This should always be a string");
-                Ok(version)
-            },
-            "python version location has changed",
-        );
-
-        (yt_dlp.to_string(), python.to_string())
+    /// # Errors
+    /// If python attribute access fails.
+    pub fn version(&self) -> Result<(String, String), PythonError> {
+        Python::with_gil(|py| {
+            let yt_dlp = py
+                .import(intern!(py, "yt_dlp"))
+                .wrap_exc(py)?
+                .getattr(intern!(py, "version"))
+                .wrap_exc(py)?
+                .getattr(intern!(py, "__version__"))
+                .wrap_exc(py)?
+                .extract()
+                .wrap_exc(py)?;
+
+            let python = py.version();
+
+            Ok((yt_dlp, python.to_owned()))
+        })
     }
 
     /// Download a given list of URLs.
@@ -172,55 +173,61 @@ impl YoutubeDL {
         download: bool,
         process: bool,
     ) -> Result<InfoJson, extract_info::Error> {
-        self.interpreter.enter(|vm| {
-            let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]);
-
-            let kw_args = KwArgs::new({
-                let mut map = IndexMap::new();
-                map.insert("download".to_owned(), vm.new_pyobj(download));
-                map.insert("process".to_owned(), vm.new_pyobj(process));
-                map
-            });
-
-            let fun_args = FuncArgs::new(pos_args, kw_args);
-
+        Python::with_gil(|py| {
             let inner = self
-                .youtube_dl_class
-                .get_attr("extract_info", vm)
-                .map_err(|exc| PythonError::from_exception(vm, &exc))?;
+                .inner
+                .bind(py)
+                .getattr(intern!(py, "extract_info"))
+                .wrap_exc(py)?;
+
             let result = inner
-                .call_with_args(fun_args, vm)
-                .map_err(|exc| PythonError::from_exception(vm, &exc))?
-                .downcast::<PyDict>()
+                .call(
+                    (url.to_string(),),
+                    py_kw_args!(py => download = download, process = process),
+                )
+                .wrap_exc(py)?
+                .downcast_into::<PyDict>()
                 .expect("This is a dict");
 
             // Resolve the generator object
-            if let Ok(generator) = result.get_item("entries", vm) {
-                if generator.payload_is::<PyList>() {
+            if let Ok(generator) = result.get_item(intern!(py, "entries")) {
+                if generator.is_instance_of::<PyList>() {
                     // already resolved. Do nothing
-                } else {
+                } else if let Ok(generator) = generator.downcast::<PyIterator>() {
+                    // A python generator object.
                     let max_backlog = self.options.get("playlistend").map_or(10000, |value| {
-                        usize::try_from(value.as_u64().expect("Works")).expect("Should work")
+                        usize::try_from(json_cast!(value, as_u64)).expect("Should work")
                     });
 
                     let mut out = vec![];
-                    let next = generator
-                        .get_attr("__next__", vm)
-                        .map_err(|exc| PythonError::from_exception(vm, &exc))?;
-                    while let Ok(output) = next.call((), vm) {
-                        out.push(output);
+                    for output in generator {
+                        out.push(output.wrap_exc(py)?);
 
                         if out.len() == max_backlog {
                             break;
                         }
                     }
+
+                    result.set_item(intern!(py, "entries"), out).wrap_exc(py)?;
+                } else {
+                    // Probably some sort of paged list (`OnDemand` or otherwise)
+                    let max_backlog = self.options.get("playlistend").map_or(10000, |value| {
+                        usize::try_from(json_cast!(value, as_u64)).expect("Should work")
+                    });
+
+                    let next = generator.getattr(intern!(py, "getslice")).wrap_exc(py)?;
+
+                    let output = next
+                        .call((), py_kw_args!(py => start = 0, end = max_backlog))
+                        .wrap_exc(py)?;
+
                     result
-                        .set_item("entries", vm.new_pyobj(out), vm)
-                        .map_err(|exc| PythonError::from_exception(vm, &exc))?;
+                        .set_item(intern!(py, "entries"), output)
+                        .wrap_exc(py)?;
                 }
             }
 
-            let result = self.prepare_info_json(result, vm)?;
+            let result = self.prepare_info_json(&result, py)?;
 
             Ok(result)
         })
@@ -244,50 +251,40 @@ impl YoutubeDL {
         ie_result: InfoJson,
         download: bool,
     ) -> Result<InfoJson, process_ie_result::Error> {
-        self.interpreter.enter(|vm| {
-            let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]);
-
-            let kw_args = KwArgs::new({
-                let mut map = IndexMap::new();
-                map.insert("download".to_owned(), vm.new_pyobj(download));
-                map
-            });
-
-            let fun_args = FuncArgs::new(pos_args, kw_args);
-
+        Python::with_gil(|py| {
             let inner = self
-                .youtube_dl_class
-                .get_attr("process_ie_result", vm)
-                .map_err(|exc| PythonError::from_exception(vm, &exc))?;
+                .inner
+                .bind(py)
+                .getattr(intern!(py, "process_ie_result"))
+                .wrap_exc(py)?;
+
             let result = inner
-                .call_with_args(fun_args, vm)
-                .map_err(|exc| PythonError::from_exception(vm, &exc))?
-                .downcast::<PyDict>()
+                .call(
+                    (json_loads(ie_result, py),),
+                    py_kw_args!(py => download = download),
+                )
+                .wrap_exc(py)?
+                .downcast_into::<PyDict>()
                 .expect("This is a dict");
 
-            let result = self.prepare_info_json(result, vm)?;
+            let result = self.prepare_info_json(&result, py)?;
 
             Ok(result)
         })
     }
 
-    fn prepare_info_json(
+    fn prepare_info_json<'py>(
         &self,
-        info: PyRef<PyDict>,
-        vm: &VirtualMachine,
+        info: &Bound<'py, PyDict>,
+        py: Python<'py>,
     ) -> Result<InfoJson, prepare::Error> {
-        let sanitize = self
-            .youtube_dl_class
-            .get_attr("sanitize_info", vm)
-            .map_err(|exc| PythonError::from_exception(vm, &exc))?;
+        let sanitize = self.inner.bind(py).getattr(intern!(py, "sanitize_info")).wrap_exc(py)?;
 
-        let value = sanitize
-            .call((info,), vm)
-            .map_err(|exc| PythonError::from_exception(vm, &exc))?;
+        let value = sanitize.call((info,), None).wrap_exc(py)?;
 
         let result = value.downcast::<PyDict>().expect("This should stay a dict");
 
-        Ok(json_dumps(result, vm))
+        Ok(json_dumps(result))
     }
 }