about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/post_processors/dearrow.rs
diff options
context:
space:
mode:
authorBenedikt Peetz <benedikt.peetz@b-peetz.de>2025-07-10 16:36:42 +0200
committerBenedikt Peetz <benedikt.peetz@b-peetz.de>2025-07-10 16:36:42 +0200
commit82277ca7513eff82365ed54fe9836aae5bd45fe1 (patch)
tree3c1ba24fbcb9ef5bb5d7fbeaeea8a46cd7f61ae9 /crates/yt_dlp/src/post_processors/dearrow.rs
parentrefactor(crates/bytes): Move into yt (diff)
downloadyt-82277ca7513eff82365ed54fe9836aae5bd45fe1.zip
refactor(crates/yt_dlp): Port to `pyo3` again
RustPython is slower, does not implement everything correctly and, worst
of all, contains code produced by LLMs.

Using the free-threaded mode of pyo3 also works nicely around the GIL
and enables parallel execution.
Diffstat (limited to 'crates/yt_dlp/src/post_processors/dearrow.rs')
-rw-r--r--crates/yt_dlp/src/post_processors/dearrow.rs145
1 files changed, 104 insertions, 41 deletions
diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs
index 3cac745..f35f301 100644
--- a/crates/yt_dlp/src/post_processors/dearrow.rs
+++ b/crates/yt_dlp/src/post_processors/dearrow.rs
@@ -9,50 +9,106 @@
 // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
 use curl::easy::Easy;
-use log::{error, info, warn};
-use rustpython::vm::{
-    PyRef, VirtualMachine,
-    builtins::{PyDict, PyStr},
+use log::{error, info, trace, warn};
+use pyo3::{
+    Bound, PyAny, PyErr, PyResult, Python, exceptions, intern, pyfunction,
+    types::{PyAnyMethods, PyDict, PyModule},
+    wrap_pyfunction,
 };
 use serde::{Deserialize, Serialize};
 
-use crate::{pydict_cast, pydict_get, wrap_post_processor};
+use crate::{
+    pydict_cast, pydict_get,
+    python_error::{IntoPythonError, PythonError},
+};
+
+/// # Errors
+/// - If the underlying function returns an error.
+/// - If python operations fail.
+pub fn process(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> {
+    #[pyfunction]
+    fn actual_processor(info_json: Bound<'_, PyDict>) -> PyResult<Bound<'_, PyDict>> {
+        let output = match unwrapped_process(info_json) {
+            Ok(ok) => ok,
+            Err(err) => {
+                return Err(PyErr::new::<exceptions::PyRuntimeError, _>(err.to_string()));
+            }
+        };
+        Ok(output)
+    }
 
-wrap_post_processor!("DeArrow", unwrapped_process, process);
+    let module = PyModule::new(py, "rust_post_processors")?;
+    let scope = PyDict::new(py);
+    scope.set_item(
+        intern!(py, "actual_processor"),
+        wrap_pyfunction!(actual_processor, module)?,
+    )?;
+    py.run(
+        c"
+import yt_dlp
+
+class DeArrow(yt_dlp.postprocessor.PostProcessor):
+    def run(self, info):
+        info = actual_processor(info)
+        return [], info
+
+inst = DeArrow()
+",
+        Some(&scope),
+        None,
+    )?;
+
+    Ok(scope.get_item(intern!(py, "inst"))?.downcast_into()?)
+}
 
 /// # Errors
 /// If the API access fails.
-pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyRef<PyDict>, Error> {
-    if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" {
-        warn!("DeArrow: Extractor did not match, exiting.");
+pub fn unwrapped_process(info: Bound<'_, PyDict>) -> Result<Bound<'_, PyDict>, Error> {
+    if pydict_get!(info, "extractor_key", String).as_str() != "Youtube" {
         return Ok(info);
     }
 
+    let mut retry_num = 3;
     let mut output: DeArrowApi = {
-        let output_bytes = {
-            let mut dst = Vec::new();
-
-            let mut easy = Easy::new();
-            easy.url(
-                format!(
-                    "https://sponsor.ajay.app/api/branding?videoID={}",
-                    pydict_get!(@vm, info, "id", PyStr).as_str()
-                )
-                .as_str(),
-            )?;
-
-            let mut transfer = easy.transfer();
-            transfer.write_function(|data| {
-                dst.extend_from_slice(data);
-                Ok(data.len())
-            })?;
-            transfer.perform()?;
-            drop(transfer);
-
-            dst
-        };
-
-        serde_json::from_slice(&output_bytes)?
+        loop {
+            let output_bytes = {
+                let mut dst = Vec::new();
+
+                let mut easy = Easy::new();
+                easy.url(
+                    format!(
+                        "https://sponsor.ajay.app/api/branding?videoID={}",
+                        pydict_get!(info, "id", String)
+                    )
+                    .as_str(),
+                )?;
+
+                let mut transfer = easy.transfer();
+                transfer.write_function(|data| {
+                    dst.extend_from_slice(data);
+                    Ok(data.len())
+                })?;
+                transfer.perform()?;
+                drop(transfer);
+
+                dst
+            };
+
+            match serde_json::from_slice(&output_bytes) {
+                Ok(ok) => break ok,
+                Err(err) => {
+                    if retry_num > 0 {
+                        trace!(
+                            "DeArrow: Api access failed, trying again ({retry_num} retries left)"
+                        );
+                        retry_num -= 1;
+                    } else {
+                        let err: serde_json::Error = err;
+                        return Err(err.into());
+                    }
+                }
+            }
+        }
     };
 
     // We pop the titles, so we need this vector reversed.
@@ -74,7 +130,7 @@ pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyR
             continue;
         }
 
-        update_title(&info, &title.value, vm);
+        update_title(&info, &title.value).wrap_exc(info.py())?;
 
         break true;
     };
@@ -82,7 +138,7 @@ pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyR
     if !selected && title_len != 0 {
         // No title was selected, even though we had some titles.
         // Just pick the first one in this case.
-        update_title(&info, &output.titles[0].value, vm);
+        update_title(&info, &output.titles[0].value).wrap_exc(info.py())?;
     }
 
     Ok(info)
@@ -90,6 +146,9 @@ pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyR
 
 #[derive(thiserror::Error, Debug)]
 pub enum Error {
+    #[error(transparent)]
+    Python(#[from] PythonError),
+
     #[error("Failed to access the DeArrow api: {0}")]
     Get(#[from] curl::Error),
 
@@ -97,17 +156,19 @@ pub enum Error {
     Deserialize(#[from] serde_json::Error),
 }
 
-fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) {
-    assert!(!info.contains_key("original_title", vm));
+fn update_title(info: &Bound<'_, PyDict>, new_title: &str) -> PyResult<()> {
+    let py = info.py();
+
+    assert!(!info.contains(intern!(py, "original_title"))?);
 
-    if let Ok(old_title) = info.get_item("title", vm) {
+    if let Ok(old_title) = info.get_item(intern!(py, "title")) {
         warn!(
             "DeArrow: Updating title from {:#?} to {:#?}",
-            pydict_cast!(@ref old_title, PyStr).as_str(),
+            pydict_cast!(old_title, &str),
             new_title
         );
 
-        info.set_item("original_title", old_title, vm)
+        info.set_item(intern!(py, "original_title"), old_title)
             .expect("We checked, it is a new key");
     } else {
         warn!("DeArrow: Setting title to {new_title:#?}");
@@ -119,8 +180,10 @@ fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) {
         new_title.replace('>', "")
     };
 
-    info.set_item("title", vm.new_pyobj(cleaned_title), vm)
+    info.set_item(intern!(py, "title"), cleaned_title)
         .expect("This should work?");
+
+    Ok(())
 }
 
 #[derive(Serialize, Deserialize)]