From 82277ca7513eff82365ed54fe9836aae5bd45fe1 Mon Sep 17 00:00:00 2001 From: Benedikt Peetz Date: Thu, 10 Jul 2025 16:36:42 +0200 Subject: refactor(crates/yt_dlp): Port to `pyo3` again Rustpyton is slower, does not implement everything correctly and worst of all, contains code produced by LLM's. Using the freethreaded mode of pyo3 also works nicely around the GIL, and enables parallel execution. --- crates/yt_dlp/src/post_processors/dearrow.rs | 145 +++++++++++++++++++-------- 1 file changed, 104 insertions(+), 41 deletions(-) (limited to 'crates/yt_dlp/src/post_processors/dearrow.rs') diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs index 3cac745..f35f301 100644 --- a/crates/yt_dlp/src/post_processors/dearrow.rs +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -9,50 +9,106 @@ // If not, see . use curl::easy::Easy; -use log::{error, info, warn}; -use rustpython::vm::{ - PyRef, VirtualMachine, - builtins::{PyDict, PyStr}, +use log::{error, info, trace, warn}; +use pyo3::{ + Bound, PyAny, PyErr, PyResult, Python, exceptions, intern, pyfunction, + types::{PyAnyMethods, PyDict, PyModule}, + wrap_pyfunction, }; use serde::{Deserialize, Serialize}; -use crate::{pydict_cast, pydict_get, wrap_post_processor}; +use crate::{ + pydict_cast, pydict_get, + python_error::{IntoPythonError, PythonError}, +}; + +/// # Errors +/// - If the underlying function returns an error. +/// - If python operations fail. +pub fn process(py: Python<'_>) -> PyResult> { + #[pyfunction] + fn actual_processor(info_json: Bound<'_, PyDict>) -> PyResult> { + let output = match unwrapped_process(info_json) { + Ok(ok) => ok, + Err(err) => { + return Err(PyErr::new::(err.to_string())); + } + }; + Ok(output) + } -wrap_post_processor!("DeArrow", unwrapped_process, process); + let module = PyModule::new(py, "rust_post_processors")?; + let scope = PyDict::new(py); + scope.set_item( + intern!(py, "actual_processor"), + wrap_pyfunction!(actual_processor, module)?, + )?; + py.run( + c" +import yt_dlp + +class DeArrow(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + info = actual_processor(info) + return [], info + +inst = DeArrow() +", + Some(&scope), + None, + )?; + + Ok(scope.get_item(intern!(py, "inst"))?.downcast_into()?) +} /// # Errors /// If the API access fails. -pub fn unwrapped_process(info: PyRef, vm: &VirtualMachine) -> Result, Error> { - if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" { - warn!("DeArrow: Extractor did not match, exiting."); +pub fn unwrapped_process(info: Bound<'_, PyDict>) -> Result, Error> { + if pydict_get!(info, "extractor_key", String).as_str() != "Youtube" { return Ok(info); } + let mut retry_num = 3; let mut output: DeArrowApi = { - let output_bytes = { - let mut dst = Vec::new(); - - let mut easy = Easy::new(); - easy.url( - format!( - "https://sponsor.ajay.app/api/branding?videoID={}", - pydict_get!(@vm, info, "id", PyStr).as_str() - ) - .as_str(), - )?; - - let mut transfer = easy.transfer(); - transfer.write_function(|data| { - dst.extend_from_slice(data); - Ok(data.len()) - })?; - transfer.perform()?; - drop(transfer); - - dst - }; - - serde_json::from_slice(&output_bytes)? + loop { + let output_bytes = { + let mut dst = Vec::new(); + + let mut easy = Easy::new(); + easy.url( + format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + pydict_get!(info, "id", String) + ) + .as_str(), + )?; + + let mut transfer = easy.transfer(); + transfer.write_function(|data| { + dst.extend_from_slice(data); + Ok(data.len()) + })?; + transfer.perform()?; + drop(transfer); + + dst + }; + + match serde_json::from_slice(&output_bytes) { + Ok(ok) => break ok, + Err(err) => { + if retry_num > 0 { + trace!( + "DeArrow: Api access failed, trying again ({retry_num} retries left)" + ); + retry_num -= 1; + } else { + let err: serde_json::Error = err; + return Err(err.into()); + } + } + } + } }; // We pop the titles, so we need this vector reversed. @@ -74,7 +130,7 @@ pub fn unwrapped_process(info: PyRef, vm: &VirtualMachine) -> Result, vm: &VirtualMachine) -> Result, vm: &VirtualMachine) -> Result, new_title: &str, vm: &VirtualMachine) { - assert!(!info.contains_key("original_title", vm)); +fn update_title(info: &Bound<'_, PyDict>, new_title: &str) -> PyResult<()> { + let py = info.py(); + + assert!(!info.contains(intern!(py, "original_title"))?); - if let Ok(old_title) = info.get_item("title", vm) { + if let Ok(old_title) = info.get_item(intern!(py, "title")) { warn!( "DeArrow: Updating title from {:#?} to {:#?}", - pydict_cast!(@ref old_title, PyStr).as_str(), + pydict_cast!(old_title, &str), new_title ); - info.set_item("original_title", old_title, vm) + info.set_item(intern!(py, "original_title"), old_title) .expect("We checked, it is a new key"); } else { warn!("DeArrow: Setting title to {new_title:#?}"); @@ -119,8 +180,10 @@ fn update_title(info: &PyRef, new_title: &str, vm: &VirtualMachine) { new_title.replace('>', "") }; - info.set_item("title", vm.new_pyobj(cleaned_title), vm) + info.set_item(intern!(py, "title"), cleaned_title) .expect("This should work?"); + + Ok(()) } #[derive(Serialize, Deserialize)] -- cgit 1.4.1