about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/lib.rs
diff options
context:
space:
mode:
author Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-06-17 08:56:36 +0200
committer Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-06-17 08:56:36 +0200
commit 1a6d3639e6fddb731735554d407d1eea77f053c6 (patch)
tree 7e42b8d65c283c4cf6b756901dcfccf7c0f6db94 /crates/yt_dlp/src/lib.rs
parent fix(yt_dlp/post_processors/dearrow): Migrate to curl for api requests (diff)
download yt-1a6d3639e6fddb731735554d407d1eea77f053c6.zip
fix(yt_dlp/post_processors): Register in python
We need to tell yt_dlp about our post processors, as they would
otherwise not take full effect. For example, changing the title would
previously only have changed the title in the *in-memory* info json; the
actual files on disk (video and .info.json) would still have had the old
title, as yt_dlp did not know about our post processor.

Registering them via yt_dlp's API also has the upside of letting us
determine when they run.
Diffstat (limited to 'crates/yt_dlp/src/lib.rs')
-rw-r--r-- crates/yt_dlp/src/lib.rs 88
1 files changed, 51 insertions, 37 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index e7b37c6..d0465e1 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -10,16 +10,14 @@
 
 //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
 
-use std::{self, env, fmt::Display, path::PathBuf};
+use std::{env, fmt::Display, path::PathBuf};
 
 use indexmap::IndexMap;
 use log::{Level, debug, error, info, log_enabled};
-use logging::setup_logging;
-use post_processors::PostProcessor;
 use rustpython::{
     InterpreterConfig,
     vm::{
-        self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, VirtualMachine,
+        self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
         builtins::{PyBaseException, PyBaseExceptionRef, PyDict, PyList, PyStr},
         function::{FuncArgs, KwArgs, PosArgs},
         py_io::Write,
@@ -28,10 +26,13 @@ use rustpython::{
 };
 use url::Url;
 
-mod logging;
+use crate::logging::setup_logging;
+
 pub mod post_processors;
 pub mod progress_hook;
 
+mod logging;
+
 #[macro_export]
 macro_rules! json_get {
     ($value:expr, $name:literal, $into:ident) => {{
@@ -73,7 +74,6 @@ pub struct YoutubeDL {
     youtube_dl_class: PyObjectRef,
     yt_dlp_module: PyObjectRef,
     options: serde_json::Map<String, serde_json::Value>,
-    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl std::fmt::Debug for YoutubeDL {
@@ -91,6 +91,7 @@ impl YoutubeDL {
     ///
     /// # Errors
     /// If a python call fails.
+    #[allow(clippy::too_many_lines)]
     pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> {
         let mut settings = vm::Settings::default();
         if let Ok(python_path) = env::var("PYTHONPATH") {
@@ -124,12 +125,16 @@ impl YoutubeDL {
             let class = yt_dlp_module.get_attr("YoutubeDL", vm)?;
 
             let opts = json_loads(options.options, vm);
-            if let Some(function) = options.progress_hook {
-                opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
-                    let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
-                    vm.new_pyobj(vec![hook])
-                })
-                .expect("Should work?");
+
+            {
+                // Setup the progress hook
+                if let Some(function) = options.progress_hook {
+                    opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
+                        let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
+                        vm.new_pyobj(vec![hook])
+                    })
+                    .expect("Should work?");
+                }
             }
 
             {
@@ -204,6 +209,28 @@ impl YoutubeDL {
 
             let youtube_dl_class = class.call((opts,), vm)?;
 
+            {
+                // Setup the post processors
+
+                let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?;
+
+                for pp in options.post_processors {
+                    let args = {
+                        FuncArgs::new(
+                            PosArgs::new(vec![pp(vm)?]),
+                            KwArgs::new({
+                                let mut map = IndexMap::new();
+                                //  "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
+                                map.insert("when".to_owned(), vm.new_pyobj("pre_process"));
+                                map
+                            }),
+                        )
+                    };
+
+                    add_post_processor_fun.call(args, vm)?;
+                }
+            }
+
             Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class))
         }) {
             Ok(ok) => Ok(ok),
@@ -222,7 +249,6 @@ impl YoutubeDL {
             youtube_dl_class,
             yt_dlp_module,
             options: output_options,
-            post_processors: options.post_processors,
         })
     }
 
@@ -413,21 +439,7 @@ impl YoutubeDL {
 
         let result = value.downcast::<PyDict>().expect("This should stay a dict");
 
-        let mut json = json_dumps(result, vm);
-
-        for pp in &self.post_processors {
-            if pp
-                .extractors()
-                .iter()
-                .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
-            {
-                json = pp.process(json)?;
-            } else {
-                error!("Extractor not found for {pp:#?}");
-            }
-        }
-
-        Ok(json)
+        Ok(json_dumps(result, vm))
     }
 }
 
@@ -475,21 +487,21 @@ pub mod extract_info {
 }
 #[allow(missing_docs)]
 pub mod prepare {
-    use crate::{PythonError, post_processors};
+    use crate::PythonError;
 
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
         #[error(transparent)]
         Python(#[from] PythonError),
-
-        #[error("Failed to run a post processor")]
-        PostProcessorRun(#[from] post_processors::Error),
     }
 }
 
 pub type InfoJson = serde_json::Map<String, serde_json::Value>;
+/// Wrap your function with [`mk_python_function`].
 pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
 
+pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult<PyObjectRef>;
+
 /// Options, that are used to customize the download behaviour.
 ///
 /// In the future, this might get a Builder api.
@@ -499,7 +511,7 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
 pub struct YoutubeDLOptions {
     options: serde_json::Map<String, serde_json::Value>,
     progress_hook: Option<ProgressHookFunction>,
-    post_processors: Vec<Box<dyn PostProcessor>>,
+    post_processors: Vec<PostProcessorFunction>,
 }
 
 impl YoutubeDLOptions {
@@ -511,7 +523,7 @@ impl YoutubeDLOptions {
             post_processors: vec![],
         };
 
-        me.with_post_processor(post_processors::dearrow::DeArrowPP)
+        me.with_post_processor(post_processors::dearrow::process)
     }
 
     #[must_use]
@@ -535,8 +547,8 @@ impl YoutubeDLOptions {
     }
 
     #[must_use]
-    pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self {
-        self.post_processors.push(Box::new(post_processor));
+    pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self {
+        self.post_processors.push(pp);
         self
     }
 
@@ -569,7 +581,9 @@ pub mod build {
     }
 }
 
-fn json_loads(
+/// # Panics
+/// If expectations about python operations fail.
+pub fn json_loads(
     input: serde_json::Map<String, serde_json::Value>,
     vm: &VirtualMachine,
 ) -> PyRef<PyDict> {