about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/lib.rs
diff options
context:
space:
mode:
author: Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-06-16 13:58:55 +0200
committer: Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-06-16 13:58:55 +0200
commit: ab61a4e47a955dd4a5dabeef3ade1b85f6576b84 (patch)
tree: 4076a7f96ef2a6b6b359eff83bb9b8c8357a03e5 /crates/yt_dlp/src/lib.rs
parent: refactor(yt_dlp/lib): De-duplicate the info json sanitize code (diff)
download: yt-ab61a4e47a955dd4a5dabeef3ade1b85f6576b84.zip
feat(yt_dlp): Support a DeArrow post processor
Diffstat (limited to 'crates/yt_dlp/src/lib.rs')
-rw-r--r-- crates/yt_dlp/src/lib.rs 45
1 files changed, 34 insertions, 11 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index 0f40f0a..16ec4ca 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -1,10 +1,11 @@
 //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
 
-use std::{self, env, mem, path::PathBuf};
+use std::{self, env, fmt::Display, path::PathBuf};
 
 use indexmap::IndexMap;
 use log::{Level, debug, error, info, log_enabled};
 use logging::setup_logging;
+use post_processors::PostProcessor;
 use rustpython::{
     InterpreterConfig,
     vm::{
@@ -18,6 +19,7 @@ use rustpython::{
 use url::Url;
 
 mod logging;
+pub mod post_processors;
 pub mod progress_hook;
 
 #[macro_export]
@@ -61,6 +63,7 @@ pub struct YoutubeDL {
     youtube_dl_class: PyObjectRef,
     yt_dlp_module: PyObjectRef,
     options: serde_json::Map<String, serde_json::Value>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl std::fmt::Debug for YoutubeDL {
@@ -209,6 +212,7 @@ impl YoutubeDL {
             youtube_dl_class,
             yt_dlp_module,
             options: output_options,
+            post_processors: options.post_processors,
         })
     }
 
@@ -399,9 +403,18 @@ impl YoutubeDL {
 
         let result = value.downcast::<PyDict>().expect("This should stay a dict");
 
-        let json = json_dumps(result, vm);
+        let mut json = json_dumps(result, vm);
 
-        {
+        for pp in &self.post_processors {
+            if pp
+                .extractors()
+                .iter()
+                .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
+            {
+                json = pp.process(json)?;
+            } else {
+                error!("Extractor not found for {pp:#?}");
+            }
         }
 
         Ok(json)
@@ -458,6 +471,9 @@ pub mod prepare {
     pub enum Error {
         #[error(transparent)]
         Python(#[from] PythonError),
+
+        #[error("Failed to run a post processor")]
+        PostProcessorRun(#[from] post_processors::Error),
     }
 }
 
@@ -473,15 +489,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
 pub struct YoutubeDLOptions {
     options: serde_json::Map<String, serde_json::Value>,
     progress_hook: Option<ProgressHookFunction>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl YoutubeDLOptions {
     #[must_use]
     pub fn new() -> Self {
-        Self {
+        let me = Self {
             options: serde_json::Map::new(),
             progress_hook: None,
-        }
+            post_processors: vec![],
+        };
+
+        me.with_post_processor(post_processors::dearrow::DeArrowPP)
     }
 
     #[must_use]
@@ -489,10 +509,7 @@ impl YoutubeDLOptions {
         let mut options = self.options;
         options.insert(key.into(), value.into());
 
-        Self {
-            options,
-            progress_hook: self.progress_hook,
-        }
+        Self { options, ..self }
     }
 
     #[must_use]
@@ -501,12 +518,18 @@ impl YoutubeDLOptions {
             todo!()
         } else {
             Self {
-                options: self.options,
                 progress_hook: Some(progress_hook),
+                ..self
             }
         }
     }
 
+    #[must_use]
+    pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self {
+        self.post_processors.push(Box::new(post_processor));
+        self
+    }
+
     /// # Errors
     /// If the underlying [`YoutubeDL::from_options`] errors.
     pub fn build(self) -> Result<YoutubeDL, build::Error> {
@@ -517,7 +540,7 @@ impl YoutubeDLOptions {
     pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self {
         Self {
             options,
-            progress_hook: None,
+            ..Self::new()
         }
     }