about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- crates/yt_dlp/src/lib.rs 229
1 files changed, 159 insertions, 70 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index dd42fc6..e7b37c6 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -1,10 +1,21 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
 //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
 
-use std::{self, env, mem, path::PathBuf};
+use std::{self, env, fmt::Display, path::PathBuf};
 
 use indexmap::IndexMap;
 use log::{Level, debug, error, info, log_enabled};
 use logging::setup_logging;
+use post_processors::PostProcessor;
 use rustpython::{
     InterpreterConfig,
     vm::{
@@ -18,23 +29,42 @@ use rustpython::{
 use url::Url;
 
 mod logging;
+pub mod post_processors;
 pub mod progress_hook;
 
 #[macro_export]
 macro_rules! json_get {
-    ($value:expr, $name:literal, $into:ident) => {
-        $crate::json_cast!($value.get($name).expect("Should exist"), $into)
-    };
+    ($value:expr, $name:literal, $into:ident) => {{
+        match $value.get($name) {
+            Some(val) => $crate::json_cast!(val, $into),
+            None => panic!(
+                concat!(
+                    "Expected '",
+                    $name,
+                    "' to be a key for the '",
+                    stringify!($value),
+                    "' object: {:#?}"
+                ),
+                $value
+            ),
+        }
+    }};
 }
 
 #[macro_export]
 macro_rules! json_cast {
-    ($value:expr, $into:ident) => {
-        $value.$into().expect(concat!(
-            "Should be able to cast value into ",
-            stringify!($into)
-        ))
-    };
+    ($value:expr, $into:ident) => {{
+        match $value.$into() {
+            Some(result) => result,
+            None => panic!(
+                concat!(
+                    "Expected to be able to cast value ({:#?}) into ",
+                    stringify!($into)
+                ),
+                $value
+            ),
+        }
+    }};
 }
 
 /// The core of the `yt_dlp` interface.
@@ -43,6 +73,7 @@ pub struct YoutubeDL {
     youtube_dl_class: PyObjectRef,
     yt_dlp_module: PyObjectRef,
     options: serde_json::Map<String, serde_json::Value>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl std::fmt::Debug for YoutubeDL {
@@ -60,7 +91,7 @@ impl YoutubeDL {
     ///
     /// # Errors
     /// If a python call fails.
-    pub fn from_options(mut options: YoutubeDLOptions) -> Result<Self, build::Error> {
+    pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> {
         let mut settings = vm::Settings::default();
         if let Ok(python_path) = env::var("PYTHONPATH") {
             for path in python_path.split(':') {
@@ -92,9 +123,8 @@ impl YoutubeDL {
             let yt_dlp_module = vm.import("yt_dlp", 0)?;
             let class = yt_dlp_module.get_attr("YoutubeDL", vm)?;
 
-            let maybe_hook = mem::take(&mut options.progress_hook);
-            let opts = options.into_py_dict(vm);
-            if let Some(function) = maybe_hook {
+            let opts = json_loads(options.options, vm);
+            if let Some(function) = options.progress_hook {
                 opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
                     let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
                     vm.new_pyobj(vec![hook])
@@ -192,6 +222,7 @@ impl YoutubeDL {
             youtube_dl_class,
             yt_dlp_module,
             options: output_options,
+            post_processors: options.post_processors,
         })
     }
 
@@ -267,7 +298,7 @@ impl YoutubeDL {
         download: bool,
         process: bool,
     ) -> Result<InfoJson, extract_info::Error> {
-        match self.interpreter.enter(|vm| {
+        self.interpreter.enter(|vm| {
             let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]);
 
             let kw_args = KwArgs::new({
@@ -279,9 +310,13 @@ impl YoutubeDL {
 
             let fun_args = FuncArgs::new(pos_args, kw_args);
 
-            let inner = self.youtube_dl_class.get_attr("extract_info", vm)?;
+            let inner = self
+                .youtube_dl_class
+                .get_attr("extract_info", vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?;
             let result = inner
-                .call_with_args(fun_args, vm)?
+                .call_with_args(fun_args, vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?
                 .downcast::<PyDict>()
                 .expect("This is a dict");
 
@@ -295,7 +330,9 @@ impl YoutubeDL {
                     });
 
                     let mut out = vec![];
-                    let next = generator.get_attr("__next__", vm)?;
+                    let next = generator
+                        .get_attr("__next__", vm)
+                        .map_err(|exc| PythonError::from_exception(vm, &exc))?;
                     while let Ok(output) = next.call((), vm) {
                         out.push(output);
 
@@ -303,27 +340,16 @@ impl YoutubeDL {
                             break;
                         }
                     }
-                    result.set_item("entries", vm.new_pyobj(out), vm)?;
+                    result
+                        .set_item("entries", vm.new_pyobj(out), vm)
+                        .map_err(|exc| PythonError::from_exception(vm, &exc))?;
                 }
             }
 
-            let result = {
-                let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?;
-                let value = sanitize.call((result,), vm)?;
-
-                value.downcast::<PyDict>().expect("This should stay a dict")
-            };
-
-            let result_json = json_dumps(result, vm);
+            let result = self.prepare_info_json(result, vm)?;
 
-            Ok::<_, PyRef<PyBaseException>>(result_json)
-        }) {
-            Ok(ok) => Ok(ok),
-            Err(err) => self.interpreter.enter(|vm| {
-                let buffer = process_exception(vm, &err);
-                Err(extract_info::Error::Python(buffer))
-            }),
-        }
+            Ok(result)
+        })
     }
 
     /// Take the (potentially modified) result of the information extractor (i.e.,
@@ -344,7 +370,7 @@ impl YoutubeDL {
         ie_result: InfoJson,
         download: bool,
     ) -> Result<InfoJson, process_ie_result::Error> {
-        match self.interpreter.enter(|vm| {
+        self.interpreter.enter(|vm| {
             let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]);
 
             let kw_args = KwArgs::new({
@@ -355,46 +381,109 @@ impl YoutubeDL {
 
             let fun_args = FuncArgs::new(pos_args, kw_args);
 
-            let inner = self.youtube_dl_class.get_attr("process_ie_result", vm)?;
+            let inner = self
+                .youtube_dl_class
+                .get_attr("process_ie_result", vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?;
             let result = inner
-                .call_with_args(fun_args, vm)?
+                .call_with_args(fun_args, vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?
                 .downcast::<PyDict>()
                 .expect("This is a dict");
 
-            let result = {
-                let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?;
-                let value = sanitize.call((result,), vm)?;
+            let result = self.prepare_info_json(result, vm)?;
 
-                value.downcast::<PyDict>().expect("This should stay a dict")
-            };
+            Ok(result)
+        })
+    }
 
-            let result_json = json_dumps(result, vm);
+    fn prepare_info_json(
+        &self,
+        info: PyRef<PyDict>,
+        vm: &VirtualMachine,
+    ) -> Result<InfoJson, prepare::Error> {
+        let sanitize = self
+            .youtube_dl_class
+            .get_attr("sanitize_info", vm)
+            .map_err(|exc| PythonError::from_exception(vm, &exc))?;
 
-            Ok::<_, PyRef<PyBaseException>>(result_json)
-        }) {
-            Ok(ok) => Ok(ok),
-            Err(err) => self.interpreter.enter(|vm| {
-                let buffer = process_exception(vm, &err);
-                Err(process_ie_result::Error::Python(buffer))
-            }),
+        let value = sanitize
+            .call((info,), vm)
+            .map_err(|exc| PythonError::from_exception(vm, &exc))?;
+
+        let result = value.downcast::<PyDict>().expect("This should stay a dict");
+
+        let mut json = json_dumps(result, vm);
+
+        for pp in &self.post_processors {
+            if pp
+                .extractors()
+                .iter()
+                .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
+            {
+                json = pp.process(json)?;
+            } else {
+                error!("Extractor not found for {pp:#?}");
+            }
         }
+
+        Ok(json)
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+pub struct PythonError(pub String);
+
+impl Display for PythonError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Python threw an exception: {}", self.0)
+    }
+}
+
+impl PythonError {
+    fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self {
+        let buffer = process_exception(vm, exc);
+        Self(buffer)
     }
 }
 
 #[allow(missing_docs)]
 pub mod process_ie_result {
+    use crate::{PythonError, prepare};
+
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
-        #[error("Python threw an exception: {0}")]
-        Python(String),
+        #[error(transparent)]
+        Python(#[from] PythonError),
+
+        #[error("Failed to prepare the info json")]
+        InfoJsonPrepare(#[from] prepare::Error),
     }
 }
 #[allow(missing_docs)]
 pub mod extract_info {
+    use crate::{PythonError, prepare};
+
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
-        #[error("Python threw an exception: {0}")]
-        Python(String),
+        #[error(transparent)]
+        Python(#[from] PythonError),
+
+        #[error("Failed to prepare the info json")]
+        InfoJsonPrepare(#[from] prepare::Error),
+    }
+}
+#[allow(missing_docs)]
+pub mod prepare {
+    use crate::{PythonError, post_processors};
+
+    #[derive(Debug, thiserror::Error)]
+    pub enum Error {
+        #[error(transparent)]
+        Python(#[from] PythonError),
+
+        #[error("Failed to run a post processor")]
+        PostProcessorRun(#[from] post_processors::Error),
     }
 }
 
@@ -410,15 +499,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
 pub struct YoutubeDLOptions {
     options: serde_json::Map<String, serde_json::Value>,
     progress_hook: Option<ProgressHookFunction>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl YoutubeDLOptions {
     #[must_use]
     pub fn new() -> Self {
-        Self {
+        let me = Self {
             options: serde_json::Map::new(),
             progress_hook: None,
-        }
+            post_processors: vec![],
+        };
+
+        me.with_post_processor(post_processors::dearrow::DeArrowPP)
     }
 
     #[must_use]
@@ -426,10 +519,7 @@ impl YoutubeDLOptions {
         let mut options = self.options;
         options.insert(key.into(), value.into());
 
-        Self {
-            options,
-            progress_hook: self.progress_hook,
-        }
+        Self { options, ..self }
     }
 
     #[must_use]
@@ -438,12 +528,18 @@ impl YoutubeDLOptions {
             todo!()
         } else {
             Self {
-                options: self.options,
                 progress_hook: Some(progress_hook),
+                ..self
             }
         }
     }
 
+    #[must_use]
+    pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self {
+        self.post_processors.push(Box::new(post_processor));
+        self
+    }
+
     /// # Errors
     /// If the underlying [`YoutubeDL::from_options`] errors.
     pub fn build(self) -> Result<YoutubeDL, build::Error> {
@@ -454,7 +550,7 @@ impl YoutubeDLOptions {
     pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self {
         Self {
             options,
-            progress_hook: None,
+            ..Self::new()
         }
     }
 
@@ -462,10 +558,6 @@ impl YoutubeDLOptions {
     pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
         self.options.get(key)
     }
-
-    fn into_py_dict(self, vm: &VirtualMachine) -> PyRef<PyDict> {
-        json_loads(self.options, vm)
-    }
 }
 
 #[allow(missing_docs)]
@@ -474,9 +566,6 @@ pub mod build {
     pub enum Error {
         #[error("Python threw an exception: {0}")]
         Python(String),
-
-        #[error("Io error: {0}")]
-        Io(#[from] std::io::Error),
     }
 }