about summary refs log tree commit diff stats
path: root/crates/yt_dlp
diff options
context:
space:
mode:
Diffstat (limited to 'crates/yt_dlp')
-rw-r--r-- crates/yt_dlp/Cargo.toml | 10
-rw-r--r-- crates/yt_dlp/README.md | 2
-rw-r--r-- crates/yt_dlp/src/lib.rs | 229
-rw-r--r-- crates/yt_dlp/src/post_processors/dearrow.rs | 118
-rw-r--r-- crates/yt_dlp/src/post_processors/mod.rs | 30
-rw-r--r-- crates/yt_dlp/src/progress_hook.rs | 10
6 files changed, 327 insertions, 72 deletions
diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml
index 90f2e10..81e1412 100644
--- a/crates/yt_dlp/Cargo.toml
+++ b/crates/yt_dlp/Cargo.toml
@@ -24,7 +24,15 @@ publish = true
 [dependencies]
 indexmap = { version = "2.9.0", default-features = false }
 log.workspace = true
-rustpython = { git = "https://github.com/RustPython/RustPython.git", features = ["threading", "stdlib", "stdio", "importlib", "ssl"], default-features = false }
+reqwest = { version = "0.12.20", features = ["blocking", "json"] }
+rustpython = { git = "https://github.com/RustPython/RustPython.git", features = [
+  "threading",
+  "stdlib",
+  "stdio",
+  "importlib",
+  "ssl",
+], default-features = false }
+serde = { workspace = true, features = ["derive"] }
 serde_json.workspace = true
 thiserror = "2.0.12"
 url.workspace = true
diff --git a/crates/yt_dlp/README.md b/crates/yt_dlp/README.md
index 591ef2e..ece8540 100644
--- a/crates/yt_dlp/README.md
+++ b/crates/yt_dlp/README.md
@@ -12,7 +12,7 @@ If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
 # Yt_py
 
-> \[can be empty\]
+> [can be empty]
 
 Some text about the project.
 
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index dd42fc6..e7b37c6 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -1,10 +1,21 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
 //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
 
-use std::{self, env, mem, path::PathBuf};
+use std::{self, env, fmt::Display, path::PathBuf};
 
 use indexmap::IndexMap;
 use log::{Level, debug, error, info, log_enabled};
 use logging::setup_logging;
+use post_processors::PostProcessor;
 use rustpython::{
     InterpreterConfig,
     vm::{
@@ -18,23 +29,42 @@ use rustpython::{
 use url::Url;
 
 mod logging;
+pub mod post_processors;
 pub mod progress_hook;
 
 #[macro_export]
 macro_rules! json_get {
-    ($value:expr, $name:literal, $into:ident) => {
-        $crate::json_cast!($value.get($name).expect("Should exist"), $into)
-    };
+    ($value:expr, $name:literal, $into:ident) => {{
+        match $value.get($name) {
+            Some(val) => $crate::json_cast!(val, $into), // key exists: cast it via `$into`
+            None => panic!(
+                concat!(
+                    "Expected '",
+                    $name,
+                    "' to be a key for the '",
+                    stringify!($value),
+                    "' object: {:#?}"
+                ),
+                $value // dumped in full, so the panic shows which keys were available
+            ),
+        }
+    }};
 }
 
 #[macro_export]
 macro_rules! json_cast {
-    ($value:expr, $into:ident) => {
-        $value.$into().expect(concat!(
-            "Should be able to cast value into ",
-            stringify!($into)
-        ))
-    };
+    ($value:expr, $into:ident) => {{
+        match $value.$into() {
+            Some(result) => result,
+            None => panic!(
+                concat!(
+                    "Expected to be able to cast value ({:#?}) ",
+                    stringify!($into)
+                ),
+                $value
+            ),
+        }
+    }};
 }
 
 /// The core of the `yt_dlp` interface.
@@ -43,6 +73,7 @@ pub struct YoutubeDL {
     youtube_dl_class: PyObjectRef,
     yt_dlp_module: PyObjectRef,
     options: serde_json::Map<String, serde_json::Value>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl std::fmt::Debug for YoutubeDL {
@@ -60,7 +91,7 @@ impl YoutubeDL {
     ///
     /// # Errors
     /// If a python call fails.
-    pub fn from_options(mut options: YoutubeDLOptions) -> Result<Self, build::Error> {
+    pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> {
         let mut settings = vm::Settings::default();
         if let Ok(python_path) = env::var("PYTHONPATH") {
             for path in python_path.split(':') {
@@ -92,9 +123,8 @@ impl YoutubeDL {
             let yt_dlp_module = vm.import("yt_dlp", 0)?;
             let class = yt_dlp_module.get_attr("YoutubeDL", vm)?;
 
-            let maybe_hook = mem::take(&mut options.progress_hook);
-            let opts = options.into_py_dict(vm);
-            if let Some(function) = maybe_hook {
+            let opts = json_loads(options.options, vm);
+            if let Some(function) = options.progress_hook {
                 opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
                     let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
                     vm.new_pyobj(vec![hook])
@@ -192,6 +222,7 @@ impl YoutubeDL {
             youtube_dl_class,
             yt_dlp_module,
             options: output_options,
+            post_processors: options.post_processors,
         })
     }
 
@@ -267,7 +298,7 @@ impl YoutubeDL {
         download: bool,
         process: bool,
     ) -> Result<InfoJson, extract_info::Error> {
-        match self.interpreter.enter(|vm| {
+        self.interpreter.enter(|vm| {
             let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]);
 
             let kw_args = KwArgs::new({
@@ -279,9 +310,13 @@ impl YoutubeDL {
 
             let fun_args = FuncArgs::new(pos_args, kw_args);
 
-            let inner = self.youtube_dl_class.get_attr("extract_info", vm)?;
+            let inner = self
+                .youtube_dl_class
+                .get_attr("extract_info", vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?;
             let result = inner
-                .call_with_args(fun_args, vm)?
+                .call_with_args(fun_args, vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?
                 .downcast::<PyDict>()
                 .expect("This is a dict");
 
@@ -295,7 +330,9 @@ impl YoutubeDL {
                     });
 
                     let mut out = vec![];
-                    let next = generator.get_attr("__next__", vm)?;
+                    let next = generator
+                        .get_attr("__next__", vm)
+                        .map_err(|exc| PythonError::from_exception(vm, &exc))?;
                     while let Ok(output) = next.call((), vm) {
                         out.push(output);
 
@@ -303,27 +340,16 @@ impl YoutubeDL {
                             break;
                         }
                     }
-                    result.set_item("entries", vm.new_pyobj(out), vm)?;
+                    result
+                        .set_item("entries", vm.new_pyobj(out), vm)
+                        .map_err(|exc| PythonError::from_exception(vm, &exc))?;
                 }
             }
 
-            let result = {
-                let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?;
-                let value = sanitize.call((result,), vm)?;
-
-                value.downcast::<PyDict>().expect("This should stay a dict")
-            };
-
-            let result_json = json_dumps(result, vm);
+            let result = self.prepare_info_json(result, vm)?;
 
-            Ok::<_, PyRef<PyBaseException>>(result_json)
-        }) {
-            Ok(ok) => Ok(ok),
-            Err(err) => self.interpreter.enter(|vm| {
-                let buffer = process_exception(vm, &err);
-                Err(extract_info::Error::Python(buffer))
-            }),
-        }
+            Ok(result)
+        })
     }
 
     /// Take the (potentially modified) result of the information extractor (i.e.,
@@ -344,7 +370,7 @@ impl YoutubeDL {
         ie_result: InfoJson,
         download: bool,
     ) -> Result<InfoJson, process_ie_result::Error> {
-        match self.interpreter.enter(|vm| {
+        self.interpreter.enter(|vm| {
             let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]);
 
             let kw_args = KwArgs::new({
@@ -355,46 +381,109 @@ impl YoutubeDL {
 
             let fun_args = FuncArgs::new(pos_args, kw_args);
 
-            let inner = self.youtube_dl_class.get_attr("process_ie_result", vm)?;
+            let inner = self
+                .youtube_dl_class
+                .get_attr("process_ie_result", vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?;
             let result = inner
-                .call_with_args(fun_args, vm)?
+                .call_with_args(fun_args, vm)
+                .map_err(|exc| PythonError::from_exception(vm, &exc))?
                 .downcast::<PyDict>()
                 .expect("This is a dict");
 
-            let result = {
-                let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?;
-                let value = sanitize.call((result,), vm)?;
+            let result = self.prepare_info_json(result, vm)?;
 
-                value.downcast::<PyDict>().expect("This should stay a dict")
-            };
+            Ok(result)
+        })
+    }
 
-            let result_json = json_dumps(result, vm);
+    fn prepare_info_json(
+        &self,
+        info: PyRef<PyDict>,
+        vm: &VirtualMachine,
+    ) -> Result<InfoJson, prepare::Error> {
+        let sanitize = self
+            .youtube_dl_class
+            .get_attr("sanitize_info", vm)
+            .map_err(|exc| PythonError::from_exception(vm, &exc))?;
 
-            Ok::<_, PyRef<PyBaseException>>(result_json)
-        }) {
-            Ok(ok) => Ok(ok),
-            Err(err) => self.interpreter.enter(|vm| {
-                let buffer = process_exception(vm, &err);
-                Err(process_ie_result::Error::Python(buffer))
-            }),
+        let value = sanitize
+            .call((info,), vm)
+            .map_err(|exc| PythonError::from_exception(vm, &exc))?;
+
+        let result = value.downcast::<PyDict>().expect("This should stay a dict");
+
+        let mut json = json_dumps(result, vm);
+
+        for pp in &self.post_processors {
+            if pp
+                .extractors()
+                .iter()
+                .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
+            {
+                json = pp.process(json)?;
+            } else {
+                error!("Extractor not found for {pp:#?}");
+            }
         }
+
+        Ok(json)
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+pub struct PythonError(pub String);
+
+impl Display for PythonError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Python threw an exception: {}", self.0)
+    }
+}
+
+impl PythonError {
+    fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self {
+        let buffer = process_exception(vm, exc);
+        Self(buffer)
     }
 }
 
 #[allow(missing_docs)]
 pub mod process_ie_result {
+    use crate::{PythonError, prepare};
+
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
-        #[error("Python threw an exception: {0}")]
-        Python(String),
+        #[error(transparent)]
+        Python(#[from] PythonError),
+
+        #[error("Failed to prepare the info json")]
+        InfoJsonPrepare(#[from] prepare::Error),
     }
 }
 #[allow(missing_docs)]
 pub mod extract_info {
+    use crate::{PythonError, prepare};
+
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
-        #[error("Python threw an exception: {0}")]
-        Python(String),
+        #[error(transparent)]
+        Python(#[from] PythonError),
+
+        #[error("Failed to prepare the info json")]
+        InfoJsonPrepare(#[from] prepare::Error),
+    }
+}
+#[allow(missing_docs)]
+pub mod prepare {
+    use crate::{PythonError, post_processors};
+
+    #[derive(Debug, thiserror::Error)]
+    pub enum Error {
+        #[error(transparent)]
+        Python(#[from] PythonError),
+
+        #[error("Failed to run a post processor")]
+        PostProcessorRun(#[from] post_processors::Error),
     }
 }
 
@@ -410,15 +499,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
 pub struct YoutubeDLOptions {
     options: serde_json::Map<String, serde_json::Value>,
     progress_hook: Option<ProgressHookFunction>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl YoutubeDLOptions {
     #[must_use]
     pub fn new() -> Self {
-        Self {
+        let me = Self {
             options: serde_json::Map::new(),
             progress_hook: None,
-        }
+            post_processors: vec![],
+        };
+
+        me.with_post_processor(post_processors::dearrow::DeArrowPP)
     }
 
     #[must_use]
@@ -426,10 +519,7 @@ impl YoutubeDLOptions {
         let mut options = self.options;
         options.insert(key.into(), value.into());
 
-        Self {
-            options,
-            progress_hook: self.progress_hook,
-        }
+        Self { options, ..self }
     }
 
     #[must_use]
@@ -438,12 +528,18 @@ impl YoutubeDLOptions {
             todo!()
         } else {
             Self {
-                options: self.options,
                 progress_hook: Some(progress_hook),
+                ..self
             }
         }
     }
 
+    #[must_use]
+    pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self {
+        self.post_processors.push(Box::new(post_processor));
+        self
+    }
+
     /// # Errors
     /// If the underlying [`YoutubeDL::from_options`] errors.
     pub fn build(self) -> Result<YoutubeDL, build::Error> {
@@ -454,7 +550,7 @@ impl YoutubeDLOptions {
     pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self {
         Self {
             options,
-            progress_hook: None,
+            ..Self::new()
         }
     }
 
@@ -462,10 +558,6 @@ impl YoutubeDLOptions {
     pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
         self.options.get(key)
     }
-
-    fn into_py_dict(self, vm: &VirtualMachine) -> PyRef<PyDict> {
-        json_loads(self.options, vm)
-    }
 }
 
 #[allow(missing_docs)]
@@ -474,9 +566,6 @@ pub mod build {
     pub enum Error {
         #[error("Python threw an exception: {0}")]
         Python(String),
-
-        #[error("Io error: {0}")]
-        Io(#[from] std::io::Error),
     }
 }
 
diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs
new file mode 100644
index 0000000..bdbea7c
--- /dev/null
+++ b/crates/yt_dlp/src/post_processors/dearrow.rs
@@ -0,0 +1,118 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use log::{info, warn};
+use serde::{Deserialize, Serialize};
+
+use crate::{InfoJson, json_get};
+
+use super::PostProcessor;
+
+#[derive(Debug, Clone, Copy)]
+pub struct DeArrowPP;
+
+impl PostProcessor for DeArrowPP {
+    fn extractors(&self) -> &'static [&'static str] {
+        &["Youtube"]
+    }
+
+    fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> {
+        let mut output: DeArrowApi = reqwest::blocking::get(format!(
+            "https://sponsor.ajay.app/api/branding?videoID={}",
+            json_get!(info, "id", as_str)
+        ))?
+        .json()?;
+
+        output.titles.reverse(); // reversed, so that `pop()` yields titles in response order
+
+        let title_len = output.titles.len(); // captured before any title is popped
+        loop {
+            let Some(title) = output.titles.pop() else {
+                break;
+            };
+
+            if !title.locked && title.votes < 1 && title_len > 1 {
+                info!(
+                    "Skipping title {:#?}, as it is not good enough",
+                    title.value
+                );
+                // Skip unlocked titles without positive votes; locked ones are always accepted.
+                continue;
+            }
+
+            if let Some(old_title) = info.insert(
+                "title".to_owned(),
+                serde_json::Value::String(title.value.clone()),
+            ) {
+                warn!("Updating title from {:#?} to {:#?}", old_title, title.value);
+                info.insert("original_title".to_owned(), old_title);
+            } else {
+                warn!("Setting title to {:#?}", title.value);
+            }
+
+            break;
+        }
+
+        Ok(info)
+    }
+}
+
+#[derive(Serialize, Deserialize)]
+/// The branding API response. See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow>
+struct DeArrowApi {
+    titles: Vec<Title>,
+    thumbnails: Vec<Thumbnail>,
+
+    #[serde(alias = "randomTime")]
+    random_time: Option<f64>,
+
+    #[serde(alias = "videoDuration")]
+    video_duration: Option<f64>,
+
+    #[serde(alias = "casualVotes")]
+    casual_votes: Vec<String>,
+}
+
+#[derive(Serialize, Deserialize)]
+struct Title {
+    /// Note: Titles will sometimes contain `>` before a word.
+    /// This tells the auto-formatter to not format that word.
+    /// If you have no auto-formatter, you can ignore this and replace it with an empty string.
+    #[serde(alias = "title")]
+    value: String,
+
+    original: bool,
+    votes: i64, // signed: the API docs type this as `int`; downvoted entries may be negative
+    locked: bool,
+
+    #[serde(alias = "UUID")]
+    uuid: String,
+
+    /// Only present if requested.
+    #[serde(alias = "userID")]
+    user_id: Option<String>,
+}
+
+#[derive(Serialize, Deserialize)]
+struct Thumbnail {
+    // `null` (deserialized as `None`) if `original` is true.
+    timestamp: Option<f64>,
+
+    original: bool,
+    votes: i64, // signed: the API docs type this as `int`; downvoted entries may be negative
+    locked: bool,
+
+    #[serde(alias = "UUID")]
+    uuid: String,
+
+    /// Only present if requested.
+    #[serde(alias = "userID")]
+    user_id: Option<String>,
+}
diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs
new file mode 100644
index 0000000..65801c2
--- /dev/null
+++ b/crates/yt_dlp/src/post_processors/mod.rs
@@ -0,0 +1,30 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use crate::InfoJson;
+
+pub mod dearrow;
+
+pub trait PostProcessor: std::fmt::Debug + Send {
+    /// Process an [`InfoJson`] object and return the updated one.
+    ///
+    /// # Errors
+    /// If the processing steps failed.
+    fn process(&self, info: InfoJson) -> Result<InfoJson, Error>;
+
+    /// The extractors (matched against the info json's `extractor_key`) this post processor supports.
+    fn extractors(&self) -> &'static [&'static str];
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("Failed to access a api: {0}")]
+    Get(#[from] reqwest::Error),
+}
diff --git a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs
index 7a7628a..43f85e0 100644
--- a/crates/yt_dlp/src/progress_hook.rs
+++ b/crates/yt_dlp/src/progress_hook.rs
@@ -1,3 +1,13 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
 #[macro_export]
 macro_rules! mk_python_function {
     ($name:ident, $new_name:ident) => {