about summary refs log tree commit diff stats
path: root/crates/yt_dlp
diff options
context:
space:
mode:
Diffstat (limited to 'crates/yt_dlp')
-rw-r--r--crates/yt_dlp/Cargo.toml2
-rw-r--r--crates/yt_dlp/src/lib.rs45
-rw-r--r--crates/yt_dlp/src/post_processors/dearrow.rs108
-rw-r--r--crates/yt_dlp/src/post_processors/mod.rs20
4 files changed, 164 insertions, 11 deletions
diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml
index 90f2e10..e5d14fd 100644
--- a/crates/yt_dlp/Cargo.toml
+++ b/crates/yt_dlp/Cargo.toml
@@ -24,7 +24,9 @@ publish = true
 [dependencies]
 indexmap = { version = "2.9.0", default-features = false }
 log.workspace = true
+reqwest = { version = "0.12.20", features = ["blocking", "json"] }
 rustpython = { git = "https://github.com/RustPython/RustPython.git", features = ["threading", "stdlib", "stdio", "importlib", "ssl"], default-features = false }
+serde = { workspace = true, features = ["derive"] }
 serde_json.workspace = true
 thiserror = "2.0.12"
 url.workspace = true
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index 0f40f0a..16ec4ca 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -1,10 +1,11 @@
 //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
 
-use std::{self, env, mem, path::PathBuf};
+use std::{self, env, fmt::Display, path::PathBuf};
 
 use indexmap::IndexMap;
 use log::{Level, debug, error, info, log_enabled};
 use logging::setup_logging;
+use post_processors::PostProcessor;
 use rustpython::{
     InterpreterConfig,
     vm::{
@@ -18,6 +19,7 @@ use rustpython::{
 use url::Url;
 
 mod logging;
+pub mod post_processors;
 pub mod progress_hook;
 
 #[macro_export]
@@ -61,6 +63,7 @@ pub struct YoutubeDL {
     youtube_dl_class: PyObjectRef,
     yt_dlp_module: PyObjectRef,
     options: serde_json::Map<String, serde_json::Value>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl std::fmt::Debug for YoutubeDL {
@@ -209,6 +212,7 @@ impl YoutubeDL {
             youtube_dl_class,
             yt_dlp_module,
             options: output_options,
+            post_processors: options.post_processors,
         })
     }
 
@@ -399,9 +403,18 @@ impl YoutubeDL {
 
         let result = value.downcast::<PyDict>().expect("This should stay a dict");
 
-        let json = json_dumps(result, vm);
+        let mut json = json_dumps(result, vm);
 
-        {
+        for pp in &self.post_processors {
+            if pp
+                .extractors()
+                .iter()
+                .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
+            {
+                json = pp.process(json)?;
+            } else {
+                error!("Extractor not found for {pp:#?}");
+            }
         }
 
         Ok(json)
@@ -458,6 +471,9 @@ pub mod prepare {
     pub enum Error {
         #[error(transparent)]
         Python(#[from] PythonError),
+
+        #[error("Failed to run a post processor")]
+        PostProcessorRun(#[from] post_processors::Error),
     }
 }
 
@@ -473,15 +489,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
 pub struct YoutubeDLOptions {
     options: serde_json::Map<String, serde_json::Value>,
     progress_hook: Option<ProgressHookFunction>,
+    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl YoutubeDLOptions {
     #[must_use]
     pub fn new() -> Self {
-        Self {
+        let me = Self {
             options: serde_json::Map::new(),
             progress_hook: None,
-        }
+            post_processors: vec![],
+        };
+
+        me.with_post_processor(post_processors::dearrow::DeArrowPP)
     }
 
     #[must_use]
@@ -489,10 +509,7 @@ impl YoutubeDLOptions {
         let mut options = self.options;
         options.insert(key.into(), value.into());
 
-        Self {
-            options,
-            progress_hook: self.progress_hook,
-        }
+        Self { options, ..self }
     }
 
     #[must_use]
@@ -501,12 +518,18 @@ impl YoutubeDLOptions {
             todo!()
         } else {
             Self {
-                options: self.options,
                 progress_hook: Some(progress_hook),
+                ..self
             }
         }
     }
 
+    /// Register an additional post processor on these options.
+    ///
+    /// The processor is boxed and appended behind the ones that are already registered.
+    /// The updated options are returned, so that calls can be chained.
+    #[must_use]
+    pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self {
+        let boxed: Box<dyn PostProcessor> = Box::new(post_processor);
+        self.post_processors.push(boxed);
+        self
+    }
+
     /// # Errors
     /// If the underlying [`YoutubeDL::from_options`] errors.
     pub fn build(self) -> Result<YoutubeDL, build::Error> {
@@ -517,7 +540,7 @@ impl YoutubeDLOptions {
     pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self {
         Self {
             options,
-            progress_hook: None,
+            ..Self::new()
         }
     }
 
diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs
new file mode 100644
index 0000000..110beeb
--- /dev/null
+++ b/crates/yt_dlp/src/post_processors/dearrow.rs
@@ -0,0 +1,108 @@
+use log::{info, warn};
+use serde::{Deserialize, Serialize};
+
+use crate::{InfoJson, json_get};
+
+use super::PostProcessor;
+
+/// A [`PostProcessor`] that replaces the title of a video with one fetched from the
+/// `DeArrow` branding API.
+///
+/// It only declares support for the `Youtube` extractor.
+#[derive(Debug, Clone, Copy)]
+pub struct DeArrowPP;
+
+impl PostProcessor for DeArrowPP {
+    /// This post processor only handles results of the `Youtube` extractor.
+    fn extractors(&self) -> &'static [&'static str] {
+        &["Youtube"]
+    }
+
+    /// Replace the `title` of `info` with the first acceptable title the `DeArrow` API returns.
+    ///
+    /// The titles are tried in the order in which the API returned them. A title is accepted
+    /// if it is locked or has at least one vote. If the API returned only a single title, it
+    /// is accepted unconditionally. The previous title is kept under the `original_title`
+    /// key. If no title is accepted, `info` is returned unchanged.
+    ///
+    /// # Errors
+    /// If the request to the API fails or its response cannot be deserialized.
+    fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> {
+        let branding: DeArrowApi = reqwest::blocking::get(format!(
+            "https://sponsor.ajay.app/api/branding?videoID={}",
+            json_get!(info, "id", as_str)
+        ))?
+        .json()?;
+
+        // With only one candidate there is no alternative to fall through to, so it is used
+        // regardless of its votes.
+        let single_candidate = branding.titles.len() <= 1;
+
+        let chosen = branding.titles.into_iter().find(|title| {
+            // The API documentation treats a locked title as trusted, independent of its vote
+            // count. Thus, only titles that are neither locked nor voted for are skipped.
+            let accepted = single_candidate || title.locked || title.votes >= 1;
+
+            if !accepted {
+                info!(
+                    "Skipping title {:#?}, as it is not good enough",
+                    title.value
+                );
+            }
+
+            accepted
+        });
+
+        if let Some(title) = chosen {
+            let new_title = strip_formatter_markers(&title.value);
+
+            if let Some(old_title) = info.insert(
+                "title".to_owned(),
+                serde_json::Value::String(new_title.clone()),
+            ) {
+                warn!("Updating title from {:#?} to {:#?}", old_title, new_title);
+                info.insert("original_title".to_owned(), old_title);
+            } else {
+                warn!("Setting title to {:#?}", new_title);
+            }
+        }
+
+        Ok(info)
+    }
+}
+
+/// Remove the `>` markers that tell an auto-formatter to leave a word untouched.
+///
+/// Only a `>` that starts a word is treated as a marker, i.e. one at the start of the title
+/// or after whitespace, that is directly followed by a non-whitespace character. Every other
+/// `>` is part of the title and is kept.
+fn strip_formatter_markers(title: &str) -> String {
+    let mut cleaned = String::with_capacity(title.len());
+    let mut at_word_start = true;
+    let mut chars = title.chars().peekable();
+
+    while let Some(current) = chars.next() {
+        let is_marker = current == '>'
+            && at_word_start
+            && chars.peek().is_some_and(|next| !next.is_whitespace());
+
+        if !is_marker {
+            cleaned.push(current);
+        }
+        at_word_start = current.is_whitespace();
+    }
+
+    cleaned
+}
+
+#[derive(Serialize, Deserialize)]
+/// The response body of the `DeArrow` branding endpoint.
+///
+/// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow>
+struct DeArrowApi {
+    /// The title candidates; they are tried in the order in which they were returned.
+    titles: Vec<Title>,
+
+    /// The thumbnail candidates.
+    thumbnails: Vec<Thumbnail>,
+
+    /// Also accepted under the key `randomTime`.
+    #[serde(alias = "randomTime")]
+    random_time: Option<f64>,
+
+    /// Also accepted under the key `videoDuration`.
+    #[serde(alias = "videoDuration")]
+    video_duration: Option<f64>,
+
+    /// Also accepted under the key `casualVotes`.
+    // NOTE(review): this field is neither an `Option` nor marked `#[serde(default)]`, so a
+    // response that omits the key, or that contains non-string entries, fails to deserialize
+    // as a whole. Confirm the shape of this field against the API documentation.
+    #[serde(alias = "casualVotes")]
+    casual_votes: Vec<String>,
+}
+
+#[derive(Serialize, Deserialize)]
+/// One title candidate of a `DeArrow` branding response.
+struct Title {
+    /// Note: Titles will sometimes contain > before a word.
+    /// This tells the auto-formatter to not format a word.
+    /// If you have no auto-formatter, you can ignore this and replace it with an empty string.
+    #[serde(alias = "title")]
+    value: String,
+
+    original: bool,
+
+    /// The vote count is signed: a negative count must not make the deserialization of the
+    /// whole response fail.
+    votes: i64,
+
+    locked: bool,
+
+    #[serde(alias = "UUID")]
+    uuid: String,
+
+    /// only present if requested
+    #[serde(alias = "userID")]
+    user_id: Option<String>,
+}
+
+#[derive(Serialize, Deserialize)]
+/// One thumbnail candidate of a `DeArrow` branding response.
+struct Thumbnail {
+    /// null if original is true
+    timestamp: Option<f64>,
+
+    original: bool,
+
+    /// The vote count is signed: a negative count must not make the deserialization of the
+    /// whole response fail.
+    votes: i64,
+
+    locked: bool,
+
+    #[serde(alias = "UUID")]
+    uuid: String,
+
+    /// only present if requested
+    #[serde(alias = "userID")]
+    user_id: Option<String>,
+}
diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs
new file mode 100644
index 0000000..6067c7a
--- /dev/null
+++ b/crates/yt_dlp/src/post_processors/mod.rs
@@ -0,0 +1,20 @@
+use crate::InfoJson;
+
+pub mod dearrow;
+
+/// A processing step that is applied to the [`InfoJson`] of an extraction result.
+///
+/// A post processor is only run on an info json whose `extractor_key` is contained in the
+/// list returned by [`PostProcessor::extractors`].
+pub trait PostProcessor: std::fmt::Debug + Send {
+    /// Process a [`InfoJson`] object and return the updated one.
+    ///
+    /// The info json is taken by value; the returned value replaces it.
+    ///
+    /// # Errors
+    /// If the processing steps failed.
+    fn process(&self, info: InfoJson) -> Result<InfoJson, Error>;
+
+    /// The supported extractors for this post processor
+    ///
+    /// The entries are compared against the `extractor_key` value of an info json.
+    fn extractors(&self) -> &'static [&'static str];
+}
+
+/// The errors a [`PostProcessor`] can return from [`PostProcessor::process`].
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    /// A `reqwest` call failed; the underlying error is converted via `From`.
+    #[error("Failed to access a api: {0}")]
+    Get(#[from] reqwest::Error),
+}