about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/post_processors
diff options
context:
space:
mode:
Diffstat (limited to 'crates/yt_dlp/src/post_processors')
-rw-r--r--crates/yt_dlp/src/post_processors/dearrow.rs157
-rw-r--r--crates/yt_dlp/src/post_processors/mod.rs121
2 files changed, 218 insertions, 60 deletions
diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs
index bdbea7c..ab5478b 100644
--- a/crates/yt_dlp/src/post_processors/dearrow.rs
+++ b/crates/yt_dlp/src/post_processors/dearrow.rs
@@ -8,60 +8,118 @@
 // You should have received a copy of the License along with this program.
 // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
-use log::{info, warn};
+use curl::easy::Easy;
+use log::{error, info, warn};
+use rustpython::vm::{
+    PyRef, VirtualMachine,
+    builtins::{PyDict, PyStr},
+};
 use serde::{Deserialize, Serialize};
 
-use crate::{InfoJson, json_get};
+use crate::{pydict_cast, pydict_get, wrap_post_processor};
 
-use super::PostProcessor;
+wrap_post_processor!("DeArrow", unwrapped_process, process);
 
-#[derive(Debug, Clone, Copy)]
-pub struct DeArrowPP;
-
-impl PostProcessor for DeArrowPP {
-    fn extractors(&self) -> &'static [&'static str] {
-        &["Youtube"]
+/// # Errors
+/// If the API access fails.
+pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyRef<PyDict>, Error> {
+    if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" {
+        warn!("DeArrow: Extractor did not match, exiting.");
+        return Ok(info);
     }
 
-    fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> {
-        let mut output: DeArrowApi = reqwest::blocking::get(format!(
-            "https://sponsor.ajay.app/api/branding?videoID={}",
-            json_get!(info, "id", as_str)
-        ))?
-        .json()?;
-
-        output.titles.reverse();
-
-        let title_len = output.titles.len();
-        loop {
-            let Some(title) = output.titles.pop() else {
-                break;
-            };
-
-            if (title.locked || title.votes < 1) && title_len > 1 {
-                info!(
-                    "Skipping title {:#?}, as it is not good enough",
-                    title.value
-                );
-                // Skip titles that are not “good” enough.
-                continue;
-            }
-
-            if let Some(old_title) = info.insert(
-                "title".to_owned(),
-                serde_json::Value::String(title.value.clone()),
-            ) {
-                warn!("Updating title from {:#?} to {:#?}", old_title, title.value);
-                info.insert("original_title".to_owned(), old_title);
-            } else {
-                warn!("Setting title to {:#?}", title.value);
-            }
-
-            break;
+    let mut output: DeArrowApi = {
+        let output_bytes = {
+            let mut dst = Vec::new();
+
+            let mut easy = Easy::new();
+            easy.url(
+                format!(
+                    "https://sponsor.ajay.app/api/branding?videoID={}",
+                    pydict_get!(@vm, info, "id", PyStr).as_str()
+                )
+                .as_str(),
+            )?;
+
+            let mut transfer = easy.transfer();
+            transfer.write_function(|data| {
+                dst.extend_from_slice(data);
+                Ok(data.len())
+            })?;
+            transfer.perform()?;
+            drop(transfer);
+
+            dst
+        };
+
+        serde_json::from_slice(&output_bytes)?
+    };
+
+    // We pop the titles, so we need this vector reversed.
+    output.titles.reverse();
+
+    let title_len = output.titles.len();
+    let selected = loop {
+        let Some(title) = output.titles.pop() else {
+            break false;
+        };
+
+        if (title.locked || title.votes < 1) && title_len > 1 {
+            info!(
+                "DeArrow: Skipping title {:#?}, as it is not good enough",
+                title.value
+            );
+            // Skip titles that are not “good” enough.
+            continue;
         }
 
-        Ok(info)
+        update_title(&info, &title.value, vm);
+
+        break true;
+    };
+
+    if !selected && title_len != 0 {
+        // No title was selected, even though we had some titles.
+        // Just pick the first one in this case.
+        update_title(&info, &output.titles[0].value, vm);
     }
+
+    Ok(info)
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("Failed to access the DeArrow api: {0}")]
+    Get(#[from] curl::Error),
+
+    #[error("Failed to deserialize a api json return object: {0}")]
+    Deserialize(#[from] serde_json::Error),
+}
+
+fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) {
+    assert!(!info.contains_key("original_title", vm));
+
+    if let Ok(old_title) = info.get_item("title", vm) {
+        warn!(
+            "DeArrow: Updating title from {:#?} to {:#?}",
+            pydict_cast!(@ref old_title, PyStr).as_str(),
+            new_title
+        );
+
+        info.set_item("original_title", old_title, vm)
+            .expect("We checked, it is a new key");
+    } else {
+        warn!("DeArrow: Setting title to {new_title:#?}");
+    }
+
+    let cleaned_title = {
+        // NOTE(@bpeetz): DeArrow uses `>` as a “Don't format the next word” mark.
+        // They should be removed, if one does not use a auto-formatter. <2025-06-16>
+        new_title.replace('>', "")
+    };
+
+    info.set_item("title", vm.new_pyobj(cleaned_title), vm)
+        .expect("This should work?");
 }
 
 #[derive(Serialize, Deserialize)]
@@ -77,7 +135,14 @@ struct DeArrowApi {
     video_duration: Option<f64>,
 
     #[serde(alias = "casualVotes")]
-    casual_votes: Vec<String>,
+    casual_votes: Vec<CasualVote>,
+}
+
+#[derive(Serialize, Deserialize)]
+struct CasualVote {
+    id: String,
+    count: u32,
+    title: String,
 }
 
 #[derive(Serialize, Deserialize)]
diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs
index 65801c2..00b0ad5 100644
--- a/crates/yt_dlp/src/post_processors/mod.rs
+++ b/crates/yt_dlp/src/post_processors/mod.rs
@@ -8,23 +8,116 @@
 // You should have received a copy of the License along with this program.
 // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
 
-use crate::InfoJson;
-
 pub mod dearrow;
 
-pub trait PostProcessor: std::fmt::Debug + Send {
-    /// Process a [`InfoJson`] object and return the updated one.
-    ///
-    /// # Errors
-    /// If the processing steps failed.
-    fn process(&self, info: InfoJson) -> Result<InfoJson, Error>;
+#[macro_export]
+macro_rules! pydict_get {
+    (@$vm:expr, $value:expr, $name:literal, $into:ident) => {{
+        match $value.get_item($name, $vm) {
+            Ok(val) => $crate::pydict_cast!(val, $into),
+            Err(_) => panic!(
+                concat!(
+                    "Expected '",
+                    $name,
+                    "' to be a key for the'",
+                    stringify!($value),
+                    "' py dictionary: {:#?}"
+                ),
+                $value
+            ),
+        }
+    }};
+}
 
-    /// The supported extractors for this post processor
-    fn extractors(&self) -> &'static [&'static str];
+#[macro_export]
+macro_rules! pydict_cast {
+    ($value:expr, $into:ident) => {{
+        match $value.downcast::<$into>() {
+            Ok(result) => result,
+            Err(val) => panic!(
+                concat!(
+                    "Expected to be able to downcast value ({:#?}) as ",
+                    stringify!($into)
+                ),
+                val
+            ),
+        }
+    }};
+    (@ref $value:expr, $into:ident) => {{
+        match $value.downcast_ref::<$into>() {
+            Some(result) => result,
+            None => panic!(
+                concat!(
+                    "Expected to be able to downcast value ({:#?}) as ",
+                    stringify!($into)
+                ),
+                $value
+            ),
+        }
+    }};
 }
 
-#[derive(thiserror::Error, Debug)]
-pub enum Error {
-    #[error("Failed to access a api: {0}")]
-    Get(#[from] reqwest::Error),
+#[macro_export]
+macro_rules! wrap_post_processor {
+    ($name:literal, $unwrap:ident, $wrapped:ident) => {
+        use $crate::progress_hook::__priv::vm;
+
+        /// # Errors
+        /// - If the underlying function returns an error.
+        /// - If python operations fail.
+        pub fn $wrapped(vm: &vm::VirtualMachine) -> vm::PyResult<vm::PyObjectRef> {
+            fn actual_processor(
+                mut input: vm::function::FuncArgs,
+                vm: &vm::VirtualMachine,
+            ) -> vm::PyResult<vm::PyRef<vm::builtins::PyDict>> {
+                let input = input
+                    .args
+                    .remove(0)
+                    .downcast::<vm::builtins::PyDict>()
+                    .expect("Should be a py dict");
+
+                let output = match unwrapped_process(input, vm) {
+                    Ok(ok) => ok,
+                    Err(err) => {
+                        return Err(vm.new_runtime_error(err.to_string()));
+                    }
+                };
+
+                Ok(output)
+            }
+
+            let scope = vm.new_scope_with_builtins();
+
+            scope.globals.set_item(
+                "actual_processor",
+                vm.new_function("actual_processor", actual_processor).into(),
+                vm,
+            )?;
+
+            let local_scope = scope.clone();
+            vm.run_code_string(
+                local_scope,
+                format!(
+                    "
+import yt_dlp
+
+class {}(yt_dlp.postprocessor.PostProcessor):
+    def run(self, info):
+        info = actual_processor(info)
+        return [], info
+
+inst = {}()
+",
+                    $name, $name
+                )
+                .as_str(),
+                "<embedded post processor initializing code>".to_owned(),
+            )?;
+
+            Ok(scope
+                .globals
+                .get_item("inst", vm)
+                .expect("We just declared it"))
+        }
+    };
 }