diff options
Diffstat (limited to 'crates/yt_dlp/src/post_processors')
-rw-r--r-- | crates/yt_dlp/src/post_processors/dearrow.rs | 157 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/mod.rs | 121 |
2 files changed, 218 insertions, 60 deletions
diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs index bdbea7c..ab5478b 100644 --- a/crates/yt_dlp/src/post_processors/dearrow.rs +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -8,60 +8,118 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use log::{info, warn}; +use curl::easy::Easy; +use log::{error, info, warn}; +use rustpython::vm::{ + PyRef, VirtualMachine, + builtins::{PyDict, PyStr}, +}; use serde::{Deserialize, Serialize}; -use crate::{InfoJson, json_get}; +use crate::{pydict_cast, pydict_get, wrap_post_processor}; -use super::PostProcessor; +wrap_post_processor!("DeArrow", unwrapped_process, process); -#[derive(Debug, Clone, Copy)] -pub struct DeArrowPP; - -impl PostProcessor for DeArrowPP { - fn extractors(&self) -> &'static [&'static str] { - &["Youtube"] +/// # Errors +/// If the API access fails. +pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyRef<PyDict>, Error> { + if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" { + warn!("DeArrow: Extractor did not match, exiting."); + return Ok(info); } - fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> { - let mut output: DeArrowApi = reqwest::blocking::get(format!( - "https://sponsor.ajay.app/api/branding?videoID={}", - json_get!(info, "id", as_str) - ))? - .json()?; - - output.titles.reverse(); - - let title_len = output.titles.len(); - loop { - let Some(title) = output.titles.pop() else { - break; - }; - - if (title.locked || title.votes < 1) && title_len > 1 { - info!( - "Skipping title {:#?}, as it is not good enough", - title.value - ); - // Skip titles that are not “good” enough. - continue; - } - - if let Some(old_title) = info.insert( - "title".to_owned(), - serde_json::Value::String(title.value.clone()), - ) { - warn!("Updating title from {:#?} to {:#?}", old_title, title.value); - info.insert("original_title".to_owned(), old_title); - } else { - warn!("Setting title to {:#?}", title.value); - } - - break; + let mut output: DeArrowApi = { + let output_bytes = { + let mut dst = Vec::new(); + + let mut easy = Easy::new(); + easy.url( + format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + pydict_get!(@vm, info, "id", PyStr).as_str() + ) + .as_str(), + )?; + + let mut transfer = easy.transfer(); + transfer.write_function(|data| { + dst.extend_from_slice(data); + Ok(data.len()) + })?; + transfer.perform()?; + drop(transfer); + + dst + }; + + serde_json::from_slice(&output_bytes)? + }; + + // We pop the titles, so we need this vector reversed. + output.titles.reverse(); + + let title_len = output.titles.len(); + let selected = loop { + let Some(title) = output.titles.pop() else { + break false; + }; + + if (title.locked || title.votes < 1) && title_len > 1 { + info!( + "DeArrow: Skipping title {:#?}, as it is not good enough", + title.value + ); + // Skip titles that are not “good” enough. + continue; } - Ok(info) + update_title(&info, &title.value, vm); + + break true; + }; + + if !selected && title_len != 0 { + // No title was selected, even though we had some titles. + // Just pick the first one in this case. + update_title(&info, &output.titles[0].value, vm); } + + Ok(info) +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to access the DeArrow api: {0}")] + Get(#[from] curl::Error), + + #[error("Failed to deserialize a api json return object: {0}")] + Deserialize(#[from] serde_json::Error), +} + +fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) { + assert!(!info.contains_key("original_title", vm)); + + if let Ok(old_title) = info.get_item("title", vm) { + warn!( + "DeArrow: Updating title from {:#?} to {:#?}", + pydict_cast!(@ref old_title, PyStr).as_str(), + new_title + ); + + info.set_item("original_title", old_title, vm) + .expect("We checked, it is a new key"); + } else { + warn!("DeArrow: Setting title to {new_title:#?}"); + } + + let cleaned_title = { + // NOTE(@bpeetz): DeArrow uses `>` as a “Don't format the next word” mark. + // They should be removed, if one does not use a auto-formatter. <2025-06-16> + new_title.replace('>', "") + }; + + info.set_item("title", vm.new_pyobj(cleaned_title), vm) + .expect("This should work?"); } #[derive(Serialize, Deserialize)] @@ -77,7 +135,14 @@ struct DeArrowApi { video_duration: Option<f64>, #[serde(alias = "casualVotes")] - casual_votes: Vec<String>, + casual_votes: Vec<CasualVote>, +} + +#[derive(Serialize, Deserialize)] +struct CasualVote { + id: String, + count: u32, + title: String, } #[derive(Serialize, Deserialize)] diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs index 65801c2..00b0ad5 100644 --- a/crates/yt_dlp/src/post_processors/mod.rs +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -8,23 +8,116 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use crate::InfoJson; - pub mod dearrow; -pub trait PostProcessor: std::fmt::Debug + Send { - /// Process a [`InfoJson`] object and return the updated one. - /// - /// # Errors - /// If the processing steps failed. - fn process(&self, info: InfoJson) -> Result<InfoJson, Error>; +#[macro_export] +macro_rules! pydict_get { + (@$vm:expr, $value:expr, $name:literal, $into:ident) => {{ + match $value.get_item($name, $vm) { + Ok(val) => $crate::pydict_cast!(val, $into), + Err(_) => panic!( + concat!( + "Expected '", + $name, + "' to be a key for the'", + stringify!($value), + "' py dictionary: {:#?}" + ), + $value + ), + } + }}; +} - /// The supported extractors for this post processor - fn extractors(&self) -> &'static [&'static str]; +#[macro_export] +macro_rules! pydict_cast { + ($value:expr, $into:ident) => {{ + match $value.downcast::<$into>() { + Ok(result) => result, + Err(val) => panic!( + concat!( + "Expected to be able to downcast value ({:#?}) as ", + stringify!($into) + ), + val + ), + } + }}; + (@ref $value:expr, $into:ident) => {{ + match $value.downcast_ref::<$into>() { + Some(result) => result, + None => panic!( + concat!( + "Expected to be able to downcast value ({:#?}) as ", + stringify!($into) + ), + $value + ), + } + }}; } -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Failed to access a api: {0}")] - Get(#[from] reqwest::Error), +#[macro_export] +macro_rules! wrap_post_processor { + ($name:literal, $unwrap:ident, $wrapped:ident) => { + use $crate::progress_hook::__priv::vm; + + /// # Errors + /// - If the underlying function returns an error. + /// - If python operations fail. + pub fn $wrapped(vm: &vm::VirtualMachine) -> vm::PyResult<vm::PyObjectRef> { + fn actual_processor( + mut input: vm::function::FuncArgs, + vm: &vm::VirtualMachine, + ) -> vm::PyResult<vm::PyRef<vm::builtins::PyDict>> { + let input = input + .args + .remove(0) + .downcast::<vm::builtins::PyDict>() + .expect("Should be a py dict"); + + let output = match unwrapped_process(input, vm) { + Ok(ok) => ok, + Err(err) => { + return Err(vm.new_runtime_error(err.to_string())); + } + }; + + Ok(output) + } + + let scope = vm.new_scope_with_builtins(); + + scope.globals.set_item( + "actual_processor", + vm.new_function("actual_processor", actual_processor).into(), + vm, + )?; + + let local_scope = scope.clone(); + vm.run_code_string( + local_scope, + format!( + " +import yt_dlp + +class {}(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + info = actual_processor(info) + return [], info + +inst = {}() +", + $name, $name + ) + .as_str(), + "<embedded post processor initializing code>".to_owned(), + )?; + + Ok(scope + .globals + .get_item("inst", vm) + .expect("We just declared it")) + } + }; } |