about summary refs log tree commit diff stats
path: root/crates/yt_dlp
diff options
context:
space:
mode:
Diffstat (limited to 'crates/yt_dlp')
-rw-r--r-- crates/yt_dlp/Cargo.toml 2
-rw-r--r-- crates/yt_dlp/src/lib.rs 45
-rw-r--r-- crates/yt_dlp/src/post_processors/dearrow.rs 108
-rw-r--r-- crates/yt_dlp/src/post_processors/mod.rs 20
4 files changed, 164 insertions, 11 deletions
diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml
index 90f2e10..e5d14fd 100644
--- a/crates/yt_dlp/Cargo.toml
+++ b/crates/yt_dlp/Cargo.toml
@@ -24,7 +24,9 @@ publish = true
[dependencies]
indexmap = { version = "2.9.0", default-features = false }
log.workspace = true
+reqwest = { version = "0.12.20", features = ["blocking", "json"] }
rustpython = { git = "https://github.com/RustPython/RustPython.git", features = ["threading", "stdlib", "stdio", "importlib", "ssl"], default-features = false }
+serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
thiserror = "2.0.12"
url.workspace = true
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index 0f40f0a..16ec4ca 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -1,10 +1,11 @@
//! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
-use std::{self, env, mem, path::PathBuf};
+use std::{self, env, fmt::Display, path::PathBuf};
use indexmap::IndexMap;
use log::{Level, debug, error, info, log_enabled};
use logging::setup_logging;
+use post_processors::PostProcessor;
use rustpython::{
InterpreterConfig,
vm::{
@@ -18,6 +19,7 @@ use rustpython::{
use url::Url;
mod logging;
+pub mod post_processors;
pub mod progress_hook;
#[macro_export]
@@ -61,6 +63,7 @@ pub struct YoutubeDL {
youtube_dl_class: PyObjectRef,
yt_dlp_module: PyObjectRef,
options: serde_json::Map<String, serde_json::Value>,
+ post_processors: Vec<Box<dyn PostProcessor>>,
}
impl std::fmt::Debug for YoutubeDL {
@@ -209,6 +212,7 @@ impl YoutubeDL {
youtube_dl_class,
yt_dlp_module,
options: output_options,
+ post_processors: options.post_processors,
})
}
@@ -399,9 +403,18 @@ impl YoutubeDL {
let result = value.downcast::<PyDict>().expect("This should stay a dict");
- let json = json_dumps(result, vm);
+ let mut json = json_dumps(result, vm);
- {
+ for pp in &self.post_processors {
+ if pp
+ .extractors()
+ .iter()
+ .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
+ {
+ json = pp.process(json)?;
+ } else {
+ error!("Extractor not found for {pp:#?}");
+ }
}
Ok(json)
@@ -458,6 +471,9 @@ pub mod prepare {
pub enum Error {
#[error(transparent)]
Python(#[from] PythonError),
+
+ #[error("Failed to run a post processor")]
+ PostProcessorRun(#[from] post_processors::Error),
}
}
@@ -473,15 +489,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
pub struct YoutubeDLOptions {
options: serde_json::Map<String, serde_json::Value>,
progress_hook: Option<ProgressHookFunction>,
+ post_processors: Vec<Box<dyn PostProcessor>>,
}
impl YoutubeDLOptions {
#[must_use]
pub fn new() -> Self {
- Self {
+ let me = Self {
options: serde_json::Map::new(),
progress_hook: None,
- }
+ post_processors: vec![],
+ };
+
+ me.with_post_processor(post_processors::dearrow::DeArrowPP)
}
#[must_use]
@@ -489,10 +509,7 @@ impl YoutubeDLOptions {
let mut options = self.options;
options.insert(key.into(), value.into());
- Self {
- options,
- progress_hook: self.progress_hook,
- }
+ Self { options, ..self }
}
#[must_use]
@@ -501,12 +518,18 @@ impl YoutubeDLOptions {
todo!()
} else {
Self {
- options: self.options,
progress_hook: Some(progress_hook),
+ ..self
}
}
}
+ /// Append `post_processor` to the post processors carried by these options.
+ ///
+ /// The value is boxed and pushed onto the end of the list; the updated options are returned
+ /// so that calls can be chained.
+ #[must_use]
+ pub fn with_post_processor<P>(self, post_processor: P) -> Self
+ where
+     P: PostProcessor + 'static,
+ {
+     let mut updated = self;
+     let boxed: Box<dyn PostProcessor> = Box::new(post_processor);
+     updated.post_processors.push(boxed);
+     updated
+ }
+
/// # Errors
/// If the underlying [`YoutubeDL::from_options`] errors.
pub fn build(self) -> Result<YoutubeDL, build::Error> {
@@ -517,7 +540,7 @@ impl YoutubeDLOptions {
pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self {
Self {
options,
- progress_hook: None,
+ ..Self::new()
}
}
diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs
new file mode 100644
index 0000000..110beeb
--- /dev/null
+++ b/crates/yt_dlp/src/post_processors/dearrow.rs
@@ -0,0 +1,108 @@
+use log::{info, warn};
+use serde::{Deserialize, Serialize};
+
+use crate::{InfoJson, json_get};
+
+use super::PostProcessor;
+
+/// Post processor that swaps a video's title for one fetched from the DeArrow branding API.
+#[derive(Debug, Clone, Copy)]
+pub struct DeArrowPP;
+
+impl PostProcessor for DeArrowPP {
+    /// This post processor only handles entries whose extractor is `Youtube`.
+    fn extractors(&self) -> &'static [&'static str] {
+        &["Youtube"]
+    }
+
+    /// Fetch the DeArrow branding for the video named by `info["id"]` and, when an acceptable
+    /// title is found, store it under `title`. A previously present title is preserved under
+    /// `original_title`.
+    ///
+    /// Titles are considered in the order the API returned them; the first acceptable one wins.
+    /// If no title is acceptable, `info` is returned unchanged.
+    ///
+    /// # Errors
+    /// If the request to the DeArrow API fails or its response cannot be decoded.
+    fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> {
+        let output: DeArrowApi = reqwest::blocking::get(format!(
+            "https://sponsor.ajay.app/api/branding?videoID={}",
+            json_get!(info, "id", as_str)
+        ))?
+        .json()?;
+
+        // With a single candidate there is nothing better to fall back to, so it is always used.
+        let has_alternatives = output.titles.len() > 1;
+
+        for title in output.titles {
+            // Locked titles are trusted regardless of their vote count (see the DeArrow API
+            // docs); only unlocked titles without any votes are treated as not good enough.
+            if !title.locked && title.votes < 1 && has_alternatives {
+                info!(
+                    "Skipping title {:#?}, as it is not good enough",
+                    title.value
+                );
+                continue;
+            }
+
+            // We have no auto-formatter, so the `>` markers meant for it must not be shown.
+            let new_title = strip_formatter_hints(&title.value);
+
+            if let Some(old_title) = info.insert(
+                "title".to_owned(),
+                serde_json::Value::String(new_title.clone()),
+            ) {
+                warn!("Updating title from {:#?} to {:#?}", old_title, new_title);
+                info.insert("original_title".to_owned(), old_title);
+            } else {
+                warn!("Setting title to {:#?}", new_title);
+            }
+
+            break;
+        }
+
+        Ok(info)
+    }
+}
+
+/// Remove the `>` markers that DeArrow puts directly in front of a word to tell an
+/// auto-formatter to leave that word alone.
+///
+/// Only a `>` at the start of a word (start of the title or after whitespace) that is
+/// immediately followed by a non-whitespace character is removed; any other `>` is kept.
+fn strip_formatter_hints(title: &str) -> String {
+    let mut cleaned = String::with_capacity(title.len());
+    let mut at_word_start = true;
+    let mut chars = title.chars().peekable();
+
+    while let Some(ch) = chars.next() {
+        let is_hint = ch == '>'
+            && at_word_start
+            && chars.peek().is_some_and(|next| !next.is_whitespace());
+
+        if !is_hint {
+            cleaned.push(ch);
+        }
+        at_word_start = ch.is_whitespace();
+    }
+
+    cleaned
+}
+
+/// The response body of the DeArrow branding endpoint.
+///
+/// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow>
+#[derive(Serialize, Deserialize)]
+struct DeArrowApi {
+    /// Title candidates, in the order the API returned them.
+    titles: Vec<Title>,
+
+    /// Thumbnail candidates. Defaults to empty, so that a response without them still decodes.
+    #[serde(default)]
+    thumbnails: Vec<Thumbnail>,
+
+    #[serde(alias = "randomTime")]
+    random_time: Option<f64>,
+
+    #[serde(alias = "videoDuration")]
+    video_duration: Option<f64>,
+
+    /// Kept as raw JSON and defaulted to empty: nothing reads this field, so its exact shape
+    /// must never make the whole response fail to decode.
+    // NOTE(review): the live API appears to return objects here, not plain strings -- confirm
+    // against the API docs before giving this a concrete type.
+    #[serde(default, alias = "casualVotes")]
+    casual_votes: Vec<serde_json::Value>,
+}
+
+/// A single title candidate from the DeArrow branding response.
+#[derive(Serialize, Deserialize)]
+struct Title {
+    /// Note: Titles will sometimes contain > before a word.
+    /// This tells the auto-formatter to not format a word.
+    /// If you have no auto-formatter, you can ignore this and replace it with an empty string
+    #[serde(alias = "title")]
+    value: String,
+
+    original: bool,
+
+    /// Signed, because the API documents this as an integer and down-voted titles carry a
+    /// negative count; an unsigned type would make the whole response fail to decode.
+    votes: i64,
+
+    locked: bool,
+
+    #[serde(alias = "UUID")]
+    uuid: String,
+
+    /// only present if requested
+    #[serde(alias = "userID")]
+    user_id: Option<String>,
+}
+
+/// A single thumbnail candidate from the DeArrow branding response.
+#[derive(Serialize, Deserialize)]
+struct Thumbnail {
+    /// null if original is true
+    timestamp: Option<f64>,
+
+    original: bool,
+
+    /// Signed, because the API documents this as an integer and down-voted thumbnails carry a
+    /// negative count; an unsigned type would make the whole response fail to decode.
+    votes: i64,
+
+    locked: bool,
+
+    #[serde(alias = "UUID")]
+    uuid: String,
+
+    /// only present if requested
+    #[serde(alias = "userID")]
+    user_id: Option<String>,
+}
diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs
new file mode 100644
index 0000000..6067c7a
--- /dev/null
+++ b/crates/yt_dlp/src/post_processors/mod.rs
@@ -0,0 +1,20 @@
+use crate::InfoJson;
+
+pub mod dearrow;
+
+/// A hook that rewrites an [`InfoJson`] object for a fixed set of extractors.
+pub trait PostProcessor: Send + std::fmt::Debug {
+    /// Names of the extractors whose output this post processor supports.
+    fn extractors(&self) -> &'static [&'static str];
+
+    /// Take an [`InfoJson`] object, transform it and hand back the updated object.
+    ///
+    /// # Errors
+    /// If any of the processing steps failed.
+    fn process(&self, info: InfoJson) -> Result<InfoJson, Error>;
+}
+
+/// Errors a [`PostProcessor`] can return from [`PostProcessor::process`].
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    /// A request to an external API failed, or its response could not be decoded.
+    #[error("Failed to access an API: {0}")]
+    Get(#[from] reqwest::Error),
+}