diff options
Diffstat (limited to '')
-rw-r--r-- | crates/libmpv2/libmpv2-sys/build.rs | 4 | ||||
-rw-r--r-- | crates/yt/Cargo.toml | 4 | ||||
-rw-r--r-- | crates/yt/src/cli.rs | 78 | ||||
-rw-r--r-- | crates/yt/src/download/mod.rs | 9 | ||||
-rw-r--r-- | crates/yt/src/main.rs | 26 | ||||
-rw-r--r-- | crates/yt/src/select/cmds/mod.rs | 2 | ||||
-rw-r--r-- | crates/yt/src/select/mod.rs | 31 | ||||
-rw-r--r-- | crates/yt/src/select/selection_file/mod.rs | 24 | ||||
-rw-r--r-- | crates/yt/src/update/updater.rs | 36 | ||||
-rw-r--r-- | crates/yt/src/videos/mod.rs | 21 | ||||
-rw-r--r-- | crates/yt_dlp/Cargo.toml | 2 | ||||
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 219 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/dearrow.rs | 108 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/mod.rs | 20 |
14 files changed, 445 insertions, 139 deletions
diff --git a/crates/libmpv2/libmpv2-sys/build.rs b/crates/libmpv2/libmpv2-sys/build.rs index bf9a02e..45c2450 100644 --- a/crates/libmpv2/libmpv2-sys/build.rs +++ b/crates/libmpv2/libmpv2-sys/build.rs @@ -30,7 +30,9 @@ fn main() { ), "--verbose", ]) - .generate_comments(true) + // NOTE(@bpeetz): The comments are interpreted as doc-tests, + // which obviously fail, as the code is c. <2025-06-16> + .generate_comments(false) .generate() .expect("Unable to generate bindings"); diff --git a/crates/yt/Cargo.toml b/crates/yt/Cargo.toml index 6803e68..c6d8c30 100644 --- a/crates/yt/Cargo.toml +++ b/crates/yt/Cargo.toml @@ -29,16 +29,16 @@ blake3 = "1.8.2" chrono = { version = "0.4.41", features = ["now"] } chrono-humanize = "0.2.3" clap = { version = "4.5.40", features = ["derive"] } +clap_complete = { version = "4.5.54", features = ["unstable-dynamic"] } futures = "0.3.31" -nucleo-matcher = "0.3.1" owo-colors = "4.2.1" regex = "1.11.1" sqlx = { version = "0.8.6", features = ["runtime-tokio", "sqlite"] } stderrlog = "0.6.0" tempfile = "3.20.0" toml = "0.8.23" -trinitry = { version = "0.2.2" } xdg = "3.0.0" +shlex = "1.3.0" bytes.workspace = true libmpv2.workspace = true log.workspace = true diff --git a/crates/yt/src/cli.rs b/crates/yt/src/cli.rs index 634e422..41fadf4 100644 --- a/crates/yt/src/cli.rs +++ b/crates/yt/src/cli.rs @@ -9,12 +9,16 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use std::{path::PathBuf, str::FromStr}; +use std::{ + fmt::{self, Display, Formatter}, + path::PathBuf, + str::FromStr, +}; use anyhow::Context; use bytes::Bytes; use chrono::NaiveDate; -use clap::{ArgAction, Args, Parser, Subcommand}; +use clap::{ArgAction, Args, Parser, Subcommand, ValueEnum}; use url::Url; use crate::{ @@ -294,6 +298,43 @@ impl FromStr for OptionalPublisher { } } +#[derive(Default, ValueEnum, Clone, Copy, Debug)] +pub enum SelectSplitSortKey { + /// Sort by the name of the publisher. + #[default] + Publisher, + + /// Sort by the number of unselected videos per publisher. + Videos, +} +impl Display for SelectSplitSortKey { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + SelectSplitSortKey::Publisher => f.write_str("publisher"), + SelectSplitSortKey::Videos => f.write_str("videos"), + } + } +} + +#[derive(Default, ValueEnum, Clone, Copy, Debug)] +pub enum SelectSplitSortMode { + /// Sort in ascending order (small -> big) + #[default] + Asc, + + /// Sort in descending order (big -> small) + Desc, +} + +impl Display for SelectSplitSortMode { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + SelectSplitSortMode::Asc => f.write_str("asc"), + SelectSplitSortMode::Desc => f.write_str("desc"), + } + } +} + #[derive(Subcommand, Clone, Debug)] // NOTE: Keep this in sync with the [`constants::HELP_STR`] constant. <2024-08-20> // NOTE: Also keep this in sync with the `tree-sitter-yts/grammar.js`. <2024-11-04> @@ -304,15 +345,26 @@ pub enum SelectCommand { #[arg(long, short)] done: bool, - /// Generate a directory, where each file contains only one subscription. - #[arg(long, short, conflicts_with = "use_last_selection")] - split: bool, - /// Use the last selection file (useful if you've spend time on it and want to get it again) #[arg(long, short, conflicts_with = "done")] use_last_selection: bool, }, + /// Generate a directory, where each file contains only one subscription. + Split { + /// Include done (watched, dropped) videos + #[arg(long, short)] + done: bool, + + /// Which key to use for sorting. + #[arg(default_value_t)] + sort_key: SelectSplitSortKey, + + /// Which mode to use for sorting. + #[arg(default_value_t)] + sort_mode: SelectSplitSortMode, + }, + /// Add a video to the database /// /// This optionally supports to add a playlist. @@ -371,7 +423,6 @@ impl Default for SelectCommand { Self::File { done: false, use_last_selection: false, - split: false, } } } @@ -381,7 +432,7 @@ pub enum CacheCommand { /// Invalidate all cache entries Invalidate { /// Also delete the cache path - #[arg(short, long)] + #[arg(short = 'f', long)] hard: bool, }, @@ -396,3 +447,14 @@ pub enum CacheCommand { all: bool, }, } + +#[cfg(test)] +mod test { + use clap::CommandFactory; + + use super::CliArgs; + #[test] + fn verify_cli() { + CliArgs::command().debug_assert(); + } +} diff --git a/crates/yt/src/download/mod.rs b/crates/yt/src/download/mod.rs index 110bf55..6065cf9 100644 --- a/crates/yt/src/download/mod.rs +++ b/crates/yt/src/download/mod.rs @@ -311,8 +311,11 @@ impl Downloader { let size = if let Some(val) = result.get("filesize") { json_cast!(val, as_u64) - } else if let Some(val) = result.get("filesize_approx") { - json_cast!(val, as_u64) + } else if let Some(serde_json::Value::Number(num)) = result.get("filesize_approx") { + // NOTE(@bpeetz): yt_dlp sets this value to `Null`, instead of omitting it when it + // can't calculate the approximate filesize. + // Thus, we have to check, that it is actually non-null, before we cast it. <2025-06-15> + json_cast!(num, as_u64) } else if result.get("duration").is_some() && result.get("tbr").is_some() { #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] let duration = json_get!(result, "duration", as_f64).ceil() as u64; @@ -347,7 +350,7 @@ impl Downloader { let yt_dlp = download_opts(app, &addional_opts)?; let result = yt_dlp - .download(&[video.url.to_owned()]) + .download(&[video.url.clone()]) .with_context(|| format!("Failed to download video: '{}'", video.title))?; assert_eq!(result.len(), 1); diff --git a/crates/yt/src/main.rs b/crates/yt/src/main.rs index 930d269..b6a7d29 100644 --- a/crates/yt/src/main.rs +++ b/crates/yt/src/main.rs @@ -19,7 +19,7 @@ use anyhow::{Context, Result, bail}; use app::App; use bytes::Bytes; use cache::{invalidate, maintain}; -use clap::Parser; +use clap::{CommandFactory, Parser}; use cli::{CacheCommand, SelectCommand, SubscriptionCommand, VideosCommand}; use config::Config; use log::{error, info}; @@ -56,6 +56,8 @@ pub mod watch; // This is _the_ main function after all. It is not really good, but it sort of works. #[allow(clippy::too_many_lines)] async fn main() -> Result<()> { + clap_complete::CompleteEnv::with_factory(cli::CliArgs::command).complete(); + let args = cli::CliArgs::parse(); // The default verbosity is 1 (Warn) @@ -115,14 +117,13 @@ async fn main() -> Result<()> { SelectCommand::File { done, use_last_selection, - split, + } => Box::pin(select::select_file(&app, done, use_last_selection)).await?, + SelectCommand::Split { + done, + sort_key, + sort_mode, } => { - if split { - assert!(!use_last_selection); - Box::pin(select::select_split(&app, done)).await? - } else { - Box::pin(select::select_file(&app, done, use_last_selection)).await? - } + Box::pin(select::select_split(&app, done, sort_key, sort_mode)).await? } _ => Box::pin(handle_select_cmd(&app, cmd, None)).await?, } @@ -219,7 +220,14 @@ async fn main() -> Result<()> { current_progress += CHUNK_SIZE; } } else { - update::update(&app, max_backlog, subscriptions, total_number, current_progress).await?; + update::update( + &app, + max_backlog, + subscriptions, + total_number, + current_progress, + ) + .await?; } } Command::Subscriptions { cmd } => match cmd { diff --git a/crates/yt/src/select/cmds/mod.rs b/crates/yt/src/select/cmds/mod.rs index aabcd3d..f97b04b 100644 --- a/crates/yt/src/select/cmds/mod.rs +++ b/crates/yt/src/select/cmds/mod.rs @@ -76,7 +76,7 @@ pub async fn handle_select_cmd( firefox.arg(url.as_str()); let _handle = firefox.spawn().context("Failed to run firefox")?; } - SelectCommand::File { .. } => unreachable!("This should have been filtered out"), + SelectCommand::File { .. } | SelectCommand::Split { .. } => unreachable!("This should have been filtered out"), } Ok(()) } diff --git a/crates/yt/src/select/mod.rs b/crates/yt/src/select/mod.rs index 668ab02..135bd76 100644 --- a/crates/yt/src/select/mod.rs +++ b/crates/yt/src/select/mod.rs @@ -21,7 +21,7 @@ use std::{ use crate::{ app::App, - cli::CliArgs, + cli::{CliArgs, SelectSplitSortKey, SelectSplitSortMode}, constants::HELP_STR, storage::video_database::{Video, VideoStatusMarker, get}, unreachable::Unreachable, @@ -39,7 +39,12 @@ use tokio::process::Command; pub mod cmds; pub mod selection_file; -pub async fn select_split(app: &App, done: bool) -> Result<()> { +pub async fn select_split( + app: &App, + done: bool, + sort_key: SelectSplitSortKey, + sort_mode: SelectSplitSortMode, +) -> Result<()> { let temp_dir = Builder::new() .prefix("yt_video_select-") .rand_bytes(6) @@ -69,8 +74,24 @@ pub async fn select_split(app: &App, done: bool) -> Result<()> { let author_map = { let mut temp_vec: Vec<_> = author_map.into_iter().collect(); - // PERFORMANCE: The clone here should not be neeed. <2025-06-15> - temp_vec.sort_by_key(|(name, _)| name.to_owned()); + match sort_key { + SelectSplitSortKey::Publisher => { + // PERFORMANCE: The clone here should not be neeed. <2025-06-15> + temp_vec.sort_by_key(|(name, _): &(String, Vec<Video>)| name.to_owned()); + } + SelectSplitSortKey::Videos => { + temp_vec.sort_by_key(|(_, videos): &(String, Vec<Video>)| videos.len()); + } + } + + match sort_mode { + SelectSplitSortMode::Asc => { + // Std's default mode is ascending. + } + SelectSplitSortMode::Desc => { + temp_vec.reverse(); + } + } temp_vec }; @@ -243,7 +264,7 @@ async fn process_file(app: &App, file: &File, processed: i64) -> Result<i64> { } } - Ok(line_number * -1) + Ok(-line_number) } async fn open_editor_at(path: &Path) -> Result<()> { diff --git a/crates/yt/src/select/selection_file/mod.rs b/crates/yt/src/select/selection_file/mod.rs index abd26c4..f5e0531 100644 --- a/crates/yt/src/select/selection_file/mod.rs +++ b/crates/yt/src/select/selection_file/mod.rs @@ -11,22 +11,32 @@ //! The data structures needed to express the file, which the user edits -use anyhow::{Context, Result}; -use trinitry::Trinitry; +use anyhow::{Result, bail}; +use shlex::Shlex; pub mod duration; +/// # Panics +/// If internal assertions fail. pub fn process_line(line: &str) -> Result<Option<Vec<String>>> { // Filter out comments and empty lines if line.starts_with('#') || line.trim().is_empty() { Ok(None) } else { - let tri = Trinitry::new(line).with_context(|| format!("Failed to parse line '{line}'"))?; + let split: Vec<_> = { + let mut shl = Shlex::new(line); + let res = shl.by_ref().collect(); - let mut vec = Vec::with_capacity(tri.arguments().len() + 1); - vec.push(tri.command().to_owned()); - vec.extend(tri.arguments().to_vec()); + if shl.had_error { + bail!("Failed to parse line '{line}'") + } - Ok(Some(vec)) + assert_eq!(shl.line_no, 1, "A unexpected newline appeared"); + res + }; + + assert!(!split.is_empty()); + + Ok(Some(split)) } } diff --git a/crates/yt/src/update/updater.rs b/crates/yt/src/update/updater.rs index 04bcaa1..60e9855 100644 --- a/crates/yt/src/update/updater.rs +++ b/crates/yt/src/update/updater.rs @@ -19,7 +19,7 @@ use futures::{StreamExt, future::join_all, stream}; use log::{Level, debug, error, log_enabled}; use serde_json::json; use tokio_util::task::LocalPoolHandle; -use yt_dlp::{InfoJson, YoutubeDLOptions, json_cast, json_get, process_ie_result}; +use yt_dlp::{InfoJson, PythonError, YoutubeDLOptions, json_cast, json_get, process_ie_result}; use crate::{ ansi_escape_codes::{clear_whole_line, move_to_col}, @@ -160,24 +160,28 @@ impl Updater { } }) // Don't fail the whole update, if one of the entries fails to fetch. - .filter_map(|base| match base { + .filter_map(move |base| match base { Ok(ok) => Some(ok), Err(err) => { - let process_ie_result::Error::Python(err) = &err; - - if err.contains( - "Join this channel to get access to members-only content ", - ) { - // Hide this error - } else { - // Show the error, but don't fail. - let error = err - .strip_prefix("DownloadError: \u{1b}[0;31mERROR:\u{1b}[0m ") - .unwrap_or(err); - error!("{error}"); + match err { + process_ie_result::Error::Python(PythonError(err)) => { + if err.contains( "Join this channel to get access to members-only content ",) { + // Hide this error + } else { + // Show the error, but don't fail. + let error = err + .strip_prefix("DownloadError: \u{1b}[0;31mERROR:\u{1b}[0m ") + .unwrap_or(&err); + error!("While fetching {:#?}: {error}", sub.name); + } + + None + } + process_ie_result::Error::InfoJsonPrepare(error) => { + error!("While fetching {:#?}: Failed to prepare info json: {error}", sub.name); + None + }, } - - None } })) } diff --git a/crates/yt/src/videos/mod.rs b/crates/yt/src/videos/mod.rs index e821772..960340b 100644 --- a/crates/yt/src/videos/mod.rs +++ b/crates/yt/src/videos/mod.rs @@ -11,10 +11,6 @@ use anyhow::Result; use futures::{TryStreamExt, stream::FuturesUnordered}; -use nucleo_matcher::{ - Matcher, - pattern::{CaseMatching, Normalization, Pattern}, -}; pub mod display; @@ -46,19 +42,10 @@ pub async fn query(app: &App, limit: Option<usize>, search_query: Option<String> .await?; if let Some(query) = search_query { - let mut matcher = Matcher::new(nucleo_matcher::Config::DEFAULT.match_paths()); - - let pattern_matches = Pattern::parse( - &query.replace(' ', "\\ "), - CaseMatching::Ignore, - Normalization::Smart, - ) - .match_list(all_video_strings, &mut matcher); - - pattern_matches - .iter() - .rev() - .for_each(|(val, key)| println!("{val} ({key})")); + all_video_strings + .into_iter() + .filter(|video| video.to_lowercase().contains(&query.to_lowercase())) + .for_each(|video| println!("{video}")); } else { println!("{}", all_video_strings.join("\n")); } diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml index 90f2e10..e5d14fd 100644 --- a/crates/yt_dlp/Cargo.toml +++ b/crates/yt_dlp/Cargo.toml @@ -24,7 +24,9 @@ publish = true [dependencies] indexmap = { version = "2.9.0", default-features = false } log.workspace = true +reqwest = { version = "0.12.20", features = ["blocking", "json"] } rustpython = { git = "https://github.com/RustPython/RustPython.git", features = ["threading", "stdlib", "stdio", "importlib", "ssl"], default-features = false } +serde = { workspace = true, features = ["derive"] } serde_json.workspace = true thiserror = "2.0.12" url.workspace = true diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index dd42fc6..16ec4ca 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -1,10 +1,11 @@ //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. -use std::{self, env, mem, path::PathBuf}; +use std::{self, env, fmt::Display, path::PathBuf}; use indexmap::IndexMap; use log::{Level, debug, error, info, log_enabled}; use logging::setup_logging; +use post_processors::PostProcessor; use rustpython::{ InterpreterConfig, vm::{ @@ -18,23 +19,42 @@ use rustpython::{ use url::Url; mod logging; +pub mod post_processors; pub mod progress_hook; #[macro_export] macro_rules! json_get { - ($value:expr, $name:literal, $into:ident) => { - $crate::json_cast!($value.get($name).expect("Should exist"), $into) - }; + ($value:expr, $name:literal, $into:ident) => {{ + match $value.get($name) { + Some(val) => $crate::json_cast!(val, $into), + None => panic!( + concat!( + "Expected '", + $name, + "' to be a key for the'", + stringify!($value), + "' object: {:#?}" + ), + $value + ), + } + }}; } #[macro_export] macro_rules! json_cast { - ($value:expr, $into:ident) => { - $value.$into().expect(concat!( - "Should be able to cast value into ", - stringify!($into) - )) - }; + ($value:expr, $into:ident) => {{ + match $value.$into() { + Some(result) => result, + None => panic!( + concat!( + "Expected to be able to cast value ({:#?}) ", + stringify!($into) + ), + $value + ), + } + }}; } /// The core of the `yt_dlp` interface. @@ -43,6 +63,7 @@ pub struct YoutubeDL { youtube_dl_class: PyObjectRef, yt_dlp_module: PyObjectRef, options: serde_json::Map<String, serde_json::Value>, + post_processors: Vec<Box<dyn PostProcessor>>, } impl std::fmt::Debug for YoutubeDL { @@ -60,7 +81,7 @@ impl YoutubeDL { /// /// # Errors /// If a python call fails. - pub fn from_options(mut options: YoutubeDLOptions) -> Result<Self, build::Error> { + pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> { let mut settings = vm::Settings::default(); if let Ok(python_path) = env::var("PYTHONPATH") { for path in python_path.split(':') { @@ -92,9 +113,8 @@ impl YoutubeDL { let yt_dlp_module = vm.import("yt_dlp", 0)?; let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - let maybe_hook = mem::take(&mut options.progress_hook); - let opts = options.into_py_dict(vm); - if let Some(function) = maybe_hook { + let opts = json_loads(options.options, vm); + if let Some(function) = options.progress_hook { opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); vm.new_pyobj(vec![hook]) @@ -192,6 +212,7 @@ impl YoutubeDL { youtube_dl_class, yt_dlp_module, options: output_options, + post_processors: options.post_processors, }) } @@ -267,7 +288,7 @@ impl YoutubeDL { download: bool, process: bool, ) -> Result<InfoJson, extract_info::Error> { - match self.interpreter.enter(|vm| { + self.interpreter.enter(|vm| { let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]); let kw_args = KwArgs::new({ @@ -279,9 +300,13 @@ impl YoutubeDL { let fun_args = FuncArgs::new(pos_args, kw_args); - let inner = self.youtube_dl_class.get_attr("extract_info", vm)?; + let inner = self + .youtube_dl_class + .get_attr("extract_info", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; let result = inner - .call_with_args(fun_args, vm)? + .call_with_args(fun_args, vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))? .downcast::<PyDict>() .expect("This is a dict"); @@ -295,7 +320,9 @@ impl YoutubeDL { }); let mut out = vec![]; - let next = generator.get_attr("__next__", vm)?; + let next = generator + .get_attr("__next__", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; while let Ok(output) = next.call((), vm) { out.push(output); @@ -303,27 +330,16 @@ impl YoutubeDL { break; } } - result.set_item("entries", vm.new_pyobj(out), vm)?; + result + .set_item("entries", vm.new_pyobj(out), vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; } } - let result = { - let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; - let value = sanitize.call((result,), vm)?; + let result = self.prepare_info_json(result, vm)?; - value.downcast::<PyDict>().expect("This should stay a dict") - }; - - let result_json = json_dumps(result, vm); - - Ok::<_, PyRef<PyBaseException>>(result_json) - }) { - Ok(ok) => Ok(ok), - Err(err) => self.interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(extract_info::Error::Python(buffer)) - }), - } + Ok(result) + }) } /// Take the (potentially modified) result of the information extractor (i.e., @@ -344,7 +360,7 @@ impl YoutubeDL { ie_result: InfoJson, download: bool, ) -> Result<InfoJson, process_ie_result::Error> { - match self.interpreter.enter(|vm| { + self.interpreter.enter(|vm| { let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]); let kw_args = KwArgs::new({ @@ -355,46 +371,109 @@ impl YoutubeDL { let fun_args = FuncArgs::new(pos_args, kw_args); - let inner = self.youtube_dl_class.get_attr("process_ie_result", vm)?; + let inner = self + .youtube_dl_class + .get_attr("process_ie_result", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; let result = inner - .call_with_args(fun_args, vm)? + .call_with_args(fun_args, vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))? .downcast::<PyDict>() .expect("This is a dict"); - let result = { - let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; - let value = sanitize.call((result,), vm)?; + let result = self.prepare_info_json(result, vm)?; - value.downcast::<PyDict>().expect("This should stay a dict") - }; + Ok(result) + }) + } - let result_json = json_dumps(result, vm); + fn prepare_info_json( + &self, + info: PyRef<PyDict>, + vm: &VirtualMachine, + ) -> Result<InfoJson, prepare::Error> { + let sanitize = self + .youtube_dl_class + .get_attr("sanitize_info", vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; - Ok::<_, PyRef<PyBaseException>>(result_json) - }) { - Ok(ok) => Ok(ok), - Err(err) => self.interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(process_ie_result::Error::Python(buffer)) - }), + let value = sanitize + .call((info,), vm) + .map_err(|exc| PythonError::from_exception(vm, &exc))?; + + let result = value.downcast::<PyDict>().expect("This should stay a dict"); + + let mut json = json_dumps(result, vm); + + for pp in &self.post_processors { + if pp + .extractors() + .iter() + .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str)) + { + json = pp.process(json)?; + } else { + error!("Extractor not found for {pp:#?}"); + } } + + Ok(json) + } +} + +#[derive(thiserror::Error, Debug)] +pub struct PythonError(pub String); + +impl Display for PythonError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Python threw an exception: {}", self.0) + } +} + +impl PythonError { + fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self { + let buffer = process_exception(vm, exc); + Self(buffer) } } #[allow(missing_docs)] pub mod process_ie_result { + use crate::{PythonError, prepare}; + #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), + #[error(transparent)] + Python(#[from] PythonError), + + #[error("Failed to prepare the info json")] + InfoJsonPrepare(#[from] prepare::Error), } } #[allow(missing_docs)] pub mod extract_info { + use crate::{PythonError, prepare}; + #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), + #[error(transparent)] + Python(#[from] PythonError), + + #[error("Failed to prepare the info json")] + InfoJsonPrepare(#[from] prepare::Error), + } +} +#[allow(missing_docs)] +pub mod prepare { + use crate::{PythonError, post_processors}; + + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), + + #[error("Failed to run a post processor")] + PostProcessorRun(#[from] post_processors::Error), } } @@ -410,15 +489,19 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); pub struct YoutubeDLOptions { options: serde_json::Map<String, serde_json::Value>, progress_hook: Option<ProgressHookFunction>, + post_processors: Vec<Box<dyn PostProcessor>>, } impl YoutubeDLOptions { #[must_use] pub fn new() -> Self { - Self { + let me = Self { options: serde_json::Map::new(), progress_hook: None, - } + post_processors: vec![], + }; + + me.with_post_processor(post_processors::dearrow::DeArrowPP) } #[must_use] @@ -426,10 +509,7 @@ impl YoutubeDLOptions { let mut options = self.options; options.insert(key.into(), value.into()); - Self { - options, - progress_hook: self.progress_hook, - } + Self { options, ..self } } #[must_use] @@ -438,12 +518,18 @@ impl YoutubeDLOptions { todo!() } else { Self { - options: self.options, progress_hook: Some(progress_hook), + ..self } } } + #[must_use] + pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self { + self.post_processors.push(Box::new(post_processor)); + self + } + /// # Errors /// If the underlying [`YoutubeDL::from_options`] errors. pub fn build(self) -> Result<YoutubeDL, build::Error> { @@ -454,7 +540,7 @@ impl YoutubeDLOptions { pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self { Self { options, - progress_hook: None, + ..Self::new() } } @@ -462,10 +548,6 @@ impl YoutubeDLOptions { pub fn get(&self, key: &str) -> Option<&serde_json::Value> { self.options.get(key) } - - fn into_py_dict(self, vm: &VirtualMachine) -> PyRef<PyDict> { - json_loads(self.options, vm) - } } #[allow(missing_docs)] @@ -474,9 +556,6 @@ pub mod build { pub enum Error { #[error("Python threw an exception: {0}")] Python(String), - - #[error("Io error: {0}")] - Io(#[from] std::io::Error), } } diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs new file mode 100644 index 0000000..110beeb --- /dev/null +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -0,0 +1,108 @@ +use log::{info, warn}; +use serde::{Deserialize, Serialize}; + +use crate::{InfoJson, json_get}; + +use super::PostProcessor; + +#[derive(Debug, Clone, Copy)] +pub struct DeArrowPP; + +impl PostProcessor for DeArrowPP { + fn extractors(&self) -> &'static [&'static str] { + &["Youtube"] + } + + fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> { + let mut output: DeArrowApi = reqwest::blocking::get(format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + json_get!(info, "id", as_str) + ))? + .json()?; + + output.titles.reverse(); + + let title_len = output.titles.len(); + loop { + let Some(title) = output.titles.pop() else { + break; + }; + + if (title.locked || title.votes < 1) && title_len > 1 { + info!( + "Skipping title {:#?}, as it is not good enough", + title.value + ); + // Skip titles that are not “good” enough. + continue; + } + + if let Some(old_title) = info.insert( + "title".to_owned(), + serde_json::Value::String(title.value.clone()), + ) { + warn!("Updating title from {:#?} to {:#?}", old_title, title.value); + info.insert("original_title".to_owned(), old_title); + } else { + warn!("Setting title to {:#?}", title.value); + } + + break; + } + + Ok(info) + } +} + +#[derive(Serialize, Deserialize)] +/// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow> +struct DeArrowApi { + titles: Vec<Title>, + thumbnails: Vec<Thumbnail>, + + #[serde(alias = "randomTime")] + random_time: Option<f64>, + + #[serde(alias = "videoDuration")] + video_duration: Option<f64>, + + #[serde(alias = "casualVotes")] + casual_votes: Vec<String>, +} + +#[derive(Serialize, Deserialize)] +struct Title { + /// Note: Titles will sometimes contain > before a word. + /// This tells the auto-formatter to not format a word. + /// If you have no auto-formatter, you can ignore this and replace it with an empty string + #[serde(alias = "title")] + value: String, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} + +#[derive(Serialize, Deserialize)] +struct Thumbnail { + // null if original is true + timestamp: Option<f64>, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs new file mode 100644 index 0000000..6067c7a --- /dev/null +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -0,0 +1,20 @@ +use crate::InfoJson; + +pub mod dearrow; + +pub trait PostProcessor: std::fmt::Debug + Send { + /// Process a [`InfoJson`] object and return the updated one. + /// + /// # Errors + /// If the processing steps failed. + fn process(&self, info: InfoJson) -> Result<InfoJson, Error>; + + /// The supported extractors for this post processor + fn extractors(&self) -> &'static [&'static str]; +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to access a api: {0}")] + Get(#[from] reqwest::Error), +} |