diff options
Diffstat (limited to 'crates')
-rw-r--r-- | crates/yt/src/comments/description.rs | 4 | ||||
-rw-r--r-- | crates/yt/src/comments/mod.rs | 4 | ||||
-rw-r--r-- | crates/yt/src/download/download_options.rs | 2 | ||||
-rw-r--r-- | crates/yt/src/select/cmds/add.rs | 2 | ||||
-rw-r--r-- | crates/yt/src/storage/subscriptions.rs | 2 | ||||
-rw-r--r-- | crates/yt/src/storage/video_database/get/mod.rs | 2 | ||||
-rw-r--r-- | crates/yt/src/subscribe/mod.rs | 2 | ||||
-rw-r--r-- | crates/yt/src/update/mod.rs | 2 | ||||
-rw-r--r-- | crates/yt/src/update/updater.rs | 5 | ||||
-rw-r--r-- | crates/yt/src/version/mod.rs | 2 | ||||
-rw-r--r-- | crates/yt_dlp/Cargo.toml | 3 | ||||
-rw-r--r-- | crates/yt_dlp/src/info_json.rs | 60 | ||||
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 421 | ||||
-rw-r--r-- | crates/yt_dlp/src/logging.rs | 28 | ||||
-rw-r--r-- | crates/yt_dlp/src/options.rs | 280 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/dearrow.rs | 157 | ||||
-rw-r--r-- | crates/yt_dlp/src/post_processors/mod.rs | 121 | ||||
-rw-r--r-- | crates/yt_dlp/src/progress_hook.rs | 19 | ||||
-rw-r--r-- | crates/yt_dlp/src/python_error.rs | 116 |
19 files changed, 723 insertions, 509 deletions
diff --git a/crates/yt/src/comments/description.rs b/crates/yt/src/comments/description.rs index e8cb29d..878b573 100644 --- a/crates/yt/src/comments/description.rs +++ b/crates/yt/src/comments/description.rs @@ -17,7 +17,7 @@ use crate::{ }; use anyhow::{Result, bail}; -use yt_dlp::{InfoJson, json_cast}; +use yt_dlp::json_cast; pub async fn description(app: &App) -> Result<()> { let description = get(app).await?; @@ -34,7 +34,7 @@ pub async fn get(app: &App) -> Result<String> { bail!("Could not find a currently playing video!"); }; - let info_json: InfoJson = get::video_info_json(&currently_playing_video)?.unreachable( + let info_json = get::video_info_json(&currently_playing_video)?.unreachable( "A currently *playing* must be cached. And thus the info.json should be available", ); diff --git a/crates/yt/src/comments/mod.rs b/crates/yt/src/comments/mod.rs index 876146d..54031a4 100644 --- a/crates/yt/src/comments/mod.rs +++ b/crates/yt/src/comments/mod.rs @@ -15,7 +15,7 @@ use anyhow::{Result, bail}; use comment::{Comment, CommentExt, Comments, Parent}; use output::display_fmt_and_less; use regex::Regex; -use yt_dlp::{InfoJson, json_cast}; +use yt_dlp::json_cast; use crate::{ app::App, @@ -39,7 +39,7 @@ pub async fn get(app: &App) -> Result<Comments> { bail!("Could not find a currently playing video!"); }; - let info_json: InfoJson = get::video_info_json(&currently_playing_video)?.unreachable( + let info_json = get::video_info_json(&currently_playing_video)?.unreachable( "A currently *playing* video must be cached. 
And thus the info.json should be available", ); diff --git a/crates/yt/src/download/download_options.rs b/crates/yt/src/download/download_options.rs index 03c20ba..558adfd 100644 --- a/crates/yt/src/download/download_options.rs +++ b/crates/yt/src/download/download_options.rs @@ -11,7 +11,7 @@ use anyhow::Context; use serde_json::{Value, json}; -use yt_dlp::{YoutubeDL, YoutubeDLOptions}; +use yt_dlp::{YoutubeDL, options::YoutubeDLOptions}; use crate::{app::App, storage::video_database::YtDlpOptions}; diff --git a/crates/yt/src/select/cmds/add.rs b/crates/yt/src/select/cmds/add.rs index 387b3a1..2fff298 100644 --- a/crates/yt/src/select/cmds/add.rs +++ b/crates/yt/src/select/cmds/add.rs @@ -20,7 +20,7 @@ use crate::{ use anyhow::{Context, Result, bail}; use log::{error, warn}; use url::Url; -use yt_dlp::{InfoJson, YoutubeDL, json_cast, json_get}; +use yt_dlp::{YoutubeDL, info_json::InfoJson, json_cast, json_get}; #[allow(clippy::too_many_lines)] pub(super) async fn add( diff --git a/crates/yt/src/storage/subscriptions.rs b/crates/yt/src/storage/subscriptions.rs index 6c0d08a..1ab0d72 100644 --- a/crates/yt/src/storage/subscriptions.rs +++ b/crates/yt/src/storage/subscriptions.rs @@ -17,7 +17,7 @@ use anyhow::Result; use log::debug; use sqlx::query; use url::Url; -use yt_dlp::YoutubeDLOptions; +use yt_dlp::options::YoutubeDLOptions; use crate::{app::App, unreachable::Unreachable}; diff --git a/crates/yt/src/storage/video_database/get/mod.rs b/crates/yt/src/storage/video_database/get/mod.rs index 0456cd3..e76131e 100644 --- a/crates/yt/src/storage/video_database/get/mod.rs +++ b/crates/yt/src/storage/video_database/get/mod.rs @@ -18,7 +18,7 @@ use anyhow::{Context, Result, bail}; use blake3::Hash; use log::{debug, trace}; use sqlx::query; -use yt_dlp::InfoJson; +use yt_dlp::info_json::InfoJson; use crate::{ app::App, diff --git a/crates/yt/src/subscribe/mod.rs b/crates/yt/src/subscribe/mod.rs index 7ac0be4..a965ac0 100644 --- a/crates/yt/src/subscribe/mod.rs +++ 
b/crates/yt/src/subscribe/mod.rs @@ -16,7 +16,7 @@ use futures::FutureExt; use log::warn; use tokio::io::{AsyncBufRead, AsyncBufReadExt}; use url::Url; -use yt_dlp::{YoutubeDLOptions, json_get}; +use yt_dlp::{json_get, options::YoutubeDLOptions}; use crate::{ app::App, diff --git a/crates/yt/src/update/mod.rs b/crates/yt/src/update/mod.rs index d866882..7f9bee7 100644 --- a/crates/yt/src/update/mod.rs +++ b/crates/yt/src/update/mod.rs @@ -15,7 +15,7 @@ use anyhow::{Context, Ok, Result}; use chrono::{DateTime, Utc}; use log::warn; use url::Url; -use yt_dlp::{InfoJson, json_cast, json_get}; +use yt_dlp::{info_json::InfoJson, json_cast, json_get}; use crate::{ app::App, diff --git a/crates/yt/src/update/updater.rs b/crates/yt/src/update/updater.rs index 60e9855..75d12dc 100644 --- a/crates/yt/src/update/updater.rs +++ b/crates/yt/src/update/updater.rs @@ -19,7 +19,10 @@ use futures::{StreamExt, future::join_all, stream}; use log::{Level, debug, error, log_enabled}; use serde_json::json; use tokio_util::task::LocalPoolHandle; -use yt_dlp::{InfoJson, PythonError, YoutubeDLOptions, json_cast, json_get, process_ie_result}; +use yt_dlp::{ + info_json::InfoJson, json_cast, json_get, options::YoutubeDLOptions, process_ie_result, + python_error::PythonError, +}; use crate::{ ansi_escape_codes::{clear_whole_line, move_to_col}, diff --git a/crates/yt/src/version/mod.rs b/crates/yt/src/version/mod.rs index 9a91f3b..95660c0 100644 --- a/crates/yt/src/version/mod.rs +++ b/crates/yt/src/version/mod.rs @@ -10,7 +10,7 @@ use anyhow::{Context, Result}; use sqlx::{SqlitePool, sqlite::SqliteConnectOptions}; -use yt_dlp::YoutubeDLOptions; +use yt_dlp::options::YoutubeDLOptions; use crate::{config::Config, storage::migrate::get_version_db}; diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml index 81e1412..4f62eec 100644 --- a/crates/yt_dlp/Cargo.toml +++ b/crates/yt_dlp/Cargo.toml @@ -22,13 +22,14 @@ rust-version.workspace = true publish = true [dependencies] +curl = 
"0.4.48" indexmap = { version = "2.9.0", default-features = false } log.workspace = true -reqwest = { version = "0.12.20", features = ["blocking", "json"] } rustpython = { git = "https://github.com/RustPython/RustPython.git", features = [ "threading", "stdlib", "stdio", + "freeze-stdlib", "importlib", "ssl", ], default-features = false } diff --git a/crates/yt_dlp/src/info_json.rs b/crates/yt_dlp/src/info_json.rs new file mode 100644 index 0000000..31f4a69 --- /dev/null +++ b/crates/yt_dlp/src/info_json.rs @@ -0,0 +1,60 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use rustpython::vm::{ + PyRef, VirtualMachine, + builtins::{PyDict, PyStr}, +}; + +pub type InfoJson = serde_json::Map<String, serde_json::Value>; + +pub fn json_loads( + input: serde_json::Map<String, serde_json::Value>, + vm: &VirtualMachine, +) -> PyRef<PyDict> { + let json = vm.import("json", 0).expect("Module exists"); + let loads = json.get_attr("loads", vm).expect("Method exists"); + let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); + let dict = loads + .call((self_str,), vm) + .expect("Vaild json is always a valid dict"); + + dict.downcast().expect("Should always be a dict") +} + +/// # Panics +/// If expectation about python operations fail. 
+pub fn json_dumps( + input: PyRef<PyDict>, + vm: &VirtualMachine, +) -> serde_json::Map<String, serde_json::Value> { + let json = vm.import("json", 0).expect("Module exists"); + let dumps = json.get_attr("dumps", vm).expect("Method exists"); + let dict = dumps + .call((input,), vm) + .map_err(|err| vm.print_exception(err)) + .expect("Might not always work, but for our dicts it works"); + + let string: PyRef<PyStr> = dict.downcast().expect("Should always be a string"); + + let real_string = string.to_str().expect("Should be valid utf8"); + + // { + // let mut file = File::create("debug.dump.json").unwrap(); + // write!(file, "{}", real_string).unwrap(); + // } + + let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); + + match value { + serde_json::Value::Object(map) => map, + _ => unreachable!("These should not be json.dumps output"), + } +} diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index e7b37c6..a1db606 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -10,27 +10,29 @@ //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. 
-use std::{self, env, fmt::Display, path::PathBuf}; +use std::path::PathBuf; use indexmap::IndexMap; -use log::{Level, debug, error, info, log_enabled}; -use logging::setup_logging; -use post_processors::PostProcessor; -use rustpython::{ - InterpreterConfig, - vm::{ - self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, VirtualMachine, - builtins::{PyBaseException, PyBaseExceptionRef, PyDict, PyList, PyStr}, - function::{FuncArgs, KwArgs, PosArgs}, - py_io::Write, - suggestion::offer_suggestions, - }, +use log::info; +use rustpython::vm::{ + Interpreter, PyObjectRef, PyRef, VirtualMachine, + builtins::{PyDict, PyList, PyStr}, + function::{FuncArgs, KwArgs, PosArgs}, }; use url::Url; -mod logging; +use crate::{ + info_json::{InfoJson, json_dumps, json_loads}, + python_error::PythonError, +}; + +pub mod info_json; +pub mod options; pub mod post_processors; pub mod progress_hook; +pub mod python_error; + +mod logging; #[macro_export] macro_rules! json_get { @@ -73,7 +75,6 @@ pub struct YoutubeDL { youtube_dl_class: PyObjectRef, yt_dlp_module: PyObjectRef, options: serde_json::Map<String, serde_json::Value>, - post_processors: Vec<Box<dyn PostProcessor>>, } impl std::fmt::Debug for YoutubeDL { @@ -84,148 +85,6 @@ impl std::fmt::Debug for YoutubeDL { } impl YoutubeDL { - /// Construct this instance from options. - /// - /// # Panics - /// If `yt_dlp` changed their interface. - /// - /// # Errors - /// If a python call fails. - pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> { - let mut settings = vm::Settings::default(); - if let Ok(python_path) = env::var("PYTHONPATH") { - for path in python_path.split(':') { - settings.path_list.push(path.to_owned()); - } - } else { - error!( - "No PYTHONPATH found or invalid utf8. \ - This means, that you probably did not \ - supply the yt_dlp!" - ); - } - - settings.install_signal_handlers = false; - - // NOTE(@bpeetz): Another value leads to an internal codegen error. 
<2025-06-13> - settings.optimize = 0; - - settings.isolated = true; - - let interpreter = InterpreterConfig::new() - .init_stdlib() - .settings(settings) - .interpreter(); - - let output_options = options.options.clone(); - - let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { - let yt_dlp_module = vm.import("yt_dlp", 0)?; - let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - - let opts = json_loads(options.options, vm); - if let Some(function) = options.progress_hook { - opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { - let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); - vm.new_pyobj(vec![hook]) - }) - .expect("Should work?"); - } - - { - // Unconditionally set a logger. - // Otherwise, yt_dlp will log to stderr. - - /// Is the specified record to be logged? Returns false for no, - /// true for yes. Filters can either modify log records in-place or - /// return a completely different record instance which will replace - /// the original log record in any future processing of the event. 
- fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { - let record = input.args.remove(0); - - // Filter out all error logs (they are propagated as rust errors) - let levelname: PyRef<PyStr> = record - .get_attr("levelname", vm) - .expect("This should exist") - .downcast() - .expect("This should be a String"); - - let return_value = levelname.as_str() != "ERROR"; - - if log_enabled!(Level::Debug) && !return_value { - let message: String = { - let get_message = record.get_attr("getMessage", vm).expect("Is set"); - let message: PyRef<PyStr> = get_message - .call((), vm) - .expect("Can be called") - .downcast() - .expect("Downcasting works"); - - message.as_str().to_owned() - }; - - debug!("Swollowed error message: '{message}'"); - } - return_value - } - - let logging = setup_logging(vm, "yt_dlp")?; - let ytdl_logger = { - let get_logger = logging.get_item("getLogger", vm)?; - get_logger.call(("yt_dlp",), vm)? - }; - - { - let args = FuncArgs::new( - PosArgs::new(vec![]), - KwArgs::new({ - let mut map = IndexMap::new(); - // Ensure that all events are logged by setting - // the log level to NOTSET (we filter on rust's side) - map.insert("level".to_owned(), vm.new_pyobj(0)); - map - }), - ); - - let basic_config = logging.get_item("basicConfig", vm)?; - basic_config.call(args, vm)?; - } - - { - let add_filter = ytdl_logger.get_attr("addFilter", vm)?; - add_filter.call( - (vm.new_function("yt_dlp_error_filter", filter_error_log),), - vm, - )?; - } - - opts.set_item("logger", ytdl_logger, vm)?; - } - - let youtube_dl_class = class.call((opts,), vm)?; - - Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class)) - }) { - Ok(ok) => Ok(ok), - Err(err) => { - // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? 
<2025-06-14> - // interpreter.finalize(Some(err)); - interpreter.enter(|vm| { - let buffer = process_exception(vm, &err); - Err(build::Error::Python(buffer)) - }) - } - }?; - - Ok(Self { - interpreter, - youtube_dl_class, - yt_dlp_module, - options: output_options, - post_processors: options.post_processors, - }) - } - /// # Panics /// /// If `yt_dlp` changed their location or type of `__version__`. @@ -413,43 +272,13 @@ impl YoutubeDL { let result = value.downcast::<PyDict>().expect("This should stay a dict"); - let mut json = json_dumps(result, vm); - - for pp in &self.post_processors { - if pp - .extractors() - .iter() - .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str)) - { - json = pp.process(json)?; - } else { - error!("Extractor not found for {pp:#?}"); - } - } - - Ok(json) - } -} - -#[derive(thiserror::Error, Debug)] -pub struct PythonError(pub String); - -impl Display for PythonError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Python threw an exception: {}", self.0) - } -} - -impl PythonError { - fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self { - let buffer = process_exception(vm, exc); - Self(buffer) + Ok(json_dumps(result, vm)) } } #[allow(missing_docs)] pub mod process_ie_result { - use crate::{PythonError, prepare}; + use crate::{prepare, python_error::PythonError}; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -462,7 +291,7 @@ pub mod process_ie_result { } #[allow(missing_docs)] pub mod extract_info { - use crate::{PythonError, prepare}; + use crate::{prepare, python_error::PythonError}; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -475,221 +304,11 @@ pub mod extract_info { } #[allow(missing_docs)] pub mod prepare { - use crate::{PythonError, post_processors}; + use crate::python_error::PythonError; #[derive(Debug, thiserror::Error)] pub enum Error { #[error(transparent)] Python(#[from] PythonError), - - #[error("Failed to run a post 
processor")] - PostProcessorRun(#[from] post_processors::Error), - } -} - -pub type InfoJson = serde_json::Map<String, serde_json::Value>; -pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); - -/// Options, that are used to customize the download behaviour. -/// -/// In the future, this might get a Builder api. -/// -/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. -#[derive(Default, Debug)] -pub struct YoutubeDLOptions { - options: serde_json::Map<String, serde_json::Value>, - progress_hook: Option<ProgressHookFunction>, - post_processors: Vec<Box<dyn PostProcessor>>, -} - -impl YoutubeDLOptions { - #[must_use] - pub fn new() -> Self { - let me = Self { - options: serde_json::Map::new(), - progress_hook: None, - post_processors: vec![], - }; - - me.with_post_processor(post_processors::dearrow::DeArrowPP) - } - - #[must_use] - pub fn set(self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self { - let mut options = self.options; - options.insert(key.into(), value.into()); - - Self { options, ..self } - } - - #[must_use] - pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { - if let Some(_previous_hook) = self.progress_hook { - todo!() - } else { - Self { - progress_hook: Some(progress_hook), - ..self - } - } - } - - #[must_use] - pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self { - self.post_processors.push(Box::new(post_processor)); - self - } - - /// # Errors - /// If the underlying [`YoutubeDL::from_options`] errors. 
- pub fn build(self) -> Result<YoutubeDL, build::Error> { - YoutubeDL::from_options(self) - } - - #[must_use] - pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self { - Self { - options, - ..Self::new() - } - } - - #[must_use] - pub fn get(&self, key: &str) -> Option<&serde_json::Value> { - self.options.get(key) - } -} - -#[allow(missing_docs)] -pub mod build { - #[derive(Debug, thiserror::Error)] - pub enum Error { - #[error("Python threw an exception: {0}")] - Python(String), } } - -fn json_loads( - input: serde_json::Map<String, serde_json::Value>, - vm: &VirtualMachine, -) -> PyRef<PyDict> { - let json = vm.import("json", 0).expect("Module exists"); - let loads = json.get_attr("loads", vm).expect("Method exists"); - let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); - let dict = loads - .call((self_str,), vm) - .expect("Vaild json is always a valid dict"); - - dict.downcast().expect("Should always be a dict") -} - -/// # Panics -/// If expectation about python operations fail. 
-pub fn json_dumps( - input: PyRef<PyDict>, - vm: &VirtualMachine, -) -> serde_json::Map<String, serde_json::Value> { - let json = vm.import("json", 0).expect("Module exists"); - let dumps = json.get_attr("dumps", vm).expect("Method exists"); - let dict = dumps - .call((input,), vm) - .map_err(|err| vm.print_exception(err)) - .expect("Might not always work, but for our dicts it works"); - - let string: PyRef<PyStr> = dict.downcast().expect("Should always be a string"); - - let real_string = string.to_str().expect("Should be valid utf8"); - - // { - // let mut file = File::create("debug.dump.json").unwrap(); - // write!(file, "{}", real_string).unwrap(); - // } - - let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); - - match value { - serde_json::Value::Object(map) => map, - _ => unreachable!("These should not be json.dumps output"), - } -} - -// Inlined and changed from `vm.write_exception_inner` -fn write_exception<W: Write>( - vm: &VirtualMachine, - output: &mut W, - exc: &PyBaseExceptionRef, -) -> Result<(), W::Error> { - let varargs = exc.args(); - let args_repr = { - match varargs.len() { - 0 => vec![], - 1 => { - let args0_repr = if true { - varargs[0] - .str(vm) - .unwrap_or_else(|_| PyStr::from("<element str() failed>").into_ref(&vm.ctx)) - } else { - varargs[0].repr(vm).unwrap_or_else(|_| { - PyStr::from("<element repr() failed>").into_ref(&vm.ctx) - }) - }; - vec![args0_repr] - } - _ => varargs - .iter() - .map(|vararg| { - vararg.repr(vm).unwrap_or_else(|_| { - PyStr::from("<element repr() failed>").into_ref(&vm.ctx) - }) - }) - .collect(), - } - }; - - let exc_class = exc.class(); - - if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) { - unreachable!( - "A syntax error should never be raised, \ - as yt_dlp should not have them and neither our embedded code" - ); - } - - let exc_name = exc_class.name(); - match args_repr.len() { - 0 => write!(output, "{exc_name}"), - 1 => write!(output, "{}: {}", 
exc_name, args_repr[0]), - _ => write!( - output, - "{}: ({})", - exc_name, - args_repr - .iter() - .map(|val| val.as_str()) - .collect::<Vec<_>>() - .join(", "), - ), - }?; - - match offer_suggestions(exc, vm) { - Some(suggestions) => { - write!(output, ". Did you mean: '{suggestions}'?") - } - None => Ok(()), - } -} - -fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String { - let mut buffer = String::new(); - write_exception(vm, &mut buffer, err) - .expect("We are writing into an *in-memory* string, it will always work"); - - if log_enabled!(Level::Debug) { - let mut output = String::new(); - vm.write_exception(&mut output, err) - .expect("We are writing into an *in-memory* string, it will always work"); - debug!("Python threw an exception: {output}"); - } - - buffer -} diff --git a/crates/yt_dlp/src/logging.rs b/crates/yt_dlp/src/logging.rs index 5cb4c1d..112836e 100644 --- a/crates/yt_dlp/src/logging.rs +++ b/crates/yt_dlp/src/logging.rs @@ -15,7 +15,7 @@ use log::{Level, MetadataBuilder, Record, logger}; use rustpython::vm::{ PyObjectRef, PyRef, PyResult, VirtualMachine, - builtins::{PyInt, PyList, PyStr}, + builtins::{PyInt, PyStr}, convert::ToPyObject, function::FuncArgs, }; @@ -167,31 +167,5 @@ def basicConfig(*pargs, **kwargs): "<embedded logging inintializing code>".to_owned(), )?; - let all: PyRef<PyList> = logging - .get_attr("__all__", vm)? 
- .downcast() - .expect("Is a list"); - all.borrow_vec_mut().push(vm.new_pyobj("HostHandler")); - - // { - // let logging_dict = logging.dict().expect("Exists"); - // - // for (key, val) in scope.globals { - // let key: PyRef<PyStr> = key.downcast().expect("Is a string"); - // - // if !logging_dict.contains_key(key.as_str(), vm) { - // logging_dict.set_item(key.as_str(), val, vm)?; - // } - // } - // - // for (key, val) in scope.locals { - // let key: PyRef<PyStr> = key.downcast().expect("Is a string"); - // - // if !logging_dict.contains_key(key.as_str(), vm) { - // logging_dict.set_item(key.as_str(), val, vm)?; - // } - // } - // } - Ok(scope.globals.to_pyobject(vm)) } diff --git a/crates/yt_dlp/src/options.rs b/crates/yt_dlp/src/options.rs new file mode 100644 index 0000000..182b8a1 --- /dev/null +++ b/crates/yt_dlp/src/options.rs @@ -0,0 +1,280 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::env; + +use indexmap::IndexMap; +use log::{Level, debug, error, log_enabled}; +use rustpython::{ + InterpreterConfig, + vm::{ + self, PyObjectRef, PyRef, PyResult, VirtualMachine, + builtins::{PyBaseException, PyStr}, + function::{FuncArgs, KwArgs, PosArgs}, + }, +}; + +use crate::{ + YoutubeDL, json_loads, logging::setup_logging, post_processors, python_error::process_exception, +}; + +/// Wrap your function with [`mk_python_function`]. +pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); + +pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult<PyObjectRef>; + +/// Options, that are used to customize the download behaviour. +/// +/// In the future, this might get a Builder api. 
+/// +/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. +#[derive(Default, Debug)] +pub struct YoutubeDLOptions { + options: serde_json::Map<String, serde_json::Value>, + progress_hook: Option<ProgressHookFunction>, + post_processors: Vec<PostProcessorFunction>, +} + +impl YoutubeDLOptions { + #[must_use] + pub fn new() -> Self { + let me = Self { + options: serde_json::Map::new(), + progress_hook: None, + post_processors: vec![], + }; + + me.with_post_processor(post_processors::dearrow::process) + } + + #[must_use] + pub fn set(self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self { + let mut options = self.options; + options.insert(key.into(), value.into()); + + Self { options, ..self } + } + + #[must_use] + pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { + if let Some(_previous_hook) = self.progress_hook { + todo!() + } else { + Self { + progress_hook: Some(progress_hook), + ..self + } + } + } + + #[must_use] + pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self { + self.post_processors.push(pp); + self + } + + /// # Errors + /// If the underlying [`YoutubeDL::from_options`] errors. + pub fn build(self) -> Result<YoutubeDL, build::Error> { + YoutubeDL::from_options(self) + } + + #[must_use] + pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self { + Self { + options, + ..Self::new() + } + } + + #[must_use] + pub fn get(&self, key: &str) -> Option<&serde_json::Value> { + self.options.get(key) + } +} + +impl YoutubeDL { + /// Construct this instance from options. + /// + /// # Panics + /// If `yt_dlp` changed their interface. + /// + /// # Errors + /// If a python call fails. 
+ #[allow(clippy::too_many_lines)] + pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> { + let mut settings = vm::Settings::default(); + if let Ok(python_path) = env::var("PYTHONPATH") { + for path in python_path.split(':') { + settings.path_list.push(path.to_owned()); + } + } else { + error!( + "No PYTHONPATH found or invalid utf8. \ + This means, that you probably did not \ + supply a yt_dlp python package!" + ); + } + + settings.install_signal_handlers = false; + + // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> + settings.optimize = 0; + + settings.isolated = true; + + let interpreter = InterpreterConfig::new() + .init_stdlib() + .settings(settings) + .interpreter(); + + let output_options = options.options.clone(); + + let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { + let yt_dlp_module = vm.import("yt_dlp", 0)?; + let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; + + let opts = json_loads(options.options, vm); + + { + // Setup the progress hook + if let Some(function) = options.progress_hook { + opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { + let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); + vm.new_pyobj(vec![hook]) + }) + .expect("Should work?"); + } + } + + { + // Unconditionally set a logger. + // Otherwise, yt_dlp will log to stderr. + + /// Is the specified record to be logged? Returns false for no, + /// true for yes. Filters can either modify log records in-place or + /// return a completely different record instance which will replace + /// the original log record in any future processing of the event. 
+ fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { + let record = input.args.remove(0); + + // Filter out all error logs (they are propagated as rust errors) + let levelname: PyRef<PyStr> = record + .get_attr("levelname", vm) + .expect("This should exist") + .downcast() + .expect("This should be a String"); + + let return_value = levelname.as_str() != "ERROR"; + + if log_enabled!(Level::Debug) && !return_value { + let message: String = { + let get_message = record.get_attr("getMessage", vm).expect("Is set"); + let message: PyRef<PyStr> = get_message + .call((), vm) + .expect("Can be called") + .downcast() + .expect("Downcasting works"); + + message.as_str().to_owned() + }; + + debug!("Swollowed error message: '{message}'"); + } + return_value + } + + let logging = setup_logging(vm, "yt_dlp")?; + let ytdl_logger = { + let get_logger = logging.get_item("getLogger", vm)?; + get_logger.call(("yt_dlp",), vm)? + }; + + { + let args = FuncArgs::new( + PosArgs::new(vec![]), + KwArgs::new({ + let mut map = IndexMap::new(); + // Ensure that all events are logged by setting + // the log level to NOTSET (we filter on rust's side) + map.insert("level".to_owned(), vm.new_pyobj(0)); + map + }), + ); + + let basic_config = logging.get_item("basicConfig", vm)?; + basic_config.call(args, vm)?; + } + + { + let add_filter = ytdl_logger.get_attr("addFilter", vm)?; + add_filter.call( + (vm.new_function("yt_dlp_error_filter", filter_error_log),), + vm, + )?; + } + + opts.set_item("logger", ytdl_logger, vm)?; + } + + let youtube_dl_class = class.call((opts,), vm)?; + + { + // Setup the post processors + + let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?; + + for pp in options.post_processors { + let args = { + FuncArgs::new( + PosArgs::new(vec![pp(vm)?]), + KwArgs::new({ + let mut map = IndexMap::new(); + // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN + map.insert("when".to_owned(), vm.new_pyobj("pre_process")); + map 
+ }), + ) + }; + + add_post_processor_fun.call(args, vm)?; + } + } + + Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class)) + }) { + Ok(ok) => Ok(ok), + Err(err) => { + // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14> + // interpreter.finalize(Some(err)); + interpreter.enter(|vm| { + let buffer = process_exception(vm, &err); + Err(build::Error::Python(buffer)) + }) + } + }?; + + Ok(Self { + interpreter, + youtube_dl_class, + yt_dlp_module, + options: output_options, + }) + } +} + +#[allow(missing_docs)] +pub mod build { + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error("Python threw an exception: {0}")] + Python(String), + } +} diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs index bdbea7c..ab5478b 100644 --- a/crates/yt_dlp/src/post_processors/dearrow.rs +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -8,60 +8,118 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use log::{info, warn}; +use curl::easy::Easy; +use log::{error, info, warn}; +use rustpython::vm::{ + PyRef, VirtualMachine, + builtins::{PyDict, PyStr}, +}; use serde::{Deserialize, Serialize}; -use crate::{InfoJson, json_get}; +use crate::{pydict_cast, pydict_get, wrap_post_processor}; -use super::PostProcessor; +wrap_post_processor!("DeArrow", unwrapped_process, process); -#[derive(Debug, Clone, Copy)] -pub struct DeArrowPP; - -impl PostProcessor for DeArrowPP { - fn extractors(&self) -> &'static [&'static str] { - &["Youtube"] +/// # Errors +/// If the API access fails. 
+pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyRef<PyDict>, Error> { + if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" { + warn!("DeArrow: Extractor did not match, exiting."); + return Ok(info); } - fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> { - let mut output: DeArrowApi = reqwest::blocking::get(format!( - "https://sponsor.ajay.app/api/branding?videoID={}", - json_get!(info, "id", as_str) - ))? - .json()?; - - output.titles.reverse(); - - let title_len = output.titles.len(); - loop { - let Some(title) = output.titles.pop() else { - break; - }; - - if (title.locked || title.votes < 1) && title_len > 1 { - info!( - "Skipping title {:#?}, as it is not good enough", - title.value - ); - // Skip titles that are not “good” enough. - continue; - } - - if let Some(old_title) = info.insert( - "title".to_owned(), - serde_json::Value::String(title.value.clone()), - ) { - warn!("Updating title from {:#?} to {:#?}", old_title, title.value); - info.insert("original_title".to_owned(), old_title); - } else { - warn!("Setting title to {:#?}", title.value); - } - - break; + let mut output: DeArrowApi = { + let output_bytes = { + let mut dst = Vec::new(); + + let mut easy = Easy::new(); + easy.url( + format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + pydict_get!(@vm, info, "id", PyStr).as_str() + ) + .as_str(), + )?; + + let mut transfer = easy.transfer(); + transfer.write_function(|data| { + dst.extend_from_slice(data); + Ok(data.len()) + })?; + transfer.perform()?; + drop(transfer); + + dst + }; + + serde_json::from_slice(&output_bytes)? + }; + + // We pop the titles, so we need this vector reversed. 
+ output.titles.reverse(); + + let title_len = output.titles.len(); + let selected = loop { + let Some(title) = output.titles.pop() else { + break false; + }; + + if (title.locked || title.votes < 1) && title_len > 1 { + info!( + "DeArrow: Skipping title {:#?}, as it is not good enough", + title.value + ); + // Skip titles that are not “good” enough. + continue; } - Ok(info) + update_title(&info, &title.value, vm); + + break true; + }; + + if !selected && title_len != 0 { + // No title was selected, even though we had some titles. + // Just pick the first one in this case. + update_title(&info, &output.titles[0].value, vm); } + + Ok(info) +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to access the DeArrow api: {0}")] + Get(#[from] curl::Error), + + #[error("Failed to deserialize a api json return object: {0}")] + Deserialize(#[from] serde_json::Error), +} + +fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) { + assert!(!info.contains_key("original_title", vm)); + + if let Ok(old_title) = info.get_item("title", vm) { + warn!( + "DeArrow: Updating title from {:#?} to {:#?}", + pydict_cast!(@ref old_title, PyStr).as_str(), + new_title + ); + + info.set_item("original_title", old_title, vm) + .expect("We checked, it is a new key"); + } else { + warn!("DeArrow: Setting title to {new_title:#?}"); + } + + let cleaned_title = { + // NOTE(@bpeetz): DeArrow uses `>` as a “Don't format the next word” mark. + // They should be removed, if one does not use a auto-formatter. 
<2025-06-16> + new_title.replace('>', "") + }; + + info.set_item("title", vm.new_pyobj(cleaned_title), vm) + .expect("This should work?"); } #[derive(Serialize, Deserialize)] @@ -77,7 +135,14 @@ struct DeArrowApi { video_duration: Option<f64>, #[serde(alias = "casualVotes")] - casual_votes: Vec<String>, + casual_votes: Vec<CasualVote>, +} + +#[derive(Serialize, Deserialize)] +struct CasualVote { + id: String, + count: u32, + title: String, } #[derive(Serialize, Deserialize)] diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs index 65801c2..00b0ad5 100644 --- a/crates/yt_dlp/src/post_processors/mod.rs +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -8,23 +8,116 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use crate::InfoJson; - pub mod dearrow; -pub trait PostProcessor: std::fmt::Debug + Send { - /// Process a [`InfoJson`] object and return the updated one. - /// - /// # Errors - /// If the processing steps failed. - fn process(&self, info: InfoJson) -> Result<InfoJson, Error>; +#[macro_export] +macro_rules! pydict_get { + (@$vm:expr, $value:expr, $name:literal, $into:ident) => {{ + match $value.get_item($name, $vm) { + Ok(val) => $crate::pydict_cast!(val, $into), + Err(_) => panic!( + concat!( + "Expected '", + $name, + "' to be a key for the'", + stringify!($value), + "' py dictionary: {:#?}" + ), + $value + ), + } + }}; +} - /// The supported extractors for this post processor - fn extractors(&self) -> &'static [&'static str]; +#[macro_export] +macro_rules! 
pydict_cast { + ($value:expr, $into:ident) => {{ + match $value.downcast::<$into>() { + Ok(result) => result, + Err(val) => panic!( + concat!( + "Expected to be able to downcast value ({:#?}) as ", + stringify!($into) + ), + val + ), + } + }}; + (@ref $value:expr, $into:ident) => {{ + match $value.downcast_ref::<$into>() { + Some(result) => result, + None => panic!( + concat!( + "Expected to be able to downcast value ({:#?}) as ", + stringify!($into) + ), + $value + ), + } + }}; } -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Failed to access a api: {0}")] - Get(#[from] reqwest::Error), +#[macro_export] +macro_rules! wrap_post_processor { + ($name:literal, $unwrap:ident, $wrapped:ident) => { + use $crate::progress_hook::__priv::vm; + + /// # Errors + /// - If the underlying function returns an error. + /// - If python operations fail. + pub fn $wrapped(vm: &vm::VirtualMachine) -> vm::PyResult<vm::PyObjectRef> { + fn actual_processor( + mut input: vm::function::FuncArgs, + vm: &vm::VirtualMachine, + ) -> vm::PyResult<vm::PyRef<vm::builtins::PyDict>> { + let input = input + .args + .remove(0) + .downcast::<vm::builtins::PyDict>() + .expect("Should be a py dict"); + + let output = match unwrapped_process(input, vm) { + Ok(ok) => ok, + Err(err) => { + return Err(vm.new_runtime_error(err.to_string())); + } + }; + + Ok(output) + } + + let scope = vm.new_scope_with_builtins(); + + scope.globals.set_item( + "actual_processor", + vm.new_function("actual_processor", actual_processor).into(), + vm, + )?; + + let local_scope = scope.clone(); + vm.run_code_string( + local_scope, + format!( + " +import yt_dlp + +class {}(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + info = actual_processor(info) + return [], info + +inst = {}() +", + $name, $name + ) + .as_str(), + "<embedded post processor initializing code>".to_owned(), + )?; + + Ok(scope + .globals + .get_item("inst", vm) + .expect("We just declared it")) + } + }; } diff --git 
a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs index 43f85e0..b42ae21 100644 --- a/crates/yt_dlp/src/progress_hook.rs +++ b/crates/yt_dlp/src/progress_hook.rs @@ -12,21 +12,21 @@ macro_rules! mk_python_function { ($name:ident, $new_name:ident) => { pub fn $new_name( - mut args: $crate::progress_hook::rustpython::vm::function::FuncArgs, - vm: &$crate::progress_hook::rustpython::vm::VirtualMachine, + mut args: $crate::progress_hook::__priv::vm::function::FuncArgs, + vm: &$crate::progress_hook::__priv::vm::VirtualMachine, ) { - use $crate::progress_hook::rustpython; + use $crate::progress_hook::__priv::vm; let input = { - let dict: rustpython::vm::PyRef<rustpython::vm::builtins::PyDict> = args + let dict: vm::PyRef<vm::builtins::PyDict> = args .args .remove(0) .downcast() .expect("The progress hook is always called with these args"); - let new_dict = rustpython::vm::builtins::PyDict::new_ref(&vm.ctx); + let new_dict = vm::builtins::PyDict::new_ref(&vm.ctx); dict.into_iter() .filter_map(|(name, value)| { - let real_name: rustpython::vm::PyRefExact<rustpython::vm::builtins::PyStr> = + let real_name: vm::PyRefExact<vm::builtins::PyStr> = name.downcast_exact(vm).expect("Is a string"); let name_str = real_name.to_str().expect("Is a string"); if name_str.starts_with('_') { @@ -41,11 +41,14 @@ macro_rules! 
mk_python_function { .expect("This is a transpositions, should always be valid"); }); - $crate::json_dumps(new_dict, vm) + $crate::progress_hook::__priv::json_dumps(new_dict, vm) }; $name(input).expect("Shall not fail!"); } }; } -pub use rustpython; +pub mod __priv { + pub use crate::info_json::{json_dumps, json_loads}; + pub use rustpython::vm; +} diff --git a/crates/yt_dlp/src/python_error.rs b/crates/yt_dlp/src/python_error.rs new file mode 100644 index 0000000..9513956 --- /dev/null +++ b/crates/yt_dlp/src/python_error.rs @@ -0,0 +1,116 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::fmt::Display; + +use log::{Level, debug, log_enabled}; +use rustpython::vm::{ + AsObject, PyPayload, PyRef, VirtualMachine, + builtins::{PyBaseException, PyBaseExceptionRef, PyStr}, + py_io::Write, + suggestion::offer_suggestions, +}; + +#[derive(thiserror::Error, Debug)] +pub struct PythonError(pub String); + +impl Display for PythonError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Python threw an exception: {}", self.0) + } +} + +impl PythonError { + pub(super) fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self { + let buffer = process_exception(vm, exc); + Self(buffer) + } +} + +pub(super) fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String { + let mut buffer = String::new(); + write_exception(vm, &mut buffer, err) + .expect("We are writing into an *in-memory* string, it will always work"); + + if log_enabled!(Level::Debug) { + let mut output = String::new(); + vm.write_exception(&mut output, err) + .expect("We are writing into an *in-memory* string, it will always work"); + 
debug!("Python threw an exception: {output}"); + } + + buffer +} + +// Inlined and changed from `vm.write_exception_inner` +fn write_exception<W: Write>( + vm: &VirtualMachine, + output: &mut W, + exc: &PyBaseExceptionRef, +) -> Result<(), W::Error> { + let varargs = exc.args(); + let args_repr = { + match varargs.len() { + 0 => vec![], + 1 => { + let args0_repr = if true { + varargs[0] + .str(vm) + .unwrap_or_else(|_| PyStr::from("<element str() failed>").into_ref(&vm.ctx)) + } else { + varargs[0].repr(vm).unwrap_or_else(|_| { + PyStr::from("<element repr() failed>").into_ref(&vm.ctx) + }) + }; + vec![args0_repr] + } + _ => varargs + .iter() + .map(|vararg| { + vararg.repr(vm).unwrap_or_else(|_| { + PyStr::from("<element repr() failed>").into_ref(&vm.ctx) + }) + }) + .collect(), + } + }; + + let exc_class = exc.class(); + + if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) { + unreachable!( + "A syntax error should never be raised, \ + as yt_dlp should not have them and neither our embedded code" + ); + } + + let exc_name = exc_class.name(); + match args_repr.len() { + 0 => write!(output, "{exc_name}"), + 1 => write!(output, "{}: {}", exc_name, args_repr[0]), + _ => write!( + output, + "{}: ({})", + exc_name, + args_repr + .iter() + .map(|val| val.as_str()) + .collect::<Vec<_>>() + .join(", "), + ), + }?; + + match offer_suggestions(exc, vm) { + Some(suggestions) => { + write!(output, ". Did you mean: '{suggestions}'?") + } + None => Ok(()), + } +} |