diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-06-13 20:54:49 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-06-13 20:56:40 +0200 |
commit | 69145b4deed4fe512239a9f88e6af69d3b8c0309 (patch) | |
tree | 7643edd350c1cec75f91e0982ffd3c840f8661fb /crates/yt_dlp/src/lib.rs | |
parent | build(treewide): Update (diff) | |
download | yt-69145b4deed4fe512239a9f88e6af69d3b8c0309.zip |
feat({yt_dlp,yt}): Migrate from pyo3 to rustpython
That allows us to avoid cpython's GIL and gives us full ability to leverage async/concurrent code to speed up python operations. I have also taken the opportunity to change the `InfoJson` struct to an untyped json value, as that is what it actually is.
Diffstat (limited to 'crates/yt_dlp/src/lib.rs')
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 954 |
1 files changed, 473 insertions, 481 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index c6d9290..34b8a5d 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -1,549 +1,541 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -// The pyo3 `pyfunction` proc-macros call unsafe functions internally, which trigger this lint. -#![allow(unsafe_op_in_unsafe_fn)] -#![allow(clippy::missing_errors_doc)] - -use std::io::stderr; -use std::{env, process}; -use std::{fs::File, io::Write}; - -use std::{path::PathBuf, sync::Once}; - -use crate::{duration::Duration, logging::setup_logging, wrapper::info_json::InfoJson}; - -use bytes::Bytes; -use error::YtDlpError; -use log::{Level, debug, info, log_enabled}; -use pyo3::types::{PyString, PyTuple, PyTupleMethods}; -use pyo3::{ - Bound, PyAny, PyResult, Python, pyfunction, - types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyListMethods, PyModule}, - wrap_pyfunction, +//! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. + +use std::io::Write; +use std::mem; +use std::{env, fs::File, path::PathBuf}; + +use indexmap::IndexMap; +use log::{Level, debug, error, info, log_enabled}; +use logging::setup_logging; +use rustpython::vm::builtins::PyList; +use rustpython::{ + InterpreterConfig, + vm::{ + self, Interpreter, PyObjectRef, PyRef, VirtualMachine, + builtins::{PyBaseException, PyDict, PyStr}, + function::{FuncArgs, KwArgs, PosArgs}, + }, }; -use serde::Serialize; -use serde_json::{Map, Value}; use url::Url; -pub mod duration; -pub mod error; -pub mod logging; -pub mod wrapper; +mod logging; +pub mod progress_hook; -#[cfg(test)] -mod tests; - -/// Synchronisation helper, to ensure that we don't setup the logger multiple times -static SYNC_OBJ: Once = Once::new(); +#[macro_export] +macro_rules! json_get { + ($value:expr, $name:literal, $into:ident) => { + $crate::json_cast!($value.get($name).expect("Should exist"), $into) + }; +} -/// Add a logger to the yt-dlp options. -/// If you have an logger set (i.e. for rust), than this will log to rust -/// -/// # Panics -/// This should never panic. -pub fn add_logger_and_sig_handler<'a>( - opts: Bound<'a, PyDict>, - py: Python<'_>, -) -> PyResult<Bound<'a, PyDict>> { - /// Is the specified record to be logged? Returns false for no, - /// true for yes. Filters can either modify log records in-place or - /// return a completely different record instance which will replace - /// the original log record in any future processing of the event. - #[pyfunction] - fn filter_error_log(_py: Python<'_>, record: &Bound<'_, PyAny>) -> bool { - // Filter out all error logs (they are propagated as rust errors) - let levelname: String = record - .getattr("levelname") - .expect("This should exist") - .extract() - .expect("This should be a String"); - - let return_value = levelname.as_str() != "ERROR"; - - if log_enabled!(Level::Debug) && !return_value { - let message: String = record - .call_method0("getMessage") - .expect("This method exists") - .extract() - .expect("The message is a string"); - - debug!("Swollowed error message: '{message}'"); - } - return_value - } +#[macro_export] +macro_rules! json_cast { + ($value:expr, $into:ident) => { + $value.$into().expect(concat!( + "Should be able to cast value into ", + stringify!($into) + )) + }; +} - setup_logging(py, "yt_dlp")?; - - let logging = PyModule::import(py, "logging")?; - let ytdl_logger = logging.call_method1("getLogger", ("yt_dlp",))?; - - // Ensure that all events are logged by setting the log level to NOTSET (we filter on rust's side) - // Also use this static, to ensure that we don't configure the logger every time - SYNC_OBJ.call_once(|| { - // Disable the SIGINT (Ctrl+C) handler, python installs. - // This allows the user to actually stop the application with Ctrl+C. - // This is here because it can only be run in the main thread and this was here already. - py.run( - c"\ -import signal -signal.signal(signal.SIGINT, signal.SIG_DFL)", - None, - None, - ) - .expect("This code should always work"); - - let config_opts = PyDict::new(py); - config_opts - .set_item("level", 0) - .expect("Setting this item should always work"); - - logging - .call_method("basicConfig", (), Some(&config_opts)) - .expect("This method exists"); - }); - - ytdl_logger.call_method1( - "addFilter", - (wrap_pyfunction!(filter_error_log, py).expect("This function can be wrapped"),), - )?; - - // This was taken from `ytcc`, I don't think it is still applicable - // ytdl_logger.setattr("propagate", false)?; - // let logging_null_handler = logging.call_method0("NullHandler")?; - // ytdl_logger.setattr("addHandler", logging_null_handler)?; - - opts.set_item("logger", ytdl_logger).expect("Should work"); - - Ok(opts) +/// The core of the `yt_dlp` interface. +pub struct YoutubeDL { + interpreter: Interpreter, + youtube_dl_class: PyObjectRef, + yt_dlp_module: PyObjectRef, + options: serde_json::Map<String, serde_json::Value>, } -#[pyfunction] -#[allow(clippy::too_many_lines)] -#[allow(clippy::missing_panics_doc)] -#[allow(clippy::items_after_statements)] -#[allow( - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - clippy::cast_precision_loss -)] -pub fn progress_hook(py: Python<'_>, input: &Bound<'_, PyDict>) -> PyResult<()> { - // Only add the handler, if the log-level is higher than Debug (this avoids covering debug - // messages). - if log_enabled!(Level::Debug) { - return Ok(()); +impl std::fmt::Debug for YoutubeDL { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // TODO(@bpeetz): Use something useful here. <2025-06-13> + f.write_str("YoutubeDL") } +} - // ANSI ESCAPE CODES Wrappers {{{ - // see: https://en.wikipedia.org/wiki/ANSI_escape_code#Control_Sequence_Introducer_commands - const CSI: &str = "\x1b["; - fn clear_whole_line() { - eprint!("{CSI}2K"); - } - fn move_to_col(x: usize) { - eprint!("{CSI}{x}G"); - } - // }}} - - let input: Map<String, Value> = serde_json::from_str(&json_dumps( - py, - input - .downcast::<PyAny>() - .expect("Will always work") - .to_owned(), - )?) - .expect("python's json is valid"); - - macro_rules! get { - (@interrogate $item:ident, $type_fun:ident, $get_fun:ident, $name:expr) => {{ - let a = $item.get($name).expect(concat!( - "The field '", - stringify!($name), - "' should exist." - )); - - if a.$type_fun() { - a.$get_fun().expect( - "The should have been checked in the if guard, so unpacking here is fine", - ) - } else { - panic!( - "Value {} => \n{}\n is not of type: {}", - $name, - a, - stringify!($type_fun) - ); +impl YoutubeDL { + /// Construct this instance from options. + /// + /// # Panics + /// If `yt_dlp` changed their interface. + /// + /// # Errors + /// If a python call fails. + pub fn from_options(mut options: YoutubeDLOptions) -> Result<Self, build::Error> { + let mut settings = vm::Settings::default(); + if let Ok(python_path) = env::var("PYTHONPATH") { + for path in python_path.split(':') { + settings.path_list.push(path.to_owned()); } - }}; + } else { + error!( + "No PYTHONPATH found or invalid utf8. \ + This means, that you probably did not \ + supply the yt_dlp!" + ); + } - ($type_fun:ident, $get_fun:ident, $name1:expr, $name2:expr) => {{ - let a = get! {@interrogate input, is_object, as_object, $name1}; - let b = get! {@interrogate a, $type_fun, $get_fun, $name2}; - b - }}; + settings.install_signal_handlers = false; - ($type_fun:ident, $get_fun:ident, $name:expr) => {{ - get! {@interrogate input, $type_fun, $get_fun, $name} - }}; - } + // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> + settings.optimize = 0; - macro_rules! default_get { - (@interrogate $item:ident, $default:expr, $get_fun:ident, $name:expr) => {{ - let a = if let Some(field) = $item.get($name) { - field.$get_fun().unwrap_or($default) - } else { - $default - }; - a - }}; - - ($get_fun:ident, $default:expr, $name1:expr, $name2:expr) => {{ - let a = get! {@interrogate input, is_object, as_object, $name1}; - let b = default_get! {@interrogate a, $default, $get_fun, $name2}; - b - }}; - - ($get_fun:ident, $default:expr, $name:expr) => {{ - default_get! {@interrogate input, $default, $get_fun, $name} - }}; - } + settings.isolated = true; - macro_rules! c { - ($color:expr, $format:expr) => { - format!("\x1b[{}m{}\x1b[0m", $color, $format) - }; - } + let interpreter = InterpreterConfig::new() + .init_stdlib() + .settings(settings) + .interpreter(); - fn format_bytes(bytes: u64) -> String { - let bytes = Bytes::new(bytes); - bytes.to_string() - } + let output_options = options.options.clone(); - fn format_speed(speed: f64) -> String { - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let bytes = Bytes::new(speed.floor() as u64); - format!("{bytes}/s") - } + let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { + let yt_dlp_module = vm.import("yt_dlp", 0)?; + let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; - let get_title = || -> String { - match get! {is_string, as_str, "info_dict", "ext"} { - "vtt" => { - format!( - "Subtitles ({})", - default_get! {as_str, "<No Subtitle Language>", "info_dict", "name"} - ) + let maybe_hook = mem::take(&mut options.progress_hook); + let opts = options.into_py_dict(vm); + if let Some(function) = maybe_hook { + opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { + let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); + vm.new_pyobj(vec![hook]) + }) + .expect("Should work?"); } - "webm" | "mp4" | "mp3" | "m4a" => { - default_get! { as_str, "<No title>", "info_dict", "title"}.to_owned() - } - other => panic!("The extension '{other}' is not yet implemented"), - } - }; - match get! {is_string, as_str, "status"} { - "downloading" => { - let elapsed = default_get! {as_f64, 0.0f64, "elapsed"}; - let eta = default_get! {as_f64, 0.0, "eta"}; - let speed = default_get! {as_f64, 0.0, "speed"}; - - let downloaded_bytes = get! {is_u64, as_u64, "downloaded_bytes"}; - let (total_bytes, bytes_is_estimate): (u64, &'static str) = { - let total_bytes = default_get!(as_u64, 0, "total_bytes"); - if total_bytes == 0 { - let maybe_estimate = default_get!(as_u64, 0, "total_bytes_estimate"); - - if maybe_estimate == 0 { - // The download speed should be in bytes per second and the eta in seconds. - // Thus multiplying them gets us the raw bytes (which were estimated by `yt_dlp`, from their `info.json`) - let bytes_still_needed = (speed * eta).ceil() as u64; - - (downloaded_bytes + bytes_still_needed, "~") - } else { - (maybe_estimate, "~") + { + // Unconditionally set a logger. + // Otherwise, yt_dlp will log to stderr. + + /// Is the specified record to be logged? Returns false for no, + /// true for yes. Filters can either modify log records in-place or + /// return a completely different record instance which will replace + /// the original log record in any future processing of the event. + fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { + let record = input.args.remove(0); + + // Filter out all error logs (they are propagated as rust errors) + let levelname: PyRef<PyStr> = record + .get_attr("levelname", vm) + .expect("This should exist") + .downcast() + .expect("This should be a String"); + + let return_value = levelname.as_str() != "ERROR"; + + if log_enabled!(Level::Debug) && !return_value { + let message: String = { + let get_message = record.get_attr("getMessage", vm).expect("Is set"); + let message: PyRef<PyStr> = get_message + .call((), vm) + .expect("Can be called") + .downcast() + .expect("Downcasting works"); + + message.as_str().to_owned() + }; + + debug!("Swollowed error message: '{message}'"); } - } else { - (total_bytes, "") + return_value } - }; - let percent: f64 = { - if total_bytes == 0 { - 100.0 - } else { - (downloaded_bytes as f64 / total_bytes as f64) * 100.0 + + let logging = setup_logging(vm, "yt_dlp")?; + let ytdl_logger = { + let get_logger = logging.get_item("getLogger", vm)?; + get_logger.call(("yt_dlp",), vm)? + }; + + { + let args = FuncArgs::new( + PosArgs::new(vec![]), + KwArgs::new({ + let mut map = IndexMap::new(); + // Ensure that all events are logged by setting + // the log level to NOTSET (we filter on rust's side) + map.insert("level".to_owned(), vm.new_pyobj(0)); + map + }), + ); + + let basic_config = logging.get_item("basicConfig", vm)?; + basic_config.call(args, vm)?; } - }; - clear_whole_line(); - move_to_col(1); - - eprint!( - "'{}' [{}/{} at {}] -> [{} of {}{} {}] ", - c!("34;1", get_title()), - c!("33;1", Duration::from(Some(elapsed))), - c!("33;1", Duration::from(Some(eta))), - c!("32;1", format_speed(speed)), - c!("31;1", format_bytes(downloaded_bytes)), - c!("31;1", bytes_is_estimate), - c!("31;1", format_bytes(total_bytes)), - c!("36;1", format!("{:.02}%", percent)) - ); - stderr().flush()?; - } - "finished" => { - eprintln!("-> Finished downloading."); - } - "error" => { - // TODO: This should probably return an Err. But I'm not so sure where the error would - // bubble up to (i.e., who would catch it) <2025-01-21> - eprintln!("-> Error while downloading: {}", get_title()); - process::exit(1); - } - other => unreachable!("'{other}' should not be a valid state!"), - }; + { + let add_filter = ytdl_logger.get_attr("addFilter", vm)?; + add_filter.call( + (vm.new_function("yt_dlp_error_filter", filter_error_log),), + vm, + )?; + } - Ok(()) -} + opts.set_item("logger", ytdl_logger, vm)?; + } -pub fn add_hooks<'a>(opts: Bound<'a, PyDict>, py: Python<'_>) -> PyResult<Bound<'a, PyDict>> { - if let Some(hooks) = opts.get_item("progress_hooks")? { - let hooks = hooks.downcast::<PyList>()?; - hooks.append(wrap_pyfunction!(progress_hook, py)?)?; + let youtube_dl_class = class.call((opts,), vm)?; - opts.set_item("progress_hooks", hooks)?; - } else { - // No hooks are set yet - let hooks_list = PyList::new(py, &[wrap_pyfunction!(progress_hook, py)?])?; + Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class)) + }) { + Ok(ok) => ok, + Err(err) => { + interpreter.finalize(Some(err)); + return Err(build::Error::Python); + } + }; - opts.set_item("progress_hooks", hooks_list)?; + Ok(Self { + interpreter, + youtube_dl_class, + yt_dlp_module, + options: output_options, + }) } - Ok(opts) -} - -/// Take the result of the ie (may be modified) and resolve all unresolved -/// references (URLs, playlist items). -/// -/// It will also download the videos if 'download'. -/// Returns the resolved `ie_result`. -#[allow(clippy::missing_panics_doc)] -pub fn process_ie_result( - yt_dlp_opts: &Map<String, Value>, - ie_result: InfoJson, - download: bool, -) -> Result<InfoJson, YtDlpError> { - Python::with_gil(|py| -> Result<InfoJson, YtDlpError> { - let opts = json_map_to_py_dict(yt_dlp_opts, py)?; - - let instance = get_yt_dlp(py, opts)?; - - let args = { - let ie_result = json_loads_str(py, ie_result)?; - (ie_result,) - }; + /// # Panics + /// + /// If `yt_dlp` changed their location or type of `__version__`. + pub fn version(&self) -> String { + let str_ref: PyRef<PyStr> = self.interpreter.enter_and_expect( + |vm| { + let version_module = self.yt_dlp_module.get_attr("version", vm)?; + let version = version_module.get_attr("__version__", vm)?; + let version = version.downcast().expect("This should always be a string"); + Ok(version) + }, + "yt_dlp version location has changed", + ); + str_ref.to_string() + } - let kwargs = PyDict::new(py); - kwargs.set_item("download", download)?; + /// Download a given list of URLs. + /// Returns the paths they were downloaded to. + /// + /// # Errors + /// If one of the downloads error. + pub fn download(&self, urls: &[Url]) -> Result<Vec<PathBuf>, extract_info::Error> { + let mut out_paths = Vec::with_capacity(urls.len()); + + for url in urls { + info!("Started downloading url: '{url}'"); + let info_json = self.extract_info(url, true, true)?; + + // Try to work around yt-dlp type weirdness + let result_string = if let Some(filename) = info_json.get("filename") { + PathBuf::from(json_cast!(filename, as_str)) + } else { + PathBuf::from(json_get!( + json_cast!( + json_get!(info_json, "requested_downloads", as_array)[0], + as_object + ), + "filename", + as_str + )) + }; - let result = instance - .call_method("process_ie_result", args, Some(&kwargs))? - .downcast_into::<PyDict>() - .expect("This is a dict"); + out_paths.push(result_string); + info!("Finished downloading url"); + } - let result_str = json_dumps(py, result.into_any())?; + Ok(out_paths) + } - serde_json::from_str(&result_str).map_err(Into::into) - }) -} + /// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)` + /// + /// Extract and return the information dictionary of the URL + /// + /// Arguments: + /// - `url` URL to extract + /// + /// Keyword arguments: + /// :`download` Whether to download videos + /// :`process` Whether to resolve all unresolved references (URLs, playlist items). + /// Must be True for download to work + /// + /// # Panics + /// If expectations about python fail to hold. + /// + /// # Errors + /// If python operations fail. + pub fn extract_info( + &self, + url: &Url, + download: bool, + process: bool, + ) -> Result<InfoJson, extract_info::Error> { + match self.interpreter.enter(|vm| { + let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]); + + let kw_args = KwArgs::new({ + let mut map = IndexMap::new(); + map.insert("download".to_owned(), vm.new_pyobj(download)); + map.insert("process".to_owned(), vm.new_pyobj(process)); + map + }); + + let fun_args = FuncArgs::new(pos_args, kw_args); + + let inner = self.youtube_dl_class.get_attr("extract_info", vm)?; + let result = inner + .call_with_args(fun_args, vm)? + .downcast::<PyDict>() + .expect("This is a dict"); + + // Resolve the generator object + if let Ok(generator) = result.get_item("entries", vm) { + if generator.payload_is::<PyList>() { + // already resolved. Do nothing + } else { + let max_backlog = self.options.get("playlistend").map_or(10000, |value| { + usize::try_from(value.as_u64().expect("Works")).expect("Should work") + }); + + let mut out = vec![]; + let next = generator.get_attr("__next__", vm)?; + while let Ok(output) = next.call((), vm) { + out.push(output); + + if out.len() == max_backlog { + break; + } + } + result.set_item("entries", vm.new_pyobj(out), vm)?; + } + } -/// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)` -/// -/// Extract and return the information dictionary of the URL -/// -/// Arguments: -/// @param url URL to extract -/// -/// Keyword arguments: -/// @param download Whether to download videos -/// @param process Whether to resolve all unresolved references (URLs, playlist items). -/// Must be True for download to work -/// @param `ie_key` Use only the extractor with this key -/// -/// @param `extra_info` Dictionary containing the extra values to add to the info (For internal use only) -/// @`force_generic_extractor` Force using the generic extractor (Deprecated; use `ie_key`='Generic') -#[allow(clippy::missing_panics_doc)] -pub fn extract_info( - yt_dlp_opts: &Map<String, Value>, - url: &Url, - download: bool, - process: bool, -) -> Result<InfoJson, YtDlpError> { - Python::with_gil(|py| -> Result<InfoJson, YtDlpError> { - let opts = json_map_to_py_dict(yt_dlp_opts, py)?; - - let instance = get_yt_dlp(py, opts)?; - let args = (url.as_str(),); - - let kwargs = PyDict::new(py); - kwargs.set_item("download", download)?; - kwargs.set_item("process", process)?; - - let result = instance - .call_method("extract_info", args, Some(&kwargs))? - .downcast_into::<PyDict>() - .expect("This is a dict"); - - // Resolve the generator object - if let Some(generator) = result.get_item("entries")? { - if generator.is_instance_of::<PyList>() { - // already resolved. Do nothing - } else { - let max_backlog = yt_dlp_opts.get("playlistend").map_or(10000, |value| { - usize::try_from(value.as_u64().expect("Works")).expect("Should work") - }); + let result = { + let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; + let value = sanitize.call((result,), vm)?; - let mut out = vec![]; - while let Ok(output) = generator.call_method0("__next__") { - out.push(output); + value.downcast::<PyDict>().expect("This should stay a dict") + }; - if out.len() == max_backlog { - break; - } + let result_json = json_dumps(result, vm); + + if let Ok(confirm) = env::var("YT_STORE_INFO_JSON") { + if confirm == "yes" { + let mut file = File::create("output.info.json").unwrap(); + write!( + file, + "{}", + serde_json::to_string_pretty(&serde_json::Value::Object( + result_json.clone() + )) + .expect("Valid json") + ) + .unwrap(); } - result.set_item("entries", out)?; + } + + Ok::<_, PyRef<PyBaseException>>(result_json) + }) { + Ok(ok) => Ok(ok), + Err(err) => { + self.interpreter.enter(|vm| { + vm.print_exception(err); + }); + Err(extract_info::Error::Python) } } + } + + /// Take the (potentially modified) result of the information extractor (i.e., + /// [`Self::extract_info`] with `process` and `download` set to false) + /// and resolve all unresolved references (URLs, + /// playlist items). + /// + /// It will also download the videos if 'download' is true. + /// Returns the resolved `ie_result`. + /// + /// # Panics + /// If expectations about python fail to hold. + /// + /// # Errors + /// If python operations fail. + pub fn process_ie_result( + &self, + ie_result: InfoJson, + download: bool, + ) -> Result<InfoJson, process_ie_result::Error> { + match self.interpreter.enter(|vm| { + let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]); + + let kw_args = KwArgs::new({ + let mut map = IndexMap::new(); + map.insert("download".to_owned(), vm.new_pyobj(download)); + map + }); + + let fun_args = FuncArgs::new(pos_args, kw_args); + + let inner = self.youtube_dl_class.get_attr("process_ie_result", vm)?; + let result = inner + .call_with_args(fun_args, vm)? + .downcast::<PyDict>() + .expect("This is a dict"); + + let result = { + let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; + let value = sanitize.call((result,), vm)?; + + value.downcast::<PyDict>().expect("This should stay a dict") + }; - let result_str = json_dumps(py, result.into_any())?; + let result_json = json_dumps(result, vm); - if let Ok(confirm) = env::var("YT_STORE_INFO_JSON") { - if confirm == "yes" { - let mut file = File::create("output.info.json")?; - write!(file, "{result_str}").unwrap(); + Ok::<_, PyRef<PyBaseException>>(result_json) + }) { + Ok(ok) => Ok(ok), + Err(err) => { + self.interpreter.enter(|vm| { + vm.print_exception(err); + }); + Err(process_ie_result::Error::Python) } } - - serde_json::from_str(&result_str).map_err(Into::into) - }) + } } -/// # Panics -/// Only if python fails to return a valid URL. -pub fn unsmuggle_url(smug_url: &Url) -> PyResult<Url> { - Python::with_gil(|py| { - let utils = get_yt_dlp_utils(py)?; - let url = utils - .call_method1("unsmuggle_url", (smug_url.as_str(),))? - .downcast::<PyTuple>()? - .get_item(0)?; - - let url: Url = url - .downcast::<PyString>()? - .to_string() - .parse() - .expect("Python should be able to return a valid url"); - - Ok(url) - }) +#[allow(missing_docs)] +pub mod process_ie_result { + #[derive(Debug, thiserror::Error, Clone, Copy)] + pub enum Error { + #[error("Python threw an exception")] + Python, + } } - -/// Download a given list of URLs. -/// Returns the paths they were downloaded to. -/// -/// # Panics -/// Only if `yt_dlp` changes their `info_json` schema. -pub fn download( - urls: &[Url], - download_options: &Map<String, Value>, -) -> Result<Vec<PathBuf>, YtDlpError> { - let mut out_paths = Vec::with_capacity(urls.len()); - - for url in urls { - info!("Started downloading url: '{}'", url); - let info_json = extract_info(download_options, url, true, true)?; - - // Try to work around yt-dlp type weirdness - let result_string = if let Some(filename) = info_json.filename { - filename - } else { - info_json.requested_downloads.expect("This must exist")[0] - .filename - .clone() - }; - - out_paths.push(result_string); - info!("Finished downloading url: '{}'", url); +#[allow(missing_docs)] +pub mod extract_info { + #[derive(Debug, thiserror::Error, Clone, Copy)] + pub enum Error { + #[error("Python threw an exception")] + Python, } - - Ok(out_paths) } -fn json_map_to_py_dict<'a>( - map: &Map<String, Value>, - py: Python<'a>, -) -> PyResult<Bound<'a, PyDict>> { - let json_string = serde_json::to_string(&map).expect("This must always work"); +pub type InfoJson = serde_json::Map<String, serde_json::Value>; +pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); - let python_dict = json_loads(py, json_string)?; - - Ok(python_dict) +/// Options, that are used to customize the download behaviour. +/// +/// In the future, this might get a Builder api. +/// +/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. +#[derive(Default, Debug)] +pub struct YoutubeDLOptions { + options: serde_json::Map<String, serde_json::Value>, + progress_hook: Option<ProgressHookFunction>, } -fn json_dumps(py: Python<'_>, input: Bound<'_, PyAny>) -> PyResult<String> { - // json.dumps(yt_dlp.sanitize_info(input)) +impl YoutubeDLOptions { + #[must_use] + pub fn new() -> Self { + Self { + options: serde_json::Map::new(), + progress_hook: None, + } + } - let yt_dlp = get_yt_dlp(py, PyDict::new(py))?; - let sanitized_result = yt_dlp.call_method1("sanitize_info", (input,))?; + #[must_use] + pub fn set(self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self { + let mut options = self.options; + options.insert(key.into(), value.into()); - let json = PyModule::import(py, "json")?; - let dumps = json.getattr("dumps")?; + Self { + options, + progress_hook: self.progress_hook, + } + } - let output = dumps.call1((sanitized_result,))?; + #[must_use] + pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { + if let Some(_previous_hook) = self.progress_hook { + todo!() + } else { + Self { + options: self.options, + progress_hook: Some(progress_hook), + } + } + } - let output_str = output.extract::<String>()?; + /// # Errors + /// If the underlying [`YoutubeDL::from_options`] errors. + pub fn build(self) -> Result<YoutubeDL, build::Error> { + YoutubeDL::from_options(self) + } - Ok(output_str) -} + #[must_use] + pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self { + Self { + options, + progress_hook: None, + } + } -fn json_loads_str<T: Serialize>(py: Python<'_>, input: T) -> PyResult<Bound<'_, PyDict>> { - let string = serde_json::to_string(&input).expect("Correct json must be pased"); + #[must_use] + pub fn get(&self, key: &str) -> Option<&serde_json::Value> { + self.options.get(key) + } - json_loads(py, string) + fn into_py_dict(self, vm: &VirtualMachine) -> PyRef<PyDict> { + json_loads(self.options, vm) + } } -fn json_loads(py: Python<'_>, input: String) -> PyResult<Bound<'_, PyDict>> { - // json.loads(input) - - let json = PyModule::import(py, "json")?; - let dumps = json.getattr("loads")?; +#[allow(missing_docs)] +pub mod build { + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error("Python threw an exception")] + Python, - let output = dumps.call1((input,))?; - - Ok(output - .downcast::<PyDict>() - .expect("This should always be a PyDict") - .clone()) + #[error("Io error: {0}")] + Io(#[from] std::io::Error), + } } -fn get_yt_dlp_utils(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> { - let yt_dlp = PyModule::import(py, "yt_dlp")?; - let utils = yt_dlp.getattr("utils")?; - - Ok(utils) +fn json_loads( + input: serde_json::Map<String, serde_json::Value>, + vm: &VirtualMachine, +) -> PyRef<PyDict> { + let json = vm.import("json", 0).expect("Module exists"); + let loads = json.get_attr("loads", vm).expect("Method exists"); + let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); + let dict = loads + .call((self_str,), vm) + .expect("Vaild json is always a valid dict"); + + dict.downcast().expect("Should always be a dict") } -fn get_yt_dlp<'a>(py: Python<'a>, opts: Bound<'a, PyDict>) -> PyResult<Bound<'a, PyAny>> { - // Unconditionally set a logger - let opts = add_logger_and_sig_handler(opts, py)?; - let opts = add_hooks(opts, py)?; - let yt_dlp = PyModule::import(py, "yt_dlp")?; - let youtube_dl = yt_dlp.call_method1("YoutubeDL", (opts,))?; - - Ok(youtube_dl) +/// # Panics +/// If expectation about python operations fail. +pub fn json_dumps( + input: PyRef<PyDict>, + vm: &VirtualMachine, +) -> serde_json::Map<String, serde_json::Value> { + let json = vm.import("json", 0).expect("Module exists"); + let dumps = json.get_attr("dumps", vm).expect("Method exists"); + let dict = dumps + .call((input,), vm) + .map_err(|err| vm.print_exception(err)) + .expect("Might not always work, but for our dicts it works"); + + let string: PyRef<PyStr> = dict.downcast().expect("Should always be a string"); + + let real_string = string.to_str().expect("Should be valid utf8"); + + // { + // let mut file = File::create("debug.dump.json").unwrap(); + // write!(file, "{}", real_string).unwrap(); + // } + + let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); + + match value { + serde_json::Value::Object(map) => map, + _ => unreachable!("These should not be json.dumps output"), + } } |