//! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. use std::io::Write; use std::mem; use std::{env, fs::File, path::PathBuf}; use indexmap::IndexMap; use log::{Level, debug, error, info, log_enabled}; use logging::setup_logging; use rustpython::vm::builtins::PyList; use rustpython::{ InterpreterConfig, vm::{ self, Interpreter, PyObjectRef, PyRef, VirtualMachine, builtins::{PyBaseException, PyDict, PyStr}, function::{FuncArgs, KwArgs, PosArgs}, }, }; use url::Url; mod logging; pub mod progress_hook; #[macro_export] macro_rules! json_get { ($value:expr, $name:literal, $into:ident) => { $crate::json_cast!($value.get($name).expect("Should exist"), $into) }; } #[macro_export] macro_rules! json_cast { ($value:expr, $into:ident) => { $value.$into().expect(concat!( "Should be able to cast value into ", stringify!($into) )) }; } /// The core of the `yt_dlp` interface. pub struct YoutubeDL { interpreter: Interpreter, youtube_dl_class: PyObjectRef, yt_dlp_module: PyObjectRef, options: serde_json::Map, } impl std::fmt::Debug for YoutubeDL { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // TODO(@bpeetz): Use something useful here. <2025-06-13> f.write_str("YoutubeDL") } } impl YoutubeDL { /// Construct this instance from options. /// /// # Panics /// If `yt_dlp` changed their interface. /// /// # Errors /// If a python call fails. pub fn from_options(mut options: YoutubeDLOptions) -> Result { let mut settings = vm::Settings::default(); if let Ok(python_path) = env::var("PYTHONPATH") { for path in python_path.split(':') { settings.path_list.push(path.to_owned()); } } else { error!( "No PYTHONPATH found or invalid utf8. \ This means, that you probably did not \ supply the yt_dlp!" ); } settings.install_signal_handlers = false; // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13> settings.optimize = 0; settings.isolated = true; let interpreter = InterpreterConfig::new() .init_stdlib() .settings(settings) .interpreter(); let output_options = options.options.clone(); let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| { let yt_dlp_module = vm.import("yt_dlp", 0)?; let class = yt_dlp_module.get_attr("YoutubeDL", vm)?; let maybe_hook = mem::take(&mut options.progress_hook); let opts = options.into_py_dict(vm); if let Some(function) = maybe_hook { opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || { let hook: PyObjectRef = vm.new_function("progress_hook", function).into(); vm.new_pyobj(vec![hook]) }) .expect("Should work?"); } { // Unconditionally set a logger. // Otherwise, yt_dlp will log to stderr. /// Is the specified record to be logged? Returns false for no, /// true for yes. Filters can either modify log records in-place or /// return a completely different record instance which will replace /// the original log record in any future processing of the event. fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool { let record = input.args.remove(0); // Filter out all error logs (they are propagated as rust errors) let levelname: PyRef = record .get_attr("levelname", vm) .expect("This should exist") .downcast() .expect("This should be a String"); let return_value = levelname.as_str() != "ERROR"; if log_enabled!(Level::Debug) && !return_value { let message: String = { let get_message = record.get_attr("getMessage", vm).expect("Is set"); let message: PyRef = get_message .call((), vm) .expect("Can be called") .downcast() .expect("Downcasting works"); message.as_str().to_owned() }; debug!("Swollowed error message: '{message}'"); } return_value } let logging = setup_logging(vm, "yt_dlp")?; let ytdl_logger = { let get_logger = logging.get_item("getLogger", vm)?; get_logger.call(("yt_dlp",), vm)? }; { let args = FuncArgs::new( PosArgs::new(vec![]), KwArgs::new({ let mut map = IndexMap::new(); // Ensure that all events are logged by setting // the log level to NOTSET (we filter on rust's side) map.insert("level".to_owned(), vm.new_pyobj(0)); map }), ); let basic_config = logging.get_item("basicConfig", vm)?; basic_config.call(args, vm)?; } { let add_filter = ytdl_logger.get_attr("addFilter", vm)?; add_filter.call( (vm.new_function("yt_dlp_error_filter", filter_error_log),), vm, )?; } opts.set_item("logger", ytdl_logger, vm)?; } let youtube_dl_class = class.call((opts,), vm)?; Ok::<_, PyRef>((yt_dlp_module, youtube_dl_class)) }) { Ok(ok) => ok, Err(err) => { interpreter.finalize(Some(err)); return Err(build::Error::Python); } }; Ok(Self { interpreter, youtube_dl_class, yt_dlp_module, options: output_options, }) } /// # Panics /// /// If `yt_dlp` changed their location or type of `__version__`. pub fn version(&self) -> String { let str_ref: PyRef = self.interpreter.enter_and_expect( |vm| { let version_module = self.yt_dlp_module.get_attr("version", vm)?; let version = version_module.get_attr("__version__", vm)?; let version = version.downcast().expect("This should always be a string"); Ok(version) }, "yt_dlp version location has changed", ); str_ref.to_string() } /// Download a given list of URLs. /// Returns the paths they were downloaded to. /// /// # Errors /// If one of the downloads error. pub fn download(&self, urls: &[Url]) -> Result, extract_info::Error> { let mut out_paths = Vec::with_capacity(urls.len()); for url in urls { info!("Started downloading url: '{url}'"); let info_json = self.extract_info(url, true, true)?; // Try to work around yt-dlp type weirdness let result_string = if let Some(filename) = info_json.get("filename") { PathBuf::from(json_cast!(filename, as_str)) } else { PathBuf::from(json_get!( json_cast!( json_get!(info_json, "requested_downloads", as_array)[0], as_object ), "filename", as_str )) }; out_paths.push(result_string); info!("Finished downloading url"); } Ok(out_paths) } /// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)` /// /// Extract and return the information dictionary of the URL /// /// Arguments: /// - `url` URL to extract /// /// Keyword arguments: /// :`download` Whether to download videos /// :`process` Whether to resolve all unresolved references (URLs, playlist items). /// Must be True for download to work /// /// # Panics /// If expectations about python fail to hold. /// /// # Errors /// If python operations fail. pub fn extract_info( &self, url: &Url, download: bool, process: bool, ) -> Result { match self.interpreter.enter(|vm| { let pos_args = PosArgs::new(vec![vm.new_pyobj(url.to_string())]); let kw_args = KwArgs::new({ let mut map = IndexMap::new(); map.insert("download".to_owned(), vm.new_pyobj(download)); map.insert("process".to_owned(), vm.new_pyobj(process)); map }); let fun_args = FuncArgs::new(pos_args, kw_args); let inner = self.youtube_dl_class.get_attr("extract_info", vm)?; let result = inner .call_with_args(fun_args, vm)? .downcast::() .expect("This is a dict"); // Resolve the generator object if let Ok(generator) = result.get_item("entries", vm) { if generator.payload_is::() { // already resolved. Do nothing } else { let max_backlog = self.options.get("playlistend").map_or(10000, |value| { usize::try_from(value.as_u64().expect("Works")).expect("Should work") }); let mut out = vec![]; let next = generator.get_attr("__next__", vm)?; while let Ok(output) = next.call((), vm) { out.push(output); if out.len() == max_backlog { break; } } result.set_item("entries", vm.new_pyobj(out), vm)?; } } let result = { let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; let value = sanitize.call((result,), vm)?; value.downcast::().expect("This should stay a dict") }; let result_json = json_dumps(result, vm); if let Ok(confirm) = env::var("YT_STORE_INFO_JSON") { if confirm == "yes" { let mut file = File::create("output.info.json").unwrap(); write!( file, "{}", serde_json::to_string_pretty(&serde_json::Value::Object( result_json.clone() )) .expect("Valid json") ) .unwrap(); } } Ok::<_, PyRef>(result_json) }) { Ok(ok) => Ok(ok), Err(err) => { self.interpreter.enter(|vm| { vm.print_exception(err); }); Err(extract_info::Error::Python) } } } /// Take the (potentially modified) result of the information extractor (i.e., /// [`Self::extract_info`] with `process` and `download` set to false) /// and resolve all unresolved references (URLs, /// playlist items). /// /// It will also download the videos if 'download' is true. /// Returns the resolved `ie_result`. /// /// # Panics /// If expectations about python fail to hold. /// /// # Errors /// If python operations fail. pub fn process_ie_result( &self, ie_result: InfoJson, download: bool, ) -> Result { match self.interpreter.enter(|vm| { let pos_args = PosArgs::new(vec![vm.new_pyobj(json_loads(ie_result, vm))]); let kw_args = KwArgs::new({ let mut map = IndexMap::new(); map.insert("download".to_owned(), vm.new_pyobj(download)); map }); let fun_args = FuncArgs::new(pos_args, kw_args); let inner = self.youtube_dl_class.get_attr("process_ie_result", vm)?; let result = inner .call_with_args(fun_args, vm)? .downcast::() .expect("This is a dict"); let result = { let sanitize = self.youtube_dl_class.get_attr("sanitize_info", vm)?; let value = sanitize.call((result,), vm)?; value.downcast::().expect("This should stay a dict") }; let result_json = json_dumps(result, vm); Ok::<_, PyRef>(result_json) }) { Ok(ok) => Ok(ok), Err(err) => { self.interpreter.enter(|vm| { vm.print_exception(err); }); Err(process_ie_result::Error::Python) } } } } #[allow(missing_docs)] pub mod process_ie_result { #[derive(Debug, thiserror::Error, Clone, Copy)] pub enum Error { #[error("Python threw an exception")] Python, } } #[allow(missing_docs)] pub mod extract_info { #[derive(Debug, thiserror::Error, Clone, Copy)] pub enum Error { #[error("Python threw an exception")] Python, } } pub type InfoJson = serde_json::Map; pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine); /// Options, that are used to customize the download behaviour. /// /// In the future, this might get a Builder api. /// /// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. #[derive(Default, Debug)] pub struct YoutubeDLOptions { options: serde_json::Map, progress_hook: Option, } impl YoutubeDLOptions { #[must_use] pub fn new() -> Self { Self { options: serde_json::Map::new(), progress_hook: None, } } #[must_use] pub fn set(self, key: impl Into, value: impl Into) -> Self { let mut options = self.options; options.insert(key.into(), value.into()); Self { options, progress_hook: self.progress_hook, } } #[must_use] pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { if let Some(_previous_hook) = self.progress_hook { todo!() } else { Self { options: self.options, progress_hook: Some(progress_hook), } } } /// # Errors /// If the underlying [`YoutubeDL::from_options`] errors. pub fn build(self) -> Result { YoutubeDL::from_options(self) } #[must_use] pub fn from_json_options(options: serde_json::Map) -> Self { Self { options, progress_hook: None, } } #[must_use] pub fn get(&self, key: &str) -> Option<&serde_json::Value> { self.options.get(key) } fn into_py_dict(self, vm: &VirtualMachine) -> PyRef { json_loads(self.options, vm) } } #[allow(missing_docs)] pub mod build { #[derive(Debug, thiserror::Error)] pub enum Error { #[error("Python threw an exception")] Python, #[error("Io error: {0}")] Io(#[from] std::io::Error), } } fn json_loads( input: serde_json::Map, vm: &VirtualMachine, ) -> PyRef { let json = vm.import("json", 0).expect("Module exists"); let loads = json.get_attr("loads", vm).expect("Method exists"); let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); let dict = loads .call((self_str,), vm) .expect("Vaild json is always a valid dict"); dict.downcast().expect("Should always be a dict") } /// # Panics /// If expectation about python operations fail. pub fn json_dumps( input: PyRef, vm: &VirtualMachine, ) -> serde_json::Map { let json = vm.import("json", 0).expect("Module exists"); let dumps = json.get_attr("dumps", vm).expect("Method exists"); let dict = dumps .call((input,), vm) .map_err(|err| vm.print_exception(err)) .expect("Might not always work, but for our dicts it works"); let string: PyRef = dict.downcast().expect("Should always be a string"); let real_string = string.to_str().expect("Should be valid utf8"); // { // let mut file = File::create("debug.dump.json").unwrap(); // write!(file, "{}", real_string).unwrap(); // } let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json"); match value { serde_json::Value::Object(map) => map, _ => unreachable!("These should not be json.dumps output"), } }