diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-08-23 13:06:00 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-08-23 13:06:00 +0200 |
commit | 52e99b38eae6b4f3cb991342ff9ba9abbab9e42c (patch) | |
tree | 9fa6f0582dfb8b6dc7b49bbd6206ab4b533ff900 /crates/yt_dlp/src | |
parent | refactor(cli): Replace the byte parser with the one from the `bytes` crate (diff) | |
download | yt-52e99b38eae6b4f3cb991342ff9ba9abbab9e42c.zip |
refactor(yt_dlp): Also move the `crates` subdirectory
Diffstat (limited to 'crates/yt_dlp/src')
-rw-r--r-- | crates/yt_dlp/src/duration.rs | 71 | ||||
-rw-r--r-- | crates/yt_dlp/src/lib.rs | 412 | ||||
-rw-r--r-- | crates/yt_dlp/src/logging.rs | 125 | ||||
-rw-r--r-- | crates/yt_dlp/src/main.rs | 96 | ||||
-rw-r--r-- | crates/yt_dlp/src/wrapper/info_json.rs | 550 | ||||
-rw-r--r-- | crates/yt_dlp/src/wrapper/mod.rs | 12 | ||||
-rw-r--r-- | crates/yt_dlp/src/wrapper/yt_dlp_options.rs | 62 |
7 files changed, 1328 insertions, 0 deletions
diff --git a/crates/yt_dlp/src/duration.rs b/crates/yt_dlp/src/duration.rs new file mode 100644 index 0000000..cd7454b --- /dev/null +++ b/crates/yt_dlp/src/duration.rs @@ -0,0 +1,71 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +// TODO: This file should be de-duplicated with the same file in the 'yt' crate <2024-06-25> +pub struct Duration { + time: u32, +} + +impl From<&str> for Duration { + fn from(v: &str) -> Self { + let buf: Vec<_> = v.split(':').take(2).collect(); + Self { + time: (buf[0].parse::<u32>().expect("Should be a number") * 60) + + buf[1].parse::<u32>().expect("Should be a number"), + } + } +} + +impl From<Option<f64>> for Duration { + fn from(value: Option<f64>) -> Self { + Self { + time: value.unwrap_or(0.0).ceil() as u32, + } + } +} + +impl std::fmt::Display for Duration { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + const SECOND: u32 = 1; + const MINUTE: u32 = 60 * SECOND; + const HOUR: u32 = 60 * MINUTE; + + let base_hour = self.time - (self.time % HOUR); + let base_min = (self.time % HOUR) - ((self.time % HOUR) % MINUTE); + let base_sec = (self.time % HOUR) % MINUTE; + + let h = base_hour / HOUR; + let m = base_min / MINUTE; + let s = base_sec / SECOND; + + if self.time == 0 { + write!(f, "0s") + } else if h > 0 { + write!(f, "{h}h {m}m") + } else { + write!(f, "{m}m {s}s") + } + } +} +#[cfg(test)] +mod test { + use super::Duration; + + #[test] + fn test_display_duration_1h() { + let dur = Duration { time: 60 * 60 }; + assert_eq!("[1h 0m]".to_owned(), dur.to_string()); + } + #[test] + fn test_display_duration_30min() { + let dur = Duration { time: 60 * 30 }; + assert_eq!("[30m 0s]".to_owned(), dur.to_string()); + } +} diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs new file mode 100644 index 0000000..37d0945 --- /dev/null +++ b/crates/yt_dlp/src/lib.rs @@ -0,0 +1,412 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +// use std::{fs::File, io::Write}; + +use std::{path::PathBuf, sync::Once}; + +use crate::{duration::Duration, logging::setup_logging, wrapper::info_json::InfoJson}; + +use bytes::Bytes; +use log::{info, warn}; +use pyo3::types::{PyString, PyTuple, PyTupleMethods}; +use pyo3::{ + pyfunction, + types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyListMethods, PyModule}, + wrap_pyfunction_bound, Bound, PyAny, PyResult, Python, +}; +use serde::Serialize; +use serde_json::{Map, Value}; +use url::Url; + +pub mod duration; +pub mod logging; +pub mod wrapper; + +/// Synchronisation helper, to ensure that we don't setup the logger multiple times +static SYNC_OBJ: Once = Once::new(); + +/// Add a logger to the yt-dlp options. +/// If you have an logger set (i.e. for rust), than this will log to rust +pub fn add_logger_and_sig_handler<'a>( + opts: Bound<'a, PyDict>, + py: Python, +) -> PyResult<Bound<'a, PyDict>> { + setup_logging(py, "yt_dlp")?; + + let logging = PyModule::import_bound(py, "logging")?; + let ytdl_logger = logging.call_method1("getLogger", ("yt_dlp",))?; + + // Ensure that all events are logged by setting the log level to NOTSET (we filter on rust's side) + // Also use this static, to ensure that we don't configure the logger every time + SYNC_OBJ.call_once(|| { + // Disable the SIGINT (Ctrl+C) handler, python installs. + // This allows the user to actually stop the application with Ctrl+C. + // This is here because it can only be run in the main thread and this was here already. + py.run_bound( + r#" +import signal +signal.signal(signal.SIGINT, signal.SIG_DFL) + "#, + None, + None, + ) + .expect("This code should always work"); + + let config_opts = PyDict::new_bound(py); + config_opts + .set_item("level", 0) + .expect("Setting this item should always work"); + + logging + .call_method("basicConfig", (), Some(&config_opts)) + .expect("This method exists"); + }); + + // This was taken from `ytcc`, I don't think it is still applicable + // ytdl_logger.setattr("propagate", false)?; + // let logging_null_handler = logging.call_method0("NullHandler")?; + // ytdl_logger.setattr("addHandler", logging_null_handler)?; + + opts.set_item("logger", ytdl_logger).expect("Should work"); + + Ok(opts) +} + +#[pyfunction] +pub fn progress_hook<'a>(py: Python, input: Bound<'_, PyDict>) -> PyResult<()> { + let input: serde_json::Map<String, Value> = serde_json::from_str(&json_dumps( + py, + input + .downcast::<PyAny>() + .expect("Will always work") + .to_owned(), + )?) + .expect("Python should always produce valid json"); + + macro_rules! get { + (@interrogate $item:ident, $type_fun:ident, $get_fun:ident, $name:expr) => {{ + let a = $item.get($name).expect(concat!( + "The field '", + stringify!($name), + "' should exist." + )); + + if a.$type_fun() { + a.$get_fun().expect( + "The should have been checked in the if guard, so unpacking here is fine", + ) + } else { + panic!( + "Value {} => \n{}\n is not of type: {}", + $name, + a, + stringify!($type_fun) + ); + } + }}; + + ($type_fun:ident, $get_fun:ident, $name1:expr, $name2:expr) => {{ + let a = get! {@interrogate input, is_object, as_object, $name1}; + let b = get! {@interrogate a, $type_fun, $get_fun, $name2}; + b + }}; + + ($type_fun:ident, $get_fun:ident, $name:expr) => {{ + get! {@interrogate input, $type_fun, $get_fun, $name} + }}; + } + + macro_rules! default_get { + (@interrogate $item:ident, $default:expr, $get_fun:ident, $name:expr) => {{ + let a = if let Some(field) = $item.get($name) { + field.$get_fun().unwrap_or($default) + } else { + $default + }; + a + }}; + + ($get_fun:ident, $default:expr, $name1:expr, $name2:expr) => {{ + let a = get! {@interrogate input, is_object, as_object, $name1}; + let b = default_get! {@interrogate a, $default, $get_fun, $name2}; + b + }}; + + ($get_fun:ident, $default:expr, $name:expr) => {{ + default_get! {@interrogate input, $default, $get_fun, $name} + }}; + } + + macro_rules! c { + ($color:expr, $format:expr) => { + format!("\x1b[{}m{}\x1b[0m", $color, $format) + }; + } + + fn format_bytes(bytes: u64) -> String { + let bytes = Bytes::new(bytes); + bytes.to_string() + } + + fn format_speed(speed: f64) -> String { + let bytes = Bytes::new(speed.floor() as u64); + format!("{}/s", bytes) + } + + let get_title = |add_extension: bool| -> String { + match get! {is_string, as_str, "info_dict", "ext"} { + "vtt" => { + format!( + "Subtitles ({})", + get! {is_string, as_str, "info_dict", "name"} + ) + } + title_extension @ ("webm" | "mp4" | "m4a") => { + if add_extension { + format!( + "{} ({})", + default_get! { as_str, "<No title>", "info_dict", "title"}, + title_extension + ) + } else { + default_get! { as_str, "<No title>", "info_dict", "title"}.to_owned() + } + } + other => panic!("The extension '{}' is not yet implemented", other), + } + }; + + match get! {is_string, as_str, "status"} { + "downloading" => { + let elapsed = default_get! {as_f64, 0.0f64, "elapsed"}; + let eta = default_get! {as_f64, 0.0, "eta"}; + let speed = default_get! {as_f64, 0.0, "speed"}; + + let downloaded_bytes = get! {is_u64, as_u64, "downloaded_bytes"}; + let total_bytes = { + let total_bytes = default_get!(as_u64, 0, "total_bytes"); + if total_bytes == 0 { + let estimate = default_get!(as_u64, 0, "total_bytes_estimate"); + warn!( + "The video does not have a total_byte count, using an estimate of '{}'", + estimate + ); + estimate + } else { + total_bytes + } + }; + let percent: f64 = { + if total_bytes == 0 { + 100.0 + } else { + (downloaded_bytes as f64 / total_bytes as f64) * 100.0 + } + }; + + print!("\x1b[1F"); // Move one line up, to allow the `println` after it to print a newline + print!("\x1b[2K"); // Clear whole line. + print!("\x1b[1G"); // Move cursor to column 1. + + println!( + "'{}' [{}/{} at {}] -> [{}/{} {}]", + c!("34;1", get_title(true)), + c!("33;1", Duration::from(Some(elapsed))), + c!("33;1", Duration::from(Some(eta))), + c!("32;1", format_speed(speed)), + c!("31;1", format_bytes(downloaded_bytes)), + c!("31;1", format_bytes(total_bytes)), + c!("36;1", format!("{:.02}%", percent)) + ); + } + "finished" => { + println!("Finished downloading: '{}'", c!("34;1", get_title(false))) + } + "error" => { + panic!("Error whilst downloading: {}", get_title(true)) + } + other => panic!("{} is not a valid state!", other), + }; + + Ok(()) +} + +pub fn add_hooks<'a>(opts: Bound<'a, PyDict>, py: Python) -> PyResult<Bound<'a, PyDict>> { + if let Some(hooks) = opts.get_item("progress_hooks")? { + let hooks = hooks.downcast::<PyList>()?; + hooks.append(wrap_pyfunction_bound!(progress_hook, py)?)?; + + opts.set_item("progress_hooks", hooks)?; + } else { + // No hooks are set yet + let hooks_list = PyList::new_bound(py, &[wrap_pyfunction_bound!(progress_hook, py)?]); + + opts.set_item("progress_hooks", hooks_list)?; + } + + Ok(opts) +} + +/// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)` +/// +/// Extract and return the information dictionary of the URL +/// +/// Arguments: +/// @param url URL to extract +/// +/// Keyword arguments: +/// @param download Whether to download videos +/// @param process Whether to resolve all unresolved references (URLs, playlist items). +/// Must be True for download to work +/// @param ie_key Use only the extractor with this key +/// +/// @param extra_info Dictionary containing the extra values to add to the info (For internal use only) +/// @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic') +pub async fn extract_info( + yt_dlp_opts: &Map<String, Value>, + url: &Url, + download: bool, + process: bool, +) -> PyResult<InfoJson> { + Python::with_gil(|py| { + let opts = json_map_to_py_dict(yt_dlp_opts, py)?; + + let instance = get_yt_dlp(py, opts)?; + let args = (url.as_str(),); + + let kwargs = PyDict::new_bound(py); + kwargs.set_item("download", download)?; + kwargs.set_item("process", process)?; + + let result = instance.call_method("extract_info", args, Some(&kwargs))?; + + // Remove the `<generator at 0xsome_hex>`, by setting it to null + if !process { + result.set_item("entries", ())?; + } + + let result_str = json_dumps(py, result)?; + + //let mut file = File::create("output.info.json").unwrap(); + //write!(file, "{}", result_str).unwrap(); + + Ok(serde_json::from_str(&result_str) + .expect("Python should be able to produce correct json")) + }) +} + +pub fn unsmuggle_url(smug_url: Url) -> PyResult<Url> { + Python::with_gil(|py| { + let utils = get_yt_dlp_utils(py)?; + let url = utils + .call_method1("unsmuggle_url", (smug_url.as_str(),))? + .downcast::<PyTuple>()? + .get_item(0)?; + + let url: Url = url + .downcast::<PyString>()? + .to_string() + .parse() + .expect("Python should be able to return a valid url"); + + Ok(url) + }) +} + +/// Download a given list of URLs. +/// Returns the paths they were downloaded to. +pub async fn download( + urls: &[Url], + download_options: &Map<String, Value>, +) -> PyResult<Vec<PathBuf>> { + let mut out_paths = Vec::with_capacity(urls.len()); + + for url in urls { + info!("Started downloading url: '{}'", url); + let info_json = extract_info(download_options, url, true, true).await?; + + // Try to work around yt-dlp type weirdness + let result_string = if let Some(filename) = info_json.filename { + filename + } else { + (&info_json.requested_downloads.expect("This must exist")[0].filename).to_owned() + }; + + out_paths.push(result_string); + info!("Finished downloading url: '{}'", url); + } + + Ok(out_paths) +} + +fn json_map_to_py_dict<'a>( + map: &Map<String, Value>, + py: Python<'a>, +) -> PyResult<Bound<'a, PyDict>> { + let json_string = serde_json::to_string(&map).expect("This must always work"); + + let python_dict = json_loads(py, json_string)?; + + Ok(python_dict) +} + +fn json_dumps(py: Python, input: Bound<PyAny>) -> PyResult<String> { + // json.dumps(yt_dlp.sanitize_info(input)) + + let yt_dlp = get_yt_dlp(py, PyDict::new_bound(py))?; + let sanitized_result = yt_dlp.call_method1("sanitize_info", (input,))?; + + let json = PyModule::import_bound(py, "json")?; + let dumps = json.getattr("dumps")?; + + let output = dumps.call1((sanitized_result,))?; + + let output_str = output.extract::<String>()?; + + Ok(output_str) +} + +fn json_loads_str<T: Serialize>(py: Python, input: T) -> PyResult<Bound<PyDict>> { + let string = serde_json::to_string(&input).expect("Correct json must be pased"); + + json_loads(py, string) +} + +fn json_loads(py: Python, input: String) -> PyResult<Bound<PyDict>> { + // json.loads(input) + + let json = PyModule::import_bound(py, "json")?; + let dumps = json.getattr("loads")?; + + let output = dumps.call1((input,))?; + + Ok(output + .downcast::<PyDict>() + .expect("This should always be a PyDict") + .clone()) +} + +fn get_yt_dlp_utils<'a>(py: Python<'a>) -> PyResult<Bound<'a, PyAny>> { + let yt_dlp = PyModule::import_bound(py, "yt_dlp")?; + let utils = yt_dlp.getattr("utils")?; + + Ok(utils) +} +fn get_yt_dlp<'a>(py: Python<'a>, opts: Bound<'a, PyDict>) -> PyResult<Bound<'a, PyAny>> { + // Unconditionally set a logger + let opts = add_logger_and_sig_handler(opts, py)?; + let opts = add_hooks(opts, py)?; + + let yt_dlp = PyModule::import_bound(py, "yt_dlp")?; + let youtube_dl = yt_dlp.call_method1("YoutubeDL", (opts,))?; + + Ok(youtube_dl) +} diff --git a/crates/yt_dlp/src/logging.rs b/crates/yt_dlp/src/logging.rs new file mode 100644 index 0000000..cca917c --- /dev/null +++ b/crates/yt_dlp/src/logging.rs @@ -0,0 +1,125 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +// This file is taken from: https://github.com/dylanbstorey/pyo3-pylogger/blob/d89e0d6820ebc4f067647e3b74af59dbc4941dd5/src/lib.rs +// It is licensed under the Apache 2.0 License, copyright up to 2024 by Dylan Storey +// It was modified by Benedikt Peetz 2024 + +use log::{logger, Level, MetadataBuilder, Record}; +use pyo3::{ + prelude::{PyAnyMethods, PyListMethods, PyModuleMethods}, + pyfunction, wrap_pyfunction, Bound, PyAny, PyResult, Python, +}; + +/// Consume a Python `logging.LogRecord` and emit a Rust `Log` instead. +#[pyfunction] +fn host_log<'a>(record: Bound<'a, PyAny>, rust_target: &str) -> PyResult<()> { + let level = record.getattr("levelno")?; + let message = record.getattr("getMessage")?.call0()?.to_string(); + let pathname = record.getattr("pathname")?.to_string(); + let lineno = record + .getattr("lineno")? + .to_string() + .parse::<u32>() + .expect("This should always be a u32"); + + let logger_name = record.getattr("name")?.to_string(); + + let full_target: Option<String> = if logger_name.trim().is_empty() || logger_name == "root" { + None + } else { + // Libraries (ex: tracing_subscriber::filter::Directive) expect rust-style targets like foo::bar, + // and may not deal well with "." as a module separator: + let logger_name = logger_name.replace(".", "::"); + Some(format!("{rust_target}::{logger_name}")) + }; + + let target = full_target + .as_ref() + .map(|x| x.as_str()) + .unwrap_or(rust_target); + + // error + let error_metadata = if level.ge(40u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Error) + .build() + } else if level.ge(30u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Warn) + .build() + } else if level.ge(20u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Info) + .build() + } else if level.ge(10u8)? { + MetadataBuilder::new() + .target(target) + .level(Level::Debug) + .build() + } else { + MetadataBuilder::new() + .target(target) + .level(Level::Trace) + .build() + }; + + logger().log( + &Record::builder() + .metadata(error_metadata) + .args(format_args!("{}", &message)) + .line(Some(lineno)) + .file(None) + .module_path(Some(&pathname)) + .build(), + ); + + Ok(()) +} + +/// Registers the host_log function in rust as the event handler for Python's logging logger +/// This function needs to be called from within a pyo3 context as early as possible to ensure logging messages +/// arrive to the rust consumer. +pub fn setup_logging(py: Python, target: &str) -> PyResult<()> { + let logging = py.import_bound("logging")?; + + logging.setattr("host_log", wrap_pyfunction!(host_log, &logging)?)?; + + py.run_bound( + format!( + r#" +class HostHandler(Handler): + def __init__(self, level=0): + super().__init__(level=level) + + def emit(self, record): + host_log(record,"{}") + +oldBasicConfig = basicConfig +def basicConfig(*pargs, **kwargs): + if "handlers" not in kwargs: + kwargs["handlers"] = [HostHandler()] + return oldBasicConfig(*pargs, **kwargs) +"#, + target + ) + .as_str(), + Some(&logging.dict()), + None, + )?; + + let all = logging.index()?; + all.append("HostHandler")?; + + Ok(()) +} diff --git a/crates/yt_dlp/src/main.rs b/crates/yt_dlp/src/main.rs new file mode 100644 index 0000000..c40ddc3 --- /dev/null +++ b/crates/yt_dlp/src/main.rs @@ -0,0 +1,96 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::{env::args, fs}; + +use yt_dlp::wrapper::info_json::InfoJson; + +#[cfg(test)] +mod test { + use url::Url; + use yt_dlp::wrapper::yt_dlp_options::{ExtractFlat, YtDlpOptions}; + + const YT_OPTS: YtDlpOptions = YtDlpOptions { + playliststart: 1, + playlistend: 10, + noplaylist: false, + extract_flat: ExtractFlat::InPlaylist, + }; + + #[test] + fn test_extract_info_video() { + let info = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://www.youtube.com/watch?v=dbjPnXaacAU").expect("Is valid."), + false, + false, + false, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", info); + } + + #[test] + fn test_extract_info_url() { + let err = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://google.com").expect("Is valid."), + false, + false, + false, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", err); + } + + #[test] + fn test_extract_info_playlist() { + let err = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://www.youtube.com/@TheGarriFrischer/videos").expect("Is valid."), + false, + false, + true, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", err); + } + #[test] + fn test_extract_info_playlist_full() { + let err = yt_dlp::extract_info( + YT_OPTS, + &Url::parse("https://www.youtube.com/@NixOS-Foundation/videos").expect("Is valid."), + false, + false, + true, + ) + .map_err(|err| format!("Encountered error: '{}'", err)) + .unwrap(); + + println!("{:#?}", err); + } +} + +fn main() { + let input_file: &str = &args().take(2).collect::<Vec<String>>()[1]; + + let input = fs::read_to_string(input_file).unwrap(); + + let output: InfoJson = + serde_json::from_str(&input).expect("Python should be able to produce correct json"); + + println!("{:#?}", output); +} diff --git a/crates/yt_dlp/src/wrapper/info_json.rs b/crates/yt_dlp/src/wrapper/info_json.rs new file mode 100644 index 0000000..9c0d464 --- /dev/null +++ b/crates/yt_dlp/src/wrapper/info_json.rs @@ -0,0 +1,550 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::{collections::HashMap, path::PathBuf}; + +use pyo3::{types::PyDict, Bound, PyResult, Python}; +use serde::{Deserialize, Deserializer, Serialize}; +use serde_json::Value; +use url::Url; + +use crate::json_loads_str; + +type Todo = String; + +// TODO: Change this to map `_type` to a structure of values, instead of the options <2024-05-27> +// And replace all the strings with better types (enums or urls) +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct InfoJson { + pub __last_playlist_index: Option<u32>, + pub __post_extractor: Option<String>, + pub __x_forwarded_for_ip: Option<String>, + pub _filename: Option<PathBuf>, + pub _format_sort_fields: Option<Vec<String>>, + pub _has_drm: Option<Todo>, + pub _type: Option<InfoType>, + pub _version: Option<Version>, + pub abr: Option<f64>, + pub acodec: Option<String>, + pub age_limit: Option<u32>, + pub aspect_ratio: Option<f64>, + pub asr: Option<u32>, + pub audio_channels: Option<u32>, + pub audio_ext: Option<String>, + pub automatic_captions: Option<HashMap<String, Vec<Caption>>>, + pub availability: Option<String>, + pub average_rating: Option<String>, + pub categories: Option<Vec<String>>, + pub channel: Option<String>, + pub channel_follower_count: Option<u32>, + pub channel_id: Option<String>, + pub channel_is_verified: Option<bool>, + pub channel_url: Option<String>, + pub chapters: Option<Vec<Chapter>>, + pub comment_count: Option<u32>, + pub comments: Option<Vec<Comment>>, + pub concurrent_view_count: Option<u32>, + pub description: Option<String>, + pub display_id: Option<String>, + pub downloader_options: Option<DownloaderOptions>, + pub duration: Option<f64>, + pub duration_string: Option<String>, + pub dynamic_range: Option<String>, + pub entries: Option<Vec<InfoJson>>, + pub episode: Option<String>, + pub episode_number: Option<u32>, + pub epoch: Option<u32>, + pub ext: Option<String>, + pub extractor: Option<Extractor>, + pub extractor_key: Option<ExtractorKey>, + pub filename: Option<PathBuf>, + pub filesize: Option<u64>, + pub filesize_approx: Option<u64>, + pub format: Option<String>, + pub format_id: Option<String>, + pub format_index: Option<u32>, + pub format_note: Option<String>, + pub formats: Option<Vec<Format>>, + pub fps: Option<f64>, + pub fulltitle: Option<String>, + pub has_drm: Option<bool>, + pub heatmap: Option<Vec<HeatMapEntry>>, + pub height: Option<u32>, + pub http_headers: Option<HttpHeader>, + pub id: Option<String>, + pub ie_key: Option<ExtractorKey>, + pub is_live: Option<bool>, + pub language: Option<String>, + pub language_preference: Option<i32>, + pub license: Option<Todo>, + pub like_count: Option<u32>, + pub live_status: Option<String>, + pub location: Option<Todo>, + pub manifest_url: Option<Url>, + pub modified_date: Option<String>, + pub n_entries: Option<u32>, + pub original_url: Option<String>, + pub playable_in_embed: Option<bool>, + pub playlist: Option<Todo>, + pub playlist_autonumber: Option<u32>, + pub playlist_channel: Option<Todo>, + pub playlist_channel_id: Option<Todo>, + pub playlist_count: Option<u32>, + pub playlist_id: Option<Todo>, + pub playlist_index: Option<u64>, + pub playlist_title: Option<Todo>, + pub playlist_uploader: Option<Todo>, + pub playlist_uploader_id: Option<Todo>, + pub preference: Option<Todo>, + pub protocol: Option<String>, + pub quality: Option<f64>, + pub release_date: Option<String>, + pub release_timestamp: Option<u64>, + pub release_year: Option<u32>, + pub requested_downloads: Option<Vec<RequestedDownloads>>, + pub requested_entries: Option<Vec<u32>>, + pub requested_formats: Option<Vec<Format>>, + pub requested_subtitles: Option<HashMap<String, Subtitle>>, + pub resolution: Option<String>, + pub season: Option<String>, + pub season_number: Option<u32>, + pub series: Option<String>, + pub source_preference: Option<i32>, + pub sponsorblock_chapters: Option<Vec<SponsorblockChapter>>, + pub stretched_ratio: Option<Todo>, + pub subtitles: Option<HashMap<String, Vec<Caption>>>, + pub tags: Option<Vec<String>>, + pub tbr: Option<f64>, + pub thumbnail: Option<Url>, + pub thumbnails: Option<Vec<ThumbNail>>, + pub timestamp: Option<u64>, + pub title: Option<String>, + pub upload_date: Option<String>, + pub uploader: Option<String>, + pub uploader_id: Option<String>, + pub uploader_url: Option<String>, + pub url: Option<Url>, + pub vbr: Option<f64>, + pub vcodec: Option<String>, + pub video_ext: Option<String>, + pub view_count: Option<u32>, + pub was_live: Option<bool>, + pub webpage_url: Option<Url>, + pub webpage_url_basename: Option<String>, + pub webpage_url_domain: Option<String>, + pub width: Option<u32>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct RequestedDownloads { + pub __files_to_merge: Option<Vec<Todo>>, + pub __finaldir: PathBuf, + pub __infojson_filename: PathBuf, + pub __postprocessors: Vec<Todo>, + pub __real_download: bool, + pub __write_download_archive: bool, + pub _filename: PathBuf, + pub _type: InfoType, + pub _version: Version, + pub abr: f64, + pub acodec: String, + pub aspect_ratio: f64, + pub asr: u32, + pub audio_channels: u32, + pub chapters: Option<Vec<SponsorblockChapter>>, + pub duration: Option<f64>, + pub dynamic_range: String, + pub ext: String, + pub filename: PathBuf, + pub filepath: PathBuf, + pub filesize_approx: u64, + pub format: String, + pub format_id: String, + pub format_note: String, + pub fps: f64, + pub height: u32, + pub infojson_filename: PathBuf, + pub language: Option<String>, + pub protocol: String, + pub requested_formats: Vec<Format>, + pub resolution: String, + pub tbr: f64, + pub vbr: f64, + pub vcodec: String, + pub width: u32, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub struct Subtitle { + pub ext: SubtitleExt, + pub filepath: PathBuf, + pub name: String, + pub url: Url, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SubtitleExt { + #[serde(alias = "vtt")] + Vtt, + + #[serde(alias = "json")] + Json, + #[serde(alias = "json3")] + Json3, + + #[serde(alias = "ttml")] + Ttml, + + #[serde(alias = "srv1")] + Srv1, + #[serde(alias = "srv2")] + Srv2, + #[serde(alias = "srv3")] + Srv3, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Caption { + pub ext: SubtitleExt, + pub name: Option<String>, + pub protocol: Option<String>, + pub url: String, + pub filepath: Option<PathBuf>, + pub video_id: Option<String>, + pub manifest_url: Option<Url>, + pub filesize: Option<u64>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Chapter { + pub end_time: f64, + pub start_time: f64, + pub title: String, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct SponsorblockChapter { + /// This is an utterly useless field, and should thus be ignored + pub _categories: Option<Vec<Vec<Value>>>, + + pub categories: Option<Vec<SponsorblockChapterCategory>>, + pub category: Option<SponsorblockChapterCategory>, + pub category_names: Option<Vec<String>>, + pub end_time: f64, + pub name: Option<String>, + pub r#type: Option<SponsorblockChapterType>, + pub start_time: f64, + pub title: String, +} + +pub fn get_none<'de, D, T>(_: D) -> Result<Option<T>, D::Error> +where + D: Deserializer<'de>, +{ + Ok(None) +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SponsorblockChapterType { + #[serde(alias = "skip")] + Skip, + + #[serde(alias = "chapter")] + Chapter, + + #[serde(alias = "poi")] + Poi, +} +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SponsorblockChapterCategory { + #[serde(alias = "filler")] + Filler, + + #[serde(alias = "interaction")] + Interaction, + + #[serde(alias = "poi_highlight")] + PoiHighlight, + + #[serde(alias = "preview")] + Preview, + + #[serde(alias = "sponsor")] + Sponsor, + + #[serde(alias = "selfpromo")] + SelfPromo, + + #[serde(alias = "chapter")] + Chapter, + + #[serde(alias = "intro")] + Intro, + + #[serde(alias = "outro")] + Outro, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct HeatMapEntry { + pub start_time: f64, + pub end_time: f64, + pub value: f64, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum Extractor { + #[serde(alias = "generic")] + Generic, + + #[serde(alias = "SVTSeries")] + SVTSeries, + #[serde(alias = "SVTPlay")] + SVTPlay, + + #[serde(alias = "youtube")] + YouTube, + #[serde(alias = "youtube:tab")] + YouTubeTab, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum ExtractorKey { + #[serde(alias = "Generic")] + Generic, + + #[serde(alias = "SVTSeries")] + SVTSeries, + #[serde(alias = "SVTPlay")] + SVTPlay, + + #[serde(alias = "Youtube")] + YouTube, + #[serde(alias = "YoutubeTab")] + YouTubeTab, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum InfoType { + #[serde(alias = "playlist")] + Playlist, + + #[serde(alias = "url")] + Url, + + #[serde(alias = "video")] + Video, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Version { + pub current_git_head: Option<String>, + pub release_git_head: String, + pub repository: String, + pub version: String, +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(from = "String")] +#[serde(deny_unknown_fields)] +pub enum Parent { + Root, + Id(String), +} + +impl Parent { + pub fn id(&self) -> Option<&str> { + if let Self::Id(id) = self { + Some(id) + } else { + None + } + } +} + +impl From<String> for Parent { + fn from(value: String) -> Self { + if value == "root" { + Self::Root + } else { + Self::Id(value) + } + } +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(from = "String")] +#[serde(deny_unknown_fields)] +pub struct Id { + pub id: String, +} +impl From<String> for Id { + fn from(value: String) -> Self { + Self { + // Take the last element if the string is split with dots, otherwise take the full id + id: value.split('.').last().unwrap_or(&value).to_owned(), + } + } +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Comment { + pub id: Id, + pub text: String, + #[serde(default = "zero")] + pub like_count: u32, + pub is_pinned: bool, + pub author_id: String, + #[serde(default = "unknown")] + pub author: String, + pub author_is_verified: bool, + pub author_thumbnail: Url, + pub parent: Parent, + #[serde(deserialize_with = "edited_from_time_text", alias = "_time_text")] + pub edited: bool, + // Can't also be deserialized, as it's already used in 'edited' + // _time_text: String, + pub timestamp: i64, + pub author_url: Url, + pub author_is_uploader: bool, + pub is_favorited: bool, +} +fn unknown() -> String { + "<Unknown>".to_string() +} +fn zero() -> u32 { + 0 +} +fn edited_from_time_text<'de, D>(d: D) -> Result<bool, D::Error> +where + D: Deserializer<'de>, +{ + let s = String::deserialize(d)?; + if s.contains(" (edited)") { + Ok(true) + } else { + Ok(false) + } +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct ThumbNail { + pub id: Option<String>, + pub preference: Option<i32>, + /// in the form of "[`height`]x[`width`]" + pub resolution: Option<String>, + pub url: Url, + pub width: Option<u32>, + pub height: Option<u32>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Format { + pub __needs_testing: Option<bool>, + pub __working: Option<bool>, + pub abr: Option<f64>, + pub acodec: Option<String>, + pub aspect_ratio: Option<f64>, + pub asr: Option<f64>, + pub audio_channels: Option<u32>, + pub audio_ext: Option<String>, + pub columns: Option<u32>, + pub container: Option<String>, + pub downloader_options: Option<DownloaderOptions>, + pub dynamic_range: Option<String>, + pub ext: String, + pub filepath: Option<PathBuf>, + pub filesize: Option<u64>, + pub filesize_approx: Option<u64>, + pub format: Option<String>, + pub format_id: String, + pub format_index: Option<String>, + pub format_note: Option<String>, + pub fps: Option<f64>, + pub fragment_base_url: Option<Todo>, + pub fragments: Option<Vec<Fragment>>, + pub has_drm: Option<bool>, + pub height: Option<u32>, + pub http_headers: Option<HttpHeader>, + pub is_dash_periods: Option<bool>, + pub language: Option<String>, + pub language_preference: Option<i32>, + pub manifest_stream_number: Option<u32>, + pub manifest_url: Option<Url>, + pub preference: Option<i32>, + pub protocol: Option<String>, + pub quality: Option<f64>, + pub resolution: Option<String>, + pub rows: Option<u32>, + pub source_preference: Option<i32>, + pub tbr: Option<f64>, + pub url: Url, + pub vbr: Option<f64>, + pub vcodec: String, + pub video_ext: Option<String>, + pub width: Option<u32>, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct DownloaderOptions { + http_chunk_size: u64, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct HttpHeader { + #[serde(alias = "User-Agent")] + pub user_agent: Option<String>, + + #[serde(alias = "Accept")] + pub accept: Option<String>, + + #[serde(alias = "X-Forwarded-For")] + pub x_forwarded_for: Option<String>, + + #[serde(alias = "Accept-Language")] + pub accept_language: Option<String>, + + #[serde(alias = "Sec-Fetch-Mode")] + pub sec_fetch_mode: Option<String>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Fragment { + pub url: Option<Url>, + pub duration: Option<f64>, + pub path: Option<PathBuf>, +} + +impl InfoJson { + pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> { + let output: Bound<PyDict> = json_loads_str(py, self)?; + Ok(output) + } +} diff --git a/crates/yt_dlp/src/wrapper/mod.rs b/crates/yt_dlp/src/wrapper/mod.rs new file mode 100644 index 0000000..3fe3247 --- /dev/null +++ b/crates/yt_dlp/src/wrapper/mod.rs @@ -0,0 +1,12 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +pub mod info_json; +// pub mod yt_dlp_options; diff --git a/crates/yt_dlp/src/wrapper/yt_dlp_options.rs b/crates/yt_dlp/src/wrapper/yt_dlp_options.rs new file mode 100644 index 0000000..c2a86df --- /dev/null +++ b/crates/yt_dlp/src/wrapper/yt_dlp_options.rs @@ -0,0 +1,62 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use pyo3::{types::PyDict, Bound, PyResult, Python}; +use serde::Serialize; + +use crate::json_loads; + +#[derive(Serialize, Clone)] +pub struct YtDlpOptions { + pub playliststart: u32, + pub playlistend: u32, + pub noplaylist: bool, + pub extract_flat: ExtractFlat, + // pub extractor_args: ExtractorArgs, + // pub format: String, + // pub fragment_retries: u32, + // #[serde(rename(serialize = "getcomments"))] + // pub get_comments: bool, + // #[serde(rename(serialize = "ignoreerrors"))] + // pub ignore_errors: bool, + // pub retries: u32, + // #[serde(rename(serialize = "writeinfojson"))] + // pub write_info_json: bool, + // pub postprocessors: Vec<serde_json::Map<String, serde_json::Value>>, +} + +#[derive(Serialize, Copy, Clone)] +pub enum ExtractFlat { + #[serde(rename(serialize = "in_playlist"))] + InPlaylist, + + #[serde(rename(serialize = "discard_in_playlist"))] + DiscardInPlaylist, +} + +#[derive(Serialize, Clone)] +pub struct ExtractorArgs { + pub youtube: YoutubeExtractorArgs, +} + +#[derive(Serialize, Clone)] +pub struct YoutubeExtractorArgs { + comment_sort: Vec<String>, + max_comments: Vec<String>, +} + +impl YtDlpOptions { + pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> { + let string = serde_json::to_string(&self).expect("This should always work"); + + let output: Bound<PyDict> = json_loads(py, string)?; + Ok(output) + } +} |