aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/src
diff options
context:
space:
mode:
author Benedikt Peetz <benedikt.peetz@b-peetz.de> 2024-08-21 10:49:23 +0200
committer Benedikt Peetz <benedikt.peetz@b-peetz.de> 2024-08-21 11:28:43 +0200
commit 1debeb77f7986de1b659dcfdc442de6415e1d9f5 (patch)
tree 4df3e7c3f6a2d1ec116e4088c5ace7f143a8b05f /yt_dlp/src
download yt-1debeb77f7986de1b659dcfdc442de6415e1d9f5.zip
chore: Initial Commit
This repository was migrated out of my nixos-config.
Diffstat (limited to 'yt_dlp/src')
-rw-r--r-- yt_dlp/src/duration.rs 71
-rw-r--r-- yt_dlp/src/lib.rs 410
-rw-r--r-- yt_dlp/src/logging.rs 125
-rw-r--r-- yt_dlp/src/main.rs 96
-rw-r--r-- yt_dlp/src/wrapper/info_json.rs 526
-rw-r--r-- yt_dlp/src/wrapper/mod.rs 12
-rw-r--r-- yt_dlp/src/wrapper/yt_dlp_options.rs 62
7 files changed, 1302 insertions, 0 deletions
diff --git a/yt_dlp/src/duration.rs b/yt_dlp/src/duration.rs
new file mode 100644
index 0000000..cd7454b
--- /dev/null
+++ b/yt_dlp/src/duration.rs
@@ -0,0 +1,71 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+// TODO: This file should be de-duplicated with the same file in the 'yt' crate <2024-06-25>
+/// A span of time with whole-second resolution.
+pub struct Duration {
+    /// Total length in seconds.
+    time: u32,
+}
+
+/// Parses a colon-separated clock string such as `"ss"`, `"mm:ss"` or `"hh:mm:ss"`.
+///
+/// Each component is folded in base 60, so `"mm:ss"` keeps its previous meaning while
+/// a lone number is read as seconds and a third component is no longer discarded.
+/// Arithmetic saturates at `u32::MAX` instead of overflowing.
+///
+/// # Panics
+///
+/// Panics if any component is not a valid `u32`; `From` cannot report an error.
+impl From<&str> for Duration {
+    fn from(v: &str) -> Self {
+        let time = v.split(':').fold(0u32, |total, part| {
+            let part = part.parse::<u32>().expect("Should be a number");
+            total.saturating_mul(60).saturating_add(part)
+        });
+
+        Self { time }
+    }
+}
+
+/// Builds a duration from an optional number of (fractional) seconds.
+///
+/// `None` becomes a zero duration; fractions are rounded up to the next full second.
+impl From<Option<f64>> for Duration {
+    fn from(value: Option<f64>) -> Self {
+        Self {
+            time: value.unwrap_or(0.0).ceil() as u32,
+        }
+    }
+}
+
+/// Renders the duration compactly: `0s` for zero, `{h}h {m}m` once at least one hour
+/// has passed, and `{m}m {s}s` otherwise. No surrounding brackets are emitted.
+impl std::fmt::Display for Duration {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+        const MINUTE: u32 = 60;
+        const HOUR: u32 = 60 * MINUTE;
+
+        let h = self.time / HOUR;
+        let m = (self.time % HOUR) / MINUTE;
+        let s = self.time % MINUTE;
+
+        if self.time == 0 {
+            write!(f, "0s")
+        } else if h > 0 {
+            write!(f, "{h}h {m}m")
+        } else {
+            write!(f, "{m}m {s}s")
+        }
+    }
+}
+#[cfg(test)]
+mod test {
+    use super::Duration;
+
+    // `Display` writes the bare value (e.g. `1h 0m`) without any brackets, so the
+    // expected strings must not contain them either.
+    #[test]
+    fn test_display_duration_1h() {
+        let dur = Duration { time: 60 * 60 };
+        assert_eq!("1h 0m".to_owned(), dur.to_string());
+    }
+    #[test]
+    fn test_display_duration_30min() {
+        let dur = Duration { time: 60 * 30 };
+        assert_eq!("30m 0s".to_owned(), dur.to_string());
+    }
+}
diff --git a/yt_dlp/src/lib.rs b/yt_dlp/src/lib.rs
new file mode 100644
index 0000000..5bb02c1
--- /dev/null
+++ b/yt_dlp/src/lib.rs
@@ -0,0 +1,410 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+// use std::{fs::File, io::Write};
+
+use std::{path::PathBuf, sync::Once};
+
+use crate::{duration::Duration, logging::setup_logging, wrapper::info_json::InfoJson};
+
+use log::info;
+use pyo3::types::{PyString, PyTuple, PyTupleMethods};
+use pyo3::{
+ pyfunction,
+ types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyListMethods, PyModule},
+ wrap_pyfunction_bound, Bound, PyAny, PyResult, Python,
+};
+use serde::Serialize;
+use serde_json::{Map, Value};
+use url::Url;
+
+pub mod duration;
+pub mod logging;
+pub mod wrapper;
+
+/// Synchronisation helper, to ensure that we don't setup the logger multiple times
+static SYNC_OBJ: Once = Once::new();
+
+/// Add a logger to the yt-dlp options.
+/// If you have a logger set (i.e. for rust), then this will log to rust.
+///
+/// On the first call this also resets Python's SIGINT handler to the default and
+/// calls `logging.basicConfig(level=0)`; later calls skip that one-time setup.
+///
+/// # Errors
+///
+/// Returns the Python error if setting up logging, importing `logging`, fetching the
+/// `yt_dlp` logger or storing it in `opts` fails.
+///
+/// # Panics
+///
+/// Panics if the one-time setup (signal handler reset or `basicConfig`) fails.
+pub fn add_logger_and_sig_handler<'a>(
+    opts: Bound<'a, PyDict>,
+    py: Python,
+) -> PyResult<Bound<'a, PyDict>> {
+    setup_logging(py, "yt_dlp")?;
+
+    let logging = PyModule::import_bound(py, "logging")?;
+    let ytdl_logger = logging.call_method1("getLogger", ("yt_dlp",))?;
+
+    // Ensure that all events are logged by setting the log level to NOTSET (we filter on rust's side)
+    // Also use this static, to ensure that we don't configure the logger every time
+    SYNC_OBJ.call_once(|| {
+        // Disable the SIGINT (Ctrl+C) handler, python installs.
+        // This allows the user to actually stop the application with Ctrl+C.
+        // This is here because it can only be run in the main thread and this was here already.
+        py.run_bound(
+            r#"
+import signal
+signal.signal(signal.SIGINT, signal.SIG_DFL)
+        "#,
+            None,
+            None,
+        )
+        .expect("This code should always work");
+
+        let config_opts = PyDict::new_bound(py);
+        config_opts
+            .set_item("level", 0)
+            .expect("Setting this item should always work");
+
+        logging
+            .call_method("basicConfig", (), Some(&config_opts))
+            .expect("This method exists");
+    });
+
+    // This was taken from `ytcc`, I don't think it is still applicable
+    // ytdl_logger.setattr("propagate", false)?;
+    // let logging_null_handler = logging.call_method0("NullHandler")?;
+    // ytdl_logger.setattr("addHandler", logging_null_handler)?;
+
+    // Propagate a failure to the caller instead of panicking; this function already
+    // returns a `PyResult`.
+    opts.set_item("logger", ytdl_logger)?;
+
+    Ok(opts)
+}
+
+/// Progress callback handed to yt-dlp via the `progress_hooks` option.
+///
+/// The status dictionary is converted to JSON and back into a `serde_json` map, then:
+/// * `"downloading"` redraws a single coloured status line (title, elapsed/eta, speed,
+///   downloaded/total bytes and percentage),
+/// * `"finished"` prints a completion line.
+///
+/// # Panics
+///
+/// Panics if a required field is missing or has the wrong type, if the status is
+/// `"error"`, or if the status is not one of the three known values.
+#[pyfunction]
+pub fn progress_hook(py: Python, input: Bound<'_, PyDict>) -> PyResult<()> {
+    let input: serde_json::Map<String, Value> = serde_json::from_str(&json_dumps(
+        py,
+        input
+            .downcast::<PyAny>()
+            .expect("Will always work")
+            .to_owned(),
+    )?)
+    .expect("Python should always produce valid json");
+
+    // Fetch a required field (optionally nested one level deep) and panic with a
+    // descriptive message if it is absent or of the wrong JSON type.
+    macro_rules! get {
+        (@interrogate $item:ident, $type_fun:ident, $get_fun:ident, $name:expr) => {{
+            let a = $item.get($name).expect(concat!(
+                "The field '",
+                stringify!($name),
+                "' should exist."
+            ));
+
+            if a.$type_fun() {
+                a.$get_fun().expect(
+                    "The should have been checked in the if guard, so unpacking here is fine",
+                )
+            } else {
+                panic!(
+                    "Value {} => \n{}\n is not of type: {}",
+                    $name,
+                    a,
+                    stringify!($type_fun)
+                );
+            }
+        }};
+
+        ($type_fun:ident, $get_fun:ident, $name1:expr, $name2:expr) => {{
+            let a = get! {@interrogate input, is_object, as_object, $name1};
+            let b = get! {@interrogate a, $type_fun, $get_fun, $name2};
+            b
+        }};
+
+        ($type_fun:ident, $get_fun:ident, $name:expr) => {{
+            get! {@interrogate input, $type_fun, $get_fun, $name}
+        }};
+    }
+
+    // Fetch an optional field, falling back to `$default` when it is missing or of
+    // the wrong JSON type.
+    macro_rules! default_get {
+        (@interrogate $item:ident, $default:expr, $get_fun:ident, $name:expr) => {{
+            let a = if let Some(field) = $item.get($name) {
+                field.$get_fun().unwrap_or($default)
+            } else {
+                $default
+            };
+            a
+        }};
+
+        ($get_fun:ident, $default:expr, $name1:expr, $name2:expr) => {{
+            let a = get! {@interrogate input, is_object, as_object, $name1};
+            let b = default_get! {@interrogate a, $default, $get_fun, $name2};
+            b
+        }};
+
+        ($get_fun:ident, $default:expr, $name:expr) => {{
+            default_get! {@interrogate input, $default, $get_fun, $name}
+        }};
+    }
+
+    // Wrap `$format` in the ANSI SGR sequence `$color`, resetting afterwards.
+    macro_rules! c {
+        ($color:expr, $format:expr) => {
+            format!("\x1b[{}m{}\x1b[0m", $color, $format)
+        };
+    }
+
+    /// Human readable byte count using decimal (1000-based) units, truncated.
+    fn format_bytes(bytes: u64) -> String {
+        if bytes >= 1_000_000 {
+            format!("{} MB", bytes / 1_000_000)
+        } else if bytes >= 1_000 {
+            format!("{} KB", bytes / 1_000)
+        } else {
+            format!("{} B", bytes)
+        }
+    }
+
+    /// Human readable transfer rate using decimal units with two decimals.
+    fn format_speed(speed: f64) -> String {
+        if speed > 1_000_000.0 {
+            format!("{:.02} MB/s", speed / 1_000_000.0)
+        } else if speed > 1_000.0 {
+            format!("{:.02} KB/s", speed / 1_000.0)
+        } else {
+            format!("{:.02} B/s", speed)
+        }
+    }
+
+    let get_title = |add_extension: bool| -> String {
+        match get! {is_string, as_str, "info_dict", "ext"} {
+            "vtt" => {
+                format!(
+                    "Subtitles ({})",
+                    get! {is_string, as_str, "info_dict", "name"}
+                )
+            }
+            // Every other extension is shown as a regular download. Previously only
+            // webm/mp4/m4a were accepted and anything else aborted the process with
+            // a panic from inside the Python callback.
+            title_extension => {
+                if add_extension {
+                    format!(
+                        "{} ({})",
+                        default_get! { as_str, "<No title>", "info_dict", "title"},
+                        title_extension
+                    )
+                } else {
+                    default_get! { as_str, "<No title>", "info_dict", "title"}.to_owned()
+                }
+            }
+        }
+    };
+
+    match get! {is_string, as_str, "status"} {
+        "downloading" => {
+            let elapsed = default_get! {as_f64, 0.0f64, "elapsed"};
+            let eta = default_get! {as_f64, 0.0, "eta"};
+            let speed = default_get! {as_f64, 0.0, "speed"};
+
+            let downloaded_bytes = get! {is_u64, as_u64, "downloaded_bytes"};
+            let total_bytes = default_get!(as_u64, 0, "total_bytes");
+
+            let percent: f64 = {
+                // An unknown total is reported as 100% to avoid dividing by zero.
+                if total_bytes == 0 {
+                    100.0
+                } else {
+                    (downloaded_bytes as f64 / total_bytes as f64) * 100.0
+                }
+            };
+
+            print!("\x1b[1F"); // Move one line up, to allow the `println` after it to print a newline
+            print!("\x1b[2K"); // Clear whole line.
+            print!("\x1b[1G"); // Move cursor to column 1.
+
+            println!(
+                "'{}' [{}/{} at {}] -> [{}/{} {}]",
+                c!("34;1", get_title(true)),
+                c!("33;1", Duration::from(Some(elapsed))),
+                c!("33;1", Duration::from(Some(eta))),
+                c!("32;1", format_speed(speed)),
+                c!("31;1", format_bytes(downloaded_bytes)),
+                c!("31;1", format_bytes(total_bytes)),
+                c!("36;1", format!("{:.02}%", percent))
+            );
+        }
+        "finished" => {
+            println!("Finished downloading: '{}'", c!("34;1", get_title(false)))
+        }
+        "error" => {
+            panic!("Error whilst downloading: {}", get_title(true))
+        }
+        other => panic!("{} is not a valid state!", other),
+    };
+
+    Ok(())
+}
+
+/// Registers [`progress_hook`] in the `progress_hooks` entry of `opts`.
+///
+/// An existing list is extended in place; otherwise a new one-element list is stored.
+///
+/// # Errors
+///
+/// Fails if an existing `progress_hooks` value is not a Python list, or if any of
+/// the underlying Python operations fail.
+pub fn add_hooks<'a>(opts: Bound<'a, PyDict>, py: Python) -> PyResult<Bound<'a, PyDict>> {
+    match opts.get_item("progress_hooks")? {
+        Some(existing) => {
+            let hook_list = existing.downcast::<PyList>()?;
+            hook_list.append(wrap_pyfunction_bound!(progress_hook, py)?)?;
+
+            opts.set_item("progress_hooks", hook_list)?;
+        }
+        None => {
+            // Nothing registered so far: start a fresh list holding only our hook.
+            let hook = wrap_pyfunction_bound!(progress_hook, py)?;
+
+            opts.set_item("progress_hooks", PyList::new_bound(py, &[hook]))?;
+        }
+    }
+
+    Ok(opts)
+}
+
+/// Wrapper around yt-dlp's
+/// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)`.
+///
+/// Builds a `YoutubeDL` instance from `yt_dlp_opts`, calls `extract_info` on it and
+/// deserialises the sanitised result into an [`InfoJson`].
+///
+/// Arguments forwarded to Python:
+/// * `url` - URL to extract
+/// * `download` - Whether to download videos
+/// * `process` - Whether to resolve all unresolved references (URLs, playlist items).
+///   Must be `true` for download to work
+///
+/// The Python parameters `ie_key`, `extra_info` and `force_generic_extractor` are not
+/// exposed and keep their Python defaults.
+///
+/// # Panics
+///
+/// Panics if the JSON produced by Python cannot be deserialised into [`InfoJson`].
+pub async fn extract_info(
+    yt_dlp_opts: &Map<String, Value>,
+    url: &Url,
+    download: bool,
+    process: bool,
+) -> PyResult<InfoJson> {
+    Python::with_gil(|py| {
+        let ytdl = get_yt_dlp(py, json_map_to_py_dict(yt_dlp_opts, py)?)?;
+
+        let keyword_args = PyDict::new_bound(py);
+        keyword_args.set_item("download", download)?;
+        keyword_args.set_item("process", process)?;
+
+        let info = ytdl.call_method("extract_info", (url.as_str(),), Some(&keyword_args))?;
+
+        // Without processing, `entries` is a `<generator at 0xsome_hex>`; overwrite it
+        // with an empty tuple so that the result can be serialised.
+        if !process {
+            info.set_item("entries", ())?;
+        }
+
+        let raw_json = json_dumps(py, info)?;
+
+        let parsed: InfoJson = serde_json::from_str(&raw_json)
+            .expect("Python should be able to produce correct json");
+
+        Ok(parsed)
+    })
+}
+
+/// Calls `yt_dlp.utils.unsmuggle_url` and returns the first element of the resulting
+/// tuple as a [`Url`].
+///
+/// # Panics
+///
+/// Panics if the string returned by Python is not a valid URL.
+pub fn unsmuggle_url(smug_url: Url) -> PyResult<Url> {
+    Python::with_gil(|py| {
+        let unsmuggled = get_yt_dlp_utils(py)?.call_method1("unsmuggle_url", (smug_url.as_str(),))?;
+        let first = unsmuggled.downcast::<PyTuple>()?.get_item(0)?;
+        let plain = first.downcast::<PyString>()?.to_string();
+
+        let url: Url = plain
+            .parse()
+            .expect("Python should be able to return a valid url");
+
+        Ok(url)
+    })
+}
+
+/// Downloads every URL in `urls`, one after another, with the given yt-dlp options.
+///
+/// Returns the path each URL was downloaded to, in input order.
+///
+/// # Panics
+///
+/// Panics if the extracted info has neither a `filename` nor a non-empty
+/// `requested_downloads` list.
+pub async fn download(
+    urls: &[Url],
+    download_options: &Map<String, Value>,
+) -> PyResult<Vec<PathBuf>> {
+    let mut out_paths = Vec::with_capacity(urls.len());
+
+    for url in urls {
+        info!("Started downloading url: '{}'", url);
+        let info_json = extract_info(download_options, url, true, true).await?;
+
+        let path = match info_json.filename {
+            // Try to work around yt-dlp type weirdness
+            Some(filename) => filename,
+            None => info_json.requested_downloads.expect("This must exist")[0]
+                .filename
+                .clone(),
+        };
+
+        out_paths.push(path);
+        info!("Finished downloading url: '{}'", url);
+    }
+
+    Ok(out_paths)
+}
+
+/// Converts a JSON object into a Python `dict` by serialising it to a string and
+/// parsing that string with Python's `json.loads`.
+fn json_map_to_py_dict<'a>(
+    map: &Map<String, Value>,
+    py: Python<'a>,
+) -> PyResult<Bound<'a, PyDict>> {
+    let encoded = serde_json::to_string(&map).expect("This must always work");
+
+    json_loads(py, encoded)
+}
+
+/// Serialises a Python object to a JSON string, i.e.
+/// `json.dumps(yt_dlp.YoutubeDL.sanitize_info(input))`.
+///
+/// `sanitize_info` is a static method, so it is called on the class directly. This
+/// avoids constructing a full `YoutubeDL` instance (and re-running logger and hook
+/// setup) on every call, which matters because the progress hook calls this for each
+/// progress update.
+fn json_dumps(py: Python, input: Bound<PyAny>) -> PyResult<String> {
+    let youtube_dl_class = PyModule::import_bound(py, "yt_dlp")?.getattr("YoutubeDL")?;
+    let sanitized_result = youtube_dl_class.call_method1("sanitize_info", (input,))?;
+
+    let json = PyModule::import_bound(py, "json")?;
+    let output = json.call_method1("dumps", (sanitized_result,))?;
+
+    output.extract::<String>()
+}
+
+/// Serialises any `Serialize` value to JSON and loads it into a Python `dict`.
+///
+/// # Panics
+///
+/// Panics if `input` cannot be serialised to JSON.
+fn json_loads_str<T>(py: Python, input: T) -> PyResult<Bound<PyDict>>
+where
+    T: Serialize,
+{
+    let encoded = serde_json::to_string(&input).expect("Correct json must be pased");
+
+    json_loads(py, encoded)
+}
+
+/// Parses a JSON string with Python's `json.loads(input)` and returns it as a `dict`.
+///
+/// # Errors
+///
+/// Returns the Python error if `json` cannot be imported or the string is not valid
+/// JSON, and a downcast error if the top-level JSON value is not an object (this used
+/// to panic).
+fn json_loads(py: Python, input: String) -> PyResult<Bound<PyDict>> {
+    let json = PyModule::import_bound(py, "json")?;
+    let parsed = json.call_method1("loads", (input,))?;
+
+    Ok(parsed.downcast::<PyDict>()?.clone())
+}
+
+/// Imports the `yt_dlp` Python module and returns its `utils` attribute.
+fn get_yt_dlp_utils<'a>(py: Python<'a>) -> PyResult<Bound<'a, PyAny>> {
+    PyModule::import_bound(py, "yt_dlp")?.getattr("utils")
+}
+/// Creates a `yt_dlp.YoutubeDL` instance from `opts`.
+///
+/// The logger and the progress hook are always added to the options first.
+fn get_yt_dlp<'a>(py: Python<'a>, opts: Bound<'a, PyDict>) -> PyResult<Bound<'a, PyAny>> {
+    // Unconditionally set a logger
+    let with_logger = add_logger_and_sig_handler(opts, py)?;
+    let with_hooks = add_hooks(with_logger, py)?;
+
+    PyModule::import_bound(py, "yt_dlp")?.call_method1("YoutubeDL", (with_hooks,))
+}
diff --git a/yt_dlp/src/logging.rs b/yt_dlp/src/logging.rs
new file mode 100644
index 0000000..cca917c
--- /dev/null
+++ b/yt_dlp/src/logging.rs
@@ -0,0 +1,125 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+// This file is taken from: https://github.com/dylanbstorey/pyo3-pylogger/blob/d89e0d6820ebc4f067647e3b74af59dbc4941dd5/src/lib.rs
+// It is licensed under the Apache 2.0 License, copyright up to 2024 by Dylan Storey
+// It was modified by Benedikt Peetz 2024
+
+use log::{logger, Level, MetadataBuilder, Record};
+use pyo3::{
+ prelude::{PyAnyMethods, PyListMethods, PyModuleMethods},
+ pyfunction, wrap_pyfunction, Bound, PyAny, PyResult, Python,
+};
+
+/// Consume a Python `logging.LogRecord` and emit a Rust `Log` instead.
+///
+/// The record's logger name (with `.` replaced by `::`) is appended to `rust_target`
+/// to form the log target; the root or an empty logger name maps to `rust_target`
+/// itself. Python's numeric level is mapped as `>= 40` error, `>= 30` warn,
+/// `>= 20` info, `>= 10` debug and everything below to trace.
+///
+/// # Errors
+///
+/// Returns the Python error if an attribute is missing or `lineno` does not fit into
+/// a `u32` (this used to panic).
+#[pyfunction]
+fn host_log(record: Bound<'_, PyAny>, rust_target: &str) -> PyResult<()> {
+    let level = record.getattr("levelno")?;
+    let message = record.call_method0("getMessage")?.to_string();
+    let pathname = record.getattr("pathname")?.to_string();
+    let lineno = record.getattr("lineno")?.extract::<u32>()?;
+
+    let logger_name = record.getattr("name")?.to_string();
+
+    let full_target: Option<String> = if logger_name.trim().is_empty() || logger_name == "root" {
+        None
+    } else {
+        // Libraries (ex: tracing_subscriber::filter::Directive) expect rust-style targets like foo::bar,
+        // and may not deal well with "." as a module separator:
+        let logger_name = logger_name.replace('.', "::");
+        Some(format!("{rust_target}::{logger_name}"))
+    };
+
+    let target = full_target.as_deref().unwrap_or(rust_target);
+
+    // Only the level differs between the cases, so pick it first and build the
+    // metadata once.
+    let rust_level = if level.ge(40u8)? {
+        Level::Error
+    } else if level.ge(30u8)? {
+        Level::Warn
+    } else if level.ge(20u8)? {
+        Level::Info
+    } else if level.ge(10u8)? {
+        Level::Debug
+    } else {
+        Level::Trace
+    };
+
+    let metadata = MetadataBuilder::new()
+        .target(target)
+        .level(rust_level)
+        .build();
+
+    logger().log(
+        &Record::builder()
+            .metadata(metadata)
+            .args(format_args!("{}", &message))
+            .line(Some(lineno))
+            .file(None)
+            .module_path(Some(&pathname))
+            .build(),
+    );
+
+    Ok(())
+}
+
+/// Registers the host_log function in rust as the event handler for Python's logging logger
+/// This function needs to be called from within a pyo3 context as early as possible to ensure logging messages
+/// arrive to the rust consumer.
+///
+/// The function is idempotent: once `logging.HostHandler` exists, later calls return
+/// immediately. Without this guard every call wrapped `basicConfig` in another layer
+/// and appended another `"HostHandler"` entry to the module's `__all__`.
+/// As a consequence the `target` of the first successful call stays in effect.
+pub fn setup_logging(py: Python, target: &str) -> PyResult<()> {
+    let logging = py.import_bound("logging")?;
+
+    // Already installed by an earlier call; nothing left to do.
+    if logging.hasattr("HostHandler")? {
+        return Ok(());
+    }
+
+    logging.setattr("host_log", wrap_pyfunction!(host_log, &logging)?)?;
+
+    // Executed with the `logging` module's dict as globals, so `Handler`, `basicConfig`
+    // and `host_log` resolve to the module's attributes and the definitions below
+    // become attributes of the module.
+    py.run_bound(
+        format!(
+            r#"
+class HostHandler(Handler):
+    def __init__(self, level=0):
+        super().__init__(level=level)
+
+    def emit(self, record):
+        host_log(record,"{}")
+
+oldBasicConfig = basicConfig
+def basicConfig(*pargs, **kwargs):
+    if "handlers" not in kwargs:
+        kwargs["handlers"] = [HostHandler()]
+    return oldBasicConfig(*pargs, **kwargs)
+"#,
+            target
+        )
+        .as_str(),
+        Some(&logging.dict()),
+        None,
+    )?;
+
+    let all = logging.index()?;
+    all.append("HostHandler")?;
+
+    Ok(())
+}
diff --git a/yt_dlp/src/main.rs b/yt_dlp/src/main.rs
new file mode 100644
index 0000000..c40ddc3
--- /dev/null
+++ b/yt_dlp/src/main.rs
@@ -0,0 +1,96 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use std::{env::args, fs};
+
+use yt_dlp::wrapper::info_json::InfoJson;
+
+// Tests that call `yt_dlp::extract_info` against real URLs.
+// NOTE(review): these appear out of date relative to lib.rs in this commit: they pass
+// five arguments and a `YtDlpOptions` value and use the result synchronously, while
+// `extract_info` there is `async` and takes `(&Map, &Url, bool, bool)`; the
+// `yt_dlp_options` module is also commented out in wrapper/mod.rs. Confirm and update.
+#[cfg(test)]
+mod test {
+ use url::Url;
+ use yt_dlp::wrapper::yt_dlp_options::{ExtractFlat, YtDlpOptions};
+
+ const YT_OPTS: YtDlpOptions = YtDlpOptions {
+ playliststart: 1,
+ playlistend: 10,
+ noplaylist: false,
+ extract_flat: ExtractFlat::InPlaylist,
+ };
+
+ #[test]
+ fn test_extract_info_video() {
+ let info = yt_dlp::extract_info(
+ YT_OPTS,
+ &Url::parse("https://www.youtube.com/watch?v=dbjPnXaacAU").expect("Is valid."),
+ false,
+ false,
+ false,
+ )
+ .map_err(|err| format!("Encountered error: '{}'", err))
+ .unwrap();
+
+ println!("{:#?}", info);
+ }
+
+ #[test]
+ fn test_extract_info_url() {
+ let err = yt_dlp::extract_info(
+ YT_OPTS,
+ &Url::parse("https://google.com").expect("Is valid."),
+ false,
+ false,
+ false,
+ )
+ .map_err(|err| format!("Encountered error: '{}'", err))
+ .unwrap();
+
+ println!("{:#?}", err);
+ }
+
+ #[test]
+ fn test_extract_info_playlist() {
+ let err = yt_dlp::extract_info(
+ YT_OPTS,
+ &Url::parse("https://www.youtube.com/@TheGarriFrischer/videos").expect("Is valid."),
+ false,
+ false,
+ true,
+ )
+ .map_err(|err| format!("Encountered error: '{}'", err))
+ .unwrap();
+
+ println!("{:#?}", err);
+ }
+ #[test]
+ fn test_extract_info_playlist_full() {
+ let err = yt_dlp::extract_info(
+ YT_OPTS,
+ &Url::parse("https://www.youtube.com/@NixOS-Foundation/videos").expect("Is valid."),
+ false,
+ false,
+ true,
+ )
+ .map_err(|err| format!("Encountered error: '{}'", err))
+ .unwrap();
+
+ println!("{:#?}", err);
+ }
+}
+
+/// Debugging helper: reads the info-json file named by the first command line
+/// argument, deserialises it into an [`InfoJson`] and pretty-prints the result.
+///
+/// Exits with status 2 when no file is given and status 1 when it cannot be read.
+///
+/// # Panics
+///
+/// Panics if the file content does not deserialise into an [`InfoJson`].
+fn main() {
+    // `nth(1)` skips the program name; a missing argument used to be an
+    // out-of-bounds index panic.
+    let Some(input_file) = args().nth(1) else {
+        eprintln!("Usage: <program> <path to info json file>");
+        std::process::exit(2);
+    };
+
+    let input = fs::read_to_string(&input_file).unwrap_or_else(|err| {
+        eprintln!("Failed to read '{}': {}", input_file, err);
+        std::process::exit(1);
+    });
+
+    let output: InfoJson =
+        serde_json::from_str(&input).expect("Python should be able to produce correct json");
+
+    println!("{:#?}", output);
+}
diff --git a/yt_dlp/src/wrapper/info_json.rs b/yt_dlp/src/wrapper/info_json.rs
new file mode 100644
index 0000000..aceeeb8
--- /dev/null
+++ b/yt_dlp/src/wrapper/info_json.rs
@@ -0,0 +1,526 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use std::{collections::HashMap, path::PathBuf};
+
+use pyo3::{types::PyDict, Bound, PyResult, Python};
+use serde::{Deserialize, Deserializer, Serialize};
+use serde_json::Value;
+use url::Url;
+
+use crate::json_loads_str;
+
+type Todo = String;
+
+// TODO: Change this to map `_type` to a structure of values, instead of the options <2024-05-27>
+// And replace all the strings with better types (enums or urls)
+/// Typed view of the info dictionary that `extract_info` gets back from yt-dlp.
+///
+/// Every field is optional. Because of `deny_unknown_fields`, deserialisation fails
+/// on any key that is not listed here. Fields typed as `Todo` are placeholders whose
+/// final type has not been decided yet.
+// NOTE(review): several counters (e.g. `view_count`) are `u32`; presumably real-world
+// values can exceed that range - confirm before relying on it.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+#[serde(deny_unknown_fields)]
+pub struct InfoJson {
+ pub __last_playlist_index: Option<u32>,
+ pub __post_extractor: Option<String>,
+ pub __x_forwarded_for_ip: Option<String>,
+ pub _filename: Option<PathBuf>,
+ pub _format_sort_fields: Option<Vec<String>>,
+ pub _has_drm: Option<Todo>,
+ pub _type: Option<InfoType>,
+ pub _version: Option<Version>,
+ pub abr: Option<f64>,
+ pub acodec: Option<String>,
+ pub age_limit: Option<u32>,
+ pub aspect_ratio: Option<f64>,
+ pub asr: Option<u32>,
+ pub audio_channels: Option<u32>,
+ pub audio_ext: Option<String>,
+ pub automatic_captions: Option<HashMap<String, Vec<Caption>>>,
+ pub availability: Option<String>,
+ pub average_rating: Option<String>,
+ pub categories: Option<Vec<String>>,
+ pub channel: Option<String>,
+ pub channel_follower_count: Option<u32>,
+ pub channel_id: Option<String>,
+ pub channel_is_verified: Option<bool>,
+ pub channel_url: Option<String>,
+ pub chapters: Option<Vec<Chapter>>,
+ pub comment_count: Option<u32>,
+ pub comments: Option<Vec<Comment>>,
+ pub concurrent_view_count: Option<u32>,
+ pub description: Option<String>,
+ pub display_id: Option<String>,
+ pub downloader_options: Option<DownloaderOptions>,
+ pub duration: Option<f64>,
+ pub duration_string: Option<String>,
+ pub dynamic_range: Option<String>,
+ pub entries: Option<Vec<InfoJson>>,
+ pub episode: Option<String>,
+ pub episode_number: Option<u32>,
+ pub epoch: Option<u32>,
+ pub ext: Option<String>,
+ pub extractor: Option<Extractor>,
+ pub extractor_key: Option<ExtractorKey>,
+ pub filename: Option<PathBuf>,
+ pub filesize: Option<u64>,
+ pub filesize_approx: Option<u64>,
+ pub format: Option<String>,
+ pub format_id: Option<String>,
+ pub format_note: Option<String>,
+ pub formats: Option<Vec<Format>>,
+ pub fps: Option<f64>,
+ pub fulltitle: Option<String>,
+ pub has_drm: Option<bool>,
+ pub heatmap: Option<Vec<HeatMapEntry>>,
+ pub height: Option<u32>,
+ pub http_headers: Option<HttpHeader>,
+ pub id: Option<String>,
+ pub ie_key: Option<ExtractorKey>,
+ pub is_live: Option<bool>,
+ pub language: Option<String>,
+ pub language_preference: Option<i32>,
+ pub license: Option<Todo>,
+ pub like_count: Option<u32>,
+ pub live_status: Option<String>,
+ pub location: Option<Todo>,
+ pub modified_date: Option<String>,
+ pub n_entries: Option<u32>,
+ pub original_url: Option<String>,
+ pub playable_in_embed: Option<bool>,
+ pub playlist: Option<Todo>,
+ pub playlist_autonumber: Option<u32>,
+ pub playlist_channel: Option<Todo>,
+ pub playlist_channel_id: Option<Todo>,
+ pub playlist_count: Option<u32>,
+ pub playlist_id: Option<Todo>,
+ pub playlist_index: Option<u64>,
+ pub playlist_title: Option<Todo>,
+ pub playlist_uploader: Option<Todo>,
+ pub playlist_uploader_id: Option<Todo>,
+ pub preference: Option<Todo>,
+ pub protocol: Option<String>,
+ pub quality: Option<f64>,
+ pub release_date: Option<String>,
+ pub release_timestamp: Option<u64>,
+ pub release_year: Option<u32>,
+ pub requested_downloads: Option<Vec<RequestedDownloads>>,
+ pub requested_entries: Option<Vec<u32>>,
+ pub requested_formats: Option<Vec<Format>>,
+ pub requested_subtitles: Option<HashMap<String, Subtitle>>,
+ pub resolution: Option<String>,
+ pub season: Option<String>,
+ pub season_number: Option<u32>,
+ pub series: Option<String>,
+ pub source_preference: Option<i32>,
+ pub sponsorblock_chapters: Option<Vec<SponsorblockChapter>>,
+ pub stretched_ratio: Option<Todo>,
+ pub subtitles: Option<HashMap<String, Vec<Caption>>>,
+ pub tags: Option<Vec<String>>,
+ pub tbr: Option<f64>,
+ pub thumbnail: Option<Url>,
+ pub thumbnails: Option<Vec<ThumbNail>>,
+ pub timestamp: Option<u64>,
+ pub title: Option<String>,
+ pub upload_date: Option<String>,
+ pub uploader: Option<String>,
+ pub uploader_id: Option<String>,
+ pub uploader_url: Option<String>,
+ pub url: Option<Url>,
+ pub vbr: Option<f64>,
+ pub vcodec: Option<String>,
+ pub video_ext: Option<String>,
+ pub view_count: Option<u32>,
+ pub was_live: Option<bool>,
+ pub webpage_url: Option<Url>,
+ pub webpage_url_basename: Option<String>,
+ pub webpage_url_domain: Option<String>,
+ pub width: Option<u32>,
+}
+
+/// One element of the `requested_downloads` list of an [`InfoJson`].
+///
+/// Unlike [`InfoJson`], most fields here are mandatory, so deserialisation fails if
+/// one of them is absent. `download` in lib.rs reads `filename` from the first element.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+#[serde(deny_unknown_fields)]
+pub struct RequestedDownloads {
+ pub __files_to_merge: Option<Vec<Todo>>,
+ pub __finaldir: PathBuf,
+ pub __infojson_filename: PathBuf,
+ pub __postprocessors: Vec<Todo>,
+ pub __real_download: bool,
+ pub __write_download_archive: bool,
+ pub _filename: PathBuf,
+ pub _type: InfoType,
+ pub _version: Version,
+ pub abr: f64,
+ pub acodec: String,
+ pub aspect_ratio: f64,
+ pub asr: u32,
+ pub audio_channels: u32,
+ pub chapters: Option<Vec<SponsorblockChapter>>,
+ pub duration: Option<f64>,
+ pub dynamic_range: String,
+ pub ext: String,
+ pub filename: PathBuf,
+ pub filepath: PathBuf,
+ pub filesize_approx: u64,
+ pub format: String,
+ pub format_id: String,
+ pub format_note: String,
+ pub fps: f64,
+ pub height: u32,
+ pub infojson_filename: PathBuf,
+ pub language: Option<String>,
+ pub protocol: String,
+ pub requested_formats: Vec<Format>,
+ pub resolution: String,
+ pub tbr: f64,
+ pub vbr: f64,
+ pub vcodec: String,
+ pub width: u32,
+}
+
+/// A value of the `requested_subtitles` map of an [`InfoJson`].
+/// All four fields are required; unknown keys are rejected.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct Subtitle {
+ pub ext: SubtitleExt,
+ pub filepath: PathBuf,
+ pub name: String,
+ pub url: Url,
+}
+
+/// File format of a subtitle or caption track.
+///
+/// Variants serialise to the lowercase spelling they are read from (e.g. `"vtt"`).
+/// With `alias` alone only deserialisation knew that spelling and serialisation
+/// emitted the Rust variant name, so a value did not round-trip unchanged. The Rust
+/// variant name is kept as an alias, so previously serialised data still loads.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum SubtitleExt {
+    #[serde(rename = "vtt", alias = "Vtt")]
+    Vtt,
+
+    #[serde(rename = "json", alias = "Json")]
+    Json,
+    #[serde(rename = "json3", alias = "Json3")]
+    Json3,
+
+    #[serde(rename = "ttml", alias = "Ttml")]
+    Ttml,
+
+    #[serde(rename = "srv1", alias = "Srv1")]
+    Srv1,
+    #[serde(rename = "srv2", alias = "Srv2")]
+    Srv2,
+    #[serde(rename = "srv3", alias = "Srv3")]
+    Srv3,
+}
+
+/// One caption track, as found in the lists of the `subtitles` and
+/// `automatic_captions` maps of an [`InfoJson`]. Only `ext` and `url` are required.
+#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct Caption {
+ pub ext: SubtitleExt,
+ pub name: Option<String>,
+ pub protocol: Option<String>,
+ pub url: String,
+ pub filepath: Option<PathBuf>,
+ pub video_id: Option<String>,
+}
+
+/// One element of the `chapters` list of an [`InfoJson`]: a titled time range.
+// NOTE(review): `start_time`/`end_time` are presumably seconds - confirm.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct Chapter {
+ pub end_time: f64,
+ pub start_time: f64,
+ pub title: String,
+}
+
+/// One element of `sponsorblock_chapters` in [`InfoJson`] and of `chapters` in
+/// [`RequestedDownloads`]. Only `start_time`, `end_time` and `title` are required.
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+#[serde(deny_unknown_fields)]
+pub struct SponsorblockChapter {
+ /// This is an utterly useless field, and should thus be ignored
+ pub _categories: Option<Vec<Vec<Value>>>,
+
+ pub categories: Option<Vec<SponsorblockChapterCategory>>,
+ pub category: Option<SponsorblockChapterCategory>,
+ pub category_names: Option<Vec<String>>,
+ pub end_time: f64,
+ pub name: Option<String>,
+ pub r#type: Option<SponsorblockChapterType>,
+ pub start_time: f64,
+ pub title: String,
+}
+
+/// Deserialisation helper (for `#[serde(deserialize_with = "get_none")]`) that
+/// discards whatever value is present and always yields `None`.
+///
+/// The value is consumed through [`serde::de::IgnoredAny`]. Returning without reading
+/// it leaves the value unread in streaming deserialisers such as `serde_json`, which
+/// then fail while parsing what follows.
+pub fn get_none<'de, D, T>(d: D) -> Result<Option<T>, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    serde::de::IgnoredAny::deserialize(d)?;
+
+    Ok(None)
+}
+
+/// The `type` of a [`SponsorblockChapter`].
+///
+/// Variants serialise to the lowercase spelling they are read from, so values
+/// round-trip unchanged; the Rust variant name is still accepted on input.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum SponsorblockChapterType {
+    #[serde(rename = "skip", alias = "Skip")]
+    Skip,
+
+    #[serde(rename = "chapter", alias = "Chapter")]
+    Chapter,
+}
+/// The category of a [`SponsorblockChapter`].
+///
+/// Variants serialise to the lowercase spelling they are read from, so values
+/// round-trip unchanged; the Rust variant name is still accepted on input.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum SponsorblockChapterCategory {
+    #[serde(rename = "filler", alias = "Filler")]
+    Filler,
+
+    #[serde(rename = "sponsor", alias = "Sponsor")]
+    Sponsor,
+
+    #[serde(rename = "selfpromo", alias = "SelfPromo")]
+    SelfPromo,
+
+    #[serde(rename = "chapter", alias = "Chapter")]
+    Chapter,
+
+    #[serde(rename = "intro", alias = "Intro")]
+    Intro,
+
+    #[serde(rename = "outro", alias = "Outro")]
+    Outro,
+}
+
+/// One element of the `heatmap` list of an [`InfoJson`]: a time range and its value.
+// NOTE(review): the meaning and range of `value` are not visible here - confirm.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct HeatMapEntry {
+ pub start_time: f64,
+ pub end_time: f64,
+ pub value: f64,
+}
+
+/// Value of the `extractor` field of an [`InfoJson`].
+///
+/// Variants serialise to the spelling they are read from (e.g. `"youtube:tab"`), so
+/// values round-trip unchanged; the Rust variant name is still accepted on input.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum Extractor {
+    #[serde(rename = "generic", alias = "Generic")]
+    Generic,
+
+    // The expected spelling already equals the variant name.
+    SVTSeries,
+
+    #[serde(rename = "youtube", alias = "YouTube")]
+    YouTube,
+
+    #[serde(rename = "youtube:tab", alias = "YouTubeTab")]
+    YouTubeTab,
+}
+
+/// Value of the `extractor_key` and `ie_key` fields of an [`InfoJson`].
+///
+/// Variants serialise to the spelling they are read from (e.g. `"Youtube"`), so
+/// values round-trip unchanged; the Rust variant name is still accepted on input.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum ExtractorKey {
+    // For these two the expected spelling already equals the variant name.
+    Generic,
+
+    SVTSeries,
+
+    #[serde(rename = "Youtube", alias = "YouTube")]
+    YouTube,
+
+    #[serde(rename = "YoutubeTab", alias = "YouTubeTab")]
+    YouTubeTab,
+}
+
+/// Value of the `_type` field of an [`InfoJson`] or [`RequestedDownloads`].
+///
+/// Variants serialise to the lowercase spelling they are read from, so values
+/// round-trip unchanged; the Rust variant name is still accepted on input.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)]
+#[serde(deny_unknown_fields)]
+pub enum InfoType {
+    #[serde(rename = "playlist", alias = "Playlist")]
+    Playlist,
+
+    #[serde(rename = "url", alias = "Url")]
+    Url,
+
+    #[serde(rename = "video", alias = "Video")]
+    Video,
+}
+
+/// Value of the `_version` field of an [`InfoJson`] or [`RequestedDownloads`].
+/// Only `current_git_head` may be absent.
+#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct Version {
+ pub current_git_head: Option<String>,
+ pub release_git_head: String,
+ pub repository: String,
+ pub version: String,
+}
+
+/// The `parent` of a [`Comment`]: either the top level (`"root"`) or the id of
+/// another comment. Deserialised from a plain string via `From<String>`.
+#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(from = "String")]
+#[serde(deny_unknown_fields)]
+pub enum Parent {
+    Root,
+    Id(String),
+}
+
+impl Parent {
+    /// Returns the parent comment's id, or `None` for [`Parent::Root`].
+    pub fn id(&self) -> Option<&str> {
+        match self {
+            Self::Id(id) => Some(id),
+            Self::Root => None,
+        }
+    }
+}
+
+impl From<String> for Parent {
+    /// Maps the literal `"root"` to [`Parent::Root`] and anything else to an id.
+    fn from(value: String) -> Self {
+        if value != "root" {
+            return Self::Id(value);
+        }
+
+        Self::Root
+    }
+}
+
+/// The id of a [`Comment`], deserialised from a plain string via `From<String>`.
+#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(from = "String")]
+#[serde(deny_unknown_fields)]
+pub struct Id {
+    pub id: String,
+}
+impl From<String> for Id {
+    /// Keeps only the part after the last `.`; a string without dots is kept whole.
+    fn from(value: String) -> Self {
+        // Splitting from the right makes the first item the last dot-separated part.
+        let id = value.rsplit('.').next().unwrap_or(&value).to_owned();
+
+        Self { id }
+    }
+}
+
+/// One element of the `comments` list of an [`InfoJson`].
+///
+/// `like_count` defaults to 0 and `author` to `"<Unknown>"` when absent. `edited` is
+/// not read from a boolean: it is derived from the `_time_text` string and is `true`
+/// when that text contains `" (edited)"`.
+// NOTE(review): serialisation writes `edited` as a bool while deserialisation expects
+// a string for it, so a serialised value presumably cannot be read back - confirm.
+#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct Comment {
+ pub id: Id,
+ pub text: String,
+ #[serde(default = "zero")]
+ pub like_count: u32,
+ pub is_pinned: bool,
+ pub author_id: String,
+ #[serde(default = "unknown")]
+ pub author: String,
+ pub author_is_verified: bool,
+ pub author_thumbnail: Url,
+ pub parent: Parent,
+ #[serde(deserialize_with = "edited_from_time_text", alias = "_time_text")]
+ pub edited: bool,
+ // Can't also be deserialized, as it's already used in 'edited'
+ // _time_text: String,
+ pub timestamp: i64,
+ pub author_url: Url,
+ pub author_is_uploader: bool,
+ pub is_favorited: bool,
+}
+/// Serde default for `Comment::author`: the placeholder name `<Unknown>`.
+fn unknown() -> String {
+    String::from("<Unknown>")
+}
+/// Serde default for `Comment::like_count`.
+fn zero() -> u32 {
+    0_u32
+}
+/// Deserialises a string and reports whether it contains the marker `" (edited)"`.
+/// Used to derive `Comment::edited` from the `_time_text` value.
+fn edited_from_time_text<'de, D>(d: D) -> Result<bool, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let time_text = String::deserialize(d)?;
+
+    Ok(time_text.contains(" (edited)"))
+}
+
+/// One element of the `thumbnails` list of an [`InfoJson`]. Only `url` is required.
+#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct ThumbNail {
+ pub id: Option<String>,
+ pub preference: Option<i32>,
+ /// in the form of "[`height`]x[`width`]"
+ pub resolution: Option<String>,
+ pub url: Url,
+ pub width: Option<u32>,
+ pub height: Option<u32>,
+}
+
+/// One element of the `formats` / `requested_formats` lists.
+///
+/// `ext`, `format_id`, `url` and `vcodec` are required; everything else is optional.
+/// Unknown keys are rejected.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct Format {
+ pub __needs_testing: Option<bool>,
+ pub __working: Option<bool>,
+ pub abr: Option<f64>,
+ pub acodec: Option<String>,
+ pub aspect_ratio: Option<f64>,
+ pub asr: Option<f64>,
+ pub audio_channels: Option<u32>,
+ pub audio_ext: Option<String>,
+ pub columns: Option<u32>,
+ pub container: Option<String>,
+ pub downloader_options: Option<DownloaderOptions>,
+ pub dynamic_range: Option<String>,
+ pub ext: String,
+ pub filepath: Option<PathBuf>,
+ pub filesize: Option<u64>,
+ pub filesize_approx: Option<u64>,
+ pub format: Option<String>,
+ pub format_id: String,
+ pub format_index: Option<String>,
+ pub format_note: Option<String>,
+ pub fps: Option<f64>,
+ pub fragment_base_url: Option<Todo>,
+ pub fragments: Option<Vec<Fragment>>,
+ pub has_drm: Option<bool>,
+ pub height: Option<u32>,
+ pub http_headers: Option<HttpHeader>,
+ pub is_dash_periods: Option<bool>,
+ pub language: Option<String>,
+ pub language_preference: Option<i32>,
+ pub manifest_stream_number: Option<u32>,
+ pub manifest_url: Option<Url>,
+ pub preference: Option<i32>,
+ pub protocol: Option<String>,
+ pub quality: Option<f64>,
+ pub resolution: Option<String>,
+ pub rows: Option<u32>,
+ pub source_preference: Option<i32>,
+ pub tbr: Option<f64>,
+ pub url: Url,
+ pub vbr: Option<f64>,
+ pub vcodec: String,
+ pub video_ext: Option<String>,
+ pub width: Option<u32>,
+}
+
+/// Value of the `downloader_options` field of an [`InfoJson`] or [`Format`].
+/// Its single field is private and required.
+#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct DownloaderOptions {
+ http_chunk_size: u64,
+}
+
+/// Value of the `http_headers` field of an [`InfoJson`] or [`Format`].
+///
+/// Each field serialises to the HTTP header name it is read from (e.g.
+/// `"User-Agent"`). With `alias` alone serialisation emitted the snake_case field
+/// name, so a value did not round-trip unchanged. The snake_case name is kept as an
+/// alias, so previously serialised data still loads.
+#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)]
+#[serde(deny_unknown_fields)]
+pub struct HttpHeader {
+    #[serde(rename = "User-Agent", alias = "user_agent")]
+    pub user_agent: Option<String>,
+    #[serde(rename = "Accept", alias = "accept")]
+    pub accept: Option<String>,
+    #[serde(rename = "Accept-Language", alias = "accept_language")]
+    pub accept_language: Option<String>,
+    #[serde(rename = "Sec-Fetch-Mode", alias = "sec_fetch_mode")]
+    pub sec_fetch_mode: Option<String>,
+}
+
+/// One element of the `fragments` list of a [`Format`]. All fields are optional.
+#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)]
+#[serde(deny_unknown_fields)]
+pub struct Fragment {
+ pub url: Option<Url>,
+ pub duration: Option<f64>,
+ pub path: Option<PathBuf>,
+}
+
+impl InfoJson {
+    /// Consumes the value and converts it into a Python `dict` by serialising it to
+    /// JSON and loading that JSON on the Python side.
+    pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> {
+        json_loads_str(py, self)
+    }
+}
diff --git a/yt_dlp/src/wrapper/mod.rs b/yt_dlp/src/wrapper/mod.rs
new file mode 100644
index 0000000..3fe3247
--- /dev/null
+++ b/yt_dlp/src/wrapper/mod.rs
@@ -0,0 +1,12 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+pub mod info_json;
+// pub mod yt_dlp_options;
diff --git a/yt_dlp/src/wrapper/yt_dlp_options.rs b/yt_dlp/src/wrapper/yt_dlp_options.rs
new file mode 100644
index 0000000..c2a86df
--- /dev/null
+++ b/yt_dlp/src/wrapper/yt_dlp_options.rs
@@ -0,0 +1,62 @@
+// yt - A fully featured command line YouTube client
+//
+// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de>
+// SPDX-License-Identifier: GPL-3.0-or-later
+//
+// This file is part of Yt.
+//
+// You should have received a copy of the License along with this program.
+// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
+
+use pyo3::{types::PyDict, Bound, PyResult, Python};
+use serde::Serialize;
+
+use crate::json_loads;
+
+/// A typed set of options that is serialized to JSON and turned into a Python
+/// dictionary by `YtDlpOptions::to_py_dict`.
+///
+/// The field names are the serialized keys (presumably the option names
+/// yt-dlp expects -- TODO confirm). Options that are not supported yet are
+/// kept below as commented-out fields.
+#[derive(Clone, Serialize)]
+pub struct YtDlpOptions {
+    /// Presumably the first playlist index to process -- TODO confirm.
+    pub playliststart: u32,
+    /// Presumably the last playlist index to process -- TODO confirm.
+    pub playlistend: u32,
+    /// Serialized as the boolean `noplaylist` key.
+    pub noplaylist: bool,
+    /// Serialized as one of the strings defined by [`ExtractFlat`].
+    pub extract_flat: ExtractFlat,
+    // pub extractor_args: ExtractorArgs,
+    // pub format: String,
+    // pub fragment_retries: u32,
+    // #[serde(rename(serialize = "getcomments"))]
+    // pub get_comments: bool,
+    // #[serde(rename(serialize = "ignoreerrors"))]
+    // pub ignore_errors: bool,
+    // pub retries: u32,
+    // #[serde(rename(serialize = "writeinfojson"))]
+    // pub write_info_json: bool,
+    // pub postprocessors: Vec<serde_json::Map<String, serde_json::Value>>,
+}
+
+/// The possible values of the `extract_flat` option.
+///
+/// Variants are serialized under their snake_case names, i.e. as
+/// `"in_playlist"` and `"discard_in_playlist"`.
+#[derive(Clone, Copy, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ExtractFlat {
+    /// Serialized as `"in_playlist"`.
+    InPlaylist,
+
+    /// Serialized as `"discard_in_playlist"`.
+    DiscardInPlaylist,
+}
+
+/// Per-extractor arguments; only a `youtube` section is defined.
+#[derive(Clone, Serialize)]
+pub struct ExtractorArgs {
+    /// Arguments serialized under the `youtube` key.
+    pub youtube: YoutubeExtractorArgs,
+}
+
+/// The `youtube` section of [`ExtractorArgs`].
+///
+/// The fields are public, like those of the sibling option structs: the type
+/// has no constructor, so private fields would make it impossible to build a
+/// value outside of this module.
+#[derive(Serialize, Clone)]
+pub struct YoutubeExtractorArgs {
+    /// Serialized as a list of strings under `comment_sort`
+    /// (presumably the comment sort order -- TODO confirm).
+    pub comment_sort: Vec<String>,
+    /// Serialized as a list of strings under `max_comments`
+    /// (presumably comment count limits -- TODO confirm).
+    pub max_comments: Vec<String>,
+}
+
+impl YtDlpOptions {
+    /// Consumes the options, serializes them to a JSON string and lets
+    /// `json_loads` turn that string into a Python dictionary.
+    ///
+    /// # Errors
+    /// Propagates the error returned by `json_loads`.
+    ///
+    /// # Panics
+    /// Panics if `serde_json::to_string` fails to serialize the options.
+    pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> {
+        let serialized = serde_json::to_string(&self).expect("This should always work");
+
+        Ok(json_loads(py, serialized)?)
+    }
+}