diff options
Diffstat (limited to '')
28 files changed, 1800 insertions, 1765 deletions
diff --git a/crates/yt_dlp/src/wrapper/mod.rs b/crates/yt/src/shared/mod.rs index 3fe3247..d3cc563 100644 --- a/crates/yt_dlp/src/wrapper/mod.rs +++ b/crates/yt/src/shared/mod.rs @@ -1,6 +1,6 @@ // yt - A fully featured command line YouTube client // -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> // SPDX-License-Identifier: GPL-3.0-or-later // // This file is part of Yt. @@ -8,5 +8,4 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -pub mod info_json; -// pub mod yt_dlp_options; +pub(crate) mod bytes; diff --git a/crates/yt_dlp/src/python_json_decode_failed.error_msg.license b/crates/yt/tests/subscriptions/import_export/golden.txt.license index 7813eb6..7813eb6 100644 --- a/crates/yt_dlp/src/python_json_decode_failed.error_msg.license +++ b/crates/yt/tests/subscriptions/import_export/golden.txt.license diff --git a/crates/yt_dlp/Cargo.toml b/crates/yt_dlp/Cargo.toml index a948a34..87bb610 100644 --- a/crates/yt_dlp/Cargo.toml +++ b/crates/yt_dlp/Cargo.toml @@ -10,7 +10,7 @@ [package] name = "yt_dlp" -description = "A wrapper around the python yt_dlp library" +description = "A rust ffi wrapper library for the python yt_dlp library" keywords = [] categories = [] version.workspace = true @@ -19,19 +19,18 @@ authors.workspace = true license.workspace = true repository.workspace = true rust-version.workspace = true -publish = false +publish = true [dependencies] -pyo3 = { version = "0.23.4", features = ["auto-initialize"] } -bytes.workspace = true +curl = "0.4.48" log.workspace = true -serde.workspace = true +pyo3 = { workspace = true } +pyo3-pylogger = { path = "crates/pyo3-pylogger" } +serde = { workspace = true, features = ["derive"] } serde_json.workspace = true +thiserror = "2.0.12" url.workspace = true -[dev-dependencies] -tokio.workspace = true - [lints] workspace = true diff --git a/crates/yt_dlp/README.md b/crates/yt_dlp/README.md index 591ef2e..ece8540 100644 --- a/crates/yt_dlp/README.md +++ b/crates/yt_dlp/README.md @@ -12,7 +12,7 @@ If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. # Yt_py -> \[can be empty\] +> [can be empty] Some text about the project. diff --git a/crates/yt_dlp/.cargo/config.toml b/crates/yt_dlp/crates/pyo3-pylogger/.gitignore index d84f14d..733c5bc 100644 --- a/crates/yt_dlp/.cargo/config.toml +++ b/crates/yt_dlp/crates/pyo3-pylogger/.gitignore @@ -1,12 +1,13 @@ # yt - A fully featured command line YouTube client # -# Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -# SPDX-License-Identifier: GPL-3.0-or-later +# Copyright (C) 2025 Dylan Bobby Storey <dylan.storey@gmail.com>, cpu <daniel@binaryparadox.net>, Warren Snipes <contact@warrensnipes.dev> +# SPDX-License-Identifier: Apache-2.0 # # This file is part of Yt. # # You should have received a copy of the License along with this program. # If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -[env] -PYO3_PYTHON = "/nix/store/7xzk119acyws2c4ysygdv66l0grxkr39-python3-3.11.9-env/bin/python3" +target +Cargo.lock +.idea diff --git a/crates/yt_dlp/crates/pyo3-pylogger/Cargo.toml b/crates/yt_dlp/crates/pyo3-pylogger/Cargo.toml new file mode 100644 index 0000000..28dfacd --- /dev/null +++ b/crates/yt_dlp/crates/pyo3-pylogger/Cargo.toml @@ -0,0 +1,31 @@ +# yt - A fully featured command line YouTube client +# +# Copyright (C) 2025 Dylan Bobby Storey <dylan.storey@gmail.com>, cpu <daniel@binaryparadox.net>, Warren Snipes <contact@warrensnipes.dev> +# SPDX-License-Identifier: Apache-2.0 +# +# This file is part of Yt. +# +# You should have received a copy of the License along with this program. +# If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +[package] +name = "pyo3-pylogger" +version = "1.8.0" +edition = "2021" +authors = [ + "Dylan Bobby Storey <dylan.storey@gmail.com>", + "cpu <daniel@binaryparadox.net>", + "Warren Snipes <contact@warrensnipes.dev>", +] +description = "Enables `log` for pyo3 based Rust applications using the `logging` modules." +publish = ["crates-io"] +license = "Apache-2.0" +readme = "README.md" +homepage = "https://github.com/dylanbstorey/pyo3-pylogger" +repository = "https://github.com/dylanbstorey/pyo3-pylogger" +documentation = "https://github.com/dylanbstorey/pyo3-pylogger" + +[dependencies] +pyo3 = { workspace = true } +log = { workspace = true } +phf = { version = "0.12", features = ["macros"] } diff --git a/crates/yt_dlp/crates/pyo3-pylogger/LICENSE b/crates/yt_dlp/crates/pyo3-pylogger/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/crates/yt_dlp/crates/pyo3-pylogger/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/crates/yt_dlp/crates/pyo3-pylogger/README.md b/crates/yt_dlp/crates/pyo3-pylogger/README.md new file mode 100644 index 0000000..e68903b --- /dev/null +++ b/crates/yt_dlp/crates/pyo3-pylogger/README.md @@ -0,0 +1,160 @@ +<!-- +yt - A fully featured command line YouTube client + +Copyright (C) 2025 Dylan Bobby Storey <dylan.storey@gmail.com>, cpu <daniel@binaryparadox.net>, Warren Snipes <contact@warrensnipes.dev> +SPDX-License-Identifier: Apache-2.0 + +This file is part of Yt. + +You should have received a copy of the License along with this program. +If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. +--> + +# pyo3-pylogger + +Enables log messages for pyo3 embedded Python applications using Python's +`logging` or module. + +# Features + +- Logging integration between Python's `logging` module and Rust's `log` crate +- Structured logging support via the logging + [extra](https://docs.python.org/3/library/logging.html#logging.Logger.debug) + field (requires `kv` or `tracing-kv`feature) +- Integration with Rust's `tracing` library (requires `tracing` feature) + +# Usage + +```rust +use log::{info, warn}; +use pyo3::{ffi::c_str, prelude::*}; +fn main() { + // register the host handler with python logger, providing a logger target + pyo3_pylogger::register("example_application_py_logger"); + + // initialize up a logger + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("trace")).init(); + //just show the logger working from Rust. + info!("Just some normal information!"); + warn!("Something spooky happened!"); + + // Ask pyo3 to set up embedded Python interpreter + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + // Python code can now `import logging` as usual + py.run( + c_str!( + r#" +import logging +logging.getLogger().setLevel(0) +logging.debug('DEBUG') +logging.info('INFO') +logging.warning('WARNING') +logging.error('ERROR') +logging.getLogger('foo.bar.baz').info('INFO')"# + ), + None, + None, + ) + .unwrap(); + }) +} + + +``` + +## Outputs + +```bash +[2025-03-28T01:12:29Z INFO helloworld] Just some normal information! +[2025-03-28T01:12:29Z WARN helloworld] Something spooky happened! +[2025-03-28T01:12:29Z DEBUG example_application_py_logger] DEBUG +[2025-03-28T01:12:29Z INFO example_application_py_logger] INFO +[2025-03-28T01:12:29Z WARN example_application_py_logger] WARNING +[2025-03-28T01:12:29Z ERROR example_application_py_logger] ERROR +[2025-03-28T01:12:29Z INFO example_application_py_logger::foo::bar::baz] INFO +``` + +## Structured Logging + +To enable structured logging support, add the `kv` feature to your `Cargo.toml`: + +```toml +[dependencies] +pyo3-pylogger = { version = "0.4", features = ["kv"] } +``` + +Then you can use Python's `extra` parameter to pass structured data: + +```python +logging.info("Processing order", extra={"order_id": "12345", "amount": 99.99}) +``` + +When using a structured logging subscriber in Rust, these key-value pairs will +be properly captured, for example: + +```bash +[2025-03-28T01:12:29Z INFO example_application_py_logger] Processing order order_id=12345 amount=99.99 +``` + +## Tracing Support + +To enable integration with Rust's `tracing` library, add the `tracing` feature +to your `Cargo.toml`: + +```toml +[dependencies] +pyo3-pylogger = { version = "0.4", default-features = false, features = ["tracing"] } +``` + +When the `tracing` feature is enabled, Python logs will be forwarded to the +active tracing subscriber: + +```rust +use tracing::{info, warn}; +use pyo3::{ffi::c_str, prelude::*}; + +fn main() { + // Register the tracing handler with Python logger + pyo3_pylogger::register_tracing("example_application_py_logger"); + + // Initialize tracing subscriber + tracing_subscriber::fmt::init(); + + // Tracing events from Rust + info!("Tracing information from Rust"); + + // Python logging will be captured by the tracing subscriber + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + py.run( + c_str!( + r#" +import logging +logging.getLogger().setLevel(0) +logging.info('This will be captured by tracing')"# + ), + None, + None, + ) + .unwrap(); + }) +} +``` + +### Structured Data with Tracing + +The `tracing` feature automatically supports Python's `extra` field for +structured data. However, the KV fields are json serialized and not available as +tracing attributes. This is a limitation of the `tracing` library and is not +specific to this crate. See +[this issue](https://github.com/tokio-rs/tracing/issues/372) for more +information. + +# Feature Flags + +- `kv`: Enables structured logging support via Python's `extra` fields. This + adds support for the `log` crate's key-value system. +- `tracing`: Enables integration with Rust's `tracing` library. +- `tracing-kv`: Enables structured logging support via Python's `extra` fields + and integration with Rust's `tracing` library. diff --git a/crates/yt_dlp/crates/pyo3-pylogger/src/kv.rs b/crates/yt_dlp/crates/pyo3-pylogger/src/kv.rs new file mode 100644 index 0000000..67a0c3e --- /dev/null +++ b/crates/yt_dlp/crates/pyo3-pylogger/src/kv.rs @@ -0,0 +1,127 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Dylan Bobby Storey <dylan.storey@gmail.com>, cpu <daniel@binaryparadox.net>, Warren Snipes <contact@warrensnipes.dev> +// SPDX-License-Identifier: Apache-2.0 +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +//! Key-Value handling module for Python LogRecord attributes. +//! +//! This module provides functionality to extract and handle custom key-value pairs +//! from Python LogRecord objects, facilitating integration between Python's logging +//! system and Rust's log crate. + +use pyo3::{ + Bound, PyAny, PyResult, + types::{PyAnyMethods, PyDict, PyDictMethods, PyListMethods}, +}; +use std::collections::HashMap; + +/// A static hashset containing all standard [LogRecord](https://github.com/python/cpython/blob/8a00c9a4d2ce9d373b13f8f0a2265a65f4523293/Lib/logging/__init__.py#L286-L287) attributes defined in the CPython logging module. +/// +/// This set is used to differentiate between standard [LogRecord](https://github.com/python/cpython/blob/8a00c9a4d2ce9d373b13f8f0a2265a65f4523293/Lib/logging/__init__.py#L286-L287) attributes and custom key-value pairs +/// that users might add to their log records. The attributes listed here correspond to the default +/// attributes created by Python's [makeRecord](https://github.com/python/cpython/blob/8a00c9a4d2ce9d373b13f8f0a2265a65f4523293/Lib/logging/__init__.py#L1633-L1634) function. +pub static LOG_RECORD_KV_ATTRIBUTES: phf::Set<&'static str> = phf::phf_set! { + "name", + "msg", + "args", + "levelname", + "levelno", + "pathname", + "filename", + "module", + "exc_info", + "exc_text", + "stack_info", + "lineno", + "funcName", + "created", + "msecs", + "relativeCreated", + "thread", + "threadName", + "processName", + "process", + "taskName", +}; + +/// Extracts custom key-value pairs from a Python LogRecord object. +/// +/// This function examines the `__dict__` of a LogRecord(https://github.com/python/cpython/blob/8a00c9a4d2ce9d373b13f8f0a2265a65f4523293/Lib/logging/__init__.py#L286-L287) object and identifies any attributes +/// that are not part of the standard [LogRecord](https://github.com/python/cpython/blob/8a00c9a4d2ce9d373b13f8f0a2265a65f4523293/Lib/logging/__init__.py#L286-L287) attributes. These custom attributes are +/// treated as key-value pairs for structured logging. +/// +/// # Arguments +/// * `record` - A reference to a Python LogRecord object +/// +/// # Returns +/// * `PyResult<Option<HashMap<String, pyo3::Bound<'a, pyo3::PyAny>>>>` - If custom attributes +/// are found, returns a HashMap containing the key-value pairs. Returns None if no custom +/// attributes are present. +/// +/// # Note +/// This function relies on the fact that Python will not implement new attributes on the LogRecord object. +/// If new attributes are added, this function will not be able to filter them out and will return them as key-value pairs. +/// In that future, [LOG_RECORD_KV_ATTRIBUTES] will need to be updated to include the new attributes. +/// This is an unfortunate side effect of using the `__dict__` attribute to extract key-value pairs. However, there are no other ways to handle this given that CPython does not distinguish between user-provided attributes and attributes created by the logging module. +pub fn find_kv_args<'a>( + record: &Bound<'a, PyAny>, +) -> PyResult<Option<std::collections::HashMap<String, pyo3::Bound<'a, pyo3::PyAny>>>> { + let dict: Bound<'_, PyDict> = record.getattr("__dict__")?.extract()?; + + // We can abuse the fact that Python dictionaries are ordered by insertion order to reverse iterate over the keys + // and stop at the first key that is not a predefined key-value pair attribute. + let mut kv_args: Option<HashMap<String, pyo3::Bound<'_, pyo3::PyAny>>> = None; + + for item in dict.items().iter().rev() { + let (key, value) = + item.extract::<(pyo3::Bound<'_, pyo3::PyAny>, pyo3::Bound<'_, pyo3::PyAny>)>()?; + + let key_str = key.to_string(); + if LOG_RECORD_KV_ATTRIBUTES.contains(&key_str) { + break; + } + if kv_args.is_none() { + kv_args = Some(HashMap::new()); + } + + kv_args.as_mut().unwrap().insert(key_str, value); + } + + Ok(kv_args) +} + +/// A wrapper struct that implements the `log::kv::Source` trait for Python key-value pairs. +/// +/// This struct allows Python LogRecord custom attributes to be used with Rust's +/// structured logging system by implementing the necessary trait for key-value handling. +/// +/// # Type Parameters +/// * `'a` - The lifetime of the contained Python values +pub struct KVSource<'a>(pub HashMap<String, pyo3::Bound<'a, pyo3::PyAny>>); + +impl log::kv::Source for KVSource<'_> { + /// Visits each key-value pair in the source, converting Python values to debug representations. + /// + /// # Arguments + /// * `visitor` - The visitor that will process each key-value pair + /// + /// # Returns + /// * `Result<(), log::kv::Error>` - Success if all pairs are visited successfully, + /// or an error if visitation fails + fn visit<'kvs>( + &'kvs self, + visitor: &mut dyn log::kv::VisitSource<'kvs>, + ) -> Result<(), log::kv::Error> { + for (key, value) in &self.0 { + let v: log::kv::Value<'_> = log::kv::Value::from_debug(value); + + visitor.visit_pair(log::kv::Key::from_str(key), v)?; + } + Ok(()) + } +} diff --git a/crates/yt_dlp/crates/pyo3-pylogger/src/level.rs b/crates/yt_dlp/crates/pyo3-pylogger/src/level.rs new file mode 100644 index 0000000..d244ef4 --- /dev/null +++ b/crates/yt_dlp/crates/pyo3-pylogger/src/level.rs @@ -0,0 +1,43 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Dylan Bobby Storey <dylan.storey@gmail.com>, cpu <daniel@binaryparadox.net>, Warren Snipes <contact@warrensnipes.dev> +// SPDX-License-Identifier: Apache-2.0 +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +/// A wrapper type for logging levels that supports both `tracing` and `log` features. +pub(crate) struct Level(pub log::Level); + +/// Converts a numeric level value to the appropriate logging Level. +/// +/// # Arguments +/// +/// * `level` - A u8 value representing the logging level: +/// * 40+ = Error +/// * 30-39 = Warn +/// * 20-29 = Info +/// * 10-19 = Debug +/// * 0-9 = Trace +/// +/// # Returns +/// +/// Returns a `Level` wrapper containing either a `tracing::Level` or `log::Level` +/// depending on which feature is enabled. +pub(crate) fn get_level(level: u8) -> Level { + { + if level.ge(&40u8) { + Level(log::Level::Error) + } else if level.ge(&30u8) { + Level(log::Level::Warn) + } else if level.ge(&20u8) { + Level(log::Level::Info) + } else if level.ge(&10u8) { + Level(log::Level::Debug) + } else { + Level(log::Level::Trace) + } + } +} diff --git a/crates/yt_dlp/crates/pyo3-pylogger/src/lib.rs b/crates/yt_dlp/crates/pyo3-pylogger/src/lib.rs new file mode 100644 index 0000000..3ecb123 --- /dev/null +++ b/crates/yt_dlp/crates/pyo3-pylogger/src/lib.rs @@ -0,0 +1,211 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Dylan Bobby Storey <dylan.storey@gmail.com>, cpu <daniel@binaryparadox.net>, Warren Snipes <contact@warrensnipes.dev> +// SPDX-License-Identifier: Apache-2.0 +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::{ + ffi::CString, + sync::{self, OnceLock}, +}; + +use log::{debug, log_enabled}; +use pyo3::{ + Bound, Py, PyAny, PyResult, Python, pyfunction, + sync::OnceLockExt, + types::{PyAnyMethods, PyDict, PyListMethods, PyModuleMethods}, + wrap_pyfunction, +}; + +mod kv; +mod level; + +static LOGGER: sync::OnceLock<Py<PyAny>> = OnceLock::new(); + +/// Is the specified record to be logged? Returns false for no, +/// true for yes. Filters can either modify log records in-place or +/// return a completely different record instance which will replace +/// the original log record in any future processing of the event. +#[pyfunction] +fn filter_error_log<'py>(record: Bound<'py, PyAny>) -> bool { + // Filter out all error logs (they are propagated as rust errors) + let levelname: String = record + .getattr("levelname") + .expect("This should exist") + .extract() + .expect("This should be a String"); + + let return_value = levelname.as_str() != "ERROR"; + + if log_enabled!(log::Level::Debug) && !return_value { + let message: String = { + let get_message = record.getattr("getMessage").expect("Is set"); + let message: String = get_message + .call((), None) + .expect("Can be called") + .extract() + .expect("Downcasting works"); + + message.as_str().to_owned() + }; + + debug!("Swollowed error message: '{message}'"); + } + return_value +} + +/// Consume a Python `logging.LogRecord` and emit a Rust `Log` instead. +#[pyfunction] +fn host_log(record: Bound<'_, PyAny>, rust_target: &str) -> PyResult<()> { + let level = record.getattr("levelno")?.extract()?; + let message = record.getattr("getMessage")?.call0()?.to_string(); + let pathname = record.getattr("pathname")?.extract::<String>()?; + let lineno = record.getattr("lineno")?.extract::<u32>()?; + + let logger_name = record.getattr("name")?.extract::<String>()?; + + let full_target: Option<String> = if logger_name.trim().is_empty() || logger_name == "root" { + None + } else { + // Libraries (ex: tracing_subscriber::filter::Directive) expect rust-style targets like foo::bar, + // and may not deal well with "." as a module separator: + let logger_name = logger_name.replace('.', "::"); + Some(format!("{rust_target}::{logger_name}")) + }; + let target = full_target.as_deref().unwrap_or(rust_target); + + handle_record(record, target, &message, lineno, &pathname, level)?; + + Ok(()) +} + +fn handle_record( + #[allow(unused_variables)] record: Bound<'_, PyAny>, + target: &str, + message: &str, + lineno: u32, + pathname: &str, + level: u8, +) -> PyResult<()> { + // If log feature is enabled, use log::logger + let level = crate::level::get_level(level).0; + + { + let mut metadata_builder = log::MetadataBuilder::new(); + metadata_builder.target(target); + metadata_builder.level(level); + + let mut record_builder = log::Record::builder(); + + { + let kv_args = kv::find_kv_args(&record)?; + + let kv_source = kv_args.map(kv::KVSource); + if let Some(kv_source) = kv_source { + log::logger().log( + &record_builder + .metadata(metadata_builder.build()) + .args(format_args!("{}", &message)) + .line(Some(lineno)) + .file(Some(pathname)) + .module_path(Some(pathname)) + .key_values(&kv_source) + .build(), + ); + return Ok(()); + } + } + + log::logger().log( + &record_builder + .metadata(metadata_builder.build()) + .args(format_args!("{}", &message)) + .line(Some(lineno)) + .file(Some(pathname)) + .module_path(Some(pathname)) + .build(), + ); + } + + Ok(()) +} + +/// Registers the host_log function in rust as the event handler for Python's logging logger +/// This function needs to be called from within a pyo3 context as early as possible to ensure logging messages +/// arrive to the rust consumer. +pub fn setup_logging<'py>(py: Python<'py>, target: &str) -> PyResult<Bound<'py, PyAny>> { + let logger = LOGGER + .get_or_init_py_attached(py, || match setup_logging_inner(py, target) { + Ok(ok) => ok.unbind(), + Err(err) => { + panic!("Failed to initialize logger: {}", err); + } + }) + .clone_ref(py); + + Ok(logger.into_bound(py)) +} + +fn setup_logging_inner<'py>(py: Python<'py>, target: &str) -> PyResult<Bound<'py, PyAny>> { + let logging = py.import("logging")?; + + logging.setattr("host_log", wrap_pyfunction!(host_log, &logging)?)?; + + #[allow(clippy::uninlined_format_args)] + let code = CString::new(format!( + r#" +class HostHandler(Handler): + def __init__(self, level=0): + super().__init__(level=level) + + def emit(self, record: LogRecord): + host_log(record, "{}") + +oldBasicConfig = basicConfig +def basicConfig(*pargs, **kwargs): + if "handlers" not in kwargs: + kwargs["handlers"] = [HostHandler()] + return oldBasicConfig(*pargs, **kwargs) +"#, + target + ))?; + + let logging_scope = logging.dict(); + py.run(&code, Some(&logging_scope), None)?; + + let all = logging.index()?; + all.append("HostHandler")?; + + let logger = { + let get_logger = logging_scope.get_item("getLogger")?; + get_logger.call((target,), None)? + }; + + { + let basic_config = logging_scope.get_item("basicConfig")?; + basic_config.call( + (), + { + let dict = PyDict::new(py); + + // Ensure that all events are logged by setting + // the log level to NOTSET (we filter on rust's side) + dict.set_item("level", 0)?; + + Some(dict) + } + .as_ref(), + )?; + } + + { + let add_filter = logger.getattr("addFilter")?; + add_filter.call((wrap_pyfunction!(filter_error_log, &logging)?,), None)?; + } + + Ok(logger) +} diff --git a/crates/bytes/update.sh b/crates/yt_dlp/crates/pyo3-pylogger/update.sh index c1a0215..dd3e57e 100755 --- a/crates/bytes/update.sh +++ b/crates/yt_dlp/crates/pyo3-pylogger/update.sh @@ -1,9 +1,9 @@ -#!/usr/bin/env sh +#! /usr/bin/env sh # yt - A fully featured command line YouTube client # -# Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -# SPDX-License-Identifier: GPL-3.0-or-later +# Copyright (C) 2025 Dylan Bobby Storey <dylan.storey@gmail.com>, cpu <daniel@binaryparadox.net>, Warren Snipes <contact@warrensnipes.dev> +# SPDX-License-Identifier: Apache-2.0 # # This file is part of Yt. # @@ -13,3 +13,5 @@ cd "$(dirname "$0")" || exit 1 [ "$1" = "upgrade" ] && cargo upgrade --incompatible cargo update + +# vim: ft=sh diff --git a/crates/yt_dlp/examples/main.rs b/crates/yt_dlp/examples/main.rs new file mode 100644 index 0000000..e924407 --- /dev/null +++ b/crates/yt_dlp/examples/main.rs @@ -0,0 +1,15 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +fn main() { + let yt_dlp = yt_dlp::options::YoutubeDLOptions::new().build().unwrap(); + + dbg!(yt_dlp.version().unwrap()); +} diff --git a/crates/yt_dlp/src/duration.rs b/crates/yt_dlp/src/duration.rs deleted file mode 100644 index 19181a5..0000000 --- a/crates/yt_dlp/src/duration.rs +++ /dev/null @@ -1,78 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -// TODO: This file should be de-duplicated with the same file in the 'yt' crate <2024-06-25> - -#[derive(Debug, Clone, Copy)] -pub struct Duration { - time: u32, -} - -impl From<&str> for Duration { - fn from(v: &str) -> Self { - let buf: Vec<_> = v.split(':').take(2).collect(); - Self { - time: (buf[0].parse::<u32>().expect("Should be a number") * 60) - + buf[1].parse::<u32>().expect("Should be a number"), - } - } -} - -impl From<Option<f64>> for Duration { - fn from(value: Option<f64>) -> Self { - Self { - #[allow( - clippy::cast_possible_truncation, - clippy::cast_precision_loss, - clippy::cast_sign_loss - )] - time: value.unwrap_or(0.0).ceil() as u32, - } - } -} - -impl std::fmt::Display for Duration { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - const SECOND: u32 = 1; - const MINUTE: u32 = 60 * SECOND; - const HOUR: u32 = 60 * MINUTE; - - let base_hour = self.time - (self.time % HOUR); - let base_min = (self.time % HOUR) - ((self.time % HOUR) % MINUTE); - let base_sec = (self.time % HOUR) % MINUTE; - - let h = base_hour / HOUR; - let m = base_min / MINUTE; - let s = base_sec / SECOND; - - if self.time == 0 { - write!(f, "0s") - } else if h > 0 { - write!(f, "{h}h {m}m") - } else { - write!(f, "{m}m {s}s") - } - } -} -#[cfg(test)] -mod test { - use super::Duration; - - #[test] - fn test_display_duration_1h() { - let dur = Duration { time: 60 * 60 }; - assert_eq!("1h 0m".to_owned(), dur.to_string()); - } - #[test] - fn test_display_duration_30min() { - let dur = Duration { time: 60 * 30 }; - assert_eq!("30m 0s".to_owned(), dur.to_string()); - } -} diff --git a/crates/yt_dlp/src/error.rs b/crates/yt_dlp/src/error.rs deleted file mode 100644 index 3881f0b..0000000 --- a/crates/yt_dlp/src/error.rs +++ /dev/null @@ -1,68 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -use std::{fmt::Display, io}; - -use pyo3::Python; - -#[derive(Debug)] -#[allow(clippy::module_name_repetitions)] -pub enum YtDlpError { - ResponseParseError { - error: serde_json::error::Error, - }, - PythonError { - error: Box<pyo3::PyErr>, - kind: String, - }, - IoError { - error: io::Error, - }, -} - -impl std::error::Error for YtDlpError {} - -impl Display for YtDlpError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - YtDlpError::ResponseParseError { error } => write!( - f, - include_str!("./python_json_decode_failed.error_msg"), - error - ), - YtDlpError::PythonError { error, kind: _ } => write!(f, "Python error: {error}"), - YtDlpError::IoError { error } => write!(f, "Io error: {error}"), - } - } -} - -impl From<serde_json::error::Error> for YtDlpError { - fn from(value: serde_json::error::Error) -> Self { - Self::ResponseParseError { error: value } - } -} - -impl From<pyo3::PyErr> for YtDlpError { - fn from(value: pyo3::PyErr) -> Self { - Python::with_gil(|py| { - let kind = value.get_type(py).to_string(); - Self::PythonError { - error: Box::new(value), - kind, - } - }) - } -} - -impl From<io::Error> for YtDlpError { - fn from(value: io::Error) -> Self { - Self::IoError { error: value } - } -} diff --git a/crates/yt_dlp/src/info_json.rs b/crates/yt_dlp/src/info_json.rs new file mode 100644 index 0000000..3ed08ee --- /dev/null +++ b/crates/yt_dlp/src/info_json.rs @@ -0,0 +1,56 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use pyo3::{ + Bound, Python, intern, + types::{PyAnyMethods, PyDict}, +}; + +pub type InfoJson = serde_json::Map<String, serde_json::Value>; + +/// # Panics +/// If expectation about python operations fail. +#[must_use] +pub fn json_loads( + input: serde_json::Map<String, serde_json::Value>, + py: Python<'_>, +) -> Bound<'_, PyDict> { + let json = py.import(intern!(py, "json")).expect("Module exists"); + let loads = json.getattr(intern!(py, "loads")).expect("Method exists"); + let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json"); + let dict = loads + .call((self_str,), None) + .expect("Vaild json is always a valid dict"); + + dict.downcast_into().expect("Should always be a dict") +} + +/// # Panics +/// If expectation about python operations fail. +#[must_use] +pub fn json_dumps(input: &Bound<'_, PyDict>) -> serde_json::Map<String, serde_json::Value> { + let py = input.py(); + + let json = py.import(intern!(py, "json")).expect("Module exists"); + let dumps = json.getattr(intern!(py, "dumps")).expect("Method exists"); + let dict = dumps + .call((input,), None) + .map_err(|err| err.print(py)) + .expect("Might not always work, but for our dicts it works"); + + let string: String = dict.extract().expect("Should always be a string"); + + let value: serde_json::Value = serde_json::from_str(&string).expect("Should be valid json"); + + match value { + serde_json::Value::Object(map) => map, + _ => unreachable!("These should not be json.dumps output"), + } +} diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs index 40610c2..6be5e87 100644 --- a/crates/yt_dlp/src/lib.rs +++ b/crates/yt_dlp/src/lib.rs @@ -1,6 +1,6 @@ // yt - A fully featured command line YouTube client // -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> // SPDX-License-Identifier: GPL-3.0-or-later // // This file is part of Yt. @@ -8,544 +8,371 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -// The pyo3 `pyfunction` proc-macros call unsafe functions internally, which trigger this lint. -#![allow(unsafe_op_in_unsafe_fn)] -#![allow(clippy::missing_errors_doc)] +//! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure. -use std::io::stderr; -use std::{env, process}; -use std::{fs::File, io::Write}; +use std::path::PathBuf; -use std::{path::PathBuf, sync::Once}; - -use crate::{duration::Duration, logging::setup_logging, wrapper::info_json::InfoJson}; - -use bytes::Bytes; -use error::YtDlpError; -use log::{Level, debug, info, log_enabled}; -use pyo3::types::{PyString, PyTuple, PyTupleMethods}; +use log::{debug, info}; use pyo3::{ - Bound, PyAny, PyResult, Python, pyfunction, - types::{PyAnyMethods, PyDict, PyDictMethods, PyList, PyListMethods, PyModule}, - wrap_pyfunction, + Bound, Py, PyAny, Python, intern, + types::{PyAnyMethods, PyDict, PyIterator, PyList}, }; -use serde::Serialize; -use serde_json::{Map, Value}; use url::Url; -pub mod duration; -pub mod error; -pub mod logging; -pub mod wrapper; - -#[cfg(test)] -mod tests; - -/// Synchronisation helper, to ensure that we don't setup the logger multiple times -static SYNC_OBJ: Once = Once::new(); - -/// Add a logger to the yt-dlp options. -/// If you have an logger set (i.e. for rust), than this will log to rust -/// -/// # Panics -/// This should never panic. -pub fn add_logger_and_sig_handler<'a>( - opts: Bound<'a, PyDict>, - py: Python<'_>, -) -> PyResult<Bound<'a, PyDict>> { - /// Is the specified record to be logged? Returns false for no, - /// true for yes. Filters can either modify log records in-place or - /// return a completely different record instance which will replace - /// the original log record in any future processing of the event. - #[pyfunction] - fn filter_error_log(_py: Python<'_>, record: &Bound<'_, PyAny>) -> bool { - // Filter out all error logs (they are propagated as rust errors) - let levelname: String = record - .getattr("levelname") - .expect("This should exist") - .extract() - .expect("This should be a String"); - - let return_value = levelname.as_str() != "ERROR"; - - if log_enabled!(Level::Debug) && !return_value { - let message: String = record - .call_method0("getMessage") - .expect("This method exists") - .extract() - .expect("The message is a string"); +use crate::{ + info_json::{InfoJson, json_dumps, json_loads}, + python_error::{IntoPythonError, PythonError}, +}; - debug!("Swollowed error message: '{message}'"); +pub mod info_json; +pub mod options; +pub mod post_processors; +pub mod progress_hook; +pub mod python_error; + +#[macro_export] +macro_rules! json_get { + ($value:expr, $name:literal, $into:ident) => {{ + match $value.get($name) { + Some(val) => $crate::json_cast!(@log_key $name, val, $into), + None => panic!( + concat!( + "Expected '", + $name, + "' to be a key for the '", + stringify!($value), + "' object: {:#?}" + ), + $value + ), } - return_value - } - - setup_logging(py, "yt_dlp")?; - - let logging = PyModule::import(py, "logging")?; - let ytdl_logger = logging.call_method1("getLogger", ("yt_dlp",))?; - - // Ensure that all events are logged by setting the log level to NOTSET (we filter on rust's side) - // Also use this static, to ensure that we don't configure the logger every time - SYNC_OBJ.call_once(|| { - // Disable the SIGINT (Ctrl+C) handler, python installs. - // This allows the user to actually stop the application with Ctrl+C. - // This is here because it can only be run in the main thread and this was here already. - py.run( - c"\ -import signal -signal.signal(signal.SIGINT, signal.SIG_DFL)", - None, - None, - ) - .expect("This code should always work"); - - let config_opts = PyDict::new(py); - config_opts - .set_item("level", 0) - .expect("Setting this item should always work"); - - logging - .call_method("basicConfig", (), Some(&config_opts)) - .expect("This method exists"); - }); - - ytdl_logger.call_method1( - "addFilter", - (wrap_pyfunction!(filter_error_log, py).expect("This function can be wrapped"),), - )?; - - // This was taken from `ytcc`, I don't think it is still applicable - // ytdl_logger.setattr("propagate", false)?; - // let logging_null_handler = logging.call_method0("NullHandler")?; - // ytdl_logger.setattr("addHandler", logging_null_handler)?; - - opts.set_item("logger", ytdl_logger).expect("Should work"); - - Ok(opts) + }}; } -#[pyfunction] -#[allow(clippy::too_many_lines)] -#[allow(clippy::missing_panics_doc)] -#[allow(clippy::items_after_statements)] -#[allow( - clippy::cast_possible_truncation, - clippy::cast_sign_loss, - clippy::cast_precision_loss -)] -pub fn progress_hook(py: Python<'_>, input: &Bound<'_, PyDict>) -> PyResult<()> { - // Only add the handler, if the log-level is higher than Debug (this avoids covering debug - // messages). - if log_enabled!(Level::Debug) { - return Ok(()); - } - - // ANSI ESCAPE CODES Wrappers {{{ - // see: https://en.wikipedia.org/wiki/ANSI_escape_code#Control_Sequence_Introducer_commands - const CSI: &str = "\x1b["; - fn clear_whole_line() { - eprint!("{CSI}2K"); - } - fn move_to_col(x: usize) { - eprint!("{CSI}{x}G"); - } - // }}} - - let input: Map<String, Value> = serde_json::from_str(&json_dumps( - py, - input - .downcast::<PyAny>() - .expect("Will always work") - .to_owned(), - )?) - .expect("python's json is valid"); - - macro_rules! get { - (@interrogate $item:ident, $type_fun:ident, $get_fun:ident, $name:expr) => {{ - let a = $item.get($name).expect(concat!( - "The field '", - stringify!($name), - "' should exist." - )); - - if a.$type_fun() { - a.$get_fun().expect( - "The should have been checked in the if guard, so unpacking here is fine", - ) +#[macro_export] +macro_rules! json_try_get { + ($value:expr, $name:literal, $into:ident) => {{ + if let Some(val) = $value.get($name) { + if val.is_null() { + None } else { - panic!( - "Value {} => \n{}\n is not of type: {}", - $name, - a, - stringify!($type_fun) - ); + Some(json_cast!(@log_key $name, val, $into)) } - }}; + } else { + None + } + }}; +} - ($type_fun:ident, $get_fun:ident, $name1:expr, $name2:expr) => {{ - let a = get! {@interrogate input, is_object, as_object, $name1}; - let b = get! {@interrogate a, $type_fun, $get_fun, $name2}; - b - }}; +#[macro_export] +macro_rules! json_cast { + ($value:expr, $into:ident) => {{ + let value_name = stringify!($value); + json_cast!(@log_key value_name, $value, $into) + }}; + + (@log_key $name:expr, $value:expr, $into:ident) => {{ + match $value.$into() { + Some(result) => result, + None => panic!( + concat!( + "Expected to be able to cast '{}' value (which is '{:?}') ", + stringify!($into) + ), + $name, + $value + ), + } + }}; +} - ($type_fun:ident, $get_fun:ident, $name:expr) => {{ - get! {@interrogate input, $type_fun, $get_fun, $name} - }}; - } +macro_rules! py_kw_args { + ($py:expr => $($kw_arg_name:ident = $kw_arg_val:expr),*) => {{ + use $crate::python_error::IntoPythonError; - macro_rules! default_get { - (@interrogate $item:ident, $default:expr, $get_fun:ident, $name:expr) => {{ - let a = if let Some(field) = $item.get($name) { - field.$get_fun().unwrap_or($default) - } else { - $default - }; - a - }}; - - ($get_fun:ident, $default:expr, $name1:expr, $name2:expr) => {{ - let a = get! {@interrogate input, is_object, as_object, $name1}; - let b = default_get! {@interrogate a, $default, $get_fun, $name2}; - b - }}; - - ($get_fun:ident, $default:expr, $name:expr) => {{ - default_get! {@interrogate input, $default, $get_fun, $name} - }}; - } + let dict = PyDict::new($py); - macro_rules! c { - ($color:expr, $format:expr) => { - format!("\x1b[{}m{}\x1b[0m", $color, $format) - }; - } + $( + dict.set_item(stringify!($kw_arg_name), $kw_arg_val).wrap_exc($py)?; + )* - fn format_bytes(bytes: u64) -> String { - let bytes = Bytes::new(bytes); - bytes.to_string() + Some(dict) } + .as_ref()}; +} +pub(crate) use py_kw_args; - fn format_speed(speed: f64) -> String { - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let bytes = Bytes::new(speed.floor() as u64); - format!("{bytes}/s") - } +/// The core of the `yt_dlp` interface. +#[derive(Debug)] +pub struct YoutubeDL { + inner: Py<PyAny>, + options: serde_json::Map<String, serde_json::Value>, +} - let get_title = || -> String { - match get! {is_string, as_str, "info_dict", "ext"} { - "vtt" => { - format!( - "Subtitles ({})", - default_get! {as_str, "<No Subtitle Language>", "info_dict", "name"} - ) - } - "webm" | "mp4" | "mp3" | "m4a" => { - default_get! { as_str, "<No title>", "info_dict", "title"}.to_owned() - } - other => panic!("The extension '{other}' is not yet implemented"), - } - }; - - match get! {is_string, as_str, "status"} { - "downloading" => { - let elapsed = default_get! {as_f64, 0.0f64, "elapsed"}; - let eta = default_get! {as_f64, 0.0, "eta"}; - let speed = default_get! {as_f64, 0.0, "speed"}; - - let downloaded_bytes = get! {is_u64, as_u64, "downloaded_bytes"}; - let (total_bytes, bytes_is_estimate): (u64, &'static str) = { - let total_bytes = default_get!(as_u64, 0, "total_bytes"); - if total_bytes == 0 { - let maybe_estimate = default_get!(as_u64, 0, "total_bytes_estimate"); - - if maybe_estimate == 0 { - // The download speed should be in bytes per second and the eta in seconds. - // Thus multiplying them gets us the raw bytes (which were estimated by `yt_dlp`, from their `info.json`) - let bytes_still_needed = (speed * eta).ceil() as u64; - - (downloaded_bytes + bytes_still_needed, "~") - } else { - (maybe_estimate, "~") - } - } else { - (total_bytes, "") - } - }; - let percent: f64 = { - if total_bytes == 0 { - 100.0 - } else { - (downloaded_bytes as f64 / total_bytes as f64) * 100.0 - } - }; +impl YoutubeDL { + /// Fetch the underlying `yt_dlp` and `python` version. + /// + /// # Errors + /// If python attribute access fails. + pub fn version(&self) -> Result<(String, String), PythonError> { + Python::with_gil(|py| { + let yt_dlp = py + .import(intern!(py, "yt_dlp")) + .wrap_exc(py)? + .getattr(intern!(py, "version")) + .wrap_exc(py)? + .getattr(intern!(py, "__version__")) + .wrap_exc(py)? + .extract() + .wrap_exc(py)?; - clear_whole_line(); - move_to_col(1); - - eprint!( - "'{}' [{}/{} at {}] -> [{} of {}{} {}] ", - c!("34;1", get_title()), - c!("33;1", Duration::from(Some(elapsed))), - c!("33;1", Duration::from(Some(eta))), - c!("32;1", format_speed(speed)), - c!("31;1", format_bytes(downloaded_bytes)), - c!("31;1", bytes_is_estimate), - c!("31;1", format_bytes(total_bytes)), - c!("36;1", format!("{:.02}%", percent)) - ); - stderr().flush()?; - } - "finished" => { - eprintln!("-> Finished downloading."); - } - "error" => { - // TODO: This should probably return an Err. But I'm not so sure where the error would - // bubble up to (i.e., who would catch it) <2025-01-21> - eprintln!("-> Error while downloading: {}", get_title()); - process::exit(1); - } - other => unreachable!("'{other}' should not be a valid state!"), - }; + let python = py.version(); - Ok(()) -} + Ok((yt_dlp, python.to_owned())) + }) + } -pub fn add_hooks<'a>(opts: Bound<'a, PyDict>, py: Python<'_>) -> PyResult<Bound<'a, PyDict>> { - if let Some(hooks) = opts.get_item("progress_hooks")? { - let hooks = hooks.downcast::<PyList>()?; - hooks.append(wrap_pyfunction!(progress_hook, py)?)?; + /// Download a given list of URLs. + /// Returns the paths they were downloaded to. + /// + /// # Errors + /// If one of the downloads error. + pub fn download(&self, urls: &[Url]) -> Result<Vec<PathBuf>, extract_info::Error> { + let mut out_paths = Vec::with_capacity(urls.len()); + + for url in urls { + info!("Started downloading url: '{url}'"); + let info_json = self.extract_info(url, true, true)?; + + // Try to work around yt-dlp type weirdness + let result_string = if let Some(filename) = json_try_get!(info_json, "filename", as_str) + { + PathBuf::from(filename) + } else { + PathBuf::from(json_get!( + json_cast!( + json_get!(info_json, "requested_downloads", as_array)[0], + as_object + ), + "filename", + as_str + )) + }; - opts.set_item("progress_hooks", hooks)?; - } else { - // No hooks are set yet - let hooks_list = PyList::new(py, &[wrap_pyfunction!(progress_hook, py)?])?; + out_paths.push(result_string); + info!("Finished downloading url"); + } - opts.set_item("progress_hooks", hooks_list)?; + Ok(out_paths) } - Ok(opts) -} + /// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)` + /// + /// Extract and return the information dictionary of the URL + /// + /// Arguments: + /// - `url` URL to extract + /// + /// Keyword arguments: + /// :`download` Whether to download videos + /// :`process` Whether to resolve all unresolved references (URLs, playlist items). + /// Must be True for download to work + /// + /// # Panics + /// If expectations about python fail to hold. + /// + /// # Errors + /// If python operations fail. + pub fn extract_info( + &self, + url: &Url, + download: bool, + process: bool, + ) -> Result<InfoJson, extract_info::Error> { + Python::with_gil(|py| { + let inner = self + .inner + .bind(py) + .getattr(intern!(py, "extract_info")) + .wrap_exc(py)?; + + let result = inner + .call( + (url.to_string(),), + py_kw_args!(py => download = download, process = process), + ) + .wrap_exc(py)? + .downcast_into::<PyDict>() + .expect("This is a dict"); + + // Resolve the generator object + if let Ok(generator) = result.get_item(intern!(py, "entries")) { + if generator.is_instance_of::<PyList>() { + // already resolved. Do nothing + } else if let Ok(generator) = generator.downcast::<PyIterator>() { + // A python generator object. + let max_backlog = json_try_get!(self.options, "playlistend", as_u64) + .map_or(10000, |playlistend| { + usize::try_from(playlistend).expect("Should work") + }); + + let mut out = vec![]; + for output in generator { + out.push(output.wrap_exc(py)?); + + if out.len() == max_backlog { + break; + } + } -/// Take the result of the ie (may be modified) and resolve all unresolved -/// references (URLs, playlist items). -/// -/// It will also download the videos if 'download'. -/// Returns the resolved `ie_result`. -#[allow(clippy::unused_async)] -#[allow(clippy::missing_panics_doc)] -pub async fn process_ie_result( - yt_dlp_opts: &Map<String, Value>, - ie_result: InfoJson, - download: bool, -) -> Result<InfoJson, YtDlpError> { - Python::with_gil(|py| -> Result<InfoJson, YtDlpError> { - let opts = json_map_to_py_dict(yt_dlp_opts, py)?; - - let instance = get_yt_dlp(py, opts)?; - - let args = { - let ie_result = json_loads_str(py, ie_result)?; - (ie_result,) - }; - - let kwargs = PyDict::new(py); - kwargs.set_item("download", download)?; - - let result = instance - .call_method("process_ie_result", args, Some(&kwargs))? - .downcast_into::<PyDict>() - .expect("This is a dict"); - - let result_str = json_dumps(py, result.into_any())?; - - serde_json::from_str(&result_str).map_err(Into::into) - }) -} + result.set_item(intern!(py, "entries"), out).wrap_exc(py)?; + } else { + // Probably some sort of paged list (`OnDemand` or otherwise) + let max_backlog = json_try_get!(self.options, "playlistend", as_u64) + .map_or(10000, |playlistend| { + usize::try_from(playlistend).expect("Should work") + }); -/// `extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False)` -/// -/// Extract and return the information dictionary of the URL -/// -/// Arguments: -/// @param url URL to extract -/// -/// Keyword arguments: -/// @param download Whether to download videos -/// @param process Whether to resolve all unresolved references (URLs, playlist items). -/// Must be True for download to work -/// @param `ie_key` Use only the extractor with this key -/// -/// @param `extra_info` Dictionary containing the extra values to add to the info (For internal use only) -/// @`force_generic_extractor` Force using the generic extractor (Deprecated; use `ie_key`='Generic') -#[allow(clippy::unused_async)] -#[allow(clippy::missing_panics_doc)] -pub async fn extract_info( - yt_dlp_opts: &Map<String, Value>, - url: &Url, - download: bool, - process: bool, -) -> Result<InfoJson, YtDlpError> { - Python::with_gil(|py| -> Result<InfoJson, YtDlpError> { - let opts = json_map_to_py_dict(yt_dlp_opts, py)?; - - let instance = get_yt_dlp(py, opts)?; - let args = (url.as_str(),); - - let kwargs = PyDict::new(py); - kwargs.set_item("download", download)?; - kwargs.set_item("process", process)?; - - let result = instance - .call_method("extract_info", args, Some(&kwargs))? - .downcast_into::<PyDict>() - .expect("This is a dict"); - - // Resolve the generator object - if let Some(generator) = result.get_item("entries")? { - if generator.is_instance_of::<PyList>() { - // already resolved. Do nothing - } else { - let max_backlog = yt_dlp_opts.get("playlistend").map_or(10000, |value| { - usize::try_from(value.as_u64().expect("Works")).expect("Should work") - }); + let next = generator.getattr(intern!(py, "getslice")).wrap_exc(py)?; - let mut out = vec![]; - while let Ok(output) = generator.call_method0("__next__") { - out.push(output); + let output = next + .call((), py_kw_args!(py => start = 0, end = max_backlog)) + .wrap_exc(py)?; - if out.len() == max_backlog { - break; - } + result + .set_item(intern!(py, "entries"), output) + .wrap_exc(py)?; } - result.set_item("entries", out)?; } - } - - let result_str = json_dumps(py, result.into_any())?; - if let Ok(confirm) = env::var("YT_STORE_INFO_JSON") { - if confirm == "yes" { - let mut file = File::create("output.info.json")?; - write!(file, "{result_str}").unwrap(); - } - } + let result = self.prepare_info_json(&result, py)?; - serde_json::from_str(&result_str).map_err(Into::into) - }) -} + Ok(result) + }) + } -/// # Panics -/// Only if python fails to return a valid URL. -pub fn unsmuggle_url(smug_url: &Url) -> PyResult<Url> { - Python::with_gil(|py| { - let utils = get_yt_dlp_utils(py)?; - let url = utils - .call_method1("unsmuggle_url", (smug_url.as_str(),))? - .downcast::<PyTuple>()? - .get_item(0)?; - - let url: Url = url - .downcast::<PyString>()? - .to_string() - .parse() - .expect("Python should be able to return a valid url"); - - Ok(url) - }) -} + /// Take the (potentially modified) result of the information extractor (i.e., + /// [`Self::extract_info`] with `process` and `download` set to false) + /// and resolve all unresolved references (URLs, + /// playlist items). + /// + /// It will also download the videos if 'download' is true. + /// Returns the resolved `ie_result`. + /// + /// # Panics + /// If expectations about python fail to hold. + /// + /// # Errors + /// If python operations fail. + pub fn process_ie_result( + &self, + ie_result: InfoJson, + download: bool, + ) -> Result<InfoJson, process_ie_result::Error> { + Python::with_gil(|py| { + let inner = self + .inner + .bind(py) + .getattr(intern!(py, "process_ie_result")) + .wrap_exc(py)?; + + let result = inner + .call( + (json_loads(ie_result, py),), + py_kw_args!(py => download = download), + ) + .wrap_exc(py)? + .downcast_into::<PyDict>() + .expect("This is a dict"); -/// Download a given list of URLs. -/// Returns the paths they were downloaded to. -/// -/// # Panics -/// Only if `yt_dlp` changes their `info_json` schema. -pub async fn download( - urls: &[Url], - download_options: &Map<String, Value>, -) -> Result<Vec<PathBuf>, YtDlpError> { - let mut out_paths = Vec::with_capacity(urls.len()); - - for url in urls { - info!("Started downloading url: '{}'", url); - let info_json = extract_info(download_options, url, true, true).await?; - - // Try to work around yt-dlp type weirdness - let result_string = if let Some(filename) = info_json.filename { - filename - } else { - info_json.requested_downloads.expect("This must exist")[0] - .filename - .clone() - }; + let result = self.prepare_info_json(&result, py)?; - out_paths.push(result_string); - info!("Finished downloading url: '{}'", url); + Ok(result) + }) } - Ok(out_paths) -} - -fn json_map_to_py_dict<'a>( - map: &Map<String, Value>, - py: Python<'a>, -) -> PyResult<Bound<'a, PyDict>> { - let json_string = serde_json::to_string(&map).expect("This must always work"); + /// Close this [`YoutubeDL`] instance, and stop all currently running downloads. + /// + /// # Errors + /// If python operations fail. + pub fn close(&self) -> Result<(), close::Error> { + Python::with_gil(|py| { + debug!("Closing YoutubeDL."); - let python_dict = json_loads(py, json_string)?; + let inner = self + .inner + .bind(py) + .getattr(intern!(py, "close")) + .wrap_exc(py)?; - Ok(python_dict) -} + inner.call0().wrap_exc(py)?; -fn json_dumps(py: Python<'_>, input: Bound<'_, PyAny>) -> PyResult<String> { - // json.dumps(yt_dlp.sanitize_info(input)) - - let yt_dlp = get_yt_dlp(py, PyDict::new(py))?; - let sanitized_result = yt_dlp.call_method1("sanitize_info", (input,))?; + Ok(()) + }) + } - let json = PyModule::import(py, "json")?; - let dumps = json.getattr("dumps")?; + fn prepare_info_json<'py>( + &self, + info: &Bound<'py, PyDict>, + py: Python<'py>, + ) -> Result<InfoJson, prepare::Error> { + let sanitize = self + .inner + .bind(py) + .getattr(intern!(py, "sanitize_info")) + .wrap_exc(py)?; - let output = dumps.call1((sanitized_result,))?; + let value = sanitize.call((info,), None).wrap_exc(py)?; - let output_str = output.extract::<String>()?; + let result = value.downcast::<PyDict>().expect("This should stay a dict"); - Ok(output_str) + Ok(json_dumps(result)) + } } -fn json_loads_str<T: Serialize>(py: Python<'_>, input: T) -> PyResult<Bound<'_, PyDict>> { - let string = serde_json::to_string(&input).expect("Correct json must be pased"); +#[allow(missing_docs)] +pub mod close { + use crate::python_error::PythonError; - json_loads(py, string) + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), + } } +#[allow(missing_docs)] +pub mod process_ie_result { + use crate::{prepare, python_error::PythonError}; -fn json_loads(py: Python<'_>, input: String) -> PyResult<Bound<'_, PyDict>> { - // json.loads(input) - - let json = PyModule::import(py, "json")?; - let dumps = json.getattr("loads")?; + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), - let output = dumps.call1((input,))?; - - Ok(output - .downcast::<PyDict>() - .expect("This should always be a PyDict") - .clone()) + #[error("Failed to prepare the info json")] + InfoJsonPrepare(#[from] prepare::Error), + } } +#[allow(missing_docs)] +pub mod extract_info { + use crate::{prepare, python_error::PythonError}; -fn get_yt_dlp_utils(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> { - let yt_dlp = PyModule::import(py, "yt_dlp")?; - let utils = yt_dlp.getattr("utils")?; + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), - Ok(utils) + #[error("Failed to prepare the info json")] + InfoJsonPrepare(#[from] prepare::Error), + } } -fn get_yt_dlp<'a>(py: Python<'a>, opts: Bound<'a, PyDict>) -> PyResult<Bound<'a, PyAny>> { - // Unconditionally set a logger - let opts = add_logger_and_sig_handler(opts, py)?; - let opts = add_hooks(opts, py)?; - - let yt_dlp = PyModule::import(py, "yt_dlp")?; - let youtube_dl = yt_dlp.call_method1("YoutubeDL", (opts,))?; - - Ok(youtube_dl) +#[allow(missing_docs)] +pub mod prepare { + use crate::python_error::PythonError; + + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), + } } diff --git a/crates/yt_dlp/src/logging.rs b/crates/yt_dlp/src/logging.rs deleted file mode 100644 index e731502..0000000 --- a/crates/yt_dlp/src/logging.rs +++ /dev/null @@ -1,133 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -// This file is taken from: https://github.com/dylanbstorey/pyo3-pylogger/blob/d89e0d6820ebc4f067647e3b74af59dbc4941dd5/src/lib.rs -// It is licensed under the Apache 2.0 License, copyright up to 2024 by Dylan Storey -// It was modified by Benedikt Peetz 2024 - -// The pyo3 `pyfunction` proc-macros call unsafe functions internally, which trigger this lint. -#![allow(unsafe_op_in_unsafe_fn)] - -use std::ffi::CString; - -use log::{Level, MetadataBuilder, Record, logger}; -use pyo3::{ - Bound, PyAny, PyResult, Python, - prelude::{PyAnyMethods, PyListMethods, PyModuleMethods}, - pyfunction, wrap_pyfunction, -}; - -/// Consume a Python `logging.LogRecord` and emit a Rust `Log` instead. -#[allow(clippy::needless_pass_by_value)] -#[pyfunction] -fn host_log(record: Bound<'_, PyAny>, rust_target: &str) -> PyResult<()> { - let level = record.getattr("levelno")?; - let message = record.getattr("getMessage")?.call0()?.to_string(); - let pathname = record.getattr("pathname")?.to_string(); - let lineno = record - .getattr("lineno")? - .to_string() - .parse::<u32>() - .expect("This should always be a u32"); - - let logger_name = record.getattr("name")?.to_string(); - - let full_target: Option<String> = if logger_name.trim().is_empty() || logger_name == "root" { - None - } else { - // Libraries (ex: tracing_subscriber::filter::Directive) expect rust-style targets like foo::bar, - // and may not deal well with "." as a module separator: - let logger_name = logger_name.replace('.', "::"); - Some(format!("{rust_target}::{logger_name}")) - }; - - let target = full_target.as_deref().unwrap_or(rust_target); - - // error - let error_metadata = if level.ge(40u8)? { - MetadataBuilder::new() - .target(target) - .level(Level::Error) - .build() - } else if level.ge(30u8)? { - MetadataBuilder::new() - .target(target) - .level(Level::Warn) - .build() - } else if level.ge(20u8)? { - MetadataBuilder::new() - .target(target) - .level(Level::Info) - .build() - } else if level.ge(10u8)? { - MetadataBuilder::new() - .target(target) - .level(Level::Debug) - .build() - } else { - MetadataBuilder::new() - .target(target) - .level(Level::Trace) - .build() - }; - - logger().log( - &Record::builder() - .metadata(error_metadata) - .args(format_args!("{}", &message)) - .line(Some(lineno)) - .file(None) - .module_path(Some(&pathname)) - .build(), - ); - - Ok(()) -} - -/// Registers the `host_log` function in rust as the event handler for Python's logging logger -/// This function needs to be called from within a pyo3 context as early as possible to ensure logging messages -/// arrive to the rust consumer. -/// -/// # Panics -/// Only if internal assertions fail. -#[allow(clippy::module_name_repetitions)] -pub fn setup_logging(py: Python<'_>, target: &str) -> PyResult<()> { - let logging = py.import("logging")?; - - logging.setattr("host_log", wrap_pyfunction!(host_log, &logging)?)?; - - py.run( - CString::new(format!( - r#" -class HostHandler(Handler): - def __init__(self, level=0): - super().__init__(level=level) - - def emit(self, record): - host_log(record,"{target}") - -oldBasicConfig = basicConfig -def basicConfig(*pargs, **kwargs): - if "handlers" not in kwargs: - kwargs["handlers"] = [HostHandler()] - return oldBasicConfig(*pargs, **kwargs) -"# - )) - .expect("This is hardcoded") - .as_c_str(), - Some(&logging.dict()), - None, - )?; - - let all = logging.index()?; - all.append("HostHandler")?; - - Ok(()) -} diff --git a/crates/yt_dlp/src/options.rs b/crates/yt_dlp/src/options.rs new file mode 100644 index 0000000..ad30301 --- /dev/null +++ b/crates/yt_dlp/src/options.rs @@ -0,0 +1,207 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::sync; + +use pyo3::{ + Bound, IntoPyObjectExt, PyAny, PyResult, Python, intern, + types::{PyAnyMethods, PyCFunction, PyDict, PyTuple}, +}; +use pyo3_pylogger::setup_logging; + +use crate::{ + YoutubeDL, json_loads, post_processors, py_kw_args, + python_error::{IntoPythonError, PythonError}, +}; + +pub type ProgressHookFunction = fn(py: Python<'_>) -> PyResult<Bound<'_, PyCFunction>>; +pub type PostProcessorFunction = fn(py: Python<'_>) -> PyResult<Bound<'_, PyAny>>; + +/// Options, that are used to customize the download behaviour. +/// +/// In the future, this might get a Builder api. +/// +/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options. +#[derive(Default, Debug)] +pub struct YoutubeDLOptions { + options: serde_json::Map<String, serde_json::Value>, + progress_hook: Option<ProgressHookFunction>, + post_processors: Vec<PostProcessorFunction>, +} + +impl YoutubeDLOptions { + #[must_use] + pub fn new() -> Self { + let me = Self { + options: serde_json::Map::new(), + progress_hook: None, + post_processors: vec![], + }; + + me.with_post_processor(post_processors::dearrow::process) + } + + #[must_use] + pub fn set(self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self { + let mut options = self.options; + options.insert(key.into(), value.into()); + + Self { options, ..self } + } + + #[must_use] + pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self { + if let Some(_previous_hook) = self.progress_hook { + todo!() + } else { + Self { + progress_hook: Some(progress_hook), + ..self + } + } + } + + #[must_use] + pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self { + self.post_processors.push(pp); + self + } + + /// # Errors + /// If the underlying [`YoutubeDL::from_options`] errors. + pub fn build(self) -> Result<YoutubeDL, build::Error> { + YoutubeDL::from_options(self) + } + + #[must_use] + pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self { + Self { + options, + ..Self::new() + } + } + + #[must_use] + pub fn get(&self, key: &str) -> Option<&serde_json::Value> { + self.options.get(key) + } +} + +impl YoutubeDL { + /// Construct this instance from options. + /// + /// # Panics + /// If `yt_dlp` changed their interface. + /// + /// # Errors + /// If a python call fails. + #[allow(clippy::too_many_lines)] + pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> { + pyo3::prepare_freethreaded_python(); + + let output_options = options.options.clone(); + + let yt_dlp_module = Python::with_gil(|py| { + let opts = json_loads(options.options, py); + + { + static CALL_ONCE: sync::Once = sync::Once::new(); + + CALL_ONCE.call_once(|| { + py.run( + c" +import signal +signal.signal(signal.SIGINT, signal.SIG_DFL) + ", + None, + None, + ) + .unwrap_or_else(|err| { + panic!("Failed to disable python signal handling: {err}") + }); + }); + } + + { + // Setup the progress hook + if let Some(ph) = options.progress_hook { + opts.set_item(intern!(py, "progress_hooks"), vec![ph(py).wrap_exc(py)?]) + .wrap_exc(py)?; + } + } + + { + // Unconditionally set a logger. + // Otherwise, yt_dlp will log to stderr. + + let ytdl_logger = setup_logging(py, "yt_dlp").wrap_exc(py)?; + + opts.set_item(intern!(py, "logger"), ytdl_logger) + .wrap_exc(py)?; + } + + let inner = { + let p_params = opts.into_bound_py_any(py).wrap_exc(py)?; + let p_auto_init = true.into_bound_py_any(py).wrap_exc(py)?; + + py.import(intern!(py, "yt_dlp.YoutubeDL")) + .wrap_exc(py)? + .getattr(intern!(py, "YoutubeDL")) + .wrap_exc(py)? + .call1( + PyTuple::new( + py, + [ + p_params.into_bound_py_any(py).wrap_exc(py)?, + p_auto_init.into_bound_py_any(py).wrap_exc(py)?, + ], + ) + .wrap_exc(py)?, + ) + .wrap_exc(py)? + }; + + { + // Setup the post processors + let add_post_processor_fun = inner + .getattr(intern!(py, "add_post_processor")) + .wrap_exc(py)?; + + for pp in options.post_processors { + add_post_processor_fun + .call( + (pp(py).wrap_exc(py)?.into_bound_py_any(py).wrap_exc(py)?,), + // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN + py_kw_args!(py => when = "pre_process"), + ) + .wrap_exc(py)?; + } + } + + Ok::<_, PythonError>(inner.unbind()) + })?; + + Ok(Self { + inner: yt_dlp_module, + options: output_options, + }) + } +} + +#[allow(missing_docs)] +pub mod build { + use crate::python_error::PythonError; + + #[derive(Debug, thiserror::Error)] + pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), + } +} diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs new file mode 100644 index 0000000..f35f301 --- /dev/null +++ b/crates/yt_dlp/src/post_processors/dearrow.rs @@ -0,0 +1,247 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use curl::easy::Easy; +use log::{error, info, trace, warn}; +use pyo3::{ + Bound, PyAny, PyErr, PyResult, Python, exceptions, intern, pyfunction, + types::{PyAnyMethods, PyDict, PyModule}, + wrap_pyfunction, +}; +use serde::{Deserialize, Serialize}; + +use crate::{ + pydict_cast, pydict_get, + python_error::{IntoPythonError, PythonError}, +}; + +/// # Errors +/// - If the underlying function returns an error. +/// - If python operations fail. +pub fn process(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> { + #[pyfunction] + fn actual_processor(info_json: Bound<'_, PyDict>) -> PyResult<Bound<'_, PyDict>> { + let output = match unwrapped_process(info_json) { + Ok(ok) => ok, + Err(err) => { + return Err(PyErr::new::<exceptions::PyRuntimeError, _>(err.to_string())); + } + }; + Ok(output) + } + + let module = PyModule::new(py, "rust_post_processors")?; + let scope = PyDict::new(py); + scope.set_item( + intern!(py, "actual_processor"), + wrap_pyfunction!(actual_processor, module)?, + )?; + py.run( + c" +import yt_dlp + +class DeArrow(yt_dlp.postprocessor.PostProcessor): + def run(self, info): + info = actual_processor(info) + return [], info + +inst = DeArrow() +", + Some(&scope), + None, + )?; + + Ok(scope.get_item(intern!(py, "inst"))?.downcast_into()?) +} + +/// # Errors +/// If the API access fails. +pub fn unwrapped_process(info: Bound<'_, PyDict>) -> Result<Bound<'_, PyDict>, Error> { + if pydict_get!(info, "extractor_key", String).as_str() != "Youtube" { + return Ok(info); + } + + let mut retry_num = 3; + let mut output: DeArrowApi = { + loop { + let output_bytes = { + let mut dst = Vec::new(); + + let mut easy = Easy::new(); + easy.url( + format!( + "https://sponsor.ajay.app/api/branding?videoID={}", + pydict_get!(info, "id", String) + ) + .as_str(), + )?; + + let mut transfer = easy.transfer(); + transfer.write_function(|data| { + dst.extend_from_slice(data); + Ok(data.len()) + })?; + transfer.perform()?; + drop(transfer); + + dst + }; + + match serde_json::from_slice(&output_bytes) { + Ok(ok) => break ok, + Err(err) => { + if retry_num > 0 { + trace!( + "DeArrow: Api access failed, trying again ({retry_num} retries left)" + ); + retry_num -= 1; + } else { + let err: serde_json::Error = err; + return Err(err.into()); + } + } + } + } + }; + + // We pop the titles, so we need this vector reversed. + output.titles.reverse(); + + let title_len = output.titles.len(); + let mut iterator = output.titles.clone(); + let selected = loop { + let Some(title) = iterator.pop() else { + break false; + }; + + if (title.locked || title.votes < 1) && title_len > 1 { + info!( + "DeArrow: Skipping title {:#?}, as it is not good enough", + title.value + ); + // Skip titles that are not “good” enough. + continue; + } + + update_title(&info, &title.value).wrap_exc(info.py())?; + + break true; + }; + + if !selected && title_len != 0 { + // No title was selected, even though we had some titles. + // Just pick the first one in this case. + update_title(&info, &output.titles[0].value).wrap_exc(info.py())?; + } + + Ok(info) +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error(transparent)] + Python(#[from] PythonError), + + #[error("Failed to access the DeArrow api: {0}")] + Get(#[from] curl::Error), + + #[error("Failed to deserialize a api json return object: {0}")] + Deserialize(#[from] serde_json::Error), +} + +fn update_title(info: &Bound<'_, PyDict>, new_title: &str) -> PyResult<()> { + let py = info.py(); + + assert!(!info.contains(intern!(py, "original_title"))?); + + if let Ok(old_title) = info.get_item(intern!(py, "title")) { + warn!( + "DeArrow: Updating title from {:#?} to {:#?}", + pydict_cast!(old_title, &str), + new_title + ); + + info.set_item(intern!(py, "original_title"), old_title) + .expect("We checked, it is a new key"); + } else { + warn!("DeArrow: Setting title to {new_title:#?}"); + } + + let cleaned_title = { + // NOTE(@bpeetz): DeArrow uses `>` as a “Don't format the next word” mark. + // They should be removed, if one does not use a auto-formatter. <2025-06-16> + new_title.replace('>', "") + }; + + info.set_item(intern!(py, "title"), cleaned_title) + .expect("This should work?"); + + Ok(()) +} + +#[derive(Serialize, Deserialize)] +/// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow> +struct DeArrowApi { + titles: Vec<Title>, + thumbnails: Vec<Thumbnail>, + + #[serde(alias = "randomTime")] + random_time: Option<f64>, + + #[serde(alias = "videoDuration")] + video_duration: Option<f64>, + + #[serde(alias = "casualVotes")] + casual_votes: Vec<CasualVote>, +} + +#[derive(Serialize, Deserialize)] +struct CasualVote { + id: String, + count: u32, + title: String, +} + +#[derive(Serialize, Deserialize, Clone)] +struct Title { + /// Note: Titles will sometimes contain > before a word. + /// This tells the auto-formatter to not format a word. + /// If you have no auto-formatter, you can ignore this and replace it with an empty string + #[serde(alias = "title")] + value: String, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} + +#[derive(Serialize, Deserialize)] +struct Thumbnail { + // null if original is true + timestamp: Option<f64>, + + original: bool, + votes: u64, + locked: bool, + + #[serde(alias = "UUID")] + uuid: String, + + /// only present if requested + #[serde(alias = "userID")] + user_id: Option<String>, +} diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs new file mode 100644 index 0000000..d9be3f5 --- /dev/null +++ b/crates/yt_dlp/src/post_processors/mod.rs @@ -0,0 +1,48 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +pub mod dearrow; + +#[macro_export] +macro_rules! pydict_get { + ($value:expr, $name:literal, $into:ty) => {{ + let item = $value.get_item(pyo3::intern!($value.py(), $name)); + match &item { + Ok(val) => $crate::pydict_cast!(val, $into), + Err(_) => panic!( + concat!( + "Expected '", + $name, + "' to be a key for the'", + stringify!($value), + "' py dictionary: {:#?}" + ), + $value + ), + } + }}; +} + +#[macro_export] +macro_rules! pydict_cast { + ($value:expr, $into:ty) => {{ + match $value.extract::<$into>() { + Ok(result) => result, + Err(val) => panic!( + concat!( + "Expected to be able to extract ", + stringify!($into), + " from value ({:#?})." + ), + val + ), + } + }}; +} diff --git a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs new file mode 100644 index 0000000..7e5f8a5 --- /dev/null +++ b/crates/yt_dlp/src/progress_hook.rs @@ -0,0 +1,67 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +#[macro_export] +macro_rules! wrap_progress_hook { + ($name:ident, $new_name:ident) => { + pub(crate) fn $new_name( + py: yt_dlp::progress_hook::__priv::pyo3::Python<'_>, + ) -> yt_dlp::progress_hook::__priv::pyo3::PyResult< + yt_dlp::progress_hook::__priv::pyo3::Bound< + '_, + yt_dlp::progress_hook::__priv::pyo3::types::PyCFunction, + >, + > { + #[yt_dlp::progress_hook::__priv::pyo3::pyfunction] + #[pyo3(crate = "yt_dlp::progress_hook::__priv::pyo3")] + fn inner( + input: yt_dlp::progress_hook::__priv::pyo3::Bound< + '_, + yt_dlp::progress_hook::__priv::pyo3::types::PyDict, + >, + ) -> yt_dlp::progress_hook::__priv::pyo3::PyResult<()> { + let processed_input = { + let new_dict = yt_dlp::progress_hook::__priv::pyo3::types::PyDict::new(input.py()); + + input + .into_iter() + .filter_map(|(name, value)| { + let real_name = yt_dlp::progress_hook::__priv::pyo3::types::PyAnyMethods::extract::<String>(&name).expect("Should always be a string"); + + if real_name.starts_with('_') { + None + } else { + Some((real_name, value)) + } + }) + .for_each(|(key, value)| { + yt_dlp::progress_hook::__priv::pyo3::types::PyDictMethods::set_item(&new_dict, &key, value) + .expect("This is a transpositions, should always be valid"); + }); + yt_dlp::progress_hook::__priv::json_dumps(&new_dict) + }; + + $name(processed_input)?; + + Ok(()) + } + + let module = yt_dlp::progress_hook::__priv::pyo3::types::PyModule::new(py, "progress_hook")?; + let fun = yt_dlp::progress_hook::__priv::pyo3::wrap_pyfunction!(inner, module)?; + + Ok(fun) + } + }; +} + +pub mod __priv { + pub use crate::info_json::{json_dumps, json_loads}; + pub use pyo3; +} diff --git a/crates/yt_dlp/src/python_error.rs b/crates/yt_dlp/src/python_error.rs new file mode 100644 index 0000000..0c442b3 --- /dev/null +++ b/crates/yt_dlp/src/python_error.rs @@ -0,0 +1,55 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::fmt::{self, Display}; + +use log::{Level, debug, log_enabled}; +use pyo3::{PyErr, Python, types::PyTracebackMethods}; + +#[derive(thiserror::Error, Debug)] +pub struct PythonError(pub String); + +pub(crate) trait IntoPythonError<T>: Sized { + fn wrap_exc(self, py: Python<'_>) -> Result<T, PythonError>; +} + +impl<T> IntoPythonError<T> for Result<T, PyErr> { + fn wrap_exc(self, py: Python<'_>) -> Result<T, PythonError> { + self.map_err(|exc| PythonError::from_exception(py, &exc)) + } +} + +impl Display for PythonError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Python threw an exception: {}", self.0) + } +} + +impl PythonError { + pub(super) fn from_exception(py: Python<'_>, exc: &PyErr) -> Self { + let buffer = process_exception(py, exc); + Self(buffer) + } +} + +pub(super) fn process_exception(py: Python<'_>, err: &PyErr) -> String { + if log_enabled!(Level::Debug) { + let mut output = err.to_string(); + + if let Some(tb) = err.traceback(py) { + output.push('\n'); + output.push_str(&tb.format().unwrap()); + } + + debug!("Python threw an exception: {output}"); + } + + err.to_string() +} diff --git a/crates/yt_dlp/src/python_json_decode_failed.error_msg b/crates/yt_dlp/src/python_json_decode_failed.error_msg deleted file mode 100644 index d10688e..0000000 --- a/crates/yt_dlp/src/python_json_decode_failed.error_msg +++ /dev/null @@ -1,5 +0,0 @@ -Failed to decode yt-dlp's response: {} - -This is probably a bug. -Try running the command again with the `YT_STORE_INFO_JSON=yes` environment variable set -and maybe debug it further via `yt check info-json output.info.json`. diff --git a/crates/yt_dlp/src/tests.rs b/crates/yt_dlp/src/tests.rs deleted file mode 100644 index 91b6626..0000000 --- a/crates/yt_dlp/src/tests.rs +++ /dev/null @@ -1,89 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -use std::sync::LazyLock; - -use serde_json::{Value, json}; -use url::Url; - -static YT_OPTS: LazyLock<serde_json::Map<String, Value>> = LazyLock::new(|| { - match json!({ - "playliststart": 1, - "playlistend": 10, - "noplaylist": false, - "extract_flat": false, - }) { - Value::Object(obj) => obj, - _ => unreachable!("This json is hardcoded"), - } -}); - -#[tokio::test] -#[ignore = "This test hangs forever"] -async fn test_extract_info_video() { - let info = crate::extract_info( - &YT_OPTS, - &Url::parse("https://www.youtube.com/watch?v=dbjPnXaacAU").expect("Is valid."), - false, - false, - ) - .await - .map_err(|err| format!("Encountered error: '{err}'")) - .unwrap(); - - println!("{info:#?}"); -} - -#[tokio::test] -#[ignore = "This test hangs forever"] -async fn test_extract_info_url() { - let err = crate::extract_info( - &YT_OPTS, - &Url::parse("https://google.com").expect("Is valid."), - false, - false, - ) - .await - .map_err(|err| format!("Encountered error: '{err}'")) - .unwrap(); - - println!("{err:#?}"); -} - -#[tokio::test] -#[ignore = "This test hangs forever"] -async fn test_extract_info_playlist() { - let err = crate::extract_info( - &YT_OPTS, - &Url::parse("https://www.youtube.com/@TheGarriFrischer/videos").expect("Is valid."), - false, - true, - ) - .await - .map_err(|err| format!("Encountered error: '{err}'")) - .unwrap(); - - println!("{err:#?}"); -} -#[tokio::test] -#[ignore = "This test hangs forever"] -async fn test_extract_info_playlist_full() { - let err = crate::extract_info( - &YT_OPTS, - &Url::parse("https://www.youtube.com/@NixOS-Foundation/videos").expect("Is valid."), - false, - true, - ) - .await - .map_err(|err| format!("Encountered error: '{err}'")) - .unwrap(); - - println!("{err:#?}"); -} diff --git a/crates/yt_dlp/src/wrapper/info_json.rs b/crates/yt_dlp/src/wrapper/info_json.rs deleted file mode 100644 index a2c00df..0000000 --- a/crates/yt_dlp/src/wrapper/info_json.rs +++ /dev/null @@ -1,824 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -// `yt_dlp` named them like this. -#![allow(clippy::pub_underscore_fields)] - -use std::{collections::HashMap, path::PathBuf}; - -use pyo3::{Bound, PyResult, Python, types::PyDict}; -use serde::{Deserialize, Deserializer, Serialize}; -use serde_json::Value; -use url::Url; - -use crate::json_loads_str; - -type Todo = String; -type Extractor = String; -type ExtractorKey = String; - -// TODO: Change this to map `_type` to a structure of values, instead of the options <2024-05-27> -// And replace all the strings with better types (enums or urls) -#[derive(Debug, Deserialize, Serialize, PartialEq)] -#[serde(deny_unknown_fields)] -pub struct InfoJson { - #[serde(skip_serializing_if = "Option::is_none")] - pub __files_to_move: Option<FilesToMove>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub __last_playlist_index: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub __post_extractor: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub __x_forwarded_for_ip: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub _filename: Option<PathBuf>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub _format_sort_fields: Option<Vec<String>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub _has_drm: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub _type: Option<InfoType>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub _version: Option<Version>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub abr: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub acodec: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub age_limit: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub artists: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub aspect_ratio: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub asr: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub audio_channels: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub audio_ext: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub automatic_captions: Option<HashMap<String, Vec<Caption>>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub availability: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub average_rating: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub categories: Option<Vec<String>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub channel: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub channel_follower_count: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub channel_id: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub channel_is_verified: Option<bool>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub channel_url: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub chapters: Option<Vec<Chapter>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub comment_count: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub comments: Option<Vec<Comment>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub concurrent_view_count: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub container: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub direct: Option<bool>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub display_id: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub downloader_options: Option<DownloaderOptions>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub duration: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub duration_string: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub dynamic_range: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub entries: Option<Vec<InfoJson>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub episode: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub episode_number: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub epoch: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub ext: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub extractor: Option<Extractor>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub extractor_key: Option<ExtractorKey>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub filename: Option<PathBuf>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub filesize: Option<u64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub filesize_approx: Option<u64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub format: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub format_id: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub format_index: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub format_note: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub formats: Option<Vec<Format>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub fps: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub fulltitle: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub genre: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub genres: Option<Vec<String>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub has_drm: Option<bool>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub heatmap: Option<Vec<HeatMapEntry>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub height: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub hls_aes: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub http_headers: Option<HttpHeader>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub id: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub ie_key: Option<ExtractorKey>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub is_live: Option<bool>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub language: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub language_preference: Option<i32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub license: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub like_count: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub live_status: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub location: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub manifest_url: Option<Url>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub media_type: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub modified_date: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub n_entries: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub original_url: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playable_in_embed: Option<bool>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_autonumber: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_channel: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_channel_id: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_count: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_id: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_index: Option<u64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_title: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_uploader: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_uploader_id: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub playlist_webpage_url: Option<Url>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub preference: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub protocol: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub quality: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub release_date: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub release_timestamp: Option<u64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub release_year: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub repost_count: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub requested_downloads: Option<Vec<RequestedDownloads>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub requested_entries: Option<Vec<u32>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub requested_formats: Option<Vec<Format>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub requested_subtitles: Option<HashMap<String, Subtitle>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub resolution: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub season: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub season_number: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub series: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub source_preference: Option<i32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub sponsorblock_chapters: Option<Vec<SponsorblockChapter>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub stretched_ratio: Option<Todo>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub subtitles: Option<HashMap<String, Vec<Caption>>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub tags: Option<Vec<String>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub tbr: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub thumbnail: Option<Url>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub thumbnails: Option<Vec<ThumbNail>>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub timestamp: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub title: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub upload_date: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub uploader: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub uploader_id: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub uploader_url: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub url: Option<Url>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub vbr: Option<f64>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub vcodec: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub video_ext: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub view_count: Option<u32>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub was_live: Option<bool>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub webpage_url: Option<Url>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub webpage_url_basename: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub webpage_url_domain: Option<String>, - - #[serde(skip_serializing_if = "Option::is_none")] - pub width: Option<u32>, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -#[serde(deny_unknown_fields)] -#[allow(missing_copy_implementations)] -pub struct FilesToMove {} - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -#[serde(deny_unknown_fields)] -pub struct RequestedDownloads { - pub __files_to_merge: Option<Vec<Todo>>, - pub __finaldir: PathBuf, - pub __infojson_filename: PathBuf, - pub __postprocessors: Vec<Todo>, - pub __real_download: bool, - pub __write_download_archive: bool, - pub _filename: PathBuf, - pub _type: InfoType, - pub _version: Version, - pub abr: f64, - pub acodec: String, - pub aspect_ratio: Option<f64>, - pub asr: Option<u32>, - pub audio_channels: Option<u32>, - pub audio_ext: Option<String>, - pub chapters: Option<Vec<SponsorblockChapter>>, - pub duration: Option<f64>, - pub dynamic_range: Option<String>, - pub ext: String, - pub filename: PathBuf, - pub filepath: PathBuf, - pub filesize_approx: Option<u64>, - pub format: String, - pub format_id: String, - pub format_note: Option<String>, - pub fps: Option<f64>, - pub has_drm: Option<bool>, - pub height: Option<u32>, - pub http_headers: Option<HttpHeader>, - pub infojson_filename: PathBuf, - pub language: Option<String>, - pub manifest_url: Option<Url>, - pub protocol: String, - pub quality: Option<i64>, - pub requested_formats: Option<Vec<Format>>, - pub resolution: String, - pub tbr: f64, - pub url: Option<Url>, - pub vbr: f64, - pub vcodec: String, - pub video_ext: Option<String>, - pub width: Option<u32>, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] -#[serde(deny_unknown_fields)] -pub struct Subtitle { - pub ext: SubtitleExt, - pub filepath: PathBuf, - pub filesize: Option<u64>, - pub fragment_base_url: Option<Url>, - pub fragments: Option<Vec<Fragment>>, - pub manifest_url: Option<Url>, - pub name: Option<String>, - pub protocol: Option<Todo>, - pub url: Url, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)] -pub enum SubtitleExt { - #[serde(alias = "vtt")] - Vtt, - - #[serde(alias = "mp4")] - Mp4, - - #[serde(alias = "json")] - Json, - #[serde(alias = "json3")] - Json3, - - #[serde(alias = "ttml")] - Ttml, - - #[serde(alias = "srv1")] - Srv1, - #[serde(alias = "srv2")] - Srv2, - #[serde(alias = "srv3")] - Srv3, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] -#[serde(deny_unknown_fields)] -pub struct Caption { - pub ext: SubtitleExt, - pub filepath: Option<PathBuf>, - pub filesize: Option<u64>, - pub fragments: Option<Vec<SubtitleFragment>>, - pub fragment_base_url: Option<Url>, - pub manifest_url: Option<Url>, - pub name: Option<String>, - pub protocol: Option<String>, - pub url: String, - pub video_id: Option<String>, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] -#[serde(deny_unknown_fields)] -pub struct SubtitleFragment { - path: PathBuf, - duration: Option<f64>, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] -#[serde(deny_unknown_fields)] -pub struct Chapter { - pub end_time: f64, - pub start_time: f64, - pub title: String, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -#[serde(deny_unknown_fields)] -pub struct SponsorblockChapter { - /// This is an utterly useless field, and should thus be ignored - pub _categories: Option<Vec<Vec<Value>>>, - - pub categories: Option<Vec<SponsorblockChapterCategory>>, - pub category: Option<SponsorblockChapterCategory>, - pub category_names: Option<Vec<String>>, - pub end_time: f64, - pub name: Option<String>, - pub r#type: Option<SponsorblockChapterType>, - pub start_time: f64, - pub title: String, -} - -pub fn get_none<'de, D, T>(_: D) -> Result<Option<T>, D::Error> -where - D: Deserializer<'de>, -{ - Ok(None) -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)] -#[serde(deny_unknown_fields)] -pub enum SponsorblockChapterType { - #[serde(alias = "skip")] - Skip, - - #[serde(alias = "chapter")] - Chapter, - - #[serde(alias = "poi")] - Poi, -} -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)] -#[serde(deny_unknown_fields)] -pub enum SponsorblockChapterCategory { - #[serde(alias = "filler")] - Filler, - - #[serde(alias = "interaction")] - Interaction, - - #[serde(alias = "music_offtopic")] - MusicOfftopic, - - #[serde(alias = "poi_highlight")] - PoiHighlight, - - #[serde(alias = "preview")] - Preview, - - #[serde(alias = "sponsor")] - Sponsor, - - #[serde(alias = "selfpromo")] - SelfPromo, - - #[serde(alias = "chapter")] - Chapter, - - #[serde(alias = "intro")] - Intro, - - #[serde(alias = "outro")] - Outro, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] -#[serde(deny_unknown_fields)] -#[allow(missing_copy_implementations)] -pub struct HeatMapEntry { - pub start_time: f64, - pub end_time: f64, - pub value: f64, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)] -#[serde(deny_unknown_fields)] -pub enum InfoType { - #[serde(alias = "playlist")] - #[serde(rename(serialize = "playlist"))] - Playlist, - - #[serde(alias = "url")] - #[serde(rename(serialize = "url"))] - Url, - - #[serde(alias = "video")] - #[serde(rename(serialize = "video"))] - Video, -} - -#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] -#[serde(deny_unknown_fields)] -pub struct Version { - pub current_git_head: Option<String>, - pub release_git_head: String, - pub repository: String, - pub version: String, -} - -#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] -#[serde(from = "String")] -#[serde(deny_unknown_fields)] -pub enum Parent { - Root, - Id(String), -} - -impl Parent { - #[must_use] - pub fn id(&self) -> Option<&str> { - if let Self::Id(id) = self { - Some(id) - } else { - None - } - } -} - -impl From<String> for Parent { - fn from(value: String) -> Self { - if value == "root" { - Self::Root - } else { - Self::Id(value) - } - } -} - -#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] -#[serde(from = "String")] -#[serde(deny_unknown_fields)] -pub struct Id { - pub id: String, -} -impl From<String> for Id { - fn from(value: String) -> Self { - Self { - // Take the last element if the string is split with dots, otherwise take the full id - id: value.split('.').last().unwrap_or(&value).to_owned(), - } - } -} - -#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] -#[serde(deny_unknown_fields)] -#[allow(clippy::struct_excessive_bools)] -pub struct Comment { - pub id: Id, - pub text: String, - #[serde(default = "zero")] - pub like_count: u32, - pub is_pinned: bool, - pub author_id: String, - #[serde(default = "unknown")] - pub author: String, - pub author_is_verified: bool, - pub author_thumbnail: Url, - pub parent: Parent, - #[serde(deserialize_with = "edited_from_time_text", alias = "_time_text")] - pub edited: bool, - // Can't also be deserialized, as it's already used in 'edited' - // _time_text: String, - pub timestamp: i64, - pub author_url: Option<Url>, - pub author_is_uploader: bool, - pub is_favorited: bool, -} -fn unknown() -> String { - "<Unknown>".to_string() -} -fn zero() -> u32 { - 0 -} -fn edited_from_time_text<'de, D>(d: D) -> Result<bool, D::Error> -where - D: Deserializer<'de>, -{ - let s = String::deserialize(d)?; - if s.contains(" (edited)") { - Ok(true) - } else { - Ok(false) - } -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] -#[serde(deny_unknown_fields)] -pub struct ThumbNail { - pub id: Option<String>, - pub preference: Option<i32>, - /// in the form of "[`height`]x[`width`]" - pub resolution: Option<String>, - pub url: Url, - pub width: Option<u32>, - pub height: Option<u32>, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] -#[serde(deny_unknown_fields)] -pub struct Format { - pub __needs_testing: Option<bool>, - pub __working: Option<bool>, - pub abr: Option<f64>, - pub acodec: Option<String>, - pub aspect_ratio: Option<f64>, - pub asr: Option<f64>, - pub audio_channels: Option<u32>, - pub audio_ext: Option<String>, - pub columns: Option<u32>, - pub container: Option<String>, - pub downloader_options: Option<DownloaderOptions>, - pub dynamic_range: Option<String>, - pub ext: String, - pub filepath: Option<PathBuf>, - pub filesize: Option<u64>, - pub filesize_approx: Option<u64>, - pub format: Option<String>, - pub format_id: String, - pub format_index: Option<String>, - pub format_note: Option<String>, - pub fps: Option<f64>, - pub fragment_base_url: Option<Todo>, - pub fragments: Option<Vec<Fragment>>, - pub has_drm: Option<bool>, - pub height: Option<u32>, - pub http_headers: Option<HttpHeader>, - pub is_dash_periods: Option<bool>, - pub is_live: Option<bool>, - pub language: Option<String>, - pub language_preference: Option<i32>, - pub manifest_stream_number: Option<u32>, - pub manifest_url: Option<Url>, - pub preference: Option<i32>, - pub protocol: Option<String>, - pub quality: Option<f64>, - pub resolution: Option<String>, - pub rows: Option<u32>, - pub source_preference: Option<i32>, - pub tbr: Option<f64>, - pub url: Url, - pub vbr: Option<f64>, - pub vcodec: String, - pub video_ext: Option<String>, - pub width: Option<u32>, -} - -#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] -#[serde(deny_unknown_fields)] -#[allow(missing_copy_implementations)] -pub struct DownloaderOptions { - http_chunk_size: u64, -} - -#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] -#[serde(deny_unknown_fields)] -pub struct HttpHeader { - #[serde(alias = "User-Agent")] - pub user_agent: Option<String>, - - #[serde(alias = "Accept")] - pub accept: Option<String>, - - #[serde(alias = "X-Forwarded-For")] - pub x_forwarded_for: Option<String>, - - #[serde(alias = "Accept-Language")] - pub accept_language: Option<String>, - - #[serde(alias = "Sec-Fetch-Mode")] - pub sec_fetch_mode: Option<String>, -} - -#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] -#[serde(deny_unknown_fields)] -pub struct Fragment { - pub duration: Option<f64>, - pub fragment_count: Option<usize>, - pub path: Option<PathBuf>, - pub url: Option<Url>, -} - -impl InfoJson { - pub fn to_py_dict(self, py: Python<'_>) -> PyResult<Bound<'_, PyDict>> { - let output: Bound<'_, PyDict> = json_loads_str(py, self)?; - Ok(output) - } -} diff --git a/crates/yt_dlp/src/wrapper/yt_dlp_options.rs b/crates/yt_dlp/src/wrapper/yt_dlp_options.rs deleted file mode 100644 index 25595b5..0000000 --- a/crates/yt_dlp/src/wrapper/yt_dlp_options.rs +++ /dev/null @@ -1,62 +0,0 @@ -// yt - A fully featured command line YouTube client -// -// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> -// SPDX-License-Identifier: GPL-3.0-or-later -// -// This file is part of Yt. -// -// You should have received a copy of the License along with this program. -// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. - -use pyo3::{Bound, PyResult, Python, types::PyDict}; -use serde::Serialize; - -use crate::json_loads; - -#[derive(Serialize, Clone)] -pub struct YtDlpOptions { - pub playliststart: u32, - pub playlistend: u32, - pub noplaylist: bool, - pub extract_flat: ExtractFlat, - // pub extractor_args: ExtractorArgs, - // pub format: String, - // pub fragment_retries: u32, - // #[serde(rename(serialize = "getcomments"))] - // pub get_comments: bool, - // #[serde(rename(serialize = "ignoreerrors"))] - // pub ignore_errors: bool, - // pub retries: u32, - // #[serde(rename(serialize = "writeinfojson"))] - // pub write_info_json: bool, - // pub postprocessors: Vec<serde_json::Map<String, serde_json::Value>>, -} - -#[derive(Serialize, Copy, Clone)] -pub enum ExtractFlat { - #[serde(rename(serialize = "in_playlist"))] - InPlaylist, - - #[serde(rename(serialize = "discard_in_playlist"))] - DiscardInPlaylist, -} - -#[derive(Serialize, Clone)] -pub struct ExtractorArgs { - pub youtube: YoutubeExtractorArgs, -} - -#[derive(Serialize, Clone)] -pub struct YoutubeExtractorArgs { - comment_sort: Vec<String>, - max_comments: Vec<String>, -} - -impl YtDlpOptions { - pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> { - let string = serde_json::to_string(&self).expect("This should always work"); - - let output: Bound<PyDict> = json_loads(py, string)?; - Ok(output) - } -} diff --git a/crates/yt_dlp/update.sh b/crates/yt_dlp/update.sh index c1a0215..ab03b62 100755 --- a/crates/yt_dlp/update.sh +++ b/crates/yt_dlp/update.sh @@ -10,6 +10,4 @@ # You should have received a copy of the License along with this program. # If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -cd "$(dirname "$0")" || exit 1 -[ "$1" = "upgrade" ] && cargo upgrade --incompatible -cargo update +./crates/pyo3-pylogger/update.sh "$@" |