about | summary | refs | log | tree | commit | diff | stats
path: root/crates/yt_dlp
diff options
context:
space:
mode:
author Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-06-17 08:56:36 +0200
committer Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-06-17 08:56:36 +0200
commit 1a6d3639e6fddb731735554d407d1eea77f053c6 (patch)
tree 7e42b8d65c283c4cf6b756901dcfccf7c0f6db94 /crates/yt_dlp
parent fix(yt_dlp/post_processors/dearrow): Migrate to curl for api requests (diff)
download yt-1a6d3639e6fddb731735554d407d1eea77f053c6.zip
fix(yt_dlp/post_processors): Register in python
We need to tell yt_dlp about our post processors, as they would otherwise not take full effect. For example, changing the title would previously only have changed the title in the *in-memory* info json; the actual files on disk (the video and its .info.json) would still have carried the old title, because yt_dlp did not know about our post processor. Registering the post processors via yt_dlp's api also has the upside that we can determine when they run.
Diffstat (limited to 'crates/yt_dlp')
-rw-r--r-- crates/yt_dlp/src/lib.rs 88
-rw-r--r-- crates/yt_dlp/src/post_processors/dearrow.rs 58
-rw-r--r-- crates/yt_dlp/src/post_processors/mod.rs 120
-rw-r--r-- crates/yt_dlp/src/progress_hook.rs 4
4 files changed, 207 insertions, 63 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index e7b37c6..d0465e1 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -10,16 +10,14 @@
//! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
-use std::{self, env, fmt::Display, path::PathBuf};
+use std::{env, fmt::Display, path::PathBuf};
use indexmap::IndexMap;
use log::{Level, debug, error, info, log_enabled};
-use logging::setup_logging;
-use post_processors::PostProcessor;
use rustpython::{
InterpreterConfig,
vm::{
- self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, VirtualMachine,
+ self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine,
builtins::{PyBaseException, PyBaseExceptionRef, PyDict, PyList, PyStr},
function::{FuncArgs, KwArgs, PosArgs},
py_io::Write,
@@ -28,10 +26,13 @@ use rustpython::{
};
use url::Url;
-mod logging;
+use crate::logging::setup_logging;
+
pub mod post_processors;
pub mod progress_hook;
+mod logging;
+
#[macro_export]
macro_rules! json_get {
($value:expr, $name:literal, $into:ident) => {{
@@ -73,7 +74,6 @@ pub struct YoutubeDL {
youtube_dl_class: PyObjectRef,
yt_dlp_module: PyObjectRef,
options: serde_json::Map<String, serde_json::Value>,
- post_processors: Vec<Box<dyn PostProcessor>>,
}
impl std::fmt::Debug for YoutubeDL {
@@ -91,6 +91,7 @@ impl YoutubeDL {
///
/// # Errors
/// If a python call fails.
+ #[allow(clippy::too_many_lines)]
pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> {
let mut settings = vm::Settings::default();
if let Ok(python_path) = env::var("PYTHONPATH") {
@@ -124,12 +125,16 @@ impl YoutubeDL {
let class = yt_dlp_module.get_attr("YoutubeDL", vm)?;
let opts = json_loads(options.options, vm);
- if let Some(function) = options.progress_hook {
- opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
- let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
- vm.new_pyobj(vec![hook])
- })
- .expect("Should work?");
+
+ {
+ // Setup the progress hook
+ if let Some(function) = options.progress_hook {
+ opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
+ let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
+ vm.new_pyobj(vec![hook])
+ })
+ .expect("Should work?");
+ }
}
{
@@ -204,6 +209,28 @@ impl YoutubeDL {
let youtube_dl_class = class.call((opts,), vm)?;
+ {
+ // Setup the post processors
+
+ let add_post_processor_fun = youtube_dl_class.get_attr("add_post_processor", vm)?;
+
+ for pp in options.post_processors {
+ let args = {
+ FuncArgs::new(
+ PosArgs::new(vec![pp(vm)?]),
+ KwArgs::new({
+ let mut map = IndexMap::new();
+ // "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
+ map.insert("when".to_owned(), vm.new_pyobj("pre_process"));
+ map
+ }),
+ )
+ };
+
+ add_post_processor_fun.call(args, vm)?;
+ }
+ }
+
Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class))
}) {
Ok(ok) => Ok(ok),
@@ -222,7 +249,6 @@ impl YoutubeDL {
youtube_dl_class,
yt_dlp_module,
options: output_options,
- post_processors: options.post_processors,
})
}
@@ -413,21 +439,7 @@ impl YoutubeDL {
let result = value.downcast::<PyDict>().expect("This should stay a dict");
- let mut json = json_dumps(result, vm);
-
- for pp in &self.post_processors {
- if pp
- .extractors()
- .iter()
- .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
- {
- json = pp.process(json)?;
- } else {
- error!("Extractor not found for {pp:#?}");
- }
- }
-
- Ok(json)
+ Ok(json_dumps(result, vm))
}
}
@@ -475,21 +487,21 @@ pub mod extract_info {
}
#[allow(missing_docs)]
pub mod prepare {
- use crate::{PythonError, post_processors};
+ use crate::PythonError;
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error(transparent)]
Python(#[from] PythonError),
-
- #[error("Failed to run a post processor")]
- PostProcessorRun(#[from] post_processors::Error),
}
}
pub type InfoJson = serde_json::Map<String, serde_json::Value>;
+/// Wrap your function with [`mk_python_function`].
pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
+pub type PostProcessorFunction = fn(vm: &VirtualMachine) -> PyResult<PyObjectRef>;
+
/// Options, that are used to customize the download behaviour.
///
/// In the future, this might get a Builder api.
@@ -499,7 +511,7 @@ pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
pub struct YoutubeDLOptions {
options: serde_json::Map<String, serde_json::Value>,
progress_hook: Option<ProgressHookFunction>,
- post_processors: Vec<Box<dyn PostProcessor>>,
+ post_processors: Vec<PostProcessorFunction>,
}
impl YoutubeDLOptions {
@@ -511,7 +523,7 @@ impl YoutubeDLOptions {
post_processors: vec![],
};
- me.with_post_processor(post_processors::dearrow::DeArrowPP)
+ me.with_post_processor(post_processors::dearrow::process)
}
#[must_use]
@@ -535,8 +547,8 @@ impl YoutubeDLOptions {
}
#[must_use]
- pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self {
- self.post_processors.push(Box::new(post_processor));
+ pub fn with_post_processor(mut self, pp: PostProcessorFunction) -> Self {
+ self.post_processors.push(pp);
self
}
@@ -569,7 +581,9 @@ pub mod build {
}
}
-fn json_loads(
+/// # Panics
+/// If expectation about python operations fail.
+pub fn json_loads(
input: serde_json::Map<String, serde_json::Value>,
vm: &VirtualMachine,
) -> PyRef<PyDict> {
diff --git a/crates/yt_dlp/src/post_processors/dearrow.rs b/crates/yt_dlp/src/post_processors/dearrow.rs
index 7dc6bbb..77c7ab9 100644
--- a/crates/yt_dlp/src/post_processors/dearrow.rs
+++ b/crates/yt_dlp/src/post_processors/dearrow.rs
@@ -16,22 +16,21 @@ use rustpython::vm::{
};
use serde::{Deserialize, Serialize};
-use crate::{InfoJson, json_get};
+use crate::{pydict_cast, pydict_get, wrap_post_processor};
-use super::PostProcessor;
+wrap_post_processor!("DeArrow", unwrapped_process, process);
-#[derive(Debug, Clone, Copy)]
-pub struct DeArrowPP;
-
-impl PostProcessor for DeArrowPP {
- fn extractors(&self) -> &'static [&'static str] {
- &["Youtube"]
+/// # Errors
+/// If the API access fails.
+pub fn unwrapped_process(info: PyRef<PyDict>, vm: &VirtualMachine) -> Result<PyRef<PyDict>, Error> {
+ if pydict_get!(@vm, info, "extractor_key", PyStr).as_str() != "Youtube" {
+ warn!("DeArrow: Extractor did not match, exiting.");
+ return Ok(info);
}
- fn process(&self, mut info: InfoJson) -> Result<InfoJson, super::Error> {
- let mut output: DeArrowApi = {
- let output_bytes = {
- let mut dst = Vec::new();
+ let mut output: DeArrowApi = {
+ let output_bytes = {
+ let mut dst = Vec::new();
let mut easy = Easy::new();
easy.url(
@@ -88,6 +87,41 @@ impl PostProcessor for DeArrowPP {
Ok(info)
}
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+ #[error("Failed to access the DeArrow api: {0}")]
+ Get(#[from] curl::Error),
+
+ #[error("Failed to deserialize a api json return object: {0}")]
+ Deserialize(#[from] serde_json::Error),
+}
+
+fn update_title(info: &PyRef<PyDict>, new_title: &str, vm: &VirtualMachine) {
+ assert!(!info.contains_key("original_title", vm));
+
+ if let Ok(old_title) = info.get_item("title", vm) {
+ warn!(
+ "DeArrow: Updating title from {:#?} to {:#?}",
+ pydict_cast!(@ref old_title, PyStr).as_str(),
+ new_title
+ );
+
+ info.set_item("original_title", old_title, vm)
+ .expect("We checked, it is a new key");
+ } else {
+ warn!("DeArrow: Setting title to {new_title:#?}");
+ }
+
+ let cleaned_title = {
+ // NOTE(@bpeetz): DeArrow uses `>` as a “Don't format the next word” mark.
+ // They should be removed, if one does not use a auto-formatter. <2025-06-16>
+ new_title.replace('>', "")
+ };
+
+ info.set_item("title", vm.new_pyobj(cleaned_title), vm)
+ .expect("This should work?");
+}
+
#[derive(Serialize, Deserialize)]
/// See: <https://wiki.sponsor.ajay.app/w/API_Docs/DeArrow>
struct DeArrowApi {
diff --git a/crates/yt_dlp/src/post_processors/mod.rs b/crates/yt_dlp/src/post_processors/mod.rs
index 65801c2..575dc45 100644
--- a/crates/yt_dlp/src/post_processors/mod.rs
+++ b/crates/yt_dlp/src/post_processors/mod.rs
@@ -8,23 +8,115 @@
// You should have received a copy of the License along with this program.
// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
-use crate::InfoJson;
-
pub mod dearrow;
-pub trait PostProcessor: std::fmt::Debug + Send {
- /// Process a [`InfoJson`] object and return the updated one.
- ///
- /// # Errors
- /// If the processing steps failed.
- fn process(&self, info: InfoJson) -> Result<InfoJson, Error>;
+#[macro_export]
+macro_rules! pydict_get {
+ (@$vm:expr, $value:expr, $name:literal, $into:ident) => {{
+ match $value.get_item($name, $vm) {
+ Ok(val) => $crate::pydict_cast!(val, $into),
+ Err(_) => panic!(
+ concat!(
+ "Expected '",
+ $name,
+ "' to be a key for the'",
+ stringify!($value),
+ "' py dictionary: {:#?}"
+ ),
+ $value
+ ),
+ }
+ }};
+}
- /// The supported extractors for this post processor
- fn extractors(&self) -> &'static [&'static str];
+#[macro_export]
+macro_rules! pydict_cast {
+ ($value:expr, $into:ident) => {{
+ match $value.downcast::<$into>() {
+ Ok(result) => result,
+ Err(val) => panic!(
+ concat!(
+ "Expected to be able to downcast value ({:#?}) as ",
+ stringify!($into)
+ ),
+ val
+ ),
+ }
+ }};
+ (@ref $value:expr, $into:ident) => {{
+ match $value.downcast_ref::<$into>() {
+ Some(result) => result,
+ None => panic!(
+ concat!(
+ "Expected to be able to downcast value ({:#?}) as ",
+ stringify!($into)
+ ),
+ $value
+ ),
+ }
+ }};
}
-#[derive(thiserror::Error, Debug)]
-pub enum Error {
- #[error("Failed to access a api: {0}")]
- Get(#[from] reqwest::Error),
+#[macro_export]
+macro_rules! wrap_post_processor {
+ ($name:literal, $unwrap:ident, $wrapped:ident) => {
+ use $crate::progress_hook::__priv::vm;
+
+ /// # Errors
+ /// - If the underlying function returns an error.
+ /// - If python operations fail.
+ pub fn $wrapped(vm: &vm::VirtualMachine) -> vm::PyResult<vm::PyObjectRef> {
+ fn actual_processor(
+ mut input: vm::function::FuncArgs,
+ vm: &vm::VirtualMachine,
+ ) -> vm::PyResult<vm::PyRef<vm::builtins::PyDict>> {
+ let input = input
+ .args
+ .remove(0)
+ .downcast::<vm::builtins::PyDict>()
+ .expect("Should be a py dict");
+
+ let output = match unwrapped_process(input, vm) {
+ Ok(ok) => ok,
+ Err(err) => {
+ return Err(vm.new_runtime_error(err.to_string()));
+ }
+ };
+
+ Ok(output)
+ }
+
+ let scope = vm.new_scope_with_builtins();
+
+ scope.globals.set_item(
+ "actual_processor",
+ vm.new_function("actual_processor", actual_processor).into(),
+ vm,
+ )?;
+
+ let local_scope = scope.clone();
+ vm.run_code_string(
+ local_scope,
+ format!(
+ "
+import yt_dlp
+
+class {}(yt_dlp.postprocessor.PostProcessor):
+ def run(self, info):
+ info = actual_processor(info)
+ return [], info
+
+inst = {}()
+",
+ $name, $name
+ ).as_str(),
+ "<embedded post processor initializing code>".to_owned(),
+ )?;
+
+ Ok(scope
+ .globals
+ .get_item("inst", vm)
+ .expect("We just declared it"))
+ }
+ };
}
diff --git a/crates/yt_dlp/src/progress_hook.rs b/crates/yt_dlp/src/progress_hook.rs
index 43f85e0..4604223 100644
--- a/crates/yt_dlp/src/progress_hook.rs
+++ b/crates/yt_dlp/src/progress_hook.rs
@@ -49,3 +49,7 @@ macro_rules! mk_python_function {
}
pub use rustpython;
+pub mod __priv {
+ pub use crate::{json_dumps, json_loads};
+ pub use rustpython::vm;
+}