about summary refs log tree commit diff stats
path: root/crates/yt_dlp/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/yt_dlp/src/lib.rs')
-rw-r--r-- crates/yt_dlp/src/lib.rs 421
1 files changed, 20 insertions, 401 deletions
diff --git a/crates/yt_dlp/src/lib.rs b/crates/yt_dlp/src/lib.rs
index e7b37c6..a1db606 100644
--- a/crates/yt_dlp/src/lib.rs
+++ b/crates/yt_dlp/src/lib.rs
@@ -10,27 +10,29 @@
 
 //! The `yt_dlp` interface is completely contained in the [`YoutubeDL`] structure.
 
-use std::{self, env, fmt::Display, path::PathBuf};
+use std::path::PathBuf;
 
 use indexmap::IndexMap;
-use log::{Level, debug, error, info, log_enabled};
-use logging::setup_logging;
-use post_processors::PostProcessor;
-use rustpython::{
-    InterpreterConfig,
-    vm::{
-        self, AsObject, Interpreter, PyObjectRef, PyPayload, PyRef, VirtualMachine,
-        builtins::{PyBaseException, PyBaseExceptionRef, PyDict, PyList, PyStr},
-        function::{FuncArgs, KwArgs, PosArgs},
-        py_io::Write,
-        suggestion::offer_suggestions,
-    },
+use log::info;
+use rustpython::vm::{
+    Interpreter, PyObjectRef, PyRef, VirtualMachine,
+    builtins::{PyDict, PyList, PyStr},
+    function::{FuncArgs, KwArgs, PosArgs},
 };
 use url::Url;
 
-mod logging;
+use crate::{
+    info_json::{InfoJson, json_dumps, json_loads},
+    python_error::PythonError,
+};
+
+pub mod info_json;
+pub mod options;
 pub mod post_processors;
 pub mod progress_hook;
+pub mod python_error;
+
+mod logging;
 
 #[macro_export]
 macro_rules! json_get {
@@ -73,7 +75,6 @@ pub struct YoutubeDL {
     youtube_dl_class: PyObjectRef,
     yt_dlp_module: PyObjectRef,
     options: serde_json::Map<String, serde_json::Value>,
-    post_processors: Vec<Box<dyn PostProcessor>>,
 }
 
 impl std::fmt::Debug for YoutubeDL {
@@ -84,148 +85,6 @@ impl std::fmt::Debug for YoutubeDL {
 }
 
 impl YoutubeDL {
-    /// Construct this instance from options.
-    ///
-    /// # Panics
-    /// If `yt_dlp` changed their interface.
-    ///
-    /// # Errors
-    /// If a python call fails.
-    pub fn from_options(options: YoutubeDLOptions) -> Result<Self, build::Error> {
-        let mut settings = vm::Settings::default();
-        if let Ok(python_path) = env::var("PYTHONPATH") {
-            for path in python_path.split(':') {
-                settings.path_list.push(path.to_owned());
-            }
-        } else {
-            error!(
-                "No PYTHONPATH found or invalid utf8. \
-                This means, that you probably did not \
-                supply the yt_dlp!"
-            );
-        }
-
-        settings.install_signal_handlers = false;
-
-        // NOTE(@bpeetz): Another value leads to an internal codegen error. <2025-06-13>
-        settings.optimize = 0;
-
-        settings.isolated = true;
-
-        let interpreter = InterpreterConfig::new()
-            .init_stdlib()
-            .settings(settings)
-            .interpreter();
-
-        let output_options = options.options.clone();
-
-        let (yt_dlp_module, youtube_dl_class) = match interpreter.enter(|vm| {
-            let yt_dlp_module = vm.import("yt_dlp", 0)?;
-            let class = yt_dlp_module.get_attr("YoutubeDL", vm)?;
-
-            let opts = json_loads(options.options, vm);
-            if let Some(function) = options.progress_hook {
-                opts.get_or_insert(vm, vm.new_pyobj("progress_hooks"), || {
-                    let hook: PyObjectRef = vm.new_function("progress_hook", function).into();
-                    vm.new_pyobj(vec![hook])
-                })
-                .expect("Should work?");
-            }
-
-            {
-                // Unconditionally set a logger.
-                // Otherwise, yt_dlp will log to stderr.
-
-                /// Is the specified record to be logged? Returns false for no,
-                /// true for yes. Filters can either modify log records in-place or
-                /// return a completely different record instance which will replace
-                /// the original log record in any future processing of the event.
-                fn filter_error_log(mut input: FuncArgs, vm: &VirtualMachine) -> bool {
-                    let record = input.args.remove(0);
-
-                    // Filter out all error logs (they are propagated as rust errors)
-                    let levelname: PyRef<PyStr> = record
-                        .get_attr("levelname", vm)
-                        .expect("This should exist")
-                        .downcast()
-                        .expect("This should be a String");
-
-                    let return_value = levelname.as_str() != "ERROR";
-
-                    if log_enabled!(Level::Debug) && !return_value {
-                        let message: String = {
-                            let get_message = record.get_attr("getMessage", vm).expect("Is set");
-                            let message: PyRef<PyStr> = get_message
-                                .call((), vm)
-                                .expect("Can be called")
-                                .downcast()
-                                .expect("Downcasting works");
-
-                            message.as_str().to_owned()
-                        };
-
-                        debug!("Swollowed error message: '{message}'");
-                    }
-                    return_value
-                }
-
-                let logging = setup_logging(vm, "yt_dlp")?;
-                let ytdl_logger = {
-                    let get_logger = logging.get_item("getLogger", vm)?;
-                    get_logger.call(("yt_dlp",), vm)?
-                };
-
-                {
-                    let args = FuncArgs::new(
-                        PosArgs::new(vec![]),
-                        KwArgs::new({
-                            let mut map = IndexMap::new();
-                            // Ensure that all events are logged by setting
-                            // the log level to NOTSET (we filter on rust's side)
-                            map.insert("level".to_owned(), vm.new_pyobj(0));
-                            map
-                        }),
-                    );
-
-                    let basic_config = logging.get_item("basicConfig", vm)?;
-                    basic_config.call(args, vm)?;
-                }
-
-                {
-                    let add_filter = ytdl_logger.get_attr("addFilter", vm)?;
-                    add_filter.call(
-                        (vm.new_function("yt_dlp_error_filter", filter_error_log),),
-                        vm,
-                    )?;
-                }
-
-                opts.set_item("logger", ytdl_logger, vm)?;
-            }
-
-            let youtube_dl_class = class.call((opts,), vm)?;
-
-            Ok::<_, PyRef<PyBaseException>>((yt_dlp_module, youtube_dl_class))
-        }) {
-            Ok(ok) => Ok(ok),
-            Err(err) => {
-                // TODO(@bpeetz): Do we want to run `interpreter.finalize` here? <2025-06-14>
-                // interpreter.finalize(Some(err));
-                interpreter.enter(|vm| {
-                    let buffer = process_exception(vm, &err);
-                    Err(build::Error::Python(buffer))
-                })
-            }
-        }?;
-
-        Ok(Self {
-            interpreter,
-            youtube_dl_class,
-            yt_dlp_module,
-            options: output_options,
-            post_processors: options.post_processors,
-        })
-    }
-
     /// # Panics
     ///
     /// If `yt_dlp` changed their location or type of `__version__`.
@@ -413,43 +272,13 @@ impl YoutubeDL {
 
         let result = value.downcast::<PyDict>().expect("This should stay a dict");
 
-        let mut json = json_dumps(result, vm);
-
-        for pp in &self.post_processors {
-            if pp
-                .extractors()
-                .iter()
-                .any(|extractor| *extractor == json_get!(json, "extractor_key", as_str))
-            {
-                json = pp.process(json)?;
-            } else {
-                error!("Extractor not found for {pp:#?}");
-            }
-        }
-
-        Ok(json)
-    }
-}
-
-#[derive(thiserror::Error, Debug)]
-pub struct PythonError(pub String);
-
-impl Display for PythonError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Python threw an exception: {}", self.0)
-    }
-}
-
-impl PythonError {
-    fn from_exception(vm: &VirtualMachine, exc: &PyRef<PyBaseException>) -> Self {
-        let buffer = process_exception(vm, exc);
-        Self(buffer)
+        Ok(json_dumps(result, vm))
     }
 }
 
 #[allow(missing_docs)]
 pub mod process_ie_result {
-    use crate::{PythonError, prepare};
+    use crate::{prepare, python_error::PythonError};
 
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
@@ -462,7 +291,7 @@ pub mod process_ie_result {
 }
 #[allow(missing_docs)]
 pub mod extract_info {
-    use crate::{PythonError, prepare};
+    use crate::{prepare, python_error::PythonError};
 
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
@@ -475,221 +304,11 @@ pub mod extract_info {
 }
 #[allow(missing_docs)]
 pub mod prepare {
-    use crate::{PythonError, post_processors};
+    use crate::python_error::PythonError;
 
     #[derive(Debug, thiserror::Error)]
     pub enum Error {
         #[error(transparent)]
         Python(#[from] PythonError),
-
-        #[error("Failed to run a post processor")]
-        PostProcessorRun(#[from] post_processors::Error),
-    }
-}
-
-pub type InfoJson = serde_json::Map<String, serde_json::Value>;
-pub type ProgressHookFunction = fn(input: FuncArgs, vm: &VirtualMachine);
-
-/// Options, that are used to customize the download behaviour.
-///
-/// In the future, this might get a Builder api.
-///
-/// See `help(yt_dlp.YoutubeDL())` from python for a full list of available options.
-#[derive(Default, Debug)]
-pub struct YoutubeDLOptions {
-    options: serde_json::Map<String, serde_json::Value>,
-    progress_hook: Option<ProgressHookFunction>,
-    post_processors: Vec<Box<dyn PostProcessor>>,
-}
-
-impl YoutubeDLOptions {
-    #[must_use]
-    pub fn new() -> Self {
-        let me = Self {
-            options: serde_json::Map::new(),
-            progress_hook: None,
-            post_processors: vec![],
-        };
-
-        me.with_post_processor(post_processors::dearrow::DeArrowPP)
-    }
-
-    #[must_use]
-    pub fn set(self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
-        let mut options = self.options;
-        options.insert(key.into(), value.into());
-
-        Self { options, ..self }
-    }
-
-    #[must_use]
-    pub fn with_progress_hook(self, progress_hook: ProgressHookFunction) -> Self {
-        if let Some(_previous_hook) = self.progress_hook {
-            todo!()
-        } else {
-            Self {
-                progress_hook: Some(progress_hook),
-                ..self
-            }
-        }
-    }
-
-    #[must_use]
-    pub fn with_post_processor<P: PostProcessor + 'static>(mut self, post_processor: P) -> Self {
-        self.post_processors.push(Box::new(post_processor));
-        self
-    }
-
-    /// # Errors
-    /// If the underlying [`YoutubeDL::from_options`] errors.
-    pub fn build(self) -> Result<YoutubeDL, build::Error> {
-        YoutubeDL::from_options(self)
-    }
-
-    #[must_use]
-    pub fn from_json_options(options: serde_json::Map<String, serde_json::Value>) -> Self {
-        Self {
-            options,
-            ..Self::new()
-        }
-    }
-
-    #[must_use]
-    pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
-        self.options.get(key)
-    }
-}
-
-#[allow(missing_docs)]
-pub mod build {
-    #[derive(Debug, thiserror::Error)]
-    pub enum Error {
-        #[error("Python threw an exception: {0}")]
-        Python(String),
     }
 }
-
-fn json_loads(
-    input: serde_json::Map<String, serde_json::Value>,
-    vm: &VirtualMachine,
-) -> PyRef<PyDict> {
-    let json = vm.import("json", 0).expect("Module exists");
-    let loads = json.get_attr("loads", vm).expect("Method exists");
-    let self_str = serde_json::to_string(&serde_json::Value::Object(input)).expect("Vaild json");
-    let dict = loads
-        .call((self_str,), vm)
-        .expect("Vaild json is always a valid dict");
-
-    dict.downcast().expect("Should always be a dict")
-}
-
-/// # Panics
-/// If expectation about python operations fail.
-pub fn json_dumps(
-    input: PyRef<PyDict>,
-    vm: &VirtualMachine,
-) -> serde_json::Map<String, serde_json::Value> {
-    let json = vm.import("json", 0).expect("Module exists");
-    let dumps = json.get_attr("dumps", vm).expect("Method exists");
-    let dict = dumps
-        .call((input,), vm)
-        .map_err(|err| vm.print_exception(err))
-        .expect("Might not always work, but for our dicts it works");
-
-    let string: PyRef<PyStr> = dict.downcast().expect("Should always be a string");
-
-    let real_string = string.to_str().expect("Should be valid utf8");
-
-    // {
-    //     let mut file = File::create("debug.dump.json").unwrap();
-    //     write!(file, "{}", real_string).unwrap();
-    // }
-
-    let value: serde_json::Value = serde_json::from_str(real_string).expect("Should be valid json");
-
-    match value {
-        serde_json::Value::Object(map) => map,
-        _ => unreachable!("These should not be json.dumps output"),
-    }
-}
-
-// Inlined and changed from `vm.write_exception_inner`
-fn write_exception<W: Write>(
-    vm: &VirtualMachine,
-    output: &mut W,
-    exc: &PyBaseExceptionRef,
-) -> Result<(), W::Error> {
-    let varargs = exc.args();
-    let args_repr = {
-        match varargs.len() {
-            0 => vec![],
-            1 => {
-                let args0_repr = if true {
-                    varargs[0]
-                        .str(vm)
-                        .unwrap_or_else(|_| PyStr::from("<element str() failed>").into_ref(&vm.ctx))
-                } else {
-                    varargs[0].repr(vm).unwrap_or_else(|_| {
-                        PyStr::from("<element repr() failed>").into_ref(&vm.ctx)
-                    })
-                };
-                vec![args0_repr]
-            }
-            _ => varargs
-                .iter()
-                .map(|vararg| {
-                    vararg.repr(vm).unwrap_or_else(|_| {
-                        PyStr::from("<element repr() failed>").into_ref(&vm.ctx)
-                    })
-                })
-                .collect(),
-        }
-    };
-
-    let exc_class = exc.class();
-
-    if exc_class.fast_issubclass(vm.ctx.exceptions.syntax_error) {
-        unreachable!(
-            "A syntax error should never be raised, \
-                                as yt_dlp should not have them and neither our embedded code"
-        );
-    }
-
-    let exc_name = exc_class.name();
-    match args_repr.len() {
-        0 => write!(output, "{exc_name}"),
-        1 => write!(output, "{}: {}", exc_name, args_repr[0]),
-        _ => write!(
-            output,
-            "{}: ({})",
-            exc_name,
-            args_repr
-                .iter()
-                .map(|val| val.as_str())
-                .collect::<Vec<_>>()
-                .join(", "),
-        ),
-    }?;
-
-    match offer_suggestions(exc, vm) {
-        Some(suggestions) => {
-            write!(output, ". Did you mean: '{suggestions}'?")
-        }
-        None => Ok(()),
-    }
-}
-
-fn process_exception(vm: &VirtualMachine, err: &PyBaseExceptionRef) -> String {
-    let mut buffer = String::new();
-    write_exception(vm, &mut buffer, err)
-        .expect("We are writing into an *in-memory* string, it will always work");
-
-    if log_enabled!(Level::Debug) {
-        let mut output = String::new();
-        vm.write_exception(&mut output, err)
-            .expect("We are writing into an *in-memory* string, it will always work");
-        debug!("Python threw an exception: {output}");
-    }
-
-    buffer
-}