author Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-07-18 18:01:29 +0200
committer Benedikt Peetz <benedikt.peetz@b-peetz.de> 2025-07-18 18:01:29 +0200
commit c4524db090d2d31af8bc3e7ec64c1ea9f5ec72aa (patch)
tree f80cefb7b315155e4cca0bb4b78e5e6cd5418ab5 /crates/yt/src/yt_dlp/mod.rs
parent test(crates/yt): Add basic integration tests (diff)
download yt-c4524db090d2d31af8bc3e7ec64c1ea9f5ec72aa.zip
feat(crates/yt): Separate all commands from their implementation code
This also comes with a re-worked and tested implementation of the
comments rendering code.
Diffstat (limited to 'crates/yt/src/yt_dlp/mod.rs')
-rw-r--r-- crates/yt/src/yt_dlp/mod.rs | 249
1 file changed, 249 insertions, 0 deletions
diff --git a/crates/yt/src/yt_dlp/mod.rs b/crates/yt/src/yt_dlp/mod.rs
new file mode 100644
index 0000000..edf27e8
--- /dev/null
+++ b/crates/yt/src/yt_dlp/mod.rs
@@ -0,0 +1,249 @@
+use std::{str::FromStr, time::Duration};
+
+use anyhow::{Context, Result};
+use chrono::{DateTime, Utc};
+use futures::{FutureExt, future::BoxFuture};
+use log::{error, warn};
+use serde_json::json;
+use tokio::{fs, io};
+use url::Url;
+use yt_dlp::{YoutubeDL, info_json::InfoJson, json_cast, json_get, options::YoutubeDLOptions};
+
+use crate::{
+    app::App,
+    select::duration::MaybeDuration,
+    shared::bytes::Bytes,
+    storage::db::{
+        extractor_hash::ExtractorHash,
+        subscription::Subscription,
+        video::{Priority, TimeStamp, Video, VideoStatus},
+    },
+};
+
+pub(crate) fn yt_dlp_opts_updating(max_backlog: usize) -> Result<YoutubeDL> {
+    Ok(YoutubeDLOptions::new()
+        .set("playliststart", 1)
+        .set("playlistend", max_backlog)
+        .set("noplaylist", false)
+        .set(
+            "extractor_args",
+            json! {{"youtubetab": {"approximate_date": [""]}}},
+        )
+        // // TODO: This also removes unlisted and other stuff. Find a good way to remove the
+        // // members-only videos from the feed. <2025-04-17>
+        // .set("match-filter", "availability=public")
+        .build()?)
+}
+
+impl Video {
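+    /// Estimate the download size of this video in bytes.
+    ///
+    /// The lookup falls back from `filesize` over `filesize_approx` to a
+    /// `duration * tbr` estimate and, as a last resort, to a hardcoded 250 MiB.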
+    pub(crate) fn get_approx_size(&self) -> Result<u64> {
+        let yt_dlp = {
+            YoutubeDLOptions::new()
+                .set("prefer_free_formats", true)
+                .set("format", "bestvideo[height<=?1080]+bestaudio/best")
+                .set("fragment_retries", 10)
+                .set("retries", 10)
+                .set("getcomments", false)
+                .set("ignoreerrors", false)
+                .build()
+                .context("Failed to instantiate yt_dlp for the approximate size lookup")
+        }?;
+
+        let result = yt_dlp
+            .extract_info(&self.url, false, true)
+            .with_context(|| format!("Failed to extract video information: '{}'", self.title))?;
+
+        let size = if let Some(val) = result.get("filesize") {
+            json_cast!(val, as_u64)
+        } else if let Some(serde_json::Value::Number(num)) = result.get("filesize_approx") {
+            // NOTE(@bpeetz): yt_dlp sets this value to `Null` instead of omitting it when it
+            // can't calculate the approximate filesize.
+            // Thus, we have to check that it is actually non-null before we cast it. <2025-06-15>
+            json_cast!(num, as_u64)
+        } else if result.get("duration").is_some() && result.get("tbr").is_some() {
+            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
+            let duration = json_get!(result, "duration", as_f64).ceil() as u64;
+
+            // TODO: yt_dlp gets this from the format
+            #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
+            let tbr = json_get!(result, "tbr", as_f64).ceil() as u64;
+
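+            // `tbr` is the average total bitrate in kbit/s, so the size in bytes
+            // is roughly `seconds * kbit/s * 1000 / 8`.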
+            duration * tbr * (1000 / 8)
+        } else {
+            let hardcoded_default = Bytes::from_str("250 MiB").expect("This is hardcoded");
+            error!(
+                "Failed to find a filesize for video: {:?} (Using hardcoded value of {})",
+                self.title, hardcoded_default
+            );
+            hardcoded_default.as_u64()
+        };
+
+        Ok(size)
+    }
+}
+
+impl Video {
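+    /// Construct a [`Video`] from a yt-dlp info JSON entry, optionally
+    /// attributing it to the given [`Subscription`].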
+    #[allow(clippy::too_many_lines)]
+    pub(crate) fn from_info_json(entry: &InfoJson, sub: Option<&Subscription>) -> Result<Video> {
+        fn fmt_context(date: &str, extended: Option<&str>) -> String {
+            let f = format!(
+                "Failed to parse the `upload_date` of the entry ('{date}'). \
+                    Expected `YYYY-MM-DD`, has the format changed?"
+            );
+            if let Some(date_string) = extended {
+                format!("{f}\nThe reconstructed '{date_string}' can't be turned into a valid UTC date.")
+            } else {
+                f
+            }
+        }
+
+        let publish_date = if let Some(date) = &entry.get("upload_date") {
+            let date = json_cast!(date, as_str);
+
+            let year: u32 = date
+                .chars()
+                .take(4)
+                .collect::<String>()
+                .parse()
+                .with_context(|| fmt_context(date, None))?;
+            let month: u32 = date
+                .chars()
+                .skip(4)
+                .take(2)
+                .collect::<String>()
+                .parse()
+                .with_context(|| fmt_context(date, None))?;
+            let day: u32 = date
+                .chars()
+                .skip(4 + 2)
+                .take(2)
+                .collect::<String>()
+                .parse()
+                .with_context(|| fmt_context(date, None))?;
+
+            let date_string = format!("{year:04}-{month:02}-{day:02}T00:00:00Z");
+            Some(
+                DateTime::<Utc>::from_str(&date_string)
+                    .with_context(|| fmt_context(date, Some(&date_string)))?
+                    .timestamp(),
+            )
+        } else {
+            warn!(
+                "The video '{}' lacks it's upload date!",
+                json_get!(entry, "title", as_str)
+            );
+            None
+        };
+
+        let thumbnail_url = match (&entry.get("thumbnails"), &entry.get("thumbnail")) {
+            (None, None) => None,
+            (None, Some(thumbnail)) => Some(Url::from_str(json_cast!(thumbnail, as_str))?),
+
+            // TODO: Simply taking the first thumbnail is not exactly the best algorithm. <2024-05-28>
+            (Some(thumbnails), None) => {
+                if let Some(thumbnail) = json_cast!(thumbnails, as_array).first() {
+                    Some(Url::from_str(json_get!(
+                        json_cast!(thumbnail, as_object),
+                        "url",
+                        as_str
+                    ))?)
+                } else {
+                    None
+                }
+            }
+            (Some(_), Some(thumbnail)) => Some(Url::from_str(json_cast!(thumbnail, as_str))?),
+        };
+
+        let url = {
+            let smug_url: Url = json_get!(entry, "webpage_url", as_str).parse()?;
+            // TODO(@bpeetz): We should probably add this? <2025-06-14>
+            // if '#__youtubedl_smuggle' not in smug_url:
+            //     return smug_url, default
+            // url, _, sdata = smug_url.rpartition('#')
+            // jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
+            // data = json.loads(jsond)
+            // return url, data
+
+            smug_url
+        };
+
+        let extractor_hash = ExtractorHash::from_info_json(entry);
+
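+        // Without an explicit subscription, fall back to the uploader name
+        // (suffixed with " - Videos" for youtube.com entries).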
+        let subscription_name = if let Some(sub) = sub {
+            Some(sub.name.clone())
+        } else if let Some(uploader) = entry.get("uploader").map(|val| json_cast!(val, as_str)) {
+            if entry
+                .get("webpage_url_domain")
+                .map(|val| json_cast!(val, as_str))
+                == Some("youtube.com")
+            {
+                Some(format!("{uploader} - Videos"))
+            } else {
+                Some(uploader.to_owned())
+            }
+        } else {
+            None
+        };
+
+        let video = Video {
+            description: entry
+                .get("description")
+                .map(|val| json_cast!(val, as_str).to_owned()),
+            duration: MaybeDuration::from_maybe_secs_f64(
+                entry.get("duration").map(|val| json_cast!(val, as_f64)),
+            ),
+            extractor_hash,
+            last_status_change: TimeStamp::from_now(),
+            parent_subscription_name: subscription_name,
+            priority: Priority::default(),
+            publish_date: publish_date.map(TimeStamp::from_secs),
+            status: VideoStatus::Pick,
+            thumbnail_url,
+            title: json_get!(entry, "title", as_str).to_owned(),
+            url,
+            watch_progress: Duration::default(),
+            playback_speed: None,
+            subtitle_langs: None,
+        };
+        Ok(video)
+    }
+}
+
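+/// Return the total size of the download directory by recursively summing the
+/// sizes of all files below it.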
+pub(crate) async fn get_current_cache_allocation(app: &App) -> Result<Bytes> {
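+    // Async functions cannot recurse directly, so `dir_size` returns a boxed
+    // future ([`BoxFuture`]) instead of being an `async fn`.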
+    fn dir_size(mut dir: fs::ReadDir) -> BoxFuture<'static, Result<Bytes>> {
+        async move {
+            let mut acc = 0;
+            while let Some(entry) = dir.next_entry().await? {
+                let size = match entry.metadata().await? {
+                    data if data.is_dir() => {
+                        let path = entry.path();
+                        let read_dir = fs::read_dir(path).await?;
+
+                        dir_size(read_dir).await?.as_u64()
+                    }
+                    data => data.len(),
+                };
+                acc += size;
+            }
+            Ok(Bytes::new(acc))
+        }
+        .boxed()
+    }
+
+    let read_dir_result = match fs::read_dir(&app.config.paths.download_dir).await {
+        Ok(ok) => ok,
+        Err(err) => match err.kind() {
+            io::ErrorKind::NotFound => {
+                unreachable!("The download dir should always be created in the config finalizers.");
+            }
+            err => Err(io::Error::from(err)).with_context(|| {
+                format!(
+                    "Failed to get dir size of download dir at: '{}'",
+                    &app.config.paths.download_dir.display()
+                )
+            })?,
+        },
+    };
+
+    dir_size(read_dir_result).await
+}