diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-07-18 18:01:29 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2025-07-18 18:01:29 +0200 |
commit | c4524db090d2d31af8bc3e7ec64c1ea9f5ec72aa (patch) | |
tree | f80cefb7b315155e4cca0bb4b78e5e6cd5418ab5 /crates/yt/src/yt_dlp | |
parent | test(crates/yt): Add basic integration tests (diff) | |
download | yt-c4524db090d2d31af8bc3e7ec64c1ea9f5ec72aa.zip |
feat(crates/yt): Separate all commands from their implementation code
This also comes with a re-worked and tested implementation of the comments rendering code.
Diffstat (limited to 'crates/yt/src/yt_dlp')
-rw-r--r-- | crates/yt/src/yt_dlp/mod.rs | 249 |
1 file changed, 249 insertions, 0 deletions
diff --git a/crates/yt/src/yt_dlp/mod.rs b/crates/yt/src/yt_dlp/mod.rs new file mode 100644 index 0000000..edf27e8 --- /dev/null +++ b/crates/yt/src/yt_dlp/mod.rs @@ -0,0 +1,249 @@ +use std::{str::FromStr, time::Duration}; + +use anyhow::{Context, Result}; +use chrono::{DateTime, Utc}; +use futures::{FutureExt, future::BoxFuture}; +use log::{error, warn}; +use serde_json::json; +use tokio::{fs, io}; +use url::Url; +use yt_dlp::{YoutubeDL, info_json::InfoJson, json_cast, json_get, options::YoutubeDLOptions}; + +use crate::{ + app::App, + select::duration::MaybeDuration, + shared::bytes::Bytes, + storage::db::{ + extractor_hash::ExtractorHash, + subscription::Subscription, + video::{Priority, TimeStamp, Video, VideoStatus}, + }, +}; + +pub(crate) fn yt_dlp_opts_updating(max_backlog: usize) -> Result<YoutubeDL> { + Ok(YoutubeDLOptions::new() + .set("playliststart", 1) + .set("playlistend", max_backlog) + .set("noplaylist", false) + .set( + "extractor_args", + json! {{"youtubetab": {"approximate_date": [""]}}}, + ) + // // TODO: This also removes unlisted and other stuff. Find a good way to remove the + // // members-only videos from the feed. <2025-04-17> + // .set("match-filter", "availability=public") + .build()?) 
+} + +impl Video { + pub(crate) fn get_approx_size(&self) -> Result<u64> { + let yt_dlp = { + YoutubeDLOptions::new() + .set("prefer_free_formats", true) + .set("format", "bestvideo[height<=?1080]+bestaudio/best") + .set("fragment_retries", 10) + .set("retries", 10) + .set("getcomments", false) + .set("ignoreerrors", false) + .build() + .context("Failed to instanciate get approx size yt_dlp") + }?; + + let result = yt_dlp + .extract_info(&self.url, false, true) + .with_context(|| format!("Failed to extract video information: '{}'", self.title))?; + + let size = if let Some(val) = result.get("filesize") { + json_cast!(val, as_u64) + } else if let Some(serde_json::Value::Number(num)) = result.get("filesize_approx") { + // NOTE(@bpeetz): yt_dlp sets this value to `Null`, instead of omitting it when it + // can't calculate the approximate filesize. + // Thus, we have to check, that it is actually non-null, before we cast it. <2025-06-15> + json_cast!(num, as_u64) + } else if result.get("duration").is_some() && result.get("tbr").is_some() { + #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] + let duration = json_get!(result, "duration", as_f64).ceil() as u64; + + // TODO: yt_dlp gets this from the format + #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] + let tbr = json_get!(result, "tbr", as_f64).ceil() as u64; + + duration * tbr * (1000 / 8) + } else { + let hardcoded_default = Bytes::from_str("250 MiB").expect("This is hardcoded"); + error!( + "Failed to find a filesize for video: {:?} (Using hardcoded value of {})", + self.title, hardcoded_default + ); + hardcoded_default.as_u64() + }; + + Ok(size) + } +} + +impl Video { + #[allow(clippy::too_many_lines)] + pub(crate) fn from_info_json(entry: &InfoJson, sub: Option<&Subscription>) -> Result<Video> { + fn fmt_context(date: &str, extended: Option<&str>) -> String { + let f = format!( + "Failed to parse the `upload_date` of the entry ('{date}'). 
\ + Expected `YYYY-MM-DD`, has the format changed?" + ); + if let Some(date_string) = extended { + format!("{f}\nThe parsed '{date_string}' can't be turned to a valid UTC date.'") + } else { + f + } + } + + let publish_date = if let Some(date) = &entry.get("upload_date") { + let date = json_cast!(date, as_str); + + let year: u32 = date + .chars() + .take(4) + .collect::<String>() + .parse() + .with_context(|| fmt_context(date, None))?; + let month: u32 = date + .chars() + .skip(4) + .take(2) + .collect::<String>() + .parse() + .with_context(|| fmt_context(date, None))?; + let day: u32 = date + .chars() + .skip(4 + 2) + .take(2) + .collect::<String>() + .parse() + .with_context(|| fmt_context(date, None))?; + + let date_string = format!("{year:04}-{month:02}-{day:02}T00:00:00Z"); + Some( + DateTime::<Utc>::from_str(&date_string) + .with_context(|| fmt_context(date, Some(&date_string)))? + .timestamp(), + ) + } else { + warn!( + "The video '{}' lacks it's upload date!", + json_get!(entry, "title", as_str) + ); + None + }; + + let thumbnail_url = match (&entry.get("thumbnails"), &entry.get("thumbnail")) { + (None, None) => None, + (None, Some(thumbnail)) => Some(Url::from_str(json_cast!(thumbnail, as_str))?), + + // TODO: The algorithm is not exactly the best <2024-05-28> + (Some(thumbnails), None) => { + if let Some(thumbnail) = json_cast!(thumbnails, as_array).first() { + Some(Url::from_str(json_get!( + json_cast!(thumbnail, as_object), + "url", + as_str + ))?) + } else { + None + } + } + (Some(_), Some(thumnail)) => Some(Url::from_str(json_cast!(thumnail, as_str))?), + }; + + let url = { + let smug_url: Url = json_get!(entry, "webpage_url", as_str).parse()?; + // TODO(@bpeetz): We should probably add this? 
<2025-06-14> + // if '#__youtubedl_smuggle' not in smug_url: + // return smug_url, default + // url, _, sdata = smug_url.rpartition('#') + // jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0] + // data = json.loads(jsond) + // return url, data + + smug_url + }; + + let extractor_hash = ExtractorHash::from_info_json(entry); + + let subscription_name = if let Some(sub) = sub { + Some(sub.name.clone()) + } else if let Some(uploader) = entry.get("uploader").map(|val| json_cast!(val, as_str)) { + if entry + .get("webpage_url_domain") + .map(|val| json_cast!(val, as_str)) + == Some("youtube.com") + { + Some(format!("{uploader} - Videos")) + } else { + Some(uploader.to_owned()) + } + } else { + None + }; + + let video = Video { + description: entry + .get("description") + .map(|val| json_cast!(val, as_str).to_owned()), + duration: MaybeDuration::from_maybe_secs_f64( + entry.get("duration").map(|val| json_cast!(val, as_f64)), + ), + extractor_hash, + last_status_change: TimeStamp::from_now(), + parent_subscription_name: subscription_name, + priority: Priority::default(), + publish_date: publish_date.map(TimeStamp::from_secs), + status: VideoStatus::Pick, + thumbnail_url, + title: json_get!(entry, "title", as_str).to_owned(), + url, + watch_progress: Duration::default(), + playback_speed: None, + subtitle_langs: None, + }; + Ok(video) + } +} + +pub(crate) async fn get_current_cache_allocation(app: &App) -> Result<Bytes> { + fn dir_size(mut dir: fs::ReadDir) -> BoxFuture<'static, Result<Bytes>> { + async move { + let mut acc = 0; + while let Some(entry) = dir.next_entry().await? { + let size = match entry.metadata().await? 
{ + data if data.is_dir() => { + let path = entry.path(); + let read_dir = fs::read_dir(path).await?; + + dir_size(read_dir).await?.as_u64() + } + data => data.len(), + }; + acc += size; + } + Ok(Bytes::new(acc)) + } + .boxed() + } + + let read_dir_result = match fs::read_dir(&app.config.paths.download_dir).await { + Ok(ok) => ok, + Err(err) => match err.kind() { + io::ErrorKind::NotFound => { + unreachable!("The download dir should always be created in the config finalizers."); + } + err => Err(io::Error::from(err)).with_context(|| { + format!( + "Failed to get dir size of download dir at: '{}'", + &app.config.paths.download_dir.display() + ) + })?, + }, + }; + + dir_size(read_dir_result).await +} |