diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-08-22 14:22:13 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-08-22 14:22:13 +0200 |
commit | 6bfc7ee06dc1a598014dd5bec659b14a3aa87bbd (patch) | |
tree | f12b4892214fd9cd0fbbd206abd6929179f75d2b | |
parent | test(benches/update): Init (diff) | |
download | yt-6bfc7ee06dc1a598014dd5bec659b14a3aa87bbd.zip |
feat(download): Support limiting the downloader by maximal cache size
-rw-r--r-- | src/cli.rs | 47 | ||||
-rw-r--r-- | src/constants.rs | 14 | ||||
-rw-r--r-- | src/download/download_options.rs | 2 | ||||
-rw-r--r-- | src/download/mod.rs | 96 | ||||
-rw-r--r-- | src/main.rs | 10 | ||||
-rw-r--r-- | src/storage/video_database/extractor_hash.rs | 2 |
6 files changed, 153 insertions, 18 deletions
diff --git a/src/cli.rs b/src/cli.rs index f3f4b7e..a61a57e 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -10,6 +10,7 @@ use std::path::PathBuf; +use anyhow::{bail, Error}; use chrono::NaiveDate; use clap::{ArgAction, Args, Parser, Subcommand}; use url::Url; @@ -47,6 +48,11 @@ pub enum Command { /// Forcefully re-download all cached videos (i.e. delete the cache path, then download). #[arg(short, long)] force: bool, + + /// The maximum size the download dir should have. Beware that the value must be given in + /// bytes. + #[arg(short, long, default_value = "3 GiB", value_parser = byte_parser)] + max_cache_size: u64, }, /// Watch the already cached (and selected) videos @@ -102,6 +108,47 @@ pub enum Command { }, } +fn byte_parser(s: &str) -> Result<u64, Error> { + const B: u64 = 1; + + const KIB: u64 = 1024 * B; + const MIB: u64 = 1024 * KIB; + const GIB: u64 = 1024 * MIB; + + const KB: u64 = 1000 * B; + const MB: u64 = 1000 * KB; + const GB: u64 = 1000 * MB; + + let s = s + .chars() + .filter(|elem| !elem.is_whitespace()) + .collect::<String>(); + + let number: u64 = s + .chars() + .take_while(|x| x.is_numeric()) + .collect::<String>() + .parse()?; + let extension = s.chars().skip_while(|x| x.is_numeric()).collect::<String>(); + + let output = match extension.to_lowercase().as_str() { + "" => number, + "b" => number * B, + "kib" => number * KIB, + "mib" => number * MIB, + "gib" => number * GIB, + "kb" => number * KB, + "mb" => number * MB, + "gb" => number * GB, + other => bail!( + "Your extension '{}' is not yet supported. Only KB,MB,GB or KiB,MiB,GiB are supported", + other + ), + }; + + Ok(output) +} + impl Default for Command { fn default() -> Self { Self::Select { diff --git a/src/constants.rs b/src/constants.rs index f4eef3d..00919ce 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -8,9 +8,9 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use std::{env::temp_dir, path::PathBuf}; +use std::{env::temp_dir, fs, path::PathBuf}; -use anyhow::Context; +use anyhow::{Context, Result}; pub const HELP_STR: &str = include_str!("./select/selection_file/help.str"); pub const LOCAL_COMMENTS_LENGTH: usize = 1000; @@ -21,9 +21,15 @@ pub const DEFAULT_SUBTITLE_LANGS: &str = "en"; pub const CONCURRENT_DOWNLOADS: u32 = 5; // We download to the temp dir to avoid taxing the disk -pub fn download_dir() -> PathBuf { +pub fn download_dir(create: bool) -> Result<PathBuf> { const DOWNLOAD_DIR: &str = "/tmp/yt"; - PathBuf::from(DOWNLOAD_DIR) + let dir = PathBuf::from(DOWNLOAD_DIR); + + if !dir.exists() && create { + fs::create_dir_all(&dir).context("Failed to create the download directory")? + } + + Ok(dir) } const PREFIX: &str = "yt"; diff --git a/src/download/download_options.rs b/src/download/download_options.rs index 17cf66c..04c1600 100644 --- a/src/download/download_options.rs +++ b/src/download/download_options.rs @@ -50,7 +50,7 @@ pub fn download_opts(additional_opts: YtDlpOptions) -> serde_json::Map<String, s "writeautomaticsub": true, "outtmpl": { - "default": constants::download_dir().join("%(channel)s/%(title)s.%(ext)s"), + "default": constants::download_dir(false).expect("We're not creating this dir, thus this function can't error").join("%(channel)s/%(title)s.%(ext)s"), "chapter": "%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s" }, "compat_opts": {}, diff --git a/src/download/mod.rs b/src/download/mod.rs index 3785876..c3d79b7 100644 --- a/src/download/mod.rs +++ b/src/download/mod.rs @@ -8,22 +8,24 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use std::{sync::Arc, time::Duration}; +use std::{collections::HashMap, sync::Arc, time::Duration}; use crate::{ app::App, + constants::download_dir, download::download_options::download_opts, storage::video_database::{ downloader::{get_next_uncached_video, set_video_cache_path}, extractor_hash::ExtractorHash, getters::get_video_yt_dlp_opts, - Video, + Video, YtDlpOptions, }, }; -use anyhow::{Context, Result}; -use log::{debug, info}; -use tokio::{task::JoinHandle, time}; +use anyhow::{bail, Context, Result}; +use futures::{future::BoxFuture, FutureExt}; +use log::{debug, info, warn}; +use tokio::{fs, task::JoinHandle, time}; mod download_options; @@ -53,12 +55,14 @@ impl CurrentDownload { pub struct Downloader { current_download: Option<CurrentDownload>, + video_size_cache: HashMap<ExtractorHash, u64>, } impl Downloader { pub fn new() -> Self { Self { current_download: None, + video_size_cache: HashMap::new(), } } @@ -67,7 +71,20 @@ impl Downloader { /// change which videos it downloads. /// This will run, until the database doesn't contain any watchable videos pub async fn consume(&mut self, app: Arc<App>, max_cache_size: u64) -> Result<()> { - while let Some(next_video) = get_next_uncached_video(app).await? { + while let Some(next_video) = get_next_uncached_video(&app).await? { + if Self::get_current_cache_allocation().await? + + self.get_approx_video_size(&next_video).await? + >= max_cache_size + { + warn!( + "Can't download video: '{}' as it's too large for the cache.", + next_video.title + ); + // Wait and hope, that a large video is deleted from the cache. + time::sleep(Duration::from_secs(10)).await; + continue; + } + if let Some(_) = &self.current_download { let current_download = self.current_download.take().expect("Is Some"); @@ -99,7 +116,6 @@ impl Downloader { ); // Reset the taken value self.current_download = Some(current_download); - time::sleep(Duration::new(1, 0)).await; } } else { info!( @@ -111,15 +127,75 @@ impl Downloader { self.current_download = Some(new_current_download); } - // if get_allocated_cache().await? < CONCURRENT { - // .cache_video(next_video).await?; - // } + time::sleep(Duration::new(1, 0)).await; } info!("Finished downloading!"); Ok(()) } + async fn get_current_cache_allocation() -> Result<u64> { + fn dir_size(mut dir: fs::ReadDir) -> BoxFuture<'static, Result<u64>> { + async move { + let mut acc = 0; + while let Some(entry) = dir.next_entry().await? { + let size = match entry.metadata().await? { + data if data.is_dir() => { + let path = entry.path(); + let read_dir = fs::read_dir(path).await?; + + dir_size(read_dir).await? + } + data => data.len(), + }; + acc += size; + } + Ok(acc) + } + .boxed() + } + + let val = dir_size(fs::read_dir(download_dir(true)?).await?).await; + if let Ok(val) = val.as_ref() { + info!("Cache dir has a size of '{}'", val); + } + val + } + + async fn get_approx_video_size(&mut self, video: &Video) -> Result<u64> { + if let Some(value) = self.video_size_cache.get(&video.extractor_hash) { + Ok(*value) + } else { + // the subtitle file size should be negligible + let add_opts = YtDlpOptions { + subtitle_langs: "".to_owned(), + }; + let opts = &download_opts(add_opts); + + let result = yt_dlp::extract_info(&opts, &video.url, false, true) + .await + .with_context(|| { + format!("Failed to extract video information: '{}'", video.title) + })?; + + let size = if let Some(val) = result.filesize { + val + } else if let Some(val) = result.filesize_approx { + val + } else { + bail!("Failed to find a filesize for video: '{}'", video.title); + }; + + assert_eq!( + self.video_size_cache + .insert(video.extractor_hash.clone(), size), + None + ); + + Ok(size) + } + } + async fn actually_cache_video(app: &App, video: &Video) -> Result<()> { debug!("Download started: {}", &video.title); diff --git a/src/main.rs b/src/main.rs index c223140..ebbb45f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,13 +8,14 @@ // You should have received a copy of the License along with this program. // If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. -use std::{collections::HashMap, fs}; +use std::{collections::HashMap, fs, sync::Arc}; use anyhow::{bail, Context, Result}; use app::App; use cache::invalidate; use clap::Parser; use cli::{CacheCommand, CheckCommand, SelectCommand, SubscriptionCommand}; +use log::info; use select::cmds::handle_select_cmd; use tokio::{ fs::File, @@ -56,7 +57,12 @@ async fn main() -> Result<()> { let app = App::new(args.db_path.unwrap_or(constants::database()?)).await?; match args.command.unwrap_or(Command::default()) { - Command::Download { force } => { + Command::Download { + force, + max_cache_size, + } => { + info!("max cache size: '{}'", max_cache_size); + if force { invalidate(&app, true).await?; } diff --git a/src/storage/video_database/extractor_hash.rs b/src/storage/video_database/extractor_hash.rs index 3af4f60..593b5c4 100644 --- a/src/storage/video_database/extractor_hash.rs +++ b/src/storage/video_database/extractor_hash.rs @@ -19,7 +19,7 @@ use crate::{app::App, storage::video_database::getters::get_all_hashes}; static EXTRACTOR_HASH_LENGTH: OnceCell<usize> = OnceCell::const_new(); -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ExtractorHash { hash: Hash, } |