diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-05-20 16:10:21 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-05-20 16:14:26 +0200 |
commit | 368cb6b0d25db2ae23be42ad51584de059997e51 (patch) | |
tree | 3282e45d3ebced63c8498a47e83a255c35de620b /pkgs/sources/comments/src | |
parent | refactor(hm): Rename to `modules/home` (diff) | |
download | nixos-config-368cb6b0d25db2ae23be42ad51584de059997e51.zip |
refactor(sys): Modularize and move to `modules/system` or `pkgs`
Diffstat (limited to 'pkgs/sources/comments/src')
-rw-r--r-- | pkgs/sources/comments/src/info_json.rs | 223 | ||||
-rw-r--r-- | pkgs/sources/comments/src/main.rs | 322 |
2 files changed, 545 insertions, 0 deletions
diff --git a/pkgs/sources/comments/src/info_json.rs b/pkgs/sources/comments/src/info_json.rs new file mode 100644 index 00000000..eca4fae3 --- /dev/null +++ b/pkgs/sources/comments/src/info_json.rs @@ -0,0 +1,223 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Deserializer}; + +#[derive(Debug, Deserialize)] +pub struct InfoJson { + pub id: String, + pub title: String, + pub formats: Vec<Format>, + pub thumbnails: Vec<ThumbNail>, + pub thumbnail: String, + pub description: String, + pub channel_id: String, + pub channel_url: String, + pub duration: u32, + pub view_count: u32, + pub age_limit: u32, + pub webpage_url: String, + pub categories: Vec<String>, + pub tags: Vec<String>, + pub playable_in_embed: bool, + pub live_status: String, + _format_sort_fields: Vec<String>, + pub automatic_captions: HashMap<String, Vec<Caption>>, + pub subtitles: Subtitles, + pub comment_count: u32, + pub like_count: u32, + pub channel: String, + pub channel_follower_count: u32, + pub channel_is_verified: Option<bool>, + pub uploader: String, + pub uploader_id: String, + pub uploader_url: String, + pub upload_date: String, + pub availability: String, + pub webpage_url_basename: String, + pub webpage_url_domain: String, + pub extractor: String, + pub extractor_key: String, + pub display_id: String, + pub fulltitle: String, + pub duration_string: String, + pub is_live: bool, + pub was_live: bool, + pub epoch: u32, + pub comments: Vec<Comment>, + pub sponsorblock_chapters: Option<Vec<SponsorblockChapter>>, + pub format: String, + pub format_id: String, + pub ext: String, + pub protocol: String, + pub language: Option<String>, + pub format_note: String, + pub filesize_approx: u64, + pub tbr: f64, + pub width: u32, + pub height: u32, + pub resolution: String, + pub fps: f64, + pub dynamic_range: String, + pub vcodec: String, + pub vbr: f64, + pub aspect_ratio: f64, + pub acodec: String, + pub abr: f64, + pub asr: u32, + pub audio_channels: u32, + _type: String, + _version: Version, +} + +#[derive(Debug, Deserialize)] +pub struct Subtitles {} + +#[derive(Debug, Deserialize)] +pub struct Version { + pub version: String, + pub release_git_head: String, + pub repository: String, +} + +#[derive(Debug, Deserialize)] +pub struct SponsorblockChapter {} + +#[derive(Debug, Deserialize, Clone)] +#[serde(from = "String")] +pub enum Parent { + Root, + Id(String), +} + +impl Parent { + pub fn id(&self) -> Option<&str> { + if let Self::Id(id) = self { + Some(id) + } else { + None + } + } +} + +impl From<String> for Parent { + fn from(value: String) -> Self { + if value == "root" { + Self::Root + } else { + Self::Id(value) + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(from = "String")] +pub struct Id { + pub id: String, +} +impl From<String> for Id { + fn from(value: String) -> Self { + Self { + // Take the last element if the string is split with dots, otherwise take the full id + id: value.split('.').last().unwrap_or(&value).to_owned(), + } + } +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Comment { + pub id: Id, + pub text: String, + #[serde(default = "zero")] + pub like_count: u32, + pub author_id: String, + #[serde(default = "unknown")] + pub author: String, + pub author_thumbnail: String, + pub parent: Parent, + #[serde(deserialize_with = "edited_from_time_text", alias = "_time_text")] + pub edited: bool, + // Can't also be deserialized, as it's already used in 'edited' + // _time_text: String, + pub timestamp: i64, + pub author_url: String, + pub author_is_uploader: bool, + pub is_favorited: bool, +} +fn unknown() -> String { + "<Unknown>".to_string() +} +fn zero() -> u32 { + 0 +} +fn edited_from_time_text<'de, D>(d: D) -> Result<bool, D::Error> +where + D: Deserializer<'de>, +{ + let s = String::deserialize(d)?; + if s.contains(" (edited)") { + Ok(true) + } else { + Ok(false) + } +} + +#[derive(Debug, Deserialize)] +pub struct Caption { + pub ext: String, + pub url: String, + pub name: Option<String>, + pub protocol: Option<String>, +} + +#[derive(Debug, Deserialize)] +pub struct ThumbNail { + pub url: String, + pub preference: i32, + pub id: String, + pub height: Option<u32>, + pub width: Option<u32>, + pub resolution: Option<String>, +} + +#[derive(Debug, Deserialize)] +pub struct Format { + pub format_id: String, + pub format_note: Option<String>, + pub ext: String, + pub protocol: String, + pub acodec: Option<String>, + pub vcodec: String, + pub url: String, + pub width: Option<u32>, + pub height: Option<u32>, + pub fps: Option<f64>, + pub rows: Option<u32>, + pub columns: Option<u32>, + pub fragments: Option<Vec<Fragment>>, + pub resolution: String, + pub aspect_ratio: Option<f64>, + pub http_headers: HttpHeader, + pub audio_ext: String, + pub video_ext: String, + pub vbr: Option<f64>, + pub abr: Option<f64>, + pub format: String, +} + +#[derive(Debug, Deserialize)] +pub struct HttpHeader { + #[serde(alias = "User-Agent")] + pub user_agent: String, + #[serde(alias = "Accept")] + pub accept: String, + #[serde(alias = "Accept-Language")] + pub accept_language: String, + #[serde(alias = "Sec-Fetch-Mode")] + pub sec_fetch_mode: String, +} + +#[derive(Debug, Deserialize)] +pub struct Fragment { + pub url: String, + pub duration: f64, +} diff --git a/pkgs/sources/comments/src/main.rs b/pkgs/sources/comments/src/main.rs new file mode 100644 index 00000000..6e4f72e9 --- /dev/null +++ b/pkgs/sources/comments/src/main.rs @@ -0,0 +1,322 @@ +use std::{ + env, + fmt::Display, + fs::{self, File}, + io::{BufReader, Write}, + mem, + path::PathBuf, + process::{Command, Stdio}, +}; + +use anyhow::Context; +use chrono::{Local, TimeZone}; +use chrono_humanize::{Accuracy, HumanTime, Tense}; +use info_json::{Comment, InfoJson, Parent}; +use regex::Regex; + +mod info_json; + +fn get_runtime_path(component: &'static str) -> anyhow::Result<PathBuf> { + let out: PathBuf = format!( + "{}/{}", + env::var("XDG_RUNTIME_DIR").expect("This should always exist"), + component + ) + .into(); + fs::create_dir_all(out.parent().expect("Parent should exist"))?; + Ok(out) +} + +const STATUS_PATH: &str = "ytcc/running"; +pub fn status_path() -> anyhow::Result<PathBuf> { + get_runtime_path(STATUS_PATH) +} + +#[derive(Debug, Clone)] +pub struct CommentExt { + pub value: Comment, + pub replies: Vec<CommentExt>, +} + +#[derive(Debug, Default)] +pub struct Comments { + vec: Vec<CommentExt>, +} + +impl Comments { + pub fn new() -> Self { + Self::default() + } + pub fn push(&mut self, value: CommentExt) { + self.vec.push(value); + } + pub fn get_mut(&mut self, key: &str) -> Option<&mut CommentExt> { + self.vec.iter_mut().filter(|c| c.value.id.id == key).last() + } + pub fn insert(&mut self, key: &str, value: CommentExt) { + let parent = self + .vec + .iter_mut() + .filter(|c| c.value.id.id == key) + .last() + .expect("One of these should exist"); + parent.push_reply(value); + } +} +impl CommentExt { + pub fn push_reply(&mut self, value: CommentExt) { + self.replies.push(value) + } + pub fn get_mut_reply(&mut self, key: &str) -> Option<&mut CommentExt> { + self.replies + .iter_mut() + .filter(|c| c.value.id.id == key) + .last() + } +} + +impl From<Comment> for CommentExt { + fn from(value: Comment) -> Self { + Self { + replies: vec![], + value, + } + } +} + +impl Display for Comments { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + macro_rules! c { + ($color:expr, $write:ident) => { + $write.write_str(concat!("\x1b[", $color, "m"))? + }; + } + + fn format( + comment: &CommentExt, + f: &mut std::fmt::Formatter<'_>, + ident_count: u32, + ) -> std::fmt::Result { + let ident = &(0..ident_count).map(|_| " ").collect::<String>(); + let value = &comment.value; + + f.write_str(ident)?; + + if value.author_is_uploader { + c!("91;1", f); + } else { + c!("35", f); + } + + f.write_str(&value.author)?; + c!("0", f); + if value.edited || value.is_favorited { + f.write_str("[")?; + if value.edited { + f.write_str("")?; + } + if value.edited && value.is_favorited { + f.write_str(" ")?; + } + if value.is_favorited { + f.write_str("")?; + } + f.write_str("]")?; + } + + c!("36;1", f); + write!( + f, + " {}", + HumanTime::from( + Local + .timestamp_opt(value.timestamp, 0) + .single() + .expect("This should be valid") + ) + .to_text_en(Accuracy::Rough, Tense::Past) + )?; + c!("0", f); + + // c!("31;1", f); + // f.write_fmt(format_args!(" [{}]", comment.value.like_count))?; + // c!("0", f); + + f.write_str(":\n")?; + f.write_str(ident)?; + + f.write_str(&value.text.replace('\n', &format!("\n{}", ident)))?; + f.write_str("\n")?; + + if !comment.replies.is_empty() { + let mut children = comment.replies.clone(); + children.sort_by(|a, b| a.value.timestamp.cmp(&b.value.timestamp)); + + for child in children { + format(&child, f, ident_count + 4)?; + } + } else { + f.write_str("\n")?; + } + + Ok(()) + } + + if !&self.vec.is_empty() { + let mut children = self.vec.clone(); + children.sort_by(|a, b| b.value.like_count.cmp(&a.value.like_count)); + + for child in children { + format(&child, f, 0)? + } + } + Ok(()) + } +} + +fn main() -> anyhow::Result<()> { + cli_log::init_cli_log!(); + let args: Option<String> = env::args().skip(1).last(); + let mut info_json: InfoJson = { + let status_path = if let Some(arg) = args { + PathBuf::from(arg) + } else { + status_path().context("Failed to get status path")? + }; + + let reader = + BufReader::new(File::open(&status_path).with_context(|| { + format!("Failed to open status file at {}", status_path.display()) + })?); + + serde_json::from_reader(reader)? + }; + + let base_comments = mem::take(&mut info_json.comments); + drop(info_json); + + let mut comments = Comments::new(); + base_comments.into_iter().for_each(|c| { + if let Parent::Id(id) = &c.parent { + comments.insert(&(id.clone()), CommentExt::from(c)); + } else { + comments.push(CommentExt::from(c)); + } + }); + + comments.vec.iter_mut().for_each(|comment| { + let replies = mem::take(&mut comment.replies); + let mut output_replies: Vec<CommentExt> = vec![]; + + let re = Regex::new(r"\u{200b}?(@[^\t\s]+)\u{200b}?").unwrap(); + for reply in replies { + if let Some(replyee_match) = re.captures(&reply.value.text){ + let full_match = replyee_match.get(0).expect("This always exists"); + let text = reply. + value. + text[0..full_match.start()] + .to_owned() + + + &reply + .value + .text[full_match.end()..]; + let text: &str = text.trim().trim_matches('\u{200b}'); + + let replyee = replyee_match.get(1).expect("This should exist").as_str(); + + + if let Some(parent) = output_replies + .iter_mut() + // .rev() + .flat_map(|com| &mut com.replies) + .flat_map(|com| &mut com.replies) + .flat_map(|com| &mut com.replies) + .filter(|com| com.value.author == replyee) + .last() + { + parent.replies.push(CommentExt::from(Comment { + text: text.to_owned(), + ..reply.value + })) + } else if let Some(parent) = output_replies + .iter_mut() + // .rev() + .flat_map(|com| &mut com.replies) + .flat_map(|com| &mut com.replies) + .filter(|com| com.value.author == replyee) + .last() + { + parent.replies.push(CommentExt::from(Comment { + text: text.to_owned(), + ..reply.value + })) + } else if let Some(parent) = output_replies + .iter_mut() + // .rev() + .flat_map(|com| &mut com.replies) + .filter(|com| com.value.author == replyee) + .last() + { + parent.replies.push(CommentExt::from(Comment { + text: text.to_owned(), + ..reply.value + })) + } else if let Some(parent) = output_replies.iter_mut() + // .rev() + .filter(|com| com.value.author == replyee) + .last() + { + parent.replies.push(CommentExt::from(Comment { + text: text.to_owned(), + ..reply.value + })) + } else { + eprintln!( + "Failed to find a parent for ('{}') both directly and via replies! The reply text was:\n'{}'\n", + replyee, + reply.value.text + ); + output_replies.push(reply); + } + } else { + output_replies.push(reply); + } + } + comment.replies = output_replies; + }); + + let mut less = Command::new("less") + .args(["--raw-control-chars"]) + .stdin(Stdio::piped()) + .stderr(Stdio::inherit()) + .spawn() + .context("Failed to run less")?; + + let mut child = Command::new("fmt") + .args(["--uniform-spacing", "--split-only", "--width=90"]) + .stdin(Stdio::piped()) + .stderr(Stdio::inherit()) + .stdout(less.stdin.take().expect("Should be open")) + .spawn() + .context("Failed to run fmt")?; + + let mut stdin = child.stdin.take().context("Failed to open stdin")?; + std::thread::spawn(move || { + stdin + .write_all(comments.to_string().as_bytes()) + .expect("Should be able to write to stdin of fmt"); + }); + + let _ = less.wait().context("Failed to await less")?; + + Ok(()) +} + +#[cfg(test)] +mod test { + #[test] + fn test_string_replacement() { + let s = "A \n\nB\n\nC".to_owned(); + assert_eq!("A \n \n B\n \n C", s.replace('\n', "\n ")) + } +} |