aboutsummaryrefslogtreecommitdiffstats
path: root/crates
diff options
context:
space:
mode:
Diffstat (limited to 'crates')
-rw-r--r--crates/yt/Cargo.toml1
-rw-r--r--crates/yt/src/storage/db/video/comments/display.rs8
-rw-r--r--crates/yt/src/storage/db/video/comments/mod.rs259
-rw-r--r--crates/yt/src/storage/db/video/comments/raw.rs9
-rw-r--r--crates/yt/src/storage/db/video/comments/tests.rs37
5 files changed, 162 insertions, 152 deletions
diff --git a/crates/yt/Cargo.toml b/crates/yt/Cargo.toml
index 71335d9..91d9204 100644
--- a/crates/yt/Cargo.toml
+++ b/crates/yt/Cargo.toml
@@ -35,7 +35,6 @@ futures = "0.3.32"
libmpv2.workspace = true
log.workspace = true
notify = { version = "8.2.0", default-features = false }
-regex = "1.12.3"
serde.workspace = true
serde_json.workspace = true
shlex = "2.0.1"
diff --git a/crates/yt/src/storage/db/video/comments/display.rs b/crates/yt/src/storage/db/video/comments/display.rs
index c372603..d0c400d 100644
--- a/crates/yt/src/storage/db/video/comments/display.rs
+++ b/crates/yt/src/storage/db/video/comments/display.rs
@@ -33,7 +33,7 @@ impl Comments {
color: bool,
) -> std::fmt::Result {
let ident = &(0..ident_count).map(|_| " ").collect::<String>();
- let value = &comment.value;
+ let value = &comment.raw;
f.write_str(ident)?;
@@ -79,7 +79,7 @@ impl Comments {
write!(
f,
" [{}]",
- comment.value.like_count.bold().red().render(color)
+ comment.raw.like_count.bold().red().render(color)
)?;
f.write_str(":\n")?;
@@ -102,7 +102,7 @@ impl Comments {
f.write_str("\n")?;
} else {
let mut children = comment.replies.clone();
- children.sort_by(|a, b| a.value.timestamp.cmp(&b.value.timestamp));
+ children.sort_by(|a, b| a.raw.timestamp.cmp(&b.raw.timestamp));
for child in children {
format(&child, f, ident_count + 4, color)?;
@@ -116,7 +116,7 @@ impl Comments {
if !&self.inner.is_empty() {
let mut children = self.inner.clone();
- children.sort_by(|a, b| b.value.like_count.cmp(&a.value.like_count));
+ children.sort_by(|a, b| b.raw.like_count.cmp(&a.raw.like_count));
for child in children {
format(&child, &mut f, 0, use_color)?;
diff --git a/crates/yt/src/storage/db/video/comments/mod.rs b/crates/yt/src/storage/db/video/comments/mod.rs
index 41a03be..b199346 100644
--- a/crates/yt/src/storage/db/video/comments/mod.rs
+++ b/crates/yt/src/storage/db/video/comments/mod.rs
@@ -8,11 +8,10 @@
// You should have received a copy of the License along with this program.
// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.
-use std::mem;
+use log::debug;
+use url::Url;
-use regex::{Captures, Regex};
-
-use crate::storage::db::video::comments::raw::{Parent, RawComment};
+use crate::storage::db::video::comments::raw::{Id, RawComment};
pub(crate) mod display;
pub(crate) mod raw;
@@ -20,182 +19,168 @@ pub(crate) mod raw;
#[cfg(test)]
mod tests;
-#[derive(Debug, Clone, PartialEq)]
-pub(crate) struct Comment {
- value: RawComment,
- replies: Vec<Self>,
-}
-
#[derive(Debug, Default, PartialEq)]
pub(crate) struct Comments {
inner: Vec<Comment>,
}
+#[derive(Debug, Clone, PartialEq)]
+pub(crate) struct Comment {
+ raw: RawComment,
+ replies: Vec<Self>,
+}
+
impl Comments {
- pub(crate) fn from_raw(raw: Vec<RawComment>) -> Self {
- let mut me = Self::default();
+ pub(crate) fn from_raw(mut raw: Vec<RawComment>) -> Self {
+ let mut me = Self { inner: vec![] };
+
+ raw.iter_mut().enumerate().for_each(|(index, raw_comment)| {
+ raw_comment.original_order = index;
+ });
+
+ raw.sort_by_key(|raw| match &raw.parent {
+ raw::Parent::Root => 0,
+ raw::Parent::Id(id) => id.split('.').count(),
+ });
- // Apply the parent -> child mapping yt provides us with.
for raw_comment in raw {
- if let Parent::Id(id) = &raw_comment.parent {
- me.insert(&(id.clone()), Comment::from(raw_comment));
- } else {
- me.inner.push(Comment::from(raw_comment));
+ match raw_comment.parent.clone() {
+ raw::Parent::Root => me.add_toplevel(raw_comment),
+ raw::Parent::Id(id) => {
+ let ids: Vec<_> = id.split('.').collect();
+ me.add_reply(&ids, raw_comment);
+ }
}
}
{
// Sort the final comments chronologically.
- // This ensures that replies are matched with the comment they actually replied to and
- // not a later comment from the same author.
- for comment in &mut me.inner {
- comment
- .replies
- .sort_by_key(|comment| comment.value.timestamp);
+ // This reverses our sort we did before for ids.
+ me.sort_replies();
+ }
- for reply in &comment.replies {
- assert!(reply.replies.is_empty());
- }
+ me
+ }
+
+ fn sort_replies(&mut self) {
+ self.inner.sort_by_key(|comment| comment.raw.original_order);
+
+ self.inner.iter_mut().for_each(Comment::sort_replies);
+ }
+
+ fn add_toplevel(&mut self, comment: RawComment) {
+ self.inner.push(comment.into());
+ }
+
+ fn get_id(&mut self, id: &str) -> &mut Comment {
+ for comment in &mut self.inner {
+ if comment.raw.id.id == id {
+ return comment;
}
}
- {
- let find_reply_indicator =
- Regex::new(r"\u{200b}?(@[^\t\s]+)\u{200b}?").expect("This is hardcoded");
+ unreachable!("We cannot add a comment, that is a reply to an not-yet added one.")
+ }
- // Try to re-construct the replies for the reply comments.
- for comment in &mut me.inner {
- let previous_replies = mem::take(&mut comment.replies);
+ fn add_reply(&mut self, ids: &[&str], mut raw_comment: RawComment) {
+ fn first_line(text: &str) -> &str {
+ let end = text
+ .chars()
+ .take_while(|ch| *ch != '\n' && *ch != '.')
+ .map(char::len_utf8)
+ .sum();
- let mut reply_tree = Comments::default();
+ &text[..end]
+ }
- for reply in previous_replies {
- // We try to reconstruct the parent child relation ship by looking (naively)
- // for a reply indicator. Currently, this is just the `@<some_name>`, as yt
- // seems to insert that by default if you press `reply-to` in their clients.
- //
- // This follows these steps:
- // - Does this reply have a “reply indicator”?
- // - If yes, try to resolve the indicator.
- // - If it is resolvable, add this reply to the [`Comment`] it resolved to.
- // - If not, keep the comment as reply.
+ debug!("**Searching for parent id: `{}`", ids.join("-"));
- if let Some(reply_indicator_matches) =
- find_reply_indicator.captures(&reply.value.text.clone())
- {
- // We found a reply indicator.
- // First we traverse the current `reply_tree` in reversed order to find a
- // match, than we check if the reply indicator matches the reply tree root
- // and afterward we declare it unmatching and add it as toplevel.
+ let first = ids
+ .first()
+ .expect("We cannot have a comment reply, without also having it's parent id encoded");
+ let mut reply = self.get_id(first);
+ debug!(" -> {}: `{}`", first, first_line(&reply.raw.text));
- let reply_target_author = reply_indicator_matches
- .get(1)
- .expect("This should also exist")
- .as_str();
+ for id in &ids[1..] {
+ debug!(" **Searching for id: `{id}`");
- if let Some(parent) = reply_tree.find_author_mut(reply_target_author) {
- parent
- .replies
- .push(comment_from_reply(reply, &reply_indicator_matches));
- } else if comment.value.author == reply_target_author {
- reply_tree
- .add_toplevel(comment_from_reply(reply, &reply_indicator_matches));
- } else {
- eprintln!(
- "Failed to find a parent for ('{}') both directly \
- and via replies! The reply text was:\n'{}'\n",
- reply_target_author, reply.value.text
- );
- reply_tree.add_toplevel(reply);
- }
- } else {
- // The comment text did not contain a reply indicator, so add it as
- // toplevel.
- reply_tree.add_toplevel(reply);
- }
- }
+ reply = reply.get_id(id);
- comment.replies = reply_tree.inner;
- }
+ debug!(" -> {}: `{}`", id, first_line(&reply.raw.text));
}
- me
- }
-
- fn add_toplevel(&mut self, value: Comment) {
- self.inner.push(value);
+ raw_comment.text = raw_comment
+ .text
+ .trim()
+ .trim_start_matches(&reply.raw.author)
+ .trim()
+ .to_owned();
+ reply.replies.push(raw_comment.into());
}
+}
- fn insert(&mut self, id: &str, value: Comment) {
- let parent = self
- .inner
+impl Comment {
+ fn maybe_get_id(&mut self, id: &str) -> Option<&mut Self> {
+ self.replies
.iter_mut()
- .find(|c| c.value.id.id == id)
- .expect("One of these should exist");
-
- parent.replies.push(value);
+ .find(|comment| comment.raw.id.id == id)
}
- fn find_author_mut(&mut self, reply_target_author: &str) -> Option<&mut Comment> {
- fn perform_check<'a>(
- comment: &'a mut Comment,
- reply_target_author: &str,
- ) -> Option<&'a mut Comment> {
- // TODO(@bpeetz): This is a workaround until rust has lexiographic lifetime support. <2025-07-18>
- fn find_in_replies<'a>(
- comment: &'a mut Comment,
- reply_target_author: &str,
- ) -> Option<&'a mut Comment> {
- comment
- .replies
- .iter_mut()
- .rev()
- .find_map(|reply: &mut Comment| perform_check(reply, reply_target_author))
+ fn get_id(&mut self, id: &str) -> &mut Self {
+ // TODO: This `if` is a work-around, until lexicographic lifetimes are added. <2026-05-26>
+ if self.maybe_get_id(id).is_none() {
+ macro_rules! from_last {
+ ($field:ident, $self:expr) => {
+ $self
+ .replies
+ .last()
+ .map_or(self.raw.$field, |last| last.raw.$field)
+ };
}
- let comment_author_matches_target = comment.value.author == reply_target_author;
- match find_in_replies(comment, reply_target_author) {
- Some(_) => Some(
- // PERFORMANCE(@bpeetz): We should not need to run this code twice. <2025-07-18>
- find_in_replies(comment, reply_target_author)
- .expect("We already had a Some result for this."),
- ),
- None if comment_author_matches_target => Some(comment),
- None => None,
- }
- }
+ debug!(
+ "Failed to find an id for a reply (the parent id did not exist). Assuming deleted comment"
+ );
- for comment in self.inner.iter_mut().rev() {
- if let Some(output) = perform_check(comment, reply_target_author) {
- return Some(output);
- }
- }
+ self.replies.push(Comment {
+ raw: RawComment {
+ original_order: from_last!(original_order, self) + 1,
+ id: Id { id: id.to_owned() },
+ text: "<Deleted comment>".to_owned(),
+ like_count: 0,
+ is_pinned: false,
+ author_id: "@ghost".to_owned(),
+ author: "@ghost".to_owned(),
+ author_is_verified: false,
+ author_thumbnail: Url::parse("https://example.org/@ghost").expect("hard-coded"),
+ parent: raw::Parent::Id(self.raw.id.id.clone()),
+ edited: false,
+ timestamp: from_last!(timestamp, self),
+ author_url: None,
+ author_is_uploader: false,
+ is_favorited: false,
+ },
+ replies: vec![],
+ });
- None
+ self.replies.last_mut().expect("We just added it")
+ } else {
+ self.maybe_get_id(id).expect("It's some")
+ }
}
-}
-fn comment_from_reply(reply: Comment, reply_indicator_matches: &Captures<'_>) -> Comment {
- Comment::from(RawComment {
- text: {
- // Remove the `@<some_name>` for the comment text.
- let full_match = reply_indicator_matches
- .get(0)
- .expect("This will always exist");
- let text = reply.value.text[0..full_match.start()].to_owned()
- + &reply.value.text[full_match.end()..];
+ fn sort_replies(&mut self) {
+ self.replies
+ .sort_by_key(|comment| comment.raw.original_order);
- text.trim_matches(|c: char| c == '\u{200b}' || c == '\u{2060}' || c.is_whitespace())
- .to_owned()
- },
- ..reply.value
- })
+ self.replies.iter_mut().for_each(Comment::sort_replies);
+ }
}
impl From<RawComment> for Comment {
fn from(value: RawComment) -> Self {
Self {
- value,
+ raw: value,
replies: vec![],
}
}
diff --git a/crates/yt/src/storage/db/video/comments/raw.rs b/crates/yt/src/storage/db/video/comments/raw.rs
index 3b7f40f..e27eedd 100644
--- a/crates/yt/src/storage/db/video/comments/raw.rs
+++ b/crates/yt/src/storage/db/video/comments/raw.rs
@@ -47,10 +47,15 @@ impl From<String> for Parent {
#[derive(Debug, Deserialize, Clone, Eq, PartialEq, PartialOrd, Ord)]
#[allow(clippy::struct_excessive_bools)]
pub(crate) struct RawComment {
+ /// This field is used to encode the original order of the comments in the raw vector, returned
+ /// by yt-dlp.
+ #[serde(default = "zero")]
+ pub(crate) original_order: usize,
+
pub(crate) id: Id,
pub(crate) text: String,
#[serde(default = "zero")]
- pub(crate) like_count: u32,
+ pub(crate) like_count: usize,
pub(crate) is_pinned: bool,
pub(crate) author_id: String,
#[serde(default = "unknown")]
@@ -71,7 +76,7 @@ pub(crate) struct RawComment {
fn unknown() -> String {
"<Unknown>".to_string()
}
-fn zero() -> u32 {
+fn zero() -> usize {
0
}
fn edited_from_time_text<'de, D>(d: D) -> Result<bool, D::Error>
diff --git a/crates/yt/src/storage/db/video/comments/tests.rs b/crates/yt/src/storage/db/video/comments/tests.rs
index 03e3597..8cb1a9a 100644
--- a/crates/yt/src/storage/db/video/comments/tests.rs
+++ b/crates/yt/src/storage/db/video/comments/tests.rs
@@ -38,6 +38,11 @@ macro_rules! mk_comments {
)
)+
) => {{
+ use std::sync::atomic::{AtomicUsize, Ordering};
+
+ static INDEX_INPUT: AtomicUsize = AtomicUsize::new(0);
+ static INDEX_EXPECTED: AtomicUsize = AtomicUsize::new(0);
+
let (nested_input, _) = mk_comments!(
$(
$(
@@ -49,7 +54,7 @@ macro_rules! mk_comments {
let mut input: Vec<RawComment> = vec![
$(
- mk_comments!(@to_raw input $name $comment $parent, $actual_parent)
+ mk_comments!(@to_raw input $name INDEX_INPUT.fetch_add(1, Ordering::Relaxed), $comment $parent, $actual_parent)
),+
];
input.extend(nested_input);
@@ -58,7 +63,7 @@ macro_rules! mk_comments {
inner: vec![
$(
Comment {
- value: mk_comments!(@to_raw expected $name $comment $parent, $actual_parent),
+ raw: mk_comments!(@to_raw expected $name INDEX_EXPECTED.fetch_add(1, Ordering::Relaxed), $comment $parent, $actual_parent),
replies: {
let (_, nested_expected) = mk_comments!(
$(
@@ -86,6 +91,11 @@ macro_rules! mk_comments {
)
)+
) => {{
+ use std::sync::atomic::{AtomicUsize, Ordering};
+
+ static INDEX_INPUT: AtomicUsize = AtomicUsize::new(0);
+ static INDEX_EXPECTED: AtomicUsize = AtomicUsize::new(0);
+
let (nested_input, _) = mk_comments!(
$(
$(
@@ -97,7 +107,7 @@ macro_rules! mk_comments {
let mut input: Vec<RawComment> = vec![
$(
- mk_comments!(@to_raw input $name $comment)
+ mk_comments!(@to_raw input $name INDEX_INPUT.fetch_add(1, Ordering::Relaxed), $comment)
),+
];
input.extend(nested_input);
@@ -106,7 +116,7 @@ macro_rules! mk_comments {
inner: vec![
$(
Comment {
- value: mk_comments!(@to_raw expected $name $comment),
+ raw: mk_comments!(@to_raw expected $name INDEX_EXPECTED.fetch_add(1, Ordering::Relaxed), $comment),
replies: {
let (_, nested_expected) = mk_comments!(
$(
@@ -125,19 +135,30 @@ macro_rules! mk_comments {
(input, expected)
}};
- (@mk_id $name:ident $comment:literal) => {{
+ (@mk_id $name:ident $comment:literal $($parent:expr)?) => {{
use std::hash::{Hash, Hasher};
let input = format!("{}{}", stringify!($name), $comment);
let mut digest = std::hash::DefaultHasher::new();
input.hash(&mut digest);
- Id { id: digest.finish().to_string() }
+
+ #[allow(unused_mut, unused_assignments)]
+ {
+ let mut parent_id = ".".to_owned();
+
+ $(
+ parent_id = format!("{}.", $parent.id);
+ )?
+
+ Id { id: format!("{parent_id}{}", digest.finish().to_string()) }
+ }
}};
- (@to_raw $state:ident $name:ident $comment:literal $($parent:expr, $actual_parent:ident)?) => {
+ (@to_raw $state:ident $name:ident $index:expr, $comment:literal $($parent:expr, $actual_parent:ident)?) => {
RawComment {
- id: mk_comments!(@mk_id $name $comment),
+ original_order: $index,
+ id: mk_comments!(@mk_id $name $comment $($parent)?),
text: mk_comments!(@mk_text $state $comment $(, $actual_parent)?),
like_count: 0,
is_pinned: false,