about summary refs log tree commit diff stats
path: root/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py
diff options
context:
space:
mode:
author Soispha <soispha@vhack.eu> 2024-01-03 22:42:04 +0100
committer Soispha <soispha@vhack.eu> 2024-01-03 22:42:04 +0100
commit ea09ccf2e9b48b3aa870e90095a44fc8839ba9ff (patch)
tree 6e9321f643940e7a687fd5472d308e1d31c7f1af /hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py
parent feat(hm/pkgs/scr): Add support for python based scripts (diff)
download nixos-config-ea09ccf2e9b48b3aa870e90095a44fc8839ba9ff.zip
feat(hm/pkgs/scr/ytcc): Add support for showing comments
Diffstat (limited to 'hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py')
-rwxr-xr-x hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py 100
1 files changed, 100 insertions, 0 deletions
diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py b/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py
new file mode 100755
index 00000000..2927f7bd
--- /dev/null
+++ b/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+
+"""
+SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
+Copyright © 2021 pukkandan.ytdlp@gmail.com
+Copyright © 2024 soispha@vhack.eu
+
+
+* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
+* Change FIELDS according to your needs
+
+The output (printed to stdout) will be in the format:
+[{
+  'text': 'comment 1',
+  ...
+  'replies': [{
+    'text': 'reply 1',
+    ...
+    'replies': [...],
+  }, ...],
+}, ...]
+"""
+
+import argparse
+import json
+import sys
+from datetime import datetime, timezone
+
+def eprint(*args, **kwargs):
+    """Print to standard error, forwarding all arguments to print()."""
+    print(*args, **kwargs, file=sys.stderr)
+
+def get_fields(dct):
+    """Yield (name, value) pairs for every FIELDS entry that applies to dct.
+
+    Each extractor in FIELDS is called as extractor(dct, name); entries
+    whose extractor returns None are skipped.
+    """
+    for field_name, extractor in FIELDS.items():
+        value = extractor(dct, field_name)
+        if value is None:
+            continue
+        yield field_name, value
+
+
+def filter_func(comments):
+    """Return a list with each comment reduced to the fields from get_fields."""
+    reduced = []
+    for comment in comments:
+        reduced.append(dict(get_fields(comment)))
+    return reduced
+
+
+def _format_timestamp(dct, name):
+    """Return the POSIX timestamp stored under name as a UTC YYYY-MM-DD string.
+
+    Returns None when the comment has no timestamp, so that get_fields
+    omits the field.
+    """
+    timestamp = dct.get(name)
+    if timestamp is None:
+        return None
+    # Timezone-aware replacement for datetime.utcfromtimestamp, which is
+    # deprecated since Python 3.12; the formatted date is the same.
+    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d")
+
+
+def _is_edited(dct, name):
+    """Return whether the comment's "_time_text" contains "(edited)".
+
+    A missing or null "_time_text" counts as not edited instead of raising
+    a TypeError.
+    """
+    return "(edited)" in (dct.get("_time_text") or "")
+
+
+def _nested_replies(dct, name):
+    """Return the filtered replies of a comment, or None if it has none."""
+    # `or []` also covers an explicit null value, not only a missing key.
+    return filter_func(dct.get(name) or []) or None
+
+
+# Maps each output field name to an extractor called as fn(comment, name).
+# An extractor returning None removes the field from the output.
+FIELDS = {
+    "text": dict.get,
+    "author": dict.get,
+    "timestamp": _format_timestamp,
+    "edited": _is_edited,
+    "author_is_uploader": dict.get,
+    "is_favorited": dict.get,
+    # Add more fields here
+    "replies": _nested_replies,
+}
+
+
+# Command line: one mandatory positional argument naming the info.json file.
+# argparse derives the attribute name from a positional's own name and always
+# requires it, so passing `dest` or `required` for a positional raises an
+# error when add_argument is called.
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "inputfile",
+    metavar="FILE",
+    help="File to read video metadata from (info.json)",
+)
+args = parser.parse_args()
+
+
+eprint("Reading file")
+with open(args.inputfile, encoding="utf-8") as f:
+    info_dict = json.load(f)
+
+# Tolerate an info.json without a usable "comments" entry: a missing or null
+# value is treated as an empty comment list.
+comments = info_dict.get("comments") or []
+del info_dict
+
+# Index the comments by id, ordered by timestamp (comments without one sort
+# first), so that top-level comments and replies are emitted oldest first.
+comment_data = {
+    c["id"]: c for c in sorted(comments, key=lambda c: c.get("timestamp") or 0)
+}
+count = len(comments)
+del comments
+
+nested_comments = []
+for i, c in enumerate(comment_data.values(), 1):
+    eprint(f"Processing comment {i}/{count}", end="\r")
+    parent_id = c["parent"]
+    if parent_id == "root" or parent_id not in comment_data:
+        # Top-level comment, or a reply whose parent is not part of the dump:
+        # keep it at the top level rather than failing with a KeyError.
+        siblings = nested_comments
+    else:
+        siblings = comment_data[parent_id].setdefault("replies", [])
+    siblings.append(c)
+
+
+# Terminate the "\r"-updated progress line.
+eprint("")
+nested_comments = filter_func(nested_comments)
+
+
+eprint("Converting to json")
+out = json.dumps(nested_comments, indent=4, ensure_ascii=False)
+
+
+# Drop the nested structure before printing; only the serialized text is needed.
+del nested_comments
+eprint("Writing file")
+print(out)
+eprint("Done")