#!/usr/bin/env python3
# Origin: hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py (mode 100755),
# added in commit ea09ccf2e9b48b3aa870e90095a44fc8839ba9ff by Soispha on
# Wed, 3 Jan 2024 22:42:04 +0100:
#   feat(hm/pkgs/scr/ytcc): Add support for showing comments

"""
SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
Copyright © 2021 pukkandan.ytdlp@gmail.com
Copyright © 2024 soispha@vhack.eu


* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
* Change FIELDS according to your needs

The output (written to stdout; progress messages go to stderr) will be in the format:
[{
  'text': 'comment 1',
  ...
  'replies': [{
    'text': 'reply 1',
    ...
    'replies': [...],
  }, ...],
}, ...]
"""

import argparse
import json
import sys
from datetime import datetime, timezone


def eprint(*args, **kwargs):
    """Print to stderr, so status messages never mix with the JSON on stdout."""
    print(*args, file=sys.stderr, **kwargs)


def get_fields(dct):
    """Yield ``(name, value)`` for every entry of FIELDS that applies to *dct*.

    Each extractor in FIELDS is called as ``fn(dct, name)``; fields whose
    extractor returns ``None`` are left out of the output entirely.
    """
    for name, fn in FIELDS.items():
        val = fn(dct, name)
        if val is not None:
            yield name, val


def filter_func(comments):
    """Return a new list with each comment reduced to the fields in FIELDS."""
    return [dict(get_fields(c)) for c in comments]


def _format_timestamp(dct, name):
    """Return the Unix timestamp stored under *name* as a UTC ``YYYY-MM-DD`` string.

    Returns ``None`` (field omitted) when the comment carries no timestamp.
    """
    timestamp = dct.get(name)
    if timestamp is None:
        return None
    # An aware UTC datetime replaces the deprecated datetime.utcfromtimestamp();
    # the formatted date is the same.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d")


def _is_edited(dct, name):
    """Return whether the comment's ``_time_text`` contains the "(edited)" marker."""
    # `or ""` covers both a missing key and an explicit null, where the
    # substring test would otherwise raise TypeError.
    return "(edited)" in (dct.get("_time_text") or "")


def _filtered_replies(dct, name):
    """Return the filtered replies of a comment, or ``None`` if it has none."""
    return filter_func(dct.get(name) or []) or None


# Maps an output field name to an extractor called as ``fn(comment, name)``.
FIELDS = {
    "text": dict.get,
    "author": dict.get,
    "timestamp": _format_timestamp,
    "edited": _is_edited,
    "author_is_uploader": dict.get,
    "is_favorited": dict.get,
    # Add more fields here
    "replies": _filtered_replies,
}


def nest_comments(comments):
    """Turn a flat list of comment dicts into a tree of top-level comments.

    Comments are processed in ascending ``timestamp`` order (missing
    timestamps sort first). A comment whose ``parent`` is ``"root"`` (or
    absent) becomes a top-level entry; any other comment is appended to the
    ``replies`` list of the comment whose ``id`` equals its ``parent``.

    The comment dicts are modified in place (a ``replies`` key is added to
    parents). A comment that names a parent id not present in *comments* is
    kept at top level with a warning instead of aborting the whole run.

    Returns the list of top-level comments; an empty input yields ``[]``.
    """
    by_id = {
        c["id"]: c
        for c in sorted(comments, key=lambda c: c.get("timestamp") or 0)
    }
    # Count the de-duplicated comments so the progress display reaches N/N.
    total = len(by_id)
    roots = []
    for i, c in enumerate(by_id.values(), 1):
        eprint(f"Processing comment {i}/{total}", end="\r")
        parent_id = c.get("parent") or "root"
        if parent_id == "root":
            siblings = roots
        elif parent_id in by_id:
            siblings = by_id[parent_id].setdefault("replies", [])
        else:
            eprint(
                f"\nWarning: comment {c['id']} refers to unknown parent "
                f"{parent_id}; keeping it at top level"
            )
            siblings = roots
        siblings.append(c)
    return roots


def parse_args(argv=None):
    """Parse the command line: a single positional FILE (the info.json)."""
    parser = argparse.ArgumentParser(
        description="Nest the flat comment list of a yt-dlp info.json and print it as JSON."
    )
    # A positional argument is always required and derives its dest from its
    # name; passing `dest=`/`required=` for one makes argparse raise.
    parser.add_argument(
        "inputfile",
        metavar="FILE",
        help="File to read video metadata from (info.json)",
    )
    return parser.parse_args(argv)


def main(argv=None):
    """Read the info.json, nest and filter its comments, print JSON to stdout."""
    args = parse_args(argv)

    eprint("Reading file")
    with open(args.inputfile, encoding="utf-8") as f:
        info_dict = json.load(f)

    comments = info_dict.get("comments")
    if comments is None:
        eprint("Warning: input contains no comments")
        comments = []
    # Drop the rest of the metadata early to keep peak memory down.
    del info_dict

    nested_comments = nest_comments(comments)
    del comments

    # Terminate the "\r"-refreshed progress line.
    eprint("")
    nested_comments = filter_func(nested_comments)

    eprint("Converting to json")
    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)

    del nested_comments
    eprint("Writing file")
    print(out)
    eprint("Done")


if __name__ == "__main__":
    main()