diff options
author | Soispha <soispha@vhack.eu> | 2024-01-03 22:42:04 +0100 |
---|---|---|
committer | Soispha <soispha@vhack.eu> | 2024-01-03 22:42:04 +0100 |
commit | ea09ccf2e9b48b3aa870e90095a44fc8839ba9ff (patch) | |
tree | 6e9321f643940e7a687fd5472d308e1d31c7f1af /hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py | |
parent | feat(hm/pkgs/scr): Add support for python based scripts (diff) | |
download | nixos-config-ea09ccf2e9b48b3aa870e90095a44fc8839ba9ff.zip |
feat(hm/pkgs/scr/ytcc): Add support for showing comments
Diffstat (limited to 'hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py')
-rwxr-xr-x | hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py | 100 |
1 files changed, 100 insertions, 0 deletions
#!/usr/bin/env python3

"""
SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
Copyright © 2021 pukkandan.ytdlp@gmail.com
Copyright © 2024 soispha@vhack.eu


* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
* Change FIELDS according to your needs

The nested comments are written to stdout as JSON; progress messages go to
stderr. The output will be in the format:
[{
  'text': 'comment 1',
  ...
  'replies': [{
    'text': 'reply 1',
    ...
    'replies': [...],
  }, ...],
}, ...]
"""

import argparse
import json
import sys
from datetime import datetime, timezone


def eprint(*args, **kwargs):
    """Print to stderr, keeping stdout reserved for the JSON result."""
    print(*args, file=sys.stderr, **kwargs)


def _format_timestamp(dct, name):
    """Return ``dct[name]`` (a Unix timestamp) as a UTC ``YYYY-MM-DD`` string.

    Returns None when the field is missing or null so the field is omitted
    from the output. A timestamp of 0 is a valid date and is formatted.
    """
    timestamp = dct.get(name)
    if timestamp is None:
        return None
    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp.
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d")


def _is_edited(dct, name):
    """Return True when the comment's ``_time_text`` contains "(edited)".

    A missing or null ``_time_text`` counts as "not edited" instead of
    raising a TypeError.
    """
    return "(edited)" in (dct.get("_time_text") or "")


def _filtered_replies(dct, name):
    """Return the filtered replies of a comment, or None when it has none."""
    return filter_func(dct.get(name) or []) or None


def get_fields(dct):
    """Yield ``(name, value)`` for every FIELDS entry that is not None for *dct*."""
    for name, fn in FIELDS.items():
        val = fn(dct, name)
        if val is not None:
            yield name, val


def filter_func(comments):
    """Reduce each comment dict in *comments* to the fields listed in FIELDS."""
    return [dict(get_fields(c)) for c in comments]


# Maps an output field name to a callable ``(comment_dict, field_name)`` that
# produces its value; a None result drops the field from the output.
FIELDS = {
    "text": dict.get,
    "author": dict.get,
    "timestamp": _format_timestamp,
    "edited": _is_edited,
    "author_is_uploader": dict.get,
    "is_favorited": dict.get,
    # Add more fields here
    "replies": _filtered_replies,
}


def nest_comments(comments):
    """Turn a flat list of comment dicts into a nested, filtered list.

    Each comment needs an ``id``. Its ``parent`` is either ``"root"`` (a
    missing or null parent is treated the same) or the id of another comment.
    Comments are handled in ascending timestamp order. A reply whose parent is
    not part of *comments* is kept at top level and reported on stderr.

    The input dicts are modified in place: parents gain a ``replies`` list.

    Returns the top-level comments, reduced to the fields in FIELDS, with
    replies nested under ``replies``.
    """
    by_id = {
        c["id"]: c
        for c in sorted(comments, key=lambda c: c.get("timestamp") or 0)
    }
    # Count the deduplicated comments so the progress counter reaches the total.
    count = len(by_id)
    top_level = []
    for i, c in enumerate(by_id.values(), 1):
        eprint(f"Processing comment {i}/{count}", end="\r")
        parent_id = c.get("parent") or "root"
        if parent_id == "root":
            siblings = top_level
        elif parent_id in by_id:
            siblings = by_id[parent_id].setdefault("replies", [])
        else:
            # Leading newline keeps the warning off the "\r" progress line.
            eprint(
                f"\nComment {c['id']} has unknown parent {parent_id}; "
                "keeping it at top level"
            )
            siblings = top_level
        siblings.append(c)

    eprint("")
    return filter_func(top_level)


def build_parser():
    """Create the command line parser; the info.json path is one positional."""
    parser = argparse.ArgumentParser()
    # A positional is always required and derives its dest from its name, so
    # passing `dest=` or `required=` here makes argparse raise.
    parser.add_argument(
        "inputfile",
        metavar="FILE",
        help="File to read video metadata from (info.json)",
    )
    return parser


def main(argv=None):
    """Read the info.json named in *argv* and print its nested comments.

    *argv* defaults to ``sys.argv[1:]``. The JSON result goes to stdout.
    """
    args = build_parser().parse_args(argv)

    eprint("Reading file")
    with open(args.inputfile, encoding="utf-8") as f:
        info_dict = json.load(f)

    # An info.json written without comments has no (or a null) "comments" key.
    comments = info_dict.get("comments") or []
    del info_dict  # release the rest of the metadata early

    nested_comments = nest_comments(comments)
    del comments

    eprint("Converting to json")
    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)

    del nested_comments
    eprint("Writing file")
    print(out)
    eprint("Done")


if __name__ == "__main__":
    main()