diff options
Diffstat (limited to 'hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py')
-rwxr-xr-x | hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py | 98 |
1 files changed, 0 insertions, 98 deletions
#!/usr/bin/env python3
# Reconstructed from the flattened deletion diff of
# hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py

"""
SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
Copyright © 2021 pukkandan.ytdlp@gmail.com
Copyright © 2024 soispha@vhack.eu


* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
* Change FIELDS according to your needs

The output file will be in the format:
[{
  'text': 'comment 1',
  ...
  'replies': [{
    'text': 'reply 1',
    ...
    'replies': [...],
  }, ...],
}, ...]
"""

import json
import sys
import argparse
from datetime import datetime, timezone


def eprint(*args, **kwargs):
    """Print to stderr so that stdout carries only the resulting JSON."""
    print(*args, file=sys.stderr, **kwargs)


def get_fields(dct):
    """Yield ``(name, value)`` for every entry of FIELDS whose value is not None.

    Each extractor in FIELDS is called as ``fn(dct, name)``.
    """
    for name, fn in FIELDS.items():
        val = fn(dct, name)
        if val is not None:
            yield name, val


def filter_func(comments):
    """Return a list with one FIELDS-filtered dict per comment in *comments*."""
    return [dict(get_fields(c)) for c in comments]


def _format_timestamp(dct, name):
    """Return ``dct[name]`` (a Unix timestamp) formatted as a UTC ``YYYY-MM-DD`` date.

    A missing or falsy value is passed through unchanged, matching the
    original ``value and format(value)`` expression.
    """
    ts = dct.get(name)
    if not ts:
        return ts
    # Timezone-aware replacement for the deprecated datetime.utcfromtimestamp();
    # the formatted date is the same.
    return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")


def _is_edited(dct, name):
    """Return True when the comment's ``_time_text`` contains ``(edited)``.

    A missing or None ``_time_text`` counts as "not edited" instead of
    raising TypeError on ``in None``.
    """
    return "(edited)" in (dct.get("_time_text") or "")


FIELDS = {
    "text": dict.get,
    "author": dict.get,
    "timestamp": _format_timestamp,
    "edited": _is_edited,
    "author_is_uploader": dict.get,
    "is_favorited": dict.get,
    # Add more fields here
    "replies": lambda dct, name: filter_func(dct.get(name, [])) or None,
}


def nest_comments(comments):
    """Arrange a flat list of comment dicts into a tree and return its roots.

    Every comment must carry an ``id`` and a ``parent`` key; a ``parent`` of
    ``"root"`` marks a top-level comment.  Comments are processed in ascending
    ``timestamp`` order (a missing timestamp sorts as 0).  Replies are appended
    to a ``replies`` list that is created on the parent dict, so the dicts in
    *comments* are modified in place.

    A reply whose parent id does not occur in *comments* is kept as a
    top-level comment, and a warning is printed to stderr.
    """
    by_id = {
        c["id"]: c
        for c in sorted(comments, key=lambda c: c.get("timestamp") or 0)
    }
    count = len(comments)
    roots = []
    orphans = 0
    for i, c in enumerate(by_id.values(), 1):
        eprint(f"Processing comment {i}/{count}", end="\r")
        parent_id = c["parent"]
        if parent_id == "root":
            siblings = roots
        elif parent_id in by_id:
            siblings = by_id[parent_id].setdefault("replies", [])
        else:
            # The parent is not part of this file; keep the reply visible
            # rather than aborting the whole run with a KeyError.
            orphans += 1
            siblings = roots
        siblings.append(c)

    # Terminate the "\r"-updated progress line.
    eprint("")
    if orphans:
        eprint(f"Warning: {orphans} comment(s) had an unknown parent and were kept at top level")
    return roots


def main():
    """Read an info.json given on the command line and print nested comments as JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "inputfile",
        metavar="FILE",
        help="File to read video metadata from (info.json)",
    )
    args = parser.parse_args()

    eprint("Reading file")
    with open(args.inputfile, encoding="utf-8") as f:
        info_dict = json.load(f)

    # A missing or null "comments" entry is treated as "no comments".
    comments = info_dict.get("comments") or []
    # Drop the rest of the metadata early to keep peak memory down.
    del info_dict

    nested_comments = filter_func(nest_comments(comments))
    del comments

    eprint("Converting to json")
    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)

    del nested_comments
    eprint("Writing file")
    print(out)
    eprint("Done")


if __name__ == "__main__":
    main()