#!/usr/bin/env python3

"""
SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT
Copyright © 2021 pukkandan.ytdlp@gmail.com
Copyright © 2024 soispha@vhack.eu

* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote
* Change FIELDS according to your needs

The output (written to stdout) will be in the format:
[{
  'text': 'comment 1',
  ...
  'replies': [{
    'text': 'reply 1',
    ...
    'replies': [...],
  }, ...],
}, ...]
"""

import argparse
import json
import sys
from datetime import datetime, timezone


def eprint(*args, **kwargs):
    """Print to stderr, so that stdout carries nothing but the JSON result."""
    print(*args, file=sys.stderr, **kwargs)


def get_fields(dct):
    """Yield ``(name, value)`` for each FIELDS entry that produces a value.

    Every extractor in FIELDS is called as ``fn(dct, name)``; entries whose
    extractor returns ``None`` are left out of the output.
    """
    for name, fn in FIELDS.items():
        val = fn(dct, name)
        if val is not None:
            yield name, val


def filter_func(comments):
    """Return a list with each comment dict reduced to the FIELDS entries."""
    return [dict(get_fields(c)) for c in comments]


def _format_timestamp(dct, name):
    """Return the UNIX timestamp stored under *name* as a ``YYYY-MM-DD`` UTC date.

    Returns ``None`` (field omitted) when the comment has no timestamp.
    """
    ts = dct.get(name)
    if ts is None:
        return None
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
    # UTC datetime formats to the same date string.
    return datetime.fromtimestamp(ts, tz=timezone.utc).strftime("%Y-%m-%d")


def _is_edited(dct, name):
    """Return whether the comment's ``_time_text`` contains "(edited)".

    A missing (or null) ``_time_text`` counts as "not edited" instead of
    raising ``TypeError`` on ``in None``.
    """
    return "(edited)" in (dct.get("_time_text") or "")


FIELDS = {
    "text": dict.get,
    "author": dict.get,
    "timestamp": _format_timestamp,
    "edited": _is_edited,
    "author_is_uploader": dict.get,
    "is_favorited": dict.get,
    # Add more fields here
    # An empty reply list becomes None so that the key is dropped entirely.
    "replies": lambda dct, name: filter_func(dct.get(name, [])) or None,
}


def nest_comments(comments):
    """Turn a flat list of comment dicts into a tree and return its roots.

    Comments are ordered by ``timestamp`` (missing timestamps sort first) and
    each reply is appended to the ``replies`` list of the comment named by its
    ``parent`` id. The comment dicts are modified in place. Progress is
    reported on stderr.

    A comment is treated as top-level when its ``parent`` is ``"root"``,
    missing, or refers to an id that is not present in *comments*; the last
    case keeps orphaned replies in the output instead of aborting.

    Raises:
        KeyError: if a comment has no ``id``.
    """
    comment_data = {
        c["id"]: c
        for c in sorted(comments, key=lambda c: c.get("timestamp") or 0)
    }
    count = len(comments)

    nested_comments = []
    orphans = 0
    for i, c in enumerate(comment_data.values(), 1):
        eprint(f"Processing comment {i}/{count}", end="\r")
        parent_id = c.get("parent") or "root"
        parent = None if parent_id == "root" else comment_data.get(parent_id)
        if parent is None:
            if parent_id != "root":
                orphans += 1
            nested_comments.append(c)
        else:
            parent.setdefault("replies", []).append(c)
    eprint("")

    if orphans:
        eprint(f"{orphans} replies had an unknown parent and were kept as top-level comments")
    return nested_comments


def main():
    """Read the info.json named on the command line and print nested comments."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "inputfile",
        metavar="FILE",
        help="File to read video metadata from (info.json)",
    )
    args = parser.parse_args()

    eprint("Reading file")
    with open(args.inputfile, encoding="utf-8") as f:
        info_dict = json.load(f)

    comments = info_dict.get("comments")
    # Only the comments are needed; release the rest of the metadata early.
    del info_dict
    if comments is None:
        sys.exit(f"{args.inputfile}: no comments found (was the info.json written with comments?)")

    nested_comments = filter_func(nest_comments(comments))
    del comments

    eprint("Converting to json")
    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)

    # Drop the tree before writing to keep peak memory down on large files.
    del nested_comments
    eprint("Writing file")
    print(out)
    eprint("Done")


if __name__ == "__main__":
    main()