From ea09ccf2e9b48b3aa870e90095a44fc8839ba9ff Mon Sep 17 00:00:00 2001 From: Soispha Date: Wed, 3 Jan 2024 22:42:04 +0100 Subject: feat(hm/pkgs/scr/ytcc): Add support for showing comments --- .../pkgs/scripts/specific/ytcc/filter_comments.jq | 20 ++++ .../pkgs/scripts/specific/ytcc/filter_comments.sh | 12 +++ .../pkgs/scripts/specific/ytcc/nest_comments.py | 100 +++++++++++++++++++ hm/soispha/pkgs/scripts/specific/ytcc/ytc | 108 +++++++++++++++++++++ hm/soispha/pkgs/scripts/specific/ytcc/yts | 65 +++++++++++++ 5 files changed, 305 insertions(+) create mode 100644 hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/ytc create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/yts (limited to 'hm/soispha/pkgs/scripts/specific/ytcc') diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq new file mode 100644 index 00000000..78bde7b8 --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq @@ -0,0 +1,20 @@ +def relative_time: + "\(((now - ("\(.timestamp)T00:00:00Z" | fromdate)) / (60 * 60 * 24)) * 10 | round / 10)d"; + +def spaces($ident): + "\([range($ident)] | map(" ") | join(""))"; + +def c($colour): + "\u001B[\($colour)m"; + +def if_states($char): + "\(if .edited or .is_favorited then $char else "" end)"; + +def status: + "\(if_states("["))\(if .edited then "" else "" end)\(if .is_favorited and .edited then " " else "" end)\(if .is_favorited then "" else "" end)\(if_states("]"))"; + +def fmt_cmt($ident): + "\(spaces($ident))\(if .author_is_uploader then c("91;1") else c("35") + end)\(.author)\(c("0"))\(status) \(c("36;1"))(\(. | relative_time))\(c("0")):\n\(spaces($ident))\(.text | gsub("\n"; "\n\(spaces($ident))"))\n\(spaces($ident))\(if .replies? 
then (.replies | map(fmt_cmt($ident + 4)) | join("\n\(spaces($ident))")) else "" end)"; + +. | map(fmt_cmt(0)) | join("\n") diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh new file mode 100755 index 00000000..436a3bc5 --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env dash + +# shellcheck source=/dev/null +SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH + +# This is the symlink to the active info.json +file="$XDG_RUNTIME_DIR/ytcc/running"; + + +nest_comments.py "$file" | jq --raw-output -f %JQ_PREPROCCESSOR_SCRIPT | fmt -u -s --width=90 | less -r + +# vim: ft=sh diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py b/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py new file mode 100755 index 00000000..2927f7bd --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +""" +SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT +Copyright © 2021 pukkandan.ytdlp@gmail.com +Copyright © 2024 soispha@vhack.eu + + +* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote +* Change FIELDS according to your needs + +The output file will be in the format: +[{ + 'text': 'comment 1', + ... + 'replies': [{ + 'text': 'reply 1', + ... + 'replies': [...], + }, ...], +}, ...] 
import argparse
import json
import sys
from datetime import datetime, timezone


def eprint(*args, **kwargs):
    """Print to stderr, keeping stdout free for the resulting JSON."""
    print(*args, file=sys.stderr, **kwargs)


def get_fields(dct):
    """Yield ``(name, value)`` for every entry of FIELDS that is not None.

    Each extractor in FIELDS is called as ``fn(dct, name)``; fields whose
    extractor returns None are left out of the output entirely.
    """
    for name, fn in FIELDS.items():
        val = fn(dct, name)
        if val is not None:
            yield name, val


def filter_func(comments):
    """Reduce every comment in *comments* to the fields listed in FIELDS."""
    return [dict(get_fields(c)) for c in comments]


def _format_timestamp(dct, name):
    """Return the Unix timestamp under *name* as a UTC ``YYYY-MM-DD`` string.

    Returns None when the field is missing, so that it is dropped from the
    output. A timestamp of 0 is a valid date and is formatted like any other.
    """
    timestamp = dct.get(name)
    if timestamp is None:
        return None
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%d")


def _is_edited(dct, name):
    """Return True if the comment's ``_time_text`` carries an "(edited)" marker.

    A missing (or null) ``_time_text`` counts as "not edited" instead of
    raising a TypeError.
    """
    return "(edited)" in (dct.get("_time_text") or "")


FIELDS = {
    "text": dict.get,
    "author": dict.get,
    "timestamp": _format_timestamp,
    "edited": _is_edited,
    "author_is_uploader": dict.get,
    "is_favorited": dict.get,
    # Add more fields here
    "replies": lambda dct, name: filter_func(dct.get(name, [])) or None,
}


def nest_comments(comments):
    """Turn the flat comment list of an info.json into a tree.

    Comments whose ``parent`` is ``"root"`` become top-level entries; every
    other comment is appended to the ``replies`` list of its parent. The
    comments are handled in ascending timestamp order (missing timestamps
    sort first), which also defines the order of the replies.

    A reply whose parent is not part of *comments* (e.g. because the comment
    download was truncated) is kept as a top-level entry instead of aborting
    the whole run with a KeyError.

    The comment dicts are modified in place: parents gain a ``replies`` key.
    """
    ordered = sorted(comments, key=lambda c: c.get("timestamp") or 0)
    by_id = {c["id"]: c for c in ordered}
    count = len(comments)

    nested = []
    for i, comment in enumerate(by_id.values(), 1):
        eprint(f"Processing comment {i}/{count}", end="\r")
        parent_id = comment.get("parent", "root")
        parent = None if parent_id == "root" else by_id.get(parent_id)
        if parent is None:
            nested.append(comment)
        else:
            parent.setdefault("replies", []).append(comment)
    return nested


def parse_args(argv=None):
    """Parse the command line; *argv* defaults to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser()
    # A positional argument is always required and derives its attribute name
    # from its own name; argparse rejects `dest=` and `required=` for it.
    parser.add_argument(
        "inputfile",
        metavar="FILE",
        help="File to read video metadata from (info.json)",
    )
    return parser.parse_args(argv)


def main(argv=None):
    """Read the info.json, nest its comments and print them as JSON to stdout."""
    args = parse_args(argv)

    eprint("Reading file")
    with open(args.inputfile, encoding="utf-8") as f:
        info_dict = json.load(f)

    # The key is absent (or null) when the video was fetched without comments.
    comments = info_dict.get("comments") or []
    del info_dict

    nested_comments = nest_comments(comments)

    eprint("")
    nested_comments = filter_func(nested_comments)

    eprint("Converting to json")
    out = json.dumps(nested_comments, indent=4, ensure_ascii=False)

    del nested_comments
    eprint("Writing file")
    print(out)
    eprint("Done")


if __name__ == "__main__":
    main()
#!/usr/bin/env dash

# Download the videos selected via ytcc with yt-dlp and play each one with mpv
# while the next downloads are still running. A video that mpv exits cleanly
# on is deleted and marked as watched in ytcc.
#
# All arguments are passed on to `ytcc list --ids`.
#
# NOTE(review): msg2, dbg, die, mktmp and tmp are not defined in this script;
# presumably they are provided by the shell library sourced below.

# shellcheck source=/dev/null
SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH
CONCURRENT=4
OUTPUT_PATH="/tmp/ytcc";
STATUS_FILE="$XDG_RUNTIME_DIR/ytcc/running";
STATUS_PATH="$(dirname "$STATUS_FILE")";

# Print field $2 of the ';'-separated record $1.
col() {
    # printf instead of echo: dash's echo would interpret backslashes in $1.
    printf '%s\n' "$1" | csvtool -t ';' -u ';' col "$2" -
}

# Play the file $1; on success remove it and mark the ytcc id $2 as watched.
# Returns mpv's exit status.
play() {
    msg2 "Playing: '$1'"

    # Point the status symlink at the info.json that belongs to this file.
    info_json="$(printf '%s\n' "$1" | sed 's|\(.*\)\.[a-z0-9]\+|\1.info.json|')";
    [ -L "$STATUS_FILE" ] && rm "$STATUS_FILE"
    ln -s "$(readlink -f "$info_json")" "$STATUS_FILE"

    mpv "$1" --speed=2.7 --volume=75
    output="$?";

    if [ "$output" -eq 0 ]; then
        msg2 "Removing: $1"
        rm "$1"
        msg2 "Marking: " "$2"
        ytcc mark "$2"
    fi
    return "$output"
}

# Replace every ';' in $1 by ',', as ';' separates the fields of a record.
escape() {
    printf '%s\n' "$1" | awk '{gsub(/;/, ","); print}'
}

bases="$(ytcc --output json list --attributes url --ids "$@" | jq --raw-output 'map("\(.url);\(.id)") | join("\n")')";

yt_flags="$(mktmp)"
cat << EOF > "$yt_flags"
--format bestvideo[height<=?1080]+bestaudio/best
--embed-chapters
--progress
--write-comments
--extractor-args youtube:max_comments=150,all,100;comment_sort=top
--write-info-json
--sponsorblock-mark default
--sponsorblock-remove sponsor
EOF

# -p: do not fail on missing parent directories or on a concurrent creation.
[ -d "$STATUS_PATH" ] || mkdir -p "$STATUS_PATH";
[ -d "$OUTPUT_PATH" ] || mkdir -p "$OUTPUT_PATH";
cd "$OUTPUT_PATH" || die "(Bug): Was created"

filename_file="$(mktmp)";
files_to_play="$(mktmp)";

# Start from a clean state instead of whatever the environment exported.
pid=""
old_filename=""
old_id=""

while read -r base; do
    url="$(col "$base" 1)";
    id="$(col "$base" 2)"

    if [ "$old_filename" ]; then
        # Queue the file that was downloaded in the previous iteration.
        echo "$(escape "$old_filename");$old_id" >> "$files_to_play"

        # Check if the process (pid) exists; no pid means nothing was started yet.
        dbg "PID is '$pid'"
        if [ -z "$pid" ] || ! kill -0 "$pid"; then
            saved_base="$(head -n 1 "$files_to_play")";
            sed -i '1d' "$files_to_play";
            saved_name="$(col "$saved_base" 1)";
            saved_id="$(col "$saved_base" 2)"

            dbg "Started play for '$saved_name'"
            play "$saved_name" "$saved_id" &
            pid=$!
        else
            dbg "Storing for later '$old_filename'"
        fi
    fi

    # The sub shell needs to be unquoted, as the arguments may not be treated as one.
    # shellcheck disable=2046
    yt-dlp $(cat "$yt_flags") --output "%(channel)s/%(title)s.%(ext)s" "$url" --print-to-file after_move:filepath "$filename_file"

    filename="$(cat "$filename_file")"
    printf "" > "$filename_file"

    if [ "$old_filename" ]; then
        # Stop downloading ahead once more than $CONCURRENT files are queued.
        if [ "$(wc -l < "$files_to_play")" -gt "$CONCURRENT" ]; then
            msg2 "Waiting for '$pid' to finish as we already have '$(wc -l < "$files_to_play")' files cached"
            wait "$pid"
        fi
    fi

    old_filename="$filename";
    old_id="$id";
done < "$(tmp echo "$bases")"

# No player was started if fewer than two videos were downloaded.
if [ -n "$pid" ]; then
    wait "$pid"
fi

# Queue the last download; skip it if nothing was downloaded at all.
if [ -n "$old_filename" ]; then
    echo "$(escape "$old_filename");$old_id" >> "$files_to_play"
fi

# Play everything that is still queued, one file after the other.
while read -r base; do
    name="$(col "$base" 1)";
    id="$(col "$base" 2)"

    dbg "Started play for '$name'"
    play "$name" "$id"
done < "$files_to_play"
# vim: ft=sh
#!/usr/bin/env dash

# Let the user pick, in $EDITOR, which of the videos listed by ytcc to watch
# or to drop, then hand the ids selected for watching to `ytc`.
#
# NOTE(review): mktmp and dbg are not defined in this script; presumably they
# are provided by the shell library sourced below.

# shellcheck source=/dev/null
SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH

TASK_UUID=ce4f9e07-8324-4570-8be6-967955e9271e

# Stop the task that is started further down, however the script exits.
cleanup() {
    task stop "$TASK_UUID"
}
trap cleanup EXIT

help_text="
#
# Commands:
# w, watch = watch id
# d, drop = mark id as watched
# p, pick = leave id as is; This is a noop
#
# These lines can be re-ordered; they are executed from top to bottom.
# vim: ft=gitrebase"

table="$(ytcc --output json list | jq --raw-output 'map("pick \(.id) \(.title) (\(.playlists | map(.name) | join(", "))) [\(.duration | gsub("^\\s+|\\s+$";""))]") | join("\n")')"

selection_file="$(mktmp)";

task start "$TASK_UUID"

# printf instead of echo: dash's echo would interpret backslashes in titles.
printf '%s\n' "$table" > "$selection_file";
printf '%s\n' "$help_text" >> "$selection_file";

# Unquoted on purpose, so that an editor with arguments keeps working. The
# fallback avoids executing the selection file itself when EDITOR is unset.
# shellcheck disable=2086
${EDITOR:-vi} "$selection_file"

ids=""
while read -r line; do
    cmd="$(printf '%s\n' "$line" | awk '{print $1}')";
    case "$cmd" in
        "#" )
            # This is a comment, do nothing here
            ;;
        "pick" | "p")
            # noop do nothing here
            ;;
        "drop" | "d")
            id="$(printf '%s\n' "$line" | awk '{print $2}')";
            ytcc mark "$id";
            dbg "Marked as watched: $id"
            ;;
        "watch" | "w")
            id="$(printf '%s\n' "$line" | awk '{print $2}')";
            # Collect the ids as a comma separated list.
            if [ -z "$ids" ]; then
                ids="$id";
            else
                ids="$ids,$id";
            fi
            dbg "Added to be watched: $id"
            ;;
    esac
done < "$selection_file"

# An `if` rather than `[ ... ] && ytc`: the latter made the whole script exit
# with status 1 whenever nothing was selected for watching.
if [ -n "$ids" ]; then
    ytc "$ids";
fi

# vim: ft=sh