From ea09ccf2e9b48b3aa870e90095a44fc8839ba9ff Mon Sep 17 00:00:00 2001 From: Soispha Date: Wed, 3 Jan 2024 22:42:04 +0100 Subject: feat(hm/pkgs/scr/ytcc): Add support for showing comments --- hm/soispha/pkgs/scripts.nix | 19 +++- .../pkgs/scripts/specific/ytcc/filter_comments.jq | 20 ++++ .../pkgs/scripts/specific/ytcc/filter_comments.sh | 12 +++ .../pkgs/scripts/specific/ytcc/nest_comments.py | 100 +++++++++++++++++++ hm/soispha/pkgs/scripts/specific/ytcc/ytc | 108 +++++++++++++++++++++ hm/soispha/pkgs/scripts/specific/ytcc/yts | 65 +++++++++++++ hm/soispha/pkgs/scripts/wrappers/ytc | 100 ------------------- hm/soispha/pkgs/scripts/wrappers/yts | 65 ------------- 8 files changed, 322 insertions(+), 167 deletions(-) create mode 100644 hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/ytc create mode 100755 hm/soispha/pkgs/scripts/specific/ytcc/yts delete mode 100755 hm/soispha/pkgs/scripts/wrappers/ytc delete mode 100755 hm/soispha/pkgs/scripts/wrappers/yts (limited to 'hm') diff --git a/hm/soispha/pkgs/scripts.nix b/hm/soispha/pkgs/scripts.nix index e0b5a086..84fdfb54 100644 --- a/hm/soispha/pkgs/scripts.nix +++ b/hm/soispha/pkgs/scripts.nix @@ -72,6 +72,14 @@ ''; }; }; + filter-comments-scr = sysLib.writeShellScriptWithLibrary { + name = "filter-comments"; + src = ./scripts/specific/ytcc/filter_comments.sh; + dependencies = with pkgs; [jq fmt less locale] ++ [nest_comments-scr]; + replacementStrings = { + JQ_PREPROCCESSOR_SCRIPT = ./scripts/specific/ytcc/filter_comments.jq; + }; + }; screenshot_persistent-scr = write_shell { name = "screenshot_persistent"; path = "small_functions"; @@ -125,6 +133,11 @@ TASK_PROJECT_FILE = "/home/soispha/repos/nix/nixos-config/hm/soispha/conf/taskwarrior/projects/default.nix"; }; }; + nest_comments-scr = write_python { + name = "nest_comments.py"; + path = "specific/ytcc"; + dependencies_python = ps: []; + }; update-sys-scr = write_shell { name = "update-sys"; path = "small_functions"; @@ -219,7 +232,7 @@ }; ytc-scr = write_shell { name = "ytc"; - path = "wrappers"; + path = "specific/ytcc"; dependencies = builtins.attrValues { inherit (pkgs) @@ -241,7 +254,7 @@ }; yts-scr = write_shell { name = "yts"; - path = "wrappers"; + path = "specific/ytcc"; keep_path = true; # We need neovim dependencies = builtins.attrValues { inherit (pkgs) ytcc jq gawk; @@ -252,6 +265,7 @@ in [ # llp-scr # TODO: see above aumo-scr con2pdf-scr + filter-comments-scr fupdate-scr hibernate-scr ll-scr @@ -259,6 +273,7 @@ in [ lyrics-scr mpc-rm-scr neorg-scr + nest_comments-scr screenshot_persistent-scr screenshot_temporary-scr spodi-scr diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq new file mode 100644 index 00000000..78bde7b8 --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.jq @@ -0,0 +1,20 @@ +def relative_time: + "\(((now - ("\(.timestamp)T00:00:00Z" | fromdate)) / (60 * 60 * 24)) * 10 | round / 10)d"; + +def spaces($ident): + "\([range($ident)] | map(" ") | join(""))"; + +def c($colour): + "\u001B[\($colour)m"; + +def if_states($char): + "\(if .edited or .is_favorited then $char else "" end)"; + +def status: + "\(if_states("["))\(if .edited then "" else "" end)\(if .is_favorited and .edited then " " else "" end)\(if .is_favorited then "" else "" end)\(if_states("]"))"; + +def fmt_cmt($ident): + "\(spaces($ident))\(if .author_is_uploader then c("91;1") else c("35") + end)\(.author)\(c("0"))\(status) \(c("36;1"))(\(. | relative_time))\(c("0")):\n\(spaces($ident))\(.text | gsub("\n"; "\n\(spaces($ident))"))\n\(spaces($ident))\(if .replies? then (.replies | map(fmt_cmt($ident + 4)) | join("\n\(spaces($ident))")) else "" end)"; + +. | map(fmt_cmt(0)) | join("\n") diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh new file mode 100755 index 00000000..436a3bc5 --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/filter_comments.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env dash + +# shellcheck source=/dev/null +SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH + +# This is the symlink to the active info.json +file="$XDG_RUNTIME_DIR/ytcc/running"; + + +nest_comments.py "$file" | jq --raw-output -f %JQ_PREPROCCESSOR_SCRIPT | fmt -u -s --width=90 | less -r + +# vim: ft=sh diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py b/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py new file mode 100755 index 00000000..2927f7bd --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/nest_comments.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +""" +SPDX-License-Identifier: MIT https://opensource.org/licenses/MIT +Copyright © 2021 pukkandan.ytdlp@gmail.com +Copyright © 2024 soispha@vhack.eu + + +* Input file is an info.json (with comments) that yt-dlp (https://github.com/yt-dlp/yt-dlp) wrote +* Change FIELDS according to your needs + +The output file will be in the format: +[{ + 'text': 'comment 1', + ... + 'replies': [{ + 'text': 'reply 1', + ... + 'replies': [...], + }, ...], +}, ...] +""" + +import json +import sys +import argparse +from datetime import datetime + +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + +def get_fields(dct): + for name, fn in FIELDS.items(): + val = fn(dct, name) + if val is not None: + yield name, val + + +def filter_func(comments): + return [dict(get_fields(c)) for c in comments] + + +FIELDS = { + "text": dict.get, + "author": dict.get, + "timestamp": lambda dct, name: dct.get(name) + and datetime.strftime(datetime.utcfromtimestamp(dct.get(name)), "%Y-%m-%d"), + "edited": lambda dct, name: "(edited)" in dct.get("_time_text"), + "author_is_uploader": dict.get, + "is_favorited": dict.get, + # Add more fields here + "replies": lambda dct, name: filter_func(dct.get(name, [])) or None, +} + + +parser = argparse.ArgumentParser() +parser.add_argument( + "input-file", + dest="inputfile", + metavar="FILE", + required=True, + help="File to read video metadata from (info.json)", +) +args = parser.parse_args() + + +eprint("Reading file") +with open(args.inputfile, encoding="utf-8") as f: + info_dict = json.load(f) + +comment_data = { + c["id"]: c + for c in sorted(info_dict["comments"], key=lambda c: c.get("timestamp") or 0) +} +count = len(info_dict["comments"]) +del info_dict +nested_comments = [] +for i, (cid, c) in enumerate(comment_data.items(), 1): + eprint(f"Processing comment {i}/{count}", end="\r") + parent = ( + nested_comments + if c["parent"] == "root" + else comment_data[c["parent"]].setdefault("replies", []) + ) + parent.append(c) +del parent + + +eprint("") +nested_comments = filter_func(nested_comments) + + +eprint("Converting to json") +out = json.dumps(nested_comments, indent=4, ensure_ascii=False) + + +del nested_comments +eprint("Writing file") +print(out) +eprint("Done") diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/ytc b/hm/soispha/pkgs/scripts/specific/ytcc/ytc new file mode 100755 index 00000000..c66ae96c --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/ytc @@ -0,0 +1,108 @@ +#!/usr/bin/env dash + +# shellcheck source=/dev/null +SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH +CONCURRENT=4 +OUTPUT_PATH="/tmp/ytcc"; +STATUS_FILE="$XDG_RUNTIME_DIR/ytcc/running"; +STATUS_PATH="$(dirname "$STATUS_FILE")"; + +col() { + echo "$1" | csvtool -t ';' -u ';' col "$2" - +} + +play() { + msg2 "Playing: '$1'" + + info_json="$(echo "$1" | sed 's|\(.*\)\.[a-z0-9]\+|\1.info.json|')"; + [ -L "$STATUS_FILE" ] && rm "$STATUS_FILE" + ln -s "$(readlink -f "$info_json")" "$STATUS_FILE" + + mpv "$1" --speed=2.7 --volume=75 + output="$?"; + + if [ "$output" -eq 0 ]; then + msg2 "Removing: $1" + rm "$1" + msg2 "Marking: " "$2" + ytcc mark "$2" + fi + return "$output" +} + +escape() { + echo "$1" | awk '{gsub(/;/, ","); print}' +} + +bases="$(ytcc --output json list --attributes url --ids "$@" | jq --raw-output 'map("\(.url);\(.id)") | join("\n")')"; + +yt_flags="$(mktmp)" +cat << EOF > "$yt_flags" +--format bestvideo[height<=?1080]+bestaudio/best +--embed-chapters +--progress +--write-comments +--extractor-args youtube:max_comments=150,all,100;comment_sort=top +--write-info-json +--sponsorblock-mark default +--sponsorblock-remove sponsor +EOF + +[ -d "$STATUS_PATH" ] || mkdir "$STATUS_PATH"; +[ -d "$OUTPUT_PATH" ] || mkdir "$OUTPUT_PATH"; +cd "$OUTPUT_PATH" || die "(Bug): Was created" + +filename_file="$(mktmp)"; +files_to_play="$(mktmp)"; +while read -r base; do + url="$(col "$base" 1)"; + id="$(col "$base" 2)" + + if [ "$old_filename" ]; then + echo "$(escape "$old_filename");$old_id" >> "$files_to_play" + + # Check if the process (pid) exists + dbg "PID is '$pid'" + if ! kill -0 "$pid"; then + saved_base="$(head -n 1 "$files_to_play")"; + sed -i '1d' "$files_to_play"; + saved_name="$(col "$saved_base" 1)"; + saved_id="$(col "$saved_base" 2)" + + dbg "Started play for '$saved_name'" + play "$saved_name" "$saved_id" & + pid=$! + else + dbg "Storing for later '$old_filename'" + fi + fi + + # The sub shell needs to be unquoted, as the arguments may not be treated as one. + # shellcheck disable=2046 + yt-dlp $(cat "$yt_flags") --output "%(channel)s/%(title)s.%(ext)s" "$url" --print-to-file after_move:filepath "$filename_file" + + filename="$(cat "$filename_file")" + printf "" > "$filename_file" + + if [ "$old_filename" ]; then + if [ "$(wc -l < "$files_to_play")" -gt "$CONCURRENT" ]; then + msg2 "Waiting for '$pid' to finish as we already have '$(wc -l < "$files_to_play")' files cached" + wait "$pid" + fi + fi + + old_filename="$filename"; + old_id="$id"; +done < "$(tmp echo "$bases")" + +wait "$pid" +echo "$(escape "$old_filename");$old_id" >> "$files_to_play" + +while read -r base; do + name="$(col "$base" 1)"; + id="$(col "$base" 2)" + + dbg "Started play for '$name'" + play "$name" "$id" +done < "$files_to_play" +# vim: ft=sh diff --git a/hm/soispha/pkgs/scripts/specific/ytcc/yts b/hm/soispha/pkgs/scripts/specific/ytcc/yts new file mode 100755 index 00000000..b5edf52c --- /dev/null +++ b/hm/soispha/pkgs/scripts/specific/ytcc/yts @@ -0,0 +1,65 @@ +#!/usr/bin/env dash + +# shellcheck source=/dev/null +SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH + +TASK_UUID=ce4f9e07-8324-4570-8be6-967955e9271e + +cleanup() { + task stop "$TASK_UUID" +} +trap cleanup EXIT + +help_text=" +# +# Commands: +# w, watch = watch id +# d, drop = mark id as watched +# p, pick = leave id as is; This is a noop +# +# These lines can be re-ordered; they are executed from top to bottom. +# vim: ft=gitrebase" + +table="$(ytcc --output json list | jq --raw-output 'map("pick \(.id) \(.title) (\(.playlists | map(.name) | join(", "))) [\(.duration | gsub("^\\s+|\\s+$";""))]") | join("\n")')" + +selection_file="$(mktmp)"; + +task start "$TASK_UUID" + +echo "$table" > "$selection_file"; +echo "$help_text" >> "$selection_file"; + +$EDITOR "$selection_file" + +ids="" +is_first=true; +while read -r line; do + cmd="$(echo "$line" | awk '{print $1}')"; + case "$cmd" in + "#" ) + # This is a comment, do nothing here + ;; + "pick" | "p") + # noop do nothing here + ;; + "drop" | "d") + id="$(echo "$line" | awk '{print $2}')"; + ytcc mark "$id"; + dbg "Marked as watched: $id" + ;; + "watch" | "w") + id="$(echo "$line" | awk '{print $2}')"; + if [ "$is_first" = "true" ]; then + ids="$id"; + else + ids="$ids,$id"; + fi + dbg "Added to be watched: $id" + is_first=false + ;; + esac +done < "$selection_file" + +[ "$ids" != "" ] && ytc "$ids"; + +# vim: ft=sh diff --git a/hm/soispha/pkgs/scripts/wrappers/ytc b/hm/soispha/pkgs/scripts/wrappers/ytc deleted file mode 100755 index c607ea81..00000000 --- a/hm/soispha/pkgs/scripts/wrappers/ytc +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env dash - -# shellcheck source=/dev/null -SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH -CONCURRENT=4 -OUTPUT_PATH="/tmp/ytcc"; - -col() { - echo "$1" | csvtool -t ';' -u ';' col "$2" - -} - -play() { - msg2 "Playing: '$1'" - mpv "$1" --speed=2.7 --volume=75 - output="$?"; - - if [ "$output" -eq 0 ]; then - msg2 "Removing: $1" - rm "$1" - msg2 "Marking: " "$2" - ytcc mark "$2" - fi - return "$output" -} - -escape() { - echo "$1" | awk '{gsub(/;/, ","); print}' -} - -bases="$(ytcc --output json list --attributes url --ids "$@" | jq --raw-output 'map("\(.url);\(.id)") | join("\n")')"; - -yt_flags="$(mktmp)" -cat << EOF > "$yt_flags" ---format bestvideo[height<=?1080]+bestaudio/best ---embed-chapters ---progress ---write-comments ---extractor-args youtube:max_comments=150,all,100;comment_sort=top ---write-info-json ---sponsorblock-mark default ---sponsorblock-remove sponsor -EOF - -[ -d "$OUTPUT_PATH" ] || mkdir "$OUTPUT_PATH"; -cd "$OUTPUT_PATH" || die "(Bug): Was created" - -filename_file="$(mktmp)"; -files_to_play="$(mktmp)"; -while read -r base; do - url="$(col "$base" 1)"; - id="$(col "$base" 2)" - - if [ "$old_filename" ]; then - echo "$(escape "$old_filename");$old_id" >> "$files_to_play" - - # Check if the process (pid) exists - dbg "PID is '$pid'" - if ! kill -0 "$pid"; then - saved_base="$(head -n 1 "$files_to_play")"; - sed -i '1d' "$files_to_play"; - saved_name="$(col "$saved_base" 1)"; - saved_id="$(col "$saved_base" 2)" - - dbg "Started play for '$saved_name'" - play "$saved_name" "$saved_id" & - pid=$! - else - dbg "Storing for later '$old_filename'" - fi - fi - - # The sub shell needs to be unquoted, as the arguments may not be treated as one. - # shellcheck disable=2046 - yt-dlp $(cat "$yt_flags") --output "%(channel)s/%(title)s.%(ext)s" "$url" --print-to-file after_move:filepath "$filename_file" - - filename="$(cat "$filename_file")" - printf "" > "$filename_file" - - if [ "$old_filename" ]; then - if [ "$(wc -l < "$files_to_play")" -gt "$CONCURRENT" ]; then - msg2 "Waiting for '$pid' to finish as we already have '$(wc -l < "$files_to_play")' files cached" - wait "$pid" - fi - fi - - old_filename="$filename"; - old_id="$id"; -done < "$(tmp echo "$bases")" - -wait "$pid" -echo "$(escape "$old_filename");$old_id" >> "$files_to_play" - -while read -r base; do - name="$(col "$base" 1)"; - id="$(col "$base" 2)" - - dbg "Started play for '$name'" - play "$name" "$id" -done < "$files_to_play" -# vim: ft=sh diff --git a/hm/soispha/pkgs/scripts/wrappers/yts b/hm/soispha/pkgs/scripts/wrappers/yts deleted file mode 100755 index b5edf52c..00000000 --- a/hm/soispha/pkgs/scripts/wrappers/yts +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env dash - -# shellcheck source=/dev/null -SHELL_LIBRARY_VERSION="1.10.2" . %SHELL_LIBRARY_PATH - -TASK_UUID=ce4f9e07-8324-4570-8be6-967955e9271e - -cleanup() { - task stop "$TASK_UUID" -} -trap cleanup EXIT - -help_text=" -# -# Commands: -# w, watch = watch id -# d, drop = mark id as watched -# p, pick = leave id as is; This is a noop -# -# These lines can be re-ordered; they are executed from top to bottom. -# vim: ft=gitrebase" - -table="$(ytcc --output json list | jq --raw-output 'map("pick \(.id) \(.title) (\(.playlists | map(.name) | join(", "))) [\(.duration | gsub("^\\s+|\\s+$";""))]") | join("\n")')" - -selection_file="$(mktmp)"; - -task start "$TASK_UUID" - -echo "$table" > "$selection_file"; -echo "$help_text" >> "$selection_file"; - -$EDITOR "$selection_file" - -ids="" -is_first=true; -while read -r line; do - cmd="$(echo "$line" | awk '{print $1}')"; - case "$cmd" in - "#" ) - # This is a comment, do nothing here - ;; - "pick" | "p") - # noop do nothing here - ;; - "drop" | "d") - id="$(echo "$line" | awk '{print $2}')"; - ytcc mark "$id"; - dbg "Marked as watched: $id" - ;; - "watch" | "w") - id="$(echo "$line" | awk '{print $2}')"; - if [ "$is_first" = "true" ]; then - ids="$id"; - else - ids="$ids,$id"; - fi - dbg "Added to be watched: $id" - is_first=false - ;; - esac -done < "$selection_file" - -[ "$ids" != "" ] && ytc "$ids"; - -# vim: ft=sh -- cgit 1.4.1