diff options
author | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-08-21 10:49:23 +0200 |
---|---|---|
committer | Benedikt Peetz <benedikt.peetz@b-peetz.de> | 2024-08-21 11:28:43 +0200 |
commit | 1debeb77f7986de1b659dcfdc442de6415e1d9f5 (patch) | |
tree | 4df3e7c3f6a2d1ec116e4088c5ace7f143a8b05f /yt_dlp/src/wrapper | |
download | yt-1debeb77f7986de1b659dcfdc442de6415e1d9f5.zip |
chore: Initial Commit
This repository was migrated out of my nixos-config.
Diffstat (limited to '')
-rw-r--r-- | yt_dlp/src/wrapper/info_json.rs | 526 | ||||
-rw-r--r-- | yt_dlp/src/wrapper/mod.rs | 12 | ||||
-rw-r--r-- | yt_dlp/src/wrapper/yt_dlp_options.rs | 62 |
3 files changed, 600 insertions, 0 deletions
diff --git a/yt_dlp/src/wrapper/info_json.rs b/yt_dlp/src/wrapper/info_json.rs new file mode 100644 index 0000000..aceeeb8 --- /dev/null +++ b/yt_dlp/src/wrapper/info_json.rs @@ -0,0 +1,526 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use std::{collections::HashMap, path::PathBuf}; + +use pyo3::{types::PyDict, Bound, PyResult, Python}; +use serde::{Deserialize, Deserializer, Serialize}; +use serde_json::Value; +use url::Url; + +use crate::json_loads_str; + +type Todo = String; + +// TODO: Change this to map `_type` to a structure of values, instead of the options <2024-05-27> +// And replace all the strings with better types (enums or urls) +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct InfoJson { + pub __last_playlist_index: Option<u32>, + pub __post_extractor: Option<String>, + pub __x_forwarded_for_ip: Option<String>, + pub _filename: Option<PathBuf>, + pub _format_sort_fields: Option<Vec<String>>, + pub _has_drm: Option<Todo>, + pub _type: Option<InfoType>, + pub _version: Option<Version>, + pub abr: Option<f64>, + pub acodec: Option<String>, + pub age_limit: Option<u32>, + pub aspect_ratio: Option<f64>, + pub asr: Option<u32>, + pub audio_channels: Option<u32>, + pub audio_ext: Option<String>, + pub automatic_captions: Option<HashMap<String, Vec<Caption>>>, + pub availability: Option<String>, + pub average_rating: Option<String>, + pub categories: Option<Vec<String>>, + pub channel: Option<String>, + pub channel_follower_count: Option<u32>, + pub channel_id: Option<String>, + pub channel_is_verified: Option<bool>, + pub channel_url: Option<String>, + pub chapters: Option<Vec<Chapter>>, + pub comment_count: Option<u32>, + pub comments: Option<Vec<Comment>>, + pub concurrent_view_count: Option<u32>, + pub description: Option<String>, + pub display_id: Option<String>, + pub downloader_options: Option<DownloaderOptions>, + pub duration: Option<f64>, + pub duration_string: Option<String>, + pub dynamic_range: Option<String>, + pub entries: Option<Vec<InfoJson>>, + pub episode: Option<String>, + pub episode_number: Option<u32>, + pub epoch: Option<u32>, + pub ext: Option<String>, + pub extractor: Option<Extractor>, + pub extractor_key: Option<ExtractorKey>, + pub filename: Option<PathBuf>, + pub filesize: Option<u64>, + pub filesize_approx: Option<u64>, + pub format: Option<String>, + pub format_id: Option<String>, + pub format_note: Option<String>, + pub formats: Option<Vec<Format>>, + pub fps: Option<f64>, + pub fulltitle: Option<String>, + pub has_drm: Option<bool>, + pub heatmap: Option<Vec<HeatMapEntry>>, + pub height: Option<u32>, + pub http_headers: Option<HttpHeader>, + pub id: Option<String>, + pub ie_key: Option<ExtractorKey>, + pub is_live: Option<bool>, + pub language: Option<String>, + pub language_preference: Option<i32>, + pub license: Option<Todo>, + pub like_count: Option<u32>, + pub live_status: Option<String>, + pub location: Option<Todo>, + pub modified_date: Option<String>, + pub n_entries: Option<u32>, + pub original_url: Option<String>, + pub playable_in_embed: Option<bool>, + pub playlist: Option<Todo>, + pub playlist_autonumber: Option<u32>, + pub playlist_channel: Option<Todo>, + pub playlist_channel_id: Option<Todo>, + pub playlist_count: Option<u32>, + pub playlist_id: Option<Todo>, + pub playlist_index: Option<u64>, + pub playlist_title: Option<Todo>, + pub playlist_uploader: Option<Todo>, + pub playlist_uploader_id: Option<Todo>, + pub preference: Option<Todo>, + pub protocol: Option<String>, + pub quality: Option<f64>, + pub release_date: Option<String>, + pub release_timestamp: Option<u64>, + pub release_year: Option<u32>, + pub requested_downloads: Option<Vec<RequestedDownloads>>, + pub requested_entries: Option<Vec<u32>>, + pub requested_formats: Option<Vec<Format>>, + pub requested_subtitles: Option<HashMap<String, Subtitle>>, + pub resolution: Option<String>, + pub season: Option<String>, + pub season_number: Option<u32>, + pub series: Option<String>, + pub source_preference: Option<i32>, + pub sponsorblock_chapters: Option<Vec<SponsorblockChapter>>, + pub stretched_ratio: Option<Todo>, + pub subtitles: Option<HashMap<String, Vec<Caption>>>, + pub tags: Option<Vec<String>>, + pub tbr: Option<f64>, + pub thumbnail: Option<Url>, + pub thumbnails: Option<Vec<ThumbNail>>, + pub timestamp: Option<u64>, + pub title: Option<String>, + pub upload_date: Option<String>, + pub uploader: Option<String>, + pub uploader_id: Option<String>, + pub uploader_url: Option<String>, + pub url: Option<Url>, + pub vbr: Option<f64>, + pub vcodec: Option<String>, + pub video_ext: Option<String>, + pub view_count: Option<u32>, + pub was_live: Option<bool>, + pub webpage_url: Option<Url>, + pub webpage_url_basename: Option<String>, + pub webpage_url_domain: Option<String>, + pub width: Option<u32>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct RequestedDownloads { + pub __files_to_merge: Option<Vec<Todo>>, + pub __finaldir: PathBuf, + pub __infojson_filename: PathBuf, + pub __postprocessors: Vec<Todo>, + pub __real_download: bool, + pub __write_download_archive: bool, + pub _filename: PathBuf, + pub _type: InfoType, + pub _version: Version, + pub abr: f64, + pub acodec: String, + pub aspect_ratio: f64, + pub asr: u32, + pub audio_channels: u32, + pub chapters: Option<Vec<SponsorblockChapter>>, + pub duration: Option<f64>, + pub dynamic_range: String, + pub ext: String, + pub filename: PathBuf, + pub filepath: PathBuf, + pub filesize_approx: u64, + pub format: String, + pub format_id: String, + pub format_note: String, + pub fps: f64, + pub height: u32, + pub infojson_filename: PathBuf, + pub language: Option<String>, + pub protocol: String, + pub requested_formats: Vec<Format>, + pub resolution: String, + pub tbr: f64, + pub vbr: f64, + pub vcodec: String, + pub width: u32, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub struct Subtitle { + pub ext: SubtitleExt, + pub filepath: PathBuf, + pub name: String, + pub url: Url, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SubtitleExt { + #[serde(alias = "vtt")] + Vtt, + + #[serde(alias = "json")] + Json, + #[serde(alias = "json3")] + Json3, + + #[serde(alias = "ttml")] + Ttml, + + #[serde(alias = "srv1")] + Srv1, + #[serde(alias = "srv2")] + Srv2, + #[serde(alias = "srv3")] + Srv3, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Caption { + pub ext: SubtitleExt, + pub name: Option<String>, + pub protocol: Option<String>, + pub url: String, + pub filepath: Option<PathBuf>, + pub video_id: Option<String>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Chapter { + pub end_time: f64, + pub start_time: f64, + pub title: String, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq)] +#[serde(deny_unknown_fields)] +pub struct SponsorblockChapter { + /// This is an utterly useless field, and should thus be ignored + pub _categories: Option<Vec<Vec<Value>>>, + + pub categories: Option<Vec<SponsorblockChapterCategory>>, + pub category: Option<SponsorblockChapterCategory>, + pub category_names: Option<Vec<String>>, + pub end_time: f64, + pub name: Option<String>, + pub r#type: Option<SponsorblockChapterType>, + pub start_time: f64, + pub title: String, +} + +pub fn get_none<'de, D, T>(_: D) -> Result<Option<T>, D::Error> +where + D: Deserializer<'de>, +{ + Ok(None) +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SponsorblockChapterType { + #[serde(alias = "skip")] + Skip, + + #[serde(alias = "chapter")] + Chapter, +} +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum SponsorblockChapterCategory { + #[serde(alias = "filler")] + Filler, + + #[serde(alias = "sponsor")] + Sponsor, + + #[serde(alias = "selfpromo")] + SelfPromo, + + #[serde(alias = "chapter")] + Chapter, + + #[serde(alias = "intro")] + Intro, + + #[serde(alias = "outro")] + Outro, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct HeatMapEntry { + pub start_time: f64, + pub end_time: f64, + pub value: f64, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum Extractor { + #[serde(alias = "generic")] + Generic, + + #[serde(alias = "SVTSeries")] + SVTSeries, + + #[serde(alias = "youtube")] + YouTube, + + #[serde(alias = "youtube:tab")] + YouTubeTab, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum ExtractorKey { + #[serde(alias = "Generic")] + Generic, + + #[serde(alias = "SVTSeries")] + SVTSeries, + + #[serde(alias = "Youtube")] + YouTube, + + #[serde(alias = "YoutubeTab")] + YouTubeTab, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd, Ord, Eq)] +#[serde(deny_unknown_fields)] +pub enum InfoType { + #[serde(alias = "playlist")] + Playlist, + + #[serde(alias = "url")] + Url, + + #[serde(alias = "video")] + Video, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Version { + pub current_git_head: Option<String>, + pub release_git_head: String, + pub repository: String, + pub version: String, +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(from = "String")] +#[serde(deny_unknown_fields)] +pub enum Parent { + Root, + Id(String), +} + +impl Parent { + pub fn id(&self) -> Option<&str> { + if let Self::Id(id) = self { + Some(id) + } else { + None + } + } +} + +impl From<String> for Parent { + fn from(value: String) -> Self { + if value == "root" { + Self::Root + } else { + Self::Id(value) + } + } +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(from = "String")] +#[serde(deny_unknown_fields)] +pub struct Id { + pub id: String, +} +impl From<String> for Id { + fn from(value: String) -> Self { + Self { + // Take the last element if the string is split with dots, otherwise take the full id + id: value.split('.').last().unwrap_or(&value).to_owned(), + } + } +} + +#[derive(Debug, Deserialize, Serialize, Clone, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct Comment { + pub id: Id, + pub text: String, + #[serde(default = "zero")] + pub like_count: u32, + pub is_pinned: bool, + pub author_id: String, + #[serde(default = "unknown")] + pub author: String, + pub author_is_verified: bool, + pub author_thumbnail: Url, + pub parent: Parent, + #[serde(deserialize_with = "edited_from_time_text", alias = "_time_text")] + pub edited: bool, + // Can't also be deserialized, as it's already used in 'edited' + // _time_text: String, + pub timestamp: i64, + pub author_url: Url, + pub author_is_uploader: bool, + pub is_favorited: bool, +} +fn unknown() -> String { + "<Unknown>".to_string() +} +fn zero() -> u32 { + 0 +} +fn edited_from_time_text<'de, D>(d: D) -> Result<bool, D::Error> +where + D: Deserializer<'de>, +{ + let s = String::deserialize(d)?; + if s.contains(" (edited)") { + Ok(true) + } else { + Ok(false) + } +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct ThumbNail { + pub id: Option<String>, + pub preference: Option<i32>, + /// in the form of "[`height`]x[`width`]" + pub resolution: Option<String>, + pub url: Url, + pub width: Option<u32>, + pub height: Option<u32>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Format { + pub __needs_testing: Option<bool>, + pub __working: Option<bool>, + pub abr: Option<f64>, + pub acodec: Option<String>, + pub aspect_ratio: Option<f64>, + pub asr: Option<f64>, + pub audio_channels: Option<u32>, + pub audio_ext: Option<String>, + pub columns: Option<u32>, + pub container: Option<String>, + pub downloader_options: Option<DownloaderOptions>, + pub dynamic_range: Option<String>, + pub ext: String, + pub filepath: Option<PathBuf>, + pub filesize: Option<u64>, + pub filesize_approx: Option<u64>, + pub format: Option<String>, + pub format_id: String, + pub format_index: Option<String>, + pub format_note: Option<String>, + pub fps: Option<f64>, + pub fragment_base_url: Option<Todo>, + pub fragments: Option<Vec<Fragment>>, + pub has_drm: Option<bool>, + pub height: Option<u32>, + pub http_headers: Option<HttpHeader>, + pub is_dash_periods: Option<bool>, + pub language: Option<String>, + pub language_preference: Option<i32>, + pub manifest_stream_number: Option<u32>, + pub manifest_url: Option<Url>, + pub preference: Option<i32>, + pub protocol: Option<String>, + pub quality: Option<f64>, + pub resolution: Option<String>, + pub rows: Option<u32>, + pub source_preference: Option<i32>, + pub tbr: Option<f64>, + pub url: Url, + pub vbr: Option<f64>, + pub vcodec: String, + pub video_ext: Option<String>, + pub width: Option<u32>, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct DownloaderOptions { + http_chunk_size: u64, +} + +#[derive(Debug, Deserialize, Serialize, Eq, PartialEq, PartialOrd, Ord)] +#[serde(deny_unknown_fields)] +pub struct HttpHeader { + #[serde(alias = "User-Agent")] + pub user_agent: Option<String>, + #[serde(alias = "Accept")] + pub accept: Option<String>, + #[serde(alias = "Accept-Language")] + pub accept_language: Option<String>, + #[serde(alias = "Sec-Fetch-Mode")] + pub sec_fetch_mode: Option<String>, +} + +#[derive(Debug, Deserialize, Serialize, PartialEq, PartialOrd)] +#[serde(deny_unknown_fields)] +pub struct Fragment { + pub url: Option<Url>, + pub duration: Option<f64>, + pub path: Option<PathBuf>, +} + +impl InfoJson { + pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> { + let output: Bound<PyDict> = json_loads_str(py, self)?; + Ok(output) + } +} diff --git a/yt_dlp/src/wrapper/mod.rs b/yt_dlp/src/wrapper/mod.rs new file mode 100644 index 0000000..3fe3247 --- /dev/null +++ b/yt_dlp/src/wrapper/mod.rs @@ -0,0 +1,12 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +pub mod info_json; +// pub mod yt_dlp_options; diff --git a/yt_dlp/src/wrapper/yt_dlp_options.rs b/yt_dlp/src/wrapper/yt_dlp_options.rs new file mode 100644 index 0000000..c2a86df --- /dev/null +++ b/yt_dlp/src/wrapper/yt_dlp_options.rs @@ -0,0 +1,62 @@ +// yt - A fully featured command line YouTube client +// +// Copyright (C) 2024 Benedikt Peetz <benedikt.peetz@b-peetz.de> +// SPDX-License-Identifier: GPL-3.0-or-later +// +// This file is part of Yt. +// +// You should have received a copy of the License along with this program. +// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>. + +use pyo3::{types::PyDict, Bound, PyResult, Python}; +use serde::Serialize; + +use crate::json_loads; + +#[derive(Serialize, Clone)] +pub struct YtDlpOptions { + pub playliststart: u32, + pub playlistend: u32, + pub noplaylist: bool, + pub extract_flat: ExtractFlat, + // pub extractor_args: ExtractorArgs, + // pub format: String, + // pub fragment_retries: u32, + // #[serde(rename(serialize = "getcomments"))] + // pub get_comments: bool, + // #[serde(rename(serialize = "ignoreerrors"))] + // pub ignore_errors: bool, + // pub retries: u32, + // #[serde(rename(serialize = "writeinfojson"))] + // pub write_info_json: bool, + // pub postprocessors: Vec<serde_json::Map<String, serde_json::Value>>, +} + +#[derive(Serialize, Copy, Clone)] +pub enum ExtractFlat { + #[serde(rename(serialize = "in_playlist"))] + InPlaylist, + + #[serde(rename(serialize = "discard_in_playlist"))] + DiscardInPlaylist, +} + +#[derive(Serialize, Clone)] +pub struct ExtractorArgs { + pub youtube: YoutubeExtractorArgs, +} + +#[derive(Serialize, Clone)] +pub struct YoutubeExtractorArgs { + comment_sort: Vec<String>, + max_comments: Vec<String>, +} + +impl YtDlpOptions { + pub fn to_py_dict(self, py: Python) -> PyResult<Bound<PyDict>> { + let string = serde_json::to_string(&self).expect("This should always work"); + + let output: Bound<PyDict> = json_loads(py, string)?; + Ok(output) + } +} |