diff options
Diffstat (limited to 'src/instances/get_instances.py')
-rw-r--r-- | src/instances/get_instances.py | 484 |
1 files changed, 167 insertions, 317 deletions
diff --git a/src/instances/get_instances.py b/src/instances/get_instances.py index a95eb46f..770f2581 100644 --- a/src/instances/get_instances.py +++ b/src/instances/get_instances.py @@ -3,12 +3,10 @@ import requests import json from urllib.parse import urlparse -from bs4 import BeautifulSoup import re from colorama import Fore, Back, Style from urllib.parse import urlparse import socket -import subprocess mightyList = {} @@ -18,6 +16,11 @@ torRegex = startRegex + "onion" + endRegex i2pRegex = startRegex + "i2p" + endRegex lokiRegex = startRegex + "loki" + endRegex authRegex = "https?:\/{2}\S+:\S+@(?:[^\s\/]+\.)+[a-zA-Z0-9]+" + endRegex +config = {} + +with open('./config/config.json') as file: + config = file + def filterLastSlash(urlList): tmp = {} @@ -102,70 +105,162 @@ def is_offline(url): except: return False +def fetchCache(frontend, name) : + # json_object = json.dumps(mightyList, ensure_ascii=False, indent=2) + with open('./src/instances/data.json') as file: + mightyList[frontend] = json.load(file).frontend + print(Fore.ORANGE + 'Failed' + Style.RESET_ALL + ' to fetch ' + name) + +def fetchFromFile(frontend, name): + #json_object = json.dumps(mightyList, ensure_ascii=False, indent=2) + with open('./src/instances/' + frontend + '.json') as file: + mightyList[frontend] = json.load(file) + print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name) + +def fetchJsonList(frontend, name, url, urlItem): + try: + r = requests.get(url) + rJson = json.loads(r.text) + _list = {} + for network in config.networks: + _list[network] = [] + if type(urlItem) is 'str': + for item in rJson: + if urlItem is not None: + tmpUrl = item[urlItem] + else: + tmpUrl = item + if tmpUrl.strip() == "": + continue + elif re.search(torRegex, tmpUrl): + _list['tor'].append(tmpUrl) + elif re.search(i2pRegex, tmpUrl): + _list['i2p'].append(tmpUrl) + elif re.search(lokiRegex, tmpUrl): + _list['loki'].append(tmpUrl) + else: + _list['clearnet'].append(tmpUrl) + else: + for i in range(config.networks.length): + # The expected order is the same as in config.json. If the frontend doesn't have any instances for a specified network, use None + if urlItem != None: + for item in rJson: + if network in item: + if item[network].strip() != "": + _list[config.networks[i]].append(item[urlItem[i]]) + + mightyList[frontend] = _list + print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name) + except: + fetchCache(frontend, name) + +def fetchRegexList(frontend, name, url, regex): + try: + r = requests.get(url) + _list = {} + for network in config.networks: + _list[network] = [] + + tmp = re.findall(regex, r.text) + + for item in tmp: + if item.strip() == "": + continue + elif re.search(torRegex, item): + _list['tor'].append(item) + elif re.search(i2pRegex, item): + _list['i2p'].append(item) + elif re.search(lokiRegex, item): + _list['loki'].append(item) + else: + _list['clearnet'].append(item) + mightyList[frontend] = _list + print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name) + except: + fetchCache(frontend, name) + +def fetchTextList(frontend, name, url, prepend): + try: + r = requests.get(url) + tmp = r.text.strip().split('\n') + + _list = {} + for network in config.networks: + _list[network] = [] + + for item in tmp: + item = prepend + item + if re.search(torRegex, item): + _list['tor'].append(item) + elif re.search(i2pRegex, item): + _list['i2p'].append(item) + elif re.search(lokiRegex, item): + _list['loki'].append(item) + else: + _list['clearnet'].append(item) + mightyList[frontend] = _list + print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name) + except: + fetchCache(frontend, name) + def invidious(): - r = requests.get('https://api.invidious.io/instances.json') - rJson = json.loads(r.text) - invidiousList = {} - invidiousList['clearnet'] = [] - invidiousList['tor'] = [] - invidiousList['i2p'] = [] - invidiousList['loki'] = [] - for instance in rJson: - if instance[1]['type'] == 'https': - invidiousList['clearnet'].append(instance[1]['uri']) - elif instance[1]['type'] == 'onion': - invidiousList['tor'].append(instance[1]['uri']) - mightyList['invidious'] = invidiousList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Invidious') + name = 'Invidious' + frontend = 'invidious' + try: + _list = {} + _list['clearnet'] = [] + _list['tor'] = [] + _list['i2p'] = [] + _list['loki'] = [] + r = requests.get('https://api.invidious.io/instances.json') + rJson = json.loads(r.text) + for instance in rJson: + if instance[1]['type'] == 'https': + _list['clearnet'].append(instance[1]['uri']) + elif instance[1]['type'] == 'onion': + _list['tor'].append(instance[1]['uri']) + mightyList[frontend] = _list + print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name) + except: + fetchCache(frontend, name) def piped(): - r = requests.get( - 'https://raw.githubusercontent.com/wiki/TeamPiped/Piped/Instances.md') - - tmp = re.findall( - '(?:[^\s\/]+\.)+[a-zA-Z]+ (?:\(Official\) )?\| (https:\/{2}(?:[^\s\/]+\.)+[a-zA-Z]+) \| ', r.text) - _list = {} - _list['clearnet'] = [] - _list['tor'] = [] - _list['i2p'] = [] - _list['loki'] = [] - for item in tmp: - try: - url = requests.get(item, timeout=5).url - if url.strip("/") == item: + frontend = 'piped' + name = 'Piped' + try: + _list = {} + _list['clearnet'] = [] + _list['tor'] = [] + _list['i2p'] = [] + _list['loki'] = [] + r = requests.get( + 'https://raw.githubusercontent.com/wiki/TeamPiped/Piped/Instances.md') + + tmp = re.findall( + '(?:[^\s\/]+\.)+[a-zA-Z]+ (?:\(Official\) )?\| (https:\/{2}(?:[^\s\/]+\.)+[a-zA-Z]+) \| ', r.text) + for item in tmp: + try: + url = requests.get(item, timeout=5).url + if url.strip("/") == item: + continue + else: + _list['clearnet'].append(url) + except: continue - else: - _list['clearnet'].append(url) - except: - continue - mightyList['piped'] = _list - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Piped') + mightyList[frontend] = _list + print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name) + except: + fetchCache(frontend, name) def pipedMaterial(): - r = requests.get( - 'https://raw.githubusercontent.com/mmjee/Piped-Material/master/README.md') - - tmp = re.findall( - r"\| (https?:\/{2}(?:\S+\.)+[a-zA-Z0-9]*) +\|", r.text) - pipedMaterialList = {} - pipedMaterialList['clearnet'] = [] - pipedMaterialList['tor'] = [] - pipedMaterialList['i2p'] = [] - pipedMaterialList['loki'] = [] - for item in tmp: - pipedMaterialList['clearnet'].append(item) - mightyList['pipedMaterial'] = pipedMaterialList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'pipedMaterial') + fetchRegexList('pipedMaterial', 'Piped-Material', 'https://raw.githubusercontent.com/mmjee/Piped-Material/master/README.md', r"\| (https?:\/{2}(?:\S+\.)+[a-zA-Z0-9]*) +\|") def cloudtube(): - json_object = json.dumps(mightyList, ensure_ascii=False, indent=2) - with open('./src/instances/cloudtube.json') as file: - mightyList['cloudtube'] = json.load(file) - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'CloudTube') + fetchCache('cloudtube', 'Cloudtube') def proxitok(): @@ -186,19 +281,7 @@ def proxitok(): def send(): - r = requests.get( - 'https://gitlab.com/timvisee/send-instances/-/raw/master/README.md') - tmp = re.findall( - r"- ([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z0-9]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}", r.text) - sendList = {} - sendList['clearnet'] = [] - sendList['tor'] = [] - sendList['i2p'] = [] - sendList['loki'] = [] - for item in tmp: - sendList['clearnet'].append(item) - mightyList['send'] = sendList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Send') + fetchRegexList('send', 'Send', 'https://gitlab.com/timvisee/send-instances/-/raw/master/README.md', r"- ([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z0-9]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}") def nitter(): @@ -230,150 +313,35 @@ def nitter(): def bibliogram(): - json_object = json.dumps(mightyList, ensure_ascii=False, indent=2) - with open('./src/instances/bibliogram.json') as file: - mightyList['bibliogram'] = json.load(file) - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Bibliogram') + fetchFromFile('bibliogram', 'Bibliogram') def libreddit(): - r = requests.get( - 'https://raw.githubusercontent.com/spikecodes/libreddit/master/README.md') - libredditList = {} - libredditList['clearnet'] = [] - libredditList['tor'] = [] - libredditList['i2p'] = [] - libredditList['loki'] = [] - - tmp = re.findall( - r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text) - - for item in tmp: - if re.search(torRegex, item): - libredditList['tor'].append(item) - else: - libredditList['clearnet'].append(item) - mightyList['libreddit'] = libredditList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'LibReddit') + fetchRegexList('libreddit', 'Libreddit', 'https://raw.githubusercontent.com/spikecodes/libreddit/master/README.md', r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|") def teddit(): - r = requests.get( - 'https://codeberg.org/teddit/teddit/raw/branch/main/instances.json') - rJson = json.loads(r.text) - tedditList = {} - tedditList['clearnet'] = [] - tedditList['tor'] = [] - tedditList['i2p'] = [] - tedditList['loki'] = [] - for item in rJson: - url = item['url'] - if url != '': - tedditList['clearnet'].append(url) - if 'onion' in item: - onion = item['onion'] - if onion != '': - tedditList['tor'].append(onion) - - mightyList['teddit'] = tedditList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Teddit') + fetchJsonList('teddit', 'Teddit', 'https://codeberg.org/teddit/teddit/raw/branch/main/instances.json', ['url', 'onion', 'i2p', None]) def wikiless(): - r = requests.get('https://wikiless.org/instances.json') - rJson = json.loads(r.text) - wikilessList = {} - wikilessList['clearnet'] = [] - wikilessList['tor'] = [] - wikilessList['i2p'] = [] - wikilessList['loki'] = [] - for item in rJson: - if 'url' in item: - if item['url'].strip() != "": - wikilessList['clearnet'].append(item['url']) - if 'onion' in item: - if item['onion'].strip() != "": - wikilessList['tor'].append(item['onion']) - if 'i2p' in item: - if item['i2p'].strip() != "": - wikilessList['i2p'].append(item['i2p']) - mightyList['wikiless'] = wikilessList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Wikiless') + fetchJsonList('wikiless', 'Wikiless', 'https://wikiless.org/instances.json', ['url', 'onion', 'i2p', None]) def scribe(): - r = requests.get( - 'https://git.sr.ht/~edwardloveall/scribe/blob/main/docs/instances.json') - rJson = json.loads(r.text) - scribeList = {} - scribeList['clearnet'] = [] - scribeList['tor'] = [] - scribeList['i2p'] = [] - scribeList['loki'] = [] - for item in rJson: - scribeList['clearnet'].append(item) - mightyList['scribe'] = scribeList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Scribe') + fetchJsonList('scribe', 'Scribe', 'https://git.sr.ht/~edwardloveall/scribe/blob/main/docs/instances.json', None) def quetre(): - r = requests.get( - 'https://raw.githubusercontent.com/zyachel/quetre/main/README.md') - _list = {} - _list['clearnet'] = [] - _list['tor'] = [] - _list['i2p'] = [] - _list['loki'] = [] - - tmp = re.findall( - r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text) - - - for item in tmp: - if re.search(torRegex, item): - _list['tor'].append(item) - else: - _list['clearnet'].append(item) - mightyList['quetre'] = _list - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Quetre') + fetchRegexList('quetre', 'Quetre', 'https://raw.githubusercontent.com/zyachel/quetre/main/README.md', r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|") def libremdb(): - r = requests.get( - 'https://raw.githubusercontent.com/zyachel/libremdb/main/README.md') - _list = {} - _list['clearnet'] = [] - _list['tor'] = [] - _list['i2p'] = [] - _list['loki'] = [] - - tmp = re.findall( - r"\| ([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)*\|*[A-Z]{0,}.*\|.*\|", r.text) + fetchRegexList('libremdb', 'libremdb', 'https://raw.githubusercontent.com/zyachel/libremdb/main/README.md', r"\| ([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)*\|*[A-Z]{0,}.*\|.*\|") - for item in tmp: - if item.strip() == "": - continue - if re.search(torRegex, item): - _list['tor'].append(item) - else: - _list['clearnet'].append(item) - - mightyList['libremdb'] = _list - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Libremdb') - def simpleertube(): - r = requests.get('https://simple-web.org/instances/simpleertube') - _list = {} - _list['clearnet'] = [] - _list['tor'] = [] - _list['i2p'] = [] - _list['loki'] = [] - for item in r.text.strip().split('\n'): - _list['clearnet'].append('https://' + item) - - mightyList['simpleertube'] = _list - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'SimpleerTube') + fetchTextList('simpleertube', 'SimpleerTube', 'https://simple-web.org/instances/simpleertube', 'https://') def simplytranslate(): @@ -403,19 +371,7 @@ def simplytranslate(): def linvgatranslate(): - r = requests.get( - 'https://raw.githubusercontent.com/TheDavidDelta/lingva-translate/main/instances.json') - rJson = json.loads(r.text) - lingvaList = {} - lingvaList['clearnet'] = [] - lingvaList['tor'] = [] - lingvaList['i2p'] = [] - lingvaList['loki'] = [] - for item in rJson: - lingvaList['clearnet'].append(item) - - mightyList['lingva'] = lingvaList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'LinvgaTranslate') + fetchJsonList('lingva', 'LingvaTranslate', 'https://raw.githubusercontent.com/TheDavidDelta/lingva-translate/main/instances.json', None) def searx_searxng(): @@ -454,140 +410,34 @@ def searx_searxng(): def whoogle(): - r = requests.get( - 'https://raw.githubusercontent.com/benbusby/whoogle-search/main/misc/instances.txt') - tmpList = r.text.strip().split('\n') - whoogleList = {} - whoogleList['clearnet'] = [] - whoogleList['tor'] = [] - whoogleList['i2p'] = [] - whoogleList['loki'] = [] - for item in tmpList: - if re.search(torRegex, item): - whoogleList['tor'].append(item) - elif re.search(torRegex, item): - whoogleList['i2p'].append(item) - else: - whoogleList['clearnet'].append(item) - mightyList['whoogle'] = whoogleList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Whoogle') + fetchTextList('whoogle', 'Whoogle', 'https://raw.githubusercontent.com/benbusby/whoogle-search/main/misc/instances.txt', '') def librex(): - r = requests.get( - 'https://raw.githubusercontent.com/hnhx/librex/main/README.md') - _list = {} - _list['clearnet'] = [] - _list['tor'] = [] - _list['i2p'] = [] - _list['loki'] = [] - - tmp = re.findall( - r"\| {1,2}\[(?:(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,}|✅)\]\((https?:\/{2}(?:[a-zA-Z0-9]+\.)+[a-zA-Z0-9]{2,})", r.text) - - for item in tmp: - if item.strip() == "": - continue - elif re.search(torRegex, item): - _list['tor'].append(item) - elif re.search(i2pRegex, item): - _list['i2p'].append(item) - else: - _list['clearnet'].append(item) - mightyList['librex'] = _list - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Librex') + fetchRegexList('librex', 'LibreX', 'https://raw.githubusercontent.com/hnhx/librex/main/README.md', r"\| {1,2}\[(?:(?:[a-zA-Z0-9]+\.)+[a-zA-Z]{2,}|✅)\]\((https?:\/{2}(?:[a-zA-Z0-9]+\.)+[a-zA-Z0-9]{2,})") def rimgo(): - r = requests.get( - 'https://codeberg.org/video-prize-ranch/rimgo/raw/branch/main/instances.json') - rJson = json.loads(r.text) - rimgoList = {} - rimgoList['clearnet'] = [] - rimgoList['tor'] = [] - rimgoList['i2p'] = [] - rimgoList['loki'] = [] - for item in rJson: - if 'url' in item: - rimgoList['clearnet'].append(item['url']) - if 'onion' in item: - rimgoList['tor'].append(item['onion']) - if 'i2p' in item: - rimgoList['i2p'].append(item['i2p']) - mightyList['rimgo'] = rimgoList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Rimgo') + fetchJsonList('rimgo', 'rimgo', 'https://codeberg.org/video-prize-ranch/rimgo/raw/branch/main/instances.json', ['url', 'onion', 'i2p', None]) def librarian(): - r = requests.get( - 'https://codeberg.org/librarian/librarian/raw/branch/main/instances.json') - rJson = json.loads(r.text) - librarianList = {} - librarianList['clearnet'] = [] - librarianList['tor'] = [] - librarianList['i2p'] = [] - librarianList['loki'] = [] - instances = rJson['instances'] - for item in instances: - url = item['url'] - if url.strip() == "": - continue - elif re.search(torRegex, url): - librarianList['tor'].append(url) - elif re.search(i2pRegex, url): - librarianList['i2p'].append(url) - elif re.search(lokiRegex, url): - librarianList['loki'].append(url) - else: - librarianList['clearnet'].append(url) - mightyList['librarian'] = librarianList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Librarian') - + fetchJsonList('librarian', 'Librarian', 'https://codeberg.org/librarian/librarian/raw/branch/main/instances.json', 'url') def neuters(): - json_object = json.dumps(mightyList, ensure_ascii=False, indent=2) - with open('./src/instances/neuters.json') as file: - mightyList['neuters'] = json.load(file) - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Neuters') + fetchFromFile('neuters', 'Neuters') def beatbump(): - json_object = json.dumps(mightyList, ensure_ascii=False, indent=2) - with open('./src/instances/beatbump.json') as file: - mightyList['beatbump'] = json.load(file) - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Beatbump') + fetchFromFile('beatbump', 'Beatbump') def hyperpipe(): - r = requests.get( - 'https://codeberg.org/Hyperpipe/pages/raw/branch/main/api/frontend.json') - rJson = json.loads(r.text) - hyperpipeList = {} - hyperpipeList['clearnet'] = [] - hyperpipeList['tor'] = [] - hyperpipeList['i2p'] = [] - hyperpipeList['loki'] = [] - for item in rJson: - url = item['url'] - if url.strip() == "": - continue - elif re.search(torRegex, url): - hyperpipeList['tor'].append(url) - elif re.search(i2pRegex, url): - hyperpipeList['i2p'].append(url) - elif re.search(lokiRegex, url): - hyperpipeList['loki'].append(url) - else: - hyperpipeList['clearnet'].append(url) - mightyList['hyperpipe'] = hyperpipeList - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Hyperpipe') + fetchJsonList('hyperpipe', 'Hyperpipe', 'https://codeberg.org/Hyperpipe/pages/raw/branch/main/api/frontend.json', 'url') def facil(): - json_object = json.dumps(mightyList, ensure_ascii=False, indent=2) - with open('./src/instances/facil.json') as file: - mightyList['facil'] = json.load(file) - print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'FacilMap') + fetchFromFile('facil', 'FacilMap') def peertube(): |