# Note: Run this script from the root of the repo
"""Collect public instance lists for several alternative front-ends.

Each section below downloads one upstream instance list, sorts the URLs into
'normal' / 'tor' / 'i2p' buckets and stores the result in ``mightyList``.
Afterwards every bucketed URL is validated, clearnet URLs are checked against
Cloudflare's published IPv4 ranges, and two files are written:

* ./src/instances/data.json        - the full ``mightyList``
* ./src/instances/cloudflare.json  - the instances that resolved to Cloudflare
"""

import json
import re
import socket
import subprocess
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from colorama import Fore, Back, Style

mightyList = {}


def filterLastSlash(urlList):
    """Return a copy of *urlList* with one trailing '/' stripped from each URL.

    Every URL that had a trailing slash is reported on stdout.
    """
    tmp = []
    for i in urlList:
        if i.endswith('/'):
            tmp.append(i[:-1])
            print(Fore.YELLOW + "Fixed " + Style.RESET_ALL + i)
        else:
            tmp.append(i)
    return tmp


def ip2bin(ip):
    """Return the dotted-decimal address *ip* as a string of bits.

    Each dot-separated component is rendered as 8 zero-padded binary digits.
    Raises ValueError when a component is not an integer.
    """
    return "".join("{0:08b}".format(int(x)) for x in ip.split("."))


def get_cloudflare_ips():
    """Download Cloudflare's IPv4 list and return its non-empty lines."""
    r = requests.get('https://www.cloudflare.com/ips-v4')
    # Blank lines (e.g. from a trailing newline) are dropped: they are not
    # CIDR ranges and would break the prefix comparison in is_cloudflare().
    return [line.strip() for line in r.text.splitlines() if line.strip()]


cloudflare_ips = get_cloudflare_ips()


def is_cloudflare(url):
    """Return True when the host of *url* resolves into a Cloudflare range.

    The host is resolved with ``socket.gethostbyname`` and the leading bits of
    the resulting address are compared with every ``address/mask`` entry of
    ``cloudflare_ips``. Hosts that cannot be parsed or resolved yield False.
    A match is also reported on stdout.
    """
    try:
        instance_ip = socket.gethostbyname(urlparse(url).hostname)
    except (OSError, TypeError, ValueError):
        # OSError: resolution failed; TypeError: the URL has no hostname;
        # ValueError: malformed URL or host name.
        return False
    if instance_ip is None:
        return False
    instance_bin = ip2bin(instance_ip)

    for cloudflare_ip_mask in cloudflare_ips:
        cloudflare_ip, _, mask_text = cloudflare_ip_mask.strip().partition('/')
        try:
            mask = int(mask_text)
            cloudflare_bin = ip2bin(cloudflare_ip)
        except ValueError:
            # Blank or malformed entry: skip it instead of aborting the run.
            continue

        # Same network when the first `mask` bits agree.
        if cloudflare_bin[:mask] == instance_bin[:mask]:
            print(url + ' is ' + Fore.RED + 'cloudflare' + Style.RESET_ALL)
            return True
    return False


# Invidious
r = requests.get('https://api.invidious.io/instances.json')
rJson = json.loads(r.text)
invidiousList = {'normal': [], 'tor': []}
for instance in rJson:
    # The second element of each entry carries the 'type' and 'uri' fields.
    details = instance[1]
    if details['type'] == 'https':
        invidiousList['normal'].append(details['uri'])
    elif details['type'] == 'onion':
        invidiousList['tor'].append(details['uri'])
mightyList['invidious'] = invidiousList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Invidious')


# ProxiTok
r = requests.get(
    'https://raw.githubusercontent.com/wiki/pablouser1/ProxiTok/Public-instances.md')
# Capture the link target of "| [label](url) | ... |" markdown table rows.
tmp = re.findall(
    r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text)
proxiTokList = {'normal': list(tmp), 'tor': []}
mightyList['proxiTok'] = proxiTokList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'ProxiTok')


# Send
r = requests.get(
    'https://gitlab.com/timvisee/send-instances/-/raw/master/README.md')
# Capture the URL of every "- url" bullet item.
tmp = re.findall(
    r"- ([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z0-9]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}", r.text)
sendList = {'normal': list(tmp), 'tor': []}
mightyList['send'] = sendList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Send')


# Nitter
r = requests.get('https://github.com/zedeus/nitter/wiki/Instances')
soup = BeautifulSoup(r.text, 'html.parser')
markdownBody = soup.find(class_='markdown-body')
tables = markdownBody.find_all('table')
# Drop the 4th and 5th tables of the page (two pops at index 3).
# NOTE(review): presumably these are not instance tables - confirm against
# the current wiki layout.
tables.pop(3)
tables.pop(3)
nitterList = {'normal': [], 'tor': []}
for table in tables:
    tbody = table.find('tbody')
    for tr in tbody.find_all('tr'):
        # The first cell's link text is used as the host name.
        url = tr.find('td').find('a').contents[0]
        if url.endswith('.onion'):
            nitterList['tor'].append('http://' + url)
        else:
            nitterList['normal'].append('https://' + url)
mightyList['nitter'] = nitterList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Nitter')


# Bibliogram
r = requests.get('https://bibliogram.pussthecat.org/api/instances')
rJson = json.loads(r.text)
bibliogramList = {
    'normal': [item['address'] for item in rJson['data']],
    'tor': [],
}
mightyList['bibliogram'] = bibliogramList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Bibliogram')


# LibReddit
r = requests.get(
    'https://raw.githubusercontent.com/spikecodes/libreddit/master/README.md')
libredditList = {'normal': [], 'tor': []}
# Capture the link target of "| [label](url) | ... |" markdown table rows.
tmp = re.findall(
    r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text)
tmp = filterLastSlash(tmp)
for item in tmp:
    if item.endswith('.onion'):
        libredditList['tor'].append(item)
    else:
        libredditList['normal'].append(item)
mightyList['libreddit'] = libredditList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'LibReddit')


# Teddit
r = requests.get(
    'https://codeberg.org/teddit/teddit/raw/branch/main/instances.json')
rJson = json.loads(r.text)
tedditList = {'normal': [], 'tor': []}
for item in rJson:
    url = item['url']
    if url != '':
        tedditList['normal'].append(url)
    # The onion address is optional and may be present but empty.
    if 'onion' in item:
        onion = item['onion']
        if onion != '':
            tedditList['tor'].append(onion)
mightyList['teddit'] = tedditList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Teddit')


# Wikiless
r = requests.get('https://wikiless.org/instances.json')
rJson = json.loads(r.text)
wikilessList = {'normal': [], 'tor': [], 'i2p': []}
for item in rJson:
    # Entries get a scheme prepended: http for onion/i2p, https otherwise.
    if item.endswith('.onion'):
        wikilessList['tor'].append('http://' + item)
    elif item.endswith('.i2p'):
        wikilessList['i2p'].append('http://' + item)
    else:
        wikilessList['normal'].append('https://' + item)
mightyList['wikiless'] = wikilessList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Wikiless')


# Scribe
r = requests.get(
    'https://git.sr.ht/~edwardloveall/scribe/blob/main/docs/instances.json')
rJson = json.loads(r.text)
scribeList = {'normal': list(rJson), 'tor': []}
mightyList['scribe'] = scribeList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Scribe')


# SimplyTranslate
r = requests.get('https://simple-web.org/instances/simplytranslate')
simplyTranslateList = {}
simplyTranslateList['normal'] = [
    'https://' + item for item in r.text.strip().split('\n')
]

r = requests.get('https://simple-web.org/instances/simplytranslate_onion')
simplyTranslateList['tor'] = [
    'http://' + item for item in r.text.strip().split('\n')
]

mightyList['simplyTranslate'] = simplyTranslateList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'SimplyTranslate')


# Lingva Translate
r = requests.get(
    'https://raw.githubusercontent.com/TheDavidDelta/lingva-translate/main/instances.json')
rJson = json.loads(r.text)
lingvaList = {'normal': list(rJson), 'tor': []}
mightyList['lingva'] = lingvaList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'LinvgaTranslate')


# SearX, SearXNG
r = requests.get('https://searx.space/data/instances.json')
rJson = json.loads(r.text)
searxList = {'tor': [], 'i2p': [], 'normal': []}
searxngList = {'tor': [], 'i2p': [], 'normal': []}
for item, details in rJson['instances'].items():
    # The last character of each key is dropped.
    # NOTE(review): presumably a trailing '/' - confirm against the feed.
    url = item[:-1]
    # Instances reporting the 'searxng' generator go to the SearXNG list.
    target = searxngList if details.get('generator') == 'searxng' else searxList
    if url.endswith('.onion'):
        target['tor'].append(url)
    elif url.endswith('.i2p'):
        target['i2p'].append(url)
    else:
        target['normal'].append(url)
mightyList['searx'] = searxList
mightyList['searxng'] = searxngList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'SearX, SearXNG')


# Whoogle
r = requests.get(
    'https://raw.githubusercontent.com/benbusby/whoogle-search/main/misc/instances.txt')
tmpList = r.text.strip().split('\n')
whoogleList = {'normal': [], 'tor': [], 'i2p': []}
for item in tmpList:
    if item.endswith('.onion'):
        whoogleList['tor'].append(item)
    elif item.endswith('.i2p'):
        whoogleList['i2p'].append(item)
    else:
        whoogleList['normal'].append(item)
mightyList['whoogle'] = whoogleList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Whoogle')


# Rimgo
r = requests.get(
    'https://codeberg.org/video-prize-ranch/rimgo/raw/branch/main/instances.json')
rJson = json.loads(r.text)
rimgoList = {'normal': [], 'tor': [], 'i2p': []}
for item in rJson:
    # Entries get a scheme prepended: http for onion/i2p, https otherwise.
    if item.endswith('.onion'):
        rimgoList['tor'].append('http://' + item)
    elif item.endswith('.i2p'):
        rimgoList['i2p'].append('http://' + item)
    else:
        rimgoList['normal'].append('https://' + item)
mightyList['rimgo'] = rimgoList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Rimgo')


# Peertube
r = requests.get(
    'https://instances.joinpeertube.org/api/v1/instances?start=0&count=1045&sort=-createdAt')
rJson = json.loads(r.text)
# Stored as a flat list (not a dict of buckets), so the validation pass
# below leaves it untouched.
myList = ['https://' + k['host'] for k in rJson['data']]
mightyList['peertube'] = myList
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'Peertube')


def isValid(url):  # This code is contributed by avanitrachhadiya2155
    """Return True when *url* parses with both a scheme and a network location."""
    try:
        result = urlparse(url)
        return all([result.scheme, result.netloc])
    except (ValueError, TypeError, AttributeError):
        # urlparse rejects malformed URLs (ValueError) and non-string input.
        return False


# Validate every bucketed instance and collect the Cloudflare-backed ones.
cloudflareMightyList = []
for frontend_instances in mightyList.values():
    if not isinstance(frontend_instances, dict):
        continue
    for instances in frontend_instances.values():
        # Build the surviving list separately: removing items from a list
        # while iterating over it skips the element after each removal.
        kept = []
        for instance in instances:
            if not isValid(instance):
                print("removed " + str(instance))
                continue
            kept.append(instance)
            if not instance.endswith(('.onion', '.i2p')) and is_cloudflare(instance):
                cloudflareMightyList.append(instance)
        # Update in place so the per-service lists stay the same objects.
        instances[:] = kept

# Writing to file
# ensure_ascii=False may emit non-ASCII characters, so the encoding is pinned
# instead of relying on the platform default.
json_object = json.dumps(mightyList, ensure_ascii=False, indent=2)
with open('./src/instances/data.json', 'w', encoding='utf-8') as outfile:
    outfile.write(json_object)
print(Fore.BLUE + 'wrote ' + Style.RESET_ALL + 'instances/data.json')

json_object = json.dumps(cloudflareMightyList, ensure_ascii=False, indent=2)
with open('./src/instances/cloudflare.json', 'w', encoding='utf-8') as outfile:
    outfile.write(json_object)
print(Fore.BLUE + 'wrote ' + Style.RESET_ALL + 'instances/cloudflare.json')