about | summary | refs | log | tree | commit | diff | stats
path: root/src/instances
diff options
context:
space:
mode:
author Hygna <hygna@proton.me> 2022-09-29 18:32:03 +0100
committer Hygna <hygna@proton.me> 2022-09-29 18:32:03 +0100
commit f1b8b64c48fede90ee9c828c1b82e641c0fe653d (patch)
tree 44b55dfde203a223d57e6dcfd717dad7614e3b54 /src/instances
parent Unify cookies (diff)
download libredirect-f1b8b64c48fede90ee9c828c1b82e641c0fe653d.zip
Changes:
Improved instance fetcher; added instance updating; fixed a few bugs.
Diffstat (limited to '')
-rw-r--r-- src/instances/get_instances.py 77
1 files changed, 38 insertions, 39 deletions
diff --git a/src/instances/get_instances.py b/src/instances/get_instances.py
index 3b773304..06b547b3 100644
--- a/src/instances/get_instances.py
+++ b/src/instances/get_instances.py
@@ -1,25 +1,26 @@
# Note: Run this script from the root of the repo
+import traceback
+import logging
import requests
import json
from urllib.parse import urlparse
import re
-from colorama import Fore, Back, Style
-from urllib.parse import urlparse
+from colorama import Fore, Style
import socket
mightyList = {}
config = {}
-startRegex = "https?:\/{2}(?:[^\s\/]+\.)+"
+startRegex = r"https?:\/{2}(?:[^\s\/]+\.)+"
endRegex = "(?:\/[^\s\/]+)*\/?"
torRegex = startRegex + "onion" + endRegex
i2pRegex = startRegex + "i2p" + endRegex
lokiRegex = startRegex + "loki" + endRegex
-authRegex = "https?:\/{2}\S+:\S+@(?:[^\s\/]+\.)+[a-zA-Z0-9]+" + endRegex
+authRegex = r"https?:\/{2}\S+:\S+@(?:[^\s\/]+\.)+[a-zA-Z0-9]+" + endRegex
with open('./src/config/config.json', 'rt') as tmp:
- config['networks'] = json.load(tmp)['config']['networks']
+ config['networks'] = json.load(tmp)['networks']
def filterLastSlash(urlList):
@@ -61,7 +62,7 @@ def is_cloudflare(url):
instance_ip = socket.gethostbyname(urlparse(url).hostname)
if instance_ip is None:
return False
- except:
+ except Exception:
return False
instance_bin = ip2bin(instance_ip)
@@ -88,10 +89,11 @@ def is_authenticate(url):
if 'www-authenticate' in r.headers:
print(url + ' requires ' + Fore.RED + 'authentication' + Style.RESET_ALL)
return True
- except:
+ except Exception:
return False
return False
+
def is_offline(url):
try:
r = requests.get(url, timeout=5)
@@ -102,21 +104,22 @@ def is_offline(url):
return True
else:
return False
- except:
+ except Exception:
return False
+
def fetchCache(frontend, name):
- # json_object = json.dumps(mightyList, ensure_ascii=False, indent=2)
with open('./src/instances/data.json') as file:
mightyList[frontend] = json.load(file)[frontend]
print(Fore.YELLOW + 'Failed' + Style.RESET_ALL + ' to fetch ' + name)
+
def fetchFromFile(frontend, name):
- #json_object = json.dumps(mightyList, ensure_ascii=False, indent=2)
with open('./src/instances/' + frontend + '.json') as file:
mightyList[frontend] = json.load(file)
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name)
+
def fetchJsonList(frontend, name, url, urlItem):
try:
r = requests.get(url)
@@ -127,13 +130,13 @@ def fetchJsonList(frontend, name, url, urlItem):
if type(urlItem) == dict:
for item in rJson:
for network in config['networks']:
- if urlItem[network] != None:
+ if urlItem[network] is not None:
if urlItem[network] in item:
if item[urlItem[network]].strip() != '':
_list[network].append(item[urlItem[network]])
else:
if frontend == 'librarian':
- rJson = rJson['instances'] # I got lazy :p Might fix this at some point...
+ rJson = rJson['instances'] # I got lazy :p Might fix this at some point...
for item in rJson:
tmpItem = item
if urlItem is not None:
@@ -151,18 +154,20 @@ def fetchJsonList(frontend, name, url, urlItem):
mightyList[frontend] = _list
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name)
- except:
+ except Exception:
fetchCache(frontend, name)
+ logging.error(traceback.format_exc())
+
-def fetchRegexList(frontend, name, url, regex):
+def fetchRegexList(frontend, name, url, regex):
try:
r = requests.get(url)
_list = {}
for network in config['networks']:
_list[network] = []
-
+
tmp = re.findall(regex, r.text)
-
+
for item in tmp:
if item.strip() == "":
continue
@@ -176,8 +181,10 @@ def fetchRegexList(frontend, name, url, regex):
_list['clearnet'].append(item)
mightyList[frontend] = _list
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name)
- except:
+ except Exception:
fetchCache(frontend, name)
+ logging.error(traceback.format_exc())
+
def fetchTextList(frontend, name, url, prepend):
try:
@@ -200,8 +207,9 @@ def fetchTextList(frontend, name, url, prepend):
_list['clearnet'].append(item)
mightyList[frontend] = _list
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name)
- except:
+ except Exception:
fetchCache(frontend, name)
+ logging.error(traceback.format_exc())
def invidious():
@@ -223,8 +231,9 @@ def invidious():
_list['tor'].append(instance[1]['uri'])
mightyList[frontend] = _list
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name)
- except:
+ except Exception:
fetchCache(frontend, name)
+ logging.error(traceback.format_exc())
def piped():
@@ -240,7 +249,7 @@ def piped():
'https://raw.githubusercontent.com/wiki/TeamPiped/Piped/Instances.md')
tmp = re.findall(
- '(?:[^\s\/]+\.)+[a-zA-Z]+ (?:\(Official\) )?\| (https:\/{2}(?:[^\s\/]+\.)+[a-zA-Z]+) \| ', r.text)
+ r'(?:[^\s\/]+\.)+[a-zA-Z]+ (?:\(Official\) )?\| (https:\/{2}(?:[^\s\/]+\.)+[a-zA-Z]+) \| ', r.text)
for item in tmp:
try:
url = requests.get(item, timeout=5).url
@@ -248,12 +257,14 @@ def piped():
continue
else:
_list['clearnet'].append(url)
- except:
+ except Exception:
+ logging.error(traceback.format_exc())
continue
mightyList[frontend] = _list
print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + name)
- except:
+ except Exception:
fetchCache(frontend, name)
+ logging.error(traceback.format_exc())
def pipedMaterial():
@@ -265,20 +276,7 @@ def cloudtube():
def proxitok():
- r = requests.get(
- 'https://raw.githubusercontent.com/wiki/pablouser1/ProxiTok/Public-instances.md')
-
- tmp = re.findall(
- r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)*\|*[A-Z]{0,}.*\|.*\|", r.text)
- proxiTokList = {}
- proxiTokList['clearnet'] = []
- proxiTokList['tor'] = []
- proxiTokList['i2p'] = []
- proxiTokList['loki'] = []
- for item in tmp:
- proxiTokList['clearnet'].append(re.sub(r'/$', '', item))
- mightyList['proxiTok'] = proxiTokList
- print(Fore.GREEN + 'Fetched ' + Style.RESET_ALL + 'ProxiTok')
+ fetchRegexList('proxiTok', 'ProxiTok', 'https://raw.githubusercontent.com/wiki/pablouser1/ProxiTok/Public-instances.md', r"\| \[.*\]\(([-a-zA-Z0-9@:%_\+.~#?&//=]{2,}\.[a-z]{2,}\b(?:\/[-a-zA-Z0-9@:%_\+.~#?&//=]*)?)\)(?: \(Official\))? +\|(?:(?: [A-Z]*.*\|.*\|)|(?:$))")
def send():
@@ -298,11 +296,11 @@ def libreddit():
def teddit():
- fetchJsonList('teddit', 'Teddit', 'https://codeberg.org/teddit/teddit/raw/branch/main/instances.json', { 'clearnet': 'url', 'tor': 'onion', 'i2p': 'i2p', 'loki': None })
+ fetchJsonList('teddit', 'Teddit', 'https://codeberg.org/teddit/teddit/raw/branch/main/instances.json', {'clearnet': 'url', 'tor': 'onion', 'i2p': 'i2p', 'loki': None})
def wikiless():
- fetchJsonList('wikiless', 'Wikiless', 'https://wikiless.org/instances.json', { 'clearnet': 'url', 'tor': 'onion', 'i2p': 'i2p', 'loki': None})
+ fetchJsonList('wikiless', 'Wikiless', 'https://wikiless.org/instances.json', {'clearnet': 'url', 'tor': 'onion', 'i2p': 'i2p', 'loki': None})
def scribe():
@@ -401,6 +399,7 @@ def rimgo():
def librarian():
fetchJsonList('librarian', 'Librarian', 'https://codeberg.org/librarian/librarian/raw/branch/main/instances.json', 'url')
+
def neuters():
fetchFromFile('neuters', 'Neuters')
@@ -434,7 +433,7 @@ def isValid(url): # This code is contributed by avanitrachhadiya2155
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
- except:
+ except Exception:
return False