Last active
December 29, 2020 17:15
-
-
Save sae13/869a77d9ef3f1ef5ee2d0a7f23253209 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Scrape .mkv movie download links from a movie-listing site and save them to a file."""
import concurrent.futures
import logging
import re
from sys import argv

# -v on the command line turns on debug logging; otherwise stay at INFO.
# Logging is configured before importing requests so its loggers inherit the level.
logging.basicConfig(level=logging.DEBUG if '-v' in argv else logging.INFO)

from requests import get
def get_film_page_urls_from_content(url: str, params: dict) -> list:
    """Fetch one listing page and return the film-page URLs found inside its <article> tag.

    URLs pointing at imdb.com title pages are filtered out so only the site's
    own film pages remain.  Returns [] on any failure (HTTP error, no
    <article> tag, no matching URLs) after logging the problem.
    """
    res = get(url, params=params)
    content = res.content
    if not res.ok:
        logging.error(('get_film_page_urls_from_content not ok', url, params, content.decode()))
        return []
    # Collapse newlines so the non-greedy article pattern can span the whole tag.
    # (MULTILINE was dropped: the pattern has no ^/$ anchors, so it was a no-op.)
    flat = content.decode().replace('\n', '')
    content_article = re.findall(r'<article.*?>(.*)</article>', flat, re.IGNORECASE)
    if not content_article:
        logging.error(('get_film_page_urls_from_content article tag not found', url, params, content.decode()))
        return []
    # Film-page links end in an IMDb-style ttNNN id; raw string avoids the
    # invalid \- \. \d escapes of the original non-raw pattern.
    all_urls = re.findall(r'http[s]?:[/a-zA-Z0-9\-.]+tt\d+', content_article[0], re.IGNORECASE)
    if not all_urls:
        logging.error(('get_film_page_urls_from_content no url found', url, params, content.decode()))
        return []
    # Keep only the site's own pages, not the imdb.com title links.
    imdb_link = re.compile(r'imdb\.com/title/tt', re.I)
    return [u for u in all_urls if imdb_link.search(u) is None]
def get_download_links_from_film_page_content(url: str, quality='720p', dooble=True, x265=False) -> list:
    """Scrape one film page for .mkv download links matching the requested variant.

    Links are filtered by quality substring (e.g. '720p'), by dubbed-ness
    ('dooble' keeps dubbed links when True, drops them when False) and by
    x265 encoding the same way.  Returns [] on any failure after logging.
    """
    res = get(url)
    content = res.content
    if not res.ok:
        # Fixed: log message previously named the wrong function (copy-paste).
        logging.error(('get_download_links_from_film_page_content not ok', url, content.decode()))
        return []
    urls = re.findall(r'http[s]?:[/a-zA-Z0-9\-.]+\.mkv', content.decode(), re.IGNORECASE | re.MULTILINE)
    if not urls:
        # Fixed: message previously said 'article tag not found' (copy-paste).
        logging.error(('get_download_links_from_film_page_content no mkv links found', url, content.decode()))
        return []

    def _keep(items, pattern, wanted):
        """Keep items that do (wanted=True) or don't (wanted=False) match pattern, case-insensitively."""
        rx = re.compile(pattern, re.I)
        return [u for u in items if (rx.search(u) is not None) == wanted]

    urls = _keep(urls, quality, True)
    urls = _keep(urls, 'dubbed', dooble)
    urls = _keep(urls, 'x265', x265)
    return urls
def download_genre(file_path='./dl.list', quality='720p', dooble=True, genre=802, x265=False,
                   url='https://valamovie.xyz/'):
    """Crawl every listing page of a genre and append the found .mkv links to file_path.

    Fetches the first listing page to discover the pagination, scrapes the
    remaining pages concurrently for film-page URLs, then scrapes each film
    page for download links.  Returns the de-duplicated list of
    newline-terminated links written, or None when the initial request fails.
    """
    param = dict(search="", genre=genre)
    if dooble:
        param['dubbed'] = 'on'
    res = get(url, params=param)
    if not res.ok:
        return
    # Highest page number advertised by the pagination links.
    # default=1 guards against ValueError when no pagination exists.
    page_numbers = re.findall(r'<a class="page-link" href=".*?">(\d+)', res.content.decode(),
                              re.I | re.MULTILINE)
    last_page = max((int(n) for n in page_numbers), default=1)
    page_links = []
    dl_page_url = []
    # NOTE(review): page 1's film links are never collected (range starts at 2) — confirm intended.
    # `with` guarantees the executor is shut down (the original leaked it).
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        futures = {executor.submit(get_film_page_urls_from_content, f'{url}page/{i}/', params=param)
                   for i in range(2, last_page + 1)}
        for future in concurrent.futures.as_completed(futures):
            if future.cancelled():
                logging.error(future.exception())
            else:
                page_links += future.result()
        futures = {executor.submit(get_download_links_from_film_page_content, _url, quality, dooble, x265)
                   for _url in page_links}
        for future in concurrent.futures.as_completed(futures):
            if future.cancelled():
                logging.error(future.exception())
            else:
                dl_page_url += future.result()
    # De-duplicate, then append to the list file (context manager closes it).
    dl_page_url = list({_url + "\n" for _url in dl_page_url})
    with open(file_path, 'a') as dl_file:
        dl_file.writelines(dl_page_url)
    return dl_page_url
def start():
    """Command-line entry point: parse sys.argv and run the appropriate scrape."""
    # Renamed from `help` to avoid shadowing the builtin.
    help_text = """
    -h help
    -v verbose
    -d dooble; if no argument set dooble is set
    x265 for x265
    480p 720p 1080p for size default 720p
    -g for genre download if not find download links in page
    /path/to/file where download links save default ./dl.list
    url should start with http[s]:// default https://valamovie.xyz/
    """
    # No arguments at all: run the default genre download.
    if len(argv) == 1:
        dl = download_genre()
        print(dl)
        exit(0)
    if '-h' in argv:
        print(help_text)
        return
    dooble = '-d' in argv
    x265 = 'x265' in argv
    # Last argv token that looks like a quality (e.g. 720p); default 720p.
    quality_args = list(filter(lambda aa: re.compile(r'\d{3}\d?p', re.I).match(aa), argv))
    quality = quality_args.pop() if quality_args else '720p'
    download_links_of_page_only = '-g' not in argv
    url_args = list(filter(lambda aa: re.compile(r'^http[s]:[\-\/\.a-z0-9]+', re.I).match(aa), argv))
    url = url_args.pop() if url_args else None
    genre = None
    if not download_links_of_page_only:
        # Genre is a bare number on the command line (kept as str, as before).
        genre_args = list(filter(lambda aa: re.compile(r'^\d+$', re.I).match(aa), argv))
        genre = genre_args.pop() if genre_args else None
        if genre is None:
            raise Exception('no genere found')
    if download_links_of_page_only and url is None:
        raise Exception('url needed')
    # A path-looking token selects the output file; default ./dl.list.
    address_args = list(
        filter(lambda aa: re.compile(r'^[\.|\.\.|\/]|[\w|\d|_|\ ]+/[\-\/\.a-z0-9]+', re.I).match(aa), argv[1:]))
    address = address_args.pop() if address_args else './dl.list'
    if url is None:
        url = 'https://valamovie.xyz/'
    if genre is not None:
        links = download_genre(address, quality, dooble, genre, x265, url)
    else:
        links = get_download_links_from_film_page_content(url, quality, dooble, x265)
    # NOTE(review): in the genre path this appends links download_genre already
    # wrote (with an extra newline each) — confirm the double write is intended.
    with open(address, 'a') as file:
        file.writelines([f'{link}\n' for link in links])
    print(links)
# Run the CLI only when executed as a script, not on import.
if __name__ == '__main__':
    start()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment