Skip to content

Instantly share code, notes, and snippets.

@sae13
Last active December 29, 2020 17:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sae13/869a77d9ef3f1ef5ee2d0a7f23253209 to your computer and use it in GitHub Desktop.
Save sae13/869a77d9ef3f1ef5ee2d0a7f23253209 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import concurrent.futures
import logging
import re
from sys import argv
# Configure the root logger once at import time: DEBUG when "-v" was given
# on the command line, INFO otherwise.
logging.basicConfig(level=logging.DEBUG if '-v' in argv else logging.INFO)
from requests import get
def get_film_page_urls_from_content(url: str, params: dict):
    """Fetch a listing page and return the film-page URLs found in it.

    The page is requested with ``params`` as the query string.  Newlines are
    removed from the body, the inside of the ``<article>`` element is
    extracted, and every http(s) URL in it that ends in ``tt`` followed by
    digits is collected.  URLs that point at ``imdb.com/title/tt`` are
    dropped from the result.

    Args:
        url: Address of the listing page.
        params: Query parameters forwarded to ``requests.get``.

    Returns:
        A list of URL strings.  The list is empty (and an error is logged)
        when the response is not OK, when no ``<article>`` element is found,
        or when no URL matches inside the article.
    """
    res = get(url, params=params)
    # Decode once; undecodable bytes are replaced instead of raising, since
    # the patterns below only look at ASCII characters.
    text = res.content.decode(errors='replace')
    if not res.ok:
        logging.error(('get_film_page_urls_from_content not ok', url, params, text))
        return []

    articles = re.compile(r'\<article.*?>(.*)\<\/article\>', re.IGNORECASE | re.MULTILINE).findall(
        text.replace('\n', ''))
    if not articles:
        logging.error(('get_film_page_urls_from_content article tag not found', url, params, text))
        return []

    # Raw string: the escapes belong to the regex, not to Python.
    url_pattern = re.compile(r'http[s]?:[/a-zA-Z0-9\-\.]+tt\d+', re.IGNORECASE | re.MULTILINE)
    all_urls = url_pattern.findall(articles[0])
    if not all_urls:
        logging.error(('get_film_page_urls_from_content nor url found', url, params, text))
        return []

    # Compile once instead of once per candidate URL.
    imdb_pattern = re.compile(r'imdb\.com/title/tt', re.I)
    return [link for link in all_urls if imdb_pattern.search(link) is None]
def get_download_links_from_film_page_content(url: str, quality='720p', dooble=True, x265=False):
    """Fetch a film page and return the ``.mkv`` links that match the filters.

    Args:
        url: Address of the film page.
        quality: Regular expression (case-insensitive) that a link must
            contain, e.g. ``'720p'``.
        dooble: When true keep only links containing ``dubbed``; when false
            keep only links that do not contain it.
        x265: When true keep only links containing ``x265``; when false keep
            only links that do not contain it.

    Returns:
        A list of matching URL strings.  The list is empty when the response
        is not OK or the page has no ``.mkv`` link (both are logged), or when
        every link is filtered out.
    """
    res = get(url)
    # Decode once; undecodable bytes are replaced instead of raising, since
    # the patterns below only look at ASCII characters.
    text = res.content.decode(errors='replace')
    if not res.ok:
        logging.error(('get_download_links_from_film_page_content not ok', url, text))
        return []

    # Raw string: the escapes belong to the regex, not to Python.
    urls = re.compile(r'http[s]?:[/a-zA-Z0-9\-\.]+\.mkv', re.IGNORECASE | re.MULTILINE).findall(text)
    if not urls:
        logging.error(('get_download_links_from_film_page_content no mkv url found', url, text))
        return []

    # Compile each filter once rather than once per link.
    quality_pattern = re.compile(quality, re.I)
    dubbed_pattern = re.compile('dubbed', re.I)
    x265_pattern = re.compile('x265', re.I)
    want_dubbed = bool(dooble)
    want_x265 = bool(x265)
    return [
        link for link in urls
        if quality_pattern.search(link) is not None
        and (dubbed_pattern.search(link) is not None) == want_dubbed
        and (x265_pattern.search(link) is not None) == want_x265
    ]
def download_genre(file_path='./dl.list', quality='720p', dooble=True, genre=802, x265=False,
                   url='https://valamovie.xyz/'):
    """Collect download links for every film of a genre and append them to a file.

    The genre listing at ``url`` is requested to find the highest page
    number, every listing page is scraped for film-page URLs, and every film
    page is scraped for download links.  Requests run on a pool of five
    worker threads.

    Args:
        file_path: File the links are appended to, one per line.
        quality: Quality filter forwarded to the film-page scraper.
        dooble: Adds ``dubbed=on`` to the listing query when true, and is
            forwarded to the film-page scraper.
        genre: Genre id sent as the ``genre`` query parameter.
        x265: Forwarded to the film-page scraper.
        url: Base address of the site.

    Returns:
        The de-duplicated links, each terminated by a newline, or ``None``
        when the first listing request is not OK.
    """
    param = dict(search="", genre=genre)
    if dooble:
        param['dubbed'] = 'on'
    res = get(url, params=param)
    if not res.ok:
        logging.error(('download_genre not ok', url, param))
        return

    page_numbers = [int(number) for number in
                    re.compile(r'<a class="page-link" href=".*?">(\d+)', re.I | re.MULTILINE).findall(
                        res.content.decode(errors='replace'))]
    # A listing without pagination links has a single page; max() on an
    # empty list would raise ValueError.
    last_page = max(page_numbers, default=1)

    # Tolerate a base url given without its trailing slash.
    base = url if url.endswith('/') else url + '/'
    # Page 1 is the base url itself, so it is scraped along with pages 2..N.
    listing_urls = [url] + [f'{base}page/{i}/' for i in range(2, last_page + 1)]

    # The context manager shuts the pool down even when something raises.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        listing_futures = {executor.submit(get_film_page_urls_from_content, listing_url, params=param)
                           for listing_url in listing_urls}
        page_links = _collect_results(listing_futures)

        # De-duplicate so that each film page is fetched only once.
        film_futures = {executor.submit(get_download_links_from_film_page_content, _url, quality, dooble, x265)
                        for _url in set(page_links)}
        dl_page_url = _collect_results(film_futures)

    # Sorted so that the output order is reproducible between runs.
    dl_page_url = sorted({_url + "\n" for _url in dl_page_url})
    with open(file_path, 'a') as dl_file:
        dl_file.writelines(dl_page_url)
    return dl_page_url


def _collect_results(futures):
    """Concatenate the list results of ``futures``, logging the ones that failed."""
    collected = []
    for future in concurrent.futures.as_completed(futures):
        try:
            collected += future.result()
        except (Exception, concurrent.futures.CancelledError) as error:
            # Best effort: one failed page must not abort the whole run.
            logging.error(error)
    return collected
def start():
    """Command-line entry point: read the options from ``argv`` and run a scrape.

    With no arguments a genre download with all defaults is run and its
    result printed.  With ``-h`` the usage text is printed.  Otherwise the
    arguments are recognised by their shape, in any order:

    * ``-d`` selects dubbed links, ``x265`` selects x265 links;
    * three or four digits followed by ``p`` set the quality (default
      ``720p``);
    * an ``http://`` or ``https://`` argument is the url;
    * ``-g`` switches to genre mode, which needs an all-digit genre id;
    * an argument starting with ``.`` or ``/``, or of the form
      ``dir/name``, is the output file (default ``./dl.list``).

    When several arguments fit one category the last one wins.  Without
    ``-g`` the url is required and only that single film page is scraped;
    its links are appended to the output file.  The resulting links are
    printed in both modes.

    Raises:
        ValueError: ``-g`` was given without a genre id, or ``-g`` was not
            given and no url was supplied.
    """
    usage = """
-h help
-v verbose
-d dooble; if no argument set dooble is set
x265 for x265
480p 720p 1080p for size default 720p
-g for genre download if not find download links in page
/path/to/file where download links save default ./dl.list
url should start with http[s]:// default https://valamovie.xyz/
"""
    if len(argv) == 1:
        print(download_genre())
        return

    # Skip argv[0]: the script name must not be mistaken for an option.
    args = argv[1:]
    if '-h' in args:
        print(usage)
        return

    dooble = '-d' in args
    x265 = 'x265' in args
    genre_mode = '-g' in args

    qualities = [arg for arg in args if re.match(r'\d{3}\d?p', arg, re.I)]
    quality = qualities[-1] if qualities else '720p'

    # "s" is optional so that plain http:// urls are accepted as documented.
    urls = [arg for arg in args if re.match(r'^https?:[\-\/\.a-z0-9]+', arg, re.I)]
    url = urls[-1] if urls else None

    genre = None
    if genre_mode:
        genres = [arg for arg in args if re.match(r'^\d+$', arg)]
        if not genres:
            raise ValueError('no genere found')
        genre = genres[-1]
    elif url is None:
        raise ValueError('url needed')

    # A path starts with "." or "/", or looks like "dir/name".  "|" is not
    # listed inside the character classes because it is a literal there.
    paths = [arg for arg in args if re.match(r'^[./]|[\w ]+/[\-/.a-z0-9]+', arg, re.I)]
    address = paths[-1] if paths else './dl.list'

    if url is None:
        url = 'https://valamovie.xyz/'

    if genre is not None:
        # download_genre appends the links to the file itself.
        links = download_genre(address, quality, dooble, genre, x265, url)
    else:
        links = get_download_links_from_film_page_content(url, quality, dooble, x265)
        with open(address, 'a') as file:
            file.writelines([f'{link}\n' for link in links])
    print(links)
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment