-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_urls.py
executable file
·53 lines (38 loc) · 1.53 KB
/
check_urls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python3
# http://chriskiehl.com/article/parallelism-in-one-line
# https://docs.python.org/2/library/multiprocessing.html
# http://stackoverflow.com/questions/26432411/multiprocessing-dummy-in-python
# http://stackoverflow.com/questions/4319236/remove-the-newline-character-in-a-list-read-from-a-file
# http://sebastianraschka.com/Articles/2014_multiprocessing_intro.html
# http://www.phpied.com/downloading-top-x-sites-data-with-zombiejs/
# https://support.alexa.com/hc/en-us/articles/200449834-Does-Alexa-have-a-list-of-its-top-ranked-websites
# Find the number of threads for a python process:
# ps -o nlwp $(pgrep python)
"""Get the status of a set of urls."""
from multiprocessing import cpu_count
from multiprocessing.dummy import Pool as ThreadPool
import requests
import six
def get(url):
    """Get the status of a given url.

    Args:
        url: URL to fetch with an HTTP GET (10 second timeout).

    Returns:
        The ``requests.Response`` for ``url``, or ``None`` when the request
        failed; in that case the url and the error are printed.
    """
    try:
        return requests.get(url, timeout=10)
    except requests.RequestException as error:
        # RequestException is the base class of ConnectionError, Timeout,
        # TooManyRedirects, MissingSchema, ...  Catching only ConnectionError
        # let e.g. a read timeout escape and abort every other url's check.
        six.print_(url, error)
        return None
def check(path="urls.txt"):
    """Get the status of a set of urls.

    Reads one url per line from ``path``, fetches them concurrently on a
    thread pool and prints a message for every response whose status code is
    not 200 or whose body is empty.

    Args:
        path: File holding the urls to check, one per line. Defaults to
            ``urls.txt``, resolved against the current working directory.
    """
    success = 200
    # The context manager closes the file even if reading fails.
    with open(path, "r") as infile:
        # Skip blank lines: an empty string is not a fetchable url.
        urls = [line.strip() for line in infile if line.strip()]
    # Four worker threads per CPU core; the work is network-bound.
    pool = ThreadPool(cpu_count() * 4)
    try:
        results = pool.map(get, urls)
    finally:
        # Always release the worker threads, even when a fetch raised.
        pool.close()
        pool.join()
    for result in results:
        if result is None:
            # get() returned nothing for this url, so there is no response
            # to inspect.
            continue
        if result.status_code != success:
            six.print_("Invalid status code", result.url)
        if not result.content:
            six.print_("Invalid content length", result.url)
# Run the url check only when executed as a script, not when imported.
if __name__ == "__main__":
    check()