diff --git a/spider/HtmlDownloader.py b/spider/HtmlDownloader.py
index f6a37a6..74ccb93 100644
--- a/spider/HtmlDownloader.py
+++ b/spider/HtmlDownloader.py
@@ -12,61 +12,36 @@
class Html_Downloader(object):
- @classmethod
- def download(self, url):
- count = 0 # 重试次数
- r = ''
+ @staticmethod
+ def download(url):
try:
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT)
r.encoding = chardet.detect(r.content)['encoding']
+ if (not r.ok) or len(r.content) < 500:
+ raise ConnectionError
+ else:
+ return r.text
+
+ except Exception:
+ count = 0 # number of proxy retries attempted so far
+ proxylist = sqlhelper.select(10)
+ if not proxylist:
+ return None
+
while count < config.RETRY_TIME:
- if (not r.ok) or len(r.content) < 500:
- proxylist = sqlhelper.select(10)
+ try:
proxy = random.choice(proxylist)
ip = proxy[0]
port = proxy[1]
proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
- try:
- r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
- r.encoding = chardet.detect(r.content)['encoding']
- count += 1
- except Exception as e:
- count += 1
-
- else:
- return r.text
-
- return None
-
- except Exception as e:
- while count < config.RETRY_TIME:
- if r == '' or (not r.ok) or len(r.content) < 500:
- try:
- proxylist = sqlhelper.select(10)
- proxy = random.choice(proxylist)
- ip = proxy[0]
- port = proxy[1]
- proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
- try:
- r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
- r.encoding = chardet.detect(r.content)['encoding']
- count += 1
- except Exception as e:
- count += 1
-
- except Exception as e:
- return None
-
- else:
- return r.text
-
- return None
-
-
-
-
-
-
-
+ r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
+ r.encoding = chardet.detect(r.content)['encoding']
+ if (not r.ok) or len(r.content) < 500:
+ raise ConnectionError
+ else:
+ return r.text
+ except Exception:
+ count += 1
+ return None
diff --git a/util/exception.py b/util/exception.py
index 80d10ff..c992ac7 100644
--- a/util/exception.py
+++ b/util/exception.py
@@ -4,7 +4,7 @@
class Test_URL_Fail(Exception):
def __str__(self):
- str = "访问%s失败,请检查网络连接" % config.TEST_URL
+ str = "访问%s失败,请检查网络连接" % config.TEST_IP
return str