Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

台风脚本那里有点问题 #6

Open
nesteiner opened this issue Mar 4, 2021 · 0 comments
Open

台风脚本那里有点问题 #6

nesteiner opened this issue Mar 4, 2021 · 0 comments

Comments

@nesteiner
Copy link

我手抄了一下台风历史信息的脚本,运行的时候发现总有一个错误发生
麻烦你帮我看一下哪里出错了

-*- mode: compilation; default-directory: "~/spider/spider/spiders/" -*-
Compilation started at Thu Mar  4 14:19:50

python3 typhoon.py
Traceback (most recent call last):
  File "typhoon.py", line 114, in <module>
    tfcraw.get_tf_detail()
  File "typhoon.py", line 62, in get_tf_detail
    tf_list = self.get_tf_list()
  File "typhoon.py", line 44, in get_tf_list
    year_list = self.get_year()
  File "typhoon.py", line 34, in get_year
    years = r.json()
  File "/home/steiner/.local/lib/python3.6/site-packages/requests/models.py", line 897, in json
    return complexjson.loads(self.text, **kwargs)
  File "/usr/lib/python3/dist-packages/simplejson/__init__.py", line 518, in loads
    return _default_decoder.decode(s)
  File "/usr/lib/python3/dist-packages/simplejson/decoder.py", line 370, in decode
    obj, end = self.raw_decode(s)
  File "/usr/lib/python3/dist-packages/simplejson/decoder.py", line 400, in raw_decode
    return self.scan_once(s, idx=_w(s, idx).end())
simplejson.errors.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

Compilation exited abnormally with code 1 at Thu Mar  4 14:19:51

代码在这

import requests
from pymongo import MongoClient
import time
import random

class Typhoon:
    def __init__(self):
        self.user_agent = [
                           "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
                           "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
                           "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
                           "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
                           "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    ]

        self.base_url = 'http://www.wztf121.com/data/complex/{}.json'
        self.headers = {
            'Cookie': '_gscu_1378142123=65572018r5on4x80; _gscbrs_1378142123=1; vjuids=30469f88b.16c835d32ea.0.8062809782e9b; vjlast=1565572019.1565572019.30; Hm_lvt_e592d6befa4f9918e6496980d22c5649=1565572019; Wa_lvt_1=1565572019; Wa_lpvt_1=1565576034; _gscs_1378142123=65572018v2ofkf80|pv:8; Hm_lpvt_e592d6befa4f9918e6496980d22c5649=1565576061',
            'Host': 'www.wztf121.com',
            'Referer': 'http://www.wztf121.com/history.html',
            'User-Agent': random.choice(self.user_agent)
        }

        self.client = MongoClient()
        self.db     = self.client.typhoon



    def get_year(self):
        year_list = []
        years_url = self.base_url.format('years')

        r = requests.get(years_url, headers = self.headers)
        years = r.json()

        for year in years:
            year_list.append(year['year'])

        print('以获取所有台风记录的年份')
        return year_list

    def get_tf_list(self):
        tf_list = []
        year_list = self.get_year()

        for year in year_list:
            url = self.base_url.format(year)

            r = requests.get(url, headers = self.headers)
            tfs = r.json()

            for tf in tfs:
                tfbh = tf['tfbh']
                tf_list.append(tfbh)

            time.sleep(random.random())

        print('已获得所有台风的编号,格式为 年份 + 次序')
        return tf_list

    def get_tf_detail(self):
        tf_list = self.get_tf_list()
        count = 1
        for tf in tf_list:
            tf_url = self.base_url.format(tf)
            r = requests.get(tf_url, headers = self.headers)
            tf_detail = r.json()

            begin_time = tf_detail[0]['begin_time']
            ename      = tf_detail[0]['ename']
            end_time   = tf_detail[0]['end_time']
            name       = tf_detail[0]['name']
            points     = tf_detail[0]['points']

            for point in points:
                latitude  = point['latitude']
                longitude = point['longitude']
                power     = point['power']
                speed     = point['speed']
                pressure  = point['pressure']
                strong    = point['strong']
                real_time = point['time']

                detail = {
                    'name': name,
                    'ename': ename,
                    'latitude': latitude,
                    'longitude': longitude,
                    'power': power,
                    'speed': speed,
                    'pressure': pressure,
                    'strong': strong,
                    'time': real_time,
                }
                self.db['detail'].insert_one(detail)


            time.sleep(5 * random.random())
            tf_info = {
                'name': name,
                'ename': ename,
                'begin_time': begin_time,
                'end_time': end_time,
            }

            self.db['info'].insert_one(tf_info)
            print('已存入第{}条台风详细信息!'.format(count))
            count += 1

                
            
        
tfcraw = Typhoon()
tfcraw.get_tf_detail()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant