Merge pull request #80 from 1nchaos/dev

fixed
1nchaos · Jul 8, 2024 · eba9646 · eba9646
2 parents b461ef6 + 1337c9a
commit eba9646
Show file tree

Hide file tree

Showing 6 changed files with 131 additions and 110 deletions.
diff --git a/adata/__version__.py b/adata/__version__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-VERSION = (2, 3, 0)
+VERSION = (2, 3, 1)
 PRERELEASE = None  # alpha, beta or rc
 REVISION = None
 

diff --git a/adata/stock/info/stock_index.py b/adata/stock/info/stock_index.py
@@ -19,15 +19,11 @@
 @time: 2023/5/23
 @log: change log
 """
-import copy
 
 import pandas as pd
 from bs4 import BeautifulSoup
 
-from adata.common.exception.exception_msg import *
-from adata.common.headers import ths_headers
-from adata.common.utils import cookie, requests
-from adata.stock.cache.index_code_rel_ths import rel
+from adata.common.utils import requests
 
 
 class StockIndex(object):
@@ -47,51 +43,37 @@ def all_index_code(self):
         concept_code为同花顺的概念代码
         :return: 指数信息[name,index_code,concept_code,source]
         """
-        return self.__all_index_code_ths()
+        return self.__all_index_code_east()
 
-    def __all_index_code_ths(self):
+    def __all_index_code_east(self, wait_time=0):
         """
-        获取同花顺所有行情中心的指数代码
-        http://q.10jqka.com.cn/zs/
-        上面地址可不用翻页
+        东方财富指数列表
+        https://quote.eastmoney.com/center/gridlist.html#index_sh
+        https://39.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&dect=1&wbp2u=|0|0|0|web&fid=f3&fs=m:1+s:2&fields=f12,f14&_=1720430951494
         :return: 指数信息[name,index_code，concept_code,source]
         """
-        # 1. url拼接页码等参数
         data = []
-        total_pages = 1
-        curr_page = 1
-        while curr_page <= total_pages:
-            api_url = f"http://q.10jqka.com.cn/zs/index/field/zdf/order/desc/page/{curr_page}/ajax/1/"
-            headers = copy.deepcopy(ths_headers.text_headers)
-            headers['Cookie'] = cookie.ths_cookie()
-            res = requests.request(method='get', url=api_url, headers=headers, proxies={})
-            curr_page += 1
-            # 2. 判断请求是否成功
-            if res.status_code != 200:
-                continue
-            text = res.text
-            soup = BeautifulSoup(text, 'html.parser')
-            # 3 .获取总的页数
-            if total_pages == 1:
-                page_info = soup.find('span', {'class': 'page_info'})
-                if page_info:
-                    total_pages = int(page_info.text.split("/")[1])
-            # 4. 解析数据
-            page_data = []
-            for idx, tr in enumerate(soup.find_all('tr')):
-                if idx != 0:
-                    tds = tr.find_all('td')
-                    a_href = tds[1].find('a')
-                    page_data.append({'index_code': tds[1].contents[0].text,
-                                      'concept_code': a_href['href'].split('/')[-2],
-                                      'name': tds[2].contents[0].text, 'source': '同花顺'})
-            data.extend(page_data)
-        # 5. 封装数据
-        if not data:
-            return pd.DataFrame(data=data, columns=self.__INDEX_CODE_COLUMN)
-        result_df = pd.DataFrame(data=data)
-        data.clear()
-        return result_df[self.__INDEX_CODE_COLUMN]
+        # Two East Money list queries; each tuple is (host prefix, `fs` filter, `_` timestamp).
+        # NOTE(review): presumably the Shanghai and Shenzhen index lists -- confirm against the site.
+        sources = (('39', 'm:1+s:2', '1720430951494'), ('31', 'm:0+t:5', '1720432207117'))
+        for host, fs, ts in sources:
+            # Same bound as before: pages 1..87, so the loop always terminates.
+            for curr_page in range(1, 88):
+                url = f"https://{host}.push2.eastmoney.com/api/qt/clist/get?" \
+                      f"pn={curr_page}&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&" \
+                      f"dect=1&wbp2u=|0|0|0|web&fid=f3&fs={fs}&fields=f12,f14&_={ts}"
+                res_json = requests.request('get', url, headers={}, proxies={}, wait_time=wait_time).json()
+                # An empty `data` payload (or one without `diff` rows) ends the paging of this list.
+                res_data = res_json['data']
+                rows = res_data.get('diff') if res_data else None
+                if not rows:
+                    break
+                for row in rows:
+                    data.append({'index_code': row['f12'], 'name': row['f14'], 'source': '东方财富',
+                                 'concept_code': ''})
+        # Passing `columns` fixes the column order and still yields those columns when nothing was fetched.
+        result_df = pd.DataFrame(data=data, columns=self.__INDEX_CODE_COLUMN)
+        return result_df
 
     def index_constituent(self, index_code=None, wait_time=None):
         """
@@ -101,61 +83,7 @@ def index_constituent(self, index_code=None, wait_time=None):
         :param wait_time: 等待时间：毫秒；表示每个请求的间隔时间，主要用于防止请求太频繁的限制。
         :return: ['index_code', 'stock_code', 'short_name']
         """
-        res = self.__index_constituent_baidu(index_code=index_code)
-        if not res.empty:
-            return res
-        return self.__index_constituent_ths(index_code=index_code, wait_time=wait_time)
-
-    def __index_constituent_ths(self, index_code=None, wait_time=None):
-        """
-        同花顺指数成分股
-        :param index_code: 指数代码 399282
-        :param wait_time: 等待时间：毫秒；表示每个请求的间隔时间，主要用于防止请求太频繁的限制。
-        :return:['index_code', 'stock_code', 'short_name']
-        """
-        # 转换抓取的code,
-        catch_code = rel[index_code] if index_code.startswith('0') and index_code in rel.keys() else index_code
-        # 转换指数代码
-        index_code = rel[index_code] if ('A' in index_code or 'B' in index_code or 'C' in index_code) and index_code in rel.keys() else index_code
-        # 1. url拼接页码等参数
-        data = []
-        total_pages = 1
-        curr_page = 1
-        while curr_page <= total_pages:
-            api_url = f"http://q.10jqka.com.cn/zs/detail/field/199112/order/desc/page/" \
-                      f"{curr_page}/ajax/1/code/{catch_code}"
-            headers = copy.deepcopy(ths_headers.text_headers)
-            headers['Cookie'] = cookie.ths_cookie()
-            res = requests.request(method='get', url=api_url, headers=headers, proxies={}, wait_time=wait_time)
-            curr_page += 1
-            # 2. 判断请求是否成功
-            if res.status_code != 200:
-                continue
-            text = res.text
-            if THS_IP_LIMIT_RES in res:
-                raise Exception(THS_IP_LIMIT_MSG)
-            if '暂无成份股数据' in text or '概念板块' in text or '概念时间表' in text:
-                break
-            soup = BeautifulSoup(text, 'html.parser')
-            # 3 .获取总的页数
-            if total_pages == 1:
-                page_info = soup.find('span', {'class': 'page_info'})
-                if page_info:
-                    total_pages = int(page_info.text.split("/")[1])
-            # 4. 解析数据
-            page_data = []
-            for idx, tr in enumerate(soup.find_all('tr')):
-                if idx != 0:
-                    tds = tr.find_all('td')
-                    page_data.append({'index_code': index_code, 'stock_code': tds[1].contents[0].text,
-                                      'short_name': tds[2].contents[0].text})
-            data.extend(page_data)
-        # 5. 封装数据
-        if not data:
-            return pd.DataFrame(data=data, columns=self.__INDEX_CONSTITUENT_COLUMN)
-        result_df = pd.DataFrame(data=data)
-        data.clear()
-        return result_df[self.__INDEX_CONSTITUENT_COLUMN]
+        return self.__index_constituent_baidu(index_code=index_code)  # wait_time is now unused (THS fallback removed)
 
     def __index_constituent_baidu(self, index_code=None):
         """
@@ -245,5 +173,4 @@ def __index_constituent_sina(self, index_code=None, wait_time=None):
 
 if __name__ == '__main__':
     print(StockIndex().all_index_code())
-    # print(StockIndex().index_constituent(index_code='000033'))
-    # print(StockIndex().index_constituent(index_code='399387', wait_time=158))
+    print(StockIndex().index_constituent(index_code='000113'))
diff --git a/adata/stock/market/index_market/market_index.py b/adata/stock/market/index_market/market_index.py
@@ -4,7 +4,7 @@
 @author: 1nchaos
 @date: 2023/06/01 16:17
 """
-
+from adata.stock.market.index_market.market_index_baidu import StockMarketIndexBaidu
 from adata.stock.market.index_market.market_index_east import StockMarketIndexEast
 from adata.stock.market.index_market.market_index_ths import StockMarketIndexThs
 
@@ -17,12 +17,13 @@ class StockMarketIndex(object):
     def __init__(self) -> None:
         self.ths_index = StockMarketIndexThs()
         self.east_index = StockMarketIndexEast()
+        self.baidu_index = StockMarketIndexBaidu()  # tried first by get_market_index
 
     def get_market_index(self, index_code: str = '000001', start_date='2020-01-01', k_type: int = 1):
         """
         获取指数行情
         """
-        res_df = self.east_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type)
+        # Baidu is queried first; THS is consulted only when Baidu returns an empty frame.
+        res_df = self.baidu_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type)
         if res_df.empty:
             res_df = self.ths_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type)
         return res_df

diff --git a/adata/stock/market/index_market/market_index_baidu.py b/adata/stock/market/index_market/market_index_baidu.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+"""
+@desc: 百度股市通
+https://gushitong.baidu.com/
+
+@author: 1nchaos
+@time: 2023/06/19
+@log: change log
+"""
+
+import time
+
+import pandas as pd
+
+from adata.common.headers import baidu_headers
+from adata.common.utils import requests
+from adata.stock.market.index_market.market_index_template import StockMarketIndexTemplate
+
+
+class StockMarketIndexBaidu(StockMarketIndexTemplate):
+    """
+    Index k-line quotes fetched from Baidu Gushitong.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def get_market_index(self, index_code: str = '000001', start_date='2020-01-01', k_type: int = 1):
+        """
+        Fetch index k-line data from Baidu Gushitong.
+        web: https://gushitong.baidu.com/stock/ab-000001
+        Moving-average fields (ma5/ma10/ma20 average price and volume) are not part of the returned columns.
+        :param index_code: 6-digit index code, e.g. '000001'
+        :param start_date: keep rows whose trade_date >= start_date; a falsy value disables the filter
+        :param k_type: k-line type: 1. day; 2. week; 3. month (NOTE: unused for now, the URL hard-codes ktype=day)
+        # :param adjust_type: k-line adjustment: 0. none; 1. forward; 2. backward; default 1 forward TODO
+        :return: DataFrame with _MARKET_INDEX_BASE_COLUMNS plus index_code and trade_date; empty when no data
+        """
+        # 1. Build the request URL (no leading whitespace before the scheme)
+        api_url = f"https://finance.pae.baidu.com/vapi/v1/getquotation?srcid=5353&all=1&pointType=string&" \
+                  f"group=quotation_index_kline&query={index_code}&code={index_code}&market_type=ab&" \
+                  f"newFormat=1&is_kc=0&ktype=day&finClientType=pc"
+
+        res_json = None
+        for _ in range(3):
+            res = requests.request('get', api_url, headers=baidu_headers.json_headers, proxies={})
+            # 2. Validate the response: anything but ResultCode '0' is retried after a short pause
+            res_json = res.json()
+            if res_json.get('ResultCode') == '0':
+                break
+            time.sleep(2)
+        # 3. Parse the data
+        # 3.1 No result: return an empty frame
+        result = res_json.get('Result')
+        if not result:
+            return pd.DataFrame(data=[], columns=self._MARKET_INDEX_COLUMNS)
+
+        # 3.2 Normal parsing: rows are separated by ';' and fields by ','
+        keys = result['newMarketData']['keys']
+        market_data = result['newMarketData']['marketData']
+        # Skip empty segments (e.g. from a trailing ';') so every row matches the column count
+        data = [one.split(',') for one in str(market_data).split(';') if one]
+
+        # 4. Assemble the frame
+        rename_columns = {'turnoverratio': 'turnover_ratio', 'preClose': 'pre_close', 'range': 'change',
+                          'ratio': 'change_pct', 'time': 'trade_time'}
+        result_df = pd.DataFrame(data=data, columns=keys).rename(columns=rename_columns)[
+            self._MARKET_INDEX_BASE_COLUMNS]
+        if result_df.empty:
+            return pd.DataFrame(data=[], columns=self._MARKET_INDEX_COLUMNS)
+        result_df['index_code'] = index_code
+        result_df['trade_date'] = result_df['trade_time']
+        result_df['trade_time'] = pd.to_datetime(result_df['trade_time']).dt.strftime('%Y-%m-%d %H:%M:%S')
+        # 5. Clean the data: drop abnormal rows whose volume and amount are both 0
+        result_df.replace('--', None, inplace=True)
+        result_df.replace('', None, inplace=True)
+        result_df['amount'] = result_df['amount'].astype(float)
+        result_df['volume'] = result_df['volume'].astype(float)
+        result_df = result_df[(result_df['amount'] > 0) | (result_df['volume'] > 0)]
+        # regex=False: strip the literal '+' sign regardless of the pandas default for `regex`
+        result_df['change'] = result_df['change'].str.replace('+', '', regex=False).astype(float)
+        result_df['change_pct'] = result_df['change_pct'].str.replace('+', '', regex=False).astype(float)
+        # NOTE(review): plain string comparison -- assumes both dates use the same format
+        if start_date:
+            result_df = result_df[result_df['trade_date'] >= start_date]
+        return result_df
+
+
+if __name__ == '__main__':  # manual smoke run: sends a live request to the Baidu endpoint
+    print(StockMarketIndexBaidu().get_market_index(index_code='000001', start_date='2021-01-01', k_type=1))
diff --git a/adata/stock/market/index_market/market_index_east.py b/adata/stock/market/index_market/market_index_east.py
@@ -22,13 +22,15 @@ def get_market_index(self, index_code: str = '000001', start_date='2020-01-01',
         """
         获取指数行情
         http://77.push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.000300&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&klt=102&fqt=1&beg=0&end=20500101&smplmt=1247.73&lmt=1000000
+
+        https://push2his.eastmoney.com/api/qt/stock/kline/get?cb=jQuery35106984074321162019_1720433274629&secid=0.399008&ut=fa5fd1943c7b386f172d6893dbfba10b&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5%2Cf6&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58%2Cf59%2Cf60%2Cf61&klt=101&fqt=1&beg=0&end=20500101&smplmt=1419&lmt=1000000&_=1720433274631
         :param start_date: 开始时间
         :param index_code: 指数代码
         :param k_type: k线类型：1.日；2.周；3.月 默认：1 日k
         :return: k线行情数据 [日期，开，高，低，收,成交量，成交额]
         """
         url = f"https://push2his.eastmoney.com/api/qt/stock/kline/get?" \
-              f"secid=1.{index_code}&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&" \
+              f"secid=0.{index_code}&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&" \
               f"klt=10{k_type}&fqt=1&end=20500101&lmt=1000000"
         res_json = requests.request('get', url, headers={}, proxies={}).json()
         # 解析数据
@@ -64,7 +66,7 @@ def get_market_index_min(self, index_code='000001'):
         """
         url = f"http://push2his.eastmoney.com/api/qt/stock/trends2/get?" \
               f"fields1=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13&fields2=f51,f52,f53,f54,f55,f56,f57,f58&" \
-              f"iscr=0&ndays=1&secid=1.{index_code}"
+              f"iscr=0&ndays=1&secid=0.{index_code}"
         res_json = requests.request('get', url, headers={}, proxies={}).json()
         # 解析数据
         code = res_json['data']['code']
@@ -101,7 +103,7 @@ def get_market_index_current(self, index_code: str = '000001'):
         url = f"http://push2.eastmoney.com/api/qt/stock/get?" \
               f"invt=2&fltt=1&fields=f58,f107,f57,f43,f59,f169,f170,f152,f46,f60,f44,f45,f47,f48,f19,f532,f39,f161,f49," \
               f"f171,f50,f86,f600,f601,f154,f84,f85,f168,f108,f116,f167,f164,f92,f71,f117,f292,f113,f114,f115,f119," \
-              f"f120,f121,f122,f296&secid=1.{index_code}&wbp2u=|0|0|0|web"
+              f"f120,f121,f122,f296&secid=0.{index_code}&wbp2u=|0|0|0|web"
         res_json = requests.request('get', url, headers={}, proxies={}).json()
         # 解析数据
         j = res_json['data']

diff --git a/adata/stock/market/index_market/market_index_template.py b/adata/stock/market/index_market/market_index_template.py
@@ -10,8 +10,9 @@ class StockMarketIndexTemplate(object):
     """
     股票指数 行情
     """
-    _MARKET_INDEX_COLUMNS = ['index_code', 'trade_date', 'trade_time', 'open', 'high', 'low', 'close', 'volume',
-                             'amount', 'change', 'change_pct']
+    _MARKET_INDEX_BASE_COLUMNS = ['trade_time', 'open', 'high', 'low', 'close', 'volume', 'amount', 'change',
+                                  'change_pct']
+    _MARKET_INDEX_COLUMNS = ['index_code', 'trade_date'] + _MARKET_INDEX_BASE_COLUMNS  # extend() returns None
     _MARKET_INDEX_MIN_COLUMNS = ['index_code', 'trade_time', 'trade_date', 'price', 'avg_price', 'volume', 'amount',
                                  'change', 'change_pct']
     _MARKET_INDEX_CURRENT_COLUMNS = ['index_code', 'trade_time', 'trade_date', 'open', 'high', 'low', 'price',