diff --git a/adata/__version__.py b/adata/__version__.py index 1bea758..e841060 100644 --- a/adata/__version__.py +++ b/adata/__version__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -VERSION = (2, 3, 0) +VERSION = (2, 3, 1) PRERELEASE = None # alpha, beta or rc REVISION = None diff --git a/adata/stock/info/stock_index.py b/adata/stock/info/stock_index.py index 3ebd90d..5e3774b 100644 --- a/adata/stock/info/stock_index.py +++ b/adata/stock/info/stock_index.py @@ -19,15 +19,11 @@ @time: 2023/5/23 @log: change log """ -import copy import pandas as pd from bs4 import BeautifulSoup -from adata.common.exception.exception_msg import * -from adata.common.headers import ths_headers -from adata.common.utils import cookie, requests -from adata.stock.cache.index_code_rel_ths import rel +from adata.common.utils import requests class StockIndex(object): @@ -47,51 +43,37 @@ def all_index_code(self): concept_code为同花顺的概念代码 :return: 指数信息[name,index_code,concept_code,source] """ - return self.__all_index_code_ths() + return self.__all_index_code_east() - def __all_index_code_ths(self): + def __all_index_code_east(self, wait_time=0): """ - 获取同花顺所有行情中心的指数代码 - http://q.10jqka.com.cn/zs/ - 上面地址可不用翻页 + 东方财富指数列表 + https://quote.eastmoney.com/center/gridlist.html#index_sh + https://39.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&dect=1&wbp2u=|0|0|0|web&fid=f3&fs=m:1+s:2&fields=f12,f14&_=1720430951494 :return: 指数信息[name,index_code,concept_code,source] """ - # 1. url拼接页码等参数 data = [] - total_pages = 1 - curr_page = 1 - while curr_page <= total_pages: - api_url = f"http://q.10jqka.com.cn/zs/index/field/zdf/order/desc/page/{curr_page}/ajax/1/" - headers = copy.deepcopy(ths_headers.text_headers) - headers['Cookie'] = cookie.ths_cookie() - res = requests.request(method='get', url=api_url, headers=headers, proxies={}) - curr_page += 1 - # 2. 判断请求是否成功 - if res.status_code != 200: - continue - text = res.text - soup = BeautifulSoup(text, 'html.parser') - # 3 .获取总的页数 - if total_pages == 1: - page_info = soup.find('span', {'class': 'page_info'}) - if page_info: - total_pages = int(page_info.text.split("/")[1]) - # 4. 解析数据 - page_data = [] - for idx, tr in enumerate(soup.find_all('tr')): - if idx != 0: - tds = tr.find_all('td') - a_href = tds[1].find('a') - page_data.append({'index_code': tds[1].contents[0].text, - 'concept_code': a_href['href'].split('/')[-2], - 'name': tds[2].contents[0].text, 'source': '同花顺'}) - data.extend(page_data) - # 5. 封装数据 - if not data: - return pd.DataFrame(data=data, columns=self.__INDEX_CODE_COLUMN) - result_df = pd.DataFrame(data=data) - data.clear() - return result_df[self.__INDEX_CODE_COLUMN] + for i in range(2): + curr_page = 1 + while curr_page < 88: + if i == 0: + url = f"https://39.push2.eastmoney.com/api/qt/clist/get?" \ + f"pn={curr_page}&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&" \ + f"dect=1&wbp2u=|0|0|0|web&fid=f3&fs=m:1+s:2&fields=f12,f14&_=1720430951494" + else: + url = f"https://31.push2.eastmoney.com/api/qt/clist/get?" \ + f"pn={curr_page}&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&dect=1&" \ + f"wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:5&fields=f12,f14&_=1720432207117" + res_json = requests.request('get', url, headers={}, proxies={}, wait_time=wait_time).json() + res_data = res_json['data'] + if not res_data: + break + res_data = res_data['diff'] + for _ in res_data: + data.append({'index_code': _['f12'], 'name': _['f14'], 'source': '东方财富', 'concept_code': ''}) + curr_page += 1 + result_df = pd.DataFrame(data=data, columns=self.__INDEX_CODE_COLUMN) + return result_df def index_constituent(self, index_code=None, wait_time=None): """ @@ -101,61 +83,7 @@ def index_constituent(self, index_code=None, wait_time=None): :param wait_time: 等待时间:毫秒;表示每个请求的间隔时间,主要用于防止请求太频繁的限制。 :return: ['index_code', 'stock_code', 'short_name'] """ - res = self.__index_constituent_baidu(index_code=index_code) - if not res.empty: - return res - return self.__index_constituent_ths(index_code=index_code, wait_time=wait_time) - - def __index_constituent_ths(self, index_code=None, wait_time=None): - """ - 同花顺指数成分股 - :param index_code: 指数代码 399282 - :param wait_time: 等待时间:毫秒;表示每个请求的间隔时间,主要用于防止请求太频繁的限制。 - :return:['index_code', 'stock_code', 'short_name'] - """ - # 转换抓取的code, - catch_code = rel[index_code] if index_code.startswith('0') and index_code in rel.keys() else index_code - # 转换指数代码 - index_code = rel[index_code] if ('A' in index_code or 'B' in index_code or 'C' in index_code) and index_code in rel.keys() else index_code - # 1. url拼接页码等参数 - data = [] - total_pages = 1 - curr_page = 1 - while curr_page <= total_pages: - api_url = f"http://q.10jqka.com.cn/zs/detail/field/199112/order/desc/page/" \ - f"{curr_page}/ajax/1/code/{catch_code}" - headers = copy.deepcopy(ths_headers.text_headers) - headers['Cookie'] = cookie.ths_cookie() - res = requests.request(method='get', url=api_url, headers=headers, proxies={}, wait_time=wait_time) - curr_page += 1 - # 2. 判断请求是否成功 - if res.status_code != 200: - continue - text = res.text - if THS_IP_LIMIT_RES in res: - raise Exception(THS_IP_LIMIT_MSG) - if '暂无成份股数据' in text or '概念板块' in text or '概念时间表' in text: - break - soup = BeautifulSoup(text, 'html.parser') - # 3 .获取总的页数 - if total_pages == 1: - page_info = soup.find('span', {'class': 'page_info'}) - if page_info: - total_pages = int(page_info.text.split("/")[1]) - # 4. 解析数据 - page_data = [] - for idx, tr in enumerate(soup.find_all('tr')): - if idx != 0: - tds = tr.find_all('td') - page_data.append({'index_code': index_code, 'stock_code': tds[1].contents[0].text, - 'short_name': tds[2].contents[0].text}) - data.extend(page_data) - # 5. 封装数据 - if not data: - return pd.DataFrame(data=data, columns=self.__INDEX_CONSTITUENT_COLUMN) - result_df = pd.DataFrame(data=data) - data.clear() - return result_df[self.__INDEX_CONSTITUENT_COLUMN] + return self.__index_constituent_baidu(index_code=index_code) def __index_constituent_baidu(self, index_code=None): """ @@ -245,5 +173,4 @@ def __index_constituent_sina(self, index_code=None, wait_time=None): if __name__ == '__main__': print(StockIndex().all_index_code()) - # print(StockIndex().index_constituent(index_code='000033')) - # print(StockIndex().index_constituent(index_code='399387', wait_time=158)) + print(StockIndex().index_constituent(index_code='000113')) diff --git a/adata/stock/market/index_market/market_index.py b/adata/stock/market/index_market/market_index.py index f10dd77..0fff60e 100644 --- a/adata/stock/market/index_market/market_index.py +++ b/adata/stock/market/index_market/market_index.py @@ -4,7 +4,7 @@ @author: 1nchaos @date: 2023/06/01 16:17 """ - +from adata.stock.market.index_market.market_index_baidu import StockMarketIndexBaidu from adata.stock.market.index_market.market_index_east import StockMarketIndexEast from adata.stock.market.index_market.market_index_ths import StockMarketIndexThs @@ -17,12 +17,13 @@ class StockMarketIndex(object): def __init__(self) -> None: self.ths_index = StockMarketIndexThs() self.east_index = StockMarketIndexEast() + self.baidu_index = StockMarketIndexBaidu() def get_market_index(self, index_code: str = '000001', start_date='2020-01-01', k_type: int = 1): """ 获取指数行情 """ - res_df = self.east_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type) + res_df = self.baidu_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type) if res_df.empty: res_df = self.ths_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type) return res_df diff --git a/adata/stock/market/index_market/market_index_baidu.py b/adata/stock/market/index_market/market_index_baidu.py new file mode 100644 index 0000000..8d5d331 --- /dev/null +++ b/adata/stock/market/index_market/market_index_baidu.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +""" +@desc: 百度股市通 +https://gushitong.baidu.com/ + +@author: 1nchaos +@time: 2023/06/19 +@log: change log +""" + +import time + +import pandas as pd + +from adata.common.headers import baidu_headers +from adata.common.utils import requests +from adata.stock.market.index_market.market_index_template import StockMarketIndexTemplate + + +class StockMarketIndexBaidu(StockMarketIndexTemplate): + """ + 百度股票行情 + """ + + def __init__(self) -> None: + super().__init__() + + def get_market_index(self, index_code: str = '000001', start_date='2020-01-01', k_type: int = 1): + """ + 获取百度的股票行情数据 + web: https://gushitong.baidu.com/stock/ab-000001 + "ma5均价","ma5成交量","ma10均价","ma10成交量","ma20均价","ma20成交量" + :param index_code: 6位股票代码 + :param start_date: 开始时间 + :param k_type: k线类型:1.日;2.周;3.月 + # :param adjust_type: k线复权类型:0.不复权;1.前复权;2.后复权 默认:1 前复权 TODO + :return: k线行情数据:"时间戳", "时间","开盘","收盘","成交量","最高","最低","成交额","涨跌额","涨跌幅","换手率","昨收" + """ + # 1. 请求接口 url + api_url = f" https://finance.pae.baidu.com/vapi/v1/getquotation?srcid=5353&all=1&pointType=string&" \ + f"group=quotation_index_kline&query={index_code}&code={index_code}&market_type=ab&" \ + f"newFormat=1&is_kc=0&ktype=day&finClientType=pc" + + res_json = None + for i in range(3): + res = requests.request('get', api_url, headers=baidu_headers.json_headers, proxies={}) + # 2. 校验请求结果数据 + res_json = res.json() + if res_json['ResultCode'] == '0': + break + time.sleep(2) + # 3.解析数据 + # 3.1 空数据时返回为空 + result = res_json['Result'] + if not result: + return pd.DataFrame(data=[], columns=self._MARKET_INDEX_COLUMNS) + + # 3.2. 正常解析数据 + keys = res_json['Result']['newMarketData']['keys'] + market_data = res_json['Result']['newMarketData']['marketData'] + market_data_list = str(market_data).split(';') + data = [] + for one in market_data_list: + data.append(one.split(',')) + + # 4. 封装数据 + rename_columns = {'turnoverratio': 'turnover_ratio', 'preClose': 'pre_close', 'range': 'change', + 'ratio': 'change_pct', 'time': 'trade_time'} + result_df = pd.DataFrame(data=data, columns=keys).rename(columns=rename_columns)[ + self._MARKET_INDEX_BASE_COLUMNS] + if result_df.empty: + return pd.DataFrame(data=[], columns=self._MARKET_INDEX_COLUMNS) + result_df['index_code'] = index_code + result_df['trade_date'] = result_df['trade_time'] + result_df['trade_time'] = pd.to_datetime(result_df['trade_time']).dt.strftime('%Y-%m-%d %H:%M:%S') + # 5. 数据清洗,剔除成交量且成交额为0的异常数据 + result_df.replace('--', None, inplace=True) + result_df.replace('', None, inplace=True) + result_df['amount'] = result_df['amount'].astype(float) + result_df['volume'] = result_df['volume'].astype(float) + result_df = result_df[(result_df['amount'] > 0) | (result_df['volume'] > 0)] + result_df['change'] = result_df['change'].str.replace('+', '').astype(float) + result_df['change_pct'] = result_df['change_pct'].str.replace('+', '').astype(float) + if start_date: + result_df = result_df[result_df['trade_date'] >= start_date] + return result_df + + +if __name__ == '__main__': + print(StockMarketIndexBaidu().get_market_index(index_code='000001', start_date='2021-01-01', k_type=1)) diff --git a/adata/stock/market/index_market/market_index_east.py b/adata/stock/market/index_market/market_index_east.py index 3b130a8..1f5dd7c 100644 --- a/adata/stock/market/index_market/market_index_east.py +++ b/adata/stock/market/index_market/market_index_east.py @@ -22,13 +22,15 @@ def get_market_index(self, index_code: str = '000001', start_date='2020-01-01', """ 获取指数行情 http://77.push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.000300&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&klt=102&fqt=1&beg=0&end=20500101&smplmt=1247.73&lmt=1000000 + + https://push2his.eastmoney.com/api/qt/stock/kline/get?cb=jQuery35106984074321162019_1720433274629&secid=0.399008&ut=fa5fd1943c7b386f172d6893dbfba10b&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5%2Cf6&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58%2Cf59%2Cf60%2Cf61&klt=101&fqt=1&beg=0&end=20500101&smplmt=1419&lmt=1000000&_=1720433274631 :param start_date: 开始时间 :param index_code: 指数代码 :param k_type: k线类型:1.日;2.周;3.月 默认:1 日k :return: k线行情数据 [日期,开,高,低,收,成交量,成交额] """ url = f"https://push2his.eastmoney.com/api/qt/stock/kline/get?" \ - f"secid=1.{index_code}&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&" \ + f"secid=0.{index_code}&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&" \ f"klt=10{k_type}&fqt=1&end=20500101&lmt=1000000" res_json = requests.request('get', url, headers={}, proxies={}).json() # 解析数据 @@ -64,7 +66,7 @@ def get_market_index_min(self, index_code='000001'): """ url = f"http://push2his.eastmoney.com/api/qt/stock/trends2/get?" \ f"fields1=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13&fields2=f51,f52,f53,f54,f55,f56,f57,f58&" \ - f"iscr=0&ndays=1&secid=1.{index_code}" + f"iscr=0&ndays=1&secid=0.{index_code}" res_json = requests.request('get', url, headers={}, proxies={}).json() # 解析数据 code = res_json['data']['code'] @@ -101,7 +103,7 @@ def get_market_index_current(self, index_code: str = '000001'): url = f"http://push2.eastmoney.com/api/qt/stock/get?" \ f"invt=2&fltt=1&fields=f58,f107,f57,f43,f59,f169,f170,f152,f46,f60,f44,f45,f47,f48,f19,f532,f39,f161,f49," \ f"f171,f50,f86,f600,f601,f154,f84,f85,f168,f108,f116,f167,f164,f92,f71,f117,f292,f113,f114,f115,f119," \ - f"f120,f121,f122,f296&secid=1.{index_code}&wbp2u=|0|0|0|web" + f"f120,f121,f122,f296&secid=0.{index_code}&wbp2u=|0|0|0|web" res_json = requests.request('get', url, headers={}, proxies={}).json() # 解析数据 j = res_json['data'] diff --git a/adata/stock/market/index_market/market_index_template.py b/adata/stock/market/index_market/market_index_template.py index ed87376..98ba9e5 100644 --- a/adata/stock/market/index_market/market_index_template.py +++ b/adata/stock/market/index_market/market_index_template.py @@ -10,8 +10,9 @@ class StockMarketIndexTemplate(object): """ 股票指数 行情 """ - _MARKET_INDEX_COLUMNS = ['index_code', 'trade_date', 'trade_time', 'open', 'high', 'low', 'close', 'volume', - 'amount', 'change', 'change_pct'] + _MARKET_INDEX_BASE_COLUMNS = ['trade_time', 'open', 'high', 'low', 'close', 'volume', 'amount', 'change', + 'change_pct'] + _MARKET_INDEX_COLUMNS = ['index_code', 'trade_date'].extend(_MARKET_INDEX_BASE_COLUMNS) _MARKET_INDEX_MIN_COLUMNS = ['index_code', 'trade_time', 'trade_date', 'price', 'avg_price', 'volume', 'amount', 'change', 'change_pct'] _MARKET_INDEX_CURRENT_COLUMNS = ['index_code', 'trade_time', 'trade_date', 'open', 'high', 'low', 'price',