Skip to content

Commit

Permalink
Merge pull request #80 from 1nchaos/dev
Browse files Browse the repository at this point in the history
fixed
  • Loading branch information
1nchaos authored Jul 8, 2024
2 parents b461ef6 + 1337c9a commit eba9646
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 110 deletions.
2 changes: 1 addition & 1 deletion adata/__version__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

VERSION = (2, 3, 0)
VERSION = (2, 3, 1)
PRERELEASE = None # alpha, beta or rc
REVISION = None

Expand Down
131 changes: 29 additions & 102 deletions adata/stock/info/stock_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,11 @@
@time: 2023/5/23
@log: change log
"""
import copy

import pandas as pd
from bs4 import BeautifulSoup

from adata.common.exception.exception_msg import *
from adata.common.headers import ths_headers
from adata.common.utils import cookie, requests
from adata.stock.cache.index_code_rel_ths import rel
from adata.common.utils import requests


class StockIndex(object):
Expand All @@ -47,51 +43,37 @@ def all_index_code(self):
concept_code为同花顺的概念代码
:return: 指数信息[name,index_code,concept_code,source]
"""
return self.__all_index_code_ths()
return self.__all_index_code_east()

def __all_index_code_ths(self):
def __all_index_code_east(self, wait_time=0):
"""
获取同花顺所有行情中心的指数代码
http://q.10jqka.com.cn/zs/
上面地址可不用翻页
东方财富指数列表
https://quote.eastmoney.com/center/gridlist.html#index_sh
https://39.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&dect=1&wbp2u=|0|0|0|web&fid=f3&fs=m:1+s:2&fields=f12,f14&_=1720430951494
:return: 指数信息[name,index_code,concept_code,source]
"""
# 1. url拼接页码等参数
data = []
total_pages = 1
curr_page = 1
while curr_page <= total_pages:
api_url = f"http://q.10jqka.com.cn/zs/index/field/zdf/order/desc/page/{curr_page}/ajax/1/"
headers = copy.deepcopy(ths_headers.text_headers)
headers['Cookie'] = cookie.ths_cookie()
res = requests.request(method='get', url=api_url, headers=headers, proxies={})
curr_page += 1
# 2. 判断请求是否成功
if res.status_code != 200:
continue
text = res.text
soup = BeautifulSoup(text, 'html.parser')
# 3 .获取总的页数
if total_pages == 1:
page_info = soup.find('span', {'class': 'page_info'})
if page_info:
total_pages = int(page_info.text.split("/")[1])
# 4. 解析数据
page_data = []
for idx, tr in enumerate(soup.find_all('tr')):
if idx != 0:
tds = tr.find_all('td')
a_href = tds[1].find('a')
page_data.append({'index_code': tds[1].contents[0].text,
'concept_code': a_href['href'].split('/')[-2],
'name': tds[2].contents[0].text, 'source': '同花顺'})
data.extend(page_data)
# 5. 封装数据
if not data:
return pd.DataFrame(data=data, columns=self.__INDEX_CODE_COLUMN)
result_df = pd.DataFrame(data=data)
data.clear()
return result_df[self.__INDEX_CODE_COLUMN]
for i in range(2):
curr_page = 1
while curr_page < 88:
if i == 0:
url = f"https://39.push2.eastmoney.com/api/qt/clist/get?" \
f"pn={curr_page}&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&" \
f"dect=1&wbp2u=|0|0|0|web&fid=f3&fs=m:1+s:2&fields=f12,f14&_=1720430951494"
else:
url = f"https://31.push2.eastmoney.com/api/qt/clist/get?" \
f"pn={curr_page}&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&dect=1&" \
f"wbp2u=|0|0|0|web&fid=f3&fs=m:0+t:5&fields=f12,f14&_=1720432207117"
res_json = requests.request('get', url, headers={}, proxies={}, wait_time=wait_time).json()
res_data = res_json['data']
if not res_data:
break
res_data = res_data['diff']
for _ in res_data:
data.append({'index_code': _['f12'], 'name': _['f14'], 'source': '东方财富', 'concept_code': ''})
curr_page += 1
result_df = pd.DataFrame(data=data, columns=self.__INDEX_CODE_COLUMN)
return result_df

def index_constituent(self, index_code=None, wait_time=None):
"""
Expand All @@ -101,61 +83,7 @@ def index_constituent(self, index_code=None, wait_time=None):
:param wait_time: 等待时间:毫秒;表示每个请求的间隔时间,主要用于防止请求太频繁的限制。
:return: ['index_code', 'stock_code', 'short_name']
"""
res = self.__index_constituent_baidu(index_code=index_code)
if not res.empty:
return res
return self.__index_constituent_ths(index_code=index_code, wait_time=wait_time)

def __index_constituent_ths(self, index_code=None, wait_time=None):
"""
同花顺指数成分股
:param index_code: 指数代码 399282
:param wait_time: 等待时间:毫秒;表示每个请求的间隔时间,主要用于防止请求太频繁的限制。
:return:['index_code', 'stock_code', 'short_name']
"""
# 转换抓取的code,
catch_code = rel[index_code] if index_code.startswith('0') and index_code in rel.keys() else index_code
# 转换指数代码
index_code = rel[index_code] if ('A' in index_code or 'B' in index_code or 'C' in index_code) and index_code in rel.keys() else index_code
# 1. url拼接页码等参数
data = []
total_pages = 1
curr_page = 1
while curr_page <= total_pages:
api_url = f"http://q.10jqka.com.cn/zs/detail/field/199112/order/desc/page/" \
f"{curr_page}/ajax/1/code/{catch_code}"
headers = copy.deepcopy(ths_headers.text_headers)
headers['Cookie'] = cookie.ths_cookie()
res = requests.request(method='get', url=api_url, headers=headers, proxies={}, wait_time=wait_time)
curr_page += 1
# 2. 判断请求是否成功
if res.status_code != 200:
continue
text = res.text
if THS_IP_LIMIT_RES in res:
raise Exception(THS_IP_LIMIT_MSG)
if '暂无成份股数据' in text or '概念板块' in text or '概念时间表' in text:
break
soup = BeautifulSoup(text, 'html.parser')
# 3 .获取总的页数
if total_pages == 1:
page_info = soup.find('span', {'class': 'page_info'})
if page_info:
total_pages = int(page_info.text.split("/")[1])
# 4. 解析数据
page_data = []
for idx, tr in enumerate(soup.find_all('tr')):
if idx != 0:
tds = tr.find_all('td')
page_data.append({'index_code': index_code, 'stock_code': tds[1].contents[0].text,
'short_name': tds[2].contents[0].text})
data.extend(page_data)
# 5. 封装数据
if not data:
return pd.DataFrame(data=data, columns=self.__INDEX_CONSTITUENT_COLUMN)
result_df = pd.DataFrame(data=data)
data.clear()
return result_df[self.__INDEX_CONSTITUENT_COLUMN]
return self.__index_constituent_baidu(index_code=index_code)

def __index_constituent_baidu(self, index_code=None):
"""
Expand Down Expand Up @@ -245,5 +173,4 @@ def __index_constituent_sina(self, index_code=None, wait_time=None):

if __name__ == '__main__':
print(StockIndex().all_index_code())
# print(StockIndex().index_constituent(index_code='000033'))
# print(StockIndex().index_constituent(index_code='399387', wait_time=158))
print(StockIndex().index_constituent(index_code='000113'))
5 changes: 3 additions & 2 deletions adata/stock/market/index_market/market_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
@author: 1nchaos
@date: 2023/06/01 16:17
"""

from adata.stock.market.index_market.market_index_baidu import StockMarketIndexBaidu
from adata.stock.market.index_market.market_index_east import StockMarketIndexEast
from adata.stock.market.index_market.market_index_ths import StockMarketIndexThs

Expand All @@ -17,12 +17,13 @@ class StockMarketIndex(object):
def __init__(self) -> None:
self.ths_index = StockMarketIndexThs()
self.east_index = StockMarketIndexEast()
self.baidu_index = StockMarketIndexBaidu()

def get_market_index(self, index_code: str = '000001', start_date='2020-01-01', k_type: int = 1):
"""
获取指数行情
"""
res_df = self.east_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type)
res_df = self.baidu_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type)
if res_df.empty:
res_df = self.ths_index.get_market_index(index_code=index_code, start_date=start_date, k_type=k_type)
return res_df
Expand Down
90 changes: 90 additions & 0 deletions adata/stock/market/index_market/market_index_baidu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
"""
@desc: 百度股市通
https://gushitong.baidu.com/
@author: 1nchaos
@time: 2023/06/19
@log: change log
"""

import time

import pandas as pd

from adata.common.headers import baidu_headers
from adata.common.utils import requests
from adata.stock.market.index_market.market_index_template import StockMarketIndexTemplate


class StockMarketIndexBaidu(StockMarketIndexTemplate):
    """
    Index K-line quotes fetched from Baidu Gushitong (https://gushitong.baidu.com/).
    """

    def __init__(self) -> None:
        super().__init__()

    def get_market_index(self, index_code: str = '000001', start_date='2020-01-01', k_type: int = 1):
        """
        Fetch K-line quotes for one index from Baidu Gushitong.
        web: https://gushitong.baidu.com/stock/ab-000001

        :param index_code: 6-digit index code
        :param start_date: earliest trade date to keep; rows whose trade_date compares lower are dropped.
            A falsy value disables the filter.
        :param k_type: K-line type: 1. day; 2. week; 3. month.
            NOTE(review): not applied yet - the request below always asks for ktype=day.
        :return: DataFrame with the _MARKET_INDEX_BASE_COLUMNS columns plus index_code and trade_date;
            an empty DataFrame built with _MARKET_INDEX_COLUMNS when there is no data
        """
        # 1. Build the request URL (no leading whitespace in the scheme).
        api_url = f"https://finance.pae.baidu.com/vapi/v1/getquotation?srcid=5353&all=1&pointType=string&" \
                  f"group=quotation_index_kline&query={index_code}&code={index_code}&market_type=ab&" \
                  f"newFormat=1&is_kc=0&ktype=day&finClientType=pc"

        # 2. Request with up to three attempts; stop as soon as ResultCode is '0'.
        max_attempts = 3
        res_json = None
        for attempt in range(max_attempts):
            res = requests.request('get', api_url, headers=baidu_headers.json_headers, proxies={})
            res_json = res.json()
            if res_json['ResultCode'] == '0':
                break
            # Only wait when another attempt will follow.
            if attempt < max_attempts - 1:
                time.sleep(2)

        # 3. Parse the payload
        # 3.1 An empty Result means there is nothing to return.
        result = res_json['Result']
        if not result:
            return pd.DataFrame(data=[], columns=self._MARKET_INDEX_COLUMNS)

        # 3.2 marketData is split on ';' into rows and on ',' into fields; keys names the fields.
        keys = result['newMarketData']['keys']
        market_data = result['newMarketData']['marketData']
        # Skip empty segments (empty payload or trailing ';') so they do not become
        # one-field rows that cannot be matched against keys.
        data = [row.split(',') for row in str(market_data).split(';') if row]
        if not data:
            return pd.DataFrame(data=[], columns=self._MARKET_INDEX_COLUMNS)

        # 4. Assemble the result frame
        rename_columns = {'turnoverratio': 'turnover_ratio', 'preClose': 'pre_close', 'range': 'change',
                          'ratio': 'change_pct', 'time': 'trade_time'}
        # .copy() gives an independent frame so the column assignments below do not target a slice.
        result_df = pd.DataFrame(data=data, columns=keys).rename(columns=rename_columns)[
            self._MARKET_INDEX_BASE_COLUMNS].copy()
        result_df['index_code'] = index_code
        result_df['trade_date'] = result_df['trade_time']
        result_df['trade_time'] = pd.to_datetime(result_df['trade_time']).dt.strftime('%Y-%m-%d %H:%M:%S')

        # 5. Clean up: turn placeholder values into missing values, then drop rows
        #    where neither amount nor volume is greater than 0.
        result_df.replace('--', None, inplace=True)
        result_df.replace('', None, inplace=True)
        result_df['amount'] = result_df['amount'].astype(float)
        result_df['volume'] = result_df['volume'].astype(float)
        result_df = result_df[(result_df['amount'] > 0) | (result_df['volume'] > 0)].copy()
        # regex=False: '+' must be removed as a literal character, independent of the
        # pandas version's default for the regex argument.
        result_df['change'] = result_df['change'].str.replace('+', '', regex=False).astype(float)
        result_df['change_pct'] = result_df['change_pct'].str.replace('+', '', regex=False).astype(float)
        if start_date:
            result_df = result_df[result_df['trade_date'] >= start_date]
        return result_df


if __name__ == '__main__':
    # Manual smoke test: fetch and print the K-line data of index 000001 from 2021-01-01 on.
    print(StockMarketIndexBaidu().get_market_index(index_code='000001', start_date='2021-01-01', k_type=1))
8 changes: 5 additions & 3 deletions adata/stock/market/index_market/market_index_east.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ def get_market_index(self, index_code: str = '000001', start_date='2020-01-01',
"""
获取指数行情
http://77.push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.000300&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&klt=102&fqt=1&beg=0&end=20500101&smplmt=1247.73&lmt=1000000
https://push2his.eastmoney.com/api/qt/stock/kline/get?cb=jQuery35106984074321162019_1720433274629&secid=0.399008&ut=fa5fd1943c7b386f172d6893dbfba10b&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5%2Cf6&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58%2Cf59%2Cf60%2Cf61&klt=101&fqt=1&beg=0&end=20500101&smplmt=1419&lmt=1000000&_=1720433274631
:param start_date: 开始时间
:param index_code: 指数代码
:param k_type: k线类型:1.日;2.周;3.月 默认:1 日k
:return: k线行情数据 [日期,开,高,低,收,成交量,成交额]
"""
url = f"https://push2his.eastmoney.com/api/qt/stock/kline/get?" \
f"secid=1.{index_code}&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&" \
f"secid=0.{index_code}&fields1=f1,f2,f3,f4,f5,f6&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61&" \
f"klt=10{k_type}&fqt=1&end=20500101&lmt=1000000"
res_json = requests.request('get', url, headers={}, proxies={}).json()
# 解析数据
Expand Down Expand Up @@ -64,7 +66,7 @@ def get_market_index_min(self, index_code='000001'):
"""
url = f"http://push2his.eastmoney.com/api/qt/stock/trends2/get?" \
f"fields1=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13&fields2=f51,f52,f53,f54,f55,f56,f57,f58&" \
f"iscr=0&ndays=1&secid=1.{index_code}"
f"iscr=0&ndays=1&secid=0.{index_code}"
res_json = requests.request('get', url, headers={}, proxies={}).json()
# 解析数据
code = res_json['data']['code']
Expand Down Expand Up @@ -101,7 +103,7 @@ def get_market_index_current(self, index_code: str = '000001'):
url = f"http://push2.eastmoney.com/api/qt/stock/get?" \
f"invt=2&fltt=1&fields=f58,f107,f57,f43,f59,f169,f170,f152,f46,f60,f44,f45,f47,f48,f19,f532,f39,f161,f49," \
f"f171,f50,f86,f600,f601,f154,f84,f85,f168,f108,f116,f167,f164,f92,f71,f117,f292,f113,f114,f115,f119," \
f"f120,f121,f122,f296&secid=1.{index_code}&wbp2u=|0|0|0|web"
f"f120,f121,f122,f296&secid=0.{index_code}&wbp2u=|0|0|0|web"
res_json = requests.request('get', url, headers={}, proxies={}).json()
# 解析数据
j = res_json['data']
Expand Down
5 changes: 3 additions & 2 deletions adata/stock/market/index_market/market_index_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ class StockMarketIndexTemplate(object):
"""
股票指数 行情
"""
_MARKET_INDEX_COLUMNS = ['index_code', 'trade_date', 'trade_time', 'open', 'high', 'low', 'close', 'volume',
'amount', 'change', 'change_pct']
# K-line columns without the index_code / trade_date identifiers.
_MARKET_INDEX_BASE_COLUMNS = ['trade_time', 'open', 'high', 'low', 'close', 'volume', 'amount', 'change',
'change_pct']
# list.extend() mutates in place and returns None, which would bind this name to None;
# concatenate instead so the full column list is actually stored.
_MARKET_INDEX_COLUMNS = ['index_code', 'trade_date'] + _MARKET_INDEX_BASE_COLUMNS
_MARKET_INDEX_MIN_COLUMNS = ['index_code', 'trade_time', 'trade_date', 'price', 'avg_price', 'volume', 'amount',
'change', 'change_pct']
_MARKET_INDEX_CURRENT_COLUMNS = ['index_code', 'trade_time', 'trade_date', 'open', 'high', 'low', 'price',
Expand Down

0 comments on commit eba9646

Please sign in to comment.