首页 > 学院 > 开发设计 > 正文

[python]数据整理,将取得的众多的沪深龙虎榜数据整一整

2019-11-14 16:58:55
字体:
来源:转载
供稿:网友

将昨日取得的众多的沪深龙虎榜数据整一整

提取文件夹内所有抓取下来的沪深龙虎榜数据,整理出沪深两市(含中小创)涨幅榜股票及前5大买入卖出资金净值,保存到csv文件

再手动使用数据透视表进行统计

原始数据:

整理后数据:

代码如下(如果觉得对于炒股有用,敬请使用):

  # coding=utf-8
"""Merge scraped Shanghai/Shenzhen "dragon-tiger list" text files into a CSV.

Every ``*.txt`` file found in ``./files`` is expected to be named
``<date>_<market>.txt``.  Files whose market part is ``上证`` are read by
the Shanghai parser; every other file is read by the Shenzhen parser.  For
each listed stock the script sums the buy-side and sell-side amounts and
appends one row to ``./沪深龙虎榜统计_<today>.csv``.

The published listing of this script had its backslashes turned into
forward slashes (``'/n'``, ``'/ +'``, ``"/'"``); the escapes are restored
here.
"""

import re
import os
import time
import datetime

INPUT_DIR = r'./files'
SHANGHAI_MARKET = '上证'
CSV_HEADER = '"日期","代码","名称","净流入流出","流入","流出","偏离值","成交量","成交金额(万元)"'

# Line that opens the only Shenzhen section this script extracts.
SZ_SECTION_HEADER = '日涨幅偏离值达到7%的前五只证券'
# NOTE(review): the published listing tests `info.endswith('')` on the header
# line, which is always true and made the Shenzhen parser stop immediately.
# The literal was presumably a "no entries" marker lost when the page was
# scraped -- confirm against a real input file and fill it in.  While it is
# empty the check is skipped so that the section is actually parsed.
SZ_EMPTY_SECTION_SUFFIX = ''

# Runs of spaces become a single '_' so that columns can be split on '_'.
# Restored from the listing's '/ +', presumably a mangled escaped-space
# pattern -- confirm.
_SPACE_RUN = re.compile(r' +')


def writeFile(file, stocks, BS, day):
    """Append one CSV row per stock to *file*.

    Args:
        file: writable text stream receiving the rows.
        stocks: list of dicts with the keys ``code``, ``name``, ``偏离值``,
            ``成交量`` and ``成交金额(万元)`` (all string values).
        BS: dict mapping a stock code to ``{'buy': str, 'sell': str}``.
        day: date string written in the first column.

    Raises:
        KeyError: if a stock's code has no entry in *BS*.

    Each row starts with a newline (the header line is written without one).
    The code column is prefixed with an apostrophe inside the quotes.
    """
    for s in stocks:
        flows = BS[s['code']]
        net = float(flows['buy']) - float(flows['sell'])
        file.write('\n')
        file.write(day
                   + ',"\'' + s['code']
                   + '","' + s['name']
                   + '",' + str(net)
                   + ',' + flows['buy']
                   + ',' + flows['sell']
                   + ',' + s['偏离值']
                   + ',"' + s['成交量']
                   + '","' + s['成交金额(万元)'] + '"')


def _normalize(line):
    """Collapse space runs to '_' and drop newlines."""
    return _SPACE_RUN.sub('_', line).replace('\n', '')


def _flows(buy, sell):
    """Package the running totals the way writeFile expects them."""
    return {'buy': str(buy), 'sell': str(sell)}


def _parse_shenzhen_stock(info):
    """Build a stock record from a normalized Shenzhen line containing '(代码'."""
    name_and_code = info.split('(代码')
    return {
        'code': name_and_code[1].split(')')[0],
        'name': name_and_code[0],
        '偏离值': info.split('涨幅偏离值:')[1].split('_')[0],
        '成交量': info.split('成交量:')[1].split('_')[0],
        '成交金额(万元)': info.split('成交金额:_')[1],
    }


def _collect_shanghai(infos, out, day):
    """Parse the first section of a Shanghai file and write its rows to *out*.

    The section is read as a stock table (lines starting with ``_(``)
    followed by one detail block per stock: a ``_证券代码`` line, buy rows,
    a ``_卖出`` line, then sell rows.  Rows are written only when a line
    starting with ``_2`` is reached while reading sell rows; if the input
    ends earlier nothing is written.
    """
    STOCKS, DETAIL_HEADER, BUY, SELL = range(4)
    state = STOCKS
    stocks = []
    BS = {}
    nowStock = ''
    buy = 0
    sell = 0

    for raw in infos:
        info = _normalize(raw)

        if state == STOCKS:
            if info.startswith('_2'):
                # Next section reached before any detail block: nothing to write.
                break
            if info.startswith('_('):
                tmp = info.split('_')
                stocks.append({'code': tmp[2], 'name': tmp[3], '偏离值': tmp[4],
                               '成交量': tmp[5], '成交金额(万元)': tmp[6]})
            elif info.startswith('_证券代码:'):
                nowStock = info.split('_')[2]
                state = BUY

        elif state == DETAIL_HEADER:
            if info.startswith('_证券代码'):
                nowStock = info.split('_')[2]
                state = BUY

        elif state == BUY:
            if info.startswith('_('):
                buy = buy + float(info.split('_')[3])
            elif info.startswith('_卖出'):
                state = SELL

        else:  # SELL
            if info.startswith('_('):
                sell = sell + float(info.split('_')[3])
            elif info.startswith('_证券'):
                # A new detail block begins: close the current stock's totals.
                BS[nowStock] = _flows(buy, sell)
                buy = 0
                sell = 0
                if info.startswith('_证券代码'):
                    nowStock = info.split('_')[2]
                    state = BUY
                else:
                    state = DETAIL_HEADER
            elif info.startswith('_2'):
                # Section finished: flush everything collected so far.
                BS[nowStock] = _flows(buy, sell)
                writeFile(out, stocks, BS, day)
                break


def _collect_shenzhen(infos, out, day):
    """Parse the SZ_SECTION_HEADER section of a Shenzhen file into *out*.

    Lines before the header are skipped.  After it, each line containing
    ``(代码`` starts a stock; the rows that follow carry two numeric columns
    (fields 1 and 2 after splitting on '_') that are folded into the buy
    total until the ``卖出金额最大的前5名`` line and into the sell total
    afterwards.  Rows are written when a line starting with ``--`` is
    reached; if the input ends earlier nothing is written.
    """
    IDLE, STOCKS, BUY, SELL = range(4)
    state = IDLE
    stocks = []
    BS = {}
    nowStock = ''
    buy = 0
    sell = 0

    for raw in infos:
        info = _normalize(raw)
        if info == '':
            continue

        if state == IDLE:
            if not info.startswith(SZ_SECTION_HEADER):
                continue
            if SZ_EMPTY_SECTION_SUFFIX and info.endswith(SZ_EMPTY_SECTION_SUFFIX):
                break
            state = STOCKS

        elif state == STOCKS:
            if '(代码' in info:
                if info.startswith('--'):
                    writeFile(out, stocks, BS, day)
                    break
                stock = _parse_shenzhen_stock(info)
                nowStock = stock['code']
                stocks.append(stock)
                state = BUY

        elif state == BUY:
            if info.startswith(('买入金额最大的前5名', '营业部或交易单元名称')):
                continue
            if info.startswith('卖出金额最大的前5名'):
                state = SELL
            else:
                fields = info.split('_')
                buy = buy + float(fields[1]) - float(fields[2])

        else:  # SELL
            if info.startswith('营业部或交易单元名称'):
                continue
            if info.startswith('--'):
                BS[nowStock] = _flows(buy, sell)
                writeFile(out, stocks, BS, day)
                break
            if '代码' in info:
                # Next stock begins: close the current one first.
                BS[nowStock] = _flows(buy, sell)
                buy = 0
                sell = 0
                stock = _parse_shenzhen_stock(info)
                nowStock = stock['code']
                stocks.append(stock)
                state = BUY
            else:
                fields = info.split('_')
                sell = sell - float(fields[1]) + float(fields[2])


def main():
    """Read every input file in name order and write the summary CSV."""
    files = sorted(os.listdir(INPUT_DIR))
    nowStr = datetime.datetime.now().strftime("%Y-%m-%d")
    out_path = r'./沪深龙虎榜统计_' + nowStr + '.csv'

    with open(out_path, 'w') as allfile:
        allfile.write(CSV_HEADER)
        for name in files:
            full_path = INPUT_DIR + '/' + name
            if not (os.path.isfile(full_path) and name.endswith('.txt')):
                continue

            # File names look like "<date>_<market>.txt".
            parts = name.replace('.txt', '').split('_')
            print('读取文件:' + full_path)
            day = parts[0]

            with open(full_path, 'rt') as src:
                infos = src.readlines()

            if parts[1] == SHANGHAI_MARKET:
                _collect_shanghai(infos, allfile, day)
            else:
                _collect_shenzhen(infos, allfile, day)

    print('统计完成!' + '文件:' + out_path)


if __name__ == '__main__':
    main()

 


发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表