首页 > 编程 > Python > 正文

python pandas 对时间序列文件处理的实例

2020-02-15 21:59:06
字体:
来源:转载
供稿:网友

如下所示:

# Utilities that turn payment logs (CSV files with a shop-id column ``B`` and a
# timestamp column ``C``) into per-day payment counts, one CSV per shop.
import pandas as pd
from numpy import *  # noqa: F401,F403 - kept so names this module exports do not change
import numpy as np

try:
    # Nothing in this module plots; do not make matplotlib a hard requirement.
    import matplotlib.pylab as plt
except ImportError:
    plt = None

# Imported after the numpy wildcard on purpose: numpy also exports a ``copy``.
import copy
import os


def _rows_for_shop(frame, shop_id):
    """Return the ``B``..``C`` columns of *frame* for rows whose ``B`` equals *shop_id*."""
    columns = frame.loc[:, 'B':'C']
    return columns[columns.B == shop_id]


def _daily_counts(rows):
    """Return a Series ``D`` (index ``C``) holding the number of *rows* per calendar day."""
    stamps = pd.DatetimeIndex(pd.to_datetime(rows['C']), name='C')
    ones = pd.Series(1, index=stamps, name='D')
    # The original discarded the result of fillna(); keep it as part of the value.
    return ones.resample('D').sum().fillna(0)


def _counts_by_shop(frame, shop_ids):
    """Yield ``(shop_id, daily_counts)`` for each requested id that occurs in *frame*.

    One ``groupby`` pass replaces a full scan of the frame per shop id.
    """
    wanted = set(shop_ids)
    for shop_id, rows in frame.groupby('B'):
        if shop_id in wanted:
            yield shop_id, _daily_counts(rows)


def _append_daily(daily, path):
    """Append *daily* to the CSV at *path*, writing the header only for a new file."""
    is_new = not os.path.exists(path) or os.path.getsize(path) == 0
    daily.to_csv(path, mode='a', header=is_new)


def read(filename, shop_id=855, out_path='855_pay.csv', chunk_size=1000000):
    """Count one shop's rows per day, reading *filename* in chunks.

    Args:
        filename: CSV file containing at least the columns ``B`` and ``C``.
        shop_id: Value of column ``B`` to keep.
        out_path: CSV file that receives the result (overwritten).
        chunk_size: Number of rows parsed per chunk.

    Returns:
        The daily-count Series that was written, or ``None`` when no row
        matched (in which case nothing is written).
    """
    parts = []
    for chunk in pd.read_csv(filename, chunksize=chunk_size):
        rows = _rows_for_shop(chunk, shop_id)
        if not rows.empty:
            parts.append(_daily_counts(rows))
    print("Iteration is stopped.")
    if not parts:
        return None
    # A day can be split across two chunks and whole days can be missing
    # between chunks, so the partial results are re-binned as one series.
    total = pd.concat(parts).sort_index().resample('D').sum()
    total.to_csv(out_path, header=True)
    return total


def read2(filename, chunk_size=100000):
    """Load *filename* chunk by chunk and return it as a single DataFrame."""
    chunks = list(pd.read_csv(filename, chunksize=chunk_size))
    print("Iteration is stopped.")
    if not chunks:
        # pd.concat rejects an empty list; let read_csv build the result instead.
        return pd.read_csv(filename)
    return pd.concat(chunks, ignore_index=True)


def read3save(filename, shop_id=855, out_path='855_pay.csv'):
    """Count one shop's rows per day and append the result to *out_path*.

    Args:
        filename: CSV file containing at least the columns ``B`` and ``C``.
        shop_id: Value of column ``B`` to keep.
        out_path: CSV file to append to; nothing is written when no row matches.
    """
    rows = _rows_for_shop(pd.read_csv(filename), shop_id)
    print(rows.shape)
    if rows.empty:
        return
    _append_daily(_daily_counts(rows), out_path)


def loadDataSet(fileName, delim='\t'):
    """Parse a delimited text file of numbers into a numpy matrix.

    Args:
        fileName: Path of the text file; blank lines are ignored.
        delim: Field separator. Defaults to a tab character (the original
            default was the two-character string ``/t``, which never matches
            a tab).

    Returns:
        A ``numpy.matrix`` with one row per non-blank line.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    with open(fileName) as handle:
        stripped = (line.strip() for line in handle)
        rows = [list(map(float, line.split(delim))) for line in stripped if line]
    # numpy.mat was removed in NumPy 2; asmatrix is the equivalent spelling.
    return np.asmatrix(rows)


def getShopData(shop_file='shopInfo.txt', pay_files=None, out_dir='D:/python/data'):
    """Write per-day row counts for every listed shop, one CSV per shop.

    Args:
        shop_file: Text file with one integer shop id per line.
        pay_files: CSV files to scan; defaults to ``user_pay.001.001`` ..
            ``user_pay.001.008``.
        out_dir: Directory that receives ``<shop id>_pay.csv`` (appended to).
    """
    with open(shop_file) as handle:
        stripped = (line.strip() for line in handle)
        shop_ids = [int(line) for line in stripped if line]
    if pay_files is None:
        pay_files = ["user_pay.001.00%d" % i for i in range(1, 9)]
    for name in pay_files:
        frame = pd.read_csv(name)
        # Every shop is selected from the full frame; the original narrowed
        # the frame to the first shop and then found nothing for the rest.
        for shop_id, daily in _counts_by_shop(frame, shop_ids):
            print(daily.shape)
            _append_daily(daily, '%s/%s_pay.csv' % (out_dir, shop_id))
        del frame  # release the parsed file before loading the next one
    print("over")


def tset(filename, savename='D:/python/data/my_pay.csv'):
    """Same as :func:`read3save` for shop 855, appending to *savename*."""
    read3save(filename, shop_id=855, out_path=savename)


def getShopData2(filename, out_dir='D:/python/data2', shop_ids=range(1, 2001)):
    """Write per-day row counts of *filename* for each id in *shop_ids*.

    Args:
        filename: CSV file containing at least the columns ``B`` and ``C``.
        out_dir: Directory that receives ``<shop id>_pay.csv`` (appended to,
            with a single ``C,D`` header line per file).
        shop_ids: Values of column ``B`` to export; ids without rows are skipped.
    """
    frame = pd.read_csv(filename)
    for shop_id, daily in _counts_by_shop(frame, shop_ids):
        print(shop_id)
        _append_daily(daily, '%s/%s_pay.csv' % (out_dir, shop_id))
    print("over")


def formatData(src_dir='D:/python/data2', dst_dir='D:/python/data3',
               shop_ids=range(1, 2001)):
    """Re-bin each shop's CSV from *src_dir* to one row per day in *dst_dir*.

    Args:
        src_dir: Directory holding ``<shop id>_pay.csv`` files with a ``C``
            date column.
        dst_dir: Directory that receives the re-binned files (overwritten).
        shop_ids: Shop ids to process; ids without a source file are skipped.
    """
    for shop_id in shop_ids:
        s = str(shop_id)
        print(s)
        name = '%s/%s_pay.csv' % (src_dir, s)
        if not os.path.exists(name):
            continue  # shops without any rows never got a file
        data = pd.read_csv(name)
        data['C'] = pd.to_datetime(data['C'])
        data = data.set_index('C').resample('D').sum().fillna(0)
        data.to_csv('%s/%s_pay.csv' % (dst_dir, s), mode='w')
    print("over")
发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表