首页 > 编程 > Python > 正文

python使用正则搜索字符串或文件中的浮点数代码实例

2019-11-25 18:19:51
字体:
来源:转载
供稿:网友

平时经常用 Python 和 NumPy 处理数据,于是写了几个小函数,可以方便地读写数据:

# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# FileName: gettxtdata.py
# Purpose: read numeric (floating-point) data from strings and text files.
# Provides MATLAB-like dlmread / dlmwrite helpers together with
# loadtxtdata / savetxtdata.
# Date: 2013-1-10
# Author: 吴徐平
# ----------------------------------------------------------------------
import re
from io import StringIO

import numpy

# A '#' starts a comment that runs to the end of its line ('.' never
# matches a newline, so each match stops at the line break).
_COMMENT_RE = re.compile(r'#.*')

# One float literal: optional sign, digits with an optional fraction (or a
# bare fraction such as ".5"), and an optional exponent that must carry
# digits. Dangling '.', 'e', '+' or '-' characters are never matched, so no
# repeated clean-up passes are required.
_FLOAT_RE = re.compile(r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?')


def _extract_floats(text):
    """Return all float literals found in *text* as a 1-D float64 array.

    Comments ('#' to end of line) are removed before searching. The result
    has shape (0,) when *text* contains no numbers.
    """
    tokens = _FLOAT_RE.findall(_COMMENT_RE.sub(' ', text))
    return numpy.array([float(token) for token in tokens],
                       dtype=numpy.float64)


# ----------------------------------------------------------------------
def StringToDoubleArray(String):
    """Extract every floating-point number contained in a string.

    Everything that is not part of a number is treated as a separator, and
    text from '#' to the end of a line is ignored as a comment.

    Args:
        String: text to scan; may span several lines.

    Returns:
        A one-dimensional numpy.float64 array holding the numbers in the
        order they appear (empty when there are none).
    """
    return _extract_floats(String)


# ----------------------------------------------------------------------
def GetDoubleListFromString(String):
    """Extract the floating-point numbers of a string, line by line.

    The string is split on line breaks; each line is scanned the same way
    as in StringToDoubleArray. Lines without any number (blank lines,
    comment-only lines) are skipped.

    Args:
        String: text to scan.

    Returns:
        A list of one-dimensional numpy.float64 arrays, one per line that
        contains at least one number. Rows may differ in length.
    """
    rows = (_extract_floats(line) for line in String.splitlines())
    return [row for row in rows if row.size]


# ----------------------------------------------------------------------
def GetDoubleListFromFile(FileName):
    """Read a text file and extract its numbers line by line.

    Args:
        FileName: path of the text file to read.

    Returns:
        A list of one-dimensional numpy.float64 arrays as produced by
        GetDoubleListFromString; rows may differ in length.
    """
    # The context manager closes the file even if reading fails.
    with open(FileName, 'r') as handle:
        content = handle.read()
    return GetDoubleListFromString(content)


def dlmread(FileName, dtype=numpy.float64):
    """Load numeric data from a text file into a two-dimensional array.

    Any non-numeric characters (';', ',', whitespace, ...) act as
    separators and text after '#' is ignored. This is the most tolerant
    reader in this module, at the cost of speed.

    Args:
        FileName: path of the text file to read.
        dtype: element type of the returned array.

    Returns:
        A two-dimensional numpy.ndarray with one row per data line. When
        rows differ in length, every row is truncated to the length of the
        shortest one. A file without numbers yields an array of shape
        (0, 0).
    """
    rows = GetDoubleListFromFile(FileName)
    if not rows:
        return numpy.empty([0, 0], dtype=dtype)

    # Smallest column count over all rows: the widest rectangle that every
    # row can fill.
    column_count = min(row.size for row in rows)
    return numpy.array([row[:column_count] for row in rows], dtype=dtype)


# ----------------------------------------------------------------------
def loadtxtdata(filename, delimiter=""):
    """Load numeric data from a regularly formatted text file.

    Each character of *delimiter*, as well as ',' and ';', is replaced by
    a space before the text is handed to numpy.genfromtxt. Faster than
    dlmread but stricter about the layout of the file: numpy.genfromtxt
    expects every row to have the same number of columns.

    Args:
        filename: path of the text file to read.
        delimiter: extra separator characters, each treated individually.

    Returns:
        The array produced by numpy.genfromtxt for the normalised text.
    """
    with open(filename, 'r') as handle:
        text = handle.read()

    for separator in delimiter + ",;":
        text = text.replace(separator, " ")

    return numpy.genfromtxt(StringIO(text))


# ----------------------------------------------------------------------
def savetxtdata(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """Save an array to a text file using numpy.savetxt.

    Args:
        filename: path of the file to write.
        X: array-like data to store.
        fmt: format string applied to each value.
        delimiter: string written between columns.
        newline: string written after each row.

    Returns:
        True once the data has been written.
    """
    numpy.savetxt(filename, X, fmt=fmt, delimiter=delimiter, newline=newline)
    return True


# ----------------------------------------------------------------------
def dlmwrite(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """Save an array to a text file (MATLAB-style alias of savetxtdata).

    Args:
        filename: path of the file to write.
        X: array-like data to store.
        fmt: format string applied to each value.
        delimiter: string written between columns.
        newline: string written after each row.

    Returns:
        True once the data has been written.
    """
    return savetxtdata(filename, X, fmt=fmt, delimiter=delimiter,
                       newline=newline)


# ----------------------------------------------------------------------
# Demo / smoke test
# ----------------------------------------------------------------------
if __name__ == '__main__':
    import os
    import tempfile

    # Random sample data, written to a temporary file so the demo does not
    # depend on a particular drive or directory existing.
    data = numpy.random.randn(3, 4)
    file_descriptor, filename = tempfile.mkstemp(suffix='.txt')
    os.close(file_descriptor)
    try:
        dlmwrite(filename, data)
        x = GetDoubleListFromFile(filename)
        print(x)
        print(dlmread(filename))
        y = StringToDoubleArray('79l890joj')
        print(y)
        z = loadtxtdata(filename)
        print(z)
    finally:
        os.remove(filename)

我只在python2.7中试过,如果要在python3.x中使用,可自行测试.

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表