首页 > 开发 > 综合 > 正文

C#写的一段解析 CSV 文件的代码

2024-07-21 02:18:32
字体:
来源:转载
供稿:网友
using System;

/**
* The Comma Separated Value (CSV) file format: http://www.creativyst.com/doc/articles/csv/csv01.htm
* 描述:解析 CSV 格式的文件。
* 由这里 http://blog.csdn.net/emu/archive/2003/03/01/16338.aspx 的 Java 代码改写而来(增加了行处理)
* 日期: 2004-10-22 14:55
*/
namespace mitumori {
    /// <summary>
    /// Parses CSV (comma separated value) text into a two-dimensional string array.
    /// </summary>
    public class csvutil {
        // Private constructor: the class only exposes static members.
        private csvutil() {
        }

        /// <summary>
        /// Splits CSV text into a rectangular two-dimensional array.
        /// </summary>
        /// <remarks>
        /// Outside a quoted field, CR, LF, CRLF and LFCR each end a record and a comma ends a field.
        /// A field that starts with a double quote runs until the matching closing quote; inside it,
        /// commas and line breaks are literal and two consecutive double quotes stand for one.
        /// A trailing record separator does not produce an extra empty row, but an empty line between
        /// two records yields a row whose first cell is the empty string.
        /// Rows that have fewer fields than the widest row are padded with empty strings.
        /// </remarks>
        /// <param name="src">The CSV text to parse. May be null or empty.</param>
        /// <returns>
        /// One inner array per record, each as long as the widest record.
        /// A zero-length array when <paramref name="src"/> is null or empty.
        /// </returns>
        /// <exception cref="System.FormatException">
        /// A double quote appears inside an unquoted field, a closed quoted field is followed by
        /// something other than a comma or a line break, or the input ends inside a quoted field.
        /// </exception>
        public static string[][] splitcsv(string src) {
            // Null or empty input yields a zero-length result.
            if (src == null || src.Length == 0) {
                return new string[0][];
            }

            System.Collections.ArrayList lines = new System.Collections.ArrayList(); // finished rows
            System.Collections.ArrayList cells = new System.Collections.ArrayList(); // cells of the current row
            System.Text.StringBuilder field = new System.Text.StringBuilder();       // text of the current cell
            bool beginWithQuote = false; // currently inside an open quoted field
            bool quoteClosed = false;    // a quoted field was just closed; only a separator may follow
            int maxColumns = 0;

            for (int i = 0; i < src.Length; i++) {
                char ch = src[i];

                if (beginWithQuote) {
                    if (ch != '"') {
                        // Commas and line breaks are ordinary characters inside quotes.
                        field.Append(ch);
                    }
                    else if (i + 1 < src.Length && src[i + 1] == '"') {
                        // Doubled quote: emit one literal quote and skip the second.
                        field.Append('"');
                        i++;
                    }
                    else {
                        // Closing quote. The following character is validated on the next iteration,
                        // so a line break or end of input right after it is accepted.
                        beginWithQuote = false;
                        quoteClosed = true;
                    }
                }
                else if (ch == '\r' || ch == '\n') {
                    // Treat CRLF and LFCR as a single record separator by swallowing the partner.
                    char partner = (ch == '\r') ? '\n' : '\r';
                    if (i + 1 < src.Length && src[i + 1] == partner) {
                        i++;
                    }

                    cells.Add(field.ToString());
                    field.Length = 0;
                    quoteClosed = false;

                    if (cells.Count > maxColumns) {
                        maxColumns = cells.Count;
                    }
                    lines.Add(cells);
                    cells = new System.Collections.ArrayList();
                }
                else if (ch == ',') {
                    cells.Add(field.ToString());
                    field.Length = 0;
                    quoteClosed = false;
                }
                else if (quoteClosed) {
                    throw new System.FormatException("single double-quote char mustn't exist in filed " + (cells.Count + 1) + " while it is begined with quote\nchar at:" + i);
                }
                else if (ch == '"') {
                    // A quote may only open a field; it is illegal once unquoted text has started.
                    if (field.Length != 0) {
                        throw new System.FormatException("quote cannot exist in a filed which doesn't begin with quote!\nfield:" + (cells.Count + 1));
                    }
                    beginWithQuote = true;
                }
                else {
                    field.Append(ch);
                }
            }

            if (beginWithQuote) {
                throw new System.FormatException("last field is begin with but not end with double quote");
            }

            // Flush the last record when the input does not end with a record separator.
            // cells.Count > 0 covers a trailing comma (empty last field); quoteClosed covers
            // input that ends right after a closing quote, including an empty quoted field.
            if (cells.Count > 0 || field.Length != 0 || quoteClosed) {
                cells.Add(field.ToString());
                if (cells.Count > maxColumns) {
                    maxColumns = cells.Count;
                }
                lines.Add(cells);
            }

            string[][] ret = new string[lines.Count][];
            for (int row = 0; row < ret.Length; row++) {
                System.Collections.ArrayList rowCells = (System.Collections.ArrayList) lines[row];
                ret[row] = new string[maxColumns];
                for (int col = 0; col < maxColumns; col++) {
                    // Short rows are padded so that every row has maxColumns entries.
                    ret[row][col] = col < rowCells.Count ? (string) rowCells[col] : "";
                }
            }
            return ret;
        }

        /// <summary>
        /// Manual smoke test: parses a sample record containing quoted commas and doubled quotes
        /// and prints each cell on its own line, with a blank line after each row.
        /// On failure, the exception's stack trace is printed instead.
        /// </summary>
        /// <param name="args">Unused.</param>
        public static void amain(string[] args) {
            string src1 = "\"fh,zg\",sdf,\"asfs,\",\",dsdf\",\"aadf\"\"\",\"\"\"hdfg\",\"fgh\"\"dgnh\",hgfg'dfh,\"asdfa\"\"\"\"\",\"\"\"\"\"fgjhg\",\"gfhg\"\"\"\"hb\"\n";
            try {
                string[][] ret = splitcsv(src1);
                for (int i = 0; i < ret.Length; i++) {
                    for (int j = 0; j < ret[i].Length; j++) {
                        System.Console.WriteLine(ret[i][j]);
                    }
                    System.Console.WriteLine();
                }
            }
            catch (System.Exception e) {
                System.Console.WriteLine(e.StackTrace);
            }
        }
    }
}



发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表