首页 > 编程 > C# > 正文

使用NPOI读取Word、Excel文档内容

2020-01-24 00:19:41
字体:
来源:转载
供稿:网友

使用NPOI读取Excel的例子很多,读取Word的例子不多。

Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。

Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。

Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)

也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。

using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;

namespace eyuan
{
    /// <summary>
    /// Helpers that extract text from Excel workbooks (via <c>XSSFWorkbook</c>) and
    /// Word documents (via <c>XWPFDocument</c>) using NPOI.
    /// Separators and capture switches are read from the application's appSettings.
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Reads every sheet of a workbook into nested lists: workbook -> sheets -> rows -> cell texts.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// One list per sheet, each holding one list per existing row, each holding the
        /// <c>ToString()</c> text of every physically stored cell. If the file cannot be
        /// opened the failure is logged and an empty list is returned.
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            // Open the workbook; the stream is only needed while the workbook is being constructed.
            XSSFWorkbook workbook = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    workbook = new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                // Without a workbook there is nothing to read; returning here avoids
                // dereferencing the null workbook below.
                return workBookContent;
            }

            // Sheet indexes start at 0.
            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                ISheet sheet = workbook.GetSheetAt(i);
                List<List<string>> sheetContent = new List<List<string>>();

                // GetRow takes a logical (0-based) row index, so walk up to the last logical
                // row instead of counting physical rows: a sheet with gaps has fewer physical
                // rows than its last row index, and GetRow yields null for the gaps.
                int lastRowIndex = sheet.LastRowNum;
                for (int j = 0; j <= lastRowIndex; j++)
                {
                    IRow row = sheet.GetRow(j);
                    if (row == null)
                    {
                        // Row was never created in the file; nothing to capture.
                        continue;
                    }

                    // Rows may have different numbers of cells; only stored cells are listed.
                    List<string> rowContent = new List<string>();
                    foreach (ICell cell in row.Cells)
                    {
                        if (cell == null)
                        {
                            rowContent.Add("NIL");
                        }
                        else
                        {
                            rowContent.Add(cell.ToString());
                        }
                    }

                    sheetContent.Add(rowContent);
                }

                workBookContent.Add(sheetContent);
            }

            return workBookContent;
        }

        /// <summary>
        /// Flattens a workbook into a single string using the separators configured under the
        /// appSettings keys <c>ExcelCellSeparator</c>, <c>ExcelRowSeparator</c> and
        /// <c>ExcelSheetSeparator</c>.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// Cell texts joined by the cell separator, each row followed by the row separator and
        /// each sheet followed by the sheet separator; empty when the workbook yields no content.
        /// </returns>
        public static string ReadExcelText(string fileName)
        {
            string ExcelCellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string ExcelRowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string ExcelSheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            List<List<List<string>>> excelContent = ReadExcel(fileName);
            StringBuilder sbFileText = new StringBuilder();

            // Each sheet of the workbook.
            foreach (List<List<string>> sheetContent in excelContent)
            {
                // Each row of the current sheet.
                foreach (List<string> rowContent in sheetContent)
                {
                    sbFileText.Append(string.Join(ExcelCellSeparator, rowContent.ToArray()));
                    sbFileText.Append(ExcelRowSeparator);
                }

                sbFileText.Append(ExcelSheetSeparator);
            }

            return sbFileText.ToString();
        }

        /// <summary>
        /// Extracts the text of a Word document: optionally headers, footers, tables and
        /// embedded pictures, followed by the body paragraphs.
        /// </summary>
        /// <remarks>
        /// Each optional section is emitted only when its appSettings switch
        /// (<c>CaptureWordHeader</c>, <c>CaptureWordFooter</c>, <c>CaptureWordTable</c>,
        /// <c>CaptureWordImage</c>) equals the exact string <c>"true"</c>. Pictures are written
        /// to disk using <c>CaptureWordImageFileName</c> as a format string whose <c>{0}</c>
        /// placeholder receives the generated file name; the resulting paths are listed in the output.
        /// </remarks>
        /// <param name="fileName">Path of the Word document to open.</param>
        /// <returns>
        /// The captured text with "Capture ... Begin/End" marker lines around each section.
        /// If the file cannot be opened the failure is logged and an empty string is returned.
        /// </returns>
        public static string ReadWordText(string fileName)
        {
            string WordTableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string WordTableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string WordTableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            string CaptureWordHeader = ConfigurationManager.AppSettings["CaptureWordHeader"];
            string CaptureWordFooter = ConfigurationManager.AppSettings["CaptureWordFooter"];
            string CaptureWordTable = ConfigurationManager.AppSettings["CaptureWordTable"];
            string CaptureWordImage = ConfigurationManager.AppSettings["CaptureWordImage"];

            string CaptureWordImageFileName = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

            StringBuilder sbFileText = new StringBuilder();

            #region Open document
            XWPFDocument document = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    document = new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
                // No document to read; returning here avoids dereferencing the null document below.
                return string.Empty;
            }
            #endregion

            #region Headers and footers
            if (CaptureWordHeader == "true")
            {
                sbFileText.AppendLine("Capture Header Begin");
                foreach (XWPFHeader xwpfHeader in document.HeaderList)
                {
                    sbFileText.AppendLine(xwpfHeader.Text);
                }
                sbFileText.AppendLine("Capture Header End");
            }

            if (CaptureWordFooter == "true")
            {
                sbFileText.AppendLine("Capture Footer Begin");
                foreach (XWPFFooter xwpfFooter in document.FooterList)
                {
                    sbFileText.AppendLine(xwpfFooter.Text);
                }
                sbFileText.AppendLine("Capture Footer End");
            }
            #endregion

            #region Tables
            if (CaptureWordTable == "true")
            {
                sbFileText.AppendLine("Capture Table Begin");
                foreach (XWPFTable table in document.Tables)
                {
                    foreach (XWPFTableRow row in table.Rows)
                    {
                        foreach (XWPFTableCell cell in row.GetTableCells())
                        {
                            // Every cell, including the last one in a row, is followed by the cell separator.
                            sbFileText.Append(cell.GetText());
                            sbFileText.Append(WordTableCellSeparator);
                        }
                        sbFileText.Append(WordTableRowSeparator);
                    }
                    sbFileText.Append(WordTableSeparator);
                }
                sbFileText.AppendLine("Capture Table End");
            }
            #endregion

            #region Pictures
            if (CaptureWordImage == "true")
            {
                sbFileText.AppendLine("Capture Image Begin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    string picExtName = pictureData.suggestFileExtension();
                    string picFileName = pictureData.GetFileName();
                    byte[] picFileContent = pictureData.GetData();

                    // A GUID prefix keeps names unique across pictures and across calls.
                    // NOTE(review): GetFileName() may already carry the extension, in which case
                    // the name ends in a doubled extension - confirm before changing the pattern.
                    string picTempName = string.Format(
                        CaptureWordImageFileName,
                        Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                    // Creates the file, or overwrites it if it already exists.
                    File.WriteAllBytes(picTempName, picFileContent);

                    sbFileText.AppendLine(picTempName);
                }
                sbFileText.AppendLine("Capture Image End");
            }
            #endregion

            // Body paragraphs are always captured.
            sbFileText.AppendLine("Capture Paragraph Begin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                sbFileText.AppendLine(paragraph.ParagraphText);
            }
            sbFileText.AppendLine("Capture Paragraph End");

            return sbFileText.ToString();
        }
    }
}

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持武林网。

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表