先贴代码后简单解释:
public static object[,] BuildTable(Stream inputStream)
{
object[,] table = null;
using (var doc =
SpreadsheetDocument.Open(
inputStream, false))
{
Sheet sheet = doc.WorkbookPart.Workbook.Descendants().FirstOrDefault();
var worksheetPart = (WorksheetPart) doc.WorkbookPart.GetPartById(sheet.Id);
var headers = worksheetPart.Worksheet.Descendants()
.FirstOrDefault(x => x.RowIndex == 1);
SheetData sheetData = worksheetPart.Worksheet.GetFirstChild();
var allRows = sheetData.Elements()
.Where(x => x.RowIndex > 1 && !string.IsNullOrEmpty(x.InnerText)).ToList();
var headerCells = headers.Descendants().ToList();
var headerCellReferences =
headerCells.Select(c => Regex.Replace(c.CellReference, @"\d+", string.Empty)).ToArray();
SharedStringTablePart sstPart = doc.WorkbookPart.GetPartsOfType().First();
SharedStringTable ssTable = sstPart.SharedStringTable;
WorkbookStylesPart workbookStylesPart = doc.WorkbookPart.GetPartsOfType().First();
CellFormats cellFormats = workbookStylesPart.Stylesheet.CellFormats;
var headerStrings = headerCells.Select(c => ProcessCellValue(c, ssTable, cellFormats)).ToArray();
table = new object[allRows.Count + 1,headerStrings.Length];
//assign column title to the table
for (int i = 0; i < headerStrings.Length; i++)
{
table[0, i] = headerStrings[i];
}
//assign all cell values to the table
for (int i = 0; i < allRows.Count; i++)
{
var cells = allRows[i].Elements().Select(c =>
{
c.CellReference = Regex.Replace(c.CellReference, @"\d+", string.Empty);
return c;
});
for (int j = 0; j < headerCells.Count; j++)
{
table[i + 1, j] =
ProcessCellValue(
cells.FirstOrDefault(
c =>
c.CellReference == headerCellReferences[j]),
ssTable, cellFormats);
}
}
}
return table;
}
///
/// Process the valus of a cell and return a .NET value
///
private static object ProcessCellValue(Cell c, SharedStringTable ssTable, CellFormats cellFormats)
{
if (c == null) return null;
// If there is no data type, this must be a string that has been formatted as a number
if (c.DataType == null)
{
if (c.CellValue == null) return string.Empty;
if (c.StyleIndex == null) return c.CellValue.Text;
CellFormat cf =
cellFormats.Descendants()
.ElementAt(Convert.ToInt32(c.StyleIndex.Value));
if (cf.NumberFormatId >= 0 && cf.NumberFormatId <= 13) // This is a number
{
return Convert.ToDecimal(c.CellValue.Text);
}
if (cf.NumberFormatId >= 14 && cf.NumberFormatId <= 22) // This is a date
{
return DateTime.FromOADate(Convert.ToDouble(c.CellValue.Text));
}
return c.CellValue.Text;
}
switch (c.DataType.Value)
{
case CellValues.SharedString:
return ssTable.ChildElements[Convert.ToInt32(c.CellValue.Text)].InnerText;
case CellValues.Boolean:
return c.CellValue.Text == "1";
case CellValues.Date:
return DateTime.FromOADate(Convert.ToDouble(c.CellValue.Text));
case CellValues.Number:
return Convert.ToDecimal(c.CellValue.Text);
default:
return c.CellValue != null ? c.CellValue.Text : string.Empty;
}
}
| |
主要值得提的是为什么要使用EXCEL的列头(如A,B,C,D)来取数据而不是用索引。如下面代码片断:
var headerCellReferences =
headerCells.Select(c => Regex.Replace(c.CellReference, @"\d+", string.Empty)).ToArray();
上面是把EXCEL中所有行的单元格的列头由A1,A2,B1,B2全部变成A,A,B,B,用于后期单元格定位。
如果当你某一列或多列EXCEL单元格是空值,那么取出来的单元格集合将不包含此列,因此集合的长度不确定, 我们不能用索引来取每一行特定单元格的值。
使用:
BuildTable(Request.Files["filename"].InputStream)
public static IEnumerable BuildList(object[,] table)
{
var list = new List();
Type t = typeof(T);
PropertyInfo[] props = t.GetProperties();
for (int i = 1; i < table.GetLength(0); i++)
{
var item = Activator.CreateInstance();
for (int j = 0; j < table.GetLength(1); j++)
{
var title = Regex.Replace(table[0, j].ToString(), @"\s+", string.Empty);
var prop =
props.FirstOrDefault(p => p.Name.Equals(title, StringComparison.InvariantCultureIgnoreCase));
if (prop != null && table[i, j] != null && !string.IsNullOrEmpty(table[i, j].ToString().Trim()))
{
prop.SetValue(item, ConvertHelper.ChangeType(table[i, j], prop.PropertyType), null);
}
}
list.Add(item);
}
return list;
}
var table = ExcelHelper.BuildTable(file.InputStream);
var students = ExcelHelper.BuildList(table);