Click
here to login or sign-up
I'm sure there's an easier way to do this, but I've been maintaining my own class to do the job for a while now and keep needing to find it in my code library every time I use it - I figure it might be useful to others:-
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
namespace CSVParser
{
/// <summary>
/// Simon's stock CSV parser class
/// </summary>
public class CSVParser
{
/// <summary>
/// Takes the CSV files contents and creates a data table from it
/// </summary>
/// <param name="csvFileContents">The entire contents of a CSV file split by line (rather than the filename)</param>
/// <param name="validateLayout">Validate the file to check conformance of the layout with expected standards</param>
/// <param name="topRowIsHeader">The top row is the header row, take it as the column names rather than data</param>
/// <returns>A datatable</returns>
public static DataTable DataTableFromCSV(string[] csvFileContents, bool validateLayout, bool topRowIsHeader)
{
DataTable outputDataTable = new DataTable();
List<string[]> csvFileRows = new List<string[]>();
List<string> columns = new List<string>();
#region Pre-parse the file
int columnCount = 0;
bool gotHeaders = false;
int rowNumber = 0;
foreach (string line in csvFileContents)
{
string[] parts = ExtractCSVElements(line);
//initial set of header names but only if the top row is header option is set
if (!gotHeaders && topRowIsHeader)
{
columns.AddRange(parts);
columnCount = parts.Length;
gotHeaders = true;
}
else
{
if (parts.Length > 0)
{
csvFileRows.Add(parts);
}
}
if (parts.Length > columnCount)
{
//if set to validate the layout and that the first row contains the headers then we know any extra columns are wrong
if (validateLayout && gotHeaders)
{
throw new Exception("Row has extra data columns: " + rowNumber.ToString());
}
//new column detected mid-data-set!
for (int i = columnCount; i < parts.Length; i++)
{
columns.Add("Column " + i.ToString());
}
columnCount = parts.Length;
}
//we always ignore zero length rows as the last line can be empty
if (parts.Length < columnCount && parts.Length != 0)
{
if (validateLayout)
{
throw new Exception("Row has missing data columns: " + rowNumber.ToString());
}
}
rowNumber++;
}
#endregion
#region Build the data tables layout and data
//columns
foreach (string column in columns)
{
outputDataTable.Columns.Add(column);
}
//rows
foreach (string[] row in csvFileRows)
{
outputDataTable.Rows.Add(row);
}
#endregion
return outputDataTable;
}
/// <summary>
/// Extract the elements of a line from a CSV file with support for quotes
/// </summary>
/// <param name="line">The data to parse</param>
private static string[] ExtractCSVElements(string line)
{
List<string> elements = new List<string>();
//do the initial split, based on commas
string[] firstParts = line.Split(',');
//reparse it
StringBuilder temporaryPart = new StringBuilder("");
bool inside = false;
foreach (string part in firstParts)
{
//are we inside a quoted part, or did we just find a quote?
if (!inside && !part.Contains("\""))
{
elements.Add(part);
}
else
{
if (inside)
{
//we are still inside a quote...
temporaryPart.Append("," + part);
if (part.Contains("\""))
{
//then we are also at the end!
elements.Add(temporaryPart.Replace("\"", "").ToString()); //add the part minus its quotes to the array
//all done!
inside = false;
}
}
else
{
//else we just found a quote!
inside = true;
temporaryPart = new StringBuilder(part);
}
}
}
return elements.ToArray();
}
}
}
Permalink 0 Comments Digg This!
CommentsYou must be logged in to post comments. Click here to login or signup.
Server:
echo - Your IP:
38.107.191.82 -
Copyright © 2010
Simon Soanes