-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
3 changed files
with
226 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
using Ben.Collections.Specialized; | ||
using Knapcode.NCsvPerf.HomeGrown; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
|
||
namespace Knapcode.NCsvPerf.CsvReadable
{
    /// <summary>
    /// Package: N/A
    /// Source: see HomeGrownImproved.cs in this repository
    /// </summary>
    /// <remarks>
    /// Adapter that drives <see cref="HomeGrownImproved"/> line by line and
    /// materializes one <c>T</c> per parsed line.
    /// </remarks>
    public class HomeGrown2 : ICsvReader
    {
        private readonly ActivationMethod _activationMethod;

        /// <summary>
        /// Creates the reader.
        /// </summary>
        /// <param name="activationMethod">Passed to <c>ActivatorFactory.Create</c> to obtain the record factory.</param>
        public HomeGrown2(ActivationMethod activationMethod) => _activationMethod = activationMethod;

        /// <summary>
        /// Parses every line of <paramref name="stream"/> and returns one record per line.
        /// </summary>
        /// <typeparam name="T">Record type; populated through its <c>Read</c> method.</typeparam>
        /// <param name="stream">CSV content. It is wrapped in a <see cref="StreamReader"/> that is disposed before returning.</param>
        /// <returns>The records in the order their lines were read.</returns>
        public List<T> GetRecords<T>(MemoryStream stream) where T : ICsvReadable, new()
        {
            var createRecord = ActivatorFactory.Create<T>(_activationMethod);
            var results = new List<T>();

            // A single list is reused for every line; the parser clears it on each call.
            var rowFields = new List<string>();

            using (var textReader = new StreamReader(stream))
            {
                var lineParser = new HomeGrownImproved(
                    buffer: new char[200],
                    stringPool: new InternPool().Intern);

                while (true)
                {
                    if (!lineParser.TryReadLine(textReader, rowFields))
                    {
                        break;
                    }

                    var item = createRecord();
                    item.Read(index => rowFields[index]);
                    results.Add(item);
                }
            }

            return results;
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
|
||
namespace Knapcode.NCsvPerf.HomeGrown
{
    /// <summary>
    /// Turns a span of characters into a string. Implementations may return a
    /// shared (pooled) instance instead of allocating a new one.
    /// </summary>
    /// <param name="text">The characters of a single field.</param>
    /// <returns>A string with the same content as <paramref name="text"/>.</returns>
    public delegate string StringPool(ReadOnlySpan<char> text);

    /// <summary>
    /// Character-at-a-time CSV line parser. Fields are separated by commas,
    /// lines end with <c>\r</c>, <c>\n</c> or <c>\r\n</c>, and a field may be
    /// wrapped in double quotes, inside which a doubled quote stands for one
    /// literal quote.
    /// </summary>
    public class HomeGrownImproved
    {
        // Size used when the caller passes no buffer, and the smallest size a grown buffer may have.
        private const int MinimumBufferSize = 16;

        private readonly StringPool _stringPool;

        // Scratch space for the field currently being read. Starts as the
        // caller-supplied array and is replaced by a larger one when a field
        // does not fit.
        private char[] _buffer;
        private int _index;

        /// <summary>
        /// Creates a parser.
        /// </summary>
        /// <param name="buffer">
        /// Initial scratch buffer for field characters. It is only a starting
        /// size: a larger array is allocated when a field outgrows it. A null
        /// value makes the parser allocate its own buffer.
        /// </param>
        /// <param name="stringPool">
        /// Optional converter from field characters to strings; when null,
        /// every non-empty field is materialized with <c>ToString()</c>.
        /// </param>
        public HomeGrownImproved(char[] buffer, StringPool stringPool)
        {
            _buffer = buffer ?? new char[MinimumBufferSize];
            _stringPool = stringPool;
        }

        private enum State
        {
            BeforeField,
            InField,
            InQuotedField,
            LineEnd,
        }

        /// <summary>
        /// Reads the next line from <paramref name="reader"/> into <paramref name="fields"/>.
        /// </summary>
        /// <param name="reader">Source of characters; consumed up to and including the line terminator.</param>
        /// <param name="fields">Cleared on entry, then filled with the fields of the line.</param>
        /// <returns>
        /// <c>true</c> when at least one field was read; <c>false</c> when the
        /// reader was already at the end of its input.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// A closing quote is followed by something other than a quote, a comma,
        /// a line terminator or the end of input; or the input ends inside a quoted field.
        /// </exception>
        public bool TryReadLine(TextReader reader, List<string> fields)
        {
            _index = 0;
            fields.Clear();

            var state = State.BeforeField;
            int c;
            while ((c = reader.Read()) > -1)
            {
                switch (state)
                {
                    case State.BeforeField:
                        switch (c)
                        {
                            case '"':
                                state = State.InQuotedField;
                                break;
                            case ',':
                                fields.Add(string.Empty);
                                break;
                            case '\r':
                                fields.Add(string.Empty);
                                SkipLineFeed(reader);
                                state = State.LineEnd;
                                break;
                            case '\n':
                                fields.Add(string.Empty);
                                state = State.LineEnd;
                                break;
                            default:
                                Append((char)c);
                                state = State.InField;
                                break;
                        }
                        break;

                    case State.InField:
                        switch (c)
                        {
                            case ',':
                                AddField(fields);
                                state = State.BeforeField;
                                break;
                            case '\r':
                                AddField(fields);
                                SkipLineFeed(reader);
                                state = State.LineEnd;
                                break;
                            case '\n':
                                AddField(fields);
                                state = State.LineEnd;
                                break;
                            default:
                                Append((char)c);
                                break;
                        }
                        break;

                    case State.InQuotedField:
                        switch (c)
                        {
                            case '"':
                                // The character after a quote decides whether it is an
                                // escaped quote or the end of the field.
                                switch (reader.Peek())
                                {
                                    case '"':
                                        Append('"');
                                        reader.Read();
                                        break;
                                    case ',':
                                        reader.Read();
                                        AddField(fields);
                                        state = State.BeforeField;
                                        break;
                                    case '\r':
                                        reader.Read();
                                        AddField(fields);
                                        SkipLineFeed(reader);
                                        state = State.LineEnd;
                                        break;
                                    case '\n':
                                        reader.Read();
                                        AddField(fields);
                                        state = State.LineEnd;
                                        break;
                                    case -1:
                                        // Closing quote is the last character of the input:
                                        // a valid final field without a trailing newline.
                                        AddField(fields);
                                        state = State.LineEnd;
                                        break;
                                    default:
                                        throw new InvalidDataException("Corrupt field found. A double quote is not escaped or there is extra data after a quoted field.");
                                }
                                break;
                            default:
                                Append((char)c);
                                break;
                        }
                        break;

                    default:
                        throw new NotImplementedException();
                }

                if (state == State.LineEnd)
                {
                    break;
                }
            }

            // Only reached with a state other than LineEnd when the input ran out mid-line.
            switch (state)
            {
                case State.BeforeField:
                    // BeforeField with fields already present means the last character
                    // consumed was a comma, so the line ends with an empty field.
                    if (fields.Count > 0)
                    {
                        fields.Add(string.Empty);
                    }
                    break;
                case State.InField:
                    AddField(fields);
                    break;
                case State.InQuotedField:
                    throw new InvalidDataException("When the line ends with a quoted field, the last character should be an unescaped double quote.");
            }

            return fields.Count > 0;
        }

        // Consumes the '\n' of a "\r\n" pair, if one follows.
        private static void SkipLineFeed(TextReader reader)
        {
            if (reader.Peek() == '\n')
            {
                reader.Read();
            }
        }

        // Appends one character to the current field, growing the buffer when it is full.
        private void Append(char value)
        {
            if (_index == _buffer.Length)
            {
                Array.Resize(ref _buffer, Math.Max(_buffer.Length * 2, MinimumBufferSize));
            }

            _buffer[_index++] = value;
        }

        // Emits the buffered characters as one field and resets the buffer position.
        private void AddField(List<string> fields)
        {
            if (_index == 0)
            {
                fields.Add(string.Empty);
                return;
            }

            var span = _buffer.AsSpan(0, _index);
            var text = _stringPool != null
                ? _stringPool(span)
                : span.ToString();

            fields.Add(text);
            _index = 0;
        }
    }
}