Skip to content

Commit

Permalink
adds homegrow2 (improved) (#55)
Browse files Browse the repository at this point in the history
* done

* minor improvements
  • Loading branch information
leandromoh authored Oct 26, 2023
1 parent c97ebba commit f80d069
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 0 deletions.
6 changes: 6 additions & 0 deletions NCsvPerf/CsvReadable/Benchmarks/PackageAssetsSuite.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,12 @@ public void HomeGrown()
Execute(new HomeGrown(ActivationMethod.ILEmit));
}

// Benchmark entry for the HomeGrown2 reader, constructed with IL-emit activation.
[Benchmark]
public void HomeGrownImproved() => Execute(new HomeGrown2(ActivationMethod.ILEmit));

[Benchmark]
public void KBCsv()
{
Expand Down
42 changes: 42 additions & 0 deletions NCsvPerf/CsvReadable/Implementations/HomeGrown2.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
using Ben.Collections.Specialized;
using Knapcode.NCsvPerf.HomeGrown;
using System.Collections.Generic;
using System.IO;

namespace Knapcode.NCsvPerf.CsvReadable
{
/// <summary>
/// Package: N/A
/// Source: see HomeGrownImproved.cs in this repository
/// </summary>
/// <remarks>
/// Drives a <see cref="HomeGrownImproved"/> parser over the stream and turns every
/// parsed line into one activated record.
/// </remarks>
public class HomeGrown2 : ICsvReader
{
    // Length of the scratch array handed to the parser for assembling field text.
    private const int ParserBufferLength = 200;

    private readonly ActivationMethod _activationMethod;

    /// <summary>Remembers which activation strategy is used to create record instances.</summary>
    public HomeGrown2(ActivationMethod activationMethod) => _activationMethod = activationMethod;

    /// <summary>
    /// Reads the stream line by line and returns one record per parsed line.
    /// </summary>
    /// <param name="stream">CSV input; it is wrapped in a <see cref="StreamReader"/> that is disposed before returning.</param>
    /// <returns>The records in the order their lines were read.</returns>
    public List<T> GetRecords<T>(MemoryStream stream) where T : ICsvReadable, new()
    {
        var createRecord = ActivatorFactory.Create<T>(_activationMethod);
        var results = new List<T>();

        // One list is reused for every line; the parser clears it on each call.
        var lineFields = new List<string>();

        using (var textReader = new StreamReader(stream))
        {
            var pool = new InternPool();
            var parser = new HomeGrownImproved(
                buffer: new char[ParserBufferLength],
                stringPool: pool.Intern);

            while (parser.TryReadLine(textReader, lineFields))
            {
                var item = createRecord();
                item.Read(index => lineFields[index]);
                results.Add(item);
            }
        }

        return results;
    }
}
}
178 changes: 178 additions & 0 deletions NCsvPerf/HomeGrown/HomeGrownImproved.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
using System;
using System.Collections.Generic;
using System.IO;

namespace Knapcode.NCsvPerf.HomeGrown
{
/// <summary>
/// Converts the characters of one parsed field into a string. Implementations may
/// return a cached/interned instance instead of allocating a new string.
/// </summary>
/// <param name="text">The field's characters; only valid for the duration of the call.</param>
/// <returns>A string with the same content as <paramref name="text"/>.</returns>
public delegate string StringPool(ReadOnlySpan<char> text);

/// <summary>
/// Character-by-character CSV line parser driven by a small state machine.
/// Field text is accumulated in a reusable char buffer and converted to a string
/// through an optional <see cref="StringPool"/>.
/// </summary>
public class HomeGrownImproved
{
    // Smallest size the buffer is grown to when the supplied one is null or empty.
    private const int MinimumBufferLength = 16;

    private readonly StringPool _stringPool;

    // Not readonly: replaced by a larger array when a field outgrows it.
    private char[] _buffer;
    private int _index;

    /// <summary>
    /// Creates a parser.
    /// </summary>
    /// <param name="buffer">
    /// Initial scratch buffer for field characters. It is used as-is while fields fit;
    /// a larger array is allocated internally when a field is longer than the buffer.
    /// A null buffer is replaced by a small default one.
    /// </param>
    /// <param name="stringPool">
    /// Optional string factory; when null, every non-empty field is materialized with
    /// <c>ToString()</c>.
    /// </param>
    public HomeGrownImproved(char[] buffer, StringPool stringPool)
    {
        _buffer = buffer ?? new char[MinimumBufferLength];
        _stringPool = stringPool;
    }

    private enum State
    {
        BeforeField,
        InField,
        InQuotedField,
        LineEnd,
    }

    /// <summary>
    /// Reads the next record from <paramref name="reader"/> into <paramref name="fields"/>.
    /// </summary>
    /// <param name="reader">Source of characters; consumed up to and including the line terminator.</param>
    /// <param name="fields">Cleared on entry, then filled with the fields of the record.</param>
    /// <returns><c>true</c> when at least one field was read; <c>false</c> at end of input.</returns>
    /// <exception cref="InvalidDataException">
    /// A closing quote is followed by something other than a quote, a comma, a line
    /// terminator or end of input, or the input ends inside a quoted field.
    /// </exception>
    public bool TryReadLine(TextReader reader, List<string> fields)
    {
        _index = 0;
        fields.Clear();

        var state = State.BeforeField;
        int c;
        while ((c = reader.Read()) > -1)
        {
            switch (state)
            {
                case State.BeforeField:
                    switch (c)
                    {
                        case '"':
                            state = State.InQuotedField;
                            break;
                        case ',':
                            fields.Add(string.Empty);
                            break;
                        case '\r':
                            fields.Add(string.Empty);
                            if (reader.Peek() == '\n')
                            {
                                reader.Read();
                            }
                            state = State.LineEnd;
                            break;
                        case '\n':
                            fields.Add(string.Empty);
                            state = State.LineEnd;
                            break;
                        default:
                            Append((char)c);
                            state = State.InField;
                            break;
                    }
                    break;

                case State.InField:
                    switch (c)
                    {
                        case ',':
                            AddField(fields);
                            state = State.BeforeField;
                            break;
                        case '\r':
                            AddField(fields);
                            if (reader.Peek() == '\n')
                            {
                                reader.Read();
                            }
                            state = State.LineEnd;
                            break;
                        case '\n':
                            AddField(fields);
                            state = State.LineEnd;
                            break;
                        default:
                            Append((char)c);
                            break;
                    }
                    break;

                case State.InQuotedField:
                    switch (c)
                    {
                        case '"':
                            // A quote inside a quoted field is either an escaped quote ("")
                            // or the closing quote; the following character decides.
                            var nc = reader.Peek();
                            switch (nc)
                            {
                                case '"':
                                    Append('"');
                                    reader.Read();
                                    break;
                                case ',':
                                    reader.Read();
                                    AddField(fields);
                                    state = State.BeforeField;
                                    break;
                                case '\r':
                                    reader.Read();
                                    AddField(fields);
                                    if (reader.Peek() == '\n')
                                    {
                                        reader.Read();
                                    }
                                    state = State.LineEnd;
                                    break;
                                case '\n':
                                    reader.Read();
                                    AddField(fields);
                                    state = State.LineEnd;
                                    break;
                                case -1:
                                    // Closing quote is the last character of the input:
                                    // the final record simply has no line terminator.
                                    AddField(fields);
                                    state = State.LineEnd;
                                    break;
                                default:
                                    throw new InvalidDataException("Corrupt field found. A double quote is not escaped or there is extra data after a quoted field.");
                            }
                            break;
                        default:
                            Append((char)c);
                            break;
                    }
                    break;

                default:
                    // State.LineEnd never gets here: the loop exits as soon as it is set.
                    throw new NotImplementedException();
            }

            if (state == State.LineEnd)
            {
                break;
            }
        }

        // End of input reached without a line terminator: finish the pending field.
        switch (state)
        {
            case State.BeforeField:
                // Fields already present means the input ended right after a comma,
                // so the record has one more (empty) field, just as "a,\n" would.
                if (fields.Count > 0)
                {
                    fields.Add(string.Empty);
                }
                break;
            case State.InField:
                AddField(fields);
                break;
            case State.InQuotedField:
                throw new InvalidDataException("When the line ends with a quoted field, the last character should be an unescaped double quote.");
        }

        return fields.Count > 0;
    }

    /// <summary>
    /// Stores one character of the current field, enlarging the buffer first when it is full.
    /// </summary>
    private void Append(char value)
    {
        if (_index == _buffer.Length)
        {
            // Doubling keeps the amortized cost constant; the minimum covers an empty buffer.
            Array.Resize(ref _buffer, Math.Max(_buffer.Length * 2, MinimumBufferLength));
        }

        _buffer[_index++] = value;
    }

    /// <summary>
    /// Appends the buffered characters to <paramref name="fields"/> as one field and
    /// resets the buffer position for the next field.
    /// </summary>
    private void AddField(List<string> fields)
    {
        if (_index == 0)
        {
            fields.Add(string.Empty);
        }
        else
        {
            var span = _buffer.AsSpan(0, _index);
            var text = _stringPool != null
                ? _stringPool(span)
                : span.ToString();

            fields.Add(text);
            _index = 0;
        }
    }
}
}

0 comments on commit f80d069

Please sign in to comment.