diff --git a/NCsvPerf/CsvReadable/Benchmarks/PackageAssetsSuite.cs b/NCsvPerf/CsvReadable/Benchmarks/PackageAssetsSuite.cs
index 7b12ba5..4445017 100644
--- a/NCsvPerf/CsvReadable/Benchmarks/PackageAssetsSuite.cs
+++ b/NCsvPerf/CsvReadable/Benchmarks/PackageAssetsSuite.cs
@@ -157,6 +157,12 @@ public void HomeGrown()
             Execute(new HomeGrown(ActivationMethod.ILEmit));
         }
 
+        [Benchmark]
+        public void HomeGrownImproved()
+        {
+            Execute(new HomeGrown2(ActivationMethod.ILEmit));
+        }
+
         [Benchmark]
         public void KBCsv()
         {
diff --git a/NCsvPerf/CsvReadable/Implementations/HomeGrown2.cs b/NCsvPerf/CsvReadable/Implementations/HomeGrown2.cs
new file mode 100644
index 0000000..e8c00a3
--- /dev/null
+++ b/NCsvPerf/CsvReadable/Implementations/HomeGrown2.cs
@@ -0,0 +1,48 @@
+using Ben.Collections.Specialized;
+using Knapcode.NCsvPerf.HomeGrown;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Knapcode.NCsvPerf.CsvReadable
+{
+    /// <summary>
+    /// Package: N/A
+    /// Source: see HomeGrownImproved.cs in this repository
+    /// </summary>
+    public class HomeGrown2 : ICsvReader
+    {
+        private readonly ActivationMethod _activationMethod;
+
+        public HomeGrown2(ActivationMethod activationMethod)
+        {
+            _activationMethod = activationMethod;
+        }
+
+        /// <summary>
+        /// Parses every CSV line in <paramref name="stream"/> with <see cref="HomeGrownImproved"/>
+        /// and materializes one <typeparamref name="T"/> per line.
+        /// </summary>
+        public List<T> GetRecords<T>(MemoryStream stream) where T : ICsvReadable, new()
+        {
+            var activate = ActivatorFactory.Create<T>(_activationMethod);
+            var allRecords = new List<T>();
+            var fields = new List<string>();
+
+            // Disposing the reader also closes the caller's stream.
+            using (var reader = new StreamReader(stream))
+            {
+                // 200 chars is only the initial scratch size; the parser grows it for longer fields.
+                var parser = new HomeGrownImproved(buffer: new char[200], stringPool: new InternPool().Intern);
+
+                while (parser.TryReadLine(reader, fields))
+                {
+                    var record = activate();
+                    record.Read(i => fields[i]);
+                    allRecords.Add(record);
+                }
+            }
+
+            return allRecords;
+        }
+    }
+}
diff --git a/NCsvPerf/HomeGrown/HomeGrownImproved.cs b/NCsvPerf/HomeGrown/HomeGrownImproved.cs
new file mode 100644
index 0000000..20e1756
--- /dev/null
+++ b/NCsvPerf/HomeGrown/HomeGrownImproved.cs
@@ -0,0 +1,209 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Knapcode.NCsvPerf.HomeGrown
+{
+    /// <summary>
+    /// Turns the characters of one parsed field into a string, for example by returning a pooled instance.
+    /// </summary>
+    public delegate string StringPool(ReadOnlySpan<char> text);
+
+    /// <summary>
+    /// Line-at-a-time CSV parser: comma separated fields, optional double-quoted fields with
+    /// <c>""</c> as the escaped quote, and CR, LF or CRLF line endings.
+    /// </summary>
+    public class HomeGrownImproved
+    {
+        private readonly StringPool _stringPool;
+        private char[] _buffer;
+        private int _index;
+
+        /// <param name="buffer">Initial scratch space for a single field; swapped for a larger array when a field outgrows it.</param>
+        /// <param name="stringPool">Optional string factory; when null every field is allocated with <c>ToString()</c>.</param>
+        /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
+        public HomeGrownImproved(char[] buffer, StringPool stringPool)
+        {
+            _buffer = buffer ?? throw new ArgumentNullException(nameof(buffer));
+            _stringPool = stringPool;
+        }
+
+        private enum State
+        {
+            BeforeField,
+            InField,
+            InQuotedField,
+            LineEnd,
+        }
+
+        /// <summary>
+        /// Reads the next line from <paramref name="reader"/> into <paramref name="fields"/>, which is cleared first.
+        /// </summary>
+        /// <returns>True when at least one field was read; false when the reader had no more input.</returns>
+        /// <exception cref="InvalidDataException">A quoted field is unterminated or is followed by unexpected data.</exception>
+        public bool TryReadLine(TextReader reader, List<string> fields)
+        {
+            _index = 0;
+            fields.Clear();
+
+            var state = State.BeforeField;
+            int c;
+            while ((c = reader.Read()) > -1)
+            {
+                switch (state)
+                {
+                    case State.BeforeField:
+                        switch (c)
+                        {
+                            case '"':
+                                state = State.InQuotedField;
+                                break;
+                            case ',':
+                                fields.Add(string.Empty);
+                                break;
+                            case '\r':
+                                fields.Add(string.Empty);
+                                SkipLineFeed(reader);
+                                state = State.LineEnd;
+                                break;
+                            case '\n':
+                                fields.Add(string.Empty);
+                                state = State.LineEnd;
+                                break;
+                            default:
+                                Append((char)c);
+                                state = State.InField;
+                                break;
+                        }
+                        break;
+
+                    case State.InField:
+                        switch (c)
+                        {
+                            case ',':
+                                AddField(fields);
+                                state = State.BeforeField;
+                                break;
+                            case '\r':
+                                AddField(fields);
+                                SkipLineFeed(reader);
+                                state = State.LineEnd;
+                                break;
+                            case '\n':
+                                AddField(fields);
+                                state = State.LineEnd;
+                                break;
+                            default:
+                                Append((char)c);
+                                break;
+                        }
+                        break;
+
+                    case State.InQuotedField:
+                        switch (c)
+                        {
+                            case '"':
+                                var nc = reader.Peek();
+                                switch (nc)
+                                {
+                                    case '"':
+                                        // Doubled quote: keep one literal quote and stay inside the field.
+                                        Append('"');
+                                        reader.Read();
+                                        break;
+                                    case ',':
+                                        reader.Read();
+                                        AddField(fields);
+                                        state = State.BeforeField;
+                                        break;
+                                    case '\r':
+                                        reader.Read();
+                                        AddField(fields);
+                                        SkipLineFeed(reader);
+                                        state = State.LineEnd;
+                                        break;
+                                    case '\n':
+                                        reader.Read();
+                                        AddField(fields);
+                                        state = State.LineEnd;
+                                        break;
+                                    case -1:
+                                        // Nothing follows the closing quote: the input ends without a line break.
+                                        AddField(fields);
+                                        state = State.LineEnd;
+                                        break;
+                                    default:
+                                        throw new InvalidDataException("Corrupt field found. A double quote is not escaped or there is extra data after a quoted field.");
+                                }
+                                break;
+                            default:
+                                Append((char)c);
+                                break;
+                        }
+                        break;
+
+                    default:
+                        throw new NotImplementedException();
+                }
+
+                if (state == State.LineEnd)
+                {
+                    break;
+                }
+            }
+
+            // Any state other than LineEnd here means the reader ran out of input.
+            switch (state)
+            {
+                case State.BeforeField:
+                    // A trailing comma at end of input still delimits a final empty field.
+                    if (fields.Count > 0)
+                    {
+                        fields.Add(string.Empty);
+                    }
+                    break;
+                case State.InField:
+                    AddField(fields);
+                    break;
+                case State.InQuotedField:
+                    throw new InvalidDataException("When the line ends with a quoted field, the last character should be an unescaped double quote.");
+            }
+
+            return fields.Count > 0;
+        }
+
+        /// <summary>Appends one character to the current field, doubling the scratch buffer when it is full.</summary>
+        private void Append(char value)
+        {
+            if (_index == _buffer.Length)
+            {
+                Array.Resize(ref _buffer, Math.Max(_buffer.Length * 2, 16));
+            }
+
+            _buffer[_index++] = value;
+        }
+
+        /// <summary>Consumes the LF of a CRLF pair when it is the next character.</summary>
+        private static void SkipLineFeed(TextReader reader)
+        {
+            if (reader.Peek() == '\n')
+            {
+                reader.Read();
+            }
+        }
+
+        /// <summary>Emits the buffered characters as a field and resets the buffer position.</summary>
+        private void AddField(List<string> fields)
+        {
+            if (_index == 0)
+            {
+                fields.Add(string.Empty);
+                return;
+            }
+
+            var span = _buffer.AsSpan(0, _index);
+            fields.Add(_stringPool != null ? _stringPool(span) : span.ToString());
+            _index = 0;
+        }
+    }
+}