From 235b3a03f27f752a00b82448a7fc128e4dd802c5 Mon Sep 17 00:00:00 2001
From: Curtis Wensley
Date: Sun, 23 May 2021 16:01:12 -0700
Subject: [PATCH] Fix memory issue with large character set ranges

Ranges wider than MaxCharacterSetRangeOptimization are kept as range parsers
instead of being expanded into individual characters. The expansion loop also
counts with an int so a range ending at U+FFFF cannot wrap around and loop
forever.

Fixes #48
---
 .../Samples/LargeCharcterSetRange.cs          | 17 ++++++++++++
 Eto.Parse/Grammar.cs                          | 26 +++++++++++++++----
 2 files changed, 38 insertions(+), 5 deletions(-)
 create mode 100644 Eto.Parse.Tests/Samples/LargeCharcterSetRange.cs

diff --git a/Eto.Parse.Tests/Samples/LargeCharcterSetRange.cs b/Eto.Parse.Tests/Samples/LargeCharcterSetRange.cs
new file mode 100644
index 0000000..54ea68a
--- /dev/null
+++ b/Eto.Parse.Tests/Samples/LargeCharcterSetRange.cs
@@ -0,0 +1,17 @@
+using Eto.Parse.Grammars;
+using NUnit.Framework;
+
+namespace Eto.Parse.Tests.Samples
+{
+	[TestFixture]
+	public class LargeCharcterSetRange
+	{
+		[Test]
+		public void LargeRangeShouldntCauseMemoryException()
+		{
+			var _grammar = new EbnfGrammar(EbnfStyle.W3c).Build($"id ::= [a-zA-Z\u0100-\uffff_][0-9a-zA-Z\u0100-\uffff_]*", "id");
+			var _match = _grammar.Match("张三李四");
+		}
+
+	}
+}
\ No newline at end of file
diff --git a/Eto.Parse/Grammar.cs b/Eto.Parse/Grammar.cs
index e2ccff0..2f82acd 100644
--- a/Eto.Parse/Grammar.cs
+++ b/Eto.Parse/Grammar.cs
@@ -80,6 +80,12 @@ public class Grammar : UnaryParser
 		public bool AllowPartialMatch { get; set; }
 
 		public bool Trace { get; set; }
+
+		/// <summary>
+		/// Sets the maximum span (End - Start) of a character range that <c>OptimizeCharacterSets</c> expands into individual characters; wider ranges are kept as range parsers.
+		/// </summary>
+		/// <value>The largest span that is still expanded; defaults to 100.</value>
+		public int MaxCharacterSetRangeOptimization { get; set; } = 100;
 
 		public GrammarOptimizations Optimizations { get; set; }
 
@@ -264,6 +270,7 @@ void OptimizeCharacterSets()
 			{
 				var chars = new List<char>();
 				var inverse = new List<char>();
+				var additionalParsers = new List<Parser>();
 				for (int i1 = 0; i1 < alt.Items.Count; i1++)
 				{
 					Parser item = alt.Items[i1];
@@ -288,12 +295,19 @@ void OptimizeCharacterSets()
 					var charRange = item as CharRangeTerminal;
 					if (charRange != null)
 					{
-						for (char i = charRange.Start; i <= charRange.End; i++)
+						if (charRange.End - charRange.Start > MaxCharacterSetRangeOptimization)
+						{
+							additionalParsers.Add(charRange);
+						}
+						else
 						{
-							if (charRange.Inverse)
-								inverse.Add(i);
-							else
-								chars.Add(i);
+							for (int i = charRange.Start; i <= charRange.End; i++) // int counter: a char counter wraps past '\uffff' and would never terminate
+							{
+								if (charRange.Inverse)
+									inverse.Add((char)i);
+								else
+									chars.Add((char)i);
+							}
 						}
 						continue;
 					}
@@ -313,6 +327,8 @@ void OptimizeCharacterSets()
 					{
 						Inverse = true
 					});
+				if (additionalParsers.Count > 0)
+					alt.Items.AddRange(additionalParsers);
 			}
 		}
 	}