Skip to content

Commit

Permalink
Fixed the LALR(1) parser, but it doesn't do error-and-continue yet
Browse files Browse the repository at this point in the history
  • Loading branch information
codewitch-honey-crisis committed Aug 21, 2019
1 parent 59c0ed9 commit 060c83b
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 62 deletions.
202 changes: 154 additions & 48 deletions lalr1/Lalr1DebugParser2.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,60 @@ public class Lalr1DebugParser2 : Lalr1Parser
HashSet<string> _hidden;
HashSet<string> _collapsed;
ITokenizer _tokenizer;

Queue<Token> _tokens;
CfgLalr1ParseTable _parseTable;
Token _token;
string[] _ruleDef;
LRNodeType _nodeType;
IEnumerator<Token> _tokenEnum;
Stack<int> _stack;

public override LRNodeType NodeType {
get {
if(0<_tokens.Count)
LRNodeType _nodeType;
public Lalr1DebugParser2(CfgDocument cfg,ITokenizer tokenizer,CfgLalr1ParseTable parseTable=null)
{
_cfg = cfg;
_parseTable = parseTable ?? cfg.ToLalr1ParseTable();
_stack = new Stack<int>();
_Populate();
Restart(tokenizer);
}
void _Populate()
{
_substitutions = new Dictionary<string, string>();
_hidden = new HashSet<string>();
_collapsed = new HashSet<string>();
foreach (var attrsym in _cfg.AttributeSets)
{
var i = attrsym.Value.IndexOf("hidden");
if (-1 < i)
{
var hidden = attrsym.Value[i].Value;
if ((hidden is bool) && ((bool)hidden))
_hidden.Add(attrsym.Key);
}
i = attrsym.Value.IndexOf("collapsed");
if (-1 < i)
{
var collapsed = attrsym.Value[i].Value;
if ((collapsed is bool) && ((bool)collapsed))
_collapsed.Add(attrsym.Key);
}
i = attrsym.Value.IndexOf("substitute");
if (-1 < i)
{
var t = _tokens.Peek();
return ("#ERROR" == t.Symbol) ? LRNodeType.Error : LRNodeType.Shift;
var substitute = attrsym.Value[i].Value as string;
if (!string.IsNullOrEmpty(substitute) && _cfg.IsSymbol(substitute) && substitute != attrsym.Key)
_substitutions.Add(attrsym.Key, substitute);
}
if(null!=_ruleDef)
return LRNodeType.Reduce;
if (0!=_stack.Count)
return LRNodeType.EndDocument;
return LRNodeType.Initial;
}
}
// Kind of node the parser is currently positioned on; advanced by Read().
public override LRNodeType NodeType => _nodeType;


public override string Value {
get {
switch (NodeType)
{
case LRNodeType.Shift:
case LRNodeType.Error:
return _tokens.Peek().Value;
return _token.Value;
}
return null;
}
Expand All @@ -50,7 +75,7 @@ public override string Symbol {
{
case LRNodeType.Error:
case LRNodeType.Shift:
return _tokens.Peek().Symbol;
return _token.Symbol;
case LRNodeType.Reduce:
return _ruleDef[0];
}
Expand Down Expand Up @@ -104,27 +129,10 @@ public override int[] RuleDefinitionIds {
return null;
}
}
public override int Line {
get {
if (0 < _tokens.Count)
return _tokens.Peek().Line;
return 1;
}
}
public override int Column {
get {
if (0 < _tokens.Count)
return _tokens.Peek().Column;
return 1;
}
}
public override long Position {
get {
if (0 < _tokens.Count)
return _tokens.Peek().Position;
return 0;
}
}
// Location of the current token in the input.
// NOTE(review): _token looks like it can be null before the first Read()
// or after Close() — confirm callers never query these in that state.
public override int Line => _token.Line;
public override int Column => _token.Column;
public override long Position => _token.Position;

// True when the current symbol carries a "hidden" attribute in the grammar.
public override bool IsHidden => _hidden.Contains(Symbol);
// True when the current symbol carries a "collapsed" attribute in the grammar.
public override bool IsCollapsed => _collapsed.Contains(Symbol);

Expand All @@ -134,15 +142,15 @@ public override void Close()
_tokenEnum.Dispose();
_tokenEnum = null;
_stack.Clear();
_tokens.Clear();
_nodeType = LRNodeType.EndDocument;
}
public override void Restart()
{
if (null == _tokenEnum)
throw new ObjectDisposedException(GetType().Name);
_tokenEnum.Reset();
_stack.Clear();
_tokens.Clear();
_nodeType = LRNodeType.Initial;
}
public override void Restart(IEnumerable<char> input)
{
Expand All @@ -151,22 +159,30 @@ public override void Restart(IEnumerable<char> input)
Close();
_tokenizer.Restart(input);
_tokenEnum = _tokenizer.GetEnumerator();

_nodeType = LRNodeType.Initial;
}
public override void Restart(ITokenizer tokenizer)
{
Close();
if (null != _tokenizer)
if (null != tokenizer)
{
_tokenizer = tokenizer;
_tokenEnum = _tokenizer.GetEnumerator();
_nodeType =LRNodeType.Initial;
_nodeType = LRNodeType.Initial;
}
}
public override bool Read()
{
switch(NodeType)
{
case LRNodeType.Error:
_stack.Clear();
_nodeType = LRNodeType.EndDocument;
return false;
case LRNodeType.Accept:
_stack.Clear();
_nodeType = LRNodeType.EndDocument;
return true;
case LRNodeType.EndDocument:
return false;
case LRNodeType.Initial:
Expand All @@ -176,17 +192,107 @@ public override bool Read()
if (!_tokenEnum.MoveNext())
throw new Exception("Error in ITokenizer implementation.");
break;

}
if(!ShowHidden)
while (IsHidden)
_tokens.Dequeue();

if (0<_tokens.Count)
if (!ShowHidden)
{
while (_hidden.Contains(_tokenEnum.Current.Symbol))
_tokenEnum.MoveNext();
} else
{
if(_hidden.Contains(_tokenEnum.Current.Symbol))
{
_token = _tokenEnum.Current;
_tokenEnum.MoveNext();
_nodeType = LRNodeType.Shift;
return true;
}
}

(int RuleOrStateId, string Left, string[] Right) trns;
if(!_parseTable[_stack.Peek()].TryGetValue(_tokenEnum.Current.Symbol,out trns))
{
_Panic();
return true;
}
if (null == trns.Right) // shift or accept
{
if (-1 != trns.RuleOrStateId) // shift
{
_ruleDef = null;
_token=_tokenEnum.Current;
_tokenEnum.MoveNext();
_stack.Push(trns.RuleOrStateId);
_nodeType = LRNodeType.Shift;
return true;
}
else
{ // accept
_ruleDef = null;
//throw if _tok is not $ (end)
if ("#EOS" != _tokenEnum.Current.Symbol)
{
_Panic();
return true;
}
_nodeType = LRNodeType.Accept;
_stack.Clear();
return true;
}
}
else // reduce
{
_ruleDef = new string[trns.Right.Length + 1];
_ruleDef[0] = trns.Left;
trns.Right.CopyTo(_ruleDef, 1);
for (int i = 0; i < trns.Right.Length; ++i)
if (null != trns.Right[i])
_stack.Pop();

// There is a new number at the top of the stack.
// This number is our temporary state. Get the symbol
// from the left-hand side of the rule #. Treat it as
// the next input token in the GOTO table (and place
// the matching state at the top of the set stack).
_stack.Push(_parseTable[_stack.Peek()][trns.Left].RuleOrStateId);
_nodeType = LRNodeType.Reduce;
return true;
}
return false;

}
// Panic-mode error recovery. Called when the current token has no entry in
// the parse table for the state on top of the stack. Skips input tokens
// until one matches the current state (or end of stream), collecting the
// skipped text into a single synthetic #ERROR token that Read() reports.
// Always leaves the parser on an Error node so Read() terminates via its
// Error branch instead of looping forever with unchanged state.
void _Panic()
{
	if (0 == _stack.Count)
		throw new Exception("Parse error");
	var sb = new StringBuilder();
	// Remember where the bad input began so the #ERROR token points there.
	var l = _tokenEnum.Current.Line;
	var c = _tokenEnum.Current.Column;
	var p = _tokenEnum.Current.Position;
	while (!_IsMatch(_tokenEnum.Current.Symbol) && "#EOS" != _tokenEnum.Current.Symbol)
	{
		sb.Append(_tokenEnum.Current.Value);
		// Original ignored MoveNext()'s result; if the tokenizer ends
		// without emitting #EOS, Current is stale and the loop spins.
		if (!_tokenEnum.MoveNext())
			break;
	}
	// Previously nothing happened when no token was skipped (e.g. stuck on
	// #EOS, or the accept path handed us a token the state *does* accept):
	// Read() then returned true with unchanged NodeType and callers could
	// loop forever. Now we always surface an #ERROR node (Value is empty
	// when nothing was skipped) so the parse terminates deterministically.
	var et = new Token();
	et.Symbol = "#ERROR";
	et.SymbolId = _cfg.GetIdOfSymbol(et.Symbol);
	et.Line = l;
	et.Column = c;
	et.Position = p;
	et.Value = sb.ToString();
	_token = et;
	_nodeType = LRNodeType.Error;
}
// True when the parse-table row for the state on top of the stack has an
// entry (shift/reduce/accept/goto) keyed by the given symbol.
bool _IsMatch(string sym)
{
	var state = _stack.Peek();
	var row = _parseTable[state];
	return row.ContainsKey(sym);
}
}
}
Binary file added pck.refresh.v0.0.1.4.zip
Binary file not shown.
16 changes: 12 additions & 4 deletions pck/Lalr1TableParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,17 @@ public override bool Read()
else if (LRNodeType.Accept == _nodeType)
{
_nodeType = LRNodeType.EndDocument;
_stack.Clear();
return true;
}
else if (LRNodeType.EndDocument == _nodeType )
return false;
else if (_eosId == _tokenEnum.Current.SymbolId && LRNodeType.Error == _nodeType)
else if (LRNodeType.EndDocument == _nodeType)
return false;
else if (LRNodeType.Error == _nodeType)
{
_nodeType = LRNodeType.EndDocument;
_stack.Clear();
return true;
}
if (LRNodeType.Error != _nodeType)
{
if (!ShowHidden)
Expand Down Expand Up @@ -270,6 +275,7 @@ public override void Close()
_tokenEnum.Dispose();
_tokenEnum = null;
}
_nodeType = LRNodeType.EndDocument;
_stack.Clear();
}
public override void Restart()
Expand All @@ -287,13 +293,15 @@ public override void Restart(ITokenizer tokenizer)
{
_tokenizer = tokenizer;
_tokenEnum = tokenizer.GetEnumerator();
_nodeType = LRNodeType.Initial;
}
}
public override void Restart(IEnumerable<char> input)
{
Close();
_tokenizer.Restart(input);
_tokenEnum = _tokenizer.GetEnumerator();
_nodeType = LRNodeType.Initial;
}
void _Panic()
{
Expand Down Expand Up @@ -321,7 +329,7 @@ void _Panic()
break;
} else
{
_errorToken.Value += _tokenEnum.Current.Value;
//_errorToken.Value += _tokenEnum.Current.Value;
_tokenEnum.MoveNext();
}
}
Expand Down
7 changes: 0 additions & 7 deletions pckedit/Progress.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace Pck
Expand Down
18 changes: 15 additions & 3 deletions scratch/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ static void Main(string[] args)
// Console.WriteLine(RegexExpression.Parse(test));
//_RunLL(args);
//_RunLalr(args);
_RunXbnfGenerated(args);
_RunLalrXbnf(args);
//_RunXbnfGenerated(args);
_RunDebugLalrXbnf(args);
}
static void _TestXbnfTokenizers(string[] args)
{
Expand Down Expand Up @@ -79,13 +79,25 @@ static void _RunLalrXbnf(string[] args)
var cfg = CfgDocument.ReadFrom(@"..\..\..\xbnf.pck");
var lex = LexDocument.ReadFrom(@"..\..\..\xbnf.pck");
var tokenizer = lex.ToTokenizer(new FileReaderEnumerable(@"..\..\..\xbnf.xbnf"), cfg.EnumSymbols());
//var pt = cfg.ToLalr1ParseTable();// new _ConsoleProgress());
var parser = cfg.ToLalr1Parser(tokenizer); //new Lalr1DebugParser(cfg, tokenizer, pt);

parser.ShowHidden = true;
while (LRNodeType.EndDocument != parser.NodeType)
Console.WriteLine(parser.ParseReductions(true));

}
// Demo driver: parses a deliberately malformed grammar fragment with the
// LALR(1) parser and dumps the reductions, exercising the error path.
static void _RunDebugLalrXbnf(string[] args)
{
	var grammar = CfgDocument.ReadFrom(@"..\..\..\xbnf.pck");
	var lexer = LexDocument.ReadFrom(@"..\..\..\xbnf.pck");
	IEnumerable<char> input = new FileReaderEnumerable(@"..\..\..\xbnf.xbnf");
	// Override the file input with a short bad fragment to trigger recovery.
	input = "foo<start>=bar;";
	var tok = lexer.ToTokenizer(input, grammar.EnumSymbols());
	// Alternative under test: new Lalr1DebugParser2(grammar, tok)
	var parser = grammar.ToLalr1Parser(tok, new _ConsoleProgress());
	parser.ShowHidden = false;
	while (LRNodeType.EndDocument != parser.NodeType)
		Console.WriteLine(parser.ParseReductions());
}
static void _RunLalr(string[] args)
{
Expand Down

0 comments on commit 060c83b

Please sign in to comment.