From f2ab30fdfef91cb2d44d3dd2b7c76d417fa8665d Mon Sep 17 00:00:00 2001 From: Gerald Bauer Date: Tue, 7 Feb 2023 17:24:43 +0100 Subject: [PATCH] up lexer --- solidity/NOTES.md | 55 ------------------ solidity/lib/solidity/lexer.rb | 45 ++++++--------- solidity/lib/solidity/parser.rb | 6 +- solidity/lib/solidity/version.rb | 4 +- solidity/sandbox/test_lexer_ruby.rb | 90 +++++++++++++++++++++++++++++ solidity/test/test_lexer.rb | 37 ++++++++++++ 6 files changed, 150 insertions(+), 87 deletions(-) create mode 100644 solidity/sandbox/test_lexer_ruby.rb create mode 100644 solidity/test/test_lexer.rb diff --git a/solidity/NOTES.md b/solidity/NOTES.md index 7d1900f..9f4e7d4 100644 --- a/solidity/NOTES.md +++ b/solidity/NOTES.md @@ -55,58 +55,3 @@ https://pygments.org/docs/lexers/ -RubyVM::AbstractSyntaxTree.parse("puts('test', )", keep_tokens: true).tokens -# => -# [[0, :tIDENTIFIER, "puts", [1, 0, 1, 4]], -# [1, :"(", "(", [1, 4, 1, 5]], -# [2, :tSTRING_BEG, "'", [1, 5, 1, 6]], -# [3, :tSTRING_CONTENT, "test", [1, 6, 1, 10]], -# [4, :tSTRING_END, "'", [1, 10, 1, 11]], -# [5, :",", ",", [1, 11, 1, 12]], -# [6, :tSP, " ", [1, 12, 1, 13]], -# [7, :")", ")", [1, 13, 1, 14]]] - - -require 'ripper' -require 'pp' - -code = < An identifier in solidity has to start with a letter, @@ -76,40 +75,34 @@ def initialize( txt ) ## ## COMMENT ## : '/*' .*? '*/' ; - ## ## LINE_COMMENT ## : '//' ~[\r\n]* ; + COMMENT = %r{/\* + .*? + \*/}x + + LINE_COMMENT = %r{// + [^\r\n]*}x def tokenize t = [] s = StringScanner.new( @txt ) until s.eos? ## loop until hitting end-of-string (file) - if s.check( /[ \t]*\/\*/ ) - ## note: auto-slurp leading (optinal) spaces!!!! - why? why not? - comment = s.scan_until( /\*\// ) - ## print "multi-line comment:" - ## pp comment - t << [:comment, comment.lstrip] - elsif s.check( /[ \t]*\/\// ) - ## note: auto-slurp leading (optinal) spaces!!!! - why? why not? - ## note: auto-remove newline AND trailing whitespace - why? why not? - comment = s.scan_until( /\n|$/ ).strip - ## print "comment:" - ## pp comment - t << [:comment, comment] - elsif s.scan( /[ \t]+/ ) ## one or more spaces + if s.scan( /[ \t]+/ ) ## one or more spaces ## note: (auto-)convert tab to space - why? why not? t << [:sp, s.matched.gsub( /[\t]/, ' ') ] elsif s.scan( /\r?\n/ ) ## check for (windows) carriage return (\r) - why? why not? t << [:nl, "\n" ] - elsif s.check( "'" ) ## single-quoted string - str = s.scan( SINGLE_QUOTE ) - t << [:string, str] - elsif s.check( '"' ) ## double-quoted string - str = s.scan( DOUBLE_QUOTE ) - t << [:string, str] + elsif s.scan( COMMENT ) + t << [:comment, s.matched] + elsif s.scan( LINE_COMMENT ) + t << [:comment, s.matched] + elsif s.scan( DOUBLE_QUOTE ) ## double-quoted string + t << [:string, s.matched] + elsif s.scan( SINGLE_QUOTE ) ## single-quoted string + t << [:string, s.matched] elsif s.scan( NAME ) name = s.matched case name diff --git a/solidity/lib/solidity/parser.rb b/solidity/lib/solidity/parser.rb index b7925ba..7f2aa58 100644 --- a/solidity/lib/solidity/parser.rb +++ b/solidity/lib/solidity/parser.rb @@ -23,13 +23,11 @@ def _quick_pass_one lex = Lexer.new( @txt ) until lex.eos? - while lex.peek == :sp do ## note: do NOT skip newlines here; pass along blank/empty lines for now - why? why not? - lex.next - end - case lex.peek when :comment ## single or multi-line comment tree << [:comment, lex.next] + ## note: if next token is newline - slurp / ignore + lex.next if lex.peek == :nl when :pragma code = lex.scan_until( :';', include: true ) diff --git a/solidity/lib/solidity/version.rb b/solidity/lib/solidity/version.rb index 84be610..fc30a5c 100644 --- a/solidity/lib/solidity/version.rb +++ b/solidity/lib/solidity/version.rb @@ -1,8 +1,8 @@ module Solidity MAJOR = 0 - MINOR = 1 - PATCH = 5 + MINOR = 2 + PATCH = 0 VERSION = [MAJOR,MINOR,PATCH].join('.') def self.version diff --git a/solidity/sandbox/test_lexer_ruby.rb b/solidity/sandbox/test_lexer_ruby.rb new file mode 100644 index 0000000..51b5464 --- /dev/null +++ b/solidity/sandbox/test_lexer_ruby.rb @@ -0,0 +1,90 @@ +### +# test ruby built-in lexers +# answer questions +# does end-of-line comment include newline in lexeme - yes/no? +# +# - [[6, 21], :on_comment, "## a comment here\n", END], + + + +require 'ripper' +require 'pp' + +code = < +# [[0, :tIDENTIFIER, "puts", [1, 0, 1, 4]], +# [1, :"(", "(", [1, 4, 1, 5]], +# [2, :tSTRING_BEG, "'", [1, 5, 1, 6]], +# [3, :tSTRING_CONTENT, "test", [1, 6, 1, 10]], +# [4, :tSTRING_END, "'", [1, 10, 1, 11]], +# [5, :",", ",", [1, 11, 1, 12]], +# [6, :tSP, " ", [1, 12, 1, 13]], +# [7, :")", ")", [1, 13, 1, 14]]] + + +__END__ + +[[[1, 0], :on_ignored_nl, "\n", BEG], + [[2, 0], :on_ignored_nl, "\n", BEG], + [[3, 0], :on_int, "5", END], + [[3, 1], :on_period, ".", DOT], + [[3, 2], :on_ident, "times", ARG], + [[3, 7], :on_sp, " ", ARG], + [[3, 11], :on_kw, "do", BEG], + [[3, 13], :on_sp, " ", BEG], + [[3, 17], :on_op, "|", BEG|LABEL], + [[3, 18], :on_sp, " ", BEG|LABEL], + [[3, 22], :on_ident, "x", ARG], + [[3, 23], :on_sp, " ", ARG], + [[3, 27], :on_op, "|", BEG|LABEL], + [[3, 28], :on_ignored_nl, "\n", BEG|LABEL], + [[4, 0], :on_sp, "\t", BEG|LABEL], + [[4, 1], :on_ident, "puts", CMDARG], + [[4, 5], :on_sp, " ", CMDARG], + [[4, 6], :on_ident, "x", END|LABEL], + [[4, 7], :on_nl, "\n", BEG], + [[5, 0], :on_sp, " ", BEG], + [[5, 2], :on_ident, "puts", CMDARG], + [[5, 6], :on_sp, " ", CMDARG], + [[5, 7], :on_tstring_beg, "\"", CMDARG], + [[5, 8], :on_tstring_content, "hello", CMDARG], + [[5, 13], :on_tstring_end, "\"", END], + [[5, 14], :on_nl, "\n", BEG], + [[6, 0], :on_sp, " ", BEG], + [[6, 2], :on_ident, "puts", CMDARG], + [[6, 6], :on_sp, " ", CMDARG], + [[6, 7], :on_tstring_beg, "'", CMDARG], + [[6, 8], :on_tstring_content, "hello", CMDARG], + [[6, 13], :on_tstring_end, "'", END], + [[6, 14], :on_sp, " ", END], + [[6, 21], :on_comment, "## a comment here\n", END], + [[7, 0], :on_kw, "end", END], + [[7, 3], :on_nl, "\n", BEG], + [[8, 0], :on_ignored_nl, "\n", BEG], + [[9, 0], :on_ignored_nl, "\n", BEG]] \ No newline at end of file diff --git a/solidity/test/test_lexer.rb b/solidity/test/test_lexer.rb new file mode 100644 index 0000000..1b69fa8 --- /dev/null +++ b/solidity/test/test_lexer.rb @@ -0,0 +1,37 @@ +## +# to run use +# ruby -I ./lib -I ./test test/test_lexer.rb + + +require 'helper' + + + +class TestLexer < MiniTest::Test + +def _untokenize( tokens ) + buf = String.new('') + tokens.each do |t| + buf << (t.is_a?( String ) ? t : t[1]) + + ## dump some token types + pp t if [:comment, :string].include?( t[0] ) + end + buf +end + + +def test_contracts + ['contract1', + 'contract2', + 'contract3'].each do |name, exp| + path = "./contracts/#{name}.sol" + lexer = Solidity::Lexer.read( path ) + + tokens = lexer.tokenize + + txt = read_text( path ) + assert_equal txt, _untokenize( tokens ) + end +end +end ## class TestLexer