Skip to content

Commit

Permalink
up lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
geraldb committed Feb 7, 2023
1 parent 7b5a471 commit f2ab30f
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 87 deletions.
55 changes: 0 additions & 55 deletions solidity/NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,58 +55,3 @@ https://pygments.org/docs/lexers/



RubyVM::AbstractSyntaxTree.parse("puts('test', )", keep_tokens: true).tokens
# =>
# [[0, :tIDENTIFIER, "puts", [1, 0, 1, 4]],
# [1, :"(", "(", [1, 4, 1, 5]],
# [2, :tSTRING_BEG, "'", [1, 5, 1, 6]],
# [3, :tSTRING_CONTENT, "test", [1, 6, 1, 10]],
# [4, :tSTRING_END, "'", [1, 10, 1, 11]],
# [5, :",", ",", [1, 11, 1, 12]],
# [6, :tSP, " ", [1, 12, 1, 13]],
# [7, :")", ")", [1, 13, 1, 14]]]


require 'ripper'
require 'pp'

code = <<STR


5.times do | x |
puts x
puts "hello"
puts 'hello' ## a comment here
end


STR

puts code
pp Ripper.lex(code)



[[[1, 0], :on_ignored_nl, "\n", BEG],
[[2, 0], :on_ignored_nl, "\n", BEG],
[[3, 0], :on_int, "5", END],
[[3, 1], :on_period, ".", DOT],
[[3, 2], :on_ident, "times", ARG],
[[3, 7], :on_sp, " ", ARG],
[[3, 11], :on_kw, "do", BEG],
[[3, 13], :on_sp, " ", BEG],
[[3, 17], :on_op, "|", BEG|LABEL],
[[3, 18], :on_sp, " ", BEG|LABEL],
[[3, 22], :on_ident, "x", ARG],
[[3, 23], :on_sp, " ", ARG],
[[3, 27], :on_op, "|", BEG|LABEL],
[[3, 28], :on_ignored_nl, "\n", BEG|LABEL],
[[4, 0], :on_sp, "\t", BEG|LABEL],
[[4, 1], :on_ident, "puts", CMDARG],
[[4, 5], :on_sp, " ", CMDARG],
[[4, 6], :on_ident, "x", END|LABEL],
[[4, 7], :on_nl, "\n", BEG],
[[5, 0], :on_kw, "end", END],
[[5, 3], :on_nl, "\n", BEG],
[[6, 0], :on_ignored_nl, "\n", BEG],
[[7, 0], :on_ignored_nl, "\n", BEG]]
45 changes: 19 additions & 26 deletions solidity/lib/solidity/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,14 @@ def initialize( txt )
## SingleQuotedStringCharacter
## : ~['\r\n\\] | ('\\' .) ;

## double-quoted string literal (single line only)
##   a string character is either
##     - a backslash followed by any character (escape sequence) or
##     - anything but the double quote, \r, \n or the backslash
##   note: inside a regexp literal \\ already stands for ONE literal backslash,
##     thus, the escape sequence is \\. (and NOT \\\\.)
DOUBLE_QUOTE = %r{"
                   ( \\. | [^"\r\n\\] )*
                  "}x

## single-quoted string literal (single line only); follows the grammar rule
##   SingleQuotedStringCharacter : ~['\r\n\\] | ('\\' .) ;
##   note: inside a regexp literal \\ already stands for ONE literal backslash,
##     thus, the escape sequence is \\. (and NOT \\\\.)
SINGLE_QUOTE = %r{'
                   ( \\. | [^'\r\n\\] )*
                  '}x

DOUBLE_QUOTE = %r{"
( \\\\. | [^"] )*
"}x


## from the solidity grammar
## > An identifier in solidity has to start with a letter,
Expand All @@ -76,40 +75,34 @@ def initialize( txt )
##
## COMMENT
## : '/*' .*? '*/' ;
##
## LINE_COMMENT
## : '//' ~[\r\n]* ;

## block comment - starts with /* and ends with the first */
##   note: block comments may span many lines;
##     the m(ultiline) flag is required, otherwise the dot (.) does NOT match newlines
##   note: .*? is non-greedy and, thus, stops at the first closing */
COMMENT = %r{/\*
              .*?
             \*/}mx

## end-of-line comment - starts with // and runs up to
##   (but NOT including) the next carriage return (\r) or newline (\n)
LINE_COMMENT = %r{//
[^\r\n]*}x

def tokenize
t = []
s = StringScanner.new( @txt )

until s.eos? ## loop until hitting end-of-string (file)
if s.check( /[ \t]*\/\*/ )
## note: auto-slurp leading (optional) spaces!!!! - why? why not?
comment = s.scan_until( /\*\// )
## print "multi-line comment:"
## pp comment
t << [:comment, comment.lstrip]
elsif s.check( /[ \t]*\/\// )
## note: auto-slurp leading (optional) spaces!!!! - why? why not?
## note: auto-remove newline AND trailing whitespace - why? why not?
comment = s.scan_until( /\n|$/ ).strip
## print "comment:"
## pp comment
t << [:comment, comment]
elsif s.scan( /[ \t]+/ ) ## one or more spaces
if s.scan( /[ \t]+/ ) ## one or more spaces
## note: (auto-)convert tab to space - why? why not?
t << [:sp, s.matched.gsub( /[\t]/, ' ') ]
elsif s.scan( /\r?\n/ ) ## check for (windows) carriage return (\r) - why? why not?
t << [:nl, "\n" ]
elsif s.check( "'" ) ## single-quoted string
str = s.scan( SINGLE_QUOTE )
t << [:string, str]
elsif s.check( '"' ) ## double-quoted string
str = s.scan( DOUBLE_QUOTE )
t << [:string, str]
elsif s.scan( COMMENT )
t << [:comment, s.matched]
elsif s.scan( LINE_COMMENT )
t << [:comment, s.matched]
elsif s.scan( DOUBLE_QUOTE ) ## double-quoted string
t << [:string, s.matched]
elsif s.scan( SINGLE_QUOTE ) ## single-quoted string
t << [:string, s.matched]
elsif s.scan( NAME )
name = s.matched
case name
Expand Down
6 changes: 2 additions & 4 deletions solidity/lib/solidity/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,11 @@ def _quick_pass_one
lex = Lexer.new( @txt )

until lex.eos?
while lex.peek == :sp do ## note: do NOT skip newlines here; pass along blank/empty lines for now - why? why not?
lex.next
end

case lex.peek
when :comment ## single or multi-line comment
tree << [:comment, lex.next]
## note: if next token is newline - slurp / ignore
lex.next if lex.peek == :nl
when :pragma
code = lex.scan_until( :';',
include: true )
Expand Down
4 changes: 2 additions & 2 deletions solidity/lib/solidity/version.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

module Solidity
MAJOR = 0
MINOR = 1
PATCH = 5
MINOR = 2
PATCH = 0
VERSION = [MAJOR,MINOR,PATCH].join('.')

def self.version
Expand Down
90 changes: 90 additions & 0 deletions solidity/sandbox/test_lexer_ruby.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
###
# test ruby built-in lexers
# answer questions
# does end-of-line comment include newline in lexeme - yes/no?
#
# - [[6, 21], :on_comment, "## a comment here\n", END],



require 'ripper'
require 'pp'

## sample ruby source fed to the built-in lexers below
code = <<STR
5.times do | x |
puts x
puts "hello"
puts 'hello' ## a comment here
## another comment here
## another here
## yet another here
end
STR


## lexer no. 1 - ripper
puts code
pp Ripper.lex(code)


## lexer no. 2 - abstract syntax tree (ast) parser with tokens kept
##   note: the keep_tokens option requires ruby 3.2+;
##     older versions raise an ArgumentError (unknown keyword: :keep_tokens) -
##     report & skip the dump instead of aborting the script
puts code
begin
  pp RubyVM::AbstractSyntaxTree.parse( code,
                                       keep_tokens: true ).tokens
rescue ArgumentError => e
  warn "skipping RubyVM::AbstractSyntaxTree tokens (ruby #{RUBY_VERSION}) - #{e.message}"
end

# =>
# [[0, :tIDENTIFIER, "puts", [1, 0, 1, 4]],
# [1, :"(", "(", [1, 4, 1, 5]],
# [2, :tSTRING_BEG, "'", [1, 5, 1, 6]],
# [3, :tSTRING_CONTENT, "test", [1, 6, 1, 10]],
# [4, :tSTRING_END, "'", [1, 10, 1, 11]],
# [5, :",", ",", [1, 11, 1, 12]],
# [6, :tSP, " ", [1, 12, 1, 13]],
# [7, :")", ")", [1, 13, 1, 14]]]


__END__

[[[1, 0], :on_ignored_nl, "\n", BEG],
[[2, 0], :on_ignored_nl, "\n", BEG],
[[3, 0], :on_int, "5", END],
[[3, 1], :on_period, ".", DOT],
[[3, 2], :on_ident, "times", ARG],
[[3, 7], :on_sp, " ", ARG],
[[3, 11], :on_kw, "do", BEG],
[[3, 13], :on_sp, " ", BEG],
[[3, 17], :on_op, "|", BEG|LABEL],
[[3, 18], :on_sp, " ", BEG|LABEL],
[[3, 22], :on_ident, "x", ARG],
[[3, 23], :on_sp, " ", ARG],
[[3, 27], :on_op, "|", BEG|LABEL],
[[3, 28], :on_ignored_nl, "\n", BEG|LABEL],
[[4, 0], :on_sp, "\t", BEG|LABEL],
[[4, 1], :on_ident, "puts", CMDARG],
[[4, 5], :on_sp, " ", CMDARG],
[[4, 6], :on_ident, "x", END|LABEL],
[[4, 7], :on_nl, "\n", BEG],
[[5, 0], :on_sp, " ", BEG],
[[5, 2], :on_ident, "puts", CMDARG],
[[5, 6], :on_sp, " ", CMDARG],
[[5, 7], :on_tstring_beg, "\"", CMDARG],
[[5, 8], :on_tstring_content, "hello", CMDARG],
[[5, 13], :on_tstring_end, "\"", END],
[[5, 14], :on_nl, "\n", BEG],
[[6, 0], :on_sp, " ", BEG],
[[6, 2], :on_ident, "puts", CMDARG],
[[6, 6], :on_sp, " ", CMDARG],
[[6, 7], :on_tstring_beg, "'", CMDARG],
[[6, 8], :on_tstring_content, "hello", CMDARG],
[[6, 13], :on_tstring_end, "'", END],
[[6, 14], :on_sp, " ", END],
[[6, 21], :on_comment, "## a comment here\n", END],
[[7, 0], :on_kw, "end", END],
[[7, 3], :on_nl, "\n", BEG],
[[8, 0], :on_ignored_nl, "\n", BEG],
[[9, 0], :on_ignored_nl, "\n", BEG]]
37 changes: 37 additions & 0 deletions solidity/test/test_lexer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
##
# to run use
# ruby -I ./lib -I ./test test/test_lexer.rb


require 'helper'



##
#  round-trip tests for the solidity lexer
#    note: uses Minitest (and NOT the legacy MiniTest alias);
#      the alias is no longer loaded by default in newer minitest versions
class TestLexer < Minitest::Test

  ## rebuild the source text from a list of tokens
  ##   a token is either a plain string or
  ##     an array with the text in the second slot e.g. [:comment, "// hello"];
  ##   returns the concatenated text of all tokens
  def _untokenize( tokens )
    buf = String.new('')
    tokens.each do |t|
      buf << (t.is_a?( String ) ? t : t[1])

      ## dump some token types (for debugging)
      pp t if [:comment, :string].include?( t[0] )
    end
    buf
  end


  ## tokenize every sample contract and check that
  ##   joining the tokens gives back the text read from the same file
  ##   note: read_text is not defined here - presumably provided via helper
  def test_contracts
    ['contract1',
     'contract2',
     'contract3'].each do |name|
      path   = "./contracts/#{name}.sol"
      lexer  = Solidity::Lexer.read( path )

      tokens = lexer.tokenize

      txt = read_text( path )
      assert_equal txt, _untokenize( tokens )
    end
  end
end   ## class TestLexer

0 comments on commit f2ab30f

Please sign in to comment.