# usage:
#   ruby rx3.rb                      # run the unit test
#   ruby -rrx3 -e 'p matchstr(...)'  # use rx as library. don't run the unit test.
#
# A small backtracking matcher. A pattern is an AST built from nested arrays
# whose first element is a tag:
#
#   [:empseq]           matches the empty sequence
#   [:lit, sym]         matches one symbol equal to sym
#   [:cat, e1, e2]      e1 followed by e2
#   [:alt, e1, e2]      e1 or e2 (e1 is tried first)
#   [:rep, e]           zero or more e, longest first
#   [:rep_lazy, e]      zero or more e, shortest first
#   [:plus, e]          one or more e, longest first
#   [:opt, e]           e or nothing (e is tried first)
#   [:opt_lazy, e]      nothing or e (nothing is tried first)
#   [:ntimes, n, e]     exactly n repetitions of e
#   [:anysym]           any single symbol
#   [:string_start]     zero-width: position 0
#   [:string_end]       zero-width: position seq.length
#   [:line_start]       zero-width: start of a line
#   [:line_end]         zero-width: end of a line
#
# Every try_* method reports a match by yielding the position just after the
# matched portion; it may yield zero, one or many times (one per way to match).

# Returns every end position at which exp matches str when started at
# position 0, in the order the matcher discovers them.
def matchstr(exp, str)
  result = []
  try(exp, str.chars, 0) { |pos| result << pos }
  result
end

# Returns 0 if exp matches at the beginning of str, nil otherwise.
def startwithmatch(exp, str)
  ary = str.chars
  # `return` inside the block leaves the method on the first reported match.
  try(exp, ary, 0) { return 0 }
  nil
end

# Returns the smallest start position at which exp matches somewhere in str,
# or nil when there is no match. Position ary.length is tried as well so that
# patterns matching the empty sequence can succeed at the end of the string.
def hasmatch(exp, str)
  ary = str.chars
  0.upto(ary.length) do |i|
    try(exp, ary, i) { return i }
  end
  nil
end

# Runs matchstr(exp, str) and returns how many times `try` was invoked while
# doing so. Resets the global counter $try_count first.
def count_try(exp, str)
  $try_count = 0
  matchstr(exp, str)
  $try_count
end

# Number of calls to `try` since the counter was last reset (see count_try).
$try_count = 0

# Dispatches on the AST tag of exp and tries to match it against seq starting
# at pos, yielding each resulting end position to the block.
#
# exp   - the pattern AST (see the table at the top of the file)
# seq   - array of symbols (one-character strings when built from a String)
# pos   - index in seq at which matching starts
# block - receives each end position
#
# Raises RuntimeError for an unknown AST tag.
def try(exp, seq, pos, &block)
  #p [pos, exp]
  $try_count += 1
  case exp[0]
  when :empseq
    try_empseq(seq, pos, &block)
  when :lit
    _, sym = exp
    try_lit(sym, seq, pos, &block)
  when :cat
    _, e1, e2 = exp
    try_cat(e1, e2, seq, pos, &block)
  when :alt
    _, e1, e2 = exp
    try_alt(e1, e2, seq, pos, &block)
  when :rep
    _, e = exp
    try_rep(e, seq, pos, &block)
  when :anysym
    try_anysym(seq, pos, &block)
  when :string_start
    try_string_start(seq, pos, &block)
  when :string_end
    try_string_end(seq, pos, &block)
  when :line_start
    try_line_start(seq, pos, &block)
  when :line_end
    try_line_end(seq, pos, &block)
  when :opt
    _, e = exp
    try_opt(e, seq, pos, &block)
  when :plus
    _, e = exp
    try_plus(e, seq, pos, &block)
  when :rep_lazy
    _, e = exp
    try_rep_lazy(e, seq, pos, &block)
  when :opt_lazy
    _, e = exp
    try_opt_lazy(e, seq, pos, &block)
  when :ntimes
    _, n, e = exp
    try_ntimes(n, e, seq, pos, &block)
  else
    raise "unexpected AST: #{exp.inspect}"
  end
end

# Matches exactly n consecutive repetitions of e starting at pos.
#
# Implemented as an n-fold concatenation: match e once, then match the
# remaining n - 1 repetitions from wherever that match ended. Because each
# repetition may succeed in several ways, the block can be called several
# times, in the order the alternatives of e are explored.
#
# n <= 0 matches the empty sequence (yields pos once); this also guarantees
# that the recursion terminates.
def try_ntimes(n, e, seq, pos, &block)
  if n <= 0
    yield pos
  else
    try(e, seq, pos) { |pos2| try_ntimes(n - 1, e, seq, pos2, &block) }
  end
end

# Greedy optional: first reports the matches of e, then the empty match.
def try_opt(e, seq, pos, &block)
  try(e, seq, pos, &block)
  yield pos
end

# Lazy optional: first reports the empty match, then the matches of e.
def try_opt_lazy(e, seq, pos, &block)
  yield pos
  try(e, seq, pos, &block)
end

# Zero-width assertion: succeeds only at the end of seq.
def try_string_end(seq, pos)
  yield pos if pos == seq.length
end

# Zero-width assertion: succeeds only at position 0.
def try_string_start(seq, pos)
  yield pos if pos == 0
end

# Zero-width assertion: succeeds at position 0, or right after a "\n" as long
# as pos is still inside seq. The position after a newline that ends seq is
# therefore not reported as a line start.
def try_line_start(seq, pos)
  if pos == 0 || (pos < seq.length && seq[pos - 1] == "\n")
    yield pos
  end
end

# Zero-width assertion: succeeds at the end of seq or right before a "\n".
def try_line_end(seq, pos)
  if pos == seq.length || seq[pos] == "\n"
    yield pos
  end
end

# Matches any single symbol (including "\n"); fails at the end of seq.
def try_anysym(seq, pos, &block)
  if pos < seq.length
    yield pos + 1
  end
end

# Matches the empty sequence: always succeeds without consuming anything.
def try_empseq(seq, pos)
  yield pos
end

# Matches one symbol equal to sym at pos.
def try_lit(sym, seq, pos)
  #p [:try, sym, seq, pos]
  if pos < seq.length && seq[pos] == sym
    yield pos + 1
  end
end

# Concatenation: for every way e1 matches at pos, tries e2 from where e1 ended.
def try_cat(e1, e2, seq, pos, &block)
  try(e1, seq, pos) { |pos2| try(e2, seq, pos2, &block) }
end

# Alternation: reports the matches of e1 first, then those of e2.
def try_alt(e1, e2, seq, pos, &block)
  try(e1, seq, pos, &block)
  try(e2, seq, pos, &block)
end

# Greedy repetition (zero or more): longer matches are reported before shorter
# ones, and the empty match is reported last.
def try_rep(exp, seq, pos, &block)
  try(exp, seq, pos) do |pos2|
    # Only recurse when exp consumed something; otherwise an exp that can
    # match the empty sequence would recurse forever.
    try_rep(exp, seq, pos2, &block) if pos < pos2
  end
  yield pos
end

# Lazy repetition (zero or more): the empty match is reported first, then
# progressively longer ones.
def try_rep_lazy(e, seq, pos, &block)
  yield pos
  try(e, seq, pos) do |pos2|
    # Same progress guard as in try_rep to avoid infinite recursion.
    try_rep_lazy(e, seq, pos2, &block) if pos < pos2
  end
end

# Greedy one-or-more: one mandatory match of e followed by a greedy repetition.
def try_plus(e, seq, pos, &block)
  try(e, seq, pos) { |pos2| try_rep(e, seq, pos2, &block) }
end

if $0 == __FILE__ # The trick to run the unit test only for non-library execution.
  require 'test/unit'

  class TestRX < Test::Unit::TestCase
    def test_empseq
      assert_equal([0], matchstr([:empseq], ""))
    end

    def test_lit
      assert_equal([], matchstr([:lit, "a"], ""))
      assert_equal([1], matchstr([:lit, "a"], "a"))
      assert_equal([1], matchstr([:lit, "a"], "aa"))
      assert_equal([], matchstr([:lit, "a"], "b"))
    end

    def test_cat
      assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], ""))
      assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "a"))
      assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "ab"))
      assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "abc"))
    end

    def test_alt
      assert_equal([], matchstr([:alt, [:lit, "a"], [:lit, "b"]], ""))
      assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "a"))
      assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "b"))
    end

    def test_rep
      assert_equal([0], matchstr([:rep, [:lit, "a"]], ""))
      assert_equal([5,4,3,2,1,0], matchstr([:rep, [:lit, "a"]], "aaaaa"))
    end

    def test_anysym
      assert_equal([7,6,5,4,3,2,1,0], matchstr([:rep, [:anysym]], "abc\ndef"))
    end

    def test_string_start
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], ""))
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], "a"))
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], "aaa"))
    end

    def test_string_end
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], ""))
      assert_equal([1], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "a"))
      assert_equal([3], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "aaa"))
      assert_equal([4], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "aaaa"))
      assert_equal([1], matchstr([:cat, [:lit, "a"], [:string_end]], "a"))
      assert_equal([], matchstr([:cat, [:string_end], [:lit, "a"]], "a"))
    end

    def test_line_start
      assert_equal([0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], ""))
      assert_equal([7,4,0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], "aaa\naa\na\n"))
      assert_equal([9,7,4,0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], "aaa\naa\na\na"))
    end

    def test_line_end
      assert_equal([8,7,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "abc\ndef\n"))
      assert_equal([9,8,4,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "abc\n\ndef\n"))
      assert_equal([8,5,2], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "ab\ncd\nef"))
      assert_equal([0], matchstr([:cat, [:rep, [:anysym]], [:line_end]], ""))
      assert_equal([9,8,6,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "aaa\naa\na\n"))
      assert_equal([10,8,6,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "aaa\naa\na\na"))
      assert_equal([10,8,6,3,0], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "\naa\naa\na\na"))
    end

    # def test_notnewline
    #   assert_equal([1], matchstr([:notnewline], "a"))
    #   assert_equal([1], matchstr([:notnewline], "b"))
    #   assert_equal([1], matchstr([:notnewline], "c"))
    #   assert_equal([], matchstr([:notnewline], "\n"))
    # end
    #

    def test_opt
      assert_equal([1,0], matchstr([:opt, [:lit, "a"]], "a"))
      assert_equal([0], matchstr([:opt, [:lit, "a"]], "b"))
      assert_equal([0], matchstr([:opt, [:lit, "a"]], ""))
      assert_equal([2], matchstr([:cat, [:opt, [:lit, "a"]], [:lit, "b"]], "ab"))
      assert_equal([], matchstr([:cat, [:opt, [:lit, "a"]], [:lit, "b"]], "ac"))
    end

    def test_opt_lazy
      assert_equal([0,1], matchstr([:opt_lazy, [:lit, "a"]], "a"))
      assert_equal([0], matchstr([:opt_lazy, [:lit, "a"]], "b"))
      assert_equal([0], matchstr([:opt_lazy, [:lit, "a"]], ""))
      assert_equal([2], matchstr([:cat, [:opt_lazy, [:lit, "a"]], [:lit, "b"]], "ab"))
      assert_equal([], matchstr([:cat, [:opt_lazy, [:lit, "a"]], [:lit, "b"]], "ac"))
    end

    def test_plus
      assert_equal([], matchstr([:plus, [:lit, "a"]], ""))
      assert_equal([1], matchstr([:plus, [:lit, "a"]], "a"))
      assert_equal([5,4,3,2,1], matchstr([:plus, [:lit, "a"]], "aaaaa"))
    end

    def test_rep_lazy
      assert_equal([0], matchstr([:rep_lazy, [:lit, "a"]], ""))
      assert_equal([0,1,2,3,4,5], matchstr([:rep_lazy, [:lit, "a"]], "aaaaa"))
    end

    def test_ntimes
      assert_equal([0], matchstr([:ntimes, 0, [:lit, "a"]], ""))
      assert_equal([], matchstr([:ntimes, 1, [:lit, "a"]], ""))
      assert_equal([], matchstr([:ntimes, 3, [:lit, "a"]], "aa"))
      assert_equal([3], matchstr([:ntimes, 3, [:lit, "a"]], "aaa"))
      assert_equal([3], matchstr([:ntimes, 3, [:lit, "a"]], "aaaa"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "aaaa"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "aaba"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "abaa"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "abba"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "aaaa"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "baba"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "bbaa"))
      assert_equal([3], matchstr([:ntimes, 3, [:alt, [:lit, "a"], [:lit, "b"]]], "bbba"))
      assert_equal([2, 1, 1, 0], matchstr([:ntimes, 2, [:opt, [:lit, "a"]]], "aaa"))
      assert_equal([0, 1, 1, 2], matchstr([:ntimes, 2, [:opt_lazy, [:lit, "a"]]], "aaa"))
      assert_equal([6], matchstr([:cat, [:rep, [:lit, "b"]], [:ntimes, 3, [:lit, "a"]]], "bbbaaaa"))
    end
  end
end