# usage:
#   ruby rx.rb                      # run the unit test
#   ruby -rrx -e 'p matchstr(...)'  # use rx as library. don't run the unit test.
#
# A small backtracking pattern matcher. The subject is an array of symbols
# (the string helpers below split a String into one-character strings) and a
# pattern is a nested array (AST) whose first element names the node kind:
#
#   [:empseq]            matches the empty sequence
#   [:lit, sym]          matches one element equal to sym
#   [:anysym]            matches any one element (including "\n")
#   [:cat, e1, e2]       e1 followed by e2
#   [:alt, e1, e2]       e1 or e2 (e1 is preferred)
#   [:rep, e]            zero or more e, greedy
#   [:plus, e]           one or more e, greedy
#   [:opt, e]            zero or one e, greedy
#   [:rep_lazy, e]       zero or more e, shortest first
#   [:plus_lazy, e]      one or more e, shortest first
#   [:opt_lazy, e]       zero or one e, shortest first
#   [:string_start]      zero-width: position 0
#   [:string_end]        zero-width: position == length
#   [:line_start]        zero-width: see try_line_start
#   [:line_end]          zero-width: see try_line_end
#   [:capture, name, e]  matches e and records its range under name
#
# "Match data" (md) is a Hash mapping capture names to Ranges of positions.

# Finds the first match of pat in ary, trying start positions beg, beg+1, ...
# up to and including ary.length (so an empty match at the very end is found).
#
# Returns [start, end, md] for the most preferred match at the first start
# position that matches, or nil when there is no match.
def find_match(ary, pat, beg=0)
  beg.upto(ary.length) do |s|
    # The first yield from try is the preferred match; return immediately.
    try(pat, ary, s, {}) { |e, md| return [s, e, md] }
  end
  nil
end

# Replaces the first match of pat in str with the value of the block.
#
# The block receives the matched text and a Hash mapping capture names to the
# captured text. Returns str itself when nothing matches.
def subst(str, pat)
  ary = str.split(//)
  found = find_match(ary, pat)
  return str if !found
  s, e, md = found
  h = {}
  md.each { |k, range| h[k] = ary[range].join }
  ary[0...s].join + yield(ary[s...e].join, h) + ary[e..-1].join
end

# Replaces every non-overlapping match of pat in str with the value of the
# block, scanning left to right.
#
# The block receives the matched text and a Hash mapping capture names to the
# captured text, exactly as in subst. Returns a new String; when nothing
# matches, the result has the same content as str.
def gsubst(str, pat)
  ary = str.split(//)
  pieces = []
  pos = 0
  # pos == ary.length is still a valid start: an empty match may occur at the end.
  while pos <= ary.length
    found = find_match(ary, pat, pos)
    break if !found
    s, e, md = found
    h = {}
    md.each { |k, range| h[k] = ary[range].join }
    pieces << ary[pos...s].join       # unmatched text before the match
    pieces << yield(ary[s...e].join, h)
    if s == e
      # Empty match: copy the next element through and step over it,
      # otherwise the same empty match would be found forever.
      pieces << ary[s] if s < ary.length
      pos = s + 1
    else
      pos = e
    end
  end
  # drop tolerates pos == ary.length + 1 (after an empty match at the end).
  pieces << ary.drop(pos).join
  pieces.join
end

# Matches exp against str anchored at position 0 and returns every match as
# [end_pos, captures], in preference order. captures maps each capture name to
# [captured_text, range].
def matchcap(exp, str)
  result = []
  seq = str.split(//)
  try(exp, seq, 0, {}) do |pos, md|
    h = {}
    md.each { |k, range| h[k] = [seq[range].join, range] }
    result << [pos, h]
  end
  result
end

# Matches exp against str anchored at position 0 and returns the end position
# of every possible match, in preference order.
def matchstr(exp, str)
  result = []
  try(exp, str.split(//), 0, {}) { |pos, md| result << pos }
  result
end

# Returns 0 when exp matches at the start of str, nil otherwise.
def startwithmatch(exp, str)
  ary = str.split(//)
  try(exp, ary, 0, {}) { return 0 }
  nil
end

# Returns the first start position at which exp matches in str, or nil.
def hasmatch(exp, str)
  ary = str.split(//)
  0.upto(ary.length) do |i|
    try(exp, ary, i, {}) { return i }
  end
  nil
end

# Returns how many times try is invoked while collecting all matches of exp
# anchored at the start of str. Resets the global counter first.
def count_try(exp, str)
  $try_count = 0
  matchstr(exp, str)
  $try_count
end

# Number of try invocations since the last reset (see count_try).
$try_count = 0

# Core matcher: yields (end_pos, md) once for every way exp can match seq
# starting at pos, most preferred first. Dispatches on the AST node kind.
#
# Raises RuntimeError for an unknown node kind.
def try(exp, seq, pos, md, &block)
  #p [pos, exp]
  $try_count += 1
  case exp[0]
  when :empseq
    try_empseq(seq, pos, md, &block)
  when :lit
    _, sym = exp
    try_lit(sym, seq, pos, md, &block)
  when :cat
    _, e1, e2 = exp
    try_cat(e1, e2, seq, pos, md, &block)
  when :alt
    _, e1, e2 = exp
    try_alt(e1, e2, seq, pos, md, &block)
  when :rep
    _, e = exp
    try_rep(e, seq, pos, md, &block)
  when :anysym
    try_anysym(seq, pos, md, &block)
  when :string_start
    try_string_start(seq, pos, md, &block)
  when :string_end
    try_string_end(seq, pos, md, &block)
  when :line_start
    try_line_start(seq, pos, md, &block)
  when :line_end
    try_line_end(seq, pos, md, &block)
  when :opt
    _, e = exp
    try_opt(e, seq, pos, md, &block)
  when :plus
    _, e = exp
    try_plus(e, seq, pos, md, &block)
  when :rep_lazy
    _, e = exp
    try_rep_lazy(e, seq, pos, md, &block)
  when :opt_lazy
    _, e = exp
    try_opt_lazy(e, seq, pos, md, &block)
  when :plus_lazy
    _, e = exp
    try_plus_lazy(e, seq, pos, md, &block)
  when :capture
    _, n, e = exp
    try_capture(n, e, seq, pos, md, &block)
  else
    raise "unexpected AST: #{exp.inspect}"
  end
end

# Matches e and records pos...end_pos under name n. The match data is
# duplicated so alternative branches never see each other's captures.
def try_capture(n, e, seq, pos, md, &block)
  try(e, seq, pos, md) do |pos2, md2|
    md3 = md2.dup
    md3[n] = pos...pos2
    yield pos2, md3
  end
end

# Greedy optional: prefer matching e, then fall back to the empty match.
def try_opt(e, seq, pos, md, &block)
  try(e, seq, pos, md, &block)
  yield pos, md
end

# Lazy optional: prefer the empty match, then try e.
def try_opt_lazy(e, seq, pos, md, &block)
  yield pos, md
  try(e, seq, pos, md, &block)
end

# Zero-width match at the end of the sequence.
def try_string_end(seq, pos, md)
  yield pos, md if pos == seq.length
end

# Zero-width match at the start of the sequence.
def try_string_start(seq, pos, md)
  yield pos, md if pos == 0
end

# Zero-width match at position 0 or right after a "\n", except at the very
# end of the sequence (a trailing newline does not start a new line).
def try_line_start(seq, pos, md)
  if pos == 0 || (pos < seq.length && seq[pos-1] == "\n")
    yield pos, md
  end
end

# Zero-width match at the end of the sequence or right before a "\n".
def try_line_end(seq, pos, md)
  if pos == seq.length || seq[pos] == "\n"
    yield pos, md
  end
end

# Consumes any single element.
def try_anysym(seq, pos, md, &block)
  if pos < seq.length
    yield pos+1, md
  end
end

# Always matches without consuming anything.
def try_empseq(seq, pos, md)
  yield pos, md
end

# Consumes one element when it equals sym.
def try_lit(sym, seq, pos, md)
  #p [:try, sym, seq, pos]
  if pos < seq.length && seq[pos] == sym
    yield pos+1, md
  end
end

# Concatenation: every match of e1 is continued with every match of e2.
def try_cat(e1, e2, seq, pos, md, &block)
  try(e1, seq, pos, md) do |pos2, md2|
    try(e2, seq, pos2, md2, &block)
  end
end

# Alternation: all matches of e1 first, then all matches of e2.
def try_alt(e1, e2, seq, pos, md, &block)
  try(e1, seq, pos, md, &block)
  try(e2, seq, pos, md, &block)
end

# Greedy repetition (zero or more): longer matches are yielded first.
def try_rep(exp, seq, pos, md, &block)
  try(exp, seq, pos, md) do |pos2, md2|
    # Only recurse when progress was made; an empty inner match would
    # otherwise recurse forever.
    try_rep(exp, seq, pos2, md2, &block) if pos < pos2
  end
  yield pos, md
end

# Lazy repetition (zero or more): shorter matches are yielded first.
def try_rep_lazy(e, seq, pos, md, &block)
  yield pos, md
  try(e, seq, pos, md) do |pos2, md2|
    # Same progress guard as try_rep.
    try_rep_lazy(e, seq, pos2, md2, &block) if pos < pos2
  end
end

# Lazy one-or-more: one mandatory e followed by a lazy repetition.
def try_plus_lazy(e, seq, pos, md, &block)
  try(e, seq, pos, md) do |pos2, md2|
    try_rep_lazy(e, seq, pos2, md2, &block)
  end
end

# Greedy one-or-more: one mandatory e followed by a greedy repetition.
def try_plus(e, seq, pos, md, &block)
  try(e, seq, pos, md) do |pos2, md2|
    try_rep(e, seq, pos2, md2, &block)
  end
end

if $0 == __FILE__ # The trick to run the unit test only for non-library execution.
  require 'test/unit'

  class TestRX < Test::Unit::TestCase
    def test_empseq
      assert_equal([0], matchstr([:empseq], ""))
    end

    def test_lit
      assert_equal([], matchstr([:lit, "a"], ""))
      assert_equal([1], matchstr([:lit, "a"], "a"))
      assert_equal([1], matchstr([:lit, "a"], "aa"))
      assert_equal([], matchstr([:lit, "a"], "b"))
    end

    def test_cat
      assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], ""))
      assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "a"))
      assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "ab"))
      assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "abc"))
    end

    def test_alt
      assert_equal([], matchstr([:alt, [:lit, "a"], [:lit, "b"]], ""))
      assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "a"))
      assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "b"))
    end

    def test_rep
      assert_equal([0], matchstr([:rep, [:lit, "a"]], ""))
      assert_equal([5,4,3,2,1,0], matchstr([:rep, [:lit, "a"]], "aaaaa"))
    end

    def test_anysym
      assert_equal([7,6,5,4,3,2,1,0], matchstr([:rep, [:anysym]], "abc\ndef"))
    end

    def test_string_start
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], ""))
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], "a"))
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], "aaa"))
    end

    def test_string_end
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], ""))
      assert_equal([1], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "a"))
      assert_equal([3], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "aaa"))
      assert_equal([4], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "aaaa"))
      assert_equal([1], matchstr([:cat, [:lit, "a"], [:string_end]], "a"))
      assert_equal([], matchstr([:cat, [:string_end], [:lit, "a"]], "a"))
    end

    def test_line_start
      assert_equal([0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], ""))
      assert_equal([7,4,0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], "aaa\naa\na\n"))
      assert_equal([9,7,4,0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], "aaa\naa\na\na"))
    end

    def test_line_end
      assert_equal([8,7,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "abc\ndef\n"))
      assert_equal([9,8,4,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "abc\n\ndef\n"))
      assert_equal([8,5,2], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "ab\ncd\nef"))
      assert_equal([0], matchstr([:cat, [:rep, [:anysym]], [:line_end]], ""))
      assert_equal([9,8,6,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "aaa\naa\na\n"))
      assert_equal([10,8,6,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "aaa\naa\na\na"))
      assert_equal([10,8,6,3,0], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "\naa\naa\na\na"))
    end

    # def test_notnewline
    #   assert_equal([1], matchstr([:notnewline], "a"))
    #   assert_equal([1], matchstr([:notnewline], "b"))
    #   assert_equal([1], matchstr([:notnewline], "c"))
    #   assert_equal([], matchstr([:notnewline], "\n"))
    # end
    #

    def test_opt
      assert_equal([1,0], matchstr([:opt, [:lit, "a"]], "a"))
      assert_equal([0], matchstr([:opt, [:lit, "a"]], "b"))
      assert_equal([0], matchstr([:opt, [:lit, "a"]], ""))
      assert_equal([2], matchstr([:cat, [:opt, [:lit, "a"]], [:lit, "b"]], "ab"))
      assert_equal([], matchstr([:cat, [:opt, [:lit, "a"]], [:lit, "b"]], "ac"))
    end

    def test_opt_lazy
      assert_equal([0,1], matchstr([:opt_lazy, [:lit, "a"]], "a"))
      assert_equal([0], matchstr([:opt_lazy, [:lit, "a"]], "b"))
      assert_equal([0], matchstr([:opt_lazy, [:lit, "a"]], ""))
      assert_equal([2], matchstr([:cat, [:opt_lazy, [:lit, "a"]], [:lit, "b"]], "ab"))
      assert_equal([], matchstr([:cat, [:opt_lazy, [:lit, "a"]], [:lit, "b"]], "ac"))
    end

    def test_plus
      assert_equal([], matchstr([:plus, [:lit, "a"]], ""))
      assert_equal([1], matchstr([:plus, [:lit, "a"]], "a"))
      assert_equal([5,4,3,2,1], matchstr([:plus, [:lit, "a"]], "aaaaa"))
    end

    def test_rep_lazy
      assert_equal([0], matchstr([:rep_lazy, [:lit, "a"]], ""))
      assert_equal([0,1,2,3,4,5], matchstr([:rep_lazy, [:lit, "a"]], "aaaaa"))
    end

    def test_find_match
      assert_equal([1,3, {}], find_match(["a","b","b","c","c","c"], [:plus, [:lit, "b"]]))
    end

    def test_subst
      assert_equal("aZccc", subst("abbccc", [:plus, [:lit, "b"]]) { "Z" })
      assert_equal("aZcccbb", subst("abbcccbb", [:plus, [:lit, "b"]]) { "Z" })
    end

    def test_capture
      assert_equal([1,3, {:m=>1...3}],
        find_match(["a","b","b","c","c","c"], [:capture, :m, [:plus, [:lit, "b"]]]))
      seq = ["f","o","o","=","b","a","r"]
      s, e, md = find_match(seq,
        [:cat, [:capture, :key, [:rep, [:anysym]]],
               [:cat, [:lit, "="],
                      [:capture, :val, [:rep, [:anysym]]]]])
      assert_equal([0,7, {:key=>0...3,:val=>4...7}], [s, e, md])
      assert_equal(["f","o","o"], seq[md[:key]])
      assert_equal(["b","a","r"], seq[md[:val]])
    end

    def test_gsubst
      assert_equal("", gsubst("", [:lit, "a"]) { "Z" })
      assert_equal("bbb", gsubst("bbb", [:lit, "a"]) { "Z" })
      assert_equal("aZcccZd", gsubst("abbcccbbbbd", [:plus, [:lit, "b"]]) { "Z" })
      assert_equal("aBBcccBBBBd", gsubst("abbcccbbbbd", [:plus, [:lit, "b"]]) {|s, h| s.upcase })
      assert_equal("abbccbbbbbbbbd",
        gsubst("abbcccbbbbd",
               [:cat, [:lit, "c"], [:capture, :n, [:plus, [:lit, "b"]]]]) {|s, h| h[:n] * 2 })
      assert_equal("bbbaaabbbabbaaaba",
        gsubst("aaabbbabbbaaabbab",
               [:cat, [:capture, :a, [:plus, [:lit, "a"]]],
                      [:capture, :b, [:plus, [:lit, "b"]]]]) {|s, md| md[:b] + md[:a] })
      assert_equal("Z", gsubst("", [:empseq]) { "Z" })
    end
  end
end