# usage: # ruby rx.rb # run the unit test # ruby -rrx -e 'p matchstr(...)' # use rx as library. don't run the unit test. def subst(str, pat) ary = str.split(//) r = find_match(ary, pat) return str if !r s, e, md = r h = {} md.each {|k, r| h[k] = ary[r].join } ary[0...s].join + yield(ary[s...e].join, h) + ary[e..-1].join end def find_match(ary, pat, beg=0) beg.upto(ary.length) {|s| try(pat, ary, s, {}) {|e, md| return [s, e, md] } } nil end def matchstr(exp, str) result = [] try(exp, str.split(//), 0, {}) {|pos, md| result << pos } result end def hasmatch(exp, str) ary = str.split(//) 0.upto(ary.length) {|i| try(exp, ary, i, {}) { return i } } nil end $try_count = 0 def count_try(exp, str) $try_count = 0 matchstr(exp, str) $try_count end def try(exp, seq, pos, md, &block) #p [pos, exp] $try_count += 1 case exp[0] when :anysym try_anysym(seq, pos, md, &block) when :string_start try_string_start(seq, pos, md, &block) when :string_end try_string_end(seq, pos, md, &block) when :line_start try_line_start(seq, pos, md, &block) when :line_end try_line_end(seq, pos, md, &block) when :empseq try_empseq(seq, pos, md, &block) when :lit _, sym = exp try_lit(sym, seq, pos, md, &block) when :cat _, e1, e2 = exp try_cat(e1, e2, seq, pos, md, &block) when :alt _, e1, e2 = exp try_alt(e1, e2, seq, pos, md, &block) when :rep _, e = exp try_rep(e, seq, pos, md, &block) when :opt _, e = exp try_opt(e, seq, pos, md, &block) when :plus _, e = exp try_plus(e, seq, pos, md, &block) when :times _, e, m, n = exp try_times(e, m, n, seq, pos, md, &block) when :rep_lazy _, e = exp try_rep_lazy(e, seq, pos, md, &block) when :opt_lazy _, e = exp try_opt_lazy(e, seq, pos, md, &block) when :plus_lazy _, e = exp try_plus_lazy(e, seq, pos, md, &block) when :times_lazy _, e, m, n = exp try_times_lazy(e, m, n, seq, pos, md, &block) when :capture _, n, e = exp try_capture(n, e, seq, pos, md, &block) else raise "unexpected AST: #{exp.inspect}" end end def try_anysym(seq, pos, md) yield pos + 1, md if pos < seq.length end def try_string_start(seq, pos, md) yield pos, md if pos == 0 end def try_string_end(seq, pos, md) yield pos, md if pos == seq.length end def try_line_start(seq, pos, md) yield pos, md if pos == 0 || (pos < seq.length && seq[pos-1] == "\n") end def try_line_end(seq, pos, md) yield pos, md if pos == seq.length || (pos < seq.length && seq[pos] == "\n") end def try_empseq(seq, pos, md) yield pos, md end def try_lit(sym, seq, pos, md) if pos < seq.length && seq[pos] == sym yield pos + 1, md end end def try_cat(e1, e2, seq, pos, md, &block) try(e1, seq, pos, md) {|pos2, md2| try(e2, seq, pos2, md2, &block) } end def try_alt(e1, e2, seq, pos, md, &block) try(e1, seq, pos, md, &block) try(e2, seq, pos, md, &block) end def try_rep(exp, seq, pos, md, &block) try(exp, seq, pos, md) {|pos2, md2| try_rep(exp, seq, pos2, md2, &block) if pos < pos2 } yield pos, md end def try_opt(e, seq, pos, md, &block) try(e, seq, pos, md, &block) yield pos, md end def try_plus(e, seq, pos, md, &block) try(e, seq, pos, md) {|pos2, md2| try_rep(e, seq, pos2, md2, &block) } end def try_times(e, m, n, seq, pos, md, &block) if 0 < n try(e, seq, pos, md) {|pos2, md2| try_times(e, m-1, n-1, seq, pos2, md2, &block) } end if m <= 0 yield pos, md end end def try_rep_lazy(e, seq, pos, md, &block) yield pos, md try(e, seq, pos, md) {|pos2, md2| try_rep_lazy(e, seq, pos2, md2, &block) if pos < pos2 } end def try_opt_lazy(e, seq, pos, md, &block) yield pos, md try(e, seq, pos, md, &block) end def try_plus_lazy(e, seq, pos, md, &block) try(e, seq, pos, md) {|pos2, md2| try_rep_lazy(e, seq, pos2, md2, &block) } end def try_times_lazy(e, m, n, seq, pos, md, &block) if m <= 0 yield pos, md end if 0 < n try(e, seq, pos, md) {|pos2, md2| try_times_lazy(e, m-1, n-1, seq, pos2, md2, &block) } end end def try_capture(n, e, seq, pos, md, &block) try(e, seq, pos, md) {|pos2, md2| md3 = md2.dup md3[n] = pos...pos2 yield pos2, md3 } end if $0 == __FILE__ # The trick to run the unit test only for non-library execution. require 'test/unit' class TestRX < Test::Unit::TestCase def test_empseq assert_equal([0], matchstr([:empseq], "")) end def test_lit assert_equal([], matchstr([:lit, "a"], "")) assert_equal([1], matchstr([:lit, "a"], "a")) assert_equal([1], matchstr([:lit, "a"], "aa")) assert_equal([], matchstr([:lit, "a"], "b")) end def test_cat assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "")) assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "a")) assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "ab")) assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "abc")) end def test_alt assert_equal([], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "")) assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "a")) assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "b")) end def test_rep assert_equal([0], matchstr([:rep, [:lit, "a"]], "")) assert_equal([5,4,3,2,1,0], matchstr([:rep, [:lit, "a"]], "aaaaa")) end def test_opt assert_equal([1,0], matchstr([:opt, [:lit, "a"]], "a")) assert_equal([0], matchstr([:opt, [:lit, "a"]], "b")) end def test_plus assert_equal([3,2,1], matchstr([:plus, [:lit, "a"]], "aaa")) assert_equal([3], matchstr([:cat, [:plus, [:lit, "a"]], [:lit, "b"]], "aaba")) assert_equal([], matchstr([:cat, [:plus, [:lit, "a"]], [:lit, "b"]], "ba")) end def test_times assert_equal([4,3,2], matchstr([:times, [:lit, "a"], 2, 4], "aaaaaa")) assert_equal([4,3,2], matchstr([:times, [:lit, "a"], 2, 4], "aaaaa")) assert_equal([4,3,2], matchstr([:times, [:lit, "a"], 2, 4], "aaaa")) assert_equal([3,2], matchstr([:times, [:lit, "a"], 2, 4], "aaa")) assert_equal([2], matchstr([:times, [:lit, "a"], 2, 4], "aa")) assert_equal([], matchstr([:times, [:lit, "a"], 2, 4], "a")) assert_equal([], matchstr([:times, [:lit, "a"], 2, 4], "")) end def test_rep_lazy assert_equal([0], matchstr([:rep_lazy, [:lit, "a"]], "")) assert_equal([0,1,2,3,4,5], matchstr([:rep_lazy, [:lit, "a"]], "aaaaa")) end def test_opt_lazy assert_equal([0,1], matchstr([:opt_lazy, [:lit, "a"]], "a")) assert_equal([0], matchstr([:opt_lazy, [:lit, "a"]], "b")) end end end