require 'test_helper'

class EyeOfNewt::TokenizerTest < ActiveSupport::TestCase
  test "tokenizes WORD" do
    t = tok("hello world to-day")

    assert_equal [:WORD, "hello"], t.next_token
    assert_equal [:WORD, "world"], t.next_token
    assert_equal [:WORD, "to-day"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes TEXT after a comma" do
    t = tok(", cut into 2-inch chunks")

    assert_equal [',', ","], t.next_token
    assert_equal [:TEXT, "cut into 2-inch chunks"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes TEXT between parens" do
    t = tok("(this is a note)")

    assert_equal ['(', "("], t.next_token
    assert_equal [:TEXT, "this is a note"], t.next_token
    assert_equal [')', ")"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes TEXT between number and unit" do
    t = tok("1 (46 oz) can", ["can"])

    assert_equal [:NUMBER, "1"], t.next_token
    assert_equal ['(', "("], t.next_token
    assert_equal [:TEXT, "46 oz"], t.next_token
    assert_equal [')', ")"], t.next_token
    assert_equal [:UNIT, "can"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes TEXT after a comma, with parens" do
    t = tok(", sliced (this is a note)")

    assert_equal [',', ","], t.next_token
    assert_equal [:TEXT, "sliced"], t.next_token
    assert_equal ['(', "("], t.next_token
    assert_equal [:TEXT, "this is a note"], t.next_token
    assert_equal [')', ")"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes TO_TASTE" do
    t = tok("salt to taste")

    assert_equal [:WORD, "salt"], t.next_token
    assert_equal [:TO_TASTE, "to taste"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes OF" do
    t = tok("piece of cake")

    assert_equal [:WORD, "piece"], t.next_token
    assert_equal [:OF, "of"], t.next_token
    assert_equal [:WORD, "cake"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes fractions" do
    t = tok("1 1/2")

    assert_equal [:NUMBER, "1"], t.next_token
    assert_equal [:NUMBER, "1"], t.next_token
    assert_equal ['/', "/"], t.next_token
    assert_equal [:NUMBER, "2"], t.next_token
    assert_nil t.next_token
  end

  test "tokenizes recognized units as UNIT" do
    t = tok("1 cup spinach", ["cup"])

    assert_equal [:NUMBER, "1"], t.next_token
    assert_equal [:UNIT, "cup"], t.next_token
    assert_equal [:WORD, "spinach"], t.next_token
  end

  test "recognizes the longest version of UNIT" do
    t = tok("1 cup", ["c", "cup"])

    assert_equal [:NUMBER, "1"], t.next_token
    assert_equal [:UNIT, "cup"], t.next_token
  end

  test "does not recognize partial units" do
    t = tok("tomato", ["t"])

    assert_equal [:WORD, "tomato"], t.next_token
  end

  test "does not require a space between number and unit" do
    t = tok("1ml", ["ml"])

    assert_equal [:NUMBER, "1"], t.next_token
    assert_equal [:UNIT, "ml"], t.next_token
  end

  def tok(string, units = [])
    EyeOfNewt::Tokenizer.new(string, units: units)
  end
end