ext/hpricot_scan/HpricotScanService.java in why-hpricot-0.6.210 vs ext/hpricot_scan/HpricotScanService.java in why-hpricot-0.7.229
- old
+ new
@@ -1,154 +1,521 @@
// line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
import java.io.IOException;
import org.jruby.Ruby;
+import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyHash;
import org.jruby.RubyModule;
import org.jruby.RubyNumeric;
+import org.jruby.RubyObject;
import org.jruby.RubyObjectAdapter;
+import org.jruby.RubyRegexp;
import org.jruby.RubyString;
+import org.jruby.anno.JRubyMethod;
+import org.jruby.exceptions.RaiseException;
import org.jruby.javasupport.JavaEmbedUtils;
+import org.jruby.runtime.Arity;
import org.jruby.runtime.Block;
-import org.jruby.runtime.CallbackFactory;
+import org.jruby.runtime.ObjectAllocator;
+import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
+import org.jruby.runtime.callback.Callback;
import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.load.BasicLibraryService;
+import org.jruby.util.ByteList;
public class HpricotScanService implements BasicLibraryService {
- public static String NO_WAY_SERIOUSLY="*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
- private static RubyObjectAdapter rubyApi;
+ public static byte[] realloc(byte[] input, int size) {
+ byte[] newArray = new byte[size];
+ System.arraycopy(input, 0, newArray, 0, input.length);
+ return newArray;
+ }
- public void ELE(IRubyObject N) {
- if (te > ts || text) {
- IRubyObject raw_string = runtime.getNil();
- ele_open = false; text = false;
- if (ts != -1 && N != cdata && N != sym_text && N != procins && N != comment) {
- raw_string = runtime.newString(new String(buf,ts,te-ts));
- }
- rb_yield_tokens(N, tag[0], attr, raw_string, taint);
- }
- }
+ // hpricot_state
+ public static class State {
+ public IRubyObject doc;
+ public IRubyObject focus;
+ public IRubyObject last;
+ public IRubyObject EC;
+ public boolean xml, strict, fixup;
+ }
- public void SET(IRubyObject[] N, int E) {
- int mark = 0;
- if(N == tag) {
- if(mark_tag == -1 || E == mark_tag) {
- tag[0] = runtime.newString("");
- } else if(E > mark_tag) {
- tag[0] = runtime.newString(new String(buf,mark_tag, E-mark_tag));
- }
- } else if(N == akey) {
- if(mark_akey == -1 || E == mark_akey) {
- akey[0] = runtime.newString("");
- } else if(E > mark_akey) {
- akey[0] = runtime.newString(new String(buf,mark_akey, E-mark_akey));
- }
- } else if(N == aval) {
- if(mark_aval == -1 || E == mark_aval) {
- aval[0] = runtime.newString("");
- } else if(E > mark_aval) {
- aval[0] = runtime.newString(new String(buf,mark_aval, E-mark_aval));
- }
- }
- }
+ static boolean OPT(IRubyObject opts, String key) {
+ Ruby runtime = opts.getRuntime();
+ return !opts.isNil() && ((RubyHash)opts).op_aref(runtime.getCurrentContext(), runtime.newSymbol(key)).isTrue();
+ }
- public void CAT(IRubyObject[] N, int E) {
- if(N[0].isNil()) {
- SET(N,E);
- } else {
- int mark = 0;
- if(N == tag) {
- mark = mark_tag;
- } else if(N == akey) {
- mark = mark_akey;
- } else if(N == aval) {
- mark = mark_aval;
- }
- ((RubyString)(N[0])).append(runtime.newString(new String(buf, mark, E-mark)));
- }
- }
+ // H_PROP(name, H_ELE_TAG)
+ public static IRubyObject hpricot_ele_set_name(IRubyObject self, IRubyObject x) {
+ H_ELE_SET(self, H_ELE_TAG, x);
+ return self;
+ }
- public void SLIDE(Object N) {
- int mark = 0;
- if(N == tag) {
- mark = mark_tag;
- } else if(N == akey) {
- mark = mark_akey;
- } else if(N == aval) {
- mark = mark_aval;
- }
- if(mark > ts) {
- if(N == tag) {
- mark_tag -= ts;
- } else if(N == akey) {
- mark_akey -= ts;
- } else if(N == aval) {
- mark_aval -= ts;
- }
- }
- }
+ public static IRubyObject hpricot_ele_clear_name(IRubyObject self) {
+ H_ELE_SET(self, H_ELE_TAG, self.getRuntime().getNil());
+ return self.getRuntime().getTrue();
+ }
- public void ATTR(IRubyObject K, IRubyObject V) {
- if(!K.isNil()) {
- if(attr.isNil()) {
- attr = RubyHash.newHash(runtime);
- }
- ((RubyHash)attr).op_aset(runtime.getCurrentContext(),K,V);
- // ((RubyHash)attr).aset(K,V);
- }
- }
+ public static IRubyObject hpricot_ele_get_name(IRubyObject self) {
+ return H_ELE_GET(self, H_ELE_TAG);
+ }
- public void ATTR(IRubyObject[] K, IRubyObject V) {
- ATTR(K[0],V);
- }
+ // H_PROP(raw, H_ELE_RAW)
+ public static IRubyObject hpricot_ele_set_raw(IRubyObject self, IRubyObject x) {
+ H_ELE_SET(self, H_ELE_RAW, x);
+ return self;
+ }
- public void ATTR(IRubyObject K, IRubyObject[] V) {
- ATTR(K,V[0]);
- }
+ public static IRubyObject hpricot_ele_clear_raw(IRubyObject self) {
+ H_ELE_SET(self, H_ELE_RAW, self.getRuntime().getNil());
+ return self.getRuntime().getTrue();
+ }
- public void ATTR(IRubyObject[] K, IRubyObject[] V) {
- ATTR(K[0],V[0]);
- }
+ public static IRubyObject hpricot_ele_get_raw(IRubyObject self) {
+ return H_ELE_GET(self, H_ELE_RAW);
+ }
- public void TEXT_PASS() {
- if(!text) {
- if(ele_open) {
- ele_open = false;
- if(ts > -1) {
- mark_tag = ts;
- }
- } else {
- mark_tag = p;
- }
- attr = runtime.getNil();
- tag[0] = runtime.getNil();
- text = true;
- }
- }
+ // H_PROP(parent, H_ELE_PARENT)
+ public static IRubyObject hpricot_ele_set_parent(IRubyObject self, IRubyObject x) {
+ H_ELE_SET(self, H_ELE_PARENT, x);
+ return self;
+ }
- public void EBLK(IRubyObject N, int T) {
- CAT(tag, p - T + 1);
- ELE(N);
- }
+ public static IRubyObject hpricot_ele_clear_parent(IRubyObject self) {
+ H_ELE_SET(self, H_ELE_PARENT, self.getRuntime().getNil());
+ return self.getRuntime().getTrue();
+ }
+ public static IRubyObject hpricot_ele_get_parent(IRubyObject self) {
+ return H_ELE_GET(self, H_ELE_PARENT);
+ }
- public void rb_raise(RubyClass error, String message) {
- throw new RaiseException(runtime, error, message, true);
- }
+ // H_PROP(attr, H_ELE_ATTR)
+ public static IRubyObject hpricot_ele_set_attr(IRubyObject self, IRubyObject x) {
+ H_ELE_SET(self, H_ELE_ATTR, x);
+ return self;
+ }
- public IRubyObject rb_str_new2(String s) {
- return runtime.newString(s);
- }
+ public static IRubyObject hpricot_ele_clear_attr(IRubyObject self) {
+ H_ELE_SET(self, H_ELE_ATTR, self.getRuntime().getNil());
+ return self.getRuntime().getTrue();
+ }
-// line 189 "ext/hpricot_scan/hpricot_scan.java.rl"
+ public static IRubyObject hpricot_ele_get_attr(IRubyObject self) {
+ return H_ELE_GET(self, H_ELE_ATTR);
+ }
+ // H_PROP(etag, H_ELE_ETAG)
+ public static IRubyObject hpricot_ele_set_etag(IRubyObject self, IRubyObject x) {
+ H_ELE_SET(self, H_ELE_ETAG, x);
+ return self;
+ }
+ public static IRubyObject hpricot_ele_clear_etag(IRubyObject self) {
+ H_ELE_SET(self, H_ELE_ETAG, self.getRuntime().getNil());
+ return self.getRuntime().getTrue();
+ }
-// line 150 "ext/hpricot_scan/HpricotScanService.java"
+ public static IRubyObject hpricot_ele_get_etag(IRubyObject self) {
+ return H_ELE_GET(self, H_ELE_ETAG);
+ }
+
+ // H_PROP(children, H_ELE_CHILDREN)
+ public static IRubyObject hpricot_ele_set_children(IRubyObject self, IRubyObject x) {
+ H_ELE_SET(self, H_ELE_CHILDREN, x);
+ return self;
+ }
+
+ public static IRubyObject hpricot_ele_clear_children(IRubyObject self) {
+ H_ELE_SET(self, H_ELE_CHILDREN, self.getRuntime().getNil());
+ return self.getRuntime().getTrue();
+ }
+
+ public static IRubyObject hpricot_ele_get_children(IRubyObject self) {
+ return H_ELE_GET(self, H_ELE_CHILDREN);
+ }
+
+ // H_ATTR(target)
+ public static IRubyObject hpricot_ele_set_target(IRubyObject self, IRubyObject x) {
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("target"), x);
+ return self;
+ }
+
+ public static IRubyObject hpricot_ele_get_target(IRubyObject self) {
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("target"));
+ }
+
+ // H_ATTR(encoding)
+ public static IRubyObject hpricot_ele_set_encoding(IRubyObject self, IRubyObject x) {
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("encoding"), x);
+ return self;
+ }
+
+ public static IRubyObject hpricot_ele_get_encoding(IRubyObject self) {
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("encoding"));
+ }
+
+ // H_ATTR(version)
+ public static IRubyObject hpricot_ele_set_version(IRubyObject self, IRubyObject x) {
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("version"), x);
+ return self;
+ }
+
+ public static IRubyObject hpricot_ele_get_version(IRubyObject self) {
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("version"));
+ }
+
+ // H_ATTR(standalone)
+ public static IRubyObject hpricot_ele_set_standalone(IRubyObject self, IRubyObject x) {
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("standalone"), x);
+ return self;
+ }
+
+ public static IRubyObject hpricot_ele_get_standalone(IRubyObject self) {
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("standalone"));
+ }
+
+ // H_ATTR(system_id)
+ public static IRubyObject hpricot_ele_set_system_id(IRubyObject self, IRubyObject x) {
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("system_id"), x);
+ return self;
+ }
+
+ public static IRubyObject hpricot_ele_get_system_id(IRubyObject self) {
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("system_id"));
+ }
+
+ // H_ATTR(public_id)
+ public static IRubyObject hpricot_ele_set_public_id(IRubyObject self, IRubyObject x) {
+ ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).fastASet(self.getRuntime().newSymbol("public_id"), x);
+ return self;
+ }
+
+ public static IRubyObject hpricot_ele_get_public_id(IRubyObject self) {
+ return ((RubyHash)H_ELE_GET(self, H_ELE_ATTR)).op_aref(self.getRuntime().getCurrentContext(), self.getRuntime().newSymbol("public_id"));
+ }
+
+ public static class Scanner {
+ public IRubyObject SET(int mark, int E, IRubyObject org) {
+ if(mark == -1 || E == mark) {
+ return runtime.newString("");
+ } else if(E > mark) {
+ return RubyString.newString(runtime, data, mark, E-mark);
+ } else {
+ return org;
+ }
+ }
+
+ public int SLIDE(int N) {
+ if(N > ts) {
+ return N - ts;
+ } else {
+ return N;
+ }
+ }
+
+ public IRubyObject CAT(IRubyObject N, int mark, int E) {
+ if(N.isNil()) {
+ return SET(mark, E, N);
+ } else {
+ ((RubyString)N).cat(data, mark, E-mark);
+ return N;
+ }
+ }
+
+ public void ATTR(IRubyObject K, IRubyObject V) {
+ if(!K.isNil()) {
+ if(attr.isNil()) {
+ attr = RubyHash.newHash(runtime);
+ }
+ ((RubyHash)attr).fastASet(K, V);
+ }
+ }
+
+ public void TEXT_PASS() {
+ if(!text) {
+ if(ele_open) {
+ ele_open = false;
+ if(ts != -1) {
+ mark_tag = ts;
+ }
+ } else {
+ mark_tag = p;
+ }
+ attr = runtime.getNil();
+ tag = runtime.getNil();
+ text = true;
+ }
+ }
+
+ public void ELE(IRubyObject N) {
+ if(te > ts || text) {
+ int raw = -1;
+ int rawlen = 0;
+ ele_open = false;
+ text = false;
+
+ if(ts != -1 && N != x.sym_cdata && N != x.sym_text && N != x.sym_procins && N != x.sym_comment) {
+ raw = ts;
+ rawlen = te - ts;
+ }
+
+ if(block.isGiven()) {
+ IRubyObject raw_string = runtime.getNil();
+ if(raw != -1) {
+ raw_string = RubyString.newString(runtime, data, raw, rawlen);
+ }
+ yieldTokens(N, tag, attr, runtime.getNil(), taint);
+ } else {
+ hpricotToken(S, N, tag, attr, raw, rawlen, taint);
+ }
+ }
+ }
+
+
+ public void EBLK(IRubyObject N, int T) {
+ tag = CAT(tag, mark_tag, p - T + 1);
+ ELE(N);
+ }
+
+ public void hpricotAdd(IRubyObject focus, IRubyObject ele) {
+ IRubyObject children = H_ELE_GET(focus, H_ELE_CHILDREN);
+ if(children.isNil()) {
+ H_ELE_SET(focus, H_ELE_CHILDREN, children = RubyArray.newArray(runtime, 1));
+ }
+ ((RubyArray)children).append(ele);
+ H_ELE_SET(ele, H_ELE_PARENT, focus);
+ }
+
+ private static class TokenInfo {
+ public IRubyObject sym;
+ public IRubyObject tag;
+ public IRubyObject attr;
+ public int raw;
+ public int rawlen;
+ public IRubyObject ec;
+ public IRubyObject ele;
+ public Extra x;
+ public Ruby runtime;
+ public Scanner scanner;
+ public State S;
+
+ public void H_ELE(RubyClass klass) {
+ ele = klass.allocate();
+ if(klass == x.cElem) {
+ H_ELE_SET(ele, H_ELE_TAG, tag);
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
+ H_ELE_SET(ele, H_ELE_EC, ec);
+ if(raw != -1 && (sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_doctype)) {
+ H_ELE_SET(ele, H_ELE_RAW, RubyString.newString(runtime, scanner.data, raw, rawlen));
+ }
+ } else if(klass == x.cDocType || klass == x.cProcIns || klass == x.cXMLDecl || klass == x.cBogusETag) {
+ if(klass == x.cBogusETag) {
+ H_ELE_SET(ele, H_ELE_TAG, tag);
+ if(raw != -1) {
+ H_ELE_SET(ele, H_ELE_ATTR, RubyString.newString(runtime, scanner.data, raw, rawlen));
+ }
+ } else {
+ if(klass == x.cDocType) {
+ scanner.ATTR(runtime.newSymbol("target"), tag);
+ }
+ H_ELE_SET(ele, H_ELE_ATTR, attr);
+ if(klass != x.cProcIns) {
+ tag = runtime.getNil();
+ if(raw != -1) {
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
+ }
+ }
+ H_ELE_SET(ele, H_ELE_TAG, tag);
+ }
+ } else {
+ H_ELE_SET(ele, H_ELE_TAG, tag);
+ }
+ S.last = ele;
+ }
+
+ public void hpricotToken(boolean taint) {
+ //
+ // in html mode, fix up start tags incorrectly formed as empty tags
+ //
+ if(!S.xml) {
+ if(sym == x.sym_emptytag || sym == x.sym_stag || sym == x.sym_etag) {
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
+ if(ec.isNil()) {
+ tag = tag.callMethod(scanner.ctx, "downcase");
+ ec = ((RubyHash)S.EC).op_aref(scanner.ctx, tag);
+ }
+ }
+
+ if(H_ELE_GET(S.focus, H_ELE_EC) == x.sym_CDATA &&
+ (sym != x.sym_procins && sym != x.sym_comment && sym != x.sym_cdata && sym != x.sym_text) &&
+ !(sym == x.sym_etag && runtime.newFixnum(tag.hashCode()).equals(H_ELE_GET(S.focus, H_ELE_HASH)))) {
+ sym = x.sym_text;
+ tag = RubyString.newString(runtime, scanner.data, raw, rawlen);
+ }
+
+ if(!ec.isNil()) {
+ if(sym == x.sym_emptytag) {
+ if(ec != x.sym_EMPTY) {
+ sym = x.sym_stag;
+ }
+ } else if(sym == x.sym_stag) {
+ if(ec == x.sym_EMPTY) {
+ sym = x.sym_emptytag;
+ }
+ }
+ }
+ }
+
+ if(sym == x.sym_emptytag || sym == x.sym_stag) {
+ IRubyObject name = runtime.newFixnum(tag.hashCode());
+ H_ELE(x.cElem);
+ H_ELE_SET(ele, H_ELE_HASH, name);
+
+ if(!S.xml) {
+ IRubyObject match = runtime.getNil(), e = S.focus;
+ while(e != S.doc) {
+ IRubyObject hEC = H_ELE_GET(e, H_ELE_EC);
+ if(hEC instanceof RubyHash) {
+ IRubyObject has = ((RubyHash)hEC).op_aref(scanner.ctx, name);
+ if(!has.isNil()) {
+ if(has == runtime.getTrue()) {
+ if(match.isNil()) {
+ match = e;
+ }
+ } else if(has == x.symAllow) {
+ match = S.focus;
+ } else if(has == x.symDeny) {
+ match = runtime.getNil();
+ }
+ }
+ }
+ e = H_ELE_GET(e, H_ELE_PARENT);
+ }
+
+ if(match.isNil()) {
+ match = S.focus;
+ }
+ S.focus = match;
+ }
+
+ scanner.hpricotAdd(S.focus, ele);
+
+ //
+ // in the case of a start tag that should be empty, just
+ // skip the step that focuses the element. focusing moves
+ // us deeper into the document.
+ //
+ if(sym == x.sym_stag) {
+ if(S.xml || ec != x.sym_EMPTY) {
+ S.focus = ele;
+ S.last = runtime.getNil();
+ }
+ }
+ } else if(sym == x.sym_etag) {
+ IRubyObject name, match = runtime.getNil(), e = S.focus;
+ if(S.strict) {
+ if(((RubyHash)S.EC).op_aref(scanner.ctx, tag).isNil()) {
+ tag = runtime.newString("div");
+ }
+ }
+
+ name = runtime.newFixnum(tag.hashCode());
+ while(e != S.doc) {
+ if(H_ELE_GET(e, H_ELE_HASH).equals(name)) {
+ match = e;
+ break;
+ }
+ e = H_ELE_GET(e, H_ELE_PARENT);
+
+ }
+ if(match.isNil()) {
+ H_ELE(x.cBogusETag);
+ scanner.hpricotAdd(S.focus, ele);
+ } else {
+ ele = runtime.getNil();
+ if(raw != -1) {
+ ele = RubyString.newString(runtime, scanner.data, raw, rawlen);
+ }
+ H_ELE_SET(match, H_ELE_ETAG, ele);
+ S.focus = H_ELE_GET(match, H_ELE_PARENT);
+ S.last = runtime.getNil();
+
+ }
+ } else if(sym == x.sym_cdata) {
+ H_ELE(x.cCData);
+ scanner.hpricotAdd(S.focus, ele);
+ } else if(sym == x.sym_comment) {
+ H_ELE(x.cComment);
+ scanner.hpricotAdd(S.focus, ele);
+ } else if(sym == x.sym_doctype) {
+ H_ELE(x.cDocType);
+ if(S.strict) {
+ RubyHash h = (RubyHash)attr;
+ h.fastASet(runtime.newSymbol("system_id"), runtime.newString("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"));
+ h.fastASet(runtime.newSymbol("public_id"), runtime.newString("-//W3C//DTD XHTML 1.0 Strict//EN"));
+ }
+ scanner.hpricotAdd(S.focus, ele);
+ } else if(sym == x.sym_procins) {
+ IRubyObject match = tag.callMethod(scanner.ctx, "match", x.reProcInsParse);
+ tag = RubyRegexp.nth_match(1, match);
+ attr = RubyRegexp.nth_match(2, match);
+ H_ELE(x.cProcIns);
+ scanner.hpricotAdd(S.focus, ele);
+ } else if(sym == x.sym_text) {
+ if(!S.last.isNil() && S.last.getType() == x.cText) {
+ ((RubyString)H_ELE_GET(S.last, H_ELE_TAG)).append(tag);
+ } else {
+ H_ELE(x.cText);
+ scanner.hpricotAdd(S.focus, ele);
+ }
+ } else if(sym == x.sym_xmldecl) {
+ H_ELE(x.cXMLDecl);
+ scanner.hpricotAdd(S.focus, ele);
+ }
+ }
+ }
+
+ public void hpricotToken(State S, IRubyObject _sym, IRubyObject _tag, IRubyObject _attr, int _raw, int _rawlen, boolean taint) {
+ TokenInfo t = new TokenInfo();
+ t.sym = _sym;
+ t.tag = _tag;
+ t.attr = _attr;
+ t.raw = _raw;
+ t.rawlen = _rawlen;
+ t.ec = runtime.getNil();
+ t.ele = runtime.getNil();
+ t.x = x;
+ t.runtime = runtime;
+ t.scanner = this;
+ t.S = S;
+
+ t.hpricotToken(taint);
+ }
+
+ public void yieldTokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
+ if(sym == x.sym_text) {
+ raw = tag;
+ }
+ IRubyObject ary = RubyArray.newArrayNoCopy(runtime, new IRubyObject[]{sym, tag, attr, raw});
+ if(taint) {
+ ary.setTaint(true);
+ tag.setTaint(true);
+ attr.setTaint(true);
+ raw.setTaint(true);
+ }
+
+ block.yield(ctx, ary);
+ }
+
+// line 561 "ext/hpricot_scan/hpricot_scan.java.rl"
+
+
+
+// line 517 "ext/hpricot_scan/HpricotScanService.java"
private static byte[] init__hpricot_scan_actions_0()
{
return new byte [] {
0, 1, 1, 1, 2, 1, 4, 1, 5, 1, 6, 1,
7, 1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1,
@@ -750,125 +1117,170 @@
static final int hpricot_scan_en_html_comment = 214;
static final int hpricot_scan_en_html_cdata = 216;
static final int hpricot_scan_en_html_procins = 218;
static final int hpricot_scan_en_main = 204;
-// line 192 "ext/hpricot_scan/hpricot_scan.java.rl"
+// line 564 "ext/hpricot_scan/hpricot_scan.java.rl"
-public final static int BUFSIZE=16384;
+ public final static int BUFSIZE = 16384;
-private void rb_yield_tokens(IRubyObject sym, IRubyObject tag, IRubyObject attr, IRubyObject raw, boolean taint) {
- IRubyObject ary;
- if (sym == runtime.newSymbol("text")) {
- raw = tag;
- }
- ary = runtime.newArray(new IRubyObject[]{sym, tag, attr, raw});
- if (taint) {
- ary.setTaint(true);
- tag.setTaint(true);
- attr.setTaint(true);
- raw.setTaint(true);
- }
- block.yield(runtime.getCurrentContext(), ary, null, null, false);
-}
+ private int cs, act, have = 0, nread = 0, curline = 1;
+ private int ts = 0, te = 0, eof = -1, p = -1, pe = -1, buf = 0;
+ private byte[] data;
+ private State S = null;
+ private IRubyObject port, opts, attr, tag, akey, aval, bufsize;
+ private int mark_tag = -1, mark_akey = -1, mark_aval = -1;
+ private boolean done = false, ele_open = false, taint = false, io = false, text = false;
+ private int buffer_size = 0;
-int cs, act, have = 0, nread = 0, curline = 1, p=-1;
-boolean text = false;
-int ts=-1, te;
-int eof=-1;
-char[] buf;
-Ruby runtime;
-IRubyObject attr, bufsize;
-IRubyObject[] tag, akey, aval;
-int mark_tag, mark_akey, mark_aval;
-boolean done = false, ele_open = false;
-int buffer_size = 0;
-boolean taint = false;
-Block block = null;
+ private Extra x;
+ private IRubyObject self;
+ private Ruby runtime;
+ private ThreadContext ctx;
+ private Block block;
-IRubyObject xmldecl, doctype, procins, stag, etag, emptytag, comment,
- cdata, sym_text;
+ private IRubyObject xmldecl, doctype, stag, etag, emptytag, comment, cdata, procins;
-IRubyObject hpricot_scan(IRubyObject recv, IRubyObject port) {
- attr = bufsize = runtime.getNil();
- tag = new IRubyObject[]{runtime.getNil()};
- akey = new IRubyObject[]{runtime.getNil()};
- aval = new IRubyObject[]{runtime.getNil()};
+ private RaiseException newRaiseException(RubyClass exceptionClass, String message) {
+ return new RaiseException(runtime, exceptionClass, message, true);
+ }
- RubyClass rb_eHpricotParseError = runtime.getModule("Hpricot").getClass("ParseError");
+ public Scanner(IRubyObject self, IRubyObject[] args, Block block) {
+ this.self = self;
+ this.runtime = self.getRuntime();
+ this.ctx = runtime.getCurrentContext();
+ this.block = block;
+ attr = runtime.getNil();
+ tag = runtime.getNil();
+ akey = runtime.getNil();
+ aval = runtime.getNil();
+ bufsize = runtime.getNil();
- taint = port.isTaint();
- if ( !port.respondsTo("read")) {
- if ( port.respondsTo("to_str")) {
- port = port.callMethod(runtime.getCurrentContext(),"to_str");
- } else {
- throw runtime.newArgumentError("bad Hpricot argument, String or IO only please.");
- }
- }
+ this.x = (Extra)this.runtime.getModule("Hpricot").dataGetStruct();
- buffer_size = BUFSIZE;
- if (rubyApi.getInstanceVariable(recv, "@buffer_size") != null) {
- bufsize = rubyApi.getInstanceVariable(recv, "@buffer_size");
- if (!bufsize.isNil()) {
- buffer_size = RubyNumeric.fix2int(bufsize);
- }
- }
- buf = new char[buffer_size];
+ this.xmldecl = x.sym_xmldecl;
+ this.doctype = x.sym_doctype;
+ this.stag = x.sym_stag;
+ this.etag = x.sym_etag;
+ this.emptytag = x.sym_emptytag;
+ this.comment = x.sym_comment;
+ this.cdata = x.sym_cdata;
+ this.procins = x.sym_procins;
-
-// line 821 "ext/hpricot_scan/HpricotScanService.java"
+ port = args[0];
+ if(args.length == 2) {
+ opts = args[1];
+ } else {
+ opts = runtime.getNil();
+ }
+
+ taint = port.isTaint();
+ io = port.respondsTo("read");
+ if(!io) {
+ if(port.respondsTo("to_str")) {
+ port = port.callMethod(ctx, "to_str");
+ port = port.convertToString();
+ } else {
+ throw runtime.newArgumentError("an Hpricot document must be built from an input source (a String or IO object.)");
+ }
+ }
+
+ if(!(opts instanceof RubyHash)) {
+ opts = runtime.getNil();
+ }
+
+ if(!block.isGiven()) {
+ S = new State();
+ S.doc = x.cDoc.allocate();
+ S.focus = S.doc;
+ S.last = runtime.getNil();
+ S.xml = OPT(opts, "xml");
+ S.strict = OPT(opts, "xhtml_strict");
+ S.fixup = OPT(opts, "fixup_tags");
+ if(S.strict) {
+ S.fixup = true;
+ }
+ S.doc.getInstanceVariables().fastSetInstanceVariable("@options", opts);
+ S.EC = x.mHpricot.getConstant("ElementContent");
+ }
+
+ buffer_size = BUFSIZE;
+ if(self.getInstanceVariables().fastHasInstanceVariable("@buffer_size")) {
+ bufsize = self.getInstanceVariables().fastGetInstanceVariable("@buffer_size");
+ if(!bufsize.isNil()) {
+ buffer_size = RubyNumeric.fix2int(bufsize);
+ }
+ }
+
+ if(io) {
+ buf = 0;
+ data = new byte[buffer_size];
+ }
+ }
+
+ private int len, space;
+ // hpricot_scan
+ public IRubyObject scan() {
+
+// line 1227 "ext/hpricot_scan/HpricotScanService.java"
{
cs = hpricot_scan_start;
ts = -1;
te = -1;
act = 0;
}
-// line 256 "ext/hpricot_scan/hpricot_scan.java.rl"
+// line 667 "ext/hpricot_scan/hpricot_scan.java.rl"
+ while(!done) {
+ p = pe = len = buf;
+ space = buffer_size - have;
+
+ if(io) {
+ if(space == 0) {
+ /* We've used up the entire buffer storing an already-parsed token
+ * prefix that must be preserved. Likely caused by super-long attributes.
+ * Increase buffer size and continue */
+ buffer_size += BUFSIZE;
+ data = realloc(data, buffer_size);
+ space = buffer_size - have;
+ }
- while( !done ) {
- IRubyObject str;
- p = have;
- int pe;
- int len, space = buffer_size - have;
+ p = have;
+ IRubyObject str = port.callMethod(ctx, "read", runtime.newFixnum(space));
+ ByteList bl = str.convertToString().getByteList();
+ len = bl.realSize;
+ System.arraycopy(bl.bytes, bl.begin, data, p, len);
+ } else {
+ ByteList bl = port.convertToString().getByteList();
+ data = bl.bytes;
+ buf = bl.begin;
+ p = bl.begin;
+ len = bl.realSize + 1;
+ if(p + len >= data.length) {
+ data = new byte[len];
+ System.arraycopy(bl.bytes, bl.begin, data, 0, bl.realSize);
+ p = 0;
+ buf = 0;
+ }
+ done = true;
+ eof = p + len;
+ }
- if ( space == 0 ) {
- /* We've used up the entire buffer storing an already-parsed token
- * prefix that must be preserved. Likely caused by super-long attributes.
- * See ticket #13. */
- buffer_size += BUFSIZE;
- char[] new_buf = new char[buffer_size];
- System.arraycopy(buf, 0, new_buf, 0, buf.length);
- buf = new_buf;
- space = buffer_size - have;
- }
+ nread += len;
- if (port.respondsTo("read")) {
- str = port.callMethod(runtime.getCurrentContext(),"read",runtime.newFixnum(space));
- } else {
- str = ((RubyString)port).substr(nread,space);
- }
+ /* If this is the last buffer, tack on an EOF. */
+ if(io && len < space) {
+ data[p + len++] = 0;
+ eof = p + len;
+ done = true;
+ }
- str = str.convertToString();
- String sss = str.toString();
- char[] chars = sss.toCharArray();
- System.arraycopy(chars,0,buf,p,chars.length);
+ pe = p + len;
- len = sss.length();
- nread += len;
-
- if ( len < space ) {
- len++;
- done = true;
- }
-
- pe = p + len;
- char[] data = buf;
-
-
-// line 870 "ext/hpricot_scan/HpricotScanService.java"
+
+// line 1282 "ext/hpricot_scan/HpricotScanService.java"
{
int _klen;
int _trans = 0;
int _acts;
int _nacts;
@@ -889,11 +1301,11 @@
switch ( _hpricot_scan_actions[_acts++] ) {
case 21:
// line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
{ts = p;}
break;
-// line 895 "ext/hpricot_scan/HpricotScanService.java"
+// line 1307 "ext/hpricot_scan/HpricotScanService.java"
}
}
_match: do {
_keys = _hpricot_scan_key_offsets[cs];
@@ -954,87 +1366,93 @@
while ( _nacts-- > 0 )
{
switch ( _hpricot_scan_actions[_acts++] )
{
case 0:
-// line 147 "ext/hpricot_scan/hpricot_scan.java.rl"
+// line 514 "ext/hpricot_scan/hpricot_scan.java.rl"
{
- if (text) {
- CAT(tag, p);
- ELE(sym_text);
- text = false;
+ if(text) {
+ tag = CAT(tag, mark_tag, p);
+ ELE(x.sym_text);
+ text = false;
}
attr = runtime.getNil();
- tag[0] = runtime.getNil();
+ tag = runtime.getNil();
mark_tag = -1;
ele_open = true;
}
break;
case 1:
-// line 159 "ext/hpricot_scan/hpricot_scan.java.rl"
+// line 526 "ext/hpricot_scan/hpricot_scan.java.rl"
{ mark_tag = p; }
break;
case 2:
-// line 160 "ext/hpricot_scan/hpricot_scan.java.rl"
+// line 527 "ext/hpricot_scan/hpricot_scan.java.rl"
{ mark_aval = p; }
break;
case 3:
-// line 161 "ext/hpricot_scan/hpricot_scan.java.rl"
+// line 528 "ext/hpricot_scan/hpricot_scan.java.rl"
{ mark_akey = p; }
break;
case 4:
-// line 162 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(tag, p); }
+// line 529 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { tag = SET(mark_tag, p, tag); }
break;
case 5:
-// line 164 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(aval, p); }
+// line 531 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { aval = SET(mark_aval, p, aval); }
break;
case 6:
-// line 165 "ext/hpricot_scan/hpricot_scan.java.rl"
- {
- if (buf[p-1] == '"' || buf[p-1] == '\'') { SET(aval, p-1); }
- else { SET(aval, p); }
+// line 532 "ext/hpricot_scan/hpricot_scan.java.rl"
+ {
+ if(data[p-1] == '"' || data[p-1] == '\'') {
+ aval = SET(mark_aval, p-1, aval);
+ } else {
+ aval = SET(mark_aval, p, aval);
+ }
}
break;
case 7:
-// line 169 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(akey, p); }
+// line 539 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { akey = SET(mark_akey, p, akey); }
break;
case 8:
-// line 170 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(aval, p); ATTR(rb_str_new2("version"), aval); }
+// line 540 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("version"), aval); }
break;
case 9:
-// line 171 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(aval, p); ATTR(rb_str_new2("encoding"), aval); }
+// line 541 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("encoding"), aval); }
break;
case 10:
-// line 172 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(aval, p); ATTR(rb_str_new2("standalone"), aval); }
+// line 542 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("standalone"), aval); }
break;
case 11:
-// line 173 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(aval, p); ATTR(rb_str_new2("public_id"), aval); }
+// line 543 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("public_id"), aval); }
break;
case 12:
-// line 174 "ext/hpricot_scan/hpricot_scan.java.rl"
- { SET(aval, p); ATTR(rb_str_new2("system_id"), aval); }
+// line 544 "ext/hpricot_scan/hpricot_scan.java.rl"
+ { aval = SET(mark_aval, p, aval); ATTR(runtime.newSymbol("system_id"), aval); }
break;
case 13:
-// line 176 "ext/hpricot_scan/hpricot_scan.java.rl"
- {
- akey[0] = runtime.getNil();
- aval[0] = runtime.getNil();
- mark_akey = -1;
- mark_aval = -1;
+// line 546 "ext/hpricot_scan/hpricot_scan.java.rl"
+ {
+ akey = runtime.getNil();
+ aval = runtime.getNil();
+ mark_akey = -1;
+ mark_aval = -1;
}
break;
case 14:
-// line 183 "ext/hpricot_scan/hpricot_scan.java.rl"
- {
- ATTR(akey, aval);
+// line 553 "ext/hpricot_scan/hpricot_scan.java.rl"
+ {
+ if(!S.xml) {
+ akey = akey.callMethod(runtime.getCurrentContext(), "downcase");
+ }
+ ATTR(akey, aval);
}
break;
case 15:
// line 9 "ext/hpricot_scan/hpricot_scan.java.rl"
{curline += 1;}
@@ -1187,11 +1605,11 @@
{{p = ((te))-1;} TEXT_PASS(); }
break;
}
}
break;
-// line 1193 "ext/hpricot_scan/HpricotScanService.java"
+// line 1611 "ext/hpricot_scan/HpricotScanService.java"
}
}
}
case 2:
@@ -1201,11 +1619,11 @@
switch ( _hpricot_scan_actions[_acts++] ) {
case 20:
// line 1 "ext/hpricot_scan/hpricot_scan.java.rl"
{ts = -1;}
break;
-// line 1207 "ext/hpricot_scan/HpricotScanService.java"
+// line 1625 "ext/hpricot_scan/HpricotScanService.java"
}
}
if ( ++p != pe ) {
_goto_targ = 1;
@@ -1223,83 +1641,445 @@
case 5:
}
break; }
}
-// line 297 "ext/hpricot_scan/hpricot_scan.java.rl"
-
- if ( cs == hpricot_scan_error ) {
- if(!tag[0].isNil()) {
- rb_raise(rb_eHpricotParseError, "parse error on element <"+tag.toString()+">, starting on line "+curline+".\n" + NO_WAY_SERIOUSLY);
- } else {
- rb_raise(rb_eHpricotParseError, "parse error on line "+curline+".\n" + NO_WAY_SERIOUSLY);
- }
+// line 714 "ext/hpricot_scan/hpricot_scan.java.rl"
+
+ if(cs == hpricot_scan_error) {
+ if(!tag.isNil()) {
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on element <" + tag + ">, starting on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
+ } else {
+ throw newRaiseException(x.rb_eHpricotParseError, "parse error on line " + curline + ".\n" + NO_WAY_SERIOUSLY);
+ }
+ }
+
+ if(done && ele_open) {
+ ele_open = false;
+ if(ts > 0) {
+ mark_tag = ts;
+ ts = 0;
+ text = true;
+ }
+ }
+
+ if(ts == -1) {
+ have = 0;
+ if(mark_tag != -1 && text) {
+ if(done) {
+ if(mark_tag < p - 1) {
+ tag = CAT(tag, mark_tag, p-1);
+ ELE(x.sym_text);
+ }
+ } else {
+ tag = CAT(tag, mark_tag, p);
+ }
+ }
+ if(io) {
+ mark_tag = 0;
+ } else {
+ mark_tag = ((RubyString)port).getByteList().begin;
+ }
+ } else if(io) {
+ have = pe - ts;
+ System.arraycopy(data, ts, data, buf, have);
+ mark_tag = SLIDE(mark_tag);
+ mark_akey = SLIDE(mark_akey);
+ mark_aval = SLIDE(mark_aval);
+ te -= ts;
+ ts = 0;
+ }
+ }
+
+ if(S != null) {
+ return S.doc;
+ }
+
+ return runtime.getNil();
+ }
}
+
+ public static class HpricotModule {
+ // hpricot_scan
+ @JRubyMethod(module = true, optional = 1, required = 1, frame = true)
+ public static IRubyObject scan(IRubyObject self, IRubyObject[] args, Block block) {
+ return new Scanner(self, args, block).scan();
+ }
+
+ // hpricot_css
+ @JRubyMethod(module = true)
+ public static IRubyObject css(IRubyObject self, IRubyObject mod, IRubyObject str, IRubyObject node) {
+ return new HpricotCss(self, mod, str, node).scan();
+ }
+ }
+
+ public static class CData {
+ @JRubyMethod
+ public static IRubyObject content(IRubyObject self) {
+ return hpricot_ele_get_name(self);
+ }
+
+ @JRubyMethod(name = "content=")
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_name(self, value);
+ }
+ }
+
+ public static class Comment {
+ @JRubyMethod
+ public static IRubyObject content(IRubyObject self) {
+ return hpricot_ele_get_name(self);
+ }
+
+ @JRubyMethod(name = "content=")
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_name(self, value);
+ }
+ }
+
+ public static class DocType {
+ @JRubyMethod
+ public static IRubyObject raw_string(IRubyObject self) {
+ return hpricot_ele_get_name(self);
+ }
+
+ @JRubyMethod
+ public static IRubyObject clear_raw(IRubyObject self) {
+ return hpricot_ele_clear_name(self);
+ }
+
+ @JRubyMethod
+ public static IRubyObject target(IRubyObject self) {
+ return hpricot_ele_get_target(self);
+ }
+
+ @JRubyMethod(name = "target=")
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_target(self, value);
+ }
+
+ @JRubyMethod
+ public static IRubyObject public_id(IRubyObject self) {
+ return hpricot_ele_get_public_id(self);
+ }
+
+ @JRubyMethod(name = "public_id=")
+ public static IRubyObject public_id_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_public_id(self, value);
+ }
+
+ @JRubyMethod
+ public static IRubyObject system_id(IRubyObject self) {
+ return hpricot_ele_get_system_id(self);
+ }
+
+ @JRubyMethod(name = "system_id=")
+ public static IRubyObject system_id_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_system_id(self, value);
+ }
+ }
+
+ public static class Elem {
+ @JRubyMethod
+ public static IRubyObject clear_raw(IRubyObject self) {
+ return hpricot_ele_clear_raw(self);
+ }
+ }
+
+ public static class BogusETag {
+ @JRubyMethod
+ public static IRubyObject raw_string(IRubyObject self) {
+ return hpricot_ele_get_attr(self);
+ }
+
+ @JRubyMethod
+ public static IRubyObject clear_raw(IRubyObject self) {
+ return hpricot_ele_clear_attr(self);
+ }
+ }
+
+ public static class Text {
+ @JRubyMethod
+ public static IRubyObject raw_string(IRubyObject self) {
+ return hpricot_ele_get_name(self);
+ }
+
+ @JRubyMethod
+ public static IRubyObject clear_raw(IRubyObject self) {
+ return hpricot_ele_clear_name(self);
+ }
+
+ @JRubyMethod
+ public static IRubyObject content(IRubyObject self) {
+ return hpricot_ele_get_name(self);
+ }
+
+ @JRubyMethod(name = "content=")
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_name(self, value);
+ }
+ }
+
+ public static class XMLDecl {
+ @JRubyMethod
+ public static IRubyObject raw_string(IRubyObject self) {
+ return hpricot_ele_get_name(self);
+ }
+
+ @JRubyMethod
+ public static IRubyObject clear_raw(IRubyObject self) {
+ return hpricot_ele_clear_name(self);
+ }
+
+ @JRubyMethod
+ public static IRubyObject encoding(IRubyObject self) {
+ return hpricot_ele_get_encoding(self);
+ }
+
+ @JRubyMethod(name = "encoding=")
+ public static IRubyObject encoding_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_encoding(self, value);
+ }
+
+ @JRubyMethod
+ public static IRubyObject standalone(IRubyObject self) {
+ return hpricot_ele_get_standalone(self);
+ }
+
+ @JRubyMethod(name = "standalone=")
+ public static IRubyObject standalone_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_standalone(self, value);
+ }
+
+ @JRubyMethod
+ public static IRubyObject version(IRubyObject self) {
+ return hpricot_ele_get_version(self);
+ }
+
+ @JRubyMethod(name = "version=")
+ public static IRubyObject version_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_version(self, value);
+ }
+ }
+
+ public static class ProcIns {
+ @JRubyMethod
+ public static IRubyObject target(IRubyObject self) {
+ return hpricot_ele_get_name(self);
+ }
+
+ @JRubyMethod(name = "target=")
+ public static IRubyObject target_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_name(self, value);
+ }
+
+ @JRubyMethod
+ public static IRubyObject content(IRubyObject self) {
+ return hpricot_ele_get_attr(self);
+ }
+
+ @JRubyMethod(name = "content=")
+ public static IRubyObject content_set(IRubyObject self, IRubyObject value) {
+ return hpricot_ele_set_attr(self, value);
+ }
+ }
+
+ public final static String NO_WAY_SERIOUSLY = "*** This should not happen, please send a bug report with the HTML you're parsing to why@whytheluckystiff.net. So sorry!";
+
+ public final static int H_ELE_TAG = 0;
+ public final static int H_ELE_PARENT = 1;
+ public final static int H_ELE_ATTR = 2;
+ public final static int H_ELE_ETAG = 3;
+ public final static int H_ELE_RAW = 4;
+ public final static int H_ELE_EC = 5;
+ public final static int H_ELE_HASH = 6;
+ public final static int H_ELE_CHILDREN = 7;
+
+ public static IRubyObject H_ELE_GET(IRubyObject recv, int n) {
+ return ((IRubyObject[])recv.dataGetStruct())[n];
+ }
+
+ public static IRubyObject H_ELE_SET(IRubyObject recv, int n, IRubyObject value) {
+ ((IRubyObject[])recv.dataGetStruct())[n] = value;
+ return value;
+ }
+
+ private static class RefCallback implements Callback {
+ private final int n;
+ public RefCallback(int n) { this.n = n; }
+
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
+ return H_ELE_GET(recv, n);
+ }
+
+ public Arity getArity() {
+ return Arity.NO_ARGUMENTS;
+ }
+ }
+
+ private static class SetCallback implements Callback {
+ private final int n;
+ public SetCallback(int n) { this.n = n; }
+
+ public IRubyObject execute(IRubyObject recv, IRubyObject[] args, Block block) {
+ return H_ELE_SET(recv, n, args[0]);
+ }
+
+ public Arity getArity() {
+ return Arity.ONE_ARGUMENT;
+ }
+ }
+
+ private final static Callback[] ref_func = new Callback[]{
+ new RefCallback(0),
+ new RefCallback(1),
+ new RefCallback(2),
+ new RefCallback(3),
+ new RefCallback(4),
+ new RefCallback(5),
+ new RefCallback(6),
+ new RefCallback(7),
+ new RefCallback(8),
+ new RefCallback(9)};
+
+ private final static Callback[] set_func = new Callback[]{
+ new SetCallback(0),
+ new SetCallback(1),
+ new SetCallback(2),
+ new SetCallback(3),
+ new SetCallback(4),
+ new SetCallback(5),
+ new SetCallback(6),
+ new SetCallback(7),
+ new SetCallback(8),
+ new SetCallback(9)};
+
+ public final static ObjectAllocator alloc_hpricot_struct = new ObjectAllocator() {
+ // alloc_hpricot_struct
+ public IRubyObject allocate(Ruby runtime, RubyClass klass) {
+ RubyClass kurrent = klass;
+ Object sz = kurrent.fastGetInternalVariable("__size__");
+ while(sz == null && kurrent != null) {
+ kurrent = kurrent.getSuperClass();
+ sz = kurrent.fastGetInternalVariable("__size__");
+ }
+ int size = RubyNumeric.fix2int((RubyObject)sz);
+ RubyObject obj = new RubyObject(runtime, klass);
+ IRubyObject[] all = new IRubyObject[size];
+ java.util.Arrays.fill(all, runtime.getNil());
+ obj.dataWrapStruct(all);
+ return obj;
+ }
+ };
+
+ public static RubyClass makeHpricotStruct(Ruby runtime, IRubyObject[] members) {
+ RubyClass klass = RubyClass.newClass(runtime, runtime.getObject());
+ klass.fastSetInternalVariable("__size__", runtime.newFixnum(members.length));
+ klass.setAllocator(alloc_hpricot_struct);
+
+ for(int i = 0; i < members.length; i++) {
+ String id = members[i].toString();
+ klass.defineMethod(id, ref_func[i]);
+ klass.defineMethod(id + "=", set_func[i]);
+ }
- if ( done && ele_open ) {
- ele_open = false;
- if(ts > -1) {
- mark_tag = ts;
- ts = -1;
- text = true;
- }
+ return klass;
}
- if(ts == -1) {
- have = 0;
- /* text nodes have no ts because each byte is parsed alone */
- if(mark_tag != -1 && text) {
- if (done) {
- if(mark_tag < p-1) {
- CAT(tag, p-1);
- ELE(sym_text);
- }
- } else {
- CAT(tag, p);
+ public boolean basicLoad(final Ruby runtime) throws IOException {
+ Init_hpricot_scan(runtime);
+ return true;
+ }
+
+ public static class Extra {
+ IRubyObject symAllow, symDeny, sym_xmldecl, sym_doctype,
+ sym_procins, sym_stag, sym_etag, sym_emptytag,
+ sym_allowed, sym_children, sym_comment,
+ sym_cdata, sym_name, sym_parent,
+ sym_raw_attributes, sym_raw_string, sym_tagno,
+ sym_text, sym_EMPTY, sym_CDATA;
+
+ public RubyModule mHpricot;
+ public RubyClass structElem;
+ public RubyClass structAttr;
+ public RubyClass structBasic;
+ public RubyClass cDoc;
+ public RubyClass cCData;
+ public RubyClass cComment;
+ public RubyClass cDocType;
+ public RubyClass cElem;
+ public RubyClass cBogusETag;
+ public RubyClass cText;
+ public RubyClass cXMLDecl;
+ public RubyClass cProcIns;
+ public RubyClass rb_eHpricotParseError;
+ public IRubyObject reProcInsParse;
+
+ public Extra(Ruby runtime) {
+ symAllow = runtime.newSymbol("allow");
+ symDeny = runtime.newSymbol("deny");
+ sym_xmldecl = runtime.newSymbol("xmldecl");
+ sym_doctype = runtime.newSymbol("doctype");
+ sym_procins = runtime.newSymbol("procins");
+ sym_stag = runtime.newSymbol("stag");
+ sym_etag = runtime.newSymbol("etag");
+ sym_emptytag = runtime.newSymbol("emptytag");
+ sym_allowed = runtime.newSymbol("allowed");
+ sym_children = runtime.newSymbol("children");
+ sym_comment = runtime.newSymbol("comment");
+ sym_cdata = runtime.newSymbol("cdata");
+ sym_name = runtime.newSymbol("name");
+ sym_parent = runtime.newSymbol("parent");
+ sym_raw_attributes = runtime.newSymbol("raw_attributes");
+ sym_raw_string = runtime.newSymbol("raw_string");
+ sym_tagno = runtime.newSymbol("tagno");
+ sym_text = runtime.newSymbol("text");
+ sym_EMPTY = runtime.newSymbol("EMPTY");
+ sym_CDATA = runtime.newSymbol("CDATA");
}
- }
- mark_tag = 0;
- } else {
- have = pe - ts;
- System.arraycopy(buf,ts,buf,0,have);
- SLIDE(tag);
- SLIDE(akey);
- SLIDE(aval);
- te = (te - ts);
- ts = 0;
}
- }
- return runtime.getNil();
-}
-public static IRubyObject __hpricot_scan(IRubyObject recv, IRubyObject port, Block block) {
- Ruby runtime = recv.getRuntime();
- HpricotScanService service = new HpricotScanService();
- service.runtime = runtime;
- service.xmldecl = runtime.newSymbol("xmldecl");
- service.doctype = runtime.newSymbol("doctype");
- service.procins = runtime.newSymbol("procins");
- service.stag = runtime.newSymbol("stag");
- service.etag = runtime.newSymbol("etag");
- service.emptytag = runtime.newSymbol("emptytag");
- service.comment = runtime.newSymbol("comment");
- service.cdata = runtime.newSymbol("cdata");
- service.sym_text = runtime.newSymbol("text");
- service.block = block;
- return service.hpricot_scan(recv, port);
-}
+ public static void Init_hpricot_scan(Ruby runtime) {
+ Extra x = new Extra(runtime);
+ x.mHpricot = runtime.defineModule("Hpricot");
+ x.mHpricot.dataWrapStruct(x);
-public boolean basicLoad(final Ruby runtime) throws IOException {
- Init_hpricot_scan(runtime);
- return true;
-}
+ x.mHpricot.getSingletonClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
+ x.mHpricot.defineAnnotatedMethods(HpricotModule.class);
-public static void Init_hpricot_scan(Ruby runtime) {
- RubyModule mHpricot = runtime.defineModule("Hpricot");
- mHpricot.getMetaClass().attr_accessor(runtime.getCurrentContext(),new IRubyObject[]{runtime.newSymbol("buffer_size")});
- CallbackFactory fact = runtime.callbackFactory(HpricotScanService.class);
- mHpricot.getMetaClass().defineMethod("scan",fact.getSingletonMethod("__hpricot_scan",IRubyObject.class));
- mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
- rubyApi = JavaEmbedUtils.newObjectAdapter();
-}
+ x.rb_eHpricotParseError = x.mHpricot.defineClassUnder("ParseError",runtime.getClass("StandardError"),runtime.getClass("StandardError").getAllocator());
+
+ x.structElem = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes, x.sym_etag, x.sym_raw_string, x.sym_allowed, x.sym_tagno, x.sym_children});
+ x.structAttr = makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent, x.sym_raw_attributes});
+ x.structBasic= makeHpricotStruct(runtime, new IRubyObject[] {x.sym_name, x.sym_parent});
+
+ x.cDoc = x.mHpricot.defineClassUnder("Doc", x.structElem, x.structElem.getAllocator());
+
+ x.cCData = x.mHpricot.defineClassUnder("CData", x.structBasic, x.structBasic.getAllocator());
+ x.cCData.defineAnnotatedMethods(CData.class);
+
+ x.cComment = x.mHpricot.defineClassUnder("Comment", x.structBasic, x.structBasic.getAllocator());
+ x.cComment.defineAnnotatedMethods(Comment.class);
+
+ x.cDocType = x.mHpricot.defineClassUnder("DocType", x.structAttr, x.structAttr.getAllocator());
+ x.cDocType.defineAnnotatedMethods(DocType.class);
+
+ x.cElem = x.mHpricot.defineClassUnder("Elem", x.structElem, x.structElem.getAllocator());
+ x.cElem.defineAnnotatedMethods(Elem.class);
+
+ x.cBogusETag = x.mHpricot.defineClassUnder("BogusETag", x.structAttr, x.structAttr.getAllocator());
+ x.cBogusETag.defineAnnotatedMethods(BogusETag.class);
+
+ x.cText = x.mHpricot.defineClassUnder("Text", x.structBasic, x.structBasic.getAllocator());
+ x.cText.defineAnnotatedMethods(Text.class);
+
+ x.cXMLDecl = x.mHpricot.defineClassUnder("XMLDecl", x.structAttr, x.structAttr.getAllocator());
+ x.cXMLDecl.defineAnnotatedMethods(XMLDecl.class);
+
+ x.cProcIns = x.mHpricot.defineClassUnder("ProcIns", x.structAttr, x.structAttr.getAllocator());
+ x.cProcIns.defineAnnotatedMethods(ProcIns.class);
+
+ x.reProcInsParse = runtime.evalScriptlet("/\\A<\\?(\\S+)\\s+(.+)/m");
+ x.mHpricot.setConstant("ProcInsParse", x.reProcInsParse);
+ }
}