Sha256: a96f1f8a0f348bd0e5c8bf34e9524adda3a98c69355d13835d37d661babc17af

Contents?: true

Size: 1.53 KB

Versions: 7

Compression:

Stored size: 1.53 KB

Contents

%declare wukong_dir '/Users/flip/ics/core/wukong'
%declare data_dir   '$wukong_dir/data'
%declare dsfp_dir   '/Users/flip/ics/data_science_fun_pack'
;

register '$dsfp_dir/pig/varaha/target/varaha-1.0-SNAPSHOT.jar';
register '$dsfp_dir/pig/varaha/lib/mallet-2.0.7-RC2.jar';
register '$dsfp_dir/pig/varaha/lib/trove-2.0.4.jar';
register '$dsfp_dir/pig/varaha/lib/lucene-core-3.1.0.jar';
register '$dsfp_dir/pig/varaha/lib/pygmalion-1.1.0-SNAPSHOT.jar';
register '$dsfp_dir/pig/pigsy/target/pigsy-2.1.0-SNAPSHOT.jar';
register '$dsfp_dir/pig/datafu/dist/datafu-0.0.6-SNAPSHOT.jar';

define JsonStrToString pigsy.pig.json.JsonStrToString();
define TokenizeText    varaha.text.TokenizeText();
define LDATopics       varaha.topic.LDATopics();
define RangeConcat     org.pygmalion.udf.RangeBasedStringConcat('0', ' ');

-- Load the markup-stripped wikipedia text
torture_strings = LOAD '$data_dir/helpers/torture/string_handling_test.tsv' AS (
  desc:chararray, len:int, bytesize:int, str:chararray, has_str:int, jsonized_str:chararray,
  escaped_chars:chararray, escaped_bytes:chararray, chars_list:chararray, bytes_list:chararray
  );

-- Generate a random integer between 0 and n
decoded = FOREACH torture_strings {
  unjsonized_str = JsonStrToString(jsonized_str);
  is_str_equal   = (((str == unjsonized_str) OR (has_str == 0)) ? 1 : 0);
  is_len_equal   = (  len == SIZE(unjsonized_str) ? 1 : 0  );
  GENERATE
    is_str_equal,
    is_len_equal,
    str AS str,
    unjsonized_str AS unjsonized_str
    ;
};

-- take a dump on your terminal window
DUMP decoded;

Version data entries

7 entries across 7 versions & 2 rubygems

Version Path
ul-wukong-4.1.1 examples/text/regional_flavor/article_wordbags.pig
ul-wukong-4.1.0 examples/text/regional_flavor/article_wordbags.pig
wukong-4.0.0 examples/text/regional_flavor/article_wordbags.pig
wukong-3.0.1 examples/text/regional_flavor/article_wordbags.pig
wukong-3.0.0 examples/text/regional_flavor/article_wordbags.pig
wukong-3.0.0.pre3 examples/text/regional_flavor/article_wordbags.pig
wukong-3.0.0.pre2 examples/text/regional_flavor/article_wordbags.pig