Sha256: 5adcd2b81c1aaa5b75a89317d6566233bc262f276c1e093952ba4c38366eb5ae

Contents?: true

Size: 1.67 KB

Versions: 13

Compression:

Stored size: 1.67 KB

Contents


-- Load the raw edge list: one record per observed hop from one page to another.
-- Pig's load syntax is `alias = LOAD 'path' AS (schema)`; no USING clause is given,
-- so Pig's default load function is used.
common_pages = LOAD 'data/common_pages' AS (ip:chararray, from_path:chararray, into_path:chararray);

--
-- Build adjacency list <A pr B,C,D> from edges (<A B>, <A C>, <A D>)
--
-- Every source page starts with a pagerank of 1.0. The result is stored as
-- iteration 00 so it can seed the iteration step.
--

-- one group per source page; the `common_pages` bag holds all of its edges
adj_list_j         = GROUP common_pages BY from_path;
adj_list           = FOREACH adj_list_j GENERATE
  group                   AS from_path,
  1.0F                    AS pagerank:float,
  -- project the destination column declared in the LOAD schema (into_path);
  -- duplicates of the same edge are kept as-is
  common_pages.into_path  AS into_paths
  ;
STORE adj_list INTO 'data/pagerank/pr_iter_00';


--
-- Iterate pagerank <A pr_00 B,C,D> to become <A pr_01 B,C,D>
--

--   Each page splits its current rank evenly across its outgoing links:
--     share = pagerank / (number of entries in into_paths)
--   One <path share> record is emitted per outgoing link, keyed by the link target.
sent_shares  = FOREACH adj_list {
        out_degree = (float)SIZE(into_paths);
        GENERATE
                FLATTEN(into_paths)              AS path,
                (float)(pagerank / out_degree)   AS share:float;
};

--   Each page also sends its own edge list to itself, keyed by its own path,
--   so the outgoing links are still available after regrouping by path.
sent_edges   = FOREACH adj_list
        GENERATE
                from_path AS path,
                into_paths;

--   Bring together, per path, the self-sent edge list and every share received.
--   INNER on sent_edges drops any path that has no edge-list record of its own.
--   The reducer count comes from the PARALLEL parameter, which must be supplied
--   when the script is run.
rcvd_shares  = COGROUP
        sent_edges  BY path INNER,
        sent_shares BY path
        PARALLEL $PARALLEL;

--   calculate the new rank, and emit a record shaped just like the input:
--   <from_path pagerank into_paths> -- the same field names adj_list uses.
next_iter    = FOREACH rcvd_shares {
        -- total share received from all pages linking here; the null check below
        -- covers the case where no shares arrived at all
        raw_rank    = (float)SUM(sent_shares.share);
        -- treat the case that a node has no in links: it gets rank 0.0, otherwise
        -- the damped rank is 0.85 * received + 0.15
        -- NOTE(review): textbook PageRank would give an un-linked page 0.15 rather
        -- than 0.0 -- confirm that 0.0 is intended here
        damped_rank = ((raw_rank IS NOT NULL AND raw_rank > 1.0e-12f) ? raw_rank*0.85f + 0.15f : 0.0f);
        -- sent_edges.into_paths is a bag wrapping the page's edge-list bag;
        -- FLATTEN unwraps the outer level so the edge list rides along with the rank
        GENERATE
                group                          AS from_path,
                damped_rank                    AS pagerank:float,
                FLATTEN(sent_edges.into_paths) AS into_paths
        ;
};

STORE next_iter INTO 'data/pagerank/pr_iter_01';


Version data entries

13 entries across 7 versions & 2 rubygems

Version Path
ul-wukong-4.1.1 examples/server_logs/page_counts.pig
ul-wukong-4.1.1 examples/serverlogs/visit_paths/page_counts.pig
ul-wukong-4.1.0 examples/serverlogs/visit_paths/page_counts.pig
ul-wukong-4.1.0 examples/server_logs/page_counts.pig
wukong-4.0.0 examples/serverlogs/visit_paths/page_counts.pig
wukong-4.0.0 examples/server_logs/page_counts.pig
wukong-3.0.1 examples/serverlogs/visit_paths/page_counts.pig
wukong-3.0.1 examples/server_logs/page_counts.pig
wukong-3.0.0 examples/server_logs/page_counts.pig
wukong-3.0.0 examples/serverlogs/visit_paths/page_counts.pig
wukong-3.0.0.pre3 examples/serverlogs/visit_paths/page_counts.pig
wukong-3.0.0.pre3 examples/server_logs/page_counts.pig
wukong-3.0.0.pre2 examples/server_logs/page_counts.pig