Sha256: 09620c0f3015299e50b43eb8c15805ac7a68eab983bdfb539c21c339200c2cf5
Contents?: true
Size: 1.2 KB
Versions: 1
Compression:
Stored size: 1.2 KB
Contents
/*
 * Takes a directed edge list and transforms it into an undirected edge list
 * that stores edge direction as metadata.
 *
 * Input table should be of the format (from_id:int, into_id:int ... )
 *
 * Output format:
 *   node_a:int, node_b:int, a_into_b:int, b_into_a:int, symmetric:int
 *
 * node_a / node_b are the two endpoints of the edge, ordered so that
 * node_a <= node_b; each unordered pair therefore appears exactly once.
 *
 * a_into_b, b_into_a, and symmetric are really booleans (0 or 1):
 *   a_into_b  - 1 if the input contained a link node_a -> node_b
 *   b_into_a  - 1 if the input contained a link node_b -> node_a
 *   symmetric - 1 if both directions were present
 *
 * Note: a self-link (from_id == into_id) is recorded as a_into_b only,
 * so it is never reported as symmetric.
 */

-- all wikipedia pagelinks (see augment_pagelinks.pig)
%default AUGMENTED_PAGELINKS '/data/results/wikipedia/full/pagelinks'
-- undirected pagelinks
%default UNDIRECTED_PAGELINKS_OUT '/data/results/wikipedia/full/undirected_pagelinks'

-- Only the first two columns are needed. The fields are named from_id/into_id
-- (as in the documented input format) because `into` is a Pig keyword.
edges = LOAD '$AUGMENTED_PAGELINKS' AS (from_id:int, into_id:int);

-- Canonicalise every directed edge so the smaller id comes first, and remember
-- which direction the original edge pointed in.
edges_sorted = FOREACH edges GENERATE
    ((from_id <= into_id) ? from_id : into_id) AS node_a,
    ((from_id <= into_id) ? into_id : from_id) AS node_b,
    ((from_id <= into_id) ? 1 : 0)             AS a_to_b,
    ((from_id <= into_id) ? 0 : 1)             AS b_to_a;

-- Both directions of the same link now share one (node_a, node_b) key.
edges_grouped = GROUP edges_sorted BY (node_a, node_b);

-- Inside each group the bag carries the name of the grouped relation
-- (edges_sorted), so the direction flags must be projected from that bag.
edges_final = FOREACH edges_grouped GENERATE
    group.node_a AS node_a,
    group.node_b AS node_b,
    ((SUM(edges_sorted.a_to_b) > 0) ? 1 : 0) AS a_into_b,
    ((SUM(edges_sorted.b_to_a) > 0) ? 1 : 0) AS b_into_a,
    ((SUM(edges_sorted.a_to_b) > 0 AND SUM(edges_sorted.b_to_a) > 0) ? 1 : 0) AS symmetric:int;

STORE edges_final INTO '$UNDIRECTED_PAGELINKS_OUT';
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
wukong-3.0.0.pre2 | examples/munging/wikipedia/pagelinks/undirect_pagelinks.pig |