/*
 * Takes a directed edge list and transforms it into an undirected edge list 
 * that stores edge direction as metadata.
 * 
 * Input table should be of the format (from_id:int, into_id:int ... )
 *
 * Output format:
 * node_a:int, node_b:int, a_into_b:int, b_into_a:int, symmetric:int
 * (node_a is the smaller and node_b the larger of the two endpoint ids)
 *
 * a_into_b, b_into_a, and symmetric are really booleans.
 */

-- Default input and output locations, used when the corresponding parameter is not supplied at invocation.
%default AUGMENTED_PAGELINKS      '/data/results/wikipedia/full/pagelinks'            -- all wikipedia pagelinks (see augment_pagelinks.pig)
%default UNDIRECTED_PAGELINKS_OUT '/data/results/wikipedia/full/undirected_pagelinks' -- undirected pagelinks

-- Load the directed edge list. Only the first two columns (source id, target id)
-- are given a schema. They are named from_id/into_id rather than from/into
-- because `into` is a reserved word in Pig Latin.
edges = LOAD '$AUGMENTED_PAGELINKS' AS (from_id:int, into_id:int);

-- Canonicalize every edge so that node_a <= node_b, and record which direction
-- the original edge pointed. An edge with from_id == into_id is recorded as
-- a_to_b only.
edges_sorted = FOREACH edges GENERATE
  ((from_id <= into_id) ? from_id : into_id) AS node_a,
  ((from_id <= into_id) ? into_id : from_id) AS node_b,
  ((from_id <= into_id) ? 1 : 0)             AS a_to_b,
  ((from_id <= into_id) ? 0 : 1)             AS b_to_a;

-- Collect all directed edges that connect the same unordered pair of nodes.
edges_grouped = GROUP edges_sorted BY (node_a, node_b);

-- Collapse each pair to a single undirected edge. After the GROUP, the bag of
-- member rows is named after the grouped relation (edges_sorted), so the
-- direction flags are summed from that bag. A positive sum means at least one
-- edge existed in that direction; symmetric is set when both directions exist.
edges_final = FOREACH edges_grouped GENERATE
  group.node_a AS node_a,
  group.node_b AS node_b,
  ((SUM(edges_sorted.a_to_b) > 0) ? 1 : 0) AS a_into_b,
  ((SUM(edges_sorted.b_to_a) > 0) ? 1 : 0) AS b_into_a,
  ((SUM(edges_sorted.a_to_b) > 0 AND SUM(edges_sorted.b_to_a) > 0) ? 1 : 0) AS symmetric:int;

STORE edges_final INTO '$UNDIRECTED_PAGELINKS_OUT';