; Copyright (c) Rich Hickey. All rights reserved. ; The use and distribution terms for this software are covered by the ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) ; which can be found in the file epl-v10.html at the root of this distribution. ; By using this software in any fashion, you are agreeing to be bound by ; the terms of this license. ; You must not remove this notice, or any other, from this software. (ns ^{:doc "Set operations such as union/intersection." :author "Rich Hickey"} clojure.set) (defn- bubble-max-key [k coll] "Move a maximal element of coll according to fn k (which returns a number) to the front of coll." (let [max (apply max-key k coll)] (cons max (remove #(identical? max %) coll)))) (defn union "Return a set that is the union of the input sets" ([] #{}) ([s1] s1) ([s1 s2] (if (< (count s1) (count s2)) (reduce conj s2 s1) (reduce conj s1 s2))) ([s1 s2 & sets] (let [bubbled-sets (bubble-max-key count (conj sets s2 s1))] (reduce into (first bubbled-sets) (rest bubbled-sets))))) (defn intersection "Return a set that is the intersection of the input sets" ([s1] s1) ([s1 s2] (if (< (count s2) (count s1)) (recur s2 s1) (reduce (fn [result item] (if (contains? s2 item) result (disj result item))) s1 s1))) ([s1 s2 & sets] (let [bubbled-sets (bubble-max-key #(- (count %)) (conj sets s2 s1))] (reduce intersection (first bubbled-sets) (rest bubbled-sets))))) (defn difference "Return a set that is the first set without elements of the remaining sets" ([s1] s1) ([s1 s2] (if (< (count s1) (count s2)) (reduce (fn [result item] (if (contains? s2 item) (disj result item) result)) s1 s1) (reduce disj s1 s2))) ([s1 s2 & sets] (reduce difference s1 (conj sets s2)))) (defn select "Returns a set of the elements for which pred is true" [pred xset] (reduce (fn [s k] (if (pred k) s (disj s k))) xset xset)) (defn project "Returns a rel of the elements of xrel with only the keys in ks" [xrel ks] (set (map #(select-keys % ks) xrel))) (defn rename-keys "Returns the map with the keys in kmap renamed to the vals in kmap" [map kmap] (reduce (fn [m [old new]] (if (and (not= old new) (contains? m old)) (-> m (assoc new (get m old)) (dissoc old)) m)) map kmap)) (defn rename "Returns a rel of the maps in xrel with the keys in kmap renamed to the vals in kmap" [xrel kmap] (set (map #(rename-keys % kmap) xrel))) (defn index "Returns a map of the distinct values of ks in the xrel mapped to a set of the maps in xrel with the corresponding values of ks." [xrel ks] (reduce (fn [m x] (let [ik (select-keys x ks)] (assoc m ik (conj (get m ik #{}) x)))) {} xrel)) (defn map-invert "Returns the map with the vals mapped to the keys." [m] (reduce (fn [m [k v]] (assoc m v k)) {} m)) (defn join "When passed 2 rels, returns the rel corresponding to the natural join. When passed an additional keymap, joins on the corresponding keys." ([xrel yrel] ;natural join (if (and (seq xrel) (seq yrel)) (let [ks (intersection (set (keys (first xrel))) (set (keys (first yrel)))) [r s] (if (<= (count xrel) (count yrel)) [xrel yrel] [yrel xrel]) idx (index r ks)] (reduce (fn [ret x] (let [found (idx (select-keys x ks))] (if found (reduce #(conj %1 (merge %2 x)) ret found) ret))) #{} s)) #{})) ([xrel yrel km] ;arbitrary key mapping (let [[r s k] (if (<= (count xrel) (count yrel)) [xrel yrel (map-invert km)] [yrel xrel km]) idx (index r (vals k))] (reduce (fn [ret x] (let [found (idx (rename-keys (select-keys x (keys k)) k))] (if found (reduce #(conj %1 (merge %2 x)) ret found) ret))) #{} s)))) (defn subset? "Is set1 a subset of set2?" [set1 set2] (and (<= (count set1) (count set2)) (every? #(contains? set2 %) set1))) (defn superset? "Is set1 a superset of set2?" [set1 set2] (and (>= (count set1) (count set2)) (every? #(contains? set1 %) set2))) (comment (refer 'set) (def xs #{{:a 11 :b 1 :c 1 :d 4} {:a 2 :b 12 :c 2 :d 6} {:a 3 :b 3 :c 3 :d 8 :f 42}}) (def ys #{{:a 11 :b 11 :c 11 :e 5} {:a 12 :b 11 :c 12 :e 3} {:a 3 :b 3 :c 3 :e 7 }}) (join xs ys) (join xs (rename ys {:b :yb :c :yc}) {:a :a}) (union #{:a :b :c} #{:c :d :e }) (difference #{:a :b :c} #{:c :d :e}) (intersection #{:a :b :c} #{:c :d :e}) (index ys [:b]))