Sha256: 4156d3d8bb48d4aee6e8ffe5efcf8709b819024928b087bc049e6e61b01a8bc9
Contents?: true
Size: 1.15 KB
Versions: 327
Compression:
Stored size: 1.15 KB
Contents
defmodule Frequency do
  @moduledoc """
  Counts how often each letter occurs in a list of texts, spreading the work
  over several concurrently running tasks.
  """

  @typedoc "Map from a lowercased letter (a single grapheme) to its number of occurrences."
  @type frequencies :: %{optional(String.t()) => pos_integer()}

  @doc """
  Count letter frequency in parallel.

  `texts` is split into `workers` interleaved groups; each group is counted in
  its own task and the partial results are merged.

  Letters are counted case-insensitively; anything that is not a Unicode
  letter (digits, punctuation, whitespace) is ignored.

  Returns a map of characters to frequencies.
  """
  @spec frequency([String.t()], integer()) :: frequencies()
  def frequency(texts, workers) do
    0..(workers - 1)
    |> Enum.map(&stripe(&1, texts, workers))
    # Start every task before awaiting any of them so the groups are
    # processed concurrently rather than one after another.
    |> Enum.map(&Task.async(__MODULE__, :count_texts, [&1]))
    # No timeout: a large input must not abort on the default 5 second limit.
    |> Enum.map(&Task.await(&1, :infinity))
    |> merge_freqs()
  end

  # Returns every `workers`-th text starting at offset `n`, i.e. the n-th
  # interleaved slice of `texts`.
  defp stripe(n, texts, workers) do
    texts
    |> Enum.drop(n)
    |> Enum.take_every(workers)
  end

  # Needs to be public because it is invoked by module/function/args
  # (`Task.async/3` here, previously `:rpc.pmap/3`).
  @doc false
  @spec count_texts([String.t()]) :: frequencies()
  def count_texts(texts) do
    texts
    |> Enum.map(&count_text/1)
    |> merge_freqs()
  end

  # Counts the letters of a single string.
  defp count_text(string) do
    # At the time of writing Elixir doesn't yet have a way to determine if a
    # character is a letter. So use a workaround with Regex.
    string
    # \P{L} = anything but a letter
    |> String.replace(~r/\P{L}+/u, "")
    |> String.downcase()
    |> String.graphemes()
    |> Enum.reduce(%{}, fn c, acc -> Map.update(acc, c, 1, &(&1 + 1)) end)
  end

  # Merges a list of frequency maps into one, summing the counts of letters
  # that appear in more than one map.
  defp merge_freqs(freq_maps) do
    Enum.reduce(freq_maps, %{}, fn freqs, acc ->
      Map.merge(acc, freqs, fn _letter, a, b -> a + b end)
    end)
  end
end
Version data entries
327 entries across 327 versions & 1 rubygems