require 'pycall/import'
include PyCall::Import

pyimport 'numpy', as: :np
pyfrom 'sklearn.cross_validation', import: :train_test_split
pyfrom 'sklearn.preprocessing', import: :StandardScaler
pyfrom 'sklearn.datasets', import: %i(make_moons make_circles make_classification)
pyfrom 'sklearn.neighbors', import: :KNeighborsClassifier
pyfrom 'sklearn.svm', import: :SVC
pyfrom 'sklearn.tree', import: :DecisionTreeClassifier
pyfrom 'sklearn.ensemble', import: %i(RandomForestClassifier AdaBoostClassifier)
pyfrom 'sklearn.naive_bayes', import: :GaussianNB
pyfrom 'sklearn.discriminant_analysis', import: %i(LinearDiscriminantAnalysis QuadraticDiscriminantAnalysis)

# FIXME: The MacOSX backend is not usable through pycall, and the cause is still unknown.  Until that is fixed, switch to TkAgg whenever MacOSX is the configured backend.
pyimport 'matplotlib', as: :mp
mp.rcParams[:backend] = 'TkAgg' if mp.rcParams[:backend] == 'MacOSX'

pyimport 'matplotlib.pyplot', as: :plt
pyimport 'matplotlib.colors', as: :mplc

h = 0.02  # step size in the mesh

names = [
  'Nearest Neighbors',
  'Linear SVM',
  'RBF SVM',
  'Decision Tree',
  'Random Forest',
  'AdaBoost',
  'Naive Bayes',
  'Linear Discriminant Analysis',
  'Quadratic Discriminant Analysis'
]

classifiers = [
  KNeighborsClassifier.(3),
  SVC.(kernel: 'linear', C: 0.025),
  SVC.(gamma: 2, C: 1),
  DecisionTreeClassifier.(max_depth: 5),
  RandomForestClassifier.(max_depth: 5, n_estimators: 10, max_features: 1),
  AdaBoostClassifier.(),
  GaussianNB.(),
  LinearDiscriminantAnalysis.(),
  QuadraticDiscriminantAnalysis.()
]

x, y = make_classification.(
  n_features: 2,
  n_redundant: 0,
  n_informative: 2,
  random_state: 1,
  n_clusters_per_class: 1
)

np.random.seed.(42)
x += 2 * np.random.random_sample.(x.shape)
linearly_separable = PyCall.tuple(x, y)

datasets = [
  make_moons.(noise: 0.3, random_state: 0),
  make_circles.(noise: 0.2, factor: 0.5, random_state: 1),
  linearly_separable
]

fig = plt.figure.(figsize: PyCall.tuple(27, 9))
i = 1
all = PyCall.slice(nil)
datasets.each do |ds|
  x, y = ds
  x = StandardScaler.().fit_transform.(x)
  x_train, x_test, y_train, y_test = train_test_split.(x, y, test_size: 0.4)

  x_min, x_max = np.min.(x[all, 0]) - 0.5, np.max.(x[all, 0]) + 0.5
  y_min, y_max = np.min.(x[all, 1]) - 0.5, np.max.(x[all, 1]) + 0.5

  xx, yy = np.meshgrid.(
    np.linspace.(x_min, x_max, ((x_max - x_min)/h).round),
    np.linspace.(y_min, y_max, ((y_max - y_min)/h).round),
  )
  mesh_points = np.dstack.(PyCall.tuple(xx.ravel.(), yy.ravel.()))[0, all, all]

  # just plot the dataset first
  cm = plt.cm.RdBu
  cm_bright = mplc.ListedColormap.(["#FF0000", "#0000FF"])
  ax = plt.subplot.(datasets.length, classifiers.length + 1, i)
  # plot the training points
  ax.scatter.(x_train[all, 0], x_train[all, 1], c: y_train, cmap: cm_bright)
  # and testing points
  ax.scatter.(x_test[all, 0], x_test[all, 1], c: y_test, cmap: cm_bright, alpha: 0.6)

  ax.set_xlim.(np.min.(xx), np.max.(xx))
  ax.set_ylim.(np.min.(yy), np.max.(yy))
  ax.set_xticks.(PyCall.tuple())
  ax.set_yticks.(PyCall.tuple())
  i += 1

  # iterate over classifiers
  names.zip(classifiers).each do |name, clf|
    ax = plt.subplot.(datasets.length, classifiers.length + 1, i)
    clf.fit.(x_train, y_train)
    scor = clf.score.(x_test, y_test)

    # Plot the decision boundary.  For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max]
    begin
      # not implemented for some
      z = clf.decision_function.(mesh_points)
    rescue
      z = clf.predict_proba.(mesh_points)[all, 1]
    end

    # Put the result into a color plot
    z = z.reshape.(xx.shape)
    ax.contourf.(xx, yy, z, cmap: cm, alpha: 0.8)

    # Plot also the training points
    ax.scatter.(x_train[all, 0], x_train[all, 1], c: y_train, cmap: cm_bright)
    # and testing points
    ax.scatter.(x_test[all, 0], x_test[all, 1], c: y_test, cmap: cm_bright, alpha: 0.6)

    ax.set_xlim.(np.min.(xx), np.max.(xx))
    ax.set_ylim.(np.min.(yy), np.max.(yy))
    ax.set_xticks.(PyCall.tuple())
    ax.set_yticks.(PyCall.tuple())
    ax.set_title.(name)

    ax.text.(np.max.(xx) - 0.3, np.min.(yy) + 0.3, "%.2f" % scor, size: 15, horizontalalignment: 'right')

    i += 1
  end
end

fig.subplots_adjust.(left: 0.02, right: 0.98)
plt.show.()