# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import unittest
from datasketches import quantiles_ints_sketch, quantiles_floats_sketch, quantiles_doubles_sketch, ks_test
import numpy as np

class QuantilesTest(unittest.TestCase):
    def test_quantiles_example(self):
      k = 128
      n = 2 ** 20

      # create a sketch and inject ~1 million N(0,1) points as an array and as a single item
      quantiles = quantiles_floats_sketch(k)
      quantiles.update(np.random.normal(size=n-1))
      quantiles.update(0.0)

      # 0 should be near the median
      self.assertAlmostEqual(0.5, quantiles.get_rank(0.0), delta=0.035)
      
      # the median should be near 0
      self.assertAlmostEqual(0.0, quantiles.get_quantile(0.5), delta=0.035)

      # we also track the min/max independently from the rest of the data
      # which lets us know the full observed data range
      self.assertLessEqual(quantiles.get_min_value(), quantiles.get_quantile(0.01))
      self.assertLessEqual(0.0, quantiles.get_rank(quantiles.get_min_value()))
      self.assertGreaterEqual(quantiles.get_max_value(), quantiles.get_quantile(0.99))
      self.assertGreaterEqual(1.0, quantiles.get_rank(quantiles.get_max_value()))

      # we can also extract a list of values at a time,
      # here the values should give us something close to [-2, -1, 0, 1, 2].
      # then get the CDF, which will return something close to
      # the original values used in get_quantiles()
      # finally, can check the normalized rank error bound
      pts = quantiles.get_quantiles([0.0228, 0.1587, 0.5, 0.8413, 0.9772])
      cdf = quantiles.get_cdf(pts)  # include 1.0 at end to account for all probability mass
      self.assertEqual(len(cdf), len(pts)+1)
      err = quantiles.normalized_rank_error(False)
      self.assertEqual(err, quantiles_floats_sketch.get_normalized_rank_error(k, False))

      # and a few basic queries about the sketch
      self.assertFalse(quantiles.is_empty())
      self.assertTrue(quantiles.is_estimation_mode())
      self.assertEqual(quantiles.get_n(), n)
      self.assertEqual(quantiles.get_k(), k)
      self.assertLess(quantiles.get_num_retained(), n)

      # merging itself will double the number of items the sketch has seen
      quantiles.merge(quantiles)
      self.assertEqual(quantiles.get_n(), 2*n)

      # we can then serialize and reconstruct the sketch
      quantiles_bytes = quantiles.serialize()
      new_quantiles = quantiles.deserialize(quantiles_bytes)
      self.assertEqual(quantiles.get_num_retained(), new_quantiles.get_num_retained())
      self.assertEqual(quantiles.get_min_value(), new_quantiles.get_min_value())
      self.assertEqual(quantiles.get_max_value(), new_quantiles.get_max_value())
      self.assertEqual(quantiles.get_quantile(0.7), new_quantiles.get_quantile(0.7))
      self.assertEqual(quantiles.get_rank(0.0), new_quantiles.get_rank(0.0))

      # If we create a new sketch with a very different distribution, a Kolmogorov-Smirnov Test
      # of the two should return True: we can reject the null hypothesis that the sketches
      # come from the same distributions.
      unif_quantiles = quantiles_floats_sketch(k)
      unif_quantiles.update(np.random.uniform(10, 20, size=n-1))
      self.assertTrue(ks_test(quantiles, unif_quantiles, 0.001))

    def test_quantiles_ints_sketch(self):
        k = 128
        n = 10
        quantiles = quantiles_ints_sketch(k)
        for i in range(0, n):
          quantiles.update(i)

        self.assertEqual(quantiles.get_min_value(), 0)
        self.assertEqual(quantiles.get_max_value(), n-1)
        self.assertEqual(quantiles.get_n(), n)
        self.assertFalse(quantiles.is_empty())
        self.assertFalse(quantiles.is_estimation_mode()) # n < k
        self.assertEqual(quantiles.get_k(), k)

        pmf = quantiles.get_pmf([round(n/2)])
        self.assertIsNotNone(pmf)
        self.assertEqual(len(pmf), 2)

        cdf = quantiles.get_cdf([round(n/2)])
        self.assertIsNotNone(cdf)
        self.assertEqual(len(cdf), 2)

        self.assertEqual(quantiles.get_quantile(0.5), round(n/2))
        quants = quantiles.get_quantiles([0.25, 0.5, 0.75])
        self.assertIsNotNone(quants)
        self.assertEqual(len(quants), 3)

        self.assertEqual(quantiles.get_rank(round(n/2)), 0.5)

        # merge self
        quantiles.merge(quantiles)
        self.assertEqual(quantiles.get_n(), 2 * n)

        sk_bytes = quantiles.serialize()
        self.assertTrue(isinstance(quantiles_ints_sketch.deserialize(sk_bytes), quantiles_ints_sketch))

    def test_quantiles_doubles_sketch(self):
      # already tested floats and ints and it's templatized, so just make sure it instantiates properly
      k = 128
      quantiles = quantiles_doubles_sketch(k)
      self.assertTrue(quantiles.is_empty())

if __name__ == '__main__':
    unittest.main()