# Patch overview (review notes). This text sits ahead of the first
# "diff --git" header and is not part of any hunk.
#
# ldvspec/lofardata/tests/test_util_funcs.py
#   - Three existing `bins` assertions are updated so the expected labels
#     carry three decimals (e.g. '20.3GB' -> '20.333GB', '0.5B' -> '0.500B').
#   - Adds test_two_ranges: 8 inputs of size 97873920 and 7 of size 97884160,
#     asserting n_bins == 2, biggest_bucket == 8, counts == [8, 7] and three
#     formatted bin edges.
#   - NOTE(review): the "# TODO: ... logarithmic scale" remark on the new
#     counts assertion repeats, word for word, the TODO on the context line of
#     the preceding hunk; looks copy-pasted -- confirm it applies to this test.
#
# ldvspec/lofardata/views.py
#   - compute_inputs_histogram: n_bins is now the number of distinct sizes
#     (numpy.unique), capped at 100. The removed expression used the total
#     number of inputs (same cap) and special-cased min_size == max_size to 1.
#   - format_size: the return inside the unit loop changes from {num:3.1f} to
#     {num:3.3f}. The final "Yi" return is an unchanged context line and still
#     uses {num:.1f}.
#     NOTE(review): presumably the fallback should use the same precision --
#     confirm.
#   - NOTE(review): the context line `format_size(bucket) % bucket` applies `%`
#     to the value format_size returns; the returns visible in this patch are
#     strings without % placeholders -- confirm `% bucket` is intentional.
diff --git a/ldvspec/lofardata/tests/test_util_funcs.py b/ldvspec/lofardata/tests/test_util_funcs.py index d1e54032a0143f1040f8f6bdcc8e1214c2fa9136..959115a3ef47ead7fab38f1882a2558aa9ef4844 100644 --- a/ldvspec/lofardata/tests/test_util_funcs.py +++ b/ldvspec/lofardata/tests/test_util_funcs.py @@ -60,7 +60,7 @@ class ComputeInputsHistogram(unittest.TestCase): self.assertEqual(53832140800, max_size) self.assertEqual(2, biggest_bucket) self.assertListEqual([1, 2, 2, 0, 0, 1], counts) - self.assertListEqual(['20.3GB', '25.3GB', '30.3GB', '35.2GB', '40.2GB', '45.2GB', '50.1GB'], bins) + self.assertListEqual(['20.333GB', '25.300GB', '30.267GB', '35.234GB', '40.201GB', '45.168GB', '50.135GB'], bins) def test_single_range(self): test_data = {"surls": [{"size": 1, "surl": "test"}, @@ -77,7 +77,7 @@ class ComputeInputsHistogram(unittest.TestCase): self.assertEqual(1, max_size) self.assertEqual(6, biggest_bucket) self.assertListEqual([6], counts) - self.assertListEqual(['0.5B', '1.5B'], bins) + self.assertListEqual(['0.500B', '1.500B'], bins) def test_extreme_wide_range(self): test_data = {"surls": [{"size": 1, "surl": "test"}, @@ -94,4 +94,30 @@ class ComputeInputsHistogram(unittest.TestCase): self.assertEqual(1000000000000000000, max_size) self.assertEqual(5, biggest_bucket) self.assertListEqual([5, 0, 0, 0, 0, 1], counts) # TODO: if this is the case, adapt it to logarithmic scale - self.assertListEqual(['1.0B', '148.0PB', '296.1PB', '444.1PB', '592.1PB', '740.1PB', '888.2PB'], bins) + self.assertListEqual(['1.000B', '148.030PB', '296.059PB', '444.089PB', '592.119PB', '740.149PB', '888.178PB'], bins) + + def test_two_ranges(self): + test_data = {"surls": [{"size": 97873920, "surl": "test"}, + {"size": 97873920, "surl": "test"}, + {"size": 97873920, "surl": "test"}, + {"size": 97873920, "surl": "test"}, + {"size": 97873920, "surl": "test"}, + {"size": 97873920, "surl": "test"}, + {"size": 97873920, "surl": "test"}, + {"size": 97873920, "surl": "test"}, + {"size": 97884160, 
"surl": "test"}, + {"size": 97884160, "surl": "test"}, + {"size": 97884160, "surl": "test"}, + {"size": 97884160, "surl": "test"}, + {"size": 97884160, "surl": "test"}, + {"size": 97884160, "surl": "test"}, + {"size": 97884160, "surl": "test"}]} + + min_size, max_size, n_bins, counts, biggest_bucket, bins = compute_inputs_histogram(test_data) + + self.assertEqual(2, n_bins) + self.assertEqual(97873920, min_size) + self.assertEqual(97884160, max_size) + self.assertEqual(8, biggest_bucket) + self.assertListEqual([8,7], counts) # TODO: if this is the case, adapt it to logarithmic scale + self.assertListEqual(['93.340MB', '93.345MB', '93.350MB'], bins) diff --git a/ldvspec/lofardata/views.py b/ldvspec/lofardata/views.py index 0074f966f12a515cb37a2c49d132bf49aaed63c2..74f7bd6db1ea9a499b9501c06fd96f687b7757ff 100644 --- a/ldvspec/lofardata/views.py +++ b/ldvspec/lofardata/views.py @@ -73,7 +73,8 @@ def compute_inputs_histogram(inputs): min_size = inputs_sizes.min() max_size = inputs_sizes.max() - n_bins = 1 if min_size == max_size else (inputs_sizes.__len__() if inputs_sizes.__len__() < 100 else 100) + n_distinct_sizes = numpy.unique(inputs_sizes).__len__() + n_bins = n_distinct_sizes if n_distinct_sizes < 100 else 100 counts, buckets = numpy.histogram(inputs_sizes, bins=n_bins, range=(min_size, max_size)) formatted_bins = [format_size(bucket) % bucket for bucket in buckets] @@ -85,7 +86,7 @@ def format_size(num, suffix="B"): return "-" for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: if abs(num) < 1024.0: - return f"{num:3.1f}{unit}{suffix}" + return f"{num:3.3f}{unit}{suffix}" num /= 1024.0 return f"{num:.1f}Yi{suffix}"