Skip to content
Snippets Groups Projects
Commit ec196eb5 authored by Fanna Lautenbach
Browse files

Merge branch 'hotfix/fix-chart-buckets' into 'main'

fix n_bins for unique values and change formatting to 3 decimals

See merge request !50
parents 1609a838 b51de4de
No related branches found
No related tags found
1 merge request: !50 "fix n_bins for unique values and change formatting to 3 decimals"
Pipeline #41549 passed
......@@ -60,7 +60,7 @@ class ComputeInputsHistogram(unittest.TestCase):
self.assertEqual(53832140800, max_size)
self.assertEqual(2, biggest_bucket)
self.assertListEqual([1, 2, 2, 0, 0, 1], counts)
self.assertListEqual(['20.3GB', '25.3GB', '30.3GB', '35.2GB', '40.2GB', '45.2GB', '50.1GB'], bins)
self.assertListEqual(['20.333GB', '25.300GB', '30.267GB', '35.234GB', '40.201GB', '45.168GB', '50.135GB'], bins)
def test_single_range(self):
test_data = {"surls": [{"size": 1, "surl": "test"},
......@@ -77,7 +77,7 @@ class ComputeInputsHistogram(unittest.TestCase):
self.assertEqual(1, max_size)
self.assertEqual(6, biggest_bucket)
self.assertListEqual([6], counts)
self.assertListEqual(['0.5B', '1.5B'], bins)
self.assertListEqual(['0.500B', '1.500B'], bins)
def test_extreme_wide_range(self):
test_data = {"surls": [{"size": 1, "surl": "test"},
......@@ -94,4 +94,30 @@ class ComputeInputsHistogram(unittest.TestCase):
self.assertEqual(1000000000000000000, max_size)
self.assertEqual(5, biggest_bucket)
self.assertListEqual([5, 0, 0, 0, 0, 1], counts) # TODO: if this is the case, adapt it to logarithmic scale
self.assertListEqual(['1.0B', '148.0PB', '296.1PB', '444.1PB', '592.1PB', '740.1PB', '888.2PB'], bins)
self.assertListEqual(['1.000B', '148.030PB', '296.059PB', '444.089PB', '592.119PB', '740.149PB', '888.178PB'], bins)
def test_two_ranges(self):
    """Inputs holding exactly two distinct sizes are split over two buckets.

    The fixture contains eight entries of the smaller size followed by
    seven entries of the larger size; the assertions pin the bucket count,
    the extremes, the per-bucket counts and the formatted bucket edges
    returned by ``compute_inputs_histogram``.
    """
    smaller, larger = 97873920, 97884160
    sizes = [smaller] * 8 + [larger] * 7
    # One independent dict per entry, mirroring the hand-written fixture.
    test_data = {"surls": [{"size": size, "surl": "test"} for size in sizes]}

    histogram = compute_inputs_histogram(test_data)
    min_size, max_size, n_bins, counts, biggest_bucket, bins = histogram

    self.assertEqual(2, n_bins)
    self.assertEqual(smaller, min_size)
    self.assertEqual(larger, max_size)
    self.assertEqual(8, biggest_bucket)
    self.assertListEqual([8, 7], counts)
    self.assertListEqual(['93.340MB', '93.345MB', '93.350MB'], bins)
......@@ -73,7 +73,8 @@ def compute_inputs_histogram(inputs):
min_size = inputs_sizes.min()
max_size = inputs_sizes.max()
n_bins = 1 if min_size == max_size else (inputs_sizes.__len__() if inputs_sizes.__len__() < 100 else 100)
n_distinct_sizes = numpy.unique(inputs_sizes).__len__()
n_bins = min(n_distinct_sizes, 100)
counts, buckets = numpy.histogram(inputs_sizes, bins=n_bins, range=(min_size, max_size))
formatted_bins = [format_size(bucket) % bucket for bucket in buckets]
......@@ -85,7 +86,7 @@ def format_size(num, suffix="B"):
return "-"
for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
if abs(num) < 1024.0:
return f"{num:3.1f}{unit}{suffix}"
return f"{num:3.3f}{unit}{suffix}"
num /= 1024.0
return f"{num:.1f}Yi{suffix}"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment