Calculate probabilities of the number of uniformely distributed items ending up in a bucket

  1. def p(items_in_bucket, cur_items, m):
  2.  
  3. if items_in_bucket not in m:
  4. m[items_in_bucket] = {}
  5.  
  6. if cur_items in m[items_in_bucket]:
  7. return m[items_in_bucket][cur_items]
  8.  
  9. if items_in_bucket == 0:
  10. # Out of all cur_items, none ended up in the bucket
  11. m[items_in_bucket][cur_items] = pow(1-1/buckets,cur_items)
  12. elif items_in_bucket == cur_items:
  13. # Out of all cur_items, all ended up in the bucket
  14. m[items_in_bucket][cur_items] = pow(1/buckets, cur_items)
  15. else:
  16. # P(already have enough items in this bucket)*P(item will hit another bucket) +
  17. # P(missing only one item in this bucket)*P(item will hit this bucket)
  18. m[items_in_bucket][cur_items] = \
  19. p(items_in_bucket, cur_items-1, m)*(1-1/buckets) + \
  20. p(items_in_bucket-1, cur_items-1, m)*1/buckets
  21.  
  22. return m[items_in_bucket][cur_items]
  23.  
  24. items = 996
  25. bucket_total_colors = {
  26. 100:"\033[92m", # green
  27. 1000:"\033[91m", # red
  28. 10000:"\033[94m"} # blue
  29.  
  30. for buckets in [100,1000,10000]:
  31. print(bucket_total_colors[buckets])
  32. print(f"{buckets} buckets, {items} items (Poisson λ={items/buckets:0.4})\n")
  33. memo = {}
  34. for i in range(20):
  35. prob = p(i,items,memo)
  36. print(f"{i}-item buckets: {prob*buckets:.3f}, P={prob:.7f}, {prob*100:.5f}%")
  37. print("\033[0m")