@TechReport{NetworkStats_TR2002,
AUTHOR = {Erik D. Demaine and Alejandro L\'opez-Ortiz and J. Ian Munro},
TITLE = {Frequency Estimation of Internet Packet Streams with Limited Space},
INSTITUTION = {Massachusetts Institute of Technology},
MONTH = {August},
YEAR = 2002,
LENGTH = {14 pages},
PAPERS = {NetworkStats_ESA2002},
}
We present an algorithm that deterministically finds (in particular) all categories having a frequency above 1/(m+1) using m counters, which we prove is best possible in the worst case. We also present a sampling-based algorithm for the case that packet categories follow an arbitrary distribution, but their order over time is permuted uniformly at random. Under this model, our algorithm identifies flows above a frequency threshold of roughly 1/√n m with high probability, where m is the number of counters and n is the number of packets observed. This guarantee is not far off from the ideal of identifying all flows (probability 1/n), and we prove that it is best possible up to a logarithmic factor. We show that the algorithm ranks the identified flows according to frequency within any desired constant factor of accuracy.