@InProceedings{NetworkStats_ESA2002,
AUTHOR = {Erik D. Demaine and Alejandro L\'opez-Ortiz and J. Ian Munro},
TITLE = {Frequency Estimation of Internet Packet Streams with Limited Space},
BOOKTITLE = {Proceedings of the 10th Annual European Symposium on Algorithms (ESA 2002)},
BOOKURL = {http://www.dis.uniroma1.it/~algo02/esa02/},
SERIES = {Lecture Notes in Computer Science},
SERIESURL = {http://www.springer.de/comp/lncs/},
VOLUME = 2461,
ADDRESS = {Rome, Italy},
MONTH = {September 17--21},
YEAR = 2002,
PAGES = {348--360},
copyright = {The paper is \copyright Springer-Verlag.},
length = {12 pages},
papers = {NetworkStats_TR2002},
doi = {https://dx.doi.org/10.1007/3-540-45749-6_33},
dblp = {https://dblp.org/rec/conf/esa/DemaineLM02},
comments = {This paper is also available from <A HREF="https://doi.org/10.1007/3-540-45749-6_33">SpringerLink</A>.},
updates = {Our deterministic algorithm is identical to an earlier
algorithm of J. Misra and D. Gries, “Finding repeated
elements”, <I>Science of Programming</I>,
2:143–152, 1982.},
}
We present an algorithm that deterministically finds (in particular) all categories having a frequency above 1/(m+1) using m counters, which we prove is best possible in the worst case. We also present a sampling-based algorithm for the case that packet categories follow an arbitrary distribution, but their order over time is permuted uniformly at random. Under this model, our algorithm identifies flows above a frequency threshold of roughly 1/√n m with high probability, where m is the number of counters and n is the number of packets observed. This guarantee is not far off from the ideal of identifying all flows (probability 1/n), and we prove that it is best possible up to a logarithmic factor. We show that the algorithm ranks the identified flows according to frequency within any desired constant factor of accuracy.