@InProceedings{Clustering_APPROX2003,
AUTHOR = {Erik D. Demaine and Nicole Immorlica},
TITLE = {Correlation Clustering with Partial Information},
BOOKTITLE = {Proceedings of the 6th International Workshop on
Approximation Algorithms for Combinatorial Optimization
Problems and 7th International Workshop on Randomization
and Approximation Techniques in Computer Science
(RANDOM-APPROX 2003)},
ADDRESS = {Princeton, New Jersey},
MONTH = {August 24--26},
YEAR = 2003,
PAGES = {1--13},
papers = {Clustering_TCS},
length = {12 pages},
doi = {https://doi.org/10.1007/978-3-540-45198-3_1},
dblp = {https://dblp.org/rec/conf/random/DemaineI03},
comments = {This paper is also available from <A HREF="https://doi.org/10.1007/978-3-540-45198-3_1">SpringerLink</A>.},
withstudent = 1,
}
Correlation clustering was introduced by Bansal, Blum, and Chawla [1], motivated by both document clustering and agnostic learning. They proved NP-hardness and gave constant-factor approximation algorithms for the special case in which the graph is complete (full information) and every edge has weight +1 or −1. We give an O(log n)-approximation algorithm for the general case based on a linear-programming rounding and the “region-growing” technique. We also prove that this linear program has a gap of Ω(log n), and therefore our approximation is tight under this approach. We also give an O(r^3)-approximation algorithm for K_{r,r}-minor-free graphs. On the other hand, we show that the problem is APX-hard, and any o(log n)-approximation would require improving the best approximation algorithms known for minimum multicut.
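For concreteness, the rounding is based on a linear-programming relaxation for minimizing disagreements; the following is a sketch of the natural such LP, not necessarily the exact formulation in the paper (notation is ours: x_{uv} is a relaxed indicator that u and v lie in different clusters, E^+ and E^- are the similar and dissimilar edge sets, and w_{uv} >= 0 are edge weights):

\begin{align*}
\text{minimize} \quad & \sum_{(u,v) \in E^+} w_{uv}\, x_{uv} \;+\; \sum_{(u,v) \in E^-} w_{uv}\, (1 - x_{uv}) \\
\text{subject to} \quad & x_{uv} \le x_{uw} + x_{wv} && \text{for all } u, v, w \in V, \\
& 0 \le x_{uv} \le 1 && \text{for all } u, v \in V.
\end{align*}

The region-growing step interprets an optimal x as a metric on the vertices and repeatedly carves off balls whose boundary cost is within an O(log n) factor of their LP volume, in the spirit of region-growing roundings for minimum multicut.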