@Article{Clustering_TCS,
AUTHOR = {Erik D. Demaine and Dotan Emanuel and Amos Fiat and
Nicole Immorlica},
TITLE = {Correlation Clustering in General Weighted Graphs},
JOURNAL = {Theoretical Computer Science},
journalurl = {https://www.journals.elsevier.com/theoretical-computer-science},
VOLUME = 361,
NUMBER = {2--3},
MONTH = {September},
YEAR = 2006,
PAGES = {172--187},
NOTE = {Special issue on approximation and online algorithms.},
length = {20 pages},
papers = {Clustering_APPROX2003},
replaces = {Clustering_APPROX2003},
withstudent = 1,
doi = {https://dx.doi.org/10.1016/J.TCS.2006.05.008},
dblp = {https://dblp.org/rec/journals/tcs/DemaineEFI06},
comments = {This paper is also available from
<A HREF="http://dx.doi.org/10.1016/j.tcs.2006.05.008">ScienceDirect</A>.},
}
Correlation clustering was introduced by Bansal, Blum, and Chawla [1], motivated by both document clustering and agnostic learning. They proved NP-hardness and gave constant-factor approximation algorithms for the special case in which the graph is complete (full information) and every edge has the same weight. We give an O(log n)-approximation algorithm for the general case based on a linear-programming rounding and the “region-growing” technique. We also prove that this linear program has a gap of Ω(log n), and therefore our approximation is tight under this approach. We also give an O(r3)-approximation algorithm for Kr, r-minor-free graphs. On the other hand, we show that the problem is equivalent to minimum multicut, and therefore APX-hard and difficult to approximate better than Θ(log n).