% Conference paper: review of MapReduce-based big data clustering algorithms
% (IntelliSys 2020, Springer "Advances in Intelligent Systems and Computing").
% Cleaned from an automated export:
%   - names are in "Last, First" form with no escaped braces;
%   - the DOI is stored bare (no LaTeX escaping of the underscore);
%   - CamelCase words in titles are brace-protected against style recasing;
%   - address holds the publisher city (the copyright notice names
%     Springer Nature Switzerland AG); TODO confirm against the title page.
% The citation key is the exporter's identifier and is kept so existing
% citations continue to resolve.
@inproceedings{e77b2f6f8f9c4711a89bac72fea6a977,
  author    = {Khader, Mariam S. and Al-Naymat, Ghazi},
  title     = {Big Data Clustering Using {MapReduce} Framework: A Review},
  booktitle = {Intelligent Systems and Applications - Proceedings of the 2020 Intelligent Systems Conference {IntelliSys} Volume 2},
  editor    = {Arai, Kohei and Kapoor, Supriya and Bhatia, Rahul},
  series    = {Advances in Intelligent Systems and Computing},
  publisher = {Springer},
  address   = {Cham},
  year      = {2021},
  pages     = {575--593},
  doi       = {10.1007/978-3-030-55187-2_42},
  isbn      = {9783030551865},
  language  = {English},
  keywords  = {Big data, Clustering, Density clustering, MapReduce framework},
  abstract  = {The clustering is an essential technique of data analysis that extracts distribution patterns or similar groups within data. Because of the crucial role of clustering in many scientific applications, numerous research is concerned with developing new algorithms for big data clustering. Despite this fact, the clustering remains a challenge in big data as the size and variety of datasets are rapidly increasing in the real-world. Recently, several clustering algorithms have been proposed to handle large datasets using MapReduce framework. This paper provides an overview of the clustering algorithms using MapReduce, it introduces a categorization of these algorithms based on the clustering technique and discusses their strengths and limitations. Finally, the paper discusses the main issues of each clustering approach in MapReduce framework to serve as a step for future enhancements.},
  note      = {Publisher Copyright: {\textcopyright} 2021, Springer Nature Switzerland AG.; Intelligent Systems Conference, IntelliSys 2020; Conference date: 03-09-2020 Through 04-09-2020},
}