% Conference paper on merging small files for HDFS (ECTI-CON 2022, IEEE).
% The citation key is kept unchanged so existing citations keep resolving.
% Author names use the "Last, First" form; the compound surname is held
% together with a real brace group rather than escaped literal braces.
% The former "series" field only repeated "booktitle" and was dropped.
@inproceedings{f460fda2956d4a4baad2603b23d39508,
  author    = {Ali, Adnan and Mirza, Nada Masood and {Khairi Ishak}, Mohamad},
  title     = {A New Merging Numerous Small Files Approach for {Hadoop Distributed File System}},
  booktitle = {19th International Conference on Electrical Engineering/Electronics, Computer, Telecommunications and Information Technology, ECTI-CON 2022},
  year      = {2022},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/ECTI-CON54298.2022.9795369},
  language  = {English},
  keywords  = {HDFS, Hadoop, Map Reduce, big data, data mining},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 19th International Conference on Electrical Engineering/Electronics, Computer, Telecommunications and Information Technology, ECTI-CON 2022 ; Conference date: 24-05-2022 Through 27-05-2022},
  abstract  = {In the current era of big data, enormous data is being recorded every second from multiple streams and multiple environments of different types. This hugely generated data is processed with the support of specialized tools such as Hadoop which ensures the processing of data by considering the memory, process allocation, size, and storage. Hadoop framework is known to be efficient with few files of large size rather than many files of small size which caused lots of issues for the framework to work efficiently and the time required for the processing is hugely increased. To eliminate this issue, this work proposes a new algorithm for merging many files of small size into a single large file based on certain match criteria (type and size). This process will be executed before the files are passed to the Hadoop framework. The proposed algorithm ensures that it will generate the least number of large files that reduces the I/O memory load and correlates with the efficiency of the Hadoop framework. The results prove that the proposed algorithm increases the efficiency and the time required by the Hadoop framework for processing by approximately 40\% over all the possible factors that hinder the performance.},
}