% Conference paper in the ICS 2016 proceedings (database-exported record).
% NOTE(review): year/month/day (16 Feb 2017) are later than the conference
%   dates given in the note field (15--17 Dec 2016); presumably this is the
%   publication date of the proceedings -- confirm.
% NOTE(review): address holds a country rather than a publisher city; kept
%   as exported -- TODO confirm the intended value.
@inproceedings{5bfbf4e9ea584e68a6847f8957defb49,
  author    = {Huang, {Tzu Chi} and Chu, {Kuo Chih} and Lin, {Jia Hui} and Shieh, {Ce Kuen}},
  title     = {Idempotent Task Cache System for Handling Intermediate Data Skew in {MapReduce} on Cloud Computing},
  booktitle = {Proceedings - 2016 International Computer Symposium, {ICS} 2016},
  year      = {2017},
  month     = feb,
  day       = {16},
  pages     = {531--536},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/ICS.2016.0111},
  language  = {English},
  note      = {2016 International Computer Symposium, ICS 2016; Conference date: 15--17 December 2016},
  abstract  = {A MapReduce system gradually becomes a popular platform for developing cloud applications while MapReduce is the de facto standard programming model of the applications. However, a MapReduce system may suffer intermediate data skew to degrade performances because input data is unpredictable and the Map function of the application may generate different quantities of intermediate data according to the application algorithm. A MapReduce system can use the Idempotent Task Cache System (ITCS) proposed in this paper to handle intermediate data skew. A MapReduce system can avoid negative performance impacts of intermediate data skew with ITCS by using caches to skip the high workload of processing skewed intermediate data in certain Reduce tasks. In experiments, a MapReduce system is tested with several popular applications to prove that ITCS not only alleviates performance penalties when intermediate data skew happens, but also greatly outperforms native MapReduce systems without any help of ITCS.},
}