% Conference paper in the proceedings of IEEE BigData 2023 (pages 1726--1735).
% Cleanup notes:
%   - Proper nouns in the title are brace-protected so sentence-casing styles
%     keep their capitals.
%   - The former "series" field repeated "booktitle" verbatim and was dropped
%     to avoid printing the proceedings title twice.
%   - "month" is derived from the conference date recorded in "note".
%   - Editor names use the unambiguous "Last, First" form throughout.
% NOTE(review): "address" holds a country, not a publisher city; the city is
% not recoverable from this record -- TODO confirm against the publisher data.
@inproceedings{626fc36e36854b31a186843ec19c5d8c,
  author    = {Hsiao, {Hung Chang} and Tsai, {Chia Ping} and Li, {Zheng Xian} and Lee, {Chao Heng} and Chen, {Jia Sheng} and Lai, {Yu Chen} and Wang, {Jia Chi} and Li, {Shao Chi} and Gao, {Jhih Cyuan} and Lee, {Yi Huan}},
  title     = {Load Balancing Algorithms and Their Impacts on {Apache} {Kafka}},
  booktitle = {Proceedings - 2023 {IEEE} International Conference on Big Data, {BigData} 2023},
  editor    = {He, Jingrui and Palpanas, Themis and Hu, Xiaohua and Cuzzocrea, Alfredo and Dou, Dejing and Slezak, Dominik and Wang, Wei and Gruca, Aleksandra and Lin, {Jerry Chun-Wei} and Agrawal, Rakesh},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2023},
  month     = dec,
  pages     = {1726--1735},
  doi       = {10.1109/BigData59044.2023.10386734},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE. 2023 IEEE International Conference on Big Data, BigData 2023. Conference date: 15--18 December 2023},
  abstract  = {Apache Kafka is a distributed data streaming platform that is widely adopted in the industry. Producers introduce messages to Kafka server nodes, and consumers fetch interested messages in real time. In Kafka, the loads of nodes that serve streaming traffic may be imbalanced because some traffic is relatively popular, resulting in load imbalance. Balancing loads among nodes enables producers and consumers to avoid performance bottleneck, shortening the latencies of sending and receiving their messages and thus improving quality of service. Partitions are the fundamental entities that serve loads in Kafka and are responsible for hosting message payloads. Kafka provides a built-in load balancer that addresses load imbalance. The built-in load balancer distributes partitions evenly to the system, which is not designed to deal with load imbalance due to the heterogeneity of loads. The state-of-the-art load balancer released by LinkedIn, namely, Cruise Control (CC), complements the Kafka built-in load balancer. The performance quality of CC is sensitive and highly depends on the ordering of performance metric constraints. We propose Yet Another (YA) load balancer for Kafka, aiming to achieve simplicity and robustness for the average case. We compare YA with the Kafka built-in load balancer and CC in a real cluster environment. Performance results validate our findings and indicate that load balancers based on gathered performance metrics can effectively reduce the end-to-end delay perceived by streaming applications up to a ratio of approximately 20. CC and our proposed load balancer clearly outperform the Kafka built-in load balancer, yet while our proposed load balancer is comparable to CC, it is more robust.},
}