My application uses Kafka Streams 3.4.1 for stream processing, and the topology is simply one input topic, two stateful processing nodes, and a sink.
The following are the basic configurations:
- number of application instances: 12
- number of threads per application instance: 4
- number of partitions of the input topic: 30
- num.standby.replicas=0 (default)
- max.warmup.replicas=2 (default)
While investigating some unexpected rebalances, I noticed the following log (formatted for easier viewing) from HighAvailabilityTaskAssignor.
I tried reading the code to understand how the assignor works, but I still can’t figure out why the following assignment requires a probing rebalance (probingRebalanceNeeded=true).
Decided on assignment:
{
97405f8e-c74c-494e-886b-3eb8063c0fdf=
[
activeTasks: ([0_0, 0_2, 0_14])
standbyTasks: ([])
prevActiveTasks: ([0_0, 0_2, 0_14])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_0=-2, 0_2=-2, 0_14=-2])
taskLagTotals: ([0_0=-2, 0_1=2019630220, 0_2=-2, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=-2,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 3
],
b9e806f4-930a-4a12-8396-cec93d0a003e=
[
activeTasks: ([0_15, 0_27])
standbyTasks: ([0_1])
prevActiveTasks: ([0_15, 0_27])
prevStandbyTasks: ([0_1])
changelogOffsetTotalsByTask: ([0_1=2001229294, 0_15=-2, 0_27=-2])
taskLagTotals: ([0_0=2020326550, 0_1=18400926, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=-2, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=-2, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 3
],
08890537-ee25-42b4-aa3e-01060723a2a6=
[
activeTasks: ([0_3, 0_4, 0_16, 0_29])
standbyTasks: ([0_2])
prevActiveTasks: ([0_3, 0_4, 0_16, 0_29])
prevStandbyTasks: ([0_2])
changelogOffsetTotalsByTask: ([0_2=2001225092, 0_3=-2, 0_4=-2, 0_16=-2, 0_29=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=18281623, 0_3=-2, 0_4=-2,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=-2, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=-2])
clientTags: ([])
capacity: 4
assigned: 5],
124575d3-049b-4bab-b134-68d26fad5a6f=
[
activeTasks: ([0_17, 0_19, 0_21])
standbyTasks: ([])
prevActiveTasks: ([0_17, 0_19, 0_21])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_17=-2, 0_19=-2, 0_21=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=-2, 0_18=2012430598, 0_19=-2,
0_20=2027859564, 0_21=-2, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 3],
1dfe27d3-6a5d-4956-b4db-997c588882cd=
[
activeTasks: ([0_1, 0_18, 0_20])
standbyTasks: ([])
prevActiveTasks: ([0_1, 0_18, 0_20])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_1=-2, 0_18=-2, 0_20=-2])
taskLagTotals: ([0_0=2020326550, 0_1=-2, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=-2, 0_19=2018594340,
0_20=-2, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 3
],
34489b10-15af-42f8-a8cf-22a64fc80c7d=
[
activeTasks: ([0_5, 0_6])
standbyTasks: ([])
prevActiveTasks: ([0_5, 0_6])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_5=-2, 0_6=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=-2, 0_6=-2, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 2
],
41abcc49-54c9-4afb-9a19-b2952f522abd=
[
activeTasks: ([0_8, 0_22])
standbyTasks: ([])
prevActiveTasks: ([0_8, 0_22])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_8=-2, 0_22=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=-2, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=-2, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 2
],
4598e980-b8e9-485f-b8ee-7293e1da26b3=
[
activeTasks: ([0_9, 0_23])
standbyTasks: ([])
prevActiveTasks: ([0_9, 0_23])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_9=-2, 0_23=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=-2,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=-2,
0_24=2024962902, 0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 2
],
486f129a-ad5c-42a8-a137-ba13559a476c=
[
activeTasks: ([0_10, 0_12, 0_24])
standbyTasks: ([])
prevActiveTasks: ([0_10, 0_12, 0_24])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_10=-2, 0_12=-2, 0_24=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=-2, 0_11=2026533055, 0_12=-2, 0_13=2022231538, 0_14=2022325839, 0_15=2026682653,
0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340, 0_20=2027859564,
0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=-2,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 3
],
5774fb10-4da7-470d-969a-b17848b4d1f2=
[
activeTasks: ([0_11, 0_13, 0_28])
standbyTasks: ([])
prevActiveTasks: ([0_11, 0_13, 0_28])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_11=-2, 0_13=-2, 0_28=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=-2, 0_12=2031519148, 0_13=-2, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=2029583989, 0_27=2026366783, 0_28=-2, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 3
],
6fac0f01-44a5-41e7-b393-2846fa9cad75=
[
activeTasks: ([0_26])
standbyTasks: ([])
prevActiveTasks: ([0_26])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_26=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=2017341808, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=2018789694, 0_26=-2, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 1
],
701050ba-a2da-479c-a3a8-e1d9e9392453=
[
activeTasks: ([0_7, 0_25])
standbyTasks: ([])
prevActiveTasks: ([0_7, 0_25])
prevStandbyTasks: ([])
changelogOffsetTotalsByTask: ([0_7=-2, 0_25=-2])
taskLagTotals: ([0_0=2020326550, 0_1=2019630220, 0_2=2019506715, 0_3=2024870678, 0_4=2028179500,
0_5=2017468850, 0_6=2018701470, 0_7=-2, 0_8=2020834068, 0_9=2020712115,
0_10=2029211665, 0_11=2026533055, 0_12=2031519148, 0_13=2022231538, 0_14=2022325839,
0_15=2026682653, 0_16=2018526533, 0_17=2020990444, 0_18=2012430598, 0_19=2018594340,
0_20=2027859564, 0_21=2026399108, 0_22=2027341433, 0_23=2016975135, 0_24=2024962902,
0_25=-2, 0_26=2029583989, 0_27=2026366783, 0_28=2023064868, 0_29=2023926478])
clientTags: ([])
capacity: 4
assigned: 2
]
}
with followup probing rebalance.