use super::*;

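/// Broadcast metrics that accumulate across batches and are reported once per
/// slot: `update` folds in another batch's measurements, and `report_stats`
/// emits the accumulated values when the slot's broadcast completes.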
pub(crate) trait BroadcastStats {
    fn update(&mut self, new_stats: &Self);
    fn report_stats(&mut self, slot: Slot, slot_start: Instant);
}

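/// Metadata attached to each batch of shreds flowing through the broadcast
/// pipeline. `num_expected_batches` is `Some` only on the final batch of a
/// slot, telling the stats tracker how many batches to wait for before
/// reporting.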
#[derive(Clone)]
pub(crate) struct BroadcastShredBatchInfo {
    pub(crate) slot: Slot,
    pub(crate) num_expected_batches: Option<usize>,
    pub(crate) slot_start_ts: Instant,
}

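/// Cumulative timings and counts gathered while transmitting batches of
/// shreds for a single slot.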
#[derive(Default, Clone)]
pub struct TransmitShredsStats {
    pub transmit_elapsed: u64,
    pub send_mmsg_elapsed: u64,
    pub get_peers_elapsed: u64,
    pub shred_select: u64,
    pub num_shreds: usize,
}

impl BroadcastStats for TransmitShredsStats {
    fn update(&mut self, new_stats: &TransmitShredsStats) {
        self.transmit_elapsed += new_stats.transmit_elapsed;
        self.send_mmsg_elapsed += new_stats.send_mmsg_elapsed;
        self.get_peers_elapsed += new_stats.get_peers_elapsed;
        self.num_shreds += new_stats.num_shreds;
        self.shred_select += new_stats.shred_select;
    }
    fn report_stats(&mut self, slot: Slot, slot_start: Instant) {
        datapoint_info!(
            "broadcast-transmit-shreds-stats",
            ("slot", slot as i64, i64),
            (
                "end_to_end_elapsed",
                // `slot_start` signals when the first batch of shreds was
                // received, used to measure duration of broadcast
                slot_start.elapsed().as_micros() as i64,
                i64
            ),
            ("transmit_elapsed", self.transmit_elapsed as i64, i64),
            ("send_mmsg_elapsed", self.send_mmsg_elapsed as i64, i64),
            ("get_peers_elapsed", self.get_peers_elapsed as i64, i64),
            ("num_shreds", self.num_shreds as i64, i64),
            ("shred_select", self.shred_select as i64, i64),
        );
    }
}

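/// Cumulative timing and shred count for inserting batches of shreds within
/// a slot.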
#[derive(Default, Clone)]
pub(crate) struct InsertShredsStats {
    pub(crate) insert_shreds_elapsed: u64,
    pub(crate) num_shreds: usize,
}
impl BroadcastStats for InsertShredsStats {
    fn update(&mut self, new_stats: &InsertShredsStats) {
        self.insert_shreds_elapsed += new_stats.insert_shreds_elapsed;
        self.num_shreds += new_stats.num_shreds;
    }
    fn report_stats(&mut self, slot: Slot, slot_start: Instant) {
        datapoint_info!(
            "broadcast-insert-shreds-stats",
            ("slot", slot as i64, i64),
            (
                "end_to_end_elapsed",
                // `slot_start` signals when the first batch of shreds was
                // received, used to measure duration of broadcast
                slot_start.elapsed().as_micros() as i64,
                i64
            ),
            (
                "insert_shreds_elapsed",
                self.insert_shreds_elapsed as i64,
                i64
            ),
            ("num_shreds", self.num_shreds as i64, i64),
        );
    }
}

// Tracks metrics of type `T` for a single slot, accumulated across all broadcast threads
#[derive(Default)]
pub(crate) struct BatchCounter<T: BroadcastStats + Default> {
    // The number of batches processed across all threads so far
    num_batches: usize,
    // Filled in when the last batch of shreds is received,
    // signals how many batches of shreds to expect
    num_expected_batches: Option<usize>,
    broadcast_shred_stats: T,
}

impl<T: BroadcastStats + Default> BatchCounter<T> {
    #[cfg(test)]
    pub(crate) fn num_batches(&self) -> usize {
        self.num_batches
    }
}

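/// Per-slot accumulation of `T`, keyed by slot. A minimal usage sketch with
/// hypothetical values (marked `ignore` so it is not run as a doctest):
///
/// ```ignore
/// let mut stats = SlotBroadcastStats::<TransmitShredsStats>::default();
/// let batch_info = Some(BroadcastShredBatchInfo {
///     slot: 1,
///     num_expected_batches: Some(1), // final (and only) batch for slot 1
///     slot_start_ts: Instant::now(),
/// });
/// // With a single expected batch, this reports and purges slot 1 immediately.
/// stats.update(&TransmitShredsStats::default(), &batch_info);
/// ```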
#[derive(Default)]
pub(crate) struct SlotBroadcastStats<T: BroadcastStats + Default>(HashMap<Slot, BatchCounter<T>>);

impl<T: BroadcastStats + Default> SlotBroadcastStats<T> {
    #[cfg(test)]
    pub(crate) fn get(&self, slot: Slot) -> Option<&BatchCounter<T>> {
        self.0.get(&slot)
    }
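    /// Folds `new_stats` into the running totals for the batch's slot. Once
    /// the number of recorded batches reaches `num_expected_batches`, the
    /// accumulated stats are reported and the slot's entry is removed from
    /// the map. Updates without `batch_info` are ignored.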
    pub(crate) fn update(&mut self, new_stats: &T, batch_info: &Option<BroadcastShredBatchInfo>) {
        if let Some(batch_info) = batch_info {
            let mut should_delete = false;
            {
                let slot_batch_counter = self.0.entry(batch_info.slot).or_default();
                slot_batch_counter.broadcast_shred_stats.update(new_stats);
                // Only count batches that carry a `BroadcastShredBatchInfo`, because
                // there could be other `retransmit` slots inserted into the
                // transmit pipeline (signaled by ReplayStage) that are not created
                // by the main shredding/broadcast pipeline
                slot_batch_counter.num_batches += 1;
                if let Some(num_expected_batches) = batch_info.num_expected_batches {
                    slot_batch_counter.num_expected_batches = Some(num_expected_batches);
                }
                if let Some(num_expected_batches) = slot_batch_counter.num_expected_batches {
                    if slot_batch_counter.num_batches == num_expected_batches {
                        slot_batch_counter
                            .broadcast_shred_stats
                            .report_stats(batch_info.slot, batch_info.slot_start_ts);
                        should_delete = true;
                    }
                }
            }
            if should_delete {
                self.0
                    .remove(&batch_info.slot)
                    .expect("delete should be successful");
            }
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

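    // Minimal `BroadcastStats` implementation that forwards reported values
    // over a channel so the tests can assert on them.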
    #[derive(Default)]
    struct TestStats {
        sender: Option<Sender<(usize, Slot, Instant)>>,
        count: usize,
    }

    impl BroadcastStats for TestStats {
        fn update(&mut self, new_stats: &TestStats) {
            self.count += new_stats.count;
            self.sender = new_stats.sender.clone();
        }
        fn report_stats(&mut self, slot: Slot, slot_start: Instant) {
            self.sender
                .as_ref()
                .unwrap()
                .send((self.count, slot, slot_start))
                .unwrap()
        }
    }

    #[test]
    fn test_update_broadcast() {
        let start = Instant::now();
        let mut slot_broadcast_stats = SlotBroadcastStats::default();
        slot_broadcast_stats.update(
            &TransmitShredsStats {
                transmit_elapsed: 1,
                get_peers_elapsed: 2,
                send_mmsg_elapsed: 3,
                shred_select: 4,
                num_shreds: 5,
            },
            &Some(BroadcastShredBatchInfo {
                slot: 0,
                num_expected_batches: Some(2),
                slot_start_ts: start,
            }),
        );

        // Singular update
        let slot_0_stats = slot_broadcast_stats.0.get(&0).unwrap();
        assert_eq!(slot_0_stats.num_batches, 1);
        assert_eq!(slot_0_stats.num_expected_batches.unwrap(), 2);
        assert_eq!(slot_0_stats.broadcast_shred_stats.transmit_elapsed, 1);
        assert_eq!(slot_0_stats.broadcast_shred_stats.get_peers_elapsed, 2);
        assert_eq!(slot_0_stats.broadcast_shred_stats.send_mmsg_elapsed, 3);
        assert_eq!(slot_0_stats.broadcast_shred_stats.shred_select, 4);
        assert_eq!(slot_0_stats.broadcast_shred_stats.num_shreds, 5);

        slot_broadcast_stats.update(
            &TransmitShredsStats {
                transmit_elapsed: 7,
                get_peers_elapsed: 8,
                send_mmsg_elapsed: 9,
                shred_select: 10,
                num_shreds: 11,
            },
            &None,
        );

        // An update without a `BroadcastShredBatchInfo` must be ignored,
        // leaving the existing stats unchanged
        let slot_0_stats = slot_broadcast_stats.0.get(&0).unwrap();
        assert_eq!(slot_0_stats.num_batches, 1);
        assert_eq!(slot_0_stats.num_expected_batches.unwrap(), 2);
        assert_eq!(slot_0_stats.broadcast_shred_stats.transmit_elapsed, 1);
        assert_eq!(slot_0_stats.broadcast_shred_stats.get_peers_elapsed, 2);
        assert_eq!(slot_0_stats.broadcast_shred_stats.send_mmsg_elapsed, 3);
        assert_eq!(slot_0_stats.broadcast_shred_stats.shred_select, 4);
        assert_eq!(slot_0_stats.broadcast_shred_stats.num_shreds, 5);

        // Once another batch arrives, the total number of batches equals
        // num_expected_batches == 2, so the slot's entry should be purged
        // from the HashMap
        slot_broadcast_stats.update(
            &TransmitShredsStats {
                transmit_elapsed: 1,
                get_peers_elapsed: 1,
                send_mmsg_elapsed: 1,
                shred_select: 1,
                num_shreds: 1,
            },
            &Some(BroadcastShredBatchInfo {
                slot: 0,
                num_expected_batches: None,
                slot_start_ts: start,
            }),
        );

        assert!(slot_broadcast_stats.0.get(&0).is_none());
    }

    #[test]
    #[allow(clippy::eq_op)]
    fn test_update_multi_threaded() {
        for round in 0..50 {
            let start = Instant::now();
            let slot_broadcast_stats = Arc::new(Mutex::new(SlotBroadcastStats::default()));
            let num_threads = 5;
            let slot = 0;
            let (sender, receiver) = channel();
            let thread_handles: Vec<_> = (0..num_threads)
                .map(|i| {
                    let slot_broadcast_stats = slot_broadcast_stats.clone();
                    let sender = Some(sender.clone());
                    let test_stats = TestStats { sender, count: 1 };
                    let mut broadcast_batch_info = BroadcastShredBatchInfo {
                        slot,
                        num_expected_batches: None,
                        slot_start_ts: start,
                    };
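                    // Exactly one thread per round carries `num_expected_batches`,
                    // exercising completion detection regardless of which thread
                    // delivers the final count.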
                    if i == round % num_threads {
                        broadcast_batch_info.num_expected_batches = Some(num_threads);
                    }
                    Builder::new()
                        .name("test_update_multi_threaded".to_string())
                        .spawn(move || {
                            slot_broadcast_stats
                                .lock()
                                .unwrap()
                                .update(&test_stats, &Some(broadcast_batch_info))
                        })
                        .unwrap()
                })
                .collect();

            for t in thread_handles {
                t.join().unwrap();
            }

            assert!(slot_broadcast_stats.lock().unwrap().0.get(&slot).is_none());
            let (returned_count, returned_slot, _returned_instant) = receiver.recv().unwrap();
            assert_eq!(returned_count, num_threads);
            assert_eq!(returned_slot, slot);
        }
    }
}