feat: [torrust#1096] use exact IP banning list

josecelano · josecelano · commit 9eb98484ed29 · 2024-12-16T16:02:40.000Z
We are using a Counting Bloom Filter to count IPs sending wrong
connection IDs. IPs are banned after sending 10 wrong connection IDs.

CBFs are fast and use little memory but they are also inaccurate. They
have false positives, meaning some IPs would be banned only because there
are bucket collisions (IPs sharing the same counter).

To avoid banning IPs incorrectly we decided to introduce a second
counter, which is a HashMap counting errors in an exact way. IPs are only
banned when this counter reaches the limit.

We keep the CBF as a first level filter. It's a fast check to filter IPs
without affecting the tracker's performance. When the IP is banned according
to the first filter we start a counter for that IP in the second
exact counter.

This solution should be good if the number of IPs is low. We have to
find another solution anyway for IPv6, where it is cheaper to own a range of
IPs.
diff --git a/src/servers/udp/server/banning.rs b/src/servers/udp/server/banning.rs
@@ -2,12 +2,20 @@
 //!
 //! It bans clients that send invalid connection id's.
 //!
-//! It uses a Counting Bloom Filter to keep track of the number of connection id
-//! errors per ip. That means there can be false positives, but not false
-//! negatives.
+//! It uses two levels of filtering:
 //!
-//! 1 out of 100000 requests will be a false positive and the client will not
-//! receive a response.
+//! 1. First, it uses a Counting Bloom Filter to keep track of the number of
+//!    connection ID errors per ip. That means there can be false positives, but
+//!    not false negatives. 1 out of 100000 requests will be a false positive
+//!    and the client will be banned and not receive a response.
+//! 2. Since we want to avoid false positives (banning a client that is not
+//!    sending invalid connection id's), we use a `HashMap` to keep track of the
+//!    exact number of connection ID errors per ip.
+//!
+//! This two level filtering is to avoid false positives. It has the advantage
+//! of being fast by using a Counting Bloom Filter and not having false
+//! negatives at the cost of increasing the memory usage.
+use std::collections::HashMap;
 use std::net::IpAddr;
 
 use bloom::{CountingBloomFilter, ASMS};
@@ -18,7 +26,8 @@ use crate::servers::udp::UDP_TRACKER_LOG_TARGET;
 
 pub struct BanService {
     max_connection_id_errors_per_ip: u32,
-    cbf: CountingBloomFilter,
+    fuzzy_error_counter: CountingBloomFilter,
+    accurate_error_counter: HashMap<IpAddr, u32>,
     local_addr: Url,
     last_connection_id_errors_reset: Instant,
 }
@@ -29,30 +38,47 @@ impl BanService {
         Self {
             max_connection_id_errors_per_ip,
             local_addr,
-            cbf: CountingBloomFilter::with_rate(4, 0.01, 100),
+            fuzzy_error_counter: CountingBloomFilter::with_rate(4, 0.01, 100),
+            accurate_error_counter: HashMap::new(),
             last_connection_id_errors_reset: tokio::time::Instant::now(),
         }
     }
 
     pub fn increase_counter(&mut self, ip: &IpAddr) {
-        self.cbf.insert(&ip.to_string());
+        self.fuzzy_error_counter.insert(&ip.to_string());
+        *self.accurate_error_counter.entry(*ip).or_insert(0) += 1;
     }
 
-    pub fn get_counter(&mut self, ip: &IpAddr) -> u32 {
-        self.cbf.estimate_count(&ip.to_string())
+    #[must_use]
+    pub fn get_count(&self, ip: &IpAddr) -> Option<u32> {
+        self.accurate_error_counter.get(ip).copied()
+    }
+
+    #[must_use]
+    pub fn get_estimate_count(&self, ip: &IpAddr) -> u32 {
+        self.fuzzy_error_counter.estimate_count(&ip.to_string())
     }
 
     /// Returns true if the given ip address is banned.
     #[must_use]
     pub fn is_banned(&self, ip: &IpAddr) -> bool {
-        let connection_id_errors_from_ip = self.cbf.estimate_count(&ip.to_string());
+        // First check if the ip is in the bloom filter (fast check)
+        if self.fuzzy_error_counter.estimate_count(&ip.to_string()) <= self.max_connection_id_errors_per_ip {
+            return false;
+        }
 
-        connection_id_errors_from_ip > self.max_connection_id_errors_per_ip
+        // Check with the exact counter (to avoid false positives)
+        match self.get_count(ip) {
+            Some(count) => count > self.max_connection_id_errors_per_ip,
+            None => false,
+        }
     }
 
-    /// Resets the filter and updates the reset timestamp.
+    /// Resets the filters and updates the reset timestamp.
     pub fn reset_bans(&mut self) {
-        self.cbf.clear();
+        self.fuzzy_error_counter.clear();
+
+        self.accurate_error_counter.clear();
 
         self.last_connection_id_errors_reset = Instant::now();
 
@@ -74,14 +100,14 @@ mod tests {
     }
 
     #[test]
-    fn it_should_increase_the_ip_counter() {
+    fn it_should_increase_the_errors_counter_for_a_given_ip() {
         let mut ban_service = ban_service(1);
 
         let ip: IpAddr = "127.0.0.2".parse().unwrap();
 
         ban_service.increase_counter(&ip);
 
-        assert_eq!(ban_service.get_counter(&ip), 1);
+        assert_eq!(ban_service.get_count(&ip), Some(1));
     }
 
     #[test]
@@ -93,6 +119,8 @@ mod tests {
         ban_service.increase_counter(&ip); // Counter = 1
         ban_service.increase_counter(&ip); // Counter = 2
 
+        println!("Counter: {}", ban_service.get_count(&ip).unwrap());
+
         assert!(ban_service.is_banned(&ip));
     }
 
@@ -117,6 +145,6 @@ mod tests {
 
         ban_service.reset_bans();
 
-        assert_eq!(ban_service.get_counter(&ip), 0);
+        assert_eq!(ban_service.get_estimate_count(&ip), 0);
     }
 }