Feature: support custom int/float type (#7)

* support custom int/float type
vimt · Jun 1, 2024 · 6f3c73c · 6f3c73c
1 parent 3dc1faf
commit 6f3c73c
Show file tree

Hide file tree

Showing 12 changed files with 959 additions and 181 deletions.
diff --git a/README.md b/README.md
@@ -26,6 +26,58 @@ assert r == {'country': 'COUNTRY', 'isp': 'ISP'}
 
 ## Examples
 see [csv_to_mmdb.py](./examples/csv_to_mmdb.py)
+
+
+## Using the Java Client
+
+### TLDR
+
+When generating an MMDB file for use with the Java client, you must specify the `int_type`:
+
+```python
+from mmdb_writer import MMDBWriter
+
+writer = MMDBWriter(int_type='int32')
+```
+
+Alternatively, you can explicitly specify data types as described in the [Type Enforcement](#type-enforcement) section.
+
+### Underlying Principles
+
+When the Java client deserializes a record into a structure, each numeric field takes the Java type that corresponds
+to the MMDB numeric type stored in the file. The mapping is as follows:
+
+| mmdb type    | java type  |
+|--------------|------------|
+| float (15)   | Float      |
+| double (3)   | Double     |
+| int32 (8)    | Integer    |
+| uint16 (5)   | Integer    |
+| uint32 (6)   | Long       |
+| uint64 (9)   | BigInteger |
+| uint128 (10) | BigInteger |
+
+When using the Python writer to generate an MMDB file, by default, it picks the MMDB type of each integer based on
+the magnitude of the value. For instance, `int(1)` is stored as `uint16`, and `int(2**16+1)` is stored as `uint32`,
+so the same field can map to `Integer` in one record and `Long` in another. This may cause deserialization failures
+in Java clients. Therefore, it is necessary to specify the `int_type` parameter when generating MMDB files to define
+the numeric type accurately.
+
+## Type Enforcement
+
+MMDB supports a variety of numeric types: `int32`, `uint16`, `uint32`, `uint64` and `uint128` for integers,
+and `float` (`f32`) and `double` (`f64`) for floating-point numbers. Python has only one integer type and one float type (which is actually `f64`).
+
+Therefore, when generating an MMDB file, you can specify the `int_type` parameter to control which MMDB numeric type
+integers are stored as. The behaviors for different `int_type` settings are:
+
+| int_type       | Behavior                                                                                                                                                                                                                                                      |
+|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| auto (default) | Automatically selects the MMDB numeric type based on the value size. <br/>Rules: <br/>`int32` for value < 0 <br/>`uint16` for 0 <= value < 2^16<br/>`uint32` for 2^16 <= value < 2^32<br/>`uint64` for 2^32 <= value < 2^64<br/> `uint128` for value >= 2^64. |
+| i32            | Stores all integer types as `int32`.                                                                                                                                                                                                                          |
+| u16            | Stores all integer types as `uint16`.                                                                                                                                                                                                                         |
+| u32            | Stores all integer types as `uint32`.                                                                                                                                                                                                                         |
+| u64            | Stores all integer types as `uint64`.                                                                                                                                                                                                                         |
+| u128           | Stores all integer types as `uint128`.                                                                                                                                                                                                                        |
 
 
 ## Reference: 

diff --git a/examples/csv_to_mmdb.py b/examples/csv_to_mmdb.py
@@ -8,25 +8,30 @@
 
 
 def main():
-    writer = MMDBWriter(4, 'Test.GeoIP', languages=['EN'], description="Test IP library")
+    writer = MMDBWriter(
+        4, "Test.GeoIP", languages=["EN"], description="Test IP library"
+    )
     data = defaultdict(list)
 
     # merge cidr
-    with open('fake_ip_info.csv', 'r') as f:
+    with open("fake_ip_info.csv", "r") as f:
         reader = csv.DictReader(f)
         for line in reader:
-            data[(line['country'], line['isp'])].append(IPNetwork(f'{line["ip"]}/{line["prefixlen"]}'))
+            data[(line["country"], line["isp"])].append(
+                IPNetwork(f'{line["ip"]}/{line["prefixlen"]}')
+            )
     for index, cidrs in data.items():
-        writer.insert_network(IPSet(cidrs), {'country': index[0], 'isp': index[1]})
-    writer.to_db_file('fake_ip_library.mmdb')
+        writer.insert_network(IPSet(cidrs), {"country": index[0], "isp": index[1]})
+    writer.to_db_file("fake_ip_library.mmdb")
 
 
 def test_read():
     import maxminddb
-    m = maxminddb.open_database('fake_ip_library.mmdb')
-    r = m.get('3.1.1.1')
+
+    m = maxminddb.open_database("fake_ip_library.mmdb")
+    r = m.get("3.1.1.1")
     print(r)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()