Skip to content

Commit a15866a

Browse files
committed
Support BTC checksum
1 parent 50903fb commit a15866a

File tree

3 files changed

+296
-0
lines changed

3 files changed

+296
-0
lines changed

sds/src/scanner/regex_rule/config.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ pub enum SecondaryValidator {
138138
AbaRtnChecksum,
139139
BrazilianCpfChecksum,
140140
BrazilianCnpjChecksum,
141+
BtcChecksum,
141142
ChineseIdChecksum,
142143
GithubTokenChecksum,
143144
IbanChecker,
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
use crate::secondary_validation::Validator;
2+
use sha2::{Digest, Sha256};
3+
use std::collections::HashMap;
4+
5+
use crate::secondary_validation::base58::decode_base58;
6+
7+
/// Secondary validator for Bitcoin address candidates: its `Validator`
/// implementation checks a match either as Base58Check or as Bech32/Bech32m.
pub struct BtcChecksum;
8+
9+
/// The 32-character Bech32 alphabet; a character's position in this string is
/// the value recorded for it in `BECH32_MAP`.
const BECH32_CHARSET: &str = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
10+
/// Polymod result that `bech32_verify_checksum` reports as `Bech32Spec::Bech32`.
const BECH32_CONST: u32 = 1;
11+
/// Polymod result that `bech32_verify_checksum` reports as `Bech32Spec::Bech32m`.
const BECH32M_CONST: u32 = 0x2bc830a3;
12+
/// Number of trailing bytes that `decode_base58_check` splits off the decoded
/// data and treats as the checksum.
const BASE58_CHECKSUM_LENGTH: usize = 4;
13+
14+
use lazy_static::lazy_static;
15+
16+
lazy_static! {
    /// Maps every character of `BECH32_CHARSET` to its index in that string,
    /// i.e. the 5-bit value the character stands for.
    static ref BECH32_MAP: HashMap<char, u8> = BECH32_CHARSET
        .chars()
        .enumerate()
        .map(|(index, symbol)| (symbol, index as u8))
        .collect();
}
25+
26+
impl Validator for BtcChecksum {
27+
fn is_valid_match(&self, regex_match: &str) -> bool {
28+
// Strip any whitespace and convert to bytes for validation
29+
let clean_input = regex_match
30+
.chars()
31+
.filter(|c| c.is_alphanumeric())
32+
.collect::<String>();
33+
34+
if clean_input
35+
.chars()
36+
.next()
37+
.filter(|c| c.is_ascii_digit())
38+
.is_some()
39+
{
40+
return decode_base58_check(&clean_input);
41+
}
42+
bech32_check(&clean_input)
43+
}
44+
}
45+
46+
/// Decode a Base58Check encoded string
47+
fn decode_base58_check(input: &str) -> bool {
48+
// https://github.com/bitcoin/bips/blob/master/bip-0013.mediawiki
49+
// First decode the base58 string
50+
let decoded = match decode_base58(input) {
51+
Ok(decoded) => decoded,
52+
Err(_) => return false,
53+
};
54+
55+
// Check minimum length (payload + 4 byte checksum)
56+
if decoded.len() < BASE58_CHECKSUM_LENGTH {
57+
return false;
58+
}
59+
60+
// Split payload and checksum
61+
let (payload, checksum) = decoded.split_at(decoded.len() - BASE58_CHECKSUM_LENGTH);
62+
63+
// Calculate double SHA256 hash of payload
64+
let hash1 = Sha256::digest(payload);
65+
let hash2 = Sha256::digest(hash1);
66+
67+
// Compare first 4 bytes of hash with provided checksum
68+
&hash2[0..4] == checksum
69+
}
70+
71+
fn bech32_check(input: &str) -> bool {
72+
if let Some(bech32_spec) = bech32_decode(input) {
73+
if let Some(fourth_char) = input.chars().nth(3) {
74+
return bech32_spec
75+
.get_fourth_char()
76+
.eq_ignore_ascii_case(&fourth_char);
77+
}
78+
}
79+
false
80+
}
81+
82+
/// Checksum variant that a decoded string satisfied.
#[derive(PartialEq, Debug)]
enum Bech32Spec {
    /// The polymod of the string equals the Bech32 constant.
    Bech32,
    /// The polymod of the string equals the Bech32m constant.
    Bech32m,
}

impl Bech32Spec {
    /// Character that must appear at the fourth position of an address using
    /// this checksum variant: `'q'` for Bech32 and `'p'` for Bech32m.
    fn get_fourth_char(&self) -> char {
        match *self {
            Self::Bech32 => 'q',
            Self::Bech32m => 'p',
        }
    }
}
96+
97+
/// Decode a Bech32/Bech32m string and determine HRP and data
98+
fn bech32_decode(input: &str) -> Option<Bech32Spec> {
99+
// https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki
100+
// https://github.com/bitcoin/bips/blob/master/bip-0350.mediawiki
101+
// Check case consistency
102+
let has_lower = input.chars().any(|c| c.is_lowercase());
103+
let has_upper = input.chars().any(|c| c.is_uppercase());
104+
if has_lower && has_upper {
105+
return None;
106+
}
107+
108+
let bech = input.to_lowercase();
109+
110+
let parts: Vec<&str> = bech.rsplitn(2, '1').collect();
111+
let hrp = parts[1];
112+
let data_part = parts[0];
113+
if hrp.is_empty() || data_part.len() < 6 {
114+
return None;
115+
}
116+
117+
let mut data: Vec<u8> = vec![];
118+
for c in data_part.chars() {
119+
if let Some(value) = BECH32_MAP.get(&c) {
120+
data.push(*value);
121+
} else {
122+
return None;
123+
}
124+
}
125+
bech32_verify_checksum(hrp, &data)
126+
}
127+
128+
/// Verify a Bech32 checksum given HRP and converted data characters
129+
fn bech32_verify_checksum(hrp: &str, data: &[u8]) -> Option<Bech32Spec> {
130+
let mut values = bech32_hrp_expand(hrp);
131+
values.extend_from_slice(data);
132+
133+
let const_value = bech32_poly_mod(&values);
134+
135+
if const_value == BECH32_CONST {
136+
Some(Bech32Spec::Bech32)
137+
} else if const_value == BECH32M_CONST {
138+
Some(Bech32Spec::Bech32m)
139+
} else {
140+
None
141+
}
142+
}
143+
144+
/// Expand the HRP into the value sequence fed to the checksum computation.
///
/// The output is, in order: the upper bits (`>> 5`) of every character, a
/// single `0` separator, then the lower five bits (`& 31`) of every character.
/// Each character is narrowed with `as u8` before the bit operations.
fn bech32_hrp_expand(hrp: &str) -> Vec<u8> {
    let upper_bits = hrp.chars().map(|ch| (ch as u8) >> 5);
    let lower_bits = hrp.chars().map(|ch| (ch as u8) & 31);

    upper_bits
        .chain(std::iter::once(0))
        .chain(lower_bits)
        .collect()
}
162+
163+
/// Compute the Bech32 polymod over a sequence of values.
///
/// Starting from 1, each input value shifts the 30-bit state left by five
/// bits and is XORed in; the five bits shifted out of the top select which
/// `GENERATOR` terms are XORed into the state.
fn bech32_poly_mod(values: &[u8]) -> u32 {
    const GENERATOR: [u32; 5] = [0x3B6A57B2, 0x26508E6D, 0x1EA119FA, 0x3D4233DD, 0x2A1462B3];

    values.iter().fold(1u32, |state, &symbol| {
        // Bits about to be shifted out of the 30-bit state.
        let overflow = state >> 25;
        let shifted = ((state & 0x1FFFFFF) << 5) ^ u32::from(symbol);

        GENERATOR
            .iter()
            .enumerate()
            .filter(|&(bit, _)| (overflow >> bit) & 1 != 0)
            .fold(shifted, |acc, (_, term)| acc ^ term)
    })
}
182+
#[cfg(test)]
mod test {
    use crate::secondary_validation::*;

    /// Well-formed addresses of every supported kind must be accepted.
    #[test]
    fn test_valid_bitcoin_addresses() {
        let accepted: &[&str] = &[
            // P2PKH addresses (start with '1')
            "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa",
            "1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2",
            "12cbQLTFMXRnSzktFkuoG3eHoMeFtpTu3S",
            "1AGNa15ZQXAZUgFiqJ2i7Z2DPU2J6hW62i",
            "17NdbrSGoUotzeGCcMMCqnFkEvLymoou9j",
            "1Q1pE5vPGEEMqRcVRMbtBK842Y6Pzo6nK9",
            // P2SH addresses (start with '3')
            "3J98t1WpEZ73CNmQviecrnyiWrnqRhWNLy",
            "3QJmV3qfvL9SuYo34YihAf3sRCW3qSinyC",
            // Bech32 addresses (P2WPKH and P2WSH)
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4",
            "bc1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3qccfmv3",
            // Bech32m addresses (P2TR)
            "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqzk5jj0",
        ];

        for &address in accepted {
            assert!(
                BtcChecksum.is_valid_match(address),
                "Failed for address: {}",
                address
            );
        }
    }

    /// Corrupted, malformed, or truncated addresses must be rejected.
    #[test]
    fn test_invalid_bitcoin_addresses() {
        let rejected: &[&str] = &[
            // Invalid Base58Check checksum
            "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNb",
            "1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN3",
            "1AGNa15ZQXAZUgFiqJ3i7Z2DPU2J6hW62i",
            "1AGNa15ZQXAZUgFiqJ2i7Z2DPU2J6hW62j",
            "1AGNa15ZQXAZUgFiqJ2i7Z2DPU2J6hW62X",
            "1ANNa15ZQXAZUgFiqJ2i7Z2DPU2J6hW62i",
            "1A Na15ZQXAZUgFiqJ2i7Z2DPU2J6hW62i",
            "1AGNa15ZQXAZUgFiqJ2i7Z2DPU2J6hW62iz",
            "1AGNa15ZQXAZUgFiqJ2i7Z2DPU2J6hW62izz",
            "1Q1pE5vPGEEMqRcVRMbtBK842Y6Pzo6nJ9",
            "1AGNa15ZQXAZUgFiqJ2i7Z2DPU2J6hW62I",
            // Invalid Base58Check characters
            "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfN0", // Contains '0'
            "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNO", // Contains 'O'
            "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNI", // Contains 'I'
            "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNl", // Contains 'l'
            "17NdbrSGoUotzeGCcMMC?nFkEvLymoou9j", // Contains '?'
            // Invalid Bech32 checksum (last character changed)
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t5",
            "bc1qrp33g0q5c5txsp9arysrx4k6zdkfs4nce4xj0gdcccefvpysxf3qccfmv4",
            // Invalid Bech32 characters
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3tb", // 'b' is not in the Bech32 charset
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3tO", // 'O' is not in the Bech32 charset
            // Mixed case (invalid for Bech32)
            "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7Kv8f3t4",
            // Too short
            "1",
            "12",
            "bc1",
        ];

        for &address in rejected {
            assert!(
                !BtcChecksum.is_valid_match(address),
                "Should be invalid: {}",
                address
            );
        }
    }

    /// Surrounding whitespace must not affect validation.
    #[test]
    fn test_addresses_with_whitespace() {
        let padded: &[&str] = &[
            // Base58Check with whitespace
            " 1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa ",
            "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa\n",
            "\t1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa\t",
            // Bech32 with whitespace
            " bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4 ",
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4\n",
        ];

        for &address in padded {
            assert!(BtcChecksum.is_valid_match(address));
        }
    }

    /// Bech32-specific rules: case handling, Bech32m, testnet HRP, and
    /// rejection of corrupted strings.
    #[test]
    fn test_bech32_specific_validation() {
        let accepted: &[&str] = &[
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4",
            "BC1QW508D6QEJXTDG4Y5R3ZARVARY0C5XW7KV8F3T4", // uppercase should work
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4", // lowercase should work
            // Bech32m (taproot)
            "bc1p0xlxvlhemja6c4dqv22uapctqupfhlxm9h8z3k2e72q4k9hcz7vqzk5jj0",
            // Testnet addresses
            "tb1qw508d6qejxtdg4y5r3zarvary0c5xw7kxpjzsx",
        ];
        for &address in accepted {
            assert!(BtcChecksum.is_valid_match(address));
        }

        let rejected: &[&str] = &[
            // Invalid: mixed case
            "bc1QW508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t4",
            // Invalid: wrong checksum
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3t5",
            // Invalid: contains invalid Bech32 character
            "bc1qw508d6qejxtdg4y5r3zarvary0c5xw7kv8f3tb",
        ];
        for &address in rejected {
            assert!(!BtcChecksum.is_valid_match(address));
        }
    }
}

sds/src/secondary_validation/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ mod aba_rtn_checksum;
22
mod base58;
33
mod brazilian_cnpj_checksum;
44
mod brazilian_cpf_checksum;
5+
mod btc_checksum;
56
mod chinese_id_checksum;
67
mod france_ssn_checksum;
78
mod github_token_checksum;
@@ -21,6 +22,7 @@ use crate::scanner::regex_rule::config::SecondaryValidator;
2122
pub use crate::secondary_validation::aba_rtn_checksum::AbaRtnChecksum;
2223
pub use crate::secondary_validation::brazilian_cnpj_checksum::BrazilianCnpjChecksum;
2324
pub use crate::secondary_validation::brazilian_cpf_checksum::BrazilianCpfChecksum;
25+
pub use crate::secondary_validation::btc_checksum::BtcChecksum;
2426
pub use crate::secondary_validation::chinese_id_checksum::ChineseIdChecksum;
2527
pub use crate::secondary_validation::france_ssn_checksum::FranceSsnChecksum;
2628
pub use crate::secondary_validation::github_token_checksum::GithubTokenChecksum;
@@ -75,6 +77,7 @@ impl Validator for SecondaryValidator {
7577
SecondaryValidator::BrazilianCnpjChecksum => {
7678
BrazilianCnpjChecksum.is_valid_match(regex_match)
7779
}
80+
SecondaryValidator::BtcChecksum => BtcChecksum.is_valid_match(regex_match),
7881
SecondaryValidator::AbaRtnChecksum => AbaRtnChecksum.is_valid_match(regex_match),
7982
SecondaryValidator::PolishNationalIdChecksum => {
8083
PolishNationalIdChecksum.is_valid_match(regex_match)

0 commit comments

Comments
 (0)