From 6db5b4a094263a07eeb82ba92b47754d25a8d0d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=97=E5=AE=87?= Date: Sat, 16 Jul 2022 20:01:54 +0800 Subject: [PATCH] Introduce `get_checksum_bytes` method and improvements `get_checksum_bytes` returns a descriptor checksum as `[u8; 8]` instead of `String`, potentially improving performance and memory usage. In addition to this, since descriptors only use characters that fit within a UTF-8 8-bit code unit, there is no need to use the `char` type (which is 4 bytes). This can also potentially bring in some performance and memory-usage benefits. --- CHANGELOG.md | 1 + src/descriptor/checksum.rs | 44 +++++++++++++++++--------------------- src/descriptor/error.rs | 4 ++-- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c28ca2be..dcda4371 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Add `descriptor::checksum::get_checksum_bytes` method. ## [v0.20.0] - [v0.19.0] diff --git a/src/descriptor/checksum.rs b/src/descriptor/checksum.rs index e9c99909..5ed1151b 100644 --- a/src/descriptor/checksum.rs +++ b/src/descriptor/checksum.rs @@ -14,12 +14,10 @@ //! This module contains a re-implementation of the function used by Bitcoin Core to calculate the //! 
checksum of a descriptor -use std::iter::FromIterator; - use crate::descriptor::DescriptorError; -const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ "; -const CHECKSUM_CHARSET: &str = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"; +const INPUT_CHARSET: &[u8] = b"0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ "; +const CHECKSUM_CHARSET: &[u8] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l"; fn poly_mod(mut c: u64, val: u64) -> u64 { let c0 = c >> 35; @@ -43,15 +41,17 @@ fn poly_mod(mut c: u64, val: u64) -> u64 { c } -/// Compute the checksum of a descriptor -pub fn get_checksum(desc: &str) -> Result { +/// Computes the checksum bytes of a descriptor +pub fn get_checksum_bytes(desc: &str) -> Result<[u8; 8], DescriptorError> { let mut c = 1; let mut cls = 0; let mut clscount = 0; - for ch in desc.chars() { + + for ch in desc.as_bytes() { let pos = INPUT_CHARSET - .find(ch) - .ok_or(DescriptorError::InvalidDescriptorCharacter(ch))? as u64; + .iter() + .position(|b| b == ch) + .ok_or(DescriptorError::InvalidDescriptorCharacter(*ch))? 
as u64; c = poly_mod(c, pos & 31); cls = cls * 3 + (pos >> 5); clscount += 1; @@ -67,17 +67,18 @@ pub fn get_checksum(desc: &str) -> Result { (0..8).for_each(|_| c = poly_mod(c, 0)); c ^= 1; - let mut chars = Vec::with_capacity(8); + let mut checksum = [0_u8; 8]; for j in 0..8 { - chars.push( - CHECKSUM_CHARSET - .chars() - .nth(((c >> (5 * (7 - j))) & 31) as usize) - .unwrap(), - ); + checksum[j] = CHECKSUM_CHARSET[((c >> (5 * (7 - j))) & 31) as usize]; } - Ok(String::from_iter(chars)) + Ok(checksum) +} + +/// Compute the checksum of a descriptor +pub fn get_checksum(desc: &str) -> Result { + // unsafe is okay here as the checksum only uses bytes in `CHECKSUM_CHARSET` + get_checksum_bytes(desc).map(|b| unsafe { String::from_utf8_unchecked(b.to_vec()) }) } #[cfg(test)] @@ -97,17 +98,12 @@ mod test { #[test] fn test_get_checksum_invalid_character() { - let sparkle_heart = vec![240, 159, 146, 150]; - let sparkle_heart = std::str::from_utf8(&sparkle_heart) - .unwrap() - .chars() - .next() - .unwrap(); + let sparkle_heart = unsafe { std::str::from_utf8_unchecked(&[240, 159, 146, 150]) }; let invalid_desc = format!("wpkh(tprv8ZgxMBicQKsPdpkqS7Eair4YxjcuuvDPNYmKX3sCniCf16tHEVrjjiSXEkFRnUH77yXc6ZcwHHcL{}fjdi5qUvw3VDfgYiH5mNsj5izuiu2N/1/2/*)", sparkle_heart); assert!(matches!( get_checksum(&invalid_desc).err(), - Some(DescriptorError::InvalidDescriptorCharacter(invalid_char)) if invalid_char == sparkle_heart + Some(DescriptorError::InvalidDescriptorCharacter(invalid_char)) if invalid_char == sparkle_heart.as_bytes()[0] )); } } diff --git a/src/descriptor/error.rs b/src/descriptor/error.rs index 3b231aca..efbb14e3 100644 --- a/src/descriptor/error.rs +++ b/src/descriptor/error.rs @@ -28,8 +28,8 @@ pub enum Error { /// Error while extracting and manipulating policies Policy(crate::descriptor::policy::PolicyError), - /// Invalid character found in the descriptor checksum - InvalidDescriptorCharacter(char), + /// Invalid byte found in the descriptor checksum + 
InvalidDescriptorCharacter(u8), /// BIP32 error Bip32(bitcoin::util::bip32::Error),