Introduce get_checksum_bytes method and improvements

`get_checksum_bytes` returns a descriptor checksum as `[u8; 8]` instead
of `String`, potentially improving performance and memory usage.

In addition to this, since descriptors only use charaters that fit
within a UTF-8 8-bit code unit, there is no need to use the `char` type
(which is 4 bytes). This can also potentially bring in some performance
and memory-usage benefits.
This commit is contained in:
志宇 2022-07-16 20:01:54 +08:00
parent 9165faef95
commit 6db5b4a094
No known key found for this signature in database
GPG Key ID: F6345C9837C2BDE8
3 changed files with 23 additions and 26 deletions

View File

@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
- Add `descriptor::checksum::get_checksum_bytes` method.
## [v0.20.0] - [v0.19.0]

View File

@ -14,12 +14,10 @@
//! This module contains a re-implementation of the function used by Bitcoin Core to calculate the
//! checksum of a descriptor
use std::iter::FromIterator;
use crate::descriptor::DescriptorError;
const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ";
const CHECKSUM_CHARSET: &str = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
const INPUT_CHARSET: &[u8] = b"0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ";
const CHECKSUM_CHARSET: &[u8] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";
fn poly_mod(mut c: u64, val: u64) -> u64 {
let c0 = c >> 35;
@ -43,15 +41,17 @@ fn poly_mod(mut c: u64, val: u64) -> u64 {
c
}
/// Compute the checksum of a descriptor
pub fn get_checksum(desc: &str) -> Result<String, DescriptorError> {
/// Computes the checksum bytes of a descriptor
pub fn get_checksum_bytes(desc: &str) -> Result<[u8; 8], DescriptorError> {
let mut c = 1;
let mut cls = 0;
let mut clscount = 0;
for ch in desc.chars() {
for ch in desc.as_bytes() {
let pos = INPUT_CHARSET
.find(ch)
.ok_or(DescriptorError::InvalidDescriptorCharacter(ch))? as u64;
.iter()
.position(|b| b == ch)
.ok_or(DescriptorError::InvalidDescriptorCharacter(*ch))? as u64;
c = poly_mod(c, pos & 31);
cls = cls * 3 + (pos >> 5);
clscount += 1;
@ -67,17 +67,18 @@ pub fn get_checksum(desc: &str) -> Result<String, DescriptorError> {
(0..8).for_each(|_| c = poly_mod(c, 0));
c ^= 1;
let mut chars = Vec::with_capacity(8);
let mut checksum = [0_u8; 8];
for j in 0..8 {
chars.push(
CHECKSUM_CHARSET
.chars()
.nth(((c >> (5 * (7 - j))) & 31) as usize)
.unwrap(),
);
checksum[j] = CHECKSUM_CHARSET[((c >> (5 * (7 - j))) & 31) as usize];
}
Ok(String::from_iter(chars))
Ok(checksum)
}
/// Compute the checksum of a descriptor
pub fn get_checksum(desc: &str) -> Result<String, DescriptorError> {
// unsafe is okay here as the checksum only uses bytes in `CHECKSUM_CHARSET`
get_checksum_bytes(desc).map(|b| unsafe { String::from_utf8_unchecked(b.to_vec()) })
}
#[cfg(test)]
@ -97,17 +98,12 @@ mod test {
#[test]
fn test_get_checksum_invalid_character() {
let sparkle_heart = vec![240, 159, 146, 150];
let sparkle_heart = std::str::from_utf8(&sparkle_heart)
.unwrap()
.chars()
.next()
.unwrap();
let sparkle_heart = unsafe { std::str::from_utf8_unchecked(&[240, 159, 146, 150]) };
let invalid_desc = format!("wpkh(tprv8ZgxMBicQKsPdpkqS7Eair4YxjcuuvDPNYmKX3sCniCf16tHEVrjjiSXEkFRnUH77yXc6ZcwHHcL{}fjdi5qUvw3VDfgYiH5mNsj5izuiu2N/1/2/*)", sparkle_heart);
assert!(matches!(
get_checksum(&invalid_desc).err(),
Some(DescriptorError::InvalidDescriptorCharacter(invalid_char)) if invalid_char == sparkle_heart
Some(DescriptorError::InvalidDescriptorCharacter(invalid_char)) if invalid_char == sparkle_heart.as_bytes()[0]
));
}
}

View File

@ -28,8 +28,8 @@ pub enum Error {
/// Error while extracting and manipulating policies
Policy(crate::descriptor::policy::PolicyError),
/// Invalid character found in the descriptor checksum
InvalidDescriptorCharacter(char),
/// Invalid byte found in the descriptor checksum
InvalidDescriptorCharacter(u8),
/// BIP32 error
Bip32(bitcoin::util::bip32::Error),