From 03deafb5538e55462443cd8ead0114bf788a7765 Mon Sep 17 00:00:00 2001 From: Daniela Brozzoni Date: Wed, 1 Mar 2023 11:16:05 +0100 Subject: [PATCH] Move bdk_file_store into the bdk repo :tada: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original repository: https://github.com/LLFourn/bdk_core_staging/tree/250b4f1dcce10805adfb2f201901675eb6076554/bdk_file_store Co-authored-by: 志宇 Co-authored-by: LLFourn Co-authored-by: Vladimir Fomene --- Cargo.toml | 1 + crates/file_store/Cargo.toml | 10 + crates/file_store/src/file_store.rs | 286 +++++++++++++++++++++ crates/file_store/src/lib.rs | 32 +++ crates/file_store/tests/test_file_store.rs | 158 ++++++++++++ 5 files changed, 487 insertions(+) create mode 100644 crates/file_store/Cargo.toml create mode 100644 crates/file_store/src/file_store.rs create mode 100644 crates/file_store/src/lib.rs create mode 100644 crates/file_store/tests/test_file_store.rs diff --git a/Cargo.toml b/Cargo.toml index 83b199b9..51145963 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ members = [ "crates/bdk", "crates/bdk_chain", + "crates/bdk_file_store", "example-crates/esplora-wallet", "example-crates/electrum-wallet", ] diff --git a/crates/file_store/Cargo.toml b/crates/file_store/Cargo.toml new file mode 100644 index 00000000..1670cc09 --- /dev/null +++ b/crates/file_store/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "bdk_file_store" +version = "0.0.1" +edition = "2021" +license = "MIT OR Apache-2.0" + +[dependencies] +bdk_chain = { path = "../bdk_chain", version = "0.3", features = [ "serde", "miniscript" ] } +bincode = { version = "2.0.0-rc.2", features = [ "serde" ] } +serde = { version = "1", features = ["derive"] } diff --git a/crates/file_store/src/file_store.rs b/crates/file_store/src/file_store.rs new file mode 100644 index 00000000..0d3c0c99 --- /dev/null +++ b/crates/file_store/src/file_store.rs @@ -0,0 +1,286 @@ +//! Module for persisting data on-disk. +//! +//! The star of the show is [`KeychainStore`] which maintains an append-only file of +//! [`KeychainChangeSet`]s which can be used to restore a [`KeychainTracker`]. +use bdk_chain::{ + bitcoin::Transaction, + keychain::{KeychainChangeSet, KeychainTracker}, + sparse_chain, AsTransaction, +}; +use core::marker::PhantomData; +use std::{ + fs::{File, OpenOptions}, + io::{self, Read, Seek, Write}, + path::Path, +}; + +/// BDK File Store magic bytes length. +pub const MAGIC_BYTES_LEN: usize = 12; + +/// BDK File Store magic bytes. +pub const MAGIC_BYTES: [u8; MAGIC_BYTES_LEN] = [98, 100, 107, 102, 115, 48, 48, 48, 48, 48, 48, 48]; + +/// Persists an append only list of `KeychainChangeSet` to a single file. +/// [`KeychainChangeSet`] record the changes made to a [`KeychainTracker`]. +#[derive(Debug)] +pub struct KeychainStore { + db_file: File, + changeset_type_params: core::marker::PhantomData<(K, P, T)>, +} + +impl KeychainStore +where + K: Ord + Clone + core::fmt::Debug, + P: sparse_chain::ChainPosition, + T: Ord + AsTransaction + Clone, + KeychainChangeSet: serde::Serialize + serde::de::DeserializeOwned, +{ + /// Creates a new store from a [`File`]. + /// + /// The file must have been opened with read, write permissions. + /// + /// [`File`]: std::fs::File + pub fn new(mut file: File) -> Result { + file.rewind()?; + + let mut magic_bytes = [0_u8; MAGIC_BYTES_LEN]; + file.read_exact(&mut magic_bytes)?; + + if magic_bytes != MAGIC_BYTES { + return Err(FileError::InvalidMagicBytes(magic_bytes)); + } + + Ok(Self { + db_file: file, + changeset_type_params: Default::default(), + }) + } + + /// Creates or loads a a store from `db_path`. If no file exists there it will be created. + pub fn new_from_path>(db_path: D) -> Result { + let already_exists = db_path.as_ref().try_exists()?; + + let mut db_file = OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(db_path)?; + + if !already_exists { + db_file.write_all(&MAGIC_BYTES)?; + } + + Self::new(db_file) + } + + /// Iterates over the stored changeset from first to last changing the seek position at each + /// iteration. + /// + /// The iterator may fail to read an entry and therefore return an error. However the first time + /// it returns an error will be the last. After doing so the iterator will always yield `None`. + /// + /// **WARNING**: This method changes the write position in the underlying file. You should + /// always iterate over all entries until `None` is returned if you want your next write to go + /// at the end, otherwise you writing over existing enties. + pub fn iter_changesets( + &mut self, + ) -> Result>, io::Error> { + self.db_file + .seek(io::SeekFrom::Start(MAGIC_BYTES_LEN as _))?; + + Ok(EntryIter::new(&mut self.db_file)) + } + + /// Loads all the changesets that have been stored as one giant changeset. + /// + /// This function returns a tuple of the aggregate changeset and a result which indicates + /// whether an error occurred while reading or deserializing one of the entries. If so the + /// changeset will consist of all of those it was able to read. + /// + /// You should usually check the error. In many applications it may make sense to do a full + /// wallet scan with a stop gap after getting an error since it is likely that one of the + /// changesets it was unable to read changed the derivation indicies of the tracker. + /// + /// **WARNING**: This method changes the write position of the underlying file. The next + /// changeset will be written over the erroring entry (or the end of the file if none existed). + pub fn aggregate_changeset(&mut self) -> (KeychainChangeSet, Result<(), IterError>) { + let mut changeset = KeychainChangeSet::default(); + let result = (|| { + let iter_changeset = self.iter_changesets()?; + for next_changeset in iter_changeset { + changeset.append(next_changeset?); + } + Ok(()) + })(); + + (changeset, result) + } + + /// Reads and applies all the changesets stored sequentially to tracker, stopping when it fails + /// to read the next one. + /// + /// **WARNING**: This method changes the write position of the underlying file. The next + /// changeset will be written over the erroring entry (or the end of the file if none existed). + pub fn load_into_keychain_tracker( + &mut self, + tracker: &mut KeychainTracker, + ) -> Result<(), IterError> { + for changeset in self.iter_changesets()? { + tracker.apply_changeset(changeset?) + } + Ok(()) + } + + /// Append a new changeset to the file and truncate file to the end of the appended changeset. + /// + /// The truncation is to avoid the possibility of having a valid, but inconsistent changeset + /// directly after the appended changeset. + pub fn append_changeset( + &mut self, + changeset: &KeychainChangeSet, + ) -> Result<(), io::Error> { + if changeset.is_empty() { + return Ok(()); + } + + bincode::encode_into_std_write( + bincode::serde::Compat(changeset), + &mut self.db_file, + bincode::config::standard(), + ) + .map_err(|e| match e { + bincode::error::EncodeError::Io { inner, .. } => inner, + unexpected_err => panic!("unexpected bincode error: {}", unexpected_err), + })?; + + // truncate file after this changeset addition + // if this is not done, data after this changeset may represent valid changesets, however + // applying those changesets on top of this one may result in inconsistent state + let pos = self.db_file.stream_position()?; + self.db_file.set_len(pos)?; + + // We want to make sure that derivation indexe changes are written to disk as soon as + // possible so you know about the write failure before you give ou the address in the application. + if !changeset.derivation_indices.is_empty() { + self.db_file.sync_data()?; + } + + Ok(()) + } +} + +/// Error that occurs due to problems encountered with the file. +#[derive(Debug)] +pub enum FileError { + /// IO error, this may mean that the file is too short. + Io(io::Error), + /// Magic bytes do not match expected. + InvalidMagicBytes([u8; MAGIC_BYTES_LEN]), +} + +impl core::fmt::Display for FileError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::Io(e) => write!(f, "io error trying to read file: {}", e), + Self::InvalidMagicBytes(b) => write!( + f, + "file has invalid magic bytes: expected={:?} got={:?}", + MAGIC_BYTES, b + ), + } + } +} + +impl From for FileError { + fn from(value: io::Error) -> Self { + Self::Io(value) + } +} + +impl std::error::Error for FileError {} + +/// Error type for [`EntryIter`]. +#[derive(Debug)] +pub enum IterError { + /// Failure to read from file. + Io(io::Error), + /// Failure to decode data from file. + Bincode(bincode::error::DecodeError), +} + +impl core::fmt::Display for IterError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + IterError::Io(e) => write!(f, "io error trying to read entry {}", e), + IterError::Bincode(e) => write!(f, "bincode error while reading entry {}", e), + } + } +} + +impl std::error::Error for IterError {} + +/// Iterator over entries in a file store. +/// +/// Reads and returns an entry each time [`next`] is called. If an error occurs while reading the +/// iterator will yield a `Result::Err(_)` instead and then `None` for the next call to `next`. +/// +/// [`next`]: Self::next +pub struct EntryIter<'a, V> { + db_file: &'a mut File, + types: PhantomData, + error_exit: bool, +} + +impl<'a, V> EntryIter<'a, V> { + pub fn new(db_file: &'a mut File) -> Self { + Self { + db_file, + types: PhantomData, + error_exit: false, + } + } +} + +impl<'a, V> Iterator for EntryIter<'a, V> +where + V: serde::de::DeserializeOwned, +{ + type Item = Result; + + fn next(&mut self) -> Option { + let result = (|| { + let pos = self.db_file.stream_position()?; + + match bincode::decode_from_std_read(self.db_file, bincode::config::standard()) { + Ok(bincode::serde::Compat(changeset)) => Ok(Some(changeset)), + Err(e) => { + if let bincode::error::DecodeError::Io { inner, .. } = &e { + if inner.kind() == io::ErrorKind::UnexpectedEof { + let eof = self.db_file.seek(io::SeekFrom::End(0))?; + if pos == eof { + return Ok(None); + } + } + } + + self.db_file.seek(io::SeekFrom::Start(pos))?; + Err(IterError::Bincode(e)) + } + } + })(); + + let result = result.transpose(); + + if let Some(Err(_)) = &result { + self.error_exit = true; + } + + result + } +} + +impl From for IterError { + fn from(value: io::Error) -> Self { + IterError::Io(value) + } +} diff --git a/crates/file_store/src/lib.rs b/crates/file_store/src/lib.rs new file mode 100644 index 00000000..9e34acfb --- /dev/null +++ b/crates/file_store/src/lib.rs @@ -0,0 +1,32 @@ +mod file_store; +use bdk_chain::{ + bitcoin::Transaction, + keychain::{KeychainChangeSet, KeychainTracker, PersistBackend}, + sparse_chain::ChainPosition, +}; +pub use file_store::*; + +impl<'de, K, P> PersistBackend for KeychainStore +where + K: Ord + Clone + core::fmt::Debug, + P: ChainPosition, + KeychainChangeSet: serde::Serialize + serde::de::DeserializeOwned, +{ + type WriteError = std::io::Error; + + type LoadError = IterError; + + fn append_changeset( + &mut self, + changeset: &KeychainChangeSet, + ) -> Result<(), Self::WriteError> { + KeychainStore::append_changeset(self, changeset) + } + + fn load_into_keychain_tracker( + &mut self, + tracker: &mut KeychainTracker, + ) -> Result<(), Self::LoadError> { + KeychainStore::load_into_keychain_tracker(self, tracker) + } +} diff --git a/crates/file_store/tests/test_file_store.rs b/crates/file_store/tests/test_file_store.rs new file mode 100644 index 00000000..5230c097 --- /dev/null +++ b/crates/file_store/tests/test_file_store.rs @@ -0,0 +1,158 @@ +use bdk_chain::{ + bitcoin::Transaction, + keychain::{KeychainChangeSet, KeychainTracker}, + TxHeight, +}; +use bdk_file_store::{FileError, IterError, KeychainStore, MAGIC_BYTES, MAGIC_BYTES_LEN}; +use serde; +use std::{ + format, + fs::{File, OpenOptions}, + io::{Read, Write}, + path::{Path, PathBuf}, + vec::Vec, +}; + +struct TempPath(PathBuf); + +impl TempPath { + fn new() -> Self { + let now = std::time::UNIX_EPOCH + .elapsed() + .expect("must get epoch") + .as_nanos(); + let mut file_path = std::env::temp_dir(); + file_path.push(format!("bdk_test_{}", now)); + Self(file_path) + } + + fn open(&self) -> File { + OpenOptions::new() + .read(true) + .write(true) + .create(true) + .open(self.0.as_path()) + .expect("must open") + } +} + +impl AsRef for TempPath { + fn as_ref(&self) -> &Path { + self.0.as_path() + } +} + +impl Drop for TempPath { + fn drop(&mut self) { + if let Err(e) = std::fs::remove_file(self.0.as_path()) { + if e.kind() != std::io::ErrorKind::NotFound { + panic!("remove file unexpected error: {}", e); + } + }; + } +} + +#[derive( + Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] +enum TestKeychain { + External, + Internal, +} + +impl core::fmt::Display for TestKeychain { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::External => write!(f, "external"), + Self::Internal => write!(f, "internal"), + } + } +} + +#[test] +fn magic_bytes() { + assert_eq!(&MAGIC_BYTES, "bdkfs0000000".as_bytes()); +} + +#[test] +fn new_fails_if_file_is_too_short() { + let path = TempPath::new(); + path.open() + .write_all(&MAGIC_BYTES[..MAGIC_BYTES_LEN - 1]) + .expect("should write"); + + match KeychainStore::::new(path.open()) { + Err(FileError::Io(e)) => assert_eq!(e.kind(), std::io::ErrorKind::UnexpectedEof), + unexpected => panic!("unexpected result: {:?}", unexpected), + }; +} + +#[test] +fn new_fails_if_magic_bytes_are_invalid() { + let invalid_magic_mnemonic = "ldkfs0000000"; + + let path = TempPath::new(); + path.open() + .write_all(invalid_magic_mnemonic.as_bytes()) + .expect("should write"); + + match KeychainStore::::new(path.open()) { + Err(FileError::InvalidMagicBytes(b)) => { + assert_eq!(b, invalid_magic_mnemonic.as_bytes()) + } + unexpected => panic!("unexpected result: {:?}", unexpected), + }; +} + +#[test] +fn append_changeset_truncates_invalid_bytes() { + use bdk_chain::miniscript; + use core::str::FromStr; + // initial data to write to file (magic bytes + invalid data) + let mut data = [255_u8; 2000]; + data[..MAGIC_BYTES_LEN].copy_from_slice(&MAGIC_BYTES); + + let descriptor = miniscript::Descriptor::from_str("tr([73c5da0a/86'/0'/0']xpub6BgBgsespWvERF3LHQu6CnqdvfEvtMcQjYrcRzx53QJjSxarj2afYWcLteoGVky7D3UKDP9QyrLprQ3VCECoY49yfdDEHGCtMMj92pReUsQ/0/*)#rg247h69").unwrap(); + let mut tracker = KeychainTracker::::default(); + tracker.add_keychain(TestKeychain::External, descriptor); + let changeset = KeychainChangeSet { + derivation_indices: tracker + .txout_index + .reveal_to_target(&TestKeychain::External, 21) + .1, + chain_graph: Default::default(), + }; + + let path = TempPath::new(); + path.open().write_all(&data).expect("should write"); + + let mut store = KeychainStore::::new(path.open()) + .expect("should open"); + match store.iter_changesets().expect("seek should succeed").next() { + Some(Err(IterError::Bincode(_))) => {} + unexpected_res => panic!("unexpected result: {:?}", unexpected_res), + } + + store.append_changeset(&changeset).expect("should append"); + + drop(store); + + let got_bytes = { + let mut buf = Vec::new(); + path.open().read_to_end(&mut buf).expect("should read"); + buf + }; + + let expected_bytes = { + let mut buf = MAGIC_BYTES.to_vec(); + bincode::encode_into_std_write( + bincode::serde::Compat(&changeset), + &mut buf, + bincode::config::standard(), + ) + .expect("should encode"); + buf + }; + + assert_eq!(got_bytes, expected_bytes); +}