use std::{
fmt::Debug,
fs::{File, OpenOptions},
io::{self, Read, Seek, Write},
marker::PhantomData,
path::Path,
};
use bdk_chain::{Append, PersistBackend};
use bincode::Options;
use crate::{bincode_options, EntryIter, FileError, IterError};
/// Persists an append-only list of changesets (`C`) to a single file.
///
/// Each changeset is the result of mutating some in-memory state that the caller tracks;
/// replaying all stored changesets in order recovers that state.
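///
/// # Example
///
/// A minimal round-trip sketch, assuming this crate is `bdk_file_store` and using an
/// illustrative `ChangeSet` alias; not compiled as a doc-test:
///
/// ```ignore
/// use std::collections::BTreeSet;
/// use bdk_file_store::Store;
///
/// // `BTreeSet<String>` implements `Append`, `Serialize` and `DeserializeOwned`.
/// type ChangeSet = BTreeSet<String>;
/// const MAGIC: &[u8] = b"my_app_magic";
///
/// let mut store = Store::<ChangeSet>::open_or_create_new(MAGIC, "db.dat").expect("must open");
/// store
///     .append_changeset(&ChangeSet::from(["a".to_string()]))
///     .expect("must append");
/// let aggregate = store.aggregate_changesets().expect("must aggregate");
/// assert_eq!(aggregate, Some(ChangeSet::from(["a".to_string()])));
/// ```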
#[derive(Debug)]
pub struct Store<C> {
magic_len: usize,
db_file: File,
marker: PhantomData<C>,
}
impl<C> PersistBackend<C> for Store<C>
where
C: Append + serde::Serialize + serde::de::DeserializeOwned,
{
type WriteError = std::io::Error;
type LoadError = IterError;
fn write_changes(&mut self, changeset: &C) -> Result<(), Self::WriteError> {
self.append_changeset(changeset)
}
fn load_from_persistence(&mut self) -> Result<Option<C>, Self::LoadError> {
self.aggregate_changesets().map_err(|e| e.iter_error)
}
}
impl<C> Store<C>
where
C: Append + serde::Serialize + serde::de::DeserializeOwned,
{
    /// Create a new [`Store`] file (opened with read and write access); error if the file
    /// already exists.
///
    /// `magic` is the byte prefix written to the start of the new file. It will be checked when
    /// the `Store` is later opened with [`open`].
///
/// [`open`]: Store::open
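    ///
    /// # Example
    ///
    /// A minimal sketch (`ChangeSet` and the file path are illustrative; not compiled as a
    /// doc-test):
    ///
    /// ```ignore
    /// // Fails with an `io::Error` if "db.dat" already exists.
    /// let store = Store::<ChangeSet>::create_new(b"my_app_magic", "db.dat")?;
    /// ```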
pub fn create_new<P>(magic: &[u8], file_path: P) -> Result<Self, FileError>
where
P: AsRef<Path>,
{
if file_path.as_ref().exists() {
            // A plain `io::Error` is returned instead of adding a `FileError` variant because the
            // nightly-only `File::create_new` reports this same condition as an `io::Error`, so
            // this check can be swapped for it once that method stabilizes.
return Err(FileError::Io(io::Error::new(
io::ErrorKind::Other,
"file already exists",
)));
}
let mut f = OpenOptions::new()
.create(true)
.read(true)
.write(true)
.truncate(true)
.open(file_path)?;
f.write_all(magic)?;
Ok(Self {
magic_len: magic.len(),
db_file: f,
marker: Default::default(),
})
}
/// Open an existing [`Store`].
///
/// Use [`create_new`] to create a new `Store`.
///
/// # Errors
///
    /// If the file's magic bytes prefix does not match the provided `magic`, the
/// [`FileError::InvalidMagicBytes`] error variant will be returned.
///
/// [`create_new`]: Store::create_new
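    ///
    /// # Example
    ///
    /// A sketch of distinguishing the error cases (`ChangeSet` is an illustrative caller-defined
    /// type; not compiled as a doc-test):
    ///
    /// ```ignore
    /// use bdk_file_store::{FileError, Store};
    ///
    /// match Store::<ChangeSet>::open(b"my_app_magic", "db.dat") {
    ///     Ok(store) => { /* ready to use */ }
    ///     Err(FileError::InvalidMagicBytes { got, expected }) => {
    ///         eprintln!("not our file: got {:?}, expected {:?}", got, expected)
    ///     }
    ///     Err(FileError::Io(err)) => eprintln!("io error: {}", err),
    /// }
    /// ```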
pub fn open<P>(magic: &[u8], file_path: P) -> Result<Self, FileError>
where
P: AsRef<Path>,
{
let mut f = OpenOptions::new().read(true).write(true).open(file_path)?;
let mut magic_buf = vec![0_u8; magic.len()];
f.read_exact(&mut magic_buf)?;
if magic_buf != magic {
return Err(FileError::InvalidMagicBytes {
got: magic_buf,
expected: magic.to_vec(),
});
}
Ok(Self {
magic_len: magic.len(),
db_file: f,
marker: Default::default(),
})
}
    /// Attempt to open an existing [`Store`] file; create it if the file does not exist.
///
/// Internally, this calls either [`open`] or [`create_new`].
///
/// [`open`]: Store::open
/// [`create_new`]: Store::create_new
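    ///
    /// # Example
    ///
    /// A sketch of the common "open the database on startup" path (names are illustrative; not
    /// compiled as a doc-test):
    ///
    /// ```ignore
    /// // Opens "db.dat" if present (checking the magic bytes), otherwise creates it.
    /// let store = Store::<ChangeSet>::open_or_create_new(b"my_app_magic", "db.dat")?;
    /// ```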
pub fn open_or_create_new<P>(magic: &[u8], file_path: P) -> Result<Self, FileError>
where
P: AsRef<Path>,
{
if file_path.as_ref().exists() {
Self::open(magic, file_path)
} else {
Self::create_new(magic, file_path)
}
}
    /// Iterates over the stored changesets from first to last, changing the seek position at each
/// iteration.
///
    /// The iterator may fail to read an entry and therefore return an error. The first error it
    /// returns is also the last: after yielding an error, the iterator will only yield `None`.
///
/// **WARNING**: This method changes the write position in the underlying file. You should
/// always iterate over all entries until `None` is returned if you want your next write to go
/// at the end; otherwise, you will write over existing entries.
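    ///
    /// # Example
    ///
    /// A sketch of draining the iterator before writing, so the next append lands at the end of
    /// the file (`apply` is a hypothetical handler; not compiled as a doc-test):
    ///
    /// ```ignore
    /// for entry in store.iter_changesets() {
    ///     match entry {
    ///         Ok(changeset) => apply(changeset),
    ///         // after the first error the iterator only yields `None`, so we can stop here
    ///         Err(_err) => break,
    ///     }
    /// }
    /// // the file position is now where the next `append_changeset` will write
    /// ```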
pub fn iter_changesets(&mut self) -> EntryIter<C> {
EntryIter::new(self.magic_len as u64, &mut self.db_file)
}
/// Loads all the changesets that have been stored as one giant changeset.
///
/// This function returns the aggregate changeset, or `None` if nothing was persisted.
    /// If reading or deserializing any of the entries fails, the returned error also contains the
    /// aggregate of all entries that could be read (see [`AggregateChangesetsError`]).
///
/// You should usually check the error. In many applications, it may make sense to do a full
/// wallet scan with a stop-gap after getting an error, since it is likely that one of the
/// changesets it was unable to read changed the derivation indices of the tracker.
///
/// **WARNING**: This method changes the write position of the underlying file. The next
/// changeset will be written over the erroring entry (or the end of the file if none existed).
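    ///
    /// # Example
    ///
    /// A sketch of recovering whatever was readable when an entry is corrupt (`restore` and
    /// `trigger_full_rescan` are hypothetical; not compiled as a doc-test):
    ///
    /// ```ignore
    /// match store.aggregate_changesets() {
    ///     Ok(Some(changeset)) => restore(changeset),
    ///     Ok(None) => { /* nothing persisted yet */ }
    ///     Err(err) => {
    ///         if let Some(partial) = err.changeset {
    ///             restore(partial); // everything read before the bad entry
    ///         }
    ///         trigger_full_rescan(); // derivation indices may be stale (see note above)
    ///     }
    /// }
    /// ```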
pub fn aggregate_changesets(&mut self) -> Result<Option<C>, AggregateChangesetsError<C>> {
let mut changeset = Option::<C>::None;
for next_changeset in self.iter_changesets() {
let next_changeset = match next_changeset {
Ok(next_changeset) => next_changeset,
Err(iter_error) => {
return Err(AggregateChangesetsError {
changeset,
iter_error,
})
}
};
match &mut changeset {
Some(changeset) => changeset.append(next_changeset),
changeset => *changeset = Some(next_changeset),
}
}
Ok(changeset)
}
/// Append a new changeset to the file and truncate the file to the end of the appended
/// changeset.
///
/// The truncation is to avoid the possibility of having a valid but inconsistent changeset
/// directly after the appended changeset.
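    ///
    /// # Example
    ///
    /// A minimal sketch (not compiled as a doc-test):
    ///
    /// ```ignore
    /// // Empty changesets are skipped entirely; nothing is written or truncated.
    /// store.append_changeset(&changeset)?;
    /// ```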
pub fn append_changeset(&mut self, changeset: &C) -> Result<(), io::Error> {
// no need to write anything if changeset is empty
if changeset.is_empty() {
return Ok(());
}
bincode_options()
.serialize_into(&mut self.db_file, changeset)
.map_err(|e| match *e {
bincode::ErrorKind::Io(inner) => inner,
unexpected_err => panic!("unexpected bincode error: {}", unexpected_err),
})?;
// truncate file after this changeset addition
// if this is not done, data after this changeset may represent valid changesets, however
// applying those changesets on top of this one may result in an inconsistent state
let pos = self.db_file.stream_position()?;
self.db_file.set_len(pos)?;
Ok(())
}
}
/// Error type for [`Store::aggregate_changesets`].
#[derive(Debug)]
pub struct AggregateChangesetsError<C> {
    /// The aggregate of all changesets read before the error, or `None` if the very first entry
    /// already failed.
pub changeset: Option<C>,
/// The error returned by [`EntryIter`].
pub iter_error: IterError,
}
impl<C> std::fmt::Display for AggregateChangesetsError<C> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.iter_error, f)
}
}
impl<C: std::fmt::Debug> std::error::Error for AggregateChangesetsError<C> {}
#[cfg(test)]
mod test {
use super::*;
use bincode::DefaultOptions;
use std::{
collections::BTreeSet,
io::{Read, Write},
vec::Vec,
};
use tempfile::NamedTempFile;
const TEST_MAGIC_BYTES_LEN: usize = 12;
const TEST_MAGIC_BYTES: [u8; TEST_MAGIC_BYTES_LEN] =
[98, 100, 107, 102, 115, 49, 49, 49, 49, 49, 49, 49];
type TestChangeSet = BTreeSet<String>;
#[derive(Debug)]
struct TestTracker;
/// Check behavior of [`Store::create_new`] and [`Store::open`].
#[test]
fn construct_store() {
let temp_dir = tempfile::tempdir().unwrap();
let file_path = temp_dir.path().join("db_file");
let _ = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path)
.expect_err("must not open as file does not exist yet");
let _ = Store::<TestChangeSet>::create_new(&TEST_MAGIC_BYTES, &file_path)
.expect("must create file");
// cannot create new as file already exists
let _ = Store::<TestChangeSet>::create_new(&TEST_MAGIC_BYTES, &file_path)
.expect_err("must fail as file already exists now");
let _ = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path)
.expect("must open as file exists now");
}
#[test]
fn open_or_create_new() {
let temp_dir = tempfile::tempdir().unwrap();
let file_path = temp_dir.path().join("db_file");
let changeset = BTreeSet::from(["hello".to_string(), "world".to_string()]);
{
let mut db = Store::<TestChangeSet>::open_or_create_new(&TEST_MAGIC_BYTES, &file_path)
.expect("must create");
assert!(file_path.exists());
db.append_changeset(&changeset).expect("must succeed");
}
{
let mut db = Store::<TestChangeSet>::open_or_create_new(&TEST_MAGIC_BYTES, &file_path)
.expect("must recover");
let recovered_changeset = db.aggregate_changesets().expect("must succeed");
assert_eq!(recovered_changeset, Some(changeset));
}
}
#[test]
fn new_fails_if_file_is_too_short() {
let mut file = NamedTempFile::new().unwrap();
file.write_all(&TEST_MAGIC_BYTES[..TEST_MAGIC_BYTES_LEN - 1])
.expect("should write");
match Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, file.path()) {
Err(FileError::Io(e)) => assert_eq!(e.kind(), std::io::ErrorKind::UnexpectedEof),
unexpected => panic!("unexpected result: {:?}", unexpected),
};
}
#[test]
fn new_fails_if_magic_bytes_are_invalid() {
let invalid_magic_bytes = "ldkfs0000000";
let mut file = NamedTempFile::new().unwrap();
file.write_all(invalid_magic_bytes.as_bytes())
.expect("should write");
match Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, file.path()) {
Err(FileError::InvalidMagicBytes { got, .. }) => {
assert_eq!(got, invalid_magic_bytes.as_bytes())
}
unexpected => panic!("unexpected result: {:?}", unexpected),
};
}
#[test]
fn append_changeset_truncates_invalid_bytes() {
// initial data to write to file (magic bytes + invalid data)
let mut data = [255_u8; 2000];
data[..TEST_MAGIC_BYTES_LEN].copy_from_slice(&TEST_MAGIC_BYTES);
let changeset = TestChangeSet::from(["one".into(), "two".into(), "three!".into()]);
let mut file = NamedTempFile::new().unwrap();
file.write_all(&data).expect("should write");
let mut store =
Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, file.path()).expect("should open");
match store.iter_changesets().next() {
Some(Err(IterError::Bincode(_))) => {}
unexpected_res => panic!("unexpected result: {:?}", unexpected_res),
}
store.append_changeset(&changeset).expect("should append");
drop(store);
let got_bytes = {
let mut buf = Vec::new();
file.reopen()
.unwrap()
.read_to_end(&mut buf)
.expect("should read");
buf
};
let expected_bytes = {
let mut buf = TEST_MAGIC_BYTES.to_vec();
DefaultOptions::new()
.with_varint_encoding()
.serialize_into(&mut buf, &changeset)
.expect("should encode");
buf
};
assert_eq!(got_bytes, expected_bytes);
}
#[test]
fn last_write_is_short() {
let temp_dir = tempfile::tempdir().unwrap();
let changesets = [
TestChangeSet::from(["1".into()]),
TestChangeSet::from(["2".into(), "3".into()]),
TestChangeSet::from(["4".into(), "5".into(), "6".into()]),
];
let last_changeset = TestChangeSet::from(["7".into(), "8".into(), "9".into()]);
let last_changeset_bytes = bincode_options().serialize(&last_changeset).unwrap();
for short_write_len in 1..last_changeset_bytes.len() - 1 {
let file_path = temp_dir.path().join(format!("{}.dat", short_write_len));
println!("Test file: {:?}", file_path);
// simulate creating a file, writing data where the last write is incomplete
{
let mut db =
Store::<TestChangeSet>::create_new(&TEST_MAGIC_BYTES, &file_path).unwrap();
for changeset in &changesets {
db.append_changeset(changeset).unwrap();
}
// this is the incomplete write
db.db_file
.write_all(&last_changeset_bytes[..short_write_len])
.unwrap();
}
// load file again and aggregate changesets
// write the last changeset again (this time it succeeds)
{
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
let err = db
.aggregate_changesets()
.expect_err("should return error as last read is short");
assert_eq!(
err.changeset,
changesets.iter().cloned().reduce(|mut acc, cs| {
Append::append(&mut acc, cs);
acc
}),
"should recover all changesets that are written in full",
);
db.db_file.write_all(&last_changeset_bytes).unwrap();
}
// load file again - this time we should successfully aggregate all changesets
{
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
let aggregated_changesets = db
.aggregate_changesets()
.expect("aggregating all changesets should succeed");
assert_eq!(
aggregated_changesets,
changesets
.iter()
.cloned()
.chain(core::iter::once(last_changeset.clone()))
.reduce(|mut acc, cs| {
Append::append(&mut acc, cs);
acc
}),
"should recover all changesets",
);
}
}
}
#[test]
fn write_after_short_read() {
let temp_dir = tempfile::tempdir().unwrap();
let changesets = (0..20)
.map(|n| TestChangeSet::from([format!("{}", n)]))
.collect::<Vec<_>>();
let last_changeset = TestChangeSet::from(["last".into()]);
for read_count in 0..changesets.len() {
let file_path = temp_dir.path().join(format!("{}.dat", read_count));
println!("Test file: {:?}", file_path);
// First, we create the file with all the changesets!
let mut db = Store::<TestChangeSet>::create_new(&TEST_MAGIC_BYTES, &file_path).unwrap();
for changeset in &changesets {
db.append_changeset(changeset).unwrap();
}
drop(db);
// We re-open the file and read `read_count` number of changesets.
let mut db = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path).unwrap();
let mut exp_aggregation = db
.iter_changesets()
.take(read_count)
.map(|r| r.expect("must read valid changeset"))
.fold(TestChangeSet::default(), |mut acc, v| {
Append::append(&mut acc, v);
acc
});
// We write after a short read.
db.write_changes(&last_changeset)
.expect("last write must succeed");
Append::append(&mut exp_aggregation, last_changeset.clone());
drop(db);
// We open the file again and check whether aggregate changeset is expected.
let aggregation = Store::<TestChangeSet>::open(&TEST_MAGIC_BYTES, &file_path)
.unwrap()
.aggregate_changesets()
.expect("must aggregate changesets")
.unwrap_or_default();
assert_eq!(aggregation, exp_aggregation);
}
}
}