Merge rust-bitcoin/rust-bitcoin#672: New Witness struct to improve ser/de performance

dr-orlovsky · dr-orlovsky · commit 86055d9df51e · 2021-12-30T01:55:44.000+02:00
106acdc Add fuzzing for Witness struct (Riccardo Casatta) 2fd0125 Introduce Witness struct mainly to improve ser/de performance while keeping most usability. (Riccardo Casatta) Pull request description: At the moment the Witness struct is `Vec<Vec<u8>>`; the vec inside a vec causes a lot of allocations, specifically: - empty witness -> 1 allocation, while an empty vec doesn't allocate, the outer vec is not empty - witness with n elements -> n+1 allocations The proposed Witness struct contains the serialized format of the witness. This reduces the allocations to: - empty witness -> 0 allocations - witness with n elements -> 1 allocation for most common cases (you don't know beforehand how many bytes long the entire witness is, so you need to estimate a good value: not too big, to avoid wasting space, and not too small, to avoid vector reallocation; I used 128 since it covers about 80% of cases on mainnet) The inconvenience is having slightly less comfortable access to the witness, but the iterator is efficient (no allocations) and you can always collect the iteration to have a Vec of slices. If you collect the iteration you end up doing an allocation anyway, but the rationale is that it is an operation you need to do rarely, while ser/de is done much more often. I had to add a bigger block to better see the improvement (ae860247e191e2136d7c87382f78c96e0908d700). These are the results of the benches on my machine: ``` RCasatta/master_with_block test blockdata::block::benches::bench_block_deserialize ... bench: 5,496,821 ns/iter (+/- 298,859) test blockdata::block::benches::bench_block_serialize ... bench: 437,389 ns/iter (+/- 31,576) test blockdata::block::benches::bench_block_serialize_logic ... bench: 108,759 ns/iter (+/- 5,807) test blockdata::transaction::benches::bench_transaction_deserialize ... bench: 670 ns/iter (+/- 49) test blockdata::transaction::benches::bench_transaction_get_size ... 
bench: 7 ns/iter (+/- 0) test blockdata::transaction::benches::bench_transaction_serialize ... bench: 51 ns/iter (+/- 5) test blockdata::transaction::benches::bench_transaction_serialize_logic ... bench: 13 ns/iter (+/- 0) branch witness_with_block (this one) test blockdata::block::benches::bench_block_deserialize ... bench: 4,302,788 ns/iter (+/- 424,806) test blockdata::block::benches::bench_block_serialize ... bench: 366,493 ns/iter (+/- 42,216) test blockdata::block::benches::bench_block_serialize_logic ... bench: 84,646 ns/iter (+/- 7,366) test blockdata::transaction::benches::bench_transaction_deserialize ... bench: 648 ns/iter (+/- 77) test blockdata::transaction::benches::bench_transaction_get_size ... bench: 7 ns/iter (+/- 0) test blockdata::transaction::benches::bench_transaction_serialize ... bench: 50 ns/iter (+/- 5) test blockdata::transaction::benches::bench_transaction_serialize_logic ... bench: 14 ns/iter (+/- 0) ``` This is a performance improvement of about 21% when deserializing a block and about 16% when serializing a block (this seems even higher than expected; more tests are needed to confirm, and I'd appreciate test results from reviewers) ACKs for top commit: apoelstra: ACK 106acdc sanket1729: ACK 106acdc dr-orlovsky: utACK 106acdc Tree-SHA512: e4f23bdd55075c7ea788bc55846fd9e30f9cb76d5847cb259bddbf72523857715b0d4dbac505be3dfb9d4b1bcae289384ab39885b4887e188f8f1c06caf4049a
diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        fuzz_target: [deser_net_msg, deserialize_address, deserialize_amount, deserialize_block, deserialize_psbt, deserialize_script, deserialize_transaction, outpoint_string, uint128_fuzz, script_bytes_to_asm_fmt]
+        fuzz_target: [deser_net_msg, deserialize_address, deserialize_amount, deserialize_block, deserialize_psbt, deserialize_script, deserialize_transaction, deserialize_witness, outpoint_string, uint128_fuzz, script_bytes_to_asm_fmt]
     steps:
       - name: Install test dependencies
         run: sudo apt-get update -y && sudo apt-get install -y binutils-dev libunwind8-dev libcurl4-openssl-dev libelf-dev libdw-dev cmake gcc libiberty-dev
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
@@ -59,3 +59,7 @@ path = "fuzz_targets/uint128_fuzz.rs"
 [[bin]]
 name = "script_bytes_to_asm_fmt"
 path = "fuzz_targets/script_bytes_to_asm_fmt.rs"
+
+[[bin]]
+name = "deserialize_witness"
+path = "fuzz_targets/deserialize_witness.rs"
diff --git a/fuzz/fuzz_targets/deserialize_transaction.rs b/fuzz/fuzz_targets/deserialize_transaction.rs
@@ -10,7 +10,7 @@ fn do_test(data: &[u8]) {
             let len = ser.len();
             let calculated_weight = tx.get_weight();
             for input in &mut tx.input {
-                input.witness = vec![];
+                input.witness = bitcoin::blockdata::witness::Witness::default();
             }
             let no_witness_len = bitcoin::consensus::encode::serialize(&tx).len();
             // For 0-input transactions, `no_witness_len` will be incorrect because
diff --git a/fuzz/fuzz_targets/deserialize_witness.rs b/fuzz/fuzz_targets/deserialize_witness.rs
@@ -0,0 +1,59 @@
+extern crate bitcoin;
+
+use bitcoin::consensus::{serialize, deserialize};
+use bitcoin::blockdata::witness::Witness;
+
+fn do_test(data: &[u8]) {
+    let w: Result<Witness, _> = deserialize(data);
+    if let Ok(witness) = w {
+        let serialized = serialize(&witness);
+        assert_eq!(data, serialized);
+    }
+}
+
+#[cfg(feature = "afl")]
+#[macro_use] extern crate afl;
+#[cfg(feature = "afl")]
+fn main() {
+    fuzz!(|data| {
+        do_test(&data);
+    });
+}
+
+#[cfg(feature = "honggfuzz")]
+#[macro_use] extern crate honggfuzz;
+#[cfg(feature = "honggfuzz")]
+fn main() {
+    loop {
+        fuzz!(|data| {
+            do_test(data);
+        });
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    fn extend_vec_from_hex(hex: &str, out: &mut Vec<u8>) {
+        let mut b = 0;
+        for (idx, c) in hex.as_bytes().iter().enumerate() {
+            b <<= 4;
+            match *c {
+                b'A'..=b'F' => b |= c - b'A' + 10,
+                b'a'..=b'f' => b |= c - b'a' + 10,
+                b'0'..=b'9' => b |= c - b'0',
+                _ => panic!("Bad hex"),
+            }
+            if (idx & 1) == 1 {
+                out.push(b);
+                b = 0;
+            }
+        }
+    }
+
+    #[test]
+    fn duplicate_crash() {
+        let mut a = Vec::new();
+        extend_vec_from_hex("00", &mut a);
+        super::do_test(&a);
+    }
+}
diff --git a/src/blockdata/block.rs b/src/blockdata/block.rs
@@ -199,9 +199,10 @@ impl Block {
                         o.script_pubkey[0..6] == [0x6a, 0x24, 0xaa, 0x21, 0xa9, 0xed] }) {
                     let commitment = WitnessCommitment::from_slice(&coinbase.output[pos].script_pubkey.as_bytes()[6..38]).unwrap();
                     // witness reserved value is in coinbase input witness
-                    if coinbase.input[0].witness.len() == 1 && coinbase.input[0].witness[0].len() == 32 {
+                    let witness_vec: Vec<_> = coinbase.input[0].witness.iter().collect();
+                    if witness_vec.len() == 1 && witness_vec[0].len() == 32 {
                         match self.witness_root() {
-                            Some(witness_root) => return commitment == Self::compute_witness_commitment(&witness_root, coinbase.input[0].witness[0].as_slice()),
+                            Some(witness_root) => return commitment == Self::compute_witness_commitment(&witness_root, witness_vec[0]),
                             None => return false,
                         }
                     }
diff --git a/src/blockdata/constants.rs b/src/blockdata/constants.rs
@@ -29,6 +29,7 @@ use blockdata::opcodes;
 use blockdata::script;
 use blockdata::transaction::{OutPoint, Transaction, TxOut, TxIn};
 use blockdata::block::{Block, BlockHeader};
+use blockdata::witness::Witness;
 use network::constants::Network;
 use util::uint::Uint256;
 
@@ -93,7 +94,7 @@ fn bitcoin_genesis_tx() -> Transaction {
         previous_output: OutPoint::null(),
         script_sig: in_script,
         sequence: MAX_SEQUENCE,
-        witness: vec![],
+        witness: Witness::default(),
     });
 
     // Outputs
diff --git a/src/blockdata/mod.rs b/src/blockdata/mod.rs
@@ -23,4 +23,5 @@ pub mod opcodes;
 pub mod script;
 pub mod transaction;
 pub mod block;
+pub mod witness;
 
diff --git a/src/blockdata/transaction.rs b/src/blockdata/transaction.rs
@@ -36,6 +36,7 @@ use util::endian;
 use blockdata::constants::WITNESS_SCALE_FACTOR;
 #[cfg(feature="bitcoinconsensus")] use blockdata::script;
 use blockdata::script::Script;
+use blockdata::witness::Witness;
 use consensus::{encode, Decodable, Encodable};
 use consensus::encode::MAX_VEC_SIZE;
 use hash_types::{SigHash, Txid, Wtxid};
@@ -197,7 +198,7 @@ pub struct TxIn {
     /// Encodable/Decodable, as it is (de)serialized at the end of the full
     /// Transaction. It *is* (de)serialized with the rest of the TxIn in other
     /// (de)serialization routines.
-    pub witness: Vec<Vec<u8>>
+    pub witness: Witness
 }
 
 impl Default for TxIn {
@@ -206,7 +207,7 @@ impl Default for TxIn {
             previous_output: OutPoint::default(),
             script_sig: Script::new(),
             sequence: u32::max_value(),
-            witness: Vec::new(),
+            witness: Witness::default(),
         }
     }
 }
@@ -280,7 +281,7 @@ impl Transaction {
         let cloned_tx = Transaction {
             version: self.version,
             lock_time: self.lock_time,
-            input: self.input.iter().map(|txin| TxIn { script_sig: Script::new(), witness: vec![], .. *txin }).collect(),
+            input: self.input.iter().map(|txin| TxIn { script_sig: Script::new(), witness: Witness::default(), .. *txin }).collect(),
             output: self.output.clone(),
         };
         cloned_tx.txid().into()
@@ -357,7 +358,7 @@ impl Transaction {
                 previous_output: self.input[input_index].previous_output,
                 script_sig: script_pubkey.clone(),
                 sequence: self.input[input_index].sequence,
-                witness: vec![],
+                witness: Witness::default(),
             }];
         } else {
             tx.input = Vec::with_capacity(self.input.len());
@@ -366,7 +367,7 @@ impl Transaction {
                     previous_output: input.previous_output,
                     script_sig: if n == input_index { script_pubkey.clone() } else { Script::new() },
                     sequence: if n != input_index && (sighash == EcdsaSigHashType::Single || sighash == EcdsaSigHashType::None) { 0 } else { input.sequence },
-                    witness: vec![],
+                    witness: Witness::default(),
                 });
             }
         }
@@ -480,10 +481,7 @@ impl Transaction {
                 input.script_sig.len());
             if !input.witness.is_empty() {
                 inputs_with_witnesses += 1;
-                input_weight += VarInt(input.witness.len() as u64).len();
-                for elem in &input.witness {
-                    input_weight += VarInt(elem.len() as u64).len() + elem.len();
-                }
+                input_weight += input.witness.serialized_len();
             }
         }
         let mut output_size = 0;
@@ -585,7 +583,7 @@ impl Decodable for TxIn {
             previous_output: Decodable::consensus_decode(&mut d)?,
             script_sig: Decodable::consensus_decode(&mut d)?,
             sequence: Decodable::consensus_decode(d)?,
-            witness: vec![],
+            witness: Witness::default(),
         })
     }
 }
@@ -1458,6 +1456,7 @@ mod tests {
         use hashes::hex::FromHex;
         use std::collections::HashMap;
         use blockdata::script;
+        use blockdata::witness::Witness;
 
         // a random recent segwit transaction from blockchain using both old and segwit inputs
         let mut spending: Transaction = deserialize(Vec::from_hex("020000000001031cfbc8f54fbfa4a33a30068841371f80dbfe166211242213188428f437445c91000000006a47304402206fbcec8d2d2e740d824d3d36cc345b37d9f65d665a99f5bd5c9e8d42270a03a8022013959632492332200c2908459547bf8dbf97c65ab1a28dec377d6f1d41d3d63e012103d7279dfb90ce17fe139ba60a7c41ddf605b25e1c07a4ddcb9dfef4e7d6710f48feffffff476222484f5e35b3f0e43f65fc76e21d8be7818dd6a989c160b1e5039b7835fc00000000171600140914414d3c94af70ac7e25407b0689e0baa10c77feffffffa83d954a62568bbc99cc644c62eb7383d7c2a2563041a0aeb891a6a4055895570000000017160014795d04cc2d4f31480d9a3710993fbd80d04301dffeffffff06fef72f000000000017a91476fd7035cd26f1a32a5ab979e056713aac25796887a5000f00000000001976a914b8332d502a529571c6af4be66399cd33379071c588ac3fda0500000000001976a914fc1d692f8de10ae33295f090bea5fe49527d975c88ac522e1b00000000001976a914808406b54d1044c429ac54c0e189b0d8061667e088ac6eb68501000000001976a914dfab6085f3a8fb3e6710206a5a959313c5618f4d88acbba20000000000001976a914eb3026552d7e3f3073457d0bee5d4757de48160d88ac0002483045022100bee24b63212939d33d513e767bc79300051f7a0d433c3fcf1e0e3bf03b9eb1d70220588dc45a9ce3a939103b4459ce47500b64e23ab118dfc03c9caa7d6bfc32b9c601210354fd80328da0f9ae6eef2b3a81f74f9a6f66761fadf96f1d1d22b1fd6845876402483045022100e29c7e3a5efc10da6269e5fc20b6a1cb8beb92130cc52c67e46ef40aaa5cac5f0220644dd1b049727d991aece98a105563416e10a5ac4221abac7d16931842d5c322012103960b87412d6e169f30e12106bdf70122aabb9eb61f455518322a18b920a4dfa887d30700")
@@ -1496,7 +1495,9 @@ mod tests {
         }).is_err());
 
         // test that we get a failure if we corrupt a signature
-        spending.input[1].witness[0][10] = 42;
+        let mut witness: Vec<_> = spending.input[1].witness.to_vec();
+        witness[0][10] = 42;
+        spending.input[1].witness = Witness::from_vec(witness);
         match spending.verify(|point: &OutPoint| {
             if let Some(tx) = spent3.remove(&point.txid) {
                 return tx.output.get(point.vout as usize).cloned();
diff --git a/src/blockdata/witness.rs b/src/blockdata/witness.rs
diff --git a/src/util/bip143.rs b/src/util/bip143.rs
diff --git a/src/util/psbt/mod.rs b/src/util/psbt/mod.rs
diff --git a/src/util/sighash.rs b/src/util/sighash.rs

Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@ fn do_test(data: &[u8]) {`
`10`	`10`	`let len = ser.len();`
`11`	`11`	`let calculated_weight = tx.get_weight();`
`12`	`12`	`for input in &mut tx.input {`
`13`		`- input.witness = vec![];`
	`13`	`+ input.witness = bitcoin::blockdata::witness::Witness::default();`
`14`	`14`	`}`
`15`	`15`	`let no_witness_len = bitcoin::consensus::encode::serialize(&tx).len();`
`16`	`16`	// For 0-input transactions, `no_witness_len` will be incorrect because
Original file line number	Diff line number	Diff line change
`@@ -199,9 +199,10 @@ impl Block {`
`199`	`199`	`o.script_pubkey[0..6] == [0x6a, 0x24, 0xaa, 0x21, 0xa9, 0xed] }) {`
`200`	`200`	`let commitment = WitnessCommitment::from_slice(&coinbase.output[pos].script_pubkey.as_bytes()[6..38]).unwrap();`
`201`	`201`	`// witness reserved value is in coinbase input witness`
`202`		`- if coinbase.input[0].witness.len() == 1 && coinbase.input[0].witness[0].len() == 32 {`
	`202`	`+ let witness_vec: Vec<_> = coinbase.input[0].witness.iter().collect();`
	`203`	`+ if witness_vec.len() == 1 && witness_vec[0].len() == 32 {`
`203`	`204`	`match self.witness_root() {`
`204`		`- Some(witness_root) => return commitment == Self::compute_witness_commitment(&witness_root, coinbase.input[0].witness[0].as_slice()),`
	`205`	`+ Some(witness_root) => return commitment == Self::compute_witness_commitment(&witness_root, witness_vec[0]),`
`205`	`206`	`None => return false,`
`206`	`207`	`}`
`207`	`208`	`}`