From 3ad4335accd08f2160aac489e4e16dceaae695be Mon Sep 17 00:00:00 2001
From: Martin Holst Swende <martin@swende.se>
Date: Mon, 2 Dec 2019 09:31:07 +0100
Subject: [PATCH] core/state/snapshot: node behavioural difference on bloom
 content

---
 core/state/snapshot/difflayer.go      | 19 ++++++++++++++++---
 core/state/snapshot/difflayer_test.go |  9 ++++++---
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/core/state/snapshot/difflayer.go b/core/state/snapshot/difflayer.go
index cf8c47c3e..634118a10 100644
--- a/core/state/snapshot/difflayer.go
+++ b/core/state/snapshot/difflayer.go
@@ -20,6 +20,7 @@ import (
 	"encoding/binary"
 	"fmt"
 	"math"
+	"math/rand"
 	"sort"
 	"sync"
 	"time"
@@ -63,8 +64,20 @@ var (
 	// bloom filter to keep its size to a minimum (given it's size and maximum
 	// entry count).
 	bloomFuncs = math.Round((bloomSize / float64(aggregatorItemLimit)) * math.Log(2))
+
+	// bloomHasherOffset is a runtime constant which determines which part of
+	// the account/storage hash the hasher functions look at, to determine the
+	// bloom key for an account/slot. This is randomized at init(), so that the
+	// global population of nodes do not all display the exact same behaviour with
+	// regards to bloom content.
+	bloomHasherOffset = 0
 )
 
+func init() {
+	// Offset in [0, 24] (8 bytes are read); time-seeded, as the global source is deterministic before Go 1.20.
+	bloomHasherOffset = rand.New(rand.NewSource(time.Now().UnixNano())).Intn(25)
+}
+
 // diffLayer represents a collection of modifications made to a state snapshot
 // after running a block on top. It contains one sorted list for the account trie
 // and one-one list for each storage tries.
@@ -100,7 +113,7 @@ func (h accountBloomHasher) Reset()                            { panic("not impl
 func (h accountBloomHasher) BlockSize() int                    { panic("not implemented") }
 func (h accountBloomHasher) Size() int                         { return 8 }
 func (h accountBloomHasher) Sum64() uint64 {
-	return binary.BigEndian.Uint64(h[:8])
+	return binary.BigEndian.Uint64(h[bloomHasherOffset : bloomHasherOffset+8])
 }
 
 // storageBloomHasher is a wrapper around a [2]common.Hash to satisfy the interface
@@ -114,7 +127,8 @@ func (h storageBloomHasher) Reset()                            { panic("not impl
 func (h storageBloomHasher) BlockSize() int                    { panic("not implemented") }
 func (h storageBloomHasher) Size() int                         { return 8 }
 func (h storageBloomHasher) Sum64() uint64 {
-	return binary.BigEndian.Uint64(h[0][:8]) ^ binary.BigEndian.Uint64(h[1][:8])
+	return binary.BigEndian.Uint64(h[0][bloomHasherOffset:bloomHasherOffset+8]) ^
+		binary.BigEndian.Uint64(h[1][bloomHasherOffset:bloomHasherOffset+8])
 }
 
 // newDiffLayer creates a new diff on top of an existing snapshot, whether that's a low
@@ -205,7 +219,6 @@ func (dl *diffLayer) rebloom(origin *diskLayer) {
 	k := float64(dl.diffed.K())
 	n := float64(dl.diffed.N())
 	m := float64(dl.diffed.M())
-
 	snapshotBloomErrorGauge.Update(math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k))
 }
 
diff --git a/core/state/snapshot/difflayer_test.go b/core/state/snapshot/difflayer_test.go
index 84220e359..7d7b21eb0 100644
--- a/core/state/snapshot/difflayer_test.go
+++ b/core/state/snapshot/difflayer_test.go
@@ -24,6 +24,7 @@ import (
 
 	"github.com/VictoriaMetrics/fastcache"
 	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/crypto"
 	"github.com/ethereum/go-ethereum/ethdb/memorydb"
 	"github.com/ethereum/go-ethereum/rlp"
 )
@@ -216,7 +217,7 @@ func BenchmarkSearch(b *testing.B) {
 		layer = fill(layer)
 	}
 
-	key := common.Hash{}
+	key := crypto.Keccak256Hash([]byte{0x13, 0x38})
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		layer.AccountRLP(key)
@@ -229,10 +230,12 @@ func BenchmarkSearch(b *testing.B) {
 // BenchmarkSearchSlot-6   	  100000	     14554 ns/op
 // BenchmarkSearchSlot-6   	  100000	     22254 ns/op (when checking parent root using mutex)
 // BenchmarkSearchSlot-6   	  100000	     14551 ns/op (when checking parent number using atomic)
+// With bloom filter:
+// BenchmarkSearchSlot-6   	 3467835	       351 ns/op
 func BenchmarkSearchSlot(b *testing.B) {
 	// First, we set up 128 diff layers, with 1K items each
-	accountKey := common.Hash{}
-	storageKey := common.HexToHash("0x1337")
+	accountKey := crypto.Keccak256Hash([]byte{0x13, 0x37})
+	storageKey := crypto.Keccak256Hash([]byte{0x13, 0x37})
 	accountRLP := randomAccount()
 	fill := func(parent snapshot) *diffLayer {
 		accounts := make(map[common.Hash][]byte)
-- 
GitLab