From d985b9052ae08f2538e6caa7e6b5ef351a00bc3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= <peterke@gmail.com>
Date: Tue, 27 Mar 2018 15:13:30 +0300
Subject: [PATCH] core/state: avoid linear overhead on journal dirty listing

---
 core/state/journal.go      | 113 ++++++++++++++++++++++++-------------
 core/state/state_object.go |   6 +-
 core/state/statedb.go      |  35 +++++-------
 core/state/statedb_test.go |   4 +-
 4 files changed, 92 insertions(+), 66 deletions(-)

diff --git a/core/state/journal.go b/core/state/journal.go
index b00a46224..a03ca57db 100644
--- a/core/state/journal.go
+++ b/core/state/journal.go
@@ -22,41 +22,66 @@ import (
 	"github.com/ethereum/go-ethereum/common"
 )
 
+// journalEntry is a modification entry in the state change journal that can be
+// reverted on demand.
 type journalEntry interface {
-	undo(*StateDB)
-	getAccount() *common.Address
+	// revert undoes the changes introduced by this journal entry.
+	revert(*StateDB)
+
+	// dirtied returns the Ethereum address modified by this journal entry.
+	dirtied() *common.Address
 }
 
+// journal contains the list of state modifications applied since the last state
+// commit. These are tracked to be able to be reverted in case of an execution
+// exception or revertal request.
 type journal struct {
-	entries        []journalEntry
-	dirtyOverrides []common.Address
+	entries []journalEntry         // Current changes tracked by the journal
+	dirties map[common.Address]int // Dirty accounts and the number of changes
+}
+
+// newJournal create a new initialized journal.
+func newJournal() *journal {
+	return &journal{
+		dirties: make(map[common.Address]int),
+	}
 }
 
+// append inserts a new modification entry to the end of the change journal.
 func (j *journal) append(entry journalEntry) {
 	j.entries = append(j.entries, entry)
+	if addr := entry.dirtied(); addr != nil {
+		j.dirties[*addr]++
+	}
 }
 
-func (j *journal) flatten() map[common.Address]struct{} {
+// revert undoes a batch of journalled modifications along with any reverted
+// dirty handling too.
+func (j *journal) revert(statedb *StateDB, snapshot int) {
+	for i := len(j.entries) - 1; i >= snapshot; i-- {
+		// Undo the changes made by the operation
+		j.entries[i].revert(statedb)
 
-	dirtyObjects := make(map[common.Address]struct{})
-	for _, journalEntry := range j.entries {
-		if addr := journalEntry.getAccount(); addr != nil {
-			dirtyObjects[*addr] = struct{}{}
+		// Drop any dirty tracking induced by the change
+		if addr := j.entries[i].dirtied(); addr != nil {
+			if j.dirties[*addr]--; j.dirties[*addr] == 0 {
+				delete(j.dirties, *addr)
+			}
 		}
 	}
-	for _, addr := range j.dirtyOverrides {
-		dirtyObjects[addr] = struct{}{}
-	}
-	return dirtyObjects
+	j.entries = j.entries[:snapshot]
 }
 
-// Length returns the number of journal entries in the journal
-func (j *journal) Length() int {
-	return len(j.entries)
+// dirty explicitly sets an address to dirty, even if the change entries would
+// otherwise suggest it as clean. This method is an ugly hack to handle the RIPEMD
+// precompile consensus exception.
+func (j *journal) dirty(addr common.Address) {
+	j.dirties[addr]++
 }
 
-func (j *journal) dirtyOverride(address common.Address) {
-	j.dirtyOverrides = append(j.dirtyOverrides, address)
+// length returns the current number of entries in the journal.
+func (j *journal) length() int {
+	return len(j.entries)
 }
 
 type (
@@ -108,78 +133,85 @@ type (
 	}
 )
 
-func (ch createObjectChange) undo(s *StateDB) {
+func (ch createObjectChange) revert(s *StateDB) {
 	delete(s.stateObjects, *ch.account)
 	delete(s.stateObjectsDirty, *ch.account)
 }
 
-func (ch createObjectChange) getAccount() *common.Address {
+func (ch createObjectChange) dirtied() *common.Address {
 	return ch.account
 }
 
-func (ch resetObjectChange) undo(s *StateDB) {
+func (ch resetObjectChange) revert(s *StateDB) {
 	s.setStateObject(ch.prev)
 }
 
-func (ch resetObjectChange) getAccount() *common.Address {
+func (ch resetObjectChange) dirtied() *common.Address {
 	return nil
 }
 
-func (ch suicideChange) undo(s *StateDB) {
+func (ch suicideChange) revert(s *StateDB) {
 	obj := s.getStateObject(*ch.account)
 	if obj != nil {
 		obj.suicided = ch.prev
 		obj.setBalance(ch.prevbalance)
 	}
 }
-func (ch suicideChange) getAccount() *common.Address {
+
+func (ch suicideChange) dirtied() *common.Address {
 	return ch.account
 }
 
 var ripemd = common.HexToAddress("0000000000000000000000000000000000000003")
 
-func (ch touchChange) undo(s *StateDB) {
+func (ch touchChange) revert(s *StateDB) {
 }
-func (ch touchChange) getAccount() *common.Address {
+
+func (ch touchChange) dirtied() *common.Address {
 	return ch.account
 }
 
-func (ch balanceChange) undo(s *StateDB) {
+func (ch balanceChange) revert(s *StateDB) {
 	s.getStateObject(*ch.account).setBalance(ch.prev)
 }
-func (ch balanceChange) getAccount() *common.Address {
+
+func (ch balanceChange) dirtied() *common.Address {
 	return ch.account
 }
 
-func (ch nonceChange) undo(s *StateDB) {
+func (ch nonceChange) revert(s *StateDB) {
 	s.getStateObject(*ch.account).setNonce(ch.prev)
 }
 
-func (ch nonceChange) getAccount() *common.Address {
+func (ch nonceChange) dirtied() *common.Address {
 	return ch.account
 }
-func (ch codeChange) undo(s *StateDB) {
+
+func (ch codeChange) revert(s *StateDB) {
 	s.getStateObject(*ch.account).setCode(common.BytesToHash(ch.prevhash), ch.prevcode)
 }
-func (ch codeChange) getAccount() *common.Address {
+
+func (ch codeChange) dirtied() *common.Address {
 	return ch.account
 }
 
-func (ch storageChange) undo(s *StateDB) {
+func (ch storageChange) revert(s *StateDB) {
 	s.getStateObject(*ch.account).setState(ch.key, ch.prevalue)
 }
-func (ch storageChange) getAccount() *common.Address {
+
+func (ch storageChange) dirtied() *common.Address {
 	return ch.account
 }
 
-func (ch refundChange) undo(s *StateDB) {
+func (ch refundChange) revert(s *StateDB) {
 	s.refund = ch.prev
 }
-func (ch refundChange) getAccount() *common.Address {
+
+func (ch refundChange) dirtied() *common.Address {
 	return nil
 }
 
-func (ch addLogChange) undo(s *StateDB) {
+func (ch addLogChange) revert(s *StateDB) {
 	logs := s.logs[ch.txhash]
 	if len(logs) == 1 {
 		delete(s.logs, ch.txhash)
@@ -188,14 +220,15 @@ func (ch addLogChange) undo(s *StateDB) {
 	}
 	s.logSize--
 }
-func (ch addLogChange) getAccount() *common.Address {
+
+func (ch addLogChange) dirtied() *common.Address {
 	return nil
 }
 
-func (ch addPreimageChange) undo(s *StateDB) {
+func (ch addPreimageChange) revert(s *StateDB) {
 	delete(s.preimages, ch.hash)
 }
 
-func (ch addPreimageChange) getAccount() *common.Address {
+func (ch addPreimageChange) dirtied() *common.Address {
 	return nil
 }
diff --git a/core/state/state_object.go b/core/state/state_object.go
index 523bb7150..3c40c2041 100644
--- a/core/state/state_object.go
+++ b/core/state/state_object.go
@@ -141,9 +141,9 @@ func (c *stateObject) touch() {
 		account: &c.address,
 	})
 	if c.address == ripemd {
-		//Explicitly put it in the dirty-cache, which is otherwise
-		// generated from flattened journals
-		c.db.journal.dirtyOverride(c.address)
+		// Explicitly put it in the dirty-cache, which is otherwise generated from
+		// flattened journals.
+		c.db.journal.dirty(c.address)
 	}
 }
 
diff --git a/core/state/statedb.go b/core/state/statedb.go
index 7a88b3b16..c7ef49f64 100644
--- a/core/state/statedb.go
+++ b/core/state/statedb.go
@@ -76,7 +76,7 @@ type StateDB struct {
 
 	// Journal of state modifications. This is the backbone of
 	// Snapshot and RevertToSnapshot.
-	journal        journal
+	journal        *journal
 	validRevisions []revision
 	nextRevisionId int
 
@@ -96,6 +96,7 @@ func New(root common.Hash, db Database) (*StateDB, error) {
 		stateObjectsDirty: make(map[common.Address]struct{}),
 		logs:              make(map[common.Hash][]*types.Log),
 		preimages:         make(map[common.Hash][]byte),
+		journal:           newJournal(),
 	}, nil
 }
 
@@ -456,21 +457,20 @@ func (self *StateDB) Copy() *StateDB {
 	self.lock.Lock()
 	defer self.lock.Unlock()
 
-	dirtyObjects := self.journal.flatten()
-
 	// Copy all the basic fields, initialize the memory ones
 	state := &StateDB{
 		db:                self.db,
 		trie:              self.db.CopyTrie(self.trie),
-		stateObjects:      make(map[common.Address]*stateObject, len(dirtyObjects)),
-		stateObjectsDirty: make(map[common.Address]struct{}, len(dirtyObjects)),
+		stateObjects:      make(map[common.Address]*stateObject, len(self.journal.dirties)),
+		stateObjectsDirty: make(map[common.Address]struct{}, len(self.journal.dirties)),
 		refund:            self.refund,
 		logs:              make(map[common.Hash][]*types.Log, len(self.logs)),
 		logSize:           self.logSize,
 		preimages:         make(map[common.Hash][]byte),
+		journal:           newJournal(),
 	}
 	// Copy the dirty states, logs, and preimages
-	for addr := range dirtyObjects {
+	for addr := range self.journal.dirties {
 		state.stateObjects[addr] = self.stateObjects[addr].deepCopy(state)
 		state.stateObjectsDirty[addr] = struct{}{}
 	}
@@ -488,7 +488,7 @@ func (self *StateDB) Copy() *StateDB {
 func (self *StateDB) Snapshot() int {
 	id := self.nextRevisionId
 	self.nextRevisionId++
-	self.validRevisions = append(self.validRevisions, revision{id, self.journal.Length()})
+	self.validRevisions = append(self.validRevisions, revision{id, self.journal.length()})
 	return id
 }
 
@@ -503,13 +503,8 @@ func (self *StateDB) RevertToSnapshot(revid int) {
 	}
 	snapshot := self.validRevisions[idx].journalIndex
 
-	// Replay the journal to undo changes.
-	for i := self.journal.Length() - 1; i >= snapshot; i-- {
-		self.journal.entries[i].undo(self)
-	}
-	self.journal.entries = self.journal.entries[:snapshot]
-
-	// Remove invalidated snapshots from the stack.
+	// Replay the journal to undo changes and remove invalidated snapshots
+	self.journal.revert(self, snapshot)
 	self.validRevisions = self.validRevisions[:idx]
 }
 
@@ -521,8 +516,7 @@ func (self *StateDB) GetRefund() uint64 {
 // Finalise finalises the state by removing the self destructed objects
 // and clears the journal as well as the refunds.
 func (s *StateDB) Finalise(deleteEmptyObjects bool) {
-
-	for addr, v := range s.journal.flatten() {
+	for addr := range s.journal.dirties {
 		stateObject := s.stateObjects[addr]
 		if stateObject.suicided || (deleteEmptyObjects && stateObject.empty()) {
 			s.deleteStateObject(stateObject)
@@ -530,7 +524,7 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) {
 			stateObject.updateRoot(s.db)
 			s.updateStateObject(stateObject)
 		}
-		s.stateObjectsDirty[addr] = v
+		s.stateObjectsDirty[addr] = struct{}{}
 	}
 	// Invalidate journal because reverting across transactions is not allowed.
 	s.clearJournalAndRefund()
@@ -574,7 +568,7 @@ func (s *StateDB) DeleteSuicides() {
 }
 
 func (s *StateDB) clearJournalAndRefund() {
-	s.journal = journal{}
+	s.journal = newJournal()
 	s.validRevisions = s.validRevisions[:0]
 	s.refund = 0
 }
@@ -583,10 +577,9 @@ func (s *StateDB) clearJournalAndRefund() {
 func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) {
 	defer s.clearJournalAndRefund()
 
-	for addr, v := range s.journal.flatten() {
-		s.stateObjectsDirty[addr] = v
+	for addr := range s.journal.dirties {
+		s.stateObjectsDirty[addr] = struct{}{}
 	}
-
 	// Commit objects to the trie.
 	for addr, stateObject := range s.stateObjects {
 		_, isDirty := s.stateObjectsDirty[addr]
diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go
index 05bc0499b..340c840f1 100644
--- a/core/state/statedb_test.go
+++ b/core/state/statedb_test.go
@@ -414,11 +414,11 @@ func (s *StateSuite) TestTouchDelete(c *check.C) {
 	snapshot := s.state.Snapshot()
 	s.state.AddBalance(common.Address{}, new(big.Int))
 
-	if len(s.state.journal.flatten()) != 1 {
+	if len(s.state.journal.dirties) != 1 {
 		c.Fatal("expected one dirty state object")
 	}
 	s.state.RevertToSnapshot(snapshot)
-	if len(s.state.journal.flatten()) != 0 {
+	if len(s.state.journal.dirties) != 0 {
 		c.Fatal("expected no dirty state object")
 	}
 }
-- 
GitLab