From 62d9d638582fa5fd7bcfaa55e8810fcfb898a4dd Mon Sep 17 00:00:00 2001
From: lash <nolash@users.noreply.github.com>
Date: Thu, 28 Feb 2019 08:12:50 +0100
Subject: [PATCH] swarm/network: WIP consider all nodes for healthy iteration
 (#19155)

* swarm/network: WIP consider all nodes for healthy iteration

* swarm/network/simulation: extend TestWaitTillHealthy to really check kads are healthy

* cmd/swarm/swarm-snapshot: fixed bugs in snapshot creation binary

* swarm/network/simulation: addressed PR comments

* swarm/network/simulation: defer sim.Close()

* swarm/network/simulation: fixed wrong sim.Close()

* swarm/network/simulation: addressed PR comments

* cmd/swarm/swarm-snapshot: reduce default to 8 nodes and the "more nodes" test case to default + 4

* cmd/swarm/swarm-snapshot: extended timeout to 3 mins, otherwise a 256-node snapshot times out

* swarm/network/simulation: More PR comments
---
 cmd/swarm/swarm-snapshot/create.go        |  11 +-
 cmd/swarm/swarm-snapshot/create_test.go   |   4 +-
 cmd/swarm/swarm-snapshot/main.go          |   2 +-
 swarm/network/simulation/kademlia.go      |   5 +-
 swarm/network/simulation/kademlia_test.go | 120 ++++++++++++++++++----
 5 files changed, 113 insertions(+), 29 deletions(-)

diff --git a/cmd/swarm/swarm-snapshot/create.go b/cmd/swarm/swarm-snapshot/create.go
index 127fde8ae..434561a49 100644
--- a/cmd/swarm/swarm-snapshot/create.go
+++ b/cmd/swarm/swarm-snapshot/create.go
@@ -59,13 +59,16 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
 	log.Debug("create snapshot", "filename", filename, "nodes", nodes, "services", services)
 
 	sim := simulation.New(map[string]simulation.ServiceFunc{
-		"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
+		"bzz": func(ctx *adapters.ServiceContext, bucket *sync.Map) (node.Service, func(), error) {
 			addr := network.NewAddr(ctx.Config.Node())
 			kad := network.NewKademlia(addr.Over(), network.NewKadParams())
 			hp := network.NewHiveParams()
 			hp.KeepAliveInterval = time.Duration(200) * time.Millisecond
 			hp.Discovery = true // discovery must be enabled when creating a snapshot
 
+			// store the kademlia in the bucket, needed later in the WaitTillHealthy function
+			bucket.Store(simulation.BucketKeyKademlia, kad)
+
 			config := &network.BzzConfig{
 				OverlayAddr:  addr.Over(),
 				UnderlayAddr: addr.Under(),
@@ -76,17 +79,17 @@ func createSnapshot(filename string, nodes int, services []string) (err error) {
 	})
 	defer sim.Close()
 
-	_, err = sim.AddNodes(nodes)
+	ids, err := sim.AddNodes(nodes)
 	if err != nil {
 		return fmt.Errorf("add nodes: %v", err)
 	}
 
-	err = sim.Net.ConnectNodesRing(nil)
+	err = sim.Net.ConnectNodesRing(ids)
 	if err != nil {
 		return fmt.Errorf("connect nodes: %v", err)
 	}
 
-	ctx, cancelSimRun := context.WithTimeout(context.Background(), 2*time.Minute)
+	ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancelSimRun()
 	if _, err := sim.WaitTillHealthy(ctx); err != nil {
 		return fmt.Errorf("wait for healthy kademlia: %v", err)
diff --git a/cmd/swarm/swarm-snapshot/create_test.go b/cmd/swarm/swarm-snapshot/create_test.go
index c9445168d..b2e30c201 100644
--- a/cmd/swarm/swarm-snapshot/create_test.go
+++ b/cmd/swarm/swarm-snapshot/create_test.go
@@ -48,7 +48,7 @@ func TestSnapshotCreate(t *testing.T) {
 		},
 		{
 			name:  "more nodes",
-			nodes: defaultNodes + 5,
+			nodes: defaultNodes + 4,
 		},
 		{
 			name:     "services",
@@ -81,7 +81,7 @@ func TestSnapshotCreate(t *testing.T) {
 			}
 			testCmd := runSnapshot(t, append(args, file.Name())...)
 
-			testCmd.ExpectExit()
+			testCmd.WaitExit()
 			if code := testCmd.ExitStatus(); code != 0 {
 				t.Fatalf("command exit code %v, expected 0", code)
 			}
diff --git a/cmd/swarm/swarm-snapshot/main.go b/cmd/swarm/swarm-snapshot/main.go
index 184727e4d..136295e51 100644
--- a/cmd/swarm/swarm-snapshot/main.go
+++ b/cmd/swarm/swarm-snapshot/main.go
@@ -27,7 +27,7 @@ import (
 var gitCommit string // Git SHA1 commit hash of the release (set via linker flags)
 
 // default value for "create" command --nodes flag
-const defaultNodes = 10
+const defaultNodes = 8
 
 func main() {
 	err := newApp().Run(os.Args)
diff --git a/swarm/network/simulation/kademlia.go b/swarm/network/simulation/kademlia.go
index c58d402b0..a3419c03f 100644
--- a/swarm/network/simulation/kademlia.go
+++ b/swarm/network/simulation/kademlia.go
@@ -58,7 +58,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
 			for k := range ill {
 				delete(ill, k)
 			}
-			log.Debug("kademlia health check", "addr count", len(addrs))
+			log.Debug("kademlia health check", "addr count", len(addrs), "kad len", len(kademlias))
 			for id, k := range kademlias {
 				//PeerPot for this node
 				addr := common.Bytes2Hex(k.BaseAddr())
@@ -70,7 +70,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
 				log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
 				log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
 				log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
-				if !h.ConnectNN {
+				if !h.Healthy() {
 					ill[id] = k
 				}
 			}
@@ -85,6 +85,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
 // in simulation bucket.
 func (s *Simulation) kademlias() (ks map[enode.ID]*network.Kademlia) {
 	items := s.UpNodesItems(BucketKeyKademlia)
+	log.Debug("kademlia len items", "len", len(items))
 	ks = make(map[enode.ID]*network.Kademlia, len(items))
 	for id, v := range items {
 		k, ok := v.(*network.Kademlia)
diff --git a/swarm/network/simulation/kademlia_test.go b/swarm/network/simulation/kademlia_test.go
index bbc93ee8c..4cfcecd8e 100644
--- a/swarm/network/simulation/kademlia_test.go
+++ b/swarm/network/simulation/kademlia_test.go
@@ -22,45 +22,125 @@ import (
 	"testing"
 	"time"
 
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/log"
 	"github.com/ethereum/go-ethereum/node"
 	"github.com/ethereum/go-ethereum/p2p/simulations/adapters"
 	"github.com/ethereum/go-ethereum/swarm/network"
 )
 
+/*
+	TestWaitTillHealthy tests that we indeed get a healthy network after we wait for it.
+	For this to be tested, a bit of a snake tail bite needs to happen:
+		* First we create a first simulation
+		* Run it as nodes connected in a ring
+		* Wait until the network is healthy
+		* Then we create a snapshot
+		* With this snapshot we create a new simulation
+		* This simulation is expected to have a healthy configuration, as it uses the snapshot
+		* Thus we just iterate all nodes and check that their kademlias are healthy
+		* If all kademlias are healthy, the test succeeded, otherwise it failed
+*/
 func TestWaitTillHealthy(t *testing.T) {
-	sim := New(map[string]ServiceFunc{
-		"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
-			addr := network.NewAddr(ctx.Config.Node())
-			hp := network.NewHiveParams()
-			config := &network.BzzConfig{
-				OverlayAddr:  addr.Over(),
-				UnderlayAddr: addr.Under(),
-				HiveParams:   hp,
-			}
-			kad := network.NewKademlia(addr.Over(), network.NewKadParams())
-			// store kademlia in node's bucket under BucketKeyKademlia
-			// so that it can be found by WaitTillHealthy method.
-			b.Store(BucketKeyKademlia, kad)
-			return network.NewBzz(config, kad, nil, nil, nil), nil, nil
-		},
-	})
-	defer sim.Close()
 
-	_, err := sim.AddNodesAndConnectRing(10)
+	testNodesNum := 10
+
+	// create the first simulation
+	sim := New(createSimServiceMap(true))
+
+	// connect and...
+	nodeIDs, err := sim.AddNodesAndConnectRing(testNodesNum)
 	if err != nil {
 		t.Fatal(err)
 	}
 
+	// array of all overlay addresses
+	var addrs [][]byte
+	// iterate once to be able to build the peer map
+	for _, node := range nodeIDs {
+		//get the kademlia overlay address from this ID
+		a := node.Bytes()
+		//append it to the array of all overlay addresses
+		addrs = append(addrs, a)
+	}
+	// build a PeerPot only once
+	pp := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
+
 	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
 	defer cancel()
+
+	// ...wait until healthy
 	ill, err := sim.WaitTillHealthy(ctx)
 	if err != nil {
 		for id, kad := range ill {
 			t.Log("Node", id)
 			t.Log(kad.String())
 		}
-		if err != nil {
-			t.Fatal(err)
+		t.Fatal(err)
+	}
+
+	// now create a snapshot of this network
+	snap, err := sim.Net.Snapshot()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// close the initial simulation
+	sim.Close()
+	// create a control simulation
+	controlSim := New(createSimServiceMap(false))
+	defer controlSim.Close()
+
+	// load the snapshot into this control simulation
+	err = controlSim.Net.Load(snap)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = controlSim.WaitTillHealthy(ctx)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	for _, node := range nodeIDs {
+		// ...get its kademlia
+		item, ok := controlSim.NodeItem(node, BucketKeyKademlia)
+		if !ok {
+			t.Fatal("No kademlia bucket item")
 		}
+		kad := item.(*network.Kademlia)
+		// get its base address
+		kid := common.Bytes2Hex(kad.BaseAddr())
+
+		//get the health info
+		info := kad.GetHealthInfo(pp[kid])
+		log.Trace("Health info", "info", info)
+		// check that it is healthy
+		healthy := info.Healthy()
+		if !healthy {
+			t.Fatalf("Expected node %v of control simulation to be healthy, but it is not, unhealthy kademlias: %v", node, kad.String())
+		}
+	}
+}
+
+// createSimServiceMap returns the services map
+// this function will create the sim services with or without discovery enabled
+// based on the flag passed
+func createSimServiceMap(discovery bool) map[string]ServiceFunc {
+	return map[string]ServiceFunc{
+		"bzz": func(ctx *adapters.ServiceContext, b *sync.Map) (node.Service, func(), error) {
+			addr := network.NewAddr(ctx.Config.Node())
+			hp := network.NewHiveParams()
+			hp.Discovery = discovery
+			config := &network.BzzConfig{
+				OverlayAddr:  addr.Over(),
+				UnderlayAddr: addr.Under(),
+				HiveParams:   hp,
+			}
+			kad := network.NewKademlia(addr.Over(), network.NewKadParams())
+			// store kademlia in node's bucket under BucketKeyKademlia
+			// so that it can be found by WaitTillHealthy method.
+			b.Store(BucketKeyKademlia, kad)
+			return network.NewBzz(config, kad, nil, nil, nil), nil, nil
+		},
 	}
 }
-- 
GitLab