From 81ed70015704737b3e5329314a62a3a5aaa74e8a Mon Sep 17 00:00:00 2001
From: holisticode <holistic.computing@gmail.com>
Date: Tue, 5 Mar 2019 06:54:46 -0500
Subject: [PATCH] Enable longrunning tests to run (#19208)

* p2p/simulations: increased snapshot load timeout for debugging

* swarm/network/stream: less nodes for snapshot longrunning tests

* swarm/network: fixed longrunning tests

* swarm/network/stream: store kademlia in bucket

* swarm/network/stream: disabled healthy check in delivery tests

* swarm/network/stream: longer SyncUpdateDelay for longrunning tests

* swarm/network/stream: more debug output

* swarm/network/stream: reduced longrunning snapshot tests to 64 nodes

* swarm/network/stream: don't WaitTillHealthy in SyncerSimulation

* swarm/network/stream: cleanup for PR
---
 p2p/simulations/network.go                    |  3 ++-
 swarm/network/stream/common_test.go           |  3 +++
 swarm/network/stream/delivery_test.go         |  6 ------
 .../network/stream/snapshot_retrieval_test.go | 19 +++++++++++++++----
 swarm/network/stream/snapshot_sync_test.go    |  4 ++--
 swarm/network/stream/streamer_test.go         | 16 ++++++++++------
 swarm/network/stream/syncer_test.go           |  4 ----
 7 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/p2p/simulations/network.go b/p2p/simulations/network.go
index 483d4ab87..f03c953e8 100644
--- a/p2p/simulations/network.go
+++ b/p2p/simulations/network.go
@@ -840,7 +840,8 @@ func (net *Network) snapshot(addServices []string, removeServices []string) (*Sn
 	return snap, nil
 }
 
-var snapshotLoadTimeout = 120 * time.Second
+// longrunning tests may need a longer timeout
+var snapshotLoadTimeout = 900 * time.Second
 
 // Load loads a network snapshot
 func (net *Network) Load(snap *Snapshot) error {
diff --git a/swarm/network/stream/common_test.go b/swarm/network/stream/common_test.go
index afd08d275..ec29e16e3 100644
--- a/swarm/network/stream/common_test.go
+++ b/swarm/network/stream/common_test.go
@@ -134,6 +134,9 @@ func netStoreAndDeliveryWithAddr(ctx *adapters.ServiceContext, bucket *sync.Map,
 	bucket.Store(bucketKeyDB, netStore)
 	bucket.Store(bucketKeyDelivery, delivery)
 	bucket.Store(bucketKeyFileStore, fileStore)
+	// for the kademlia object, we use the global key from the simulation package,
+	// as the simulation will try to access it in the WaitTillHealthy with that key
+	bucket.Store(simulation.BucketKeyKademlia, kad)
 
 	cleanup := func() {
 		netStore.Close()
diff --git a/swarm/network/stream/delivery_test.go b/swarm/network/stream/delivery_test.go
index 6ff18fbc6..50b788150 100644
--- a/swarm/network/stream/delivery_test.go
+++ b/swarm/network/stream/delivery_test.go
@@ -534,12 +534,6 @@ func testDeliveryFromNodes(t *testing.T, nodes, chunkCount int, skipCheck bool)
 				return err
 			}
 
-			log.Debug("Waiting for kademlia")
-			// TODO this does not seem to be correct usage of the function, as the simulation may have no kademlias
-			if _, err := sim.WaitTillHealthy(ctx); err != nil {
-				return err
-			}
-
 			//get the pivot node's filestore
 			item, ok := sim.NodeItem(pivot, bucketKeyFileStore)
 			if !ok {
diff --git a/swarm/network/stream/snapshot_retrieval_test.go b/swarm/network/stream/snapshot_retrieval_test.go
index 5e24a39f5..2fdf8e9e3 100644
--- a/swarm/network/stream/snapshot_retrieval_test.go
+++ b/swarm/network/stream/snapshot_retrieval_test.go
@@ -53,7 +53,7 @@ func TestFileRetrieval(t *testing.T) {
 		nodeCount = []int{16}
 
 		if *longrunning {
-			nodeCount = append(nodeCount, 32, 64, 128)
+			nodeCount = append(nodeCount, 32, 64)
 		} else if testutil.RaceEnabled {
 			nodeCount = []int{4}
 		}
@@ -86,7 +86,7 @@ func TestRetrieval(t *testing.T) {
 		chnkCnt := []int{32}
 
 		if *longrunning {
-			nodeCnt = []int{16, 32, 128}
+			nodeCnt = []int{16, 32, 64}
 			chnkCnt = []int{4, 32, 256}
 		} else if testutil.RaceEnabled {
 			nodeCnt = []int{4}
@@ -113,10 +113,15 @@ var retrievalSimServiceMap = map[string]simulation.ServiceFunc{
 			return nil, nil, err
 		}
 
+		syncUpdateDelay := 1 * time.Second
+		if *longrunning {
+			syncUpdateDelay = 3 * time.Second
+		}
+
 		r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{
 			Retrieval:       RetrievalEnabled,
 			Syncing:         SyncingAutoSubscribe,
-			SyncUpdateDelay: 3 * time.Second,
+			SyncUpdateDelay: syncUpdateDelay,
 		}, nil)
 
 		cleanup = func() {
@@ -140,7 +145,7 @@ func runFileRetrievalTest(nodeCount int) error {
 	sim := simulation.New(retrievalSimServiceMap)
 	defer sim.Close()
 
-	log.Info("Initializing test config")
+	log.Info("Initializing test config", "node count", nodeCount)
 
 	conf := &synctestConfig{}
 	//map of discover ID to indexes of chunks expected at that ID
@@ -158,6 +163,8 @@ func runFileRetrievalTest(nodeCount int) error {
 	ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancelSimRun()
 
+	log.Info("Starting simulation")
+
 	result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) error {
 		nodeIDs := sim.UpNodeIDs()
 		for _, n := range nodeIDs {
@@ -185,6 +192,8 @@ func runFileRetrievalTest(nodeCount int) error {
 			return err
 		}
 
+		log.Info("network healthy, start file checks")
+
 		// File retrieval check is repeated until all uploaded files are retrieved from all nodes
 		// or until the timeout is reached.
 	REPEAT:
@@ -212,6 +221,8 @@ func runFileRetrievalTest(nodeCount int) error {
 		}
 	})
 
+	log.Info("Simulation terminated")
+
 	if result.Error != nil {
 		return result.Error
 	}
diff --git a/swarm/network/stream/snapshot_sync_test.go b/swarm/network/stream/snapshot_sync_test.go
index 330f39712..9737ec0a5 100644
--- a/swarm/network/stream/snapshot_sync_test.go
+++ b/swarm/network/stream/snapshot_sync_test.go
@@ -94,8 +94,8 @@ func TestSyncingViaGlobalSync(t *testing.T) {
 		//if the `longrunning` flag has been provided
 		//run more test combinations
 		if *longrunning {
-			chunkCounts = []int{1, 8, 32, 256, 1024}
-			nodeCounts = []int{16, 32, 64, 128, 256}
+			chunkCounts = []int{64, 128}
+			nodeCounts = []int{32, 64}
 		}
 
 		for _, chunkCount := range chunkCounts {
diff --git a/swarm/network/stream/streamer_test.go b/swarm/network/stream/streamer_test.go
index 755b74537..56e5e8903 100644
--- a/swarm/network/stream/streamer_test.go
+++ b/swarm/network/stream/streamer_test.go
@@ -1188,12 +1188,13 @@ func TestGetSubscriptionsRPC(t *testing.T) {
 
 	// arbitrarily set to 4
 	nodeCount := 4
+	// set the syncUpdateDelay for sync registrations to start
+	syncUpdateDelay := 200 * time.Millisecond
 	// run with more nodes if `longrunning` flag is set
 	if *longrunning {
 		nodeCount = 64
+		syncUpdateDelay = 10 * time.Second
 	}
-	// set the syncUpdateDelay for sync registrations to start
-	syncUpdateDelay := 200 * time.Millisecond
 	// holds the msg code for SubscribeMsg
 	var subscribeMsgCode uint64
 	var ok bool
@@ -1241,7 +1242,7 @@ func TestGetSubscriptionsRPC(t *testing.T) {
 	})
 	defer sim.Close()
 
-	ctx, cancelSimRun := context.WithTimeout(context.Background(), 1*time.Minute)
+	ctx, cancelSimRun := context.WithTimeout(context.Background(), 3*time.Minute)
 	defer cancelSimRun()
 
 	// upload a snapshot
@@ -1267,6 +1268,9 @@ func TestGetSubscriptionsRPC(t *testing.T) {
 	go func() {
 		//for long running sims, waiting 1 sec will not be enough
 		waitDuration := time.Duration(nodeCount/16) * time.Second
+		if *longrunning {
+			waitDuration = syncUpdateDelay
+		}
 		for {
 			select {
 			case <-ctx.Done():
@@ -1328,11 +1332,11 @@ func TestGetSubscriptionsRPC(t *testing.T) {
 					}
 				}
 			}
+			log.Debug("All node streams counted", "realCount", realCount)
 		}
-		// every node is mutually subscribed to each other, so the actual count is half of it
 		emc := expectedMsgCount.count()
-		if realCount/2 != emc {
-			return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount/2, emc)
+		if realCount != emc {
+			return fmt.Errorf("Real subscriptions and expected amount don't match; real: %d, expected: %d", realCount, emc)
 		}
 		return nil
 	})
diff --git a/swarm/network/stream/syncer_test.go b/swarm/network/stream/syncer_test.go
index df3008381..07586714e 100644
--- a/swarm/network/stream/syncer_test.go
+++ b/swarm/network/stream/syncer_test.go
@@ -173,10 +173,6 @@ func testSyncBetweenNodes(t *testing.T, nodes, chunkCount int, skipCheck bool, p
 			}
 		}
 		// here we distribute chunks of a random file into stores 1...nodes
-		if _, err := sim.WaitTillHealthy(ctx); err != nil {
-			return err
-		}
-
 		// collect hashes in po 1 bin for each node
 		hashes := make([][]storage.Address, nodes)
 		totalHashes := 0
-- 
GitLab