From 25478d07d42da86f79820eb1c5a42bfe195bc569 Mon Sep 17 00:00:00 2001 From: Brett Boston Date: Mon, 22 Jul 2024 12:09:31 -0700 Subject: [PATCH] More rigorously test old image in `MixedImageNetworkSurvey` mission Closes #166. Now that the old stable image in SSC supports the V2 survey, this change adds tests that check surveys originating from older nodes. --- .../MissionMixedImageNetworkSurvey.fs | 192 +++++++++--------- 1 file changed, 99 insertions(+), 93 deletions(-) diff --git a/src/FSLibrary/MissionMixedImageNetworkSurvey.fs b/src/FSLibrary/MissionMixedImageNetworkSurvey.fs index 98c5320..d294cca 100644 --- a/src/FSLibrary/MissionMixedImageNetworkSurvey.fs +++ b/src/FSLibrary/MissionMixedImageNetworkSurvey.fs @@ -4,11 +4,12 @@ module MissionMixedImageNetworkSurvey -// This test runs a survey with 1 old node and 2 new nodes. One of the new nodes -// runs the survey. The test primarily checks that no nodes crash. It also -// checks that HTTP endpoint responses indicate success, but it does not check -// the content of those responses for correctness (for example, it does not -// check the contents of the JSON response to the `getsurveyresult` command). +// This test runs a series of survey tests with 1 old node and 2 new nodes. In +// one set of tests a new node runs the survey, in another set an old node runs +// the survey. The test primarily checks that no nodes crash. It also checks +// that HTTP endpoint responses indicate success, but it does not check the +// content of those responses for correctness (for example, it does not check +// the contents of the JSON response to the `getsurveyresult` command). 
open Logging open StellarCoreSet @@ -20,40 +21,113 @@ open StellarCoreHTTP open StellarCorePeer open StellarDotnetSdk.Accounts -let mixedImageNetworkSurvey (context: MissionContext) = - let oldNodeCount = 1 - let newNodeCount = 2 +// Set max survey phase durations to 3 minutes +let private surveyPhaseDurationMinutes = 3 + +// KeyPairs of old and new nodes +let private oldKeys = [| KeyPair.Random() |] +let private newKeys = [| KeyPair.Random(); KeyPair.Random() |] + +// Run a series of survey tests with `surveyor` as the surveyor. `surveyorKeys` +// must be `surveyor`'s KeyPair. +let private testSurvey (surveyor: Peer) (surveyorKeys: KeyPair) = + // Helper functions to run survey commands from `surveyor` + + // Log a response to a survey command `commandName` and check that + // it satisfies `predicate` + let logAndCheckResponse (commandName: string) (response: string) (predicate: string -> bool) = + LogInfo "%s: %s" commandName response + + if not (predicate response) then + failwithf "Survey failed. Unexpected response from '%s': %s" commandName response + + let startSurveyCollecting () = + let nonce = 42 + let expected = "Requested network to start survey collecting." + logAndCheckResponse "startSurveyCollecting" (surveyor.StartSurveyCollecting nonce) ((=) expected) + + let stopSurveyCollecting () = + let expected = "Requested network to stop survey collecting." + logAndCheckResponse "stopSurveyCollecting" (surveyor.StopSurveyCollecting()) ((=) expected) + + let surveyTopologyTimeSliced (node: KeyPair) = + if not (node.Equals surveyorKeys) then + // `surveyTopologyTimeSliced` responds differently based on + // whether this is the first call to it or a subsequent call. + // However, the response starts with "Adding node." on success. + let start = "Adding node." 
+ + logAndCheckResponse + "surveyTopologyTimeSliced" + (surveyor.SurveyTopologyTimeSliced node.AccountId 0 0) + (fun s -> s.StartsWith start) + + let getSurveyResult () = + // This just checks that the survey result starts with `{`, + // indicating that it returned a JSON object. `getsurveyresult` + // returns a string (not starting with `{`) on failure. + logAndCheckResponse "getSurveyResult" (surveyor.GetSurveyResult()) (fun s -> s.StartsWith '{') - // Set max survey phase durations to 3 minutes - let surveyPhaseDurationMinutes = 3 + let waitSeconds (seconds: int) = + LogInfo "Waiting %i seconds" seconds + System.Threading.Thread.Sleep(seconds * 1000) + // Start survey collecting + startSurveyCollecting () + + // Let survey collect for a minute + waitSeconds 60 + + // Stop survey collecting + stopSurveyCollecting () + + // Give message time to propagate + waitSeconds 30 + + // Request results from peers + Array.iter surveyTopologyTimeSliced (Array.append oldKeys newKeys) + + // Give time to propagate and respond + waitSeconds 60 + + // Get results + getSurveyResult () + + // Let survey expire. At this point the survey has spent 1.5 minutes + // in the reporting phase, so knock a minute off of the wait time + // leaving 30 seconds of buffer to ensure survey is truly expired + waitSeconds ((surveyPhaseDurationMinutes - 1) * 60) + + // Test collecting phase expiration by starting a new survey and letting it + // automatically transition to the reporting phase, then expire. 
Add in a + // buffer to ensure the survey is truly expired + startSurveyCollecting () + waitSeconds (surveyPhaseDurationMinutes * 60 * 2 + 30) + +let mixedImageNetworkSurvey (context: MissionContext) = let newImage = context.image let oldImage = GetOrDefault context.oldImage newImage let oldName = "core-old" let newName = "core-new" - // keypairs of old and new surveyed nodes - let oldSurveyedKeys = KeyPair.Random() - let newSurveyedKeys = KeyPair.Random() - let oldCoreSet = { name = CoreSetName oldName - keys = [| oldSurveyedKeys |] + keys = oldKeys live = true options = { CoreSetOptions.GetDefault oldImage with - nodeCount = oldNodeCount + nodeCount = oldKeys.Length accelerateTime = false - surveyPhaseDuration = None } } + surveyPhaseDuration = Some surveyPhaseDurationMinutes } } let newCoreSet = { name = CoreSetName newName - keys = [| KeyPair.Random(); newSurveyedKeys |] + keys = newKeys live = true options = { CoreSetOptions.GetDefault newImage with - nodeCount = newNodeCount + nodeCount = newKeys.Length accelerateTime = false surveyPhaseDuration = Some surveyPhaseDurationMinutes } } @@ -70,78 +144,10 @@ let mixedImageNetworkSurvey (context: MissionContext) = let oldVersion = oldPeer.GetSupportedProtocolVersion() formation.UpgradeProtocol coreSets oldVersion - // Chose a new node to run the survey - let surveyor = formation.NetworkCfg.GetPeer newCoreSet 0 - - // Helper functions to run survey commands from `surveyor` - - // Log a response to a survey command `commandName` and check that - // it satisfies `predicate` - let logAndCheckResponse (commandName: string) (response: string) (predicate: string -> bool) = - LogInfo "%s: %s" commandName response - - if not (predicate response) then - failwithf "Survey failed. Unexpected response from '%s': %s" commandName response - - let startSurveyCollecting () = - let nonce = 42 - let expected = "Requested network to start survey collecting." 
- logAndCheckResponse "startSurveyCollecting" (surveyor.StartSurveyCollecting nonce) ((=) expected) - - let stopSurveyCollecting () = - let expected = "Requested network to stop survey collecting." - logAndCheckResponse "stopSurveyCollecting" (surveyor.StopSurveyCollecting()) ((=) expected) - - let surveyTopologyTimeSliced (node: KeyPair) = - // `surveyTopologyTimeSliced` responds differently based on - // whether this is the first call to it or a subsequent call. - // However, the response starts with "Adding node." on success. - let start = "Adding node." - - logAndCheckResponse - "surveyTopologyTimeSliced" - (surveyor.SurveyTopologyTimeSliced node.AccountId 0 0) - (fun s -> s.StartsWith start) - - let getSurveyResult () = - // This just checks that the survey result starts with `{`, - // indicating that it returned a JSON object. `getsurveyresult` - // returns a string (not starting with `{`) on failure. - logAndCheckResponse "getSurveyResult" (surveyor.GetSurveyResult()) (fun s -> s.StartsWith '{') - - let waitSeconds (seconds: int) = - LogInfo "Waiting %i seconds" seconds - System.Threading.Thread.Sleep(seconds * 1000) - - // Start survey collecting - startSurveyCollecting () - - // Let survey collect for a minute - waitSeconds 60 - - // Stop survey collecting - stopSurveyCollecting () - - // Give message time to propagate - waitSeconds 30 - - // Request results from peers - surveyTopologyTimeSliced oldSurveyedKeys - surveyTopologyTimeSliced newSurveyedKeys - - // Give time to propagate and respond - waitSeconds 60 - - // Get results - getSurveyResult () - - // Let survey expire. 
At this point the survey has spent 1.5 minutes - // in the reporting phase, so knock a minute off of the wait time - // leaving 30 seconds of buffer to ensure survey is truly expired - waitSeconds ((surveyPhaseDurationMinutes - 1) * 60) + // Run test with new node as surveyor + LogInfo "Running survey with new node as surveyor" + testSurvey (formation.NetworkCfg.GetPeer newCoreSet 0) (newKeys.[0]) - // Test collecting phase expiration by starting a new survey and - // letting it expire. Add in a buffer to ensure the survey is truly - // expired - startSurveyCollecting () - waitSeconds (surveyPhaseDurationMinutes * 60 + 30)) + // Run test with old node as surveyor + LogInfo "Running survey with old node as surveyor" + testSurvey oldPeer (oldKeys.[0]))