From 0e637c079a158b3f6f274d164bc2878f86fc0dcc Mon Sep 17 00:00:00 2001 From: Austen Lacy Date: Fri, 28 Apr 2023 13:02:26 -0400 Subject: [PATCH] Fix bug in `SwitchTraffic` that wasn't respecting `--dry_run` for readonly and replica tablets during a resharding event (#12992) * use switcher struct when switching shard reads during a reshard event Signed-off-by: austenLacy * Create failing test for bug reported in #12992, where a TrafficSwitch dry run for reads during resharding tries to actually switch reads and fails Signed-off-by: Rohit Nayak --------- Signed-off-by: austenLacy Signed-off-by: Rohit Nayak Co-authored-by: Rohit Nayak --- .../vreplication/vreplication_test.go | 40 +++++++++++++----- .../vreplication/vreplication_test_env.go | 42 ++----------------- go/test/endtoend/vreplication/vstream_test.go | 5 ++- go/vt/wrangler/traffic_switcher.go | 2 +- 4 files changed, 37 insertions(+), 52 deletions(-) diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 74e724ad52e..8c33d9b9038 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -744,7 +744,8 @@ func reshardCustomer2to4Split(t *testing.T, cells []*Cell, sourceCellOrAlias str t.Run("reshardCustomer2to4Split", func(t *testing.T) { ksName := "customer" counts := map[string]int{"zone1-600": 4, "zone1-700": 5, "zone1-800": 6, "zone1-900": 5} - reshard(t, ksName, "customer", "c2c4", "-80,80-", "-40,40-80,80-c0,c0-", 600, counts, nil, cells, sourceCellOrAlias) + reshard(t, ksName, "customer", "c2c4", "-80,80-", "-40,40-80,80-c0,c0-", + 600, counts, nil, nil, cells, sourceCellOrAlias, 1) waitForRowCount(t, vtgateConn, ksName, "customer", 20) query := "insert into customer (name) values('yoko')" execVtgateQuery(t, vtgateConn, ksName, query) @@ -756,7 +757,8 @@ func reshardMerchant2to3SplitMerge(t *testing.T) { t.Run("reshardMerchant2to3SplitMerge", func(t *testing.T) { ksName := merchantKeyspace counts := map[string]int{"zone1-1600": 0, "zone1-1700": 2, "zone1-1800": 0} - reshard(t, ksName, "merchant", "m2m3", "-80,80-", "-40,40-c0,c0-", 1600, counts, dryRunResultsSwitchWritesM2m3, nil, "") + reshard(t, ksName, "merchant", "m2m3", "-80,80-", "-40,40-c0,c0-", + 1600, counts, dryRunResultsSwitchReadM2m3, dryRunResultsSwitchWritesM2m3, nil, "", 1) waitForRowCount(t, vtgateConn, ksName, "merchant", 2) query := "insert into merchant (mname, category) values('amazon', 'electronics')" execVtgateQuery(t, vtgateConn, ksName, query) @@ -802,7 +804,8 @@ func reshardMerchant3to1Merge(t *testing.T) { t.Run("reshardMerchant3to1Merge", func(t *testing.T) { ksName := merchantKeyspace counts := map[string]int{"zone1-2000": 3} - reshard(t, ksName, "merchant", "m3m1", "-40,40-c0,c0-", "0", 2000, counts, nil, nil, "") + reshard(t, ksName, "merchant", "m3m1", "-40,40-c0,c0-", "0", + 2000, counts, nil, nil, nil, "", 1) waitForRowCount(t, vtgateConn, ksName, "merchant", 3) query := "insert into merchant (mname, category) values('flipkart', 'electronics')" execVtgateQuery(t, vtgateConn, ksName, query) @@ -814,7 +817,8 @@ func reshardCustomer3to2SplitMerge(t *testing.T) { //-40,40-80,80-c0 => merge/sp t.Run("reshardCustomer3to2SplitMerge", func(t *testing.T) { ksName := "customer" counts := map[string]int{"zone1-1000": 8, "zone1-1100": 8, "zone1-1200": 5} - reshard(t, ksName, "customer", "c4c3", "-40,40-80,80-c0", "-60,60-c0", 1000, counts, nil, nil, "") + reshard(t, ksName, "customer", "c4c3", "-40,40-80,80-c0", "-60,60-c0", + 1000, counts, nil, nil, nil, "", 1) }) } @@ -822,11 +826,14 @@ func reshardCustomer3to1Merge(t *testing.T) { //to unsharded t.Run("reshardCustomer3to1Merge", func(t *testing.T) { ksName := "customer" counts := map[string]int{"zone1-1500": 21} - reshard(t, ksName, "customer", "c3c1", "-60,60-c0,c0-", "0", 1500, counts, nil, nil, "") + reshard(t, ksName, "customer", "c3c1", "-60,60-c0,c0-", "0", + 1500, counts, nil, nil, nil, "", 3) }) } -func reshard(t *testing.T, ksName string, tableName string, workflow string, sourceShards string, targetShards string, tabletIDBase int, counts map[string]int, dryRunResultSwitchWrites []string, cells []*Cell, sourceCellOrAlias string) { +func reshard(t *testing.T, ksName string, tableName string, workflow string, sourceShards string, targetShards string, + tabletIDBase int, counts map[string]int, dryRunResultSwitchReads, dryRunResultSwitchWrites []string, cells []*Cell, sourceCellOrAlias string, + autoIncrementStep int) { t.Run("reshard", func(t *testing.T) { if cells == nil { cells = []*Cell{defaultCell} @@ -859,7 +866,10 @@ func reshard(t *testing.T, ksName string, tableName string, workflow string, sou } } vdiff1(t, ksWorkflow, "") - switchReads(t, allCellNames, ksWorkflow) + if dryRunResultSwitchReads != nil { + switchReadsDryRun(t, workflowType, allCellNames, ksWorkflow, dryRunResultSwitchReads) + } + switchReads(t, workflowType, allCellNames, ksWorkflow, false) if dryRunResultSwitchWrites != nil { switchWritesDryRun(t, ksWorkflow, dryRunResultSwitchWrites) } @@ -1193,10 +1203,18 @@ func applyVSchema(t *testing.T, vschema, keyspace string) { require.NoError(t, err) } -func switchReadsDryRun(t *testing.T, cells, ksWorkflow string, dryRunResults []string) { - output, err := vc.VtctlClient.ExecuteCommandWithOutput("SwitchReads", "--", "--cells="+cells, "--tablet_types=replica", "--dry_run", ksWorkflow) - require.NoError(t, err, fmt.Sprintf("SwitchReads DryRun Error: %s: %s", err, output)) - validateDryRunResults(t, output, dryRunResults) +func switchReadsDryRun(t *testing.T, workflowType, cells, ksWorkflow string, dryRunResults []string) { + if workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_MoveTables)] && + workflowType != binlogdatapb.VReplicationWorkflowType_name[int32(binlogdatapb.VReplicationWorkflowType_Reshard)] { + require.FailNowf(t, "Invalid workflow type for SwitchTraffic, must be MoveTables or Reshard", + "workflow type specified: %s", workflowType) + } + output, err := vc.VtctlClient.ExecuteCommandWithOutput(workflowType, "--", "--cells="+cells, "--tablet_types=rdonly,replica", + "--dry_run", "SwitchTraffic", ksWorkflow) + require.NoError(t, err, fmt.Sprintf("Switching Reads DryRun Error: %s: %s", err, output)) + if dryRunResults != nil { + validateDryRunResults(t, output, dryRunResults) + } } func switchReads(t *testing.T, cells, ksWorkflow string) { diff --git a/go/test/endtoend/vreplication/vreplication_test_env.go b/go/test/endtoend/vreplication/vreplication_test_env.go index 8329ae5e490..a357b01846f 100644 --- a/go/test/endtoend/vreplication/vreplication_test_env.go +++ b/go/test/endtoend/vreplication/vreplication_test_env.go @@ -86,42 +86,8 @@ var dryRunResultsSwitchWritesM2m3 = []string{ "Unlock keyspace merchant-type", } -var dryRunResultsDropSourcesDropCustomerShard = []string{ - "Lock keyspace product", - "Lock keyspace customer", - "Dropping these tables from the database and removing them from the vschema for keyspace product:", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table Lead", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table Lead-1", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table customer", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table db_order_test", - "Denied tables [Lead,Lead-1,customer,db_order_test] will be removed from:", - " Keyspace product Shard 0 Tablet 100", - "Delete reverse vreplication streams on source:", - " Keyspace product Shard 0 Workflow p2c_reverse DbName vt_product Tablet 100", - "Delete vreplication streams on target:", - " Keyspace customer Shard -80 Workflow p2c DbName vt_customer Tablet 200", - " Keyspace customer Shard 80- Workflow p2c DbName vt_customer Tablet 300", - "Routing rules for participating tables will be deleted", - "Unlock keyspace customer", - "Unlock keyspace product", -} - -var dryRunResultsDropSourcesRenameCustomerShard = []string{ - "Lock keyspace product", - "Lock keyspace customer", - "Renaming these tables from the database and removing them from the vschema for keyspace product:", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table Lead", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table Lead-1", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table customer", - " Keyspace product Shard 0 DbName vt_product Tablet 100 Table db_order_test", - "Denied tables [Lead,Lead-1,customer,db_order_test] will be removed from:", - " Keyspace product Shard 0 Tablet 100", - "Delete reverse vreplication streams on source:", - " Keyspace product Shard 0 Workflow p2c_reverse DbName vt_product Tablet 100", - "Delete vreplication streams on target:", - " Keyspace customer Shard -80 Workflow p2c DbName vt_customer Tablet 200", - " Keyspace customer Shard 80- Workflow p2c DbName vt_customer Tablet 300", - "Routing rules for participating tables will be deleted", - "Unlock keyspace customer", - "Unlock keyspace product", +var dryRunResultsSwitchReadM2m3 = []string{ + "Lock keyspace merchant-type", + "Switch reads from keyspace merchant-type to keyspace merchant-type for shards -80,80- to shards -40,40-c0,c0-", + "Unlock keyspace merchant-type", } diff --git a/go/test/endtoend/vreplication/vstream_test.go b/go/test/endtoend/vreplication/vstream_test.go index 6899e7c583b..d3b409d7cf4 100644 --- a/go/test/endtoend/vreplication/vstream_test.go +++ b/go/test/endtoend/vreplication/vstream_test.go @@ -369,7 +369,8 @@ func testVStreamStopOnReshardFlag(t *testing.T, stopOnReshard bool, baseTabletID tickCount++ switch tickCount { case 1: - reshard(t, "sharded", "customer", "vstreamStopOnReshard", "-80,80-", "-40,40-", baseTabletID+400, nil, nil, nil, defaultCellName) + reshard(t, "sharded", "customer", "vstreamStopOnReshard", "-80,80-", + "-40,40-", baseTabletID+400, nil, nil, nil, nil, defaultCellName, 1) case 60: done = true } @@ -499,7 +500,7 @@ func testVStreamCopyMultiKeyspaceReshard(t *testing.T, baseTabletID int) numEven tickCount++ switch tickCount { case 1: - reshard(t, "sharded", "customer", "vstreamCopyMultiKeyspaceReshard", "-80,80-", "-40,40-", baseTabletID+400, nil, nil, nil, defaultCellName) + reshard(t, "sharded", "customer", "vstreamCopyMultiKeyspaceReshard", "-80,80-", "-40,40-", baseTabletID+400, nil, nil, nil, nil, defaultCellName, 1) reshardDone = true case 60: done = true diff --git a/go/vt/wrangler/traffic_switcher.go b/go/vt/wrangler/traffic_switcher.go index 9a98dfe24b1..6dd04501d86 100644 --- a/go/vt/wrangler/traffic_switcher.go +++ b/go/vt/wrangler/traffic_switcher.go @@ -397,7 +397,7 @@ func (wr *Wrangler) SwitchReads(ctx context.Context, targetKeyspace, workflowNam return sw.logs(), nil } wr.Logger().Infof("About to switchShardReads: %+v, %+v, %+v", cells, servedTypes, direction) - if err := ts.switchShardReads(ctx, cells, servedTypes, direction); err != nil { + if err := sw.switchShardReads(ctx, cells, servedTypes, direction); err != nil { ts.Logger().Errorf("switchShardReads failed: %v", err) return nil, err }