Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix attnum mismatch in pg_statistic after drop column and database restore #90

Merged
merged 19 commits into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions backup/queries_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ type AttributeStatistic struct {
AttName string
Type string
Relid uint32 `db:"starelid"`
AttNumber int `db:"staattnum"`
Inherit bool `db:"stainherit"`
NullFraction float64 `db:"stanullfrac"`
Width int `db:"stawidth"`
Expand Down Expand Up @@ -81,10 +80,9 @@ func GetAttributeStatistics(connectionPool *dbconn.DBConn, tables []Table, proce
SELECT c.oid,
quote_ident(n.nspname) AS schema,
quote_ident(c.relname) AS table,
quote_ident(a.attname) AS attname,
a.attname,
RekGRpth marked this conversation as resolved.
Show resolved Hide resolved
quote_ident(t.typname) AS type,
s.starelid,
s.staattnum,
%s
s.stanullfrac,
s.stawidth,
Expand Down
12 changes: 7 additions & 5 deletions backup/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,16 @@ func GenerateAttributeStatisticsQueries(table Table, attStat AttributeStatistic)
attributeSlotsQueryStr = generateAttributeSlotsQuery4(attStat)
}

attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE starelid = %s AND staattnum = %d;`, starelidStr, attStat.AttNumber))
attributeQueries = append(attributeQueries, fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
%s,
%d::smallint,%s
attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = %s AND attname = '%s');`, starelidStr, utils.EscapeSingleQuotes(attStat.AttName)))
attributeQueries = append(attributeQueries, fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%s
%f::real,
%d::integer,
%f::real,
%s);`, starelidStr, attStat.AttNumber, inheritStr, attStat.NullFraction, attStat.Width, attStat.Distinct, attributeSlotsQueryStr))
%s
FROM pg_attribute WHERE attrelid = %s AND attname = '%s';`, inheritStr, attStat.NullFraction, attStat.Width, attStat.Distinct, attributeSlotsQueryStr, starelidStr, utils.EscapeSingleQuotes(attStat.AttName)))

/*
* If a type name starts with exactly one underscore, it describes an array
Expand Down
56 changes: 31 additions & 25 deletions backup/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var _ = Describe("backup/statistics tests", func() {
tupleStat2 := backup.TupleStatistic{Schema: "testschema", Table: "testtable2"}
attStat2 := []backup.AttributeStatistic{
{Schema: "testschema", Table: "testtable2", AttName: "testattWithArray", Type: "_array"},
{Schema: "testschema", Table: "testtable2", AttName: "testatt", Type: "_array", Relid: 2, AttNumber: 3, NullFraction: .4,
{Schema: "testschema", Table: "testtable2", AttName: "testatt", Type: "_array", Relid: 2, NullFraction: .4,
Width: 10, Distinct: .5, Kind1: 20, Operator1: 10, Numbers1: []string{"1", "2", "3"}, Values1: []string{"4", "5", "6"}},
}

Expand Down Expand Up @@ -100,11 +100,11 @@ SET
reltuples = 0.000000::real
WHERE oid = 'testschema.testtable2'::regclass::oid;`,

`DELETE FROM pg_statistic WHERE starelid = 'testschema.testtable2'::regclass::oid AND staattnum = 0;`,

fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema.testtable2'::regclass::oid,
0::smallint,%[1]s
`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray');`,
fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%[1]s
0.000000::real,
0::integer,
0.000000::real,
Expand All @@ -123,13 +123,14 @@ WHERE oid = 'testschema.testtable2'::regclass::oid;`,
NULL,
NULL,
NULL,%[5]s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),

`DELETE FROM pg_statistic WHERE starelid = 'testschema.testtable2'::regclass::oid AND staattnum = 3;`,
NULL
FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),

fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema.testtable2'::regclass::oid,
3::smallint,%[1]s
`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testatt');`,
fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%[1]s
0.400000::real,
10::integer,
0.500000::real,
Expand All @@ -148,7 +149,8 @@ WHERE oid = 'testschema.testtable2'::regclass::oid;`,
NULL,
NULL,
NULL,%[5]s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),
NULL
FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testatt';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),
}
testutils.AssertBufferContents(tocfile.StatisticsEntries, buffer, expected...)
})
Expand All @@ -171,20 +173,21 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))

It("generates attribute statistics query for array type", func() {
attStats := backup.AttributeStatistic{Schema: "testschema", Table: "testtable", AttName: "testatt", Type: "_array", Relid: 2,
AttNumber: 3, NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
Numbers1: pq.StringArray([]string{"1", "2", "3"}), Values1: pq.StringArray([]string{"4", "5", "6"})}
if connectionPool.Version.AtLeast("6") {
attStats.Kind5 = 10
attStats.Operator5 = 12
}

attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats)
Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE starelid = 'testschema."test''table"'::regclass::oid AND staattnum = 3;`)))
Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`)))

insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(0, 0)
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema."test''table"'::regclass::oid,
3::smallint,%s
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%s
0.400000::real,
10::integer,
0.500000::real,
Expand All @@ -203,11 +206,12 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))
NULL,
NULL,
NULL,%s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
NULL
FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
})
It("generates attribute statistics query for non-array type", func() {
attStats := backup.AttributeStatistic{Schema: "testschema", Table: "testtable", AttName: "testatt", Type: "testtype", Relid: 2,
AttNumber: 3, NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
Numbers1: pq.StringArray([]string{"1", "2", "3"}), Values1: pq.StringArray([]string{"4", "5", "6"})}
if connectionPool.Version.AtLeast("6") {
attStats.Kind5 = 10
Expand All @@ -216,12 +220,13 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))

attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats)

Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE starelid = 'testschema."test''table"'::regclass::oid AND staattnum = 3;`)))
Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`)))

insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(10, 12)
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema."test''table"'::regclass::oid,
3::smallint,%s
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%s
0.400000::real,
10::integer,
0.500000::real,
Expand All @@ -240,7 +245,8 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))
array_in('{"4","5","6"}', 'testtype'::regtype::oid, -1),
NULL,
NULL,%s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
NULL
FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
})
})
Describe("AnyValues", func() {
Expand Down
18 changes: 18 additions & 0 deletions end_to_end/end_to_end_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -969,18 +969,28 @@ var _ = Describe("backup and restore end to end tests", func() {
"DROP INDEX schema2.foo3_idx1")
testhelper.AssertQueryRuns(backupConn,
"ANALYZE schema2.foo3")
testhelper.AssertQueryRuns(backupConn,
`CREATE TABLE schema2.foo4 ("schema2." TEXT)`)
defer testhelper.AssertQueryRuns(backupConn,
"DROP TABLE schema2.foo4")
testhelper.AssertQueryRuns(backupConn,
`INSERT INTO schema2.foo4 VALUES ('schema2.'),('schema2.')`)
testhelper.AssertQueryRuns(backupConn,
"ANALYZE schema2.foo4")
output := gpbackup(gpbackupPath, backupHelperPath,
"--with-stats")
timestamp := getBackupTimestamp(string(output))

gprestore(gprestorePath, restoreHelperPath, timestamp,
"--redirect-db", "restoredb",
"--include-table", "schema2.foo3",
"--include-table", "schema2.foo4",
"--redirect-schema", "schema3",
"--with-stats")

schema3TupleCounts := map[string]int{
"schema3.foo3": 100,
"schema3.foo4": 2,
}
assertDataRestored(restoreConn, schema3TupleCounts)
assertPGClassStatsRestored(restoreConn, restoreConn, schema3TupleCounts)
Expand All @@ -992,6 +1002,14 @@ var _ = Describe("backup and restore end to end tests", func() {
actualStatisticCount := dbconn.MustSelectString(restoreConn,
`SELECT count(*) FROM pg_statistic WHERE starelid='schema3.foo3'::regclass::oid;`)
Expect(actualStatisticCount).To(Equal("1"))

actualStatisticCount = dbconn.MustSelectString(restoreConn,
`SELECT count(*) FROM pg_statistic WHERE starelid='schema3.foo4'::regclass::oid;`)
Expect(actualStatisticCount).To(Equal("1"))

stavaluesTableFoo4 := dbconn.MustSelectString(restoreConn,
`SELECT stavalues1 FROM pg_statistic WHERE starelid='schema3.foo4'::regclass::oid;`)
Expect(stavaluesTableFoo4).To(Equal("{schema2.}"))
})
It("runs gprestore with --redirect-schema to redirect data back to the original database which still contain the original tables", func() {
skipIfOldBackupVersionBefore("1.17.0")
Expand Down
58 changes: 58 additions & 0 deletions integration/statistics_create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,64 @@ var _ = Describe("backup integration tests", func() {
structmatcher.ExpectStructsToMatchExcluding(&oldAtts[i], &newAtts[i], "Oid", "Relid")
}
})
It("prints attribute and tuple statistics for a table with dropped column", func() {
tables := []backup.Table{
{Relation: backup.Relation{SchemaOid: 2200, Schema: "public", Name: "foo"}},
}

// Create and ANALYZE a table to generate statistics
testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public.foo(i int, j text, k bool, "i'2 3" int)`)
defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.foo")
testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.foo VALUES (1, 'a', 't', 1)")
testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.foo VALUES (2, 'b', 'f', 2)")
testhelper.AssertQueryRuns(connectionPool, "ANALYZE public.foo")
testhelper.AssertQueryRuns(connectionPool, "ALTER TABLE public.foo DROP COLUMN j")

oldTableOid := testutils.OidFromObjectName(connectionPool, "public", "foo", backup.TYPE_RELATION)
tables[0].Oid = oldTableOid

beforeAttStats := make(map[uint32][]backup.AttributeStatistic)
backup.GetAttributeStatistics(connectionPool, tables, func(attStat *backup.AttributeStatistic) {
beforeAttStats[attStat.Oid] = append(beforeAttStats[attStat.Oid], *attStat)
})
beforeTupleStats := make(map[uint32]backup.TupleStatistic)
backup.GetTupleStatistics(connectionPool, tables, func(tupleStat *backup.TupleStatistic) {
beforeTupleStats[tupleStat.Oid] = *tupleStat
})
beforeTupleStat := beforeTupleStats[oldTableOid]

// Drop and recreate the table to clear the statistics
testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.foo")
testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public.foo(i int, k bool, "i'2 3" int)`)

// Reload the retrieved statistics into the new table
PrintStatisticsStatements(backupfile, tocfile, tables, beforeAttStats, beforeTupleStats)
testhelper.AssertQueryRuns(connectionPool, buffer.String())

newTableOid := testutils.OidFromObjectName(connectionPool, "public", "foo", backup.TYPE_RELATION)
tables[0].Oid = newTableOid
afterAttStats := make(map[uint32][]backup.AttributeStatistic)
backup.GetAttributeStatistics(connectionPool, tables, func(attStat *backup.AttributeStatistic) {
afterAttStats[attStat.Oid] = append(afterAttStats[attStat.Oid], *attStat)
})
afterTupleStats := make(map[uint32]backup.TupleStatistic)
backup.GetTupleStatistics(connectionPool, tables, func(tupleStat *backup.TupleStatistic) {
afterTupleStats[tupleStat.Oid] = *tupleStat
})
afterTupleStat := afterTupleStats[newTableOid]

oldAtts := beforeAttStats[oldTableOid]
newAtts := afterAttStats[newTableOid]

// Ensure the statistics match
Expect(afterTupleStats).To(HaveLen(len(beforeTupleStats)))
structmatcher.ExpectStructsToMatchExcluding(&beforeTupleStat, &afterTupleStat, "Oid")
Expect(oldAtts).To(HaveLen(3))
Expect(newAtts).To(HaveLen(3))
for i := range oldAtts {
structmatcher.ExpectStructsToMatchExcluding(&oldAtts[i], &newAtts[i], "Oid", "Relid")
}
})
It("prints attribute and tuple statistics for a quoted table", func() {
tables := []backup.Table{
{Relation: backup.Relation{SchemaOid: 2200, Schema: "public", Name: "\"foo'\"\"''bar\""}},
Expand Down
6 changes: 3 additions & 3 deletions integration/statistics_queries_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ var _ = Describe("backup integration tests", func() {
* the same schema and data.
*/
expectedStats5I := backup.AttributeStatistic{Oid: tableOid, Schema: "public", Table: "foo", AttName: "i",
Type: "int4", Relid: tableOid, AttNumber: 1, Inherit: false, Width: 4, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 97,
Type: "int4", Relid: tableOid, Inherit: false, Width: 4, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 97,
Operator2: 97, Numbers2: []string{"1"}, Values1: []string{"1", "2", "3", "4"}}
expectedStats5J := backup.AttributeStatistic{Oid: tableOid, Schema: "public", Table: "foo", AttName: "j",
Type: "text", Relid: tableOid, AttNumber: 2, Inherit: false, Width: 2, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 664,
Type: "text", Relid: tableOid, Inherit: false, Width: 2, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 664,
Operator2: 664, Numbers2: []string{"1"}, Values1: []string{"a", "b", "c", "d"}}
expectedStats5K := backup.AttributeStatistic{Oid: tableOid, Schema: "public", Table: "foo", AttName: "k",
Type: "bool", Relid: tableOid, AttNumber: 3, Inherit: false, Width: 1, Distinct: -0.5, Kind1: 1, Kind2: 3, Operator1: 91,
Type: "bool", Relid: tableOid, Inherit: false, Width: 1, Distinct: -0.5, Kind1: 1, Kind2: 3, Operator1: 91,
Operator2: 58, Numbers1: []string{"0.5", "0.5"}, Numbers2: []string{"0.5"}, Values1: []string{"f", "t"}}
if connectionPool.Version.AtLeast("7") {
expectedStats5J.Collation1 = 100
Expand Down
14 changes: 13 additions & 1 deletion restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,16 @@ func editStatementsRedirectSchema(statements []toc.StatementWithType, redirectSc
return
}

schemaMatch := `(?:".+?"|[^.]+?)` // matches either an unquoted schema with no dots or a quoted schema containing dots
schemaMatch := `(?:".+?"|[^."]+?)` // matches either an unquoted schema with no dots or quotes or a quoted schema containing dots
// This expression matches a GRANT or REVOKE statement on any object and captures the old schema name
permissionsRE := regexp.MustCompile(fmt.Sprintf(`(?m)(^(?:REVOKE|GRANT) .+ ON .+?) (%s)((\..+)? (?:FROM|TO) .+)`, schemaMatch))
// This expression matches an ATTACH PARTITION statement and captures both the parent and child schema names
attachRE := regexp.MustCompile(fmt.Sprintf(`(ALTER TABLE(?: ONLY)?) (%[1]s)(\..+ ATTACH PARTITION) (%[1]s)(\..+)`, schemaMatch))
// This expression matches a '<schema>.<table>'::regclass::oid expression.
tableMatch := schemaMatch // table name should follow the same rules as schema name
regclassOidRE := regexp.MustCompile(fmt.Sprintf(`'(%s)((\.%s)'\:\:regclass\:\:oid)`, schemaMatch, tableMatch))
// This expression matches the last occurence of the '<schema>.<table>'::regclass::oid expression
lastRegclassOidRE := regexp.MustCompile(fmt.Sprintf(`(?s)^(.*)(%s)(.*?)$`, regclassOidRE))
for i := range statements {
oldSchema := fmt.Sprintf("%s.", statements[i].Schema)
newSchema := fmt.Sprintf("%s.", redirectSchema)
Expand All @@ -440,6 +445,13 @@ func editStatementsRedirectSchema(statements []toc.StatementWithType, redirectSc
replaced = true
}

// Statistic statements needs one schema replacements. We replace the last occurence to avoid a very small
// chance that schema name was in the statistic data itself, that we do not want to alter.
if statements[i].ObjectType == toc.OBJ_STATISTICS {
statement = lastRegclassOidRE.ReplaceAllString(statement, fmt.Sprintf("${1}'%s${4}$6", redirectSchema))
replaced = true
}

// Only do a general replace if we haven't replaced anything yet, to avoid e.g. hitting a schema in a VIEW definition
if !replaced {
statement = strings.Replace(statement, oldSchema, newSchema, 1)
Expand Down
Loading
Loading