From 911f4a620ea569307bd999df257dfc232a10e55a Mon Sep 17 00:00:00 2001 From: Dilip Kumar Date: Wed, 17 Mar 2021 13:53:08 +0530 Subject: [PATCH v38 5/6] Alter table set compression Add support for changing the compression method associated with a column. There are only built-in methods so we don't need to rewrite the table, only the new tuples will be compressed with the new compression method. Dilip Kumar based on the patches from Ildus Kurbangaliev. Design input from Robert Haas and Tomas Vondra. Reviewed by Robert Haas, Tomas Vondra, Alexander Korotkov and Justin Pryzby --- doc/src/sgml/ref/alter_table.sgml | 16 +++ src/backend/access/heap/heapam_handler.c | 42 ++++++ src/backend/commands/tablecmds.c | 198 ++++++++++++++++++++++------ src/backend/parser/gram.y | 9 ++ src/bin/psql/tab-complete.c | 2 +- src/include/nodes/parsenodes.h | 3 +- src/test/regress/expected/compression.out | 47 ++++++- src/test/regress/expected/compression_1.out | 48 ++++++- src/test/regress/sql/compression.sql | 20 +++ 9 files changed, 338 insertions(+), 47 deletions(-) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index f82dce4..77adc9c 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -54,6 +54,7 @@ ALTER TABLE [ IF EXISTS ] name ALTER [ COLUMN ] column_name SET ( attribute_option = value [, ... ] ) ALTER [ COLUMN ] column_name RESET ( attribute_option [, ... ] ) ALTER [ COLUMN ] column_name SET STORAGE { PLAIN | EXTERNAL | EXTENDED | MAIN } + ALTER [ COLUMN ] column_name SET COMPRESSION compression_method ADD table_constraint [ NOT VALID ] ADD table_constraint_using_index ALTER CONSTRAINT constraint_name [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ] @@ -103,6 +104,7 @@ WITH ( MODULUS numeric_literal, REM GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] | UNIQUE index_parameters | PRIMARY KEY index_parameters | + COMPRESSION compression_method | REFERENCES reftable [ ( refcolumn ) ] [ MATCH FULL | MATCH PARTIAL | MATCH SIMPLE ] [ ON DELETE referential_action ] [ ON UPDATE referential_action ] } [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ] @@ -384,6 +386,20 @@ WITH ( MODULUS numeric_literal, REM + + SET COMPRESSION compression_method + + + + This sets the compression method for a column. The supported compression + methods are pglz and lz4. + lz4 is available only if --with-lz4 + was used when building PostgreSQL. + + + + + ADD table_constraint [ NOT VALID ] diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index bd5faf0..cbac0a1 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -19,6 +19,7 @@ */ #include "postgres.h" +#include "access/detoast.h" #include "access/genam.h" #include "access/heapam.h" #include "access/heaptoast.h" @@ -26,6 +27,7 @@ #include "access/rewriteheap.h" #include "access/syncscan.h" #include "access/tableam.h" +#include "access/toast_compression.h" #include "access/tsmapi.h" #include "access/xact.h" #include "catalog/catalog.h" @@ -2469,6 +2471,46 @@ reform_and_rewrite_tuple(HeapTuple tuple, { if (TupleDescAttr(newTupDesc, i)->attisdropped) isnull[i] = true; + + /* + * Since we are rewriting the table, use this opportunity to + * recompress any compressed attribute with current compression method + * of the attribute. Basically, if the compression method of the + * compressed varlena is not same as current compression method of the + * attribute then decompress it so that if it need to be compressed + * then it will be compressed with the current compression method of + * the attribute. + */ + else if (!isnull[i] && TupleDescAttr(newTupDesc, i)->attlen == -1) + { + struct varlena *new_value; + ToastCompressionId cmid; + char cmethod; + + new_value = (struct varlena *) DatumGetPointer(values[i]); + cmid = toast_get_compression_id(new_value); + + /* nothing to be done for uncompressed data */ + if (cmid == TOAST_INVALID_COMPRESSION_ID) + continue; + + /* convert compression id to compression method */ + switch (cmid) + { + case TOAST_PGLZ_COMPRESSION_ID: + cmethod = TOAST_PGLZ_COMPRESSION; + break; + case TOAST_LZ4_COMPRESSION_ID: + cmethod = TOAST_LZ4_COMPRESSION; + break; + default: + elog(ERROR, "invalid compression method id %d", cmid); + } + + /* if compression method doesn't match then detoast the value */ + if (TupleDescAttr(newTupDesc, i)->attcompression != cmethod) + values[i] = PointerGetDatum(detoast_attr(new_value)); + } } copiedTuple = heap_form_tuple(newTupDesc, values, isnull); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index b9300b9..0d76ae5 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -528,6 +528,8 @@ static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKM static void ATExecGenericOptions(Relation rel, List *options); static void ATExecSetRowSecurity(Relation rel, bool rls); static void ATExecForceNoForceRowSecurity(Relation rel, bool force_rls); +static ObjectAddress ATExecSetCompression(AlteredTableInfo *tab, Relation rel, + const char *column, Node *newValue, LOCKMODE lockmode); static void index_copy_data(Relation rel, RelFileNode newrnode); static const char *storage_name(char c); @@ -3973,6 +3975,7 @@ AlterTableGetLockLevel(List *cmds) */ case AT_GenericOptions: case AT_AlterColumnGenericOptions: + case AT_SetCompression: cmd_lockmode = AccessExclusiveLock; break; @@ -4500,7 +4503,8 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd, case AT_DisableRowSecurity: case AT_ForceRowSecurity: case AT_NoForceRowSecurity: - ATSimplePermissions(rel, ATT_TABLE); + case AT_SetCompression: + ATSimplePermissions(rel, ATT_TABLE | ATT_MATVIEW); /* These commands never recurse */ /* No command-specific prep needed */ pass = AT_PASS_MISC; @@ -4908,6 +4912,10 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel, Assert(rel->rd_rel->relkind == RELKIND_INDEX); ATExecAlterCollationRefreshVersion(rel, cmd->object); break; + case AT_SetCompression: + address = ATExecSetCompression(tab, rel, cmd->name, cmd->def, + lockmode); + break; default: /* oops */ elog(ERROR, "unrecognized alter table type: %d", (int) cmd->subtype); @@ -7773,6 +7781,67 @@ ATExecSetOptions(Relation rel, const char *colName, Node *options, } /* + * Helper function for ATExecSetStorage and ATExecSetCompression + * + * Set the attcompression and/or attstorage for the respective index attribute + * if the respective input values are valid. + */ +static void +ApplyChangesToIndexes(Relation rel, Relation attrelation, AttrNumber attnum, + char newcompression, char newstorage, LOCKMODE lockmode) +{ + HeapTuple tuple; + ListCell *lc; + Form_pg_attribute attrtuple; + + foreach(lc, RelationGetIndexList(rel)) + { + Oid indexoid = lfirst_oid(lc); + Relation indrel; + AttrNumber indattnum = 0; + + indrel = index_open(indexoid, lockmode); + + for (int i = 0; i < indrel->rd_index->indnatts; i++) + { + if (indrel->rd_index->indkey.values[i] == attnum) + { + indattnum = i + 1; + break; + } + } + + if (indattnum == 0) + { + index_close(indrel, lockmode); + continue; + } + + tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum); + + if (HeapTupleIsValid(tuple)) + { + attrtuple = (Form_pg_attribute) GETSTRUCT(tuple); + + if (CompressionMethodIsValid(newcompression)) + attrtuple->attcompression = newcompression; + + if (newstorage != '\0') + attrtuple->attstorage = newstorage; + + CatalogTupleUpdate(attrelation, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(RelationRelationId, + RelationGetRelid(rel), + attrtuple->attnum); + + heap_freetuple(tuple); + } + + index_close(indrel, lockmode); + } +} +/* * ALTER TABLE ALTER COLUMN SET STORAGE * * Return value is the address of the modified column @@ -7787,7 +7856,6 @@ ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE loc Form_pg_attribute attrtuple; AttrNumber attnum; ObjectAddress address; - ListCell *lc; Assert(IsA(newValue, String)); storagemode = strVal(newValue); @@ -7851,47 +7919,8 @@ ATExecSetStorage(Relation rel, const char *colName, Node *newValue, LOCKMODE loc * Apply the change to indexes as well (only for simple index columns, * matching behavior of index.c ConstructTupleDescriptor()). */ - foreach(lc, RelationGetIndexList(rel)) - { - Oid indexoid = lfirst_oid(lc); - Relation indrel; - AttrNumber indattnum = 0; - - indrel = index_open(indexoid, lockmode); - - for (int i = 0; i < indrel->rd_index->indnatts; i++) - { - if (indrel->rd_index->indkey.values[i] == attnum) - { - indattnum = i + 1; - break; - } - } - - if (indattnum == 0) - { - index_close(indrel, lockmode); - continue; - } - - tuple = SearchSysCacheCopyAttNum(RelationGetRelid(indrel), indattnum); - - if (HeapTupleIsValid(tuple)) - { - attrtuple = (Form_pg_attribute) GETSTRUCT(tuple); - attrtuple->attstorage = newstorage; - - CatalogTupleUpdate(attrelation, &tuple->t_self, tuple); - - InvokeObjectPostAlterHook(RelationRelationId, - RelationGetRelid(rel), - attrtuple->attnum); - - heap_freetuple(tuple); - } - - index_close(indrel, lockmode); - } + ApplyChangesToIndexes(rel, attrelation, attnum, InvalidCompressionMethod, + newstorage, lockmode); table_close(attrelation, RowExclusiveLock); @@ -15017,6 +15046,89 @@ ATExecGenericOptions(Relation rel, List *options) } /* + * ALTER TABLE ALTER COLUMN SET COMPRESSION + * + * Return value is the address of the modified column + */ +static ObjectAddress +ATExecSetCompression(AlteredTableInfo *tab, + Relation rel, + const char *column, + Node *newValue, + LOCKMODE lockmode) +{ + Relation attrel; + HeapTuple tuple; + Form_pg_attribute atttableform; + AttrNumber attnum; + char *compression; + char typstorage; + Oid cmoid; + Datum values[Natts_pg_attribute]; + bool nulls[Natts_pg_attribute]; + bool replace[Natts_pg_attribute]; + ObjectAddress address; + + Assert(IsA(newValue, String)); + compression = strVal(newValue); + + attrel = table_open(AttributeRelationId, RowExclusiveLock); + + tuple = SearchSysCacheAttName(RelationGetRelid(rel), column); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" of relation \"%s\" does not exist", + column, RelationGetRelationName(rel)))); + + /* prevent them from altering a system attribute */ + atttableform = (Form_pg_attribute) GETSTRUCT(tuple); + attnum = atttableform->attnum; + if (attnum <= 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot alter system column \"%s\"", column))); + + typstorage = get_typstorage(atttableform->atttypid); + + /* prevent from setting compression methods for uncompressible type */ + if (!IsStorageCompressible(typstorage)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("column data type %s does not support compression", + format_type_be(atttableform->atttypid)))); + + /* initialize buffers for new tuple values */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replace, false, sizeof(replace)); + + /* get the attribute compression method. */ + cmoid = GetAttributeCompression(atttableform, compression); + + atttableform->attcompression = cmoid; + CatalogTupleUpdate(attrel, &tuple->t_self, tuple); + + InvokeObjectPostAlterHook(RelationRelationId, + RelationGetRelid(rel), + atttableform->attnum); + + ReleaseSysCache(tuple); + + /* apply changes to the index column as well */ + ApplyChangesToIndexes(rel, attrel, attnum, cmoid, '\0', lockmode); + table_close(attrel, RowExclusiveLock); + + /* make changes visible */ + CommandCounterIncrement(); + + ObjectAddressSubSet(address, RelationRelationId, + RelationGetRelid(rel), atttableform->attnum); + return address; +} + + +/* * Preparation phase for SET LOGGED/UNLOGGED * * This verifies that we're not trying to change a temp table. Also, diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 9d923b5..5c4e779 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2308,6 +2308,15 @@ alter_table_cmd: n->missing_ok = true; $$ = (Node *)n; } + /* ALTER TABLE ALTER [COLUMN] SET (COMPRESSION ) */ + | ALTER opt_column ColId SET optColumnCompression + { + AlterTableCmd *n = makeNode(AlterTableCmd); + n->subtype = AT_SetCompression; + n->name = $3; + n->def = (Node *) makeString($5); + $$ = (Node *)n; + } /* ALTER TABLE DROP [COLUMN] IF EXISTS [RESTRICT|CASCADE] */ | DROP opt_column IF_P EXISTS ColId opt_drop_behavior { diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index d3fb734..5980641 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2116,7 +2116,7 @@ psql_completion(const char *text, int start, int end) /* ALTER TABLE ALTER [COLUMN] SET */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET")) - COMPLETE_WITH("(", "DEFAULT", "NOT NULL", "STATISTICS", "STORAGE"); + COMPLETE_WITH("(", "COMPRESSION", "DEFAULT", "NOT NULL", "STATISTICS", "STORAGE"); /* ALTER TABLE ALTER [COLUMN] SET ( */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "(") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "(")) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 19d2ba2..f9a87de 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1901,7 +1901,8 @@ typedef enum AlterTableType AT_AddIdentity, /* ADD IDENTITY */ AT_SetIdentity, /* SET identity column options */ AT_DropIdentity, /* DROP IDENTITY */ - AT_AlterCollationRefreshVersion /* ALTER COLLATION ... REFRESH VERSION */ + AT_AlterCollationRefreshVersion, /* ALTER COLLATION ... REFRESH VERSION */ + AT_SetCompression /* SET COMPRESSION */ } AlterTableType; typedef struct ReplicaIdentityStmt diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out index ea4d76e..995b5b5 100644 --- a/src/test/regress/expected/compression.out +++ b/src/test/regress/expected/compression.out @@ -241,12 +241,57 @@ CREATE TABLE cmdata2 (f1 text); --------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | lz4 | | +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +INSERT INTO cmdata VALUES (repeat('123456789',4004)); +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | +Indexes: + "idx" btree (f1) + +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + lz4 +(2 rows) + +-- test alter compression method for the materialized view +ALTER MATERIALIZED VIEW mv ALTER COLUMN x SET COMPRESSION lz4; +\d+ mv + Materialized view "public.mv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | lz4 | | +View definition: + SELECT cmdata1.f1 AS x + FROM cmdata1; + +-- test alter compression method for the partitioned table +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789',1004)); +INSERT INTO cmpart VALUES (repeat('123456789',4004)); +SELECT pg_column_compression(f1) FROM cmpart; + pg_column_compression +----------------------- + lz4 + pglz + pglz + lz4 +(4 rows) + -- check data is ok SELECT length(f1) FROM cmdata; length -------- 10000 -(1 row) + 36036 +(2 rows) SELECT length(f1) FROM cmdata1; length diff --git a/src/test/regress/expected/compression_1.out b/src/test/regress/expected/compression_1.out index f5896f8..f2280b3 100644 --- a/src/test/regress/expected/compression_1.out +++ b/src/test/regress/expected/compression_1.out @@ -237,12 +237,58 @@ CREATE TABLE cmdata2 (f1 text); --------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | pglz | | +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +ERROR: unsupported LZ4 compression method +DETAIL: This functionality requires the server to be built with lz4 support. +HINT: You need to rebuild PostgreSQL using --with-lz4. +INSERT INTO cmdata VALUES (repeat('123456789',4004)); +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | +Indexes: + "idx" btree (f1) + +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + pglz +(2 rows) + +-- test alter compression method for the materialized view +ALTER MATERIALIZED VIEW mv ALTER COLUMN x SET COMPRESSION lz4; +ERROR: relation "mv" does not exist +\d+ mv +-- test alter compression method for the partitioned table +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ERROR: relation "cmpart1" does not exist +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; +ERROR: unsupported LZ4 compression method +DETAIL: This functionality requires the server to be built with lz4 support. +HINT: You need to rebuild PostgreSQL using --with-lz4. +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789',1004)); +ERROR: relation "cmpart" does not exist +LINE 1: INSERT INTO cmpart VALUES (repeat('123456789',1004)); + ^ +INSERT INTO cmpart VALUES (repeat('123456789',4004)); +ERROR: relation "cmpart" does not exist +LINE 1: INSERT INTO cmpart VALUES (repeat('123456789',4004)); + ^ +SELECT pg_column_compression(f1) FROM cmpart; +ERROR: relation "cmpart" does not exist +LINE 1: SELECT pg_column_compression(f1) FROM cmpart; + ^ -- check data is ok SELECT length(f1) FROM cmdata; length -------- 10000 -(1 row) + 36036 +(2 rows) SELECT length(f1) FROM cmdata1; ERROR: relation "cmdata1" does not exist diff --git a/src/test/regress/sql/compression.sql b/src/test/regress/sql/compression.sql index bc0c1bd..2eb92ea 100644 --- a/src/test/regress/sql/compression.sql +++ b/src/test/regress/sql/compression.sql @@ -102,6 +102,26 @@ DROP TABLE cmdata2; CREATE TABLE cmdata2 (f1 text); \d+ cmdata2 +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +INSERT INTO cmdata VALUES (repeat('123456789',4004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata; + +-- test alter compression method for the materialized view +ALTER MATERIALIZED VIEW mv ALTER COLUMN x SET COMPRESSION lz4; +\d+ mv + +-- test alter compression method for the partitioned table +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; + +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789',1004)); +INSERT INTO cmpart VALUES (repeat('123456789',4004)); + +SELECT pg_column_compression(f1) FROM cmpart; + -- check data is ok SELECT length(f1) FROM cmdata; SELECT length(f1) FROM cmdata1; -- 1.8.3.1