Thread: Need explanation on index size

Need explanation on index size

From
Guillaume Luchet
Date:
Hi,

I’m facing of a comportement I don’t understand on indexes, here a quick example to reproduce my problem


test=# select version();
                                                             version                                                              
----------------------------------------------------------------------------------------------------------------------------------
 PostgreSQL 12.4 (Debian 12.4-1.pgdg90+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516, 64-bit
(1 row)

test=# create table plop (col_a int, col_b int, col_c int);
CREATE TABLE

test=# create unique index on plop (col_a);
CREATE INDEX

test=# create index on plop(col_b);
CREATE INDEX

test=# insert into plop (col_a, col_b) select generate_series(1, 10000), generate_series(1, 10000);
INSERT 0 10000

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 360 kB      | 864 kB
(1 row)

test=# update plop set col_c = floor(random() * 10 + 1)::int;
UPDATE 10000

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 792 kB      | 2160 kB
(1 row)

test=# reindex table plop;
REINDEX

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 792 kB      | 1304 kB
(1 row)

I don’t understand why after the update where I only update a non indexed column the indexes size is growing. Is it something someone can explain ?

Regards,
Guillaume


Re: Need explanation on index size

From
Guillaume Lelarge
Date:
Hi,

Le jeu. 24 sept. 2020 à 15:55, Guillaume Luchet <g.luchet@bilendi.com> a écrit :
Hi,

I’m facing of a comportement I don’t understand on indexes, here a quick example to reproduce my problem


test=# select version();
                                                             version                                                              
----------------------------------------------------------------------------------------------------------------------------------
 PostgreSQL 12.4 (Debian 12.4-1.pgdg90+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516, 64-bit
(1 row)

test=# create table plop (col_a int, col_b int, col_c int);
CREATE TABLE

test=# create unique index on plop (col_a);
CREATE INDEX

test=# create index on plop(col_b);
CREATE INDEX

test=# insert into plop (col_a, col_b) select generate_series(1, 10000), generate_series(1, 10000);
INSERT 0 10000

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 360 kB      | 864 kB
(1 row)

test=# update plop set col_c = floor(random() * 10 + 1)::int;
UPDATE 10000

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 792 kB      | 2160 kB
(1 row)

test=# reindex table plop;
REINDEX

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 792 kB      | 1304 kB
(1 row)

I don’t understand why after the update where I only update a non indexed column the indexes size is growing. Is it something someone can explain ?


Every tuple is now on a different location on the table (remember that update in PostgreSQL is more something like delete+insert). So even if the value of the column doesn't change, its tuple location changes, so the index needs to be updated to reflect that change.


--
Guillaume.

Re: Need explanation on index size

From
Chris Sterritt
Date:


On 24/09/2020 15:08, Guillaume Lelarge wrote:
Hi,

Le jeu. 24 sept. 2020 à 15:55, Guillaume Luchet <g.luchet@bilendi.com> a écrit :
Hi,

I’m facing of a comportement I don’t understand on indexes, here a quick example to reproduce my problem


test=# select version();
                                                             version                                                              
----------------------------------------------------------------------------------------------------------------------------------
 PostgreSQL 12.4 (Debian 12.4-1.pgdg90+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516, 64-bit
(1 row)

test=# create table plop (col_a int, col_b int, col_c int);
CREATE TABLE

test=# create unique index on plop (col_a);
CREATE INDEX

test=# create index on plop(col_b);
CREATE INDEX

test=# insert into plop (col_a, col_b) select generate_series(1, 10000), generate_series(1, 10000);
INSERT 0 10000

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 360 kB      | 864 kB
(1 row)

test=# update plop set col_c = floor(random() * 10 + 1)::int;
UPDATE 10000

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 792 kB      | 2160 kB
(1 row)

test=# reindex table plop;
REINDEX

test=# SELECT schemaname, tablename,
         pg_size_pretty(SIZE) AS size_pretty,
         pg_size_pretty(total_size) AS total_size_pretty
    FROM (SELECT *, pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS SIZE,
            pg_total_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)) AS total_size
          FROM pg_tables) AS TABLES where tablename = 'plop';
 schemaname | tablename | size_pretty | total_size_pretty 
------------+-----------+-------------+-------------------
 public     | plop      | 792 kB      | 1304 kB
(1 row)

I don’t understand why after the update where I only update a non indexed column the indexes size is growing. Is it something someone can explain ?


Every tuple is now on a different location on the table (remember that update in PostgreSQL is more something like delete+insert). So even if the value of the column doesn't change, its tuple location changes, so the index needs to be updated to reflect that change.


--
Guillaume.


If you execute
  vacuum full plop;
you will see the size shrink back as the dead tuples will have been removed.

Chris

Re: Need explanation on index size

From
Peter Geoghegan
Date:
On Thu, Sep 24, 2020 at 6:55 AM Guillaume Luchet <g.luchet@bilendi.com> wrote:
> I don’t understand why after the update where I only update a non indexed column the indexes size is growing. Is it
somethingsomeone can explain ? 

If you reduce the table fillfactor then these updates will all be HOT
updates. That will make the table larger initially, but leaving enough
space behind on the same heap pages for successor tuples makes it
possible to use HOT updates.


--
Peter Geoghegan