Bug in row_number() optimization - Mailing list pgsql-hackers

From Sergey Shinderuk
Subject Bug in row_number() optimization
Date
Msg-id 29184c50-429a-ebd7-f1fb-0589c6723a35@postgrespro.ru
Whole thread Raw
Responses Re: Bug in row_number() optimization
List pgsql-hackers
Hello,

We've accidentally found a subtle bug introduced by

commit 9d9c02ccd1aea8e9131d8f4edb21bf1687e40782
Author: David Rowley
Date:   Fri Apr 8 10:34:36 2022 +1200

     Teach planner and executor about monotonic window funcs


On a 32-bit system Valgrind reports use-after-free when running the 
"window" test:

==35487== Invalid read of size 4
==35487==    at 0x48398A4: memcpy (vg_replace_strmem.c:1035)
==35487==    by 0x1A2902: fill_val (heaptuple.c:287)
==35487==    by 0x1A2902: heap_fill_tuple (heaptuple.c:336)
==35487==    by 0x1A3C29: heap_form_minimal_tuple (heaptuple.c:1412)
==35487==    by 0x3D4555: tts_virtual_copy_minimal_tuple (execTuples.c:290)
==35487==    by 0x72FC33: ExecCopySlotMinimalTuple (tuptable.h:473)
==35487==    by 0x72FC33: tuplesort_puttupleslot (tuplesortvariants.c:610)
==35487==    by 0x403463: ExecSort (nodeSort.c:153)
==35487==    by 0x3D0C8E: ExecProcNodeFirst (execProcnode.c:464)
==35487==    by 0x40AF09: ExecProcNode (executor.h:259)
==35487==    by 0x40AF09: begin_partition (nodeWindowAgg.c:1106)
==35487==    by 0x40D259: ExecWindowAgg (nodeWindowAgg.c:2125)
==35487==    by 0x3D0C8E: ExecProcNodeFirst (execProcnode.c:464)
==35487==    by 0x405E17: ExecProcNode (executor.h:259)
==35487==    by 0x405E17: SubqueryNext (nodeSubqueryscan.c:53)
==35487==    by 0x3D41C7: ExecScanFetch (execScan.c:133)
==35487==    by 0x3D41C7: ExecScan (execScan.c:199)
==35487==  Address 0xe3e8af0 is 168 bytes inside a block of size 8,192 
alloc'd
==35487==    at 0x483463B: malloc (vg_replace_malloc.c:299)
==35487==    by 0x712B63: AllocSetContextCreateInternal (aset.c:446)
==35487==    by 0x3D82BE: CreateExprContextInternal (execUtils.c:253)
==35487==    by 0x3D84DC: CreateExprContext (execUtils.c:303)
==35487==    by 0x3D8750: ExecAssignExprContext (execUtils.c:482)
==35487==    by 0x40BC1A: ExecInitWindowAgg (nodeWindowAgg.c:2382)
==35487==    by 0x3D1232: ExecInitNode (execProcnode.c:346)
==35487==    by 0x4035E0: ExecInitSort (nodeSort.c:265)
==35487==    by 0x3D11AB: ExecInitNode (execProcnode.c:321)
==35487==    by 0x40BD36: ExecInitWindowAgg (nodeWindowAgg.c:2432)
==35487==    by 0x3D1232: ExecInitNode (execProcnode.c:346)
==35487==    by 0x405E99: ExecInitSubqueryScan (nodeSubqueryscan.c:126)


It's faster to run just this test under Valgrind:

    make installcheck-test TESTS='test_setup window'


This can also be reproduced on a 64-bit system by forcing int8 to be 
passed by reference:

--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -82,9 +82,7 @@
   *
   * Changing this requires an initdb.
   */
-#if SIZEOF_VOID_P >= 8
-#define USE_FLOAT8_BYVAL 1
-#endif
+#undef USE_FLOAT8_BYVAL

  /*
   * When we don't have native spinlocks, we use semaphores to simulate 
them.


Futhermore, zeroing freed memory makes the test fail:

--- a/src/include/utils/memdebug.h
+++ b/src/include/utils/memdebug.h
@@ -39,7 +39,7 @@ static inline void
  wipe_mem(void *ptr, size_t size)
  {
     VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
-   memset(ptr, 0x7F, size);
+   memset(ptr, 0, size);
     VALGRIND_MAKE_MEM_NOACCESS(ptr, size);
  }

$ cat src/test/regress/regression.diffs
diff -U3 
/home/sergey/pgwork/devel/src/src/test/regress/expected/window.out 
/home/sergey/pgwork/devel/src/src/test/regress/results/window.out
--- /home/sergey/pgwork/devel/src/src/test/regress/expected/window.out 
2022-11-03 18:26:52.203624217 +0300
+++ /home/sergey/pgwork/devel/src/src/test/regress/results/window.out 
2022-11-16 01:47:18.494273352 +0300
@@ -3721,7 +3721,8 @@
  -----------+-------+--------+-------------+----+----+----+----
   personnel |     5 |   3500 | 12-10-2007  |  2 |  1 |  2 |  2
   sales     |     3 |   4800 | 08-01-2007  |  3 |  1 |  3 |  3
-(2 rows)
+ sales     |     4 |   4800 | 08-08-2007  |  3 |  0 |  3 |  3
+(3 rows)

  -- Tests to ensure we don't push down the run condition when it's not 
valid to
  -- do so.


The failing query is:

SELECT * FROM
   (SELECT *,
           count(salary) OVER (PARTITION BY depname || '') c1, -- w1
           row_number() OVER (PARTITION BY depname) rn, -- w2
           count(*) OVER (PARTITION BY depname) c2, -- w2
           count(*) OVER (PARTITION BY '' || depname) c3 -- w3
    FROM empsalary
) e WHERE rn <= 1 AND c1 <= 3;


As far as I understand, ExecWindowAgg for the intermediate WindowAgg 
node switches into pass-through mode, stops evaluating row_number(), and 
returns the previous value instead. But if int8 is passed by reference, 
the previous value stored in econtext->ecxt_aggvalues becomes a dangling 
pointer when the per-output-tuple memory context is reset.

Attaching a patch that makes the window test fail on a 64-bit system.

Best regards,

-- 
Sergey Shinderuk        https://postgrespro.com/
Attachment

pgsql-hackers by date:

Previous
From: Simon Riggs
Date:
Subject: Re: Slow standby snapshot
Next
From: Andres Freund
Date:
Subject: Re: meson oddities