Re: Windows vs recovery tests - Mailing list pgsql-hackers

From Andres Freund
Subject Re: Windows vs recovery tests
Date
Msg-id 20220113054123.ib4khtafgq34lv4z@alap3.anarazel.de
Whole thread Raw
In response to Re: Windows vs recovery tests  (Andres Freund <andres@anarazel.de>)
List pgsql-hackers
Hi,

On 2022-01-12 20:03:14 -0800, Andres Freund wrote:
> So it looks like psql is hanging somewhere after that. I assume with an error
> popup that nobody can click on :/.

Not quite. Psql is actually just logging output in an endless loop. I
connected with cdb.exe.

kP:

00000000`007fd3c8 00007ffc`0d00f13a     ntdll!NtWriteFile+0x14
00000000`007fd3d0 00007ffc`03978ec3     KERNELBASE!WriteFile+0x7a
00000000`007fd440 00007ffc`03979d21     ucrtbased!write_text_ansi_nolock(
                        int fh = 0n2,
                        char * buffer = 0x00000000`007febb0 " ???",
                        unsigned int buffer_size = 1)+0x183
00000000`007fe8f0 00007ffc`039798a7     ucrtbased!_write_nolock(
                        int fh = 0n2,
                        void * buffer = 0x00000000`007febb0,
                        unsigned int buffer_size = 1,
                        class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x451
00000000`007fea80 00007ffc`03920e1d     ucrtbased!_write_internal(
                        int fh = 0n2,
                        void * buffer = 0x00000000`007febb0,
                        unsigned int size = 1,
                        class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x377
00000000`007feb20 00007ffc`0392090e     ucrtbased!write_buffer_nolock<char>(
                        char c = 0n32 ' ',
                        class __crt_stdio_stream stream = class __crt_stdio_stream,
                        class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x27d
00000000`007febb0 00007ffc`03921242     ucrtbased!common_flush_and_write_nolock<char>(
                        int c = 0n32,
                        class __crt_stdio_stream stream = class __crt_stdio_stream,
                        class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x22e
00000000`007fec20 00007ffc`038ddf5a     ucrtbased!__acrt_stdio_flush_and_write_narrow_nolock(
                        int c = 0n32,
                        struct _iobuf * stream = 0x00007ffc`03a27ce0,
                        class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x32
00000000`007fec60 00007ffc`038dd5a3     ucrtbased!_fwrite_nolock_internal(
                        void * buffer = 0x00000000`007ff020,
                        unsigned int64 element_size = 1,
                        unsigned int64 element_count = 7,
                        struct _iobuf * public_stream = 0x00007ffc`03a27ce0,
                        class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x79a
00000000`007fed60 00007ffc`038dd426     ucrtbased!<lambda_26974eb511f701c600fccfa2a97a8e1b>::operator()(void)+0x73
00000000`007fedd0 00007ffc`038dd4a8     ucrtbased!__crt_seh_guarded_call<unsigned
__int64>::operator()<<lambda_a2589f19c
515cac03caf6db9c38355e9>,<lambda_26974eb511f701c600fccfa2a97a8e1b> &,<lambda_ad9ce2f38261e34e8a422b9cc35dfe8d> >(
                        class __acrt_lock_stream_and_call::__l2::<lambda_a2589f19c515cac03caf6db9c38355e9> * setup =
0x0
0000000`007fee58,
                        class _fwrite_internal::__l2::<lambda_26974eb511f701c600fccfa2a97a8e1b> * action =
0x00000000`00
7feec0,
                        class __acrt_lock_stream_and_call::__l2::<lambda_ad9ce2f38261e34e8a422b9cc35dfe8d> * cleanup =
0
x00000000`007fee50)+0x36
00000000`007fee10 00007ffc`038dd72d
ucrtbased!__acrt_lock_stream_and_call<<lambda_26974eb511f701c600fccfa2a97a8e1b>
>(
                        struct _iobuf * stream = 0x00007ffc`03a27ce0,
                        class _fwrite_internal::__l2::<lambda_26974eb511f701c600fccfa2a97a8e1b> * action =
0x00000000`00
7feec0)+0x58
00000000`007fee70 00007ffc`038de046     ucrtbased!_fwrite_internal(
                        void * buffer = 0x00000000`007ff020,
                        unsigned int64 size = 1,
                        unsigned int64 count = 7,
                        struct _iobuf * stream = 0x00007ffc`03a27ce0,
                        class __crt_cached_ptd_host * ptd = 0x00000000`007fef40)+0x15d
00000000`007fef00 00000001`4004a639     ucrtbased!fwrite(
                        void * buffer = 0x00000000`007ff020,
                        unsigned int64 size = 1,
                        unsigned int64 count = 7,
                        struct _iobuf * stream = 0x00007ffc`03a27ce0)+0x56
00000000`007fef90 00000001`4004a165     psql!flushbuffer(
                        struct PrintfTarget * target = 0x00000000`007feff8)+0x59
00000000`007fefd0 00000001`4004a1e6     psql!pg_vfprintf(
                        struct _iobuf * stream = 0x00007ffc`03a27ce0,
                        char * fmt = 0x00000001`40094268 "error: ",
                        char * args = 0x00000000`007ff4a0 "@???")+0xa5
00000000`007ff450 00000001`40045962     psql!pg_fprintf(
                        struct _iobuf * stream = 0x00007ffc`03a27ce0,
                        char * fmt = 0x00000001`40094268 "error: ")+0x36
00000000`007ff490 00000001`40045644     psql!pg_log_generic_v(
                        pg_log_level level = PG_LOG_ERROR (0n4),
                        char * fmt = 0x00000001`40062e90 "unexpected PQresultStatus: %d",
                        char * ap = 0x00000000`007ff540 "???")+0x302
00000000`007ff4f0 00000001`4000ef1f     psql!pg_log_generic(
                        pg_log_level level = PG_LOG_ERROR (0n4),
                        char * fmt = 0x00000001`40062e90 "unexpected PQresultStatus: %d")+0x34
00000000`007ff530 00000001`4000e794     psql!AcceptResult(
                        struct pg_result * result = 0x00000000`0015af90,
                        bool show_error = false)+0x9f
00000000`007ff580 00000001`4000c8fe     psql!SendQueryAndProcessResults(
                        char * query = 0x00000000`00107570 "START_REPLICATION 0/0",
                        double * pelapsed_msec = 0x00000000`007ff718,
                        bool is_watch = false,
                        struct printQueryOpt * opt = 0x00000000`00000000,
                        struct _iobuf * printQueryFout = 0x00000000`00000000,
                        bool * tx_ended = 0x00000000`007ff6b3)+0x1a4
00000000`007ff680 00000001`40024104     psql!SendQuery(
                        char * query = 0x00000000`00107570 "START_REPLICATION 0/0")+0x42e
00000000`007ff750 00000001`40001524     psql!MainLoop(
                        struct _iobuf * source = 0x00007ffc`03a27c30)+0xf84
00000000`007ff890 00000001`40032903     psql!process_file(
                        char * filename = 0x00000001`400618e0 "<stdin>",
                        bool use_relative_path = false)+0x274
00000000`007ffcf0 00000001`400503f9     psql!main(
                        int argc = 0n8,
                        char ** argv = 0x00000000`0012a750)+0xc43
00000000`007ffe10 00000001`4005034e     psql!invoke_main(void)+0x39
00000000`007ffe60 00000001`4005020e     psql!__scrt_common_main_seh(void)+0x12e
00000000`007ffed0 00000001`4005046e     psql!__scrt_common_main(void)+0xe
00000000`007fff00 00007ffc`0d2a7974     psql!mainCRTStartup(
                        void * __formal = 0x00000000`002e6000)+0xe
00000000`007fff30 00007ffc`0fe1a2f1     KERNEL32!BaseThreadInitThunk+0x14
00000000`007fff60 00000000`00000000     ntdll!RtlUserThreadStart+0x21

bp psql!pg_log_generic

0:000> k3
Child-SP          RetAddr               Call Site
00000000`007ff528 00000001`4000ef1f     psql!pg_log_generic [C:\cirrus\src\common\logging.c @ 198]
00000000`007ff530 00000001`4000e794     psql!AcceptResult+0x9f [C:\cirrus\src\bin\psql\common.c @ 385]
00000000`007ff580 00000001`4000c8fe     psql!SendQueryAndProcessResults+0x1a4 [C:\cirrus\src\bin\psql\common.c @ 1163]
0:000> g
Breakpoint 0 hit
psql!pg_log_generic:
00000001`40045610 4889542410      mov     qword ptr [rsp+10h],rdx ss:00000000`007ff538=0000000100000006
0:000> k3
Child-SP          RetAddr               Call Site
00000000`007ff528 00000001`4000ef1f     psql!pg_log_generic [C:\cirrus\src\common\logging.c @ 198]
00000000`007ff530 00000001`4000e794     psql!AcceptResult+0x9f [C:\cirrus\src\bin\psql\common.c @ 385]
00000000`007ff580 00000001`4000c8fe     psql!SendQueryAndProcessResults+0x1a4 [C:\cirrus\src\bin\psql\common.c @ 1163]


Ah, I see the bug. It's a use-after-free introduced in the patch:

SendQueryAndProcessResults(const char *query, double *pelapsed_msec,
    bool is_watch, const printQueryOpt *opt, FILE *printQueryFout, bool *tx_ended)

...
    /* first result */
    result = PQgetResult(pset.db);

    while (result != NULL)

...
        if (!AcceptResult(result, false))
        {
...
            ClearOrSaveResult(result);
            success = false;

            /* and switch to next result */
            result_status = PQresultStatus(result);
            if (result_status == PGRES_COPY_BOTH ||
                result_status == PGRES_COPY_OUT ||
                result_status == PGRES_COPY_IN)

So we called ClearOrSaveResult() with did a PQclear(), and then we go and call
PQresultStatus().


So this really is unrelated to CI. I'll mention this message in the other
thread.

Greetings,

Andres Freund



pgsql-hackers by date:

Previous
From: Amit Langote
Date:
Subject: Re: a misbehavior of partition row movement (?)
Next
From: Andres Freund
Date:
Subject: Re: psql - add SHOW_ALL_RESULTS option