BUG #19424: Concurrent PQconnectdb() calls hang on Windows - Mailing list pgsql-bugs

From PG Bug reporting form
Subject BUG #19424: Concurrent PQconnectdb() calls hang on Windows
Date
Msg-id 19424-0ab4342f914b6296@postgresql.org
Whole thread Raw
List pgsql-bugs
The following bug has been logged on the website:

Bug reference:      19424
Logged by:          David Ritter
Email address:      david.ritter@gmail.com
PostgreSQL version: 18.3
Operating system:   Windows 11
Description:

Versions affected: libpq 17.4, 18.3 (likely all 17.x+)

Platform: Windows (MSVC 19.x, x86_64). Not reproducible on Linux (RHEL 9,
tested 1000 runs with 100 threads each).

Description: When multiple threads each call PQconnectdb() concurrently with
independent connection strings, most or all threads hang indefinitely.
PQisthreadsafe() returns 1. A single serial warmup connection succeeds. The
attached reproducer spawns N threads and reports how many complete within 30
seconds.

=======================
/*
 * pq_thread_test.c — Minimal reproducer: concurrent PQconnectdb() hangs on
 * Windows.
 *
 * Spawns N threads, each calling PQconnectdb() on an independent PGconn.
 * On Windows (tested with MSVC 19.x), most or all threads hang indefinitely
 * inside PQconnectdb(). The same program works correctly on Linux.
 *
 * Tested with:
 *   - libpq 17.4 (PQlibVersion() = 170004) — hangs on Windows
 *   - libpq 18.3 (PQlibVersion() = 180300) — hangs on Windows
 *   - libpq 17.4 on RHEL 9 / x86_64       — works (1000 consecutive runs,
 *     0 failures)
 *
 * PQisthreadsafe() returns 1 in all cases.
 *
 * Build (MSVC / Windows):
 *   cl /nologo /MT pq_thread_test.c ^
 *      /I "<PG_INSTALL>\include" ^
 *      /link /LIBPATH:"<PG_INSTALL>\lib" libpq.lib ws2_32.lib
 *
 * Build (GCC / Linux):
 *   gcc -D_GNU_SOURCE -o pq_thread_test pq_thread_test.c \
 *       -I<PG_INSTALL>/include -L<PG_INSTALL>/lib -lpq -lpthread -lm
 *
 * Run:
 *   # Windows — ensure libpq.dll is on PATH:
 *   set PATH=<PG_INSTALL>\bin;%PATH%
 *   pq_thread_test.exe [num_threads]
 *
 *   # Linux:
 *   export LD_LIBRARY_PATH=<PG_INSTALL>/lib:$LD_LIBRARY_PATH
 *   ./pq_thread_test [num_threads]
 *
 * Connection parameters (override via environment variables):
 *   PG_HOST  (default: 127.0.0.1)
 *   PG_USER  (default: postgres)
 *   PG_PASS  (default: postgres)
 *   PG_DB    (default: postgres)
 *
 * Example:
 *   PG_HOST=127.0.0.1 PG_USER=postgres PG_PASS=secret PG_DB=mydb \
 *       ./pq_thread_test 100
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libpq-fe.h>

#ifdef _WIN32
#  include <windows.h>
#else
#  include <pthread.h>
#  include <time.h>
#  include <errno.h>
#endif

#define DEFAULT_NUM_THREADS 100
#define DEFAULT_HOST "127.0.0.1"
#define DEFAULT_USER "postgres"
#define DEFAULT_PASS "postgres"
#define DEFAULT_DB   "postgres"
#define TIMEOUT_SECONDS 30
#define CONNINFO_MAXLEN 512

/* Global conninfo string, built in main() from env vars or defaults */
static char g_conninfo[CONNINFO_MAXLEN];

/*
 * Read a high-resolution clock and return its value in seconds.
 *
 * Windows uses the performance counter divided by its frequency; other
 * platforms use CLOCK_MONOTONIC. Callers in this file only ever subtract two
 * readings to obtain an elapsed time.
 */
static double timer_now(void) {
#ifdef _WIN32
    LARGE_INTEGER ticks_per_second;
    LARGE_INTEGER ticks;

    QueryPerformanceFrequency(&ticks_per_second);
    QueryPerformanceCounter(&ticks);

    return (double)ticks.QuadPart / (double)ticks_per_second.QuadPart;
#else
    struct timespec now;
    double seconds;

    clock_gettime(CLOCK_MONOTONIC, &now);
    seconds = now.tv_sec + now.tv_nsec / 1e9;

    return seconds;
#endif
}

/*
 * Per-thread outcome record. main() allocates one per worker and passes its
 * address as the thread argument; connect_thread() fills it in.
 *
 * NOTE(review): `done` is only `volatile`, which is not a thread
 * synchronization primitive. main() reads it after the join/wait step, which
 * may have timed out — confirm whether an atomic flag is wanted here.
 */
typedef struct {
    int thread_id;           /* index of the worker, assigned by main() */
    volatile int done;       /* 1 = connect_thread() finished, 0 = still running */
    ConnStatusType status;   /* PQstatus() of the attempt; CONNECTION_BAD on NULL conn */
    char errmsg[256];        /* failure text copied by connect_thread() */
} ThreadResult;

/*
 * Thread entry point: open one connection with PQconnectdb(g_conninfo),
 * record the outcome in the ThreadResult passed via `arg`, log a one-line
 * status with the elapsed time to stderr, and close the connection.
 *
 * arg: pointer to this thread's ThreadResult (owned by main()).
 * Returns 0 (Windows) / NULL (POSIX); the real result is in *arg.
 *
 * res->done is set last, after status and errmsg have been written.
 */
static
#ifdef _WIN32
DWORD WINAPI
#else
void *
#endif
connect_thread(void *arg) {
    ThreadResult *res = (ThreadResult *)arg;
    PGconn *conn;
    double t_start = timer_now();

    conn = PQconnectdb(g_conninfo);

    double elapsed_ms = (timer_now() - t_start) * 1000.0;

    if (conn == NULL) {
        res->status = CONNECTION_BAD;
        /* snprintf always NUL-terminates; strncpy only did so because the
         * caller happened to zero the buffer beforehand. */
        snprintf(res->errmsg, sizeof(res->errmsg), "%s",
                 "PQconnectdb returned NULL");
        fprintf(stderr, "[Thread %d] FAILED (%.1f ms): %s\n",
                res->thread_id, elapsed_ms, res->errmsg);
    } else {
        res->status = PQstatus(conn);
        if (res->status != CONNECTION_OK) {
            const char *msg = PQerrorMessage(conn);
            snprintf(res->errmsg, sizeof(res->errmsg), "%s",
                     msg ? msg : "(no message)");
            fprintf(stderr, "[Thread %d] FAILED (%.1f ms): %s\n",
                    res->thread_id, elapsed_ms, res->errmsg);
        } else {
            fprintf(stderr, "[Thread %d] OK (%.1f ms)\n",
                    res->thread_id, elapsed_ms);
        }
        PQfinish(conn);
    }

    /* Publish completion only after the result fields are filled in. */
    res->done = 1;
#ifdef _WIN32
    return 0;
#else
    return NULL;
#endif
}

/*
 * Test driver.
 *
 * Usage: pq_thread_test [num_threads]   (1-1000, default DEFAULT_NUM_THREADS)
 *
 * Builds the conninfo string from PG_HOST / PG_USER / PG_PASS / PG_DB (or the
 * defaults), makes one serial warmup connection, then starts num_threads
 * workers running connect_thread() and waits up to TIMEOUT_SECONDS for them.
 *
 * Exit status: 0 if every thread connected, 1 on usage/setup error or if any
 * connection failed, 2 if any thread had not finished by the deadline.
 */
int main(int argc, char *argv[]) {
    int i;
    int num_threads = DEFAULT_NUM_THREADS;

    if (argc > 1) {
        /* strtol + end-pointer check rejects non-numeric and trailing junk,
         * which atoi() would silently accept or map to 0. */
        char *end = NULL;
        long requested = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' ||
            requested < 1 || requested > 1000) {
            fprintf(stderr, "Usage: %s [num_threads (1-1000)]\n", argv[0]);
            return 1;
        }
        num_threads = (int)requested;
    }

    /* Build conninfo from env vars, falling back to defaults */
    {
        const char *host = getenv("PG_HOST");
        const char *user = getenv("PG_USER");
        const char *pass = getenv("PG_PASS");
        const char *db   = getenv("PG_DB");
        int len = snprintf(g_conninfo, CONNINFO_MAXLEN,
                 "host=%s user=%s password=%s dbname=%s connect_timeout=10",
                 host ? host : DEFAULT_HOST,
                 user ? user : DEFAULT_USER,
                 pass ? pass : DEFAULT_PASS,
                 db   ? db   : DEFAULT_DB);
        /* A truncated conninfo would connect somewhere unintended or fail
         * with a confusing parse error, so refuse it up front. */
        if (len < 0 || len >= CONNINFO_MAXLEN) {
            fprintf(stderr, "Connection string too long (max %d bytes)\n",
                    CONNINFO_MAXLEN - 1);
            return 1;
        }
    }

    fprintf(stderr, "=== libpq concurrent PQconnectdb() test ===\n");
    fprintf(stderr, "  Threads:          %d\n", num_threads);
    fprintf(stderr, "  Timeout:          %d seconds\n", TIMEOUT_SECONDS);
    fprintf(stderr, "  ConnInfo:         %s\n", g_conninfo);
    fprintf(stderr, "  PQisthreadsafe(): %d\n", PQisthreadsafe());
    fprintf(stderr, "  PQlibVersion():   %d\n", PQlibVersion());
#ifdef _WIN32
    fprintf(stderr, "  Platform:         Windows\n");
#else
    fprintf(stderr, "  Platform:         POSIX/Linux\n");
#endif
    fprintf(stderr, "\n");

    /* Allocate arrays */
#ifdef _WIN32
    HANDLE *threads = calloc((size_t)num_threads, sizeof *threads);
#else
    pthread_t *threads = calloc((size_t)num_threads, sizeof *threads);
#endif
    ThreadResult *results = calloc((size_t)num_threads, sizeof *results);
    if (!threads || !results) {
        fprintf(stderr, "Allocation failed\n");
        free(threads);
        free(results);
        return 1;
    }

    /* Verify connectivity with a single serial connection first */
    fprintf(stderr, "--- Warmup: single serial connection ---\n");
    {
        PGconn *warmup = PQconnectdb(g_conninfo);
        if (warmup && PQstatus(warmup) == CONNECTION_OK) {
            fprintf(stderr, "Warmup OK (server version %d)\n\n",
                    PQserverVersion(warmup));
        } else {
            fprintf(stderr, "Warmup FAILED: %s\n",
                    warmup ? PQerrorMessage(warmup) : "NULL");
            if (warmup) PQfinish(warmup);
            free(threads);
            free(results);
            return 1;
        }
        PQfinish(warmup);
    }

    /* Launch all threads simultaneously */
    fprintf(stderr, "--- Launching %d concurrent threads ---\n", num_threads);
    double t0 = timer_now();

    for (i = 0; i < num_threads; i++) {
        results[i].thread_id = i;
        results[i].done = 0;
        results[i].status = CONNECTION_BAD;
        results[i].errmsg[0] = '\0';
#ifdef _WIN32
        threads[i] = CreateThread(NULL, 0, connect_thread, &results[i], 0,
                                  NULL);
        if (threads[i] == NULL) {
            fprintf(stderr, "CreateThread(%d) failed: %lu\n", i,
                    GetLastError());
            /* Already-started workers still reference results[]; leave the
             * arrays allocated and let process exit reclaim them. */
            return 1;
        }
#else
        int rc = pthread_create(&threads[i], NULL, connect_thread,
                                &results[i]);
        if (rc != 0) {
            fprintf(stderr, "pthread_create(%d) failed: %d\n", i, rc);
            /* Same as above: workers may still be using results[]. */
            return 1;
        }
#endif
    }

    /* Wait with a timeout */
#ifdef _WIN32
    {
        /*
         * WaitForMultipleObjects() accepts at most MAXIMUM_WAIT_OBJECTS (64)
         * handles; with more it fails immediately with WAIT_FAILED, which
         * made every not-yet-finished thread look "hung". Wait on each
         * handle in turn against one shared deadline instead.
         */
        double deadline = t0 + (double)TIMEOUT_SECONDS;
        for (i = 0; i < num_threads; i++) {
            double left = deadline - timer_now();
            DWORD wait_ms = (left > 0.0) ? (DWORD)(left * 1000.0) : 0;
            DWORD wr = WaitForSingleObject(threads[i], wait_ms);
            if (wr == WAIT_FAILED) {
                fprintf(stderr, "WaitForSingleObject(%d) failed: %lu\n", i,
                        GetLastError());
            }
            /* WAIT_TIMEOUT is reported below via results[i].done. */
        }
    }
#else
    {
        struct timespec deadline;
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += TIMEOUT_SECONDS;
        for (i = 0; i < num_threads; i++) {
#if defined(__linux__) || defined(__GLIBC__)
            /* Requires _GNU_SOURCE to be defined when compiling. */
            int rc = pthread_timedjoin_np(threads[i], NULL, &deadline);
            if (rc == ETIMEDOUT) {
                /* Thread hung — leave it; we'll report below */
            }
#else
            /* No timed join available here: this blocks until the thread
             * exits, so a real hang will stall the driver. */
            pthread_join(threads[i], NULL);
#endif
        }
    }
#endif

    double total_ms = (timer_now() - t0) * 1000.0;
    fprintf(stderr, "\n--- Results (%.1f ms total) ---\n", total_ms);

    int ok_count = 0, fail_count = 0, hung_count = 0;
    for (i = 0; i < num_threads; i++) {
        if (results[i].done) {
            if (results[i].status == CONNECTION_OK) {
                ok_count++;
            } else {
                fprintf(stderr, "  Thread %d: FAILED - %s\n", i,
                        results[i].errmsg);
                fail_count++;
            }
        } else {
            fprintf(stderr, "  Thread %d: HUNG (did not complete in %ds)\n",
                    i, TIMEOUT_SECONDS);
            hung_count++;
        }
#ifdef _WIN32
        CloseHandle(threads[i]);
#endif
    }

    fprintf(stderr, "\nSummary: %d OK, %d failed, %d hung (of %d)\n",
            ok_count, fail_count, hung_count, num_threads);

    free(threads);

    if (hung_count > 0) {
        /* Hung threads may still write into results[]; freeing it here
         * would be a use-after-free. Process exit reclaims the memory. */
        fprintf(stderr, "\n*** BUG: %d threads hung in PQconnectdb() ***\n",
                hung_count);
        return 2;
    }

    free(results);

    if (fail_count > 0)
        return 1;

    fprintf(stderr, "\nAll threads connected successfully.\n");
    return 0;
}





pgsql-bugs by date:

Previous
From: Dirkjan Bussink
Date:
Subject: JSON_SERIALIZE for JSONB returns parts of the internal JSONB representation
Next
From: Richard Guo
Date:
Subject: Re: BUG #19418: SQL/JSON JSON_VALUE() does not conform to ISO/IEC 9075-2:2023(E) 6.34