Re: reducing statistics write overhead - Mailing list pgsql-hackers
From | Martin Pihlak |
---|---|
Subject | Re: reducing statistics write overhead |
Date | |
Msg-id | 48C527C0.2070707@gmail.com Whole thread Raw |
In response to | Re: reducing statistics write overhead (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: reducing statistics write overhead
|
List | pgsql-hackers |
Tom Lane wrote: > Timestamp within the file is certainly better than trying to rely on > filesystem timestamps. I doubt 1 sec resolution is good enough, and > I'd also be worried about issues like clock skew between the > postmaster's time and the filesystem's time. > Attached is a patch which adds a timestamp to pgstat.stat file header, backend_read_statsfile uses this to determine if the file is fresh. During the wait loop, the stats request message is retransmitted to compensate for possible loss of message(s). The collector only writes the file mostly at PGSTAT_STAT_INTERVAL frequency, currently no extra custom timeouts can be passed in the message. This can of course be added if need arises. regards, Martin Index: src/backend/postmaster/pgstat.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/postmaster/pgstat.c,v retrieving revision 1.181 diff -c -r1.181 pgstat.c *** src/backend/postmaster/pgstat.c 25 Aug 2008 18:55:43 -0000 1.181 --- src/backend/postmaster/pgstat.c 8 Sep 2008 12:17:21 -0000 *************** *** 85,90 **** --- 85,94 ---- #define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster * death; in seconds. */ + #define PGSTAT_POLL_RETRY_DELAY 10 /* How long to pause between statistics + * update requests; in milliseconds. */ + + #define PGSTAT_POLL_RETRIES 500 /* How many times to repeat stats inquiry */ /* ---------- * The initial size hints for the hash tables used in the collector. *************** *** 201,206 **** --- 205,215 ---- */ static PgStat_GlobalStats globalStats; + /* Last time the collector wrote the stats file */ + static TimestampTz last_statwrite; + /* Last time a backend requested a new file */ + static TimestampTz last_statrequest; + static volatile bool need_exit = false; static volatile bool need_statwrite = false; static volatile bool got_SIGHUP = false; *************** *** 223,229 **** NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]); static void pgstat_exit(SIGNAL_ARGS); - static void force_statwrite(SIGNAL_ARGS); static void pgstat_beshutdown_hook(int code, Datum arg); static void pgstat_sighup_handler(SIGNAL_ARGS); --- 232,237 ---- *************** *** 254,259 **** --- 262,268 ---- static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); + static void pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len); /* ------------------------------------------------------------ *************** *** 2463,2468 **** --- 2472,2491 ---- hdr->m_type = mtype; } + /* ---------- + * pgstat_send_inquiry() - + * + * Notify collector that we need fresh data. + * ---------- + */ + static void + pgstat_send_inquiry(void) + { + PgStat_MsgInquiry msg; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY); + pgstat_send(&msg, sizeof(msg)); + } /* ---------- * pgstat_send() - *************** *** 2538,2545 **** NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]) { - struct itimerval write_timeout; - bool need_timer = false; int len; PgStat_Msg msg; --- 2561,2566 ---- *************** *** 2571,2583 **** /* * Ignore all signals usually bound to some action in the postmaster, ! * except SIGQUIT and SIGALRM. */ pqsignal(SIGHUP, pgstat_sighup_handler); pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, SIG_IGN); pqsignal(SIGQUIT, pgstat_exit); - pqsignal(SIGALRM, force_statwrite); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); pqsignal(SIGUSR2, SIG_IGN); --- 2592,2603 ---- /* * Ignore all signals usually bound to some action in the postmaster, ! * except SIGQUIT */ pqsignal(SIGHUP, pgstat_sighup_handler); pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, SIG_IGN); pqsignal(SIGQUIT, pgstat_exit); pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, SIG_IGN); pqsignal(SIGUSR2, SIG_IGN); *************** *** 2598,2608 **** */ need_statwrite = true; - /* Preset the delay between status file writes */ - MemSet(&write_timeout, 0, sizeof(struct itimerval)); - write_timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000; - write_timeout.it_value.tv_usec = (PGSTAT_STAT_INTERVAL % 1000) * 1000; - /* * Read in an existing statistics stats file or initialize the stats to * zero. --- 2618,2623 ---- *************** *** 2649,2668 **** } /* ! * If time to write the stats file, do so. Note that the alarm ! * interrupt isn't re-enabled immediately, but only after we next ! * receive a stats message; so no cycles are wasted when there is ! * nothing going on. */ if (need_statwrite) { /* Check for postmaster death; if so we'll write file below */ if (!PostmasterIsAlive(true)) break; ! pgstat_write_statsfile(false); need_statwrite = false; - need_timer = true; } /* --- 2664,2687 ---- } /* ! * If time to write the stats file, do so. */ if (need_statwrite) { + TimestampTz now = GetCurrentTimestamp(); + /* Check for postmaster death; if so we'll write file below */ if (!PostmasterIsAlive(true)) break; ! /* Write the file, but only if enough time has passed since last write */ ! if (TimestampDifferenceExceeds(last_statwrite, now, PGSTAT_STAT_INTERVAL)) ! { ! pgstat_write_statsfile(false); ! last_statwrite = now; ! } ! need_statwrite = false; } /* *************** *** 2797,2817 **** pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len); break; ! default: break; - } ! /* ! * If this is the first message after we wrote the stats file the ! * last time, enable the alarm interrupt to make it be written ! * again later. ! */ ! if (need_timer) ! { ! if (setitimer(ITIMER_REAL, &write_timeout, NULL)) ! ereport(ERROR, ! (errmsg("could not set statistics collector timer: %m"))); ! need_timer = false; } } else --- 2816,2827 ---- pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len); break; ! case PGSTAT_MTYPE_INQUIRY: ! pgstat_recv_inquiry((PgStat_MsgInquiry *)&msg, len); break; ! default: ! break; } } else *************** *** 2841,2853 **** need_exit = true; } - /* SIGALRM signal handler for collector process */ - static void - force_statwrite(SIGNAL_ARGS) - { - need_statwrite = true; - } - /* SIGHUP handler for collector process */ static void pgstat_sighup_handler(SIGNAL_ARGS) --- 2851,2856 ---- *************** *** 2937,2949 **** PgStat_StatFuncEntry *funcentry; FILE *fpout; int32 format_id; const char *tmpfile = permanent?PGSTAT_STAT_PERMANENT_TMPFILE:pgstat_stat_tmpname; const char *statfile = permanent?PGSTAT_STAT_PERMANENT_FILENAME:pgstat_stat_filename; /* * Open the statistics temp file to write out the current values. */ ! fpout = fopen(tmpfile, PG_BINARY_W); if (fpout == NULL) { ereport(LOG, --- 2940,2953 ---- PgStat_StatFuncEntry *funcentry; FILE *fpout; int32 format_id; + TimestampTz ts; const char *tmpfile = permanent?PGSTAT_STAT_PERMANENT_TMPFILE:pgstat_stat_tmpname; const char *statfile = permanent?PGSTAT_STAT_PERMANENT_FILENAME:pgstat_stat_filename; /* * Open the statistics temp file to write out the current values. */ ! fpout = AllocateFile(tmpfile, PG_BINARY_W); if (fpout == NULL) { ereport(LOG, *************** *** 2954,2963 **** } /* ! * Write the file header --- currently just a format ID. */ format_id = PGSTAT_FILE_FORMAT_ID; fwrite(&format_id, sizeof(format_id), 1, fpout); /* * Write global stats struct --- 2958,2969 ---- } /* ! * Write the file header --- currently just a format ID and a timestamp. */ format_id = PGSTAT_FILE_FORMAT_ID; + ts = GetCurrentTimestamp(); fwrite(&format_id, sizeof(format_id), 1, fpout); + fwrite(&ts, sizeof(ts), 1, fpout); /* * Write global stats struct *************** *** 3017,3026 **** (errcode_for_file_access(), errmsg("could not write temporary statistics file \"%s\": %m", tmpfile))); ! fclose(fpout); unlink(tmpfile); } ! else if (fclose(fpout) < 0) { ereport(LOG, (errcode_for_file_access(), --- 3023,3032 ---- (errcode_for_file_access(), errmsg("could not write temporary statistics file \"%s\": %m", tmpfile))); ! FreeFile(fpout); unlink(tmpfile); } ! else if (FreeFile(fpout) < 0) { ereport(LOG, (errcode_for_file_access(), *************** *** 3108,3113 **** --- 3114,3127 ---- goto done; } + /* Skip over the timestamp */ + if (fseek(fpin, sizeof(TimestampTz), SEEK_CUR) != 0) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted pgstat.stat file"))); + goto done; + } + /* * Read global stats struct */ *************** *** 3297,3307 **** --- 3311,3365 ---- static void backend_read_statsfile(void) { + TimestampTz start; + bool stats_ready = false; + int count; + /* already read it? */ if (pgStatDBHash) return; Assert(!pgStatRunningInCollector); + start = GetCurrentTimestamp(); + + /* + * If the stats requests are sent too frequently the collector + * might refuse to write the file and cause a longish wait here. + * So we reuse the old file if we know that not enough time has + * passsed since last request. + */ + if (!TimestampDifferenceExceeds(last_statrequest, start, PGSTAT_STAT_INTERVAL)) + stats_ready = true; + last_statrequest = start; + + /* + * Loop until fresh stats file is available or we ran out of time. + * The stats inquiry message is resent in case collector drops our + * first message. + */ + for (count = 0; !stats_ready && count < PGSTAT_POLL_RETRIES; count++) + { + TimestampTz ts; + FILE *fp; + + CHECK_FOR_INTERRUPTS(); + pgstat_send_inquiry(); + pg_usleep(PGSTAT_POLL_RETRY_DELAY * 1000); + + /* Read the timestamp from beginning of the stats file */ + if ((fp = AllocateFile(pgstat_stat_filename, PG_BINARY_R)) != NULL) + { + fseek(fp, sizeof(int32), SEEK_SET); + fread(&ts, sizeof(ts), 1, fp); + if (!ferror(fp) && TimestampDifferenceExceeds(start, ts, 0)) + stats_ready = true; + FreeFile(fp); + } + } + + if (count >= PGSTAT_POLL_RETRIES) + elog(WARNING, "pgstat wait timeout"); + /* Autovacuum launcher wants stats about all databases */ if (IsAutoVacuumLauncherProcess()) pgStatDBHash = pgstat_read_statsfile(InvalidOid, false); *************** *** 3779,3781 **** --- 3837,3852 ---- HASH_REMOVE, NULL); } } + + /* ---------- + * pgstat_recv_inquiry() - + * + * Process stat inquiry requests. + * ---------- + */ + static void + pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len) + { + need_statwrite = true; + } + Index: src/include/pgstat.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/pgstat.h,v retrieving revision 1.78 diff -c -r1.78 pgstat.h *** src/include/pgstat.h 15 Aug 2008 08:37:40 -0000 1.78 --- src/include/pgstat.h 8 Sep 2008 12:17:23 -0000 *************** *** 42,48 **** PGSTAT_MTYPE_ANALYZE, PGSTAT_MTYPE_BGWRITER, PGSTAT_MTYPE_FUNCSTAT, ! PGSTAT_MTYPE_FUNCPURGE } StatMsgType; /* ---------- --- 42,49 ---- PGSTAT_MTYPE_ANALYZE, PGSTAT_MTYPE_BGWRITER, PGSTAT_MTYPE_FUNCSTAT, ! PGSTAT_MTYPE_FUNCPURGE, ! PGSTAT_MTYPE_INQUIRY } StatMsgType; /* ---------- *************** *** 385,390 **** --- 386,403 ---- /* ---------- + * PgStat_MsgInquiry Sent by the backend to tell the collector + * to write the stats file. + * ---------- + */ + + typedef struct PgStat_MsgInquiry + { + PgStat_MsgHdr m_hdr; + } PgStat_MsgInquiry; + + + /* ---------- * PgStat_Msg Union over all possible messages. * ---------- */ *************** *** 402,407 **** --- 415,421 ---- PgStat_MsgBgWriter msg_bgwriter; PgStat_MsgFuncstat msg_funcstat; PgStat_MsgFuncpurge msg_funcpurge; + PgStat_MsgInquiry msg_inquiry; } PgStat_Msg; *************** *** 413,419 **** * ------------------------------------------------------------ */ ! #define PGSTAT_FILE_FORMAT_ID 0x01A5BC97 /* ---------- * PgStat_StatDBEntry The collector's data per database --- 427,433 ---- * ------------------------------------------------------------ */ ! #define PGSTAT_FILE_FORMAT_ID 0x01A5BC98 /* ---------- * PgStat_StatDBEntry The collector's data per database
pgsql-hackers by date: