Thread: Additional DTrace Probes
The attached patch contains the probes (originally came from Theo Schlossnagle) that were removed from the previous submitted patch. Zdenek had some concerns about the way the probes were implemented http://archives.postgresql.org/pgsql-hackers/2008-07/msg01168.php. If there are specific recommendations, I'd be more than happy to make the changes and get them resubmitted. -Robert Index: src/backend/access/transam/slru.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/slru.c,v retrieving revision 1.45 diff -u -3 -p -r1.45 slru.c --- src/backend/access/transam/slru.c 1 Jan 2009 17:23:36 -0000 1.45 +++ src/backend/access/transam/slru.c 6 Mar 2009 04:01:56 -0000 @@ -57,6 +57,7 @@ #include "storage/fd.h" #include "storage/shmem.h" #include "miscadmin.h" +#include "pg_trace.h" /* @@ -372,6 +373,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen { SlruShared shared = ctl->shared; + TRACE_POSTGRESQL_SLRU_READPAGE_START((uintptr_t)ctl, pageno, write_ok, xid); /* Outer loop handles restart if we must wait for someone else's I/O */ for (;;) { @@ -399,6 +401,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen } /* Otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); return slotno; } @@ -446,6 +449,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen SlruReportIOError(ctl, pageno, xid); SlruRecentlyUsed(shared, slotno); + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); return slotno; } } @@ -470,6 +474,8 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, SlruShared shared = ctl->shared; int slotno; + TRACE_POSTGRESQL_SLRU_READPAGE_READONLY((uintptr_t)ctl, pageno, xid); + /* Try to find the page while holding only shared lock */ LWLockAcquire(shared->ControlLock, LW_SHARED); @@ -511,6 +517,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot int pageno = shared->page_number[slotno]; bool ok; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_START((uintptr_t)ctl, pageno, slotno); + /* If a write is in progress, wait for it to finish */ while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS && shared->page_number[slotno] == pageno) @@ -525,7 +533,10 @@ SimpleLruWritePage(SlruCtl ctl, int slot if (!shared->page_dirty[slotno] || shared->page_status[slotno] != SLRU_PAGE_VALID || shared->page_number[slotno] != pageno) + { + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); return; + } /* * Mark the slot write-busy, and clear the dirtybit. After this point, a @@ -569,6 +580,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); + + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); } /* @@ -593,6 +606,8 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa SlruFileName(ctl, path, segno); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_START((uintptr_t)ctl, path, pageno, slotno); + /* * In a crash-and-restart situation, it's possible for us to receive * commands to set the commit status of transactions whose bits are in @@ -607,6 +622,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -614,6 +630,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa (errmsg("file \"%s\" doesn't exist, reading as zeroes", path))); MemSet(shared->page_buffer[slotno], 0, BLCKSZ); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, slru_errcause, slru_errno); return true; } @@ -622,6 +639,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa slru_errcause = SLRU_SEEK_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -631,6 +649,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa slru_errcause = SLRU_READ_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -638,9 +657,12 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, slru_errcause, slru_errno); + return true; } @@ -668,6 +690,8 @@ SlruPhysicalWritePage(SlruCtl ctl, int p char path[MAXPGPATH]; int fd = -1; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_START((uintptr_t)ctl, pageno, slotno); + /* * Honor the write-WAL-before-data rule, if appropriate, so that we do not * write out data before associated WAL records. This is the same action @@ -753,6 +777,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -781,6 +806,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p slru_errno = errno; if (!fdata) close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -794,6 +820,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p slru_errno = errno; if (!fdata) close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -808,6 +835,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -815,10 +843,12 @@ SlruPhysicalWritePage(SlruCtl ctl, int p { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } } + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(true, slru_errcause,slru_errno); return true; } Index: src/backend/executor/execScan.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/execScan.c,v retrieving revision 1.44 diff -u -3 -p -r1.44 execScan.c --- src/backend/executor/execScan.c 1 Jan 2009 17:23:41 -0000 1.44 +++ src/backend/executor/execScan.c 6 Mar 2009 04:01:56 -0000 @@ -21,6 +21,7 @@ #include "executor/executor.h" #include "miscadmin.h" #include "utils/memutils.h" +#include "pg_trace.h" static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc); @@ -60,6 +61,8 @@ ExecScan(ScanState *node, qual = node->ps.qual; projInfo = node->ps.ps_ProjInfo; + TRACE_POSTGRESQL_EXECUTOR_SCAN((uintptr_t)node, ((Scan *)node->ps.plan)->scanrelid, (uintptr_t)accessMtd); + /* * If we have neither a qual to check nor a projection to do, just skip * all the overhead and return the raw scan tuple. Index: src/backend/executor/nodeAgg.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeAgg.c,v retrieving revision 1.164 diff -u -3 -p -r1.164 nodeAgg.c --- src/backend/executor/nodeAgg.c 1 Jan 2009 17:23:41 -0000 1.164 +++ src/backend/executor/nodeAgg.c 6 Mar 2009 04:01:56 -0000 @@ -86,6 +86,7 @@ #include "utils/syscache.h" #include "utils/tuplesort.h" #include "utils/datum.h" +#include "pg_trace.h" /* @@ -814,6 +815,8 @@ ExecAgg(AggState *node) if (node->agg_done) return NULL; + TRACE_POSTGRESQL_EXECUTOR_AGG((uintptr_t)node, ((Agg *) node->ss.ps.plan)->aggstrategy); + /* * Check to see if we're still projecting out tuples from a previous agg * tuple (because there is a function-returning-set in the projection Index: src/backend/executor/nodeGroup.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeGroup.c,v retrieving revision 1.73 diff -u -3 -p -r1.73 nodeGroup.c --- src/backend/executor/nodeGroup.c 1 Jan 2009 17:23:41 -0000 1.73 +++ src/backend/executor/nodeGroup.c 6 Mar 2009 04:01:56 -0000 @@ -24,6 +24,7 @@ #include "executor/executor.h" #include "executor/nodeGroup.h" +#include "pg_trace.h" /* @@ -49,6 +50,8 @@ ExecGroup(GroupState *node) numCols = ((Group *) node->ss.ps.plan)->numCols; grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx; + TRACE_POSTGRESQL_EXECUTOR_GROUP((uintptr_t)node, numCols); + /* * Check to see if we're still projecting out tuples from a previous group * tuple (because there is a function-returning-set in the projection Index: src/backend/executor/nodeHash.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHash.c,v retrieving revision 1.117 diff -u -3 -p -r1.117 nodeHash.c --- src/backend/executor/nodeHash.c 1 Jan 2009 17:23:41 -0000 1.117 +++ src/backend/executor/nodeHash.c 6 Mar 2009 04:01:56 -0000 @@ -35,6 +35,7 @@ #include "utils/dynahash.h" #include "utils/memutils.h" #include "utils/lsyscache.h" +#include "pg_trace.h" static void ExecHashIncreaseNumBatches(HashJoinTable hashtable); @@ -70,6 +71,8 @@ MultiExecHash(HashState *node) ExprContext *econtext; uint32 hashvalue; + TRACE_POSTGRESQL_EXECUTOR_HASH_MULTI((uintptr_t)node); + /* must provide our own instrumentation support */ if (node->ps.instrument) InstrStartNode(node->ps.instrument); Index: src/backend/executor/nodeHashjoin.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v retrieving revision 1.97 diff -u -3 -p -r1.97 nodeHashjoin.c --- src/backend/executor/nodeHashjoin.c 1 Jan 2009 17:23:41 -0000 1.97 +++ src/backend/executor/nodeHashjoin.c 6 Mar 2009 04:01:56 -0000 @@ -20,6 +20,7 @@ #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" #include "utils/memutils.h" +#include "pg_trace.h" /* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */ @@ -61,6 +62,8 @@ ExecHashJoin(HashJoinState *node) uint32 hashvalue; int batchno; + TRACE_POSTGRESQL_EXECUTOR_HASHJOIN((uintptr_t)node); + /* * get information from HashJoin node */ Index: src/backend/executor/nodeLimit.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeLimit.c,v retrieving revision 1.36 diff -u -3 -p -r1.36 nodeLimit.c --- src/backend/executor/nodeLimit.c 4 Mar 2009 10:55:00 -0000 1.36 +++ src/backend/executor/nodeLimit.c 6 Mar 2009 04:01:56 -0000 @@ -23,6 +23,7 @@ #include "executor/executor.h" #include "executor/nodeLimit.h" +#include "pg_trace.h" static void recompute_limits(LimitState *node); @@ -41,6 +42,8 @@ ExecLimit(LimitState *node) TupleTableSlot *slot; PlanState *outerPlan; + TRACE_POSTGRESQL_EXECUTOR_LIMIT((uintptr_t)node); + /* * get information from the node */ Index: src/backend/executor/nodeMaterial.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMaterial.c,v retrieving revision 1.65 diff -u -3 -p -r1.65 nodeMaterial.c --- src/backend/executor/nodeMaterial.c 1 Jan 2009 17:23:42 -0000 1.65 +++ src/backend/executor/nodeMaterial.c 6 Mar 2009 04:01:56 -0000 @@ -24,6 +24,7 @@ #include "executor/executor.h" #include "executor/nodeMaterial.h" #include "miscadmin.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- * ExecMaterial @@ -45,6 +46,8 @@ ExecMaterial(MaterialState *node) bool eof_tuplestore; TupleTableSlot *slot; + TRACE_POSTGRESQL_EXECUTOR_MATERIAL((uintptr_t)node); + /* * get state info from node */ Index: src/backend/executor/nodeMergejoin.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v retrieving revision 1.94 diff -u -3 -p -r1.94 nodeMergejoin.c --- src/backend/executor/nodeMergejoin.c 1 Jan 2009 17:23:42 -0000 1.94 +++ src/backend/executor/nodeMergejoin.c 6 Mar 2009 04:01:56 -0000 @@ -102,6 +102,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" +#include "pg_trace.h" /* @@ -565,6 +566,8 @@ ExecMergeJoin(MergeJoinState *node) bool doFillOuter; bool doFillInner; + TRACE_POSTGRESQL_EXECUTOR_MERGEJOIN((uintptr_t)node); + /* * get information from node */ Index: src/backend/executor/nodeNestloop.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v retrieving revision 1.50 diff -u -3 -p -r1.50 nodeNestloop.c --- src/backend/executor/nodeNestloop.c 1 Jan 2009 17:23:42 -0000 1.50 +++ src/backend/executor/nodeNestloop.c 6 Mar 2009 04:01:56 -0000 @@ -24,6 +24,7 @@ #include "executor/execdebug.h" #include "executor/nodeNestloop.h" #include "utils/memutils.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- @@ -67,6 +68,8 @@ ExecNestLoop(NestLoopState *node) List *otherqual; ExprContext *econtext; + TRACE_POSTGRESQL_EXECUTOR_NESTLOOP((uintptr_t)node); + /* * get information from the node */ Index: src/backend/executor/nodeSetOp.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v retrieving revision 1.28 diff -u -3 -p -r1.28 nodeSetOp.c --- src/backend/executor/nodeSetOp.c 1 Jan 2009 17:23:42 -0000 1.28 +++ src/backend/executor/nodeSetOp.c 6 Mar 2009 04:01:56 -0000 @@ -47,6 +47,7 @@ #include "executor/executor.h" #include "executor/nodeSetOp.h" #include "utils/memutils.h" +#include "pg_trace.h" /* @@ -196,6 +197,8 @@ ExecSetOp(SetOpState *node) SetOp *plannode = (SetOp *) node->ps.plan; TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot; + TRACE_POSTGRESQL_EXECUTOR_SETOP((uintptr_t)node); + /* * If the previously-returned tuple needs to be returned more than once, * keep returning it. Index: src/backend/executor/nodeSort.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSort.c,v retrieving revision 1.63 diff -u -3 -p -r1.63 nodeSort.c --- src/backend/executor/nodeSort.c 1 Jan 2009 17:23:42 -0000 1.63 +++ src/backend/executor/nodeSort.c 6 Mar 2009 04:01:57 -0000 @@ -19,6 +19,7 @@ #include "executor/nodeSort.h" #include "miscadmin.h" #include "utils/tuplesort.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- @@ -53,6 +54,8 @@ ExecSort(SortState *node) dir = estate->es_direction; tuplesortstate = (Tuplesortstate *) node->tuplesortstate; + TRACE_POSTGRESQL_EXECUTOR_SORT((uintptr_t)node, dir); + /* * If first time through, read all tuples from outer plan and pass them to * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. Index: src/backend/executor/nodeSubplan.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v retrieving revision 1.96 diff -u -3 -p -r1.96 nodeSubplan.c --- src/backend/executor/nodeSubplan.c 1 Jan 2009 17:23:42 -0000 1.96 +++ src/backend/executor/nodeSubplan.c 6 Mar 2009 04:01:57 -0000 @@ -27,6 +27,7 @@ #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "pg_trace.h" static Datum ExecSubPlan(SubPlanState *node, @@ -92,6 +93,8 @@ ExecHashSubPlan(SubPlanState *node, ExprContext *innerecontext = node->innerecontext; TupleTableSlot *slot; + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_HASH((uintptr_t)node); + /* Shouldn't have any direct correlation Vars */ if (subplan->parParam != NIL || node->args != NIL) elog(ERROR, "hashed subplan with direct correlation not supported"); @@ -227,6 +230,8 @@ ExecScanSubPlan(SubPlanState *node, ListCell *l; ArrayBuildState *astate = NULL; + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_SCAN((uintptr_t)node); + /* * We are probably in a short-lived expression-evaluation context. Switch * to the per-query context for manipulating the child plan's chgParam, Index: src/backend/executor/nodeUnique.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeUnique.c,v retrieving revision 1.58 diff -u -3 -p -r1.58 nodeUnique.c --- src/backend/executor/nodeUnique.c 1 Jan 2009 17:23:42 -0000 1.58 +++ src/backend/executor/nodeUnique.c 6 Mar 2009 04:01:57 -0000 @@ -36,6 +36,7 @@ #include "executor/executor.h" #include "executor/nodeUnique.h" #include "utils/memutils.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- @@ -50,6 +51,8 @@ ExecUnique(UniqueState *node) TupleTableSlot *slot; PlanState *outerPlan; + TRACE_POSTGRESQL_EXECUTOR_UNIQUE((uintptr_t)node); + /* * get information from the node */ Index: src/backend/utils/probes.d =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/probes.d,v retrieving revision 1.6 diff -u -3 -p -r1.6 probes.d --- src/backend/utils/probes.d 1 Jan 2009 17:23:48 -0000 1.6 +++ src/backend/utils/probes.d 6 Mar 2009 04:01:57 -0000 @@ -10,6 +10,7 @@ /* typedefs used in PostgreSQL */ #define LocalTransactionId unsigned int +#define TransactionId unsigned int #define LWLockId int #define LWLockMode int #define LOCKMODE int @@ -91,4 +92,29 @@ provider postgresql { probe xlog__switch(); probe wal__buffer__write__dirty__start(); probe wal__buffer__write__dirty__done(); + + probe slru__readpage__start(unsigned int, int, bool, TransactionId); + probe slru__readpage__done(int); + probe slru__readpage__readonly(unsigned int, int, TransactionId); + probe slru__writepage__start(unsigned int, int, int); + probe slru__writepage__done(); + probe slru__readpage__physical__start(unsigned int, char *, int, int); + probe slru__readpage__physical__done(int, int, int); + probe slru__writepage__physical__start(unsigned int, int, int); + probe slru__writepage__physical__done(int, int, int); + + probe executor__scan(unsigned int, unsigned int, unsigned int); + probe executor__agg(unsigned int, int); + probe executor__group(unsigned int, int); + probe executor__hash__multi(unsigned int); + probe executor__hashjoin(unsigned int); + probe executor__limit(unsigned int); + probe executor__material(unsigned int); + probe executor__mergejoin(unsigned int); + probe executor__nestloop(unsigned int); + probe executor__setop(unsigned int); + probe executor__sort(unsigned int, int); + probe executor__subplan__hash(unsigned int); + probe executor__subplan__scan(unsigned int); + probe executor__unique(unsigned int); };
Hi, Robert Lor <Robert.Lor@Sun.COM> wrote: > The attached patch contains the probes (originally came from Theo > Schlossnagle) that were removed from the previous submitted patch. > Zdenek had some concerns about the way the probes were implemented > http://archives.postgresql.org/pgsql-hackers/2008-07/msg01168.php. If > there are specific recommendations, I'd be more than happy to make the > changes and get them resubmitted. This will introduce SLRU and Executor probes. We already have Lock, LWLock, Buffer, I/O and XLogs probes. I'd like to have the following probes, too. In my experience, they could be bottlenecks or consume much time in some situations. - idle in transaction - spinlock wait-and-retry - CPU: Trigger execution - CPU: Encoding conversion - Network:send() and recv() - smgr: lseek() calls - Tempfile reads and writes Regards, --- ITAGAKI Takahiro NTT Open Source Software Center
Hi Robert, I tested your patch and it looks good. however I have several comments/questions: 1) probes contains pointer but in probe.h it is declared as int. Is it correct? 2) Maybe TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); would be better. Because slru_errcause, slru_errno can contains garbage in situation when everything goes fine. Same for write. I think it is committable for 8.4. Zdenek Robert Lor píše v čt 05. 03. 2009 v 22:39 -0600: > The attached patch contains the probes (originally came from Theo > Schlossnagle) that were removed from the previous submitted patch. > Zdenek had some concerns about the way the probes were implemented > http://archives.postgresql.org/pgsql-hackers/2008-07/msg01168.php. If > there are specific recommendations, I'd be more than happy to make the > changes and get them resubmitted. > > -Robert > > Prostý textový dokument příloha (dtrace-probes.patch) > Index: src/backend/access/transam/slru.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/slru.c,v > retrieving revision 1.45 > diff -u -3 -p -r1.45 slru.c > --- src/backend/access/transam/slru.c 1 Jan 2009 17:23:36 -0000 1.45 > +++ src/backend/access/transam/slru.c 6 Mar 2009 04:01:56 -0000 > @@ -57,6 +57,7 @@ > #include "storage/fd.h" > #include "storage/shmem.h" > #include "miscadmin.h" > +#include "pg_trace.h" > > > /* > @@ -372,6 +373,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen > { > SlruShared shared = ctl->shared; > > + TRACE_POSTGRESQL_SLRU_READPAGE_START((uintptr_t)ctl, pageno, write_ok, xid); > /* Outer loop handles restart if we must wait for someone else's I/O */ > for (;;) > { > @@ -399,6 +401,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen > } > /* Otherwise, it's ready to use */ > SlruRecentlyUsed(shared, slotno); > + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); > return slotno; > } > > @@ -446,6 +449,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen > SlruReportIOError(ctl, pageno, xid); > > SlruRecentlyUsed(shared, slotno); > + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); > return slotno; > } > } > @@ -470,6 +474,8 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, > SlruShared shared = ctl->shared; > int slotno; > > + TRACE_POSTGRESQL_SLRU_READPAGE_READONLY((uintptr_t)ctl, pageno, xid); > + > /* Try to find the page while holding only shared lock */ > LWLockAcquire(shared->ControlLock, LW_SHARED); > > @@ -511,6 +517,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot > int pageno = shared->page_number[slotno]; > bool ok; > > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_START((uintptr_t)ctl, pageno, slotno); > + > /* If a write is in progress, wait for it to finish */ > while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS && > shared->page_number[slotno] == pageno) > @@ -525,7 +533,10 @@ SimpleLruWritePage(SlruCtl ctl, int slot > if (!shared->page_dirty[slotno] || > shared->page_status[slotno] != SLRU_PAGE_VALID || > shared->page_number[slotno] != pageno) > + { > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); > return; > + } > > /* > * Mark the slot write-busy, and clear the dirtybit. After this point, a > @@ -569,6 +580,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot > /* Now it's okay to ereport if we failed */ > if (!ok) > SlruReportIOError(ctl, pageno, InvalidTransactionId); > + > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); > } > > /* > @@ -593,6 +606,8 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa > > SlruFileName(ctl, path, segno); > > + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_START((uintptr_t)ctl, path, pageno, slotno); > + > /* > * In a crash-and-restart situation, it's possible for us to receive > * commands to set the commit status of transactions whose bits are in > @@ -607,6 +622,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa > { > slru_errcause = SLRU_OPEN_FAILED; > slru_errno = errno; > + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > @@ -614,6 +630,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa > (errmsg("file \"%s\" doesn't exist, reading as zeroes", > path))); > MemSet(shared->page_buffer[slotno], 0, BLCKSZ); > + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, slru_errcause, slru_errno); > return true; > } > > @@ -622,6 +639,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa > slru_errcause = SLRU_SEEK_FAILED; > slru_errno = errno; > close(fd); > + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > @@ -631,6 +649,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa > slru_errcause = SLRU_READ_FAILED; > slru_errno = errno; > close(fd); > + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > @@ -638,9 +657,12 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa > { > slru_errcause = SLRU_CLOSE_FAILED; > slru_errno = errno; > + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, slru_errcause, slru_errno); > + > return true; > } > > @@ -668,6 +690,8 @@ SlruPhysicalWritePage(SlruCtl ctl, int p > char path[MAXPGPATH]; > int fd = -1; > > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_START((uintptr_t)ctl, pageno, slotno); > + > /* > * Honor the write-WAL-before-data rule, if appropriate, so that we do not > * write out data before associated WAL records. This is the same action > @@ -753,6 +777,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p > { > slru_errcause = SLRU_OPEN_FAILED; > slru_errno = errno; > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > @@ -781,6 +806,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p > slru_errno = errno; > if (!fdata) > close(fd); > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > @@ -794,6 +820,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p > slru_errno = errno; > if (!fdata) > close(fd); > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > @@ -808,6 +835,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p > slru_errcause = SLRU_FSYNC_FAILED; > slru_errno = errno; > close(fd); > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > > @@ -815,10 +843,12 @@ SlruPhysicalWritePage(SlruCtl ctl, int p > { > slru_errcause = SLRU_CLOSE_FAILED; > slru_errno = errno; > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); > return false; > } > } > > + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(true, slru_errcause,slru_errno); > return true; > } > > Index: src/backend/executor/execScan.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/execScan.c,v > retrieving revision 1.44 > diff -u -3 -p -r1.44 execScan.c > --- src/backend/executor/execScan.c 1 Jan 2009 17:23:41 -0000 1.44 > +++ src/backend/executor/execScan.c 6 Mar 2009 04:01:56 -0000 > @@ -21,6 +21,7 @@ > #include "executor/executor.h" > #include "miscadmin.h" > #include "utils/memutils.h" > +#include "pg_trace.h" > > > static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc); > @@ -60,6 +61,8 @@ ExecScan(ScanState *node, > qual = node->ps.qual; > projInfo = node->ps.ps_ProjInfo; > > + TRACE_POSTGRESQL_EXECUTOR_SCAN((uintptr_t)node, ((Scan *)node->ps.plan)->scanrelid, (uintptr_t)accessMtd); > + > /* > * If we have neither a qual to check nor a projection to do, just skip > * all the overhead and return the raw scan tuple. > Index: src/backend/executor/nodeAgg.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeAgg.c,v > retrieving revision 1.164 > diff -u -3 -p -r1.164 nodeAgg.c > --- src/backend/executor/nodeAgg.c 1 Jan 2009 17:23:41 -0000 1.164 > +++ src/backend/executor/nodeAgg.c 6 Mar 2009 04:01:56 -0000 > @@ -86,6 +86,7 @@ > #include "utils/syscache.h" > #include "utils/tuplesort.h" > #include "utils/datum.h" > +#include "pg_trace.h" > > > /* > @@ -814,6 +815,8 @@ ExecAgg(AggState *node) > if (node->agg_done) > return NULL; > > + TRACE_POSTGRESQL_EXECUTOR_AGG((uintptr_t)node, ((Agg *) node->ss.ps.plan)->aggstrategy); > + > /* > * Check to see if we're still projecting out tuples from a previous agg > * tuple (because there is a function-returning-set in the projection > Index: src/backend/executor/nodeGroup.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeGroup.c,v > retrieving revision 1.73 > diff -u -3 -p -r1.73 nodeGroup.c > --- src/backend/executor/nodeGroup.c 1 Jan 2009 17:23:41 -0000 1.73 > +++ src/backend/executor/nodeGroup.c 6 Mar 2009 04:01:56 -0000 > @@ -24,6 +24,7 @@ > > #include "executor/executor.h" > #include "executor/nodeGroup.h" > +#include "pg_trace.h" > > > /* > @@ -49,6 +50,8 @@ ExecGroup(GroupState *node) > numCols = ((Group *) node->ss.ps.plan)->numCols; > grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx; > > + TRACE_POSTGRESQL_EXECUTOR_GROUP((uintptr_t)node, numCols); > + > /* > * Check to see if we're still projecting out tuples from a previous group > * tuple (because there is a function-returning-set in the projection > Index: src/backend/executor/nodeHash.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHash.c,v > retrieving revision 1.117 > diff -u -3 -p -r1.117 nodeHash.c > --- src/backend/executor/nodeHash.c 1 Jan 2009 17:23:41 -0000 1.117 > +++ src/backend/executor/nodeHash.c 6 Mar 2009 04:01:56 -0000 > @@ -35,6 +35,7 @@ > #include "utils/dynahash.h" > #include "utils/memutils.h" > #include "utils/lsyscache.h" > +#include "pg_trace.h" > > > static void ExecHashIncreaseNumBatches(HashJoinTable hashtable); > @@ -70,6 +71,8 @@ MultiExecHash(HashState *node) > ExprContext *econtext; > uint32 hashvalue; > > + TRACE_POSTGRESQL_EXECUTOR_HASH_MULTI((uintptr_t)node); > + > /* must provide our own instrumentation support */ > if (node->ps.instrument) > InstrStartNode(node->ps.instrument); > Index: src/backend/executor/nodeHashjoin.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v > retrieving revision 1.97 > diff -u -3 -p -r1.97 nodeHashjoin.c > --- src/backend/executor/nodeHashjoin.c 1 Jan 2009 17:23:41 -0000 1.97 > +++ src/backend/executor/nodeHashjoin.c 6 Mar 2009 04:01:56 -0000 > @@ -20,6 +20,7 @@ > #include "executor/nodeHash.h" > #include "executor/nodeHashjoin.h" > #include "utils/memutils.h" > +#include "pg_trace.h" > > > /* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */ > @@ -61,6 +62,8 @@ ExecHashJoin(HashJoinState *node) > uint32 hashvalue; > int batchno; > > + TRACE_POSTGRESQL_EXECUTOR_HASHJOIN((uintptr_t)node); > + > /* > * get information from HashJoin node > */ > Index: src/backend/executor/nodeLimit.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeLimit.c,v > retrieving revision 1.36 > diff -u -3 -p -r1.36 nodeLimit.c > --- src/backend/executor/nodeLimit.c 4 Mar 2009 10:55:00 -0000 1.36 > +++ src/backend/executor/nodeLimit.c 6 Mar 2009 04:01:56 -0000 > @@ -23,6 +23,7 @@ > > #include "executor/executor.h" > #include "executor/nodeLimit.h" > +#include "pg_trace.h" > > static void recompute_limits(LimitState *node); > > @@ -41,6 +42,8 @@ ExecLimit(LimitState *node) > TupleTableSlot *slot; > PlanState *outerPlan; > > + TRACE_POSTGRESQL_EXECUTOR_LIMIT((uintptr_t)node); > + > /* > * get information from the node > */ > Index: src/backend/executor/nodeMaterial.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMaterial.c,v > retrieving revision 1.65 > diff -u -3 -p -r1.65 nodeMaterial.c > --- src/backend/executor/nodeMaterial.c 1 Jan 2009 17:23:42 -0000 1.65 > +++ src/backend/executor/nodeMaterial.c 6 Mar 2009 04:01:56 -0000 > @@ -24,6 +24,7 @@ > #include "executor/executor.h" > #include "executor/nodeMaterial.h" > #include "miscadmin.h" > +#include "pg_trace.h" > > /* ---------------------------------------------------------------- > * ExecMaterial > @@ -45,6 +46,8 @@ ExecMaterial(MaterialState *node) > bool eof_tuplestore; > TupleTableSlot *slot; > > + TRACE_POSTGRESQL_EXECUTOR_MATERIAL((uintptr_t)node); > + > /* > * get state info from node > */ > Index: src/backend/executor/nodeMergejoin.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v > retrieving revision 1.94 > diff -u -3 -p -r1.94 nodeMergejoin.c > --- src/backend/executor/nodeMergejoin.c 1 Jan 2009 17:23:42 -0000 1.94 > +++ src/backend/executor/nodeMergejoin.c 6 Mar 2009 04:01:56 -0000 > @@ -102,6 +102,7 @@ > #include "utils/lsyscache.h" > #include "utils/memutils.h" > #include "utils/syscache.h" > +#include "pg_trace.h" > > > /* > @@ -565,6 +566,8 @@ ExecMergeJoin(MergeJoinState *node) > bool doFillOuter; > bool doFillInner; > > + TRACE_POSTGRESQL_EXECUTOR_MERGEJOIN((uintptr_t)node); > + > /* > * get information from node > */ > Index: src/backend/executor/nodeNestloop.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v > retrieving revision 1.50 > diff -u -3 -p -r1.50 nodeNestloop.c > --- src/backend/executor/nodeNestloop.c 1 Jan 2009 17:23:42 -0000 1.50 > +++ src/backend/executor/nodeNestloop.c 6 Mar 2009 04:01:56 -0000 > @@ -24,6 +24,7 @@ > #include "executor/execdebug.h" > #include "executor/nodeNestloop.h" > #include "utils/memutils.h" > +#include "pg_trace.h" > > > /* ---------------------------------------------------------------- > @@ -67,6 +68,8 @@ ExecNestLoop(NestLoopState *node) > List *otherqual; > ExprContext *econtext; > > + TRACE_POSTGRESQL_EXECUTOR_NESTLOOP((uintptr_t)node); > + > /* > * get information from the node > */ > Index: src/backend/executor/nodeSetOp.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v > retrieving revision 1.28 > diff -u -3 -p -r1.28 nodeSetOp.c > --- src/backend/executor/nodeSetOp.c 1 Jan 2009 17:23:42 -0000 1.28 > +++ src/backend/executor/nodeSetOp.c 6 Mar 2009 04:01:56 -0000 > @@ -47,6 +47,7 @@ > #include "executor/executor.h" > #include "executor/nodeSetOp.h" > #include "utils/memutils.h" > +#include "pg_trace.h" > > > /* > @@ -196,6 +197,8 @@ ExecSetOp(SetOpState *node) > SetOp *plannode = (SetOp *) node->ps.plan; > TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot; > > + TRACE_POSTGRESQL_EXECUTOR_SETOP((uintptr_t)node); > + > /* > * If the previously-returned tuple needs to be returned more than once, > * keep returning it. > Index: src/backend/executor/nodeSort.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSort.c,v > retrieving revision 1.63 > diff -u -3 -p -r1.63 nodeSort.c > --- src/backend/executor/nodeSort.c 1 Jan 2009 17:23:42 -0000 1.63 > +++ src/backend/executor/nodeSort.c 6 Mar 2009 04:01:57 -0000 > @@ -19,6 +19,7 @@ > #include "executor/nodeSort.h" > #include "miscadmin.h" > #include "utils/tuplesort.h" > +#include "pg_trace.h" > > > /* ---------------------------------------------------------------- > @@ -53,6 +54,8 @@ ExecSort(SortState *node) > dir = estate->es_direction; > tuplesortstate = (Tuplesortstate *) node->tuplesortstate; > > + TRACE_POSTGRESQL_EXECUTOR_SORT((uintptr_t)node, dir); > + > /* > * If first time through, read all tuples from outer plan and pass them to > * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. > Index: src/backend/executor/nodeSubplan.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v > retrieving revision 1.96 > diff -u -3 -p -r1.96 nodeSubplan.c > --- src/backend/executor/nodeSubplan.c 1 Jan 2009 17:23:42 -0000 1.96 > +++ src/backend/executor/nodeSubplan.c 6 Mar 2009 04:01:57 -0000 > @@ -27,6 +27,7 @@ > #include "utils/array.h" > #include "utils/lsyscache.h" > #include "utils/memutils.h" > +#include "pg_trace.h" > > > static Datum ExecSubPlan(SubPlanState *node, > @@ -92,6 +93,8 @@ ExecHashSubPlan(SubPlanState *node, > ExprContext *innerecontext = node->innerecontext; > TupleTableSlot *slot; > > + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_HASH((uintptr_t)node); > + > /* Shouldn't have any direct correlation Vars */ > if (subplan->parParam != NIL || node->args != NIL) > elog(ERROR, "hashed subplan with direct correlation not supported"); > @@ -227,6 +230,8 @@ ExecScanSubPlan(SubPlanState *node, > ListCell *l; > ArrayBuildState *astate = NULL; > > + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_SCAN((uintptr_t)node); > + > /* > * We are probably in a short-lived expression-evaluation context. Switch > * to the per-query context for manipulating the child plan's chgParam, > Index: src/backend/executor/nodeUnique.c > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeUnique.c,v > retrieving revision 1.58 > diff -u -3 -p -r1.58 nodeUnique.c > --- src/backend/executor/nodeUnique.c 1 Jan 2009 17:23:42 -0000 1.58 > +++ src/backend/executor/nodeUnique.c 6 Mar 2009 04:01:57 -0000 > @@ -36,6 +36,7 @@ > #include "executor/executor.h" > #include "executor/nodeUnique.h" > #include "utils/memutils.h" > +#include "pg_trace.h" > > > /* ---------------------------------------------------------------- > @@ -50,6 +51,8 @@ ExecUnique(UniqueState *node) > TupleTableSlot *slot; > PlanState *outerPlan; > > + TRACE_POSTGRESQL_EXECUTOR_UNIQUE((uintptr_t)node); > + > /* > * get information from the node > */ > Index: src/backend/utils/probes.d > =================================================================== > RCS file: /projects/cvsroot/pgsql/src/backend/utils/probes.d,v > retrieving revision 1.6 > diff -u -3 -p -r1.6 probes.d > --- src/backend/utils/probes.d 1 Jan 2009 17:23:48 -0000 1.6 > +++ src/backend/utils/probes.d 6 Mar 2009 04:01:57 -0000 > @@ -10,6 +10,7 @@ > > /* typedefs used in PostgreSQL */ > #define LocalTransactionId unsigned int > +#define TransactionId unsigned int > #define LWLockId int > #define LWLockMode int > #define LOCKMODE int > @@ -91,4 +92,29 @@ provider postgresql { > probe xlog__switch(); > probe wal__buffer__write__dirty__start(); > probe wal__buffer__write__dirty__done(); > + > + probe slru__readpage__start(unsigned int, int, bool, TransactionId); > + probe slru__readpage__done(int); > + probe slru__readpage__readonly(unsigned int, int, TransactionId); > + probe slru__writepage__start(unsigned int, int, int); > + probe slru__writepage__done(); > + probe slru__readpage__physical__start(unsigned int, char *, int, int); > + probe slru__readpage__physical__done(int, int, int); > + probe slru__writepage__physical__start(unsigned int, int, int); > + probe slru__writepage__physical__done(int, int, int); > + > + probe executor__scan(unsigned int, unsigned int, unsigned int); > + probe executor__agg(unsigned int, int); > + probe executor__group(unsigned int, int); > + probe executor__hash__multi(unsigned int); > + probe executor__hashjoin(unsigned int); > + probe executor__limit(unsigned int); > + probe executor__material(unsigned int); > + probe executor__mergejoin(unsigned int); > + probe executor__nestloop(unsigned int); > + probe executor__setop(unsigned int); > + probe executor__sort(unsigned int, int); > + probe executor__subplan__hash(unsigned int); > + probe executor__subplan__scan(unsigned int); > + probe executor__unique(unsigned int); > };
ITAGAKI Takahiro píše v pá 06. 03. 2009 v 17:03 +0900: > Hi, > > Robert Lor <Robert.Lor@Sun.COM> wrote: > > > The attached patch contains the probes (originally came from Theo > > Schlossnagle) that were removed from the previous submitted patch. > > Zdenek had some concerns about the way the probes were implemented > > http://archives.postgresql.org/pgsql-hackers/2008-07/msg01168.php. If > > there are specific recommendations, I'd be more than happy to make the > > changes and get them resubmitted. > > This will introduce SLRU and Executor probes. > We already have Lock, LWLock, Buffer, I/O and XLogs probes. > > I'd like to have the following probes, too. In my experience, > they could be bottlenecks or consume much time in some situations. Of course, there are lot of places where probes can be added and it needs to determine correct place regrading to data which we want to collect. > - idle in transaction This probes lost in a action. I remember that I had some comments about them. It requires small code refactoring (some extra if or something like this). > - spinlock wait-and-retry Can you specify a place in code or what you need to measure? > - CPU: Trigger execution > - CPU: Encoding conversion Maybe function call start and done could cover both cases. > - Network: send() and recv() I think there could be more probes on network like login/logout, login handshake itself and so on. > - smgr: lseek() calls Is any reason for it? You can monitoring syscall directly. > - Tempfile reads and writes I think that it is possible now. last arg in buffer_read function contains information, that it is local(temp) buffer. Zdenek
Zdenek Kotala wrote: > I tested your patch and it looks good. however I have several > comments/questions: > > 1) probes contains pointer but in probe.h it is declared as int. Is it > correct? > The probes cast the pointer to uintptr_t, so the correct type that will work for both ILP32 and LP64 models is unsigned long. I've made the correction from unsigned int to unsigned long. The reason uintptr_t is not used in the probe definition is because it causes compilation problem on Mac OS X. This is a known problem in the OS X's DTrace implementation. > 2) Maybe > > TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); > > would be better. Because slru_errcause, slru_errno can contains garbage > in situation when everything goes fine. Same for write. > I've made the changes per your suggestion although one can argue that the script can check arg0, and if it's true, avoid using arg1 and arg2 as they are meaningless. > > I think it is committable for 8.4. > That would be awesome! -Robert Index: src/backend/access/transam/slru.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/access/transam/slru.c,v retrieving revision 1.45 diff -u -3 -p -r1.45 slru.c --- src/backend/access/transam/slru.c 1 Jan 2009 17:23:36 -0000 1.45 +++ src/backend/access/transam/slru.c 8 Mar 2009 20:39:01 -0000 @@ -57,6 +57,7 @@ #include "storage/fd.h" #include "storage/shmem.h" #include "miscadmin.h" +#include "pg_trace.h" /* @@ -372,6 +373,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen { SlruShared shared = ctl->shared; + TRACE_POSTGRESQL_SLRU_READPAGE_START((uintptr_t)ctl, pageno, write_ok, xid); /* Outer loop handles restart if we must wait for someone else's I/O */ for (;;) { @@ -399,6 +401,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen } /* Otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); return slotno; } @@ -446,6 +449,7 @@ SimpleLruReadPage(SlruCtl ctl, int pagen SlruReportIOError(ctl, pageno, xid); SlruRecentlyUsed(shared, slotno); + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); return slotno; } } @@ -470,6 +474,8 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, SlruShared shared = ctl->shared; int slotno; + TRACE_POSTGRESQL_SLRU_READPAGE_READONLY((uintptr_t)ctl, pageno, xid); + /* Try to find the page while holding only shared lock */ LWLockAcquire(shared->ControlLock, LW_SHARED); @@ -511,6 +517,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot int pageno = shared->page_number[slotno]; bool ok; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_START((uintptr_t)ctl, pageno, slotno); + /* If a write is in progress, wait for it to finish */ while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS && shared->page_number[slotno] == pageno) @@ -525,7 +533,10 @@ SimpleLruWritePage(SlruCtl ctl, int slot if (!shared->page_dirty[slotno] || shared->page_status[slotno] != SLRU_PAGE_VALID || shared->page_number[slotno] != pageno) + { + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); return; + } /* * Mark the slot write-busy, and clear the dirtybit. After this point, a @@ -569,6 +580,8 @@ SimpleLruWritePage(SlruCtl ctl, int slot /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); + + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); } /* @@ -593,6 +606,8 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa SlruFileName(ctl, path, segno); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_START((uintptr_t)ctl, path, pageno, slotno); + /* * In a crash-and-restart situation, it's possible for us to receive * commands to set the commit status of transactions whose bits are in @@ -607,6 +622,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -614,6 +630,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa (errmsg("file \"%s\" doesn't exist, reading as zeroes", path))); MemSet(shared->page_buffer[slotno], 0, BLCKSZ); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); return true; } @@ -622,6 +639,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa slru_errcause = SLRU_SEEK_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -631,6 +649,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa slru_errcause = SLRU_READ_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -638,9 +657,12 @@ SlruPhysicalReadPage(SlruCtl ctl, int pa { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); + return true; } @@ -668,6 +690,8 @@ SlruPhysicalWritePage(SlruCtl ctl, int p char path[MAXPGPATH]; int fd = -1; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_START((uintptr_t)ctl, pageno, slotno); + /* * Honor the write-WAL-before-data rule, if appropriate, so that we do not * write out data before associated WAL records. This is the same action @@ -753,6 +777,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -781,6 +806,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p slru_errno = errno; if (!fdata) close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -794,6 +820,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p slru_errno = errno; if (!fdata) close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -808,6 +835,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int p slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } @@ -815,10 +843,12 @@ SlruPhysicalWritePage(SlruCtl ctl, int p { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } } + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(true, -1, -1); return true; } Index: src/backend/executor/execScan.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/execScan.c,v retrieving revision 1.44 diff -u -3 -p -r1.44 execScan.c --- src/backend/executor/execScan.c 1 Jan 2009 17:23:41 -0000 1.44 +++ src/backend/executor/execScan.c 8 Mar 2009 20:39:02 -0000 @@ -21,6 +21,7 @@ #include "executor/executor.h" #include "miscadmin.h" #include "utils/memutils.h" +#include "pg_trace.h" static bool tlist_matches_tupdesc(PlanState *ps, List *tlist, Index varno, TupleDesc tupdesc); @@ -60,6 +61,8 @@ ExecScan(ScanState *node, qual = node->ps.qual; projInfo = node->ps.ps_ProjInfo; + TRACE_POSTGRESQL_EXECUTOR_SCAN((uintptr_t)node, ((Scan *)node->ps.plan)->scanrelid, (uintptr_t)accessMtd); + /* * If we have neither a qual to check nor a projection to do, just skip * all the overhead and return the raw scan tuple. Index: src/backend/executor/nodeAgg.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeAgg.c,v retrieving revision 1.164 diff -u -3 -p -r1.164 nodeAgg.c --- src/backend/executor/nodeAgg.c 1 Jan 2009 17:23:41 -0000 1.164 +++ src/backend/executor/nodeAgg.c 8 Mar 2009 20:39:02 -0000 @@ -86,6 +86,7 @@ #include "utils/syscache.h" #include "utils/tuplesort.h" #include "utils/datum.h" +#include "pg_trace.h" /* @@ -814,6 +815,8 @@ ExecAgg(AggState *node) if (node->agg_done) return NULL; + TRACE_POSTGRESQL_EXECUTOR_AGG((uintptr_t)node, ((Agg *) node->ss.ps.plan)->aggstrategy); + /* * Check to see if we're still projecting out tuples from a previous agg * tuple (because there is a function-returning-set in the projection Index: src/backend/executor/nodeGroup.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeGroup.c,v retrieving revision 1.73 diff -u -3 -p -r1.73 nodeGroup.c --- src/backend/executor/nodeGroup.c 1 Jan 2009 17:23:41 -0000 1.73 +++ src/backend/executor/nodeGroup.c 8 Mar 2009 20:39:02 -0000 @@ -24,6 +24,7 @@ #include "executor/executor.h" #include "executor/nodeGroup.h" +#include "pg_trace.h" /* @@ -49,6 +50,8 @@ ExecGroup(GroupState *node) numCols = ((Group *) node->ss.ps.plan)->numCols; grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx; + TRACE_POSTGRESQL_EXECUTOR_GROUP((uintptr_t)node, numCols); + /* * Check to see if we're still projecting out tuples from a previous group * tuple (because there is a function-returning-set in the projection Index: src/backend/executor/nodeHash.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHash.c,v retrieving revision 1.117 diff -u -3 -p -r1.117 nodeHash.c --- src/backend/executor/nodeHash.c 1 Jan 2009 17:23:41 -0000 1.117 +++ src/backend/executor/nodeHash.c 8 Mar 2009 20:39:02 -0000 @@ -35,6 +35,7 @@ #include "utils/dynahash.h" #include "utils/memutils.h" #include "utils/lsyscache.h" +#include "pg_trace.h" static void ExecHashIncreaseNumBatches(HashJoinTable hashtable); @@ -70,6 +71,8 @@ MultiExecHash(HashState *node) ExprContext *econtext; uint32 hashvalue; + TRACE_POSTGRESQL_EXECUTOR_HASH_MULTI((uintptr_t)node); + /* must provide our own instrumentation support */ if (node->ps.instrument) InstrStartNode(node->ps.instrument); Index: src/backend/executor/nodeHashjoin.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v retrieving revision 1.97 diff -u -3 -p -r1.97 nodeHashjoin.c --- src/backend/executor/nodeHashjoin.c 1 Jan 2009 17:23:41 -0000 1.97 +++ src/backend/executor/nodeHashjoin.c 8 Mar 2009 20:39:02 -0000 @@ -20,6 +20,7 @@ #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" #include "utils/memutils.h" +#include "pg_trace.h" /* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */ @@ -61,6 +62,8 @@ ExecHashJoin(HashJoinState *node) uint32 hashvalue; int batchno; + TRACE_POSTGRESQL_EXECUTOR_HASHJOIN((uintptr_t)node); + /* * get information from HashJoin node */ Index: src/backend/executor/nodeLimit.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeLimit.c,v retrieving revision 1.36 diff -u -3 -p -r1.36 nodeLimit.c --- src/backend/executor/nodeLimit.c 4 Mar 2009 10:55:00 -0000 1.36 +++ src/backend/executor/nodeLimit.c 8 Mar 2009 20:39:02 -0000 @@ -23,6 +23,7 @@ #include "executor/executor.h" #include "executor/nodeLimit.h" +#include "pg_trace.h" static void recompute_limits(LimitState *node); @@ -41,6 +42,8 @@ ExecLimit(LimitState *node) TupleTableSlot *slot; PlanState *outerPlan; + TRACE_POSTGRESQL_EXECUTOR_LIMIT((uintptr_t)node); + /* * get information from the node */ Index: src/backend/executor/nodeMaterial.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMaterial.c,v retrieving revision 1.65 diff -u -3 -p -r1.65 nodeMaterial.c --- src/backend/executor/nodeMaterial.c 1 Jan 2009 17:23:42 -0000 1.65 +++ src/backend/executor/nodeMaterial.c 8 Mar 2009 20:39:02 -0000 @@ -24,6 +24,7 @@ #include "executor/executor.h" #include "executor/nodeMaterial.h" #include "miscadmin.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- * ExecMaterial @@ -45,6 +46,8 @@ ExecMaterial(MaterialState *node) bool eof_tuplestore; TupleTableSlot *slot; + TRACE_POSTGRESQL_EXECUTOR_MATERIAL((uintptr_t)node); + /* * get state info from node */ Index: src/backend/executor/nodeMergejoin.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v retrieving revision 1.94 diff -u -3 -p -r1.94 nodeMergejoin.c --- src/backend/executor/nodeMergejoin.c 1 Jan 2009 17:23:42 -0000 1.94 +++ src/backend/executor/nodeMergejoin.c 8 Mar 2009 20:39:02 -0000 @@ -102,6 +102,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/syscache.h" +#include "pg_trace.h" /* @@ -565,6 +566,8 @@ ExecMergeJoin(MergeJoinState *node) bool doFillOuter; bool doFillInner; + TRACE_POSTGRESQL_EXECUTOR_MERGEJOIN((uintptr_t)node); + /* * get information from node */ Index: src/backend/executor/nodeNestloop.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v retrieving revision 1.50 diff -u -3 -p -r1.50 nodeNestloop.c --- src/backend/executor/nodeNestloop.c 1 Jan 2009 17:23:42 -0000 1.50 +++ src/backend/executor/nodeNestloop.c 8 Mar 2009 20:39:02 -0000 @@ -24,6 +24,7 @@ #include "executor/execdebug.h" #include "executor/nodeNestloop.h" #include "utils/memutils.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- @@ -67,6 +68,8 @@ ExecNestLoop(NestLoopState *node) List *otherqual; ExprContext *econtext; + TRACE_POSTGRESQL_EXECUTOR_NESTLOOP((uintptr_t)node); + /* * get information from the node */ Index: src/backend/executor/nodeSetOp.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v retrieving revision 1.28 diff -u -3 -p -r1.28 nodeSetOp.c --- src/backend/executor/nodeSetOp.c 1 Jan 2009 17:23:42 -0000 1.28 +++ src/backend/executor/nodeSetOp.c 8 Mar 2009 20:39:02 -0000 @@ -47,6 +47,7 @@ #include "executor/executor.h" #include "executor/nodeSetOp.h" #include "utils/memutils.h" +#include "pg_trace.h" /* @@ -196,6 +197,8 @@ ExecSetOp(SetOpState *node) SetOp *plannode = (SetOp *) node->ps.plan; TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot; + TRACE_POSTGRESQL_EXECUTOR_SETOP((uintptr_t)node); + /* * If the previously-returned tuple needs to be returned more than once, * keep returning it. Index: src/backend/executor/nodeSort.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSort.c,v retrieving revision 1.63 diff -u -3 -p -r1.63 nodeSort.c --- src/backend/executor/nodeSort.c 1 Jan 2009 17:23:42 -0000 1.63 +++ src/backend/executor/nodeSort.c 8 Mar 2009 20:39:02 -0000 @@ -19,6 +19,7 @@ #include "executor/nodeSort.h" #include "miscadmin.h" #include "utils/tuplesort.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- @@ -53,6 +54,8 @@ ExecSort(SortState *node) dir = estate->es_direction; tuplesortstate = (Tuplesortstate *) node->tuplesortstate; + TRACE_POSTGRESQL_EXECUTOR_SORT((uintptr_t)node, dir); + /* * If first time through, read all tuples from outer plan and pass them to * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. Index: src/backend/executor/nodeSubplan.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v retrieving revision 1.96 diff -u -3 -p -r1.96 nodeSubplan.c --- src/backend/executor/nodeSubplan.c 1 Jan 2009 17:23:42 -0000 1.96 +++ src/backend/executor/nodeSubplan.c 8 Mar 2009 20:39:02 -0000 @@ -27,6 +27,7 @@ #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "pg_trace.h" static Datum ExecSubPlan(SubPlanState *node, @@ -92,6 +93,8 @@ ExecHashSubPlan(SubPlanState *node, ExprContext *innerecontext = node->innerecontext; TupleTableSlot *slot; + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_HASH((uintptr_t)node); + /* Shouldn't have any direct correlation Vars */ if (subplan->parParam != NIL || node->args != NIL) elog(ERROR, "hashed subplan with direct correlation not supported"); @@ -227,6 +230,8 @@ ExecScanSubPlan(SubPlanState *node, ListCell *l; ArrayBuildState *astate = NULL; + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_SCAN((uintptr_t)node); + /* * We are probably in a short-lived expression-evaluation context. Switch * to the per-query context for manipulating the child plan's chgParam, Index: src/backend/executor/nodeUnique.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/executor/nodeUnique.c,v retrieving revision 1.58 diff -u -3 -p -r1.58 nodeUnique.c --- src/backend/executor/nodeUnique.c 1 Jan 2009 17:23:42 -0000 1.58 +++ src/backend/executor/nodeUnique.c 8 Mar 2009 20:39:02 -0000 @@ -36,6 +36,7 @@ #include "executor/executor.h" #include "executor/nodeUnique.h" #include "utils/memutils.h" +#include "pg_trace.h" /* ---------------------------------------------------------------- @@ -50,6 +51,8 @@ ExecUnique(UniqueState *node) TupleTableSlot *slot; PlanState *outerPlan; + TRACE_POSTGRESQL_EXECUTOR_UNIQUE((uintptr_t)node); + /* * get information from the node */ Index: src/backend/utils/probes.d =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/utils/probes.d,v retrieving revision 1.6 diff -u -3 -p -r1.6 probes.d --- src/backend/utils/probes.d 1 Jan 2009 17:23:48 -0000 1.6 +++ src/backend/utils/probes.d 8 Mar 2009 20:39:07 -0000 @@ -10,6 +10,7 @@ /* typedefs used in PostgreSQL */ #define LocalTransactionId unsigned int +#define TransactionId unsigned int #define LWLockId int #define LWLockMode int #define LOCKMODE int @@ -91,4 +92,29 @@ provider postgresql { probe xlog__switch(); probe wal__buffer__write__dirty__start(); probe wal__buffer__write__dirty__done(); + + probe slru__readpage__start(unsigned long, int, bool, TransactionId); + probe slru__readpage__done(int); + probe slru__readpage__readonly(unsigned long, int, TransactionId); + probe slru__writepage__start(unsigned long, int, int); + probe slru__writepage__done(); + probe slru__readpage__physical__start(unsigned long, char *, int, int); + probe slru__readpage__physical__done(int, int, int); + probe slru__writepage__physical__start(unsigned long, int, int); + probe slru__writepage__physical__done(int, int, int); + + probe executor__scan(unsigned long, unsigned int, unsigned long); + probe executor__agg(unsigned long, int); + probe executor__group(unsigned long, int); + probe executor__hash__multi(unsigned long); + probe executor__hashjoin(unsigned long); + probe executor__limit(unsigned long); + probe executor__material(unsigned long); + probe executor__mergejoin(unsigned long); + probe executor__nestloop(unsigned long); + probe executor__setop(unsigned long); + probe executor__sort(unsigned long, int); + probe executor__subplan__hash(unsigned long); + probe executor__subplan__scan(unsigned long); + probe executor__unique(unsigned long); };
ITAGAKI Takahiro wrote: > This will introduce SLRU and Executor probes. > We already have Lock, LWLock, Buffer, I/O and XLogs probes. > > I'd like to have the following probes, too. In my experience, > they could be bottlenecks or consume much time in some situations. > > - idle in transaction > - spinlock wait-and-retry > - CPU: Trigger execution > - CPU: Encoding conversion > - Network: send() and recv() > - smgr: lseek() calls > - Tempfile reads and writes > > Great suggestions. If you have particular use cases in mind, please send them to the list. This will help determine what kind of data to be made available via the probes. It'll also be helpful if you know the locations for the probes. -Robert
It looks OK now. Zdenek Dne 8.03.09 21:58, Robert Lor napsal(a): >> 2) Maybe >> >> TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); >> >> would be better. Because slru_errcause, slru_errno can contains garbage >> in situation when everything goes fine. Same for write. > > I've made the changes per your suggestion although one can argue that > the script can check arg0, and if it's true, avoid using arg1 and arg2 > as they are meaningless. Thanks. >> >> I think it is committable for 8.4. >> > > That would be awesome! yeah, but it depends on commiters :-) Zdenek
OK, updated patch attached and applied; thanks. --------------------------------------------------------------------------- Zdenek Kotala wrote: > It looks OK now. > > Zdenek > > Dne 8.03.09 21:58, Robert Lor napsal(a): > > >> 2) Maybe > >> > >> TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); > >> > >> would be better. Because slru_errcause, slru_errno can contains garbage > >> in situation when everything goes fine. Same for write. > > > > I've made the changes per your suggestion although one can argue that > > the script can check arg0, and if it's true, avoid using arg1 and arg2 > > as they are meaningless. > > Thanks. > > >> > >> I think it is committable for 8.4. > >> > > > > That would be awesome! > > yeah, but it depends on commiters :-) > > Zdenek > > -- > Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) > To make changes to your subscription: > http://www.postgresql.org/mailpref/pgsql-hackers -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + If your life is a hard drive, Christ can be your backup. + Index: src/backend/access/transam/slru.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/access/transam/slru.c,v retrieving revision 1.45 diff -c -c -r1.45 slru.c *** src/backend/access/transam/slru.c 1 Jan 2009 17:23:36 -0000 1.45 --- src/backend/access/transam/slru.c 2 Apr 2009 19:01:55 -0000 *************** *** 57,62 **** --- 57,63 ---- #include "storage/fd.h" #include "storage/shmem.h" #include "miscadmin.h" + #include "pg_trace.h" /* *************** *** 372,377 **** --- 373,379 ---- { SlruShared shared = ctl->shared; + TRACE_POSTGRESQL_SLRU_READPAGE_START((uintptr_t)ctl, pageno, write_ok, xid); /* Outer loop handles restart if we must wait for someone else's I/O */ for (;;) { *************** *** 399,404 **** --- 401,407 ---- } /* Otherwise, it's ready to use */ SlruRecentlyUsed(shared, slotno); + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); return slotno; } *************** *** 446,451 **** --- 449,455 ---- SlruReportIOError(ctl, pageno, xid); SlruRecentlyUsed(shared, slotno); + TRACE_POSTGRESQL_SLRU_READPAGE_DONE(slotno); return slotno; } } *************** *** 470,475 **** --- 474,481 ---- SlruShared shared = ctl->shared; int slotno; + TRACE_POSTGRESQL_SLRU_READPAGE_READONLY((uintptr_t)ctl, pageno, xid); + /* Try to find the page while holding only shared lock */ LWLockAcquire(shared->ControlLock, LW_SHARED); *************** *** 511,516 **** --- 517,524 ---- int pageno = shared->page_number[slotno]; bool ok; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_START((uintptr_t)ctl, pageno, slotno); + /* If a write is in progress, wait for it to finish */ while (shared->page_status[slotno] == SLRU_PAGE_WRITE_IN_PROGRESS && shared->page_number[slotno] == pageno) *************** *** 525,531 **** --- 533,542 ---- if (!shared->page_dirty[slotno] || shared->page_status[slotno] != SLRU_PAGE_VALID || shared->page_number[slotno] != pageno) + { + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); return; + } /* * Mark the slot write-busy, and clear the dirtybit. After this point, a *************** *** 569,574 **** --- 580,587 ---- /* Now it's okay to ereport if we failed */ if (!ok) SlruReportIOError(ctl, pageno, InvalidTransactionId); + + TRACE_POSTGRESQL_SLRU_WRITEPAGE_DONE(); } /* *************** *** 593,598 **** --- 606,613 ---- SlruFileName(ctl, path, segno); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_START((uintptr_t)ctl, path, pageno, slotno); + /* * In a crash-and-restart situation, it's possible for us to receive * commands to set the commit status of transactions whose bits are in *************** *** 607,612 **** --- 622,628 ---- { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } *************** *** 614,619 **** --- 630,636 ---- (errmsg("file \"%s\" doesn't exist, reading as zeroes", path))); MemSet(shared->page_buffer[slotno], 0, BLCKSZ); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); return true; } *************** *** 622,627 **** --- 639,645 ---- slru_errcause = SLRU_SEEK_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } *************** *** 631,636 **** --- 649,655 ---- slru_errcause = SLRU_READ_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } *************** *** 638,646 **** --- 657,668 ---- { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } + TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); + return true; } *************** *** 668,673 **** --- 690,697 ---- char path[MAXPGPATH]; int fd = -1; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_START((uintptr_t)ctl, pageno, slotno); + /* * Honor the write-WAL-before-data rule, if appropriate, so that we do not * write out data before associated WAL records. This is the same action *************** *** 753,758 **** --- 777,783 ---- { slru_errcause = SLRU_OPEN_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } *************** *** 781,786 **** --- 806,812 ---- slru_errno = errno; if (!fdata) close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } *************** *** 794,799 **** --- 820,826 ---- slru_errno = errno; if (!fdata) close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } *************** *** 808,813 **** --- 835,841 ---- slru_errcause = SLRU_FSYNC_FAILED; slru_errno = errno; close(fd); + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } *************** *** 815,824 **** --- 843,854 ---- { slru_errcause = SLRU_CLOSE_FAILED; slru_errno = errno; + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(false, slru_errcause, slru_errno); return false; } } + TRACE_POSTGRESQL_SLRU_WRITEPAGE_PHYSICAL_DONE(true, -1, -1); return true; } Index: src/backend/executor/execScan.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/execScan.c,v retrieving revision 1.44 diff -c -c -r1.44 execScan.c *** src/backend/executor/execScan.c 1 Jan 2009 17:23:41 -0000 1.44 --- src/backend/executor/execScan.c 2 Apr 2009 19:01:55 -0000 *************** *** 20,25 **** --- 20,26 ---- #include "executor/executor.h" #include "miscadmin.h" + #include "pg_trace.h" #include "utils/memutils.h" *************** *** 60,65 **** --- 61,68 ---- qual = node->ps.qual; projInfo = node->ps.ps_ProjInfo; + TRACE_POSTGRESQL_EXECUTOR_SCAN((uintptr_t)node, ((Scan *)node->ps.plan)->scanrelid, (uintptr_t)accessMtd); + /* * If we have neither a qual to check nor a projection to do, just skip * all the overhead and return the raw scan tuple. Index: src/backend/executor/nodeAgg.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v retrieving revision 1.164 diff -c -c -r1.164 nodeAgg.c *** src/backend/executor/nodeAgg.c 1 Jan 2009 17:23:41 -0000 1.164 --- src/backend/executor/nodeAgg.c 2 Apr 2009 19:01:55 -0000 *************** *** 79,84 **** --- 79,85 ---- #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "parser/parse_oper.h" + #include "pg_trace.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/lsyscache.h" *************** *** 814,819 **** --- 815,822 ---- if (node->agg_done) return NULL; + TRACE_POSTGRESQL_EXECUTOR_AGG((uintptr_t)node, ((Agg *) node->ss.ps.plan)->aggstrategy); + /* * Check to see if we're still projecting out tuples from a previous agg * tuple (because there is a function-returning-set in the projection Index: src/backend/executor/nodeGroup.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v retrieving revision 1.73 diff -c -c -r1.73 nodeGroup.c *** src/backend/executor/nodeGroup.c 1 Jan 2009 17:23:41 -0000 1.73 --- src/backend/executor/nodeGroup.c 2 Apr 2009 19:01:55 -0000 *************** *** 24,29 **** --- 24,30 ---- #include "executor/executor.h" #include "executor/nodeGroup.h" + #include "pg_trace.h" /* *************** *** 49,54 **** --- 50,57 ---- numCols = ((Group *) node->ss.ps.plan)->numCols; grpColIdx = ((Group *) node->ss.ps.plan)->grpColIdx; + TRACE_POSTGRESQL_EXECUTOR_GROUP((uintptr_t)node, numCols); + /* * Check to see if we're still projecting out tuples from a previous group * tuple (because there is a function-returning-set in the projection Index: src/backend/executor/nodeHash.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v retrieving revision 1.118 diff -c -c -r1.118 nodeHash.c *** src/backend/executor/nodeHash.c 21 Mar 2009 00:04:38 -0000 1.118 --- src/backend/executor/nodeHash.c 2 Apr 2009 19:01:55 -0000 *************** *** 33,38 **** --- 33,39 ---- #include "executor/nodeHashjoin.h" #include "miscadmin.h" #include "parser/parse_expr.h" + #include "pg_trace.h" #include "utils/dynahash.h" #include "utils/memutils.h" #include "utils/lsyscache.h" *************** *** 79,84 **** --- 80,87 ---- ExprContext *econtext; uint32 hashvalue; + TRACE_POSTGRESQL_EXECUTOR_HASH_MULTI((uintptr_t)node); + /* must provide our own instrumentation support */ if (node->ps.instrument) InstrStartNode(node->ps.instrument); Index: src/backend/executor/nodeHashjoin.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v retrieving revision 1.98 diff -c -c -r1.98 nodeHashjoin.c *** src/backend/executor/nodeHashjoin.c 21 Mar 2009 00:04:38 -0000 1.98 --- src/backend/executor/nodeHashjoin.c 2 Apr 2009 19:01:55 -0000 *************** *** 19,24 **** --- 19,25 ---- #include "executor/hashjoin.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" + #include "pg_trace.h" #include "utils/memutils.h" *************** *** 61,66 **** --- 62,69 ---- uint32 hashvalue; int batchno; + TRACE_POSTGRESQL_EXECUTOR_HASHJOIN((uintptr_t)node); + /* * get information from HashJoin node */ Index: src/backend/executor/nodeLimit.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeLimit.c,v retrieving revision 1.36 diff -c -c -r1.36 nodeLimit.c *** src/backend/executor/nodeLimit.c 4 Mar 2009 10:55:00 -0000 1.36 --- src/backend/executor/nodeLimit.c 2 Apr 2009 19:01:55 -0000 *************** *** 23,28 **** --- 23,29 ---- #include "executor/executor.h" #include "executor/nodeLimit.h" + #include "pg_trace.h" static void recompute_limits(LimitState *node); *************** *** 41,46 **** --- 42,49 ---- TupleTableSlot *slot; PlanState *outerPlan; + TRACE_POSTGRESQL_EXECUTOR_LIMIT((uintptr_t)node); + /* * get information from the node */ Index: src/backend/executor/nodeMaterial.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeMaterial.c,v retrieving revision 1.66 diff -c -c -r1.66 nodeMaterial.c *** src/backend/executor/nodeMaterial.c 27 Mar 2009 18:30:21 -0000 1.66 --- src/backend/executor/nodeMaterial.c 2 Apr 2009 19:01:55 -0000 *************** *** 24,29 **** --- 24,30 ---- #include "executor/executor.h" #include "executor/nodeMaterial.h" #include "miscadmin.h" + #include "pg_trace.h" /* ---------------------------------------------------------------- * ExecMaterial *************** *** 45,50 **** --- 46,53 ---- bool eof_tuplestore; TupleTableSlot *slot; + TRACE_POSTGRESQL_EXECUTOR_MATERIAL((uintptr_t)node); + /* * get state info from node */ Index: src/backend/executor/nodeMergejoin.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v retrieving revision 1.94 diff -c -c -r1.94 nodeMergejoin.c *** src/backend/executor/nodeMergejoin.c 1 Jan 2009 17:23:42 -0000 1.94 --- src/backend/executor/nodeMergejoin.c 2 Apr 2009 19:01:55 -0000 *************** *** 98,103 **** --- 98,104 ---- #include "executor/execdefs.h" #include "executor/nodeMergejoin.h" #include "miscadmin.h" + #include "pg_trace.h" #include "utils/acl.h" #include "utils/lsyscache.h" #include "utils/memutils.h" *************** *** 565,570 **** --- 566,573 ---- bool doFillOuter; bool doFillInner; + TRACE_POSTGRESQL_EXECUTOR_MERGEJOIN((uintptr_t)node); + /* * get information from node */ Index: src/backend/executor/nodeNestloop.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeNestloop.c,v retrieving revision 1.50 diff -c -c -r1.50 nodeNestloop.c *** src/backend/executor/nodeNestloop.c 1 Jan 2009 17:23:42 -0000 1.50 --- src/backend/executor/nodeNestloop.c 2 Apr 2009 19:01:55 -0000 *************** *** 23,28 **** --- 23,29 ---- #include "executor/execdebug.h" #include "executor/nodeNestloop.h" + #include "pg_trace.h" #include "utils/memutils.h" *************** *** 67,72 **** --- 68,75 ---- List *otherqual; ExprContext *econtext; + TRACE_POSTGRESQL_EXECUTOR_NESTLOOP((uintptr_t)node); + /* * get information from the node */ Index: src/backend/executor/nodeSetOp.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeSetOp.c,v retrieving revision 1.28 diff -c -c -r1.28 nodeSetOp.c *** src/backend/executor/nodeSetOp.c 1 Jan 2009 17:23:42 -0000 1.28 --- src/backend/executor/nodeSetOp.c 2 Apr 2009 19:01:55 -0000 *************** *** 46,51 **** --- 46,52 ---- #include "executor/executor.h" #include "executor/nodeSetOp.h" + #include "pg_trace.h" #include "utils/memutils.h" *************** *** 196,201 **** --- 197,204 ---- SetOp *plannode = (SetOp *) node->ps.plan; TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot; + TRACE_POSTGRESQL_EXECUTOR_SETOP((uintptr_t)node); + /* * If the previously-returned tuple needs to be returned more than once, * keep returning it. Index: src/backend/executor/nodeSort.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v retrieving revision 1.63 diff -c -c -r1.63 nodeSort.c *** src/backend/executor/nodeSort.c 1 Jan 2009 17:23:42 -0000 1.63 --- src/backend/executor/nodeSort.c 2 Apr 2009 19:01:55 -0000 *************** *** 18,23 **** --- 18,24 ---- #include "executor/execdebug.h" #include "executor/nodeSort.h" #include "miscadmin.h" + #include "pg_trace.h" #include "utils/tuplesort.h" *************** *** 53,58 **** --- 54,61 ---- dir = estate->es_direction; tuplesortstate = (Tuplesortstate *) node->tuplesortstate; + TRACE_POSTGRESQL_EXECUTOR_SORT((uintptr_t)node, dir); + /* * If first time through, read all tuples from outer plan and pass them to * tuplesort.c. Subsequent calls just fetch tuples from tuplesort. Index: src/backend/executor/nodeSubplan.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v retrieving revision 1.96 diff -c -c -r1.96 nodeSubplan.c *** src/backend/executor/nodeSubplan.c 1 Jan 2009 17:23:42 -0000 1.96 --- src/backend/executor/nodeSubplan.c 2 Apr 2009 19:01:55 -0000 *************** *** 24,29 **** --- 24,30 ---- #include "executor/nodeSubplan.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" + #include "pg_trace.h" #include "utils/array.h" #include "utils/lsyscache.h" #include "utils/memutils.h" *************** *** 92,97 **** --- 93,100 ---- ExprContext *innerecontext = node->innerecontext; TupleTableSlot *slot; + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_HASH((uintptr_t)node); + /* Shouldn't have any direct correlation Vars */ if (subplan->parParam != NIL || node->args != NIL) elog(ERROR, "hashed subplan with direct correlation not supported"); *************** *** 227,232 **** --- 230,237 ---- ListCell *l; ArrayBuildState *astate = NULL; + TRACE_POSTGRESQL_EXECUTOR_SUBPLAN_SCAN((uintptr_t)node); + /* * We are probably in a short-lived expression-evaluation context. Switch * to the per-query context for manipulating the child plan's chgParam, Index: src/backend/executor/nodeUnique.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v retrieving revision 1.58 diff -c -c -r1.58 nodeUnique.c *** src/backend/executor/nodeUnique.c 1 Jan 2009 17:23:42 -0000 1.58 --- src/backend/executor/nodeUnique.c 2 Apr 2009 19:01:55 -0000 *************** *** 35,40 **** --- 35,41 ---- #include "executor/executor.h" #include "executor/nodeUnique.h" + #include "pg_trace.h" #include "utils/memutils.h" *************** *** 50,55 **** --- 51,58 ---- TupleTableSlot *slot; PlanState *outerPlan; + TRACE_POSTGRESQL_EXECUTOR_UNIQUE((uintptr_t)node); + /* * get information from the node */ Index: src/backend/utils/probes.d =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/probes.d,v retrieving revision 1.9 diff -c -c -r1.9 probes.d *** src/backend/utils/probes.d 23 Mar 2009 01:52:38 -0000 1.9 --- src/backend/utils/probes.d 2 Apr 2009 19:01:55 -0000 *************** *** 15,20 **** --- 15,21 ---- * in probe definitions, as they cause compilation errors on Mac OS X 10.5. */ #define LocalTransactionId unsigned int + #define TransactionId unsigned int #define LWLockId int #define LWLockMode int #define LOCKMODE int *************** *** 90,93 **** --- 91,119 ---- probe xlog__switch(); probe wal__buffer__write__dirty__start(); probe wal__buffer__write__dirty__done(); + + probe slru__readpage__start(unsigned long, int, bool, TransactionId); + probe slru__readpage__done(int); + probe slru__readpage__readonly(unsigned long, int, TransactionId); + probe slru__writepage__start(unsigned long, int, int); + probe slru__writepage__done(); + probe slru__readpage__physical__start(unsigned long, char *, int, int); + probe slru__readpage__physical__done(int, int, int); + probe slru__writepage__physical__start(unsigned long, int, int); + probe slru__writepage__physical__done(int, int, int); + + probe executor__scan(unsigned long, unsigned int, unsigned long); + probe executor__agg(unsigned long, int); + probe executor__group(unsigned long, int); + probe executor__hash__multi(unsigned long); + probe executor__hashjoin(unsigned long); + probe executor__limit(unsigned long); + probe executor__material(unsigned long); + probe executor__mergejoin(unsigned long); + probe executor__nestloop(unsigned long); + probe executor__setop(unsigned long); + probe executor__sort(unsigned long, int); + probe executor__subplan__hash(unsigned long); + probe executor__subplan__scan(unsigned long); + probe executor__unique(unsigned long); };
After talking to Tom, I have reverted this patch. It does not contain documentation, and are not properly placed, and was submitted in February. I was hoping it would be an easy addition but obviously not so please resubmit for 8.5. Sorry. --------------------------------------------------------------------------- bruce wrote: > > OK, updated patch attached and applied; thanks. > > --------------------------------------------------------------------------- > > Zdenek Kotala wrote: > > It looks OK now. > > > > Zdenek > > > > Dne 8.03.09 21:58, Robert Lor napsal(a): > > > > >> 2) Maybe > > >> > > >> TRACE_POSTGRESQL_SLRU_READPAGE_PHYSICAL_DONE(true, -1, -1); > > >> > > >> would be better. Because slru_errcause, slru_errno can contains garbage > > >> in situation when everything goes fine. Same for write. > > > > > > I've made the changes per your suggestion although one can argue that > > > the script can check arg0, and if it's true, avoid using arg1 and arg2 > > > as they are meaningless. > > > > Thanks. > > > > >> > > >> I think it is committable for 8.4. > > >> > > > > > > That would be awesome! > > > > yeah, but it depends on commiters :-) > > > > Zdenek > > > > -- > > Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) > > To make changes to your subscription: > > http://www.postgresql.org/mailpref/pgsql-hackers > > -- > Bruce Momjian <bruce@momjian.us> http://momjian.us > EnterpriseDB http://enterprisedb.com > > + If your life is a hard drive, Christ can be your backup. + -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + If your life is a hard drive, Christ can be your backup. +
Bruce Momjian <bruce@momjian.us> writes: > After talking to Tom, I have reverted this patch. It does not contain > documentation, and are not properly placed, and was submitted in > February. I was hoping it would be an easy addition but obviously not > so please resubmit for 8.5. Sorry. Aside from the lack of documentation, I thought the executor probes were suffering from a lack of design clarity. If the intention was to probe every executor node, why did the patch miss a lot of node types? Why not solve it once and for all with a single probe in ExecProcNode()? Perhaps the idea was to make it less painful to trace the usage of particular node types, which is sensible, but then why did you put a probe in execScan rather than the individual calling scan-type nodes? Surely the ability to easily count seqscan vs indexscan would have to rank mighty high in any evaluation of whether it's easy to count particular node types. I was also pretty unhappy with passing the node pointers to the probes. What are probe routines realistically going to do with those? We feel free to whack the contents of executor node types around often, even in stable branches sometimes. I do *not* want to encourage probe authors to write code that depends on the contents of those struct types. (Heck, we probably shouldn't encourage them to assume the values of enum NodeTag even, so having a single probe in ExecProcNode is likely not a good solution. I will certainly not hold still for any future suggestions that we should avoid renumbering NodeTag or changing struct types because someone has written a probe that depends on it.) So this needs much more thought about where executor probes should go and what their arguments should be. I'm also a bit worried about the speed issue. The existing DTrace probes are tracking relatively expensive operations like I/O calls. We *know* that a kernel call per ExecProcNode iteration is highly expensive; see experience with EXPLAIN ANALYZE overhead on any platform that hasn't got a fast path for gettimeofday. I don't think that dropping probes into the executor loop is something we should do without careful analysis of what they're good for and whether anyone would really use them. regards, tom lane