Re: MemSet inline for newNode - Mailing list pgsql-patches
| From | Bruce Momjian |
|---|---|
| Subject | Re: MemSet inline for newNode |
| Date | |
| Msg-id | 200211110450.gAB4oSW29177@candle.pha.pa.us Whole thread Raw |
| In response to | Re: MemSet inline for newNode (Tom Lane <tgl@sss.pgh.pa.us>) |
| Responses |
Re: MemSet inline for newNode
|
| List | pgsql-patches |
Tom Lane wrote:
> Bruce Momjian <pgman@candle.pha.pa.us> writes:
> >> But your patch as committed does NOT inline newNode, in fact it does the
> >> exact opposite: the MemSet macro is removed from the callsite.
>
> > Yes, there were actually two patches; the first inlined newNode by
> > calling palloc0. Without palloc, there was no way to inline newNode.
>
> But you have *not* inlined newNode: the memset is still done at a location
> that cannot know the size at compile time.
I am sorry. I inlined newNode, but by pushing MemSet into palloc0, the
MemSet conditional tests can not be optimized away. Is that clearer?
#define newNode(size, tag) \
( \
AssertMacro((size) >= sizeof(Node)), /* need the tag, at least */ \
\
newNodeMacroHolder = (Node *) palloc0(size), \
newNodeMacroHolder->type = (tag), \
newNodeMacroHolder \
)
In this case, I needed palloc0 to make newNode inlined, and the old
function version of newNode couldn't optimize away the conditional test
either because it was called with various sizes.
> > The second was a more general one to merge palloc/MemSet into a single
> > palloc0 call. That has been backed out while I research it.
>
> That part could be sold just on the basis of making the code easier
> to read and less error-prone; particularly for call sites where the
> length is a runtime computation anyway. I think that newNode() is the
Yes, that was my thought.
> principal case where the length is knowable at compile time. So my
> feeling is that the general change to invent a palloc0() call is fine
> ... but if you want performance then newNode() should *not* be using the
> generic palloc0() call.
OK. I am still struggling about how to do that.
> > My new idea is to add a third boolean argument to
> > MemoryContextAllocZero() which will control whether the MemSet
> > assignment loop is used, or memset().
>
> But we are *trying to eliminate any runtime test whatever*. Short
> of that, you aren't going to get the speedup. Certainly passing a third
> argument to the alloc subroutine will eat enough cycles to negate any
> savings from simplifying the runtime test slightly.
My idea is that the conditional tests will be optimized away before the
palloc0 call is made.
Attached is a patch that implements the MemSet split and allows the size
tests to be optimized away _before_ at the call location. It isn't done
yet, but you can get the idea.
--
Bruce Momjian | http://candle.pha.pa.us
pgman@candle.pha.pa.us | (610) 359-1001
+ If your life is a hard drive, | 13 Roberts Road
+ Christ can be your backup. | Newtown Square, Pennsylvania 19073
Index: src/backend/utils/mmgr/mcxt.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/mmgr/mcxt.c,v
retrieving revision 1.35
diff -c -c -r1.35 mcxt.c
*** src/backend/utils/mmgr/mcxt.c 10 Nov 2002 02:17:25 -0000 1.35
--- src/backend/utils/mmgr/mcxt.c 11 Nov 2002 04:48:27 -0000
***************
*** 460,466 ****
* function is called too many times, so we have a separate version.
*/
void *
! MemoryContextAllocZero(MemoryContext context, Size size)
{
void *ret;
--- 460,466 ----
* function is called too many times, so we have a separate version.
*/
void *
! MemoryContextAllocZero(MemoryContext context, Size size, bool use_libc)
{
void *ret;
***************
*** 471,477 ****
(unsigned long) size);
ret = (*context->methods->alloc) (context, size);
! MemSet(ret, 0, size);
return ret;
}
--- 471,477 ----
(unsigned long) size);
ret = (*context->methods->alloc) (context, size);
! MemSetLoop(ret, 0, size, use_libc);
return ret;
}
Index: src/include/c.h
===================================================================
RCS file: /cvsroot/pgsql-server/src/include/c.h,v
retrieving revision 1.131
diff -c -c -r1.131 c.h
*** src/include/c.h 11 Nov 2002 03:02:19 -0000 1.131
--- src/include/c.h 11 Nov 2002 04:48:33 -0000
***************
*** 579,601 ****
*
* bjm 2002-10-08
*/
! #define MemSet(start, val, len) \
do \
{ \
int32 * _start = (int32 *) (start); \
int _val = (val); \
Size _len = (len); \
\
! if ((( ((long) _start) | _len) & INT_ALIGN_MASK) == 0 && \
! _val == 0 && \
! _len <= MEMSET_LOOP_LIMIT) \
{ \
int32 * _stop = (int32 *) ((char *) _start + _len); \
while (_start < _stop) \
*_start++ = 0; \
} \
else \
! memset((char *) _start, _val, _len); \
} while (0)
#define MEMSET_LOOP_LIMIT 1024
--- 579,610 ----
*
* bjm 2002-10-08
*/
! #define MemSetTest(val, len) \
! ( ((len) & INT_ALIGN_MASK) == 0 && \
! (len) <= MEMSET_LOOP_LIMIT && \
! (val) == 0 )
!
! #define MemSetLoop(start, val, len, use_libc) \
do \
{ \
int32 * _start = (int32 *) (start); \
int _val = (val); \
Size _len = (len); \
\
! if (!use_libc && ((long) _start & INT_ALIGN_MASK) == 0 ) \
{ \
int32 * _stop = (int32 *) ((char *) _start + _len); \
while (_start < _stop) \
*_start++ = 0; \
} \
else \
! memset((char *) _start, _val, _len); \
! } while (0)
!
! #define MemSet(start, val, len) \
! do \
! { \
! MemSetLoop(start, val, len, MemSetTest(val, len)); \
} while (0)
#define MEMSET_LOOP_LIMIT 1024
Index: src/include/utils/palloc.h
===================================================================
RCS file: /cvsroot/pgsql-server/src/include/utils/palloc.h,v
retrieving revision 1.22
diff -c -c -r1.22 palloc.h
*** src/include/utils/palloc.h 10 Nov 2002 02:17:25 -0000 1.22
--- src/include/utils/palloc.h 11 Nov 2002 04:48:35 -0000
***************
*** 46,56 ****
* Fundamental memory-allocation operations (more are in utils/memutils.h)
*/
extern void *MemoryContextAlloc(MemoryContext context, Size size);
! extern void *MemoryContextAllocZero(MemoryContext context, Size size);
#define palloc(sz) MemoryContextAlloc(CurrentMemoryContext, (sz))
! #define palloc0(sz) MemoryContextAllocZero(CurrentMemoryContext, (sz))
extern void pfree(void *pointer);
--- 46,57 ----
* Fundamental memory-allocation operations (more are in utils/memutils.h)
*/
extern void *MemoryContextAlloc(MemoryContext context, Size size);
! extern void *MemoryContextAllocZero(MemoryContext context, Size size, bool use_libc);
#define palloc(sz) MemoryContextAlloc(CurrentMemoryContext, (sz))
! #define palloc0(sz) MemoryContextAllocZero(CurrentMemoryContext, (sz), \
! MemSetTest(0, (sz)))
extern void pfree(void *pointer);
pgsql-patches by date: