From 1c1da11022c64ebb8bd1bec9bdca9783a78c94ee Mon Sep 17 00:00:00 2001 From: Pierre Ducroquet Date: Fri, 30 Jan 2026 10:35:43 +0100 Subject: [PATCH] llvmjit: reduce the number of jumps generated in O0 When using O0, LLVM doesn't try to reorder the basic blocks to produce linear code in memory. LLVM also doesn't remove jumps even when they target the instruction immediately following the current one. Adding an optimizer step in O0 could end up having bad side effects, so instead of asking LLVM to fix it, we can modify the IR code we generate in order to get rid of as many jumps as possible. - EEOP_QUAL was written following the C logic, thus: if null or value is false: jump to qualfail jump to next block qualfail: .... By inverting the if, we have instead: if !null and value is not false: jump to next block .... This is one fewer jump on amd64 with O0. - change the block creation order in tuple_deforming so that the outblock stays at the end of the function, removing the backward jump from the last attribute to the outblock, which was previously placed earlier in the function - don't create the adjust_unavail_cols block if it is not needed - jump directly over the attisnull and the attcheckalign blocks if they are empty All these together remove 7 jumps on a very basic query, and make the generated assembly code far more natural and easier for the CPU to execute. 
--- src/backend/jit/llvm/llvmjit_deform.c | 62 +++++++++++++++++++++------ src/backend/jit/llvm/llvmjit_expr.c | 22 +++++----- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/src/backend/jit/llvm/llvmjit_deform.c b/src/backend/jit/llvm/llvmjit_deform.c index 3eb087eb56b..9aeff1e4ff5 100644 --- a/src/backend/jit/llvm/llvmjit_deform.c +++ b/src/backend/jit/llvm/llvmjit_deform.c @@ -145,14 +145,8 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, b_entry = LLVMAppendBasicBlockInContext(lc, v_deform_fn, "entry"); - b_adjust_unavail_cols = - LLVMAppendBasicBlockInContext(lc, v_deform_fn, "adjust_unavail_cols"); b_find_start = LLVMAppendBasicBlockInContext(lc, v_deform_fn, "find_startblock"); - b_out = - LLVMAppendBasicBlockInContext(lc, v_deform_fn, "outblock"); - b_dead = - LLVMAppendBasicBlockInContext(lc, v_deform_fn, "deadblock"); b = LLVMCreateBuilderInContext(lc); @@ -314,6 +308,10 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, l_bb_append_v(v_deform_fn, "block.attr.%d.store", attnum); } + /* create the exit and dead blocks at the end, so that even with O0 they will be at the end */ + b_out = LLVMAppendBasicBlockInContext(lc, v_deform_fn, "outblock"); + b_dead = LLVMAppendBasicBlockInContext(lc, v_deform_fn, "deadblock"); + /* * Check if it is guaranteed that all the desired attributes are available * in the tuple (but still possibly NULL), by dint of either the last @@ -325,8 +323,6 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, if ((natts - 1) <= guaranteed_column_number) { /* just skip through unnecessary blocks */ - LLVMBuildBr(b, b_adjust_unavail_cols); - LLVMPositionBuilderAtEnd(b, b_adjust_unavail_cols); LLVMBuildBr(b, b_find_start); } else @@ -334,6 +330,9 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, LLVMValueRef v_params[3]; LLVMValueRef f; + /* create the block since it is now needed */ + b_adjust_unavail_cols = LLVMAppendBasicBlockInContext(lc, v_deform_fn, 
"adjust_unavail_cols"); + /* branch if not all columns available */ LLVMBuildCondBr(b, LLVMBuildICmp(b, LLVMIntULT, @@ -399,6 +398,8 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, LLVMValueRef l_attno = l_int16_const(lc, attnum); LLVMValueRef v_attdatap; LLVMValueRef v_resultp; + bool delayed_jump_in_nonnullable; + bool delayed_jump_in_attcheckno; /* build block checking whether we did all the necessary attributes */ LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]); @@ -419,7 +420,7 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, */ if (attnum <= guaranteed_column_number) { - LLVMBuildBr(b, attstartblocks[attnum]); + delayed_jump_in_attcheckno = true; } else { @@ -430,6 +431,7 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, v_maxatt, "heap_natts"); LLVMBuildCondBr(b, v_islast, b_out, attstartblocks[attnum]); + delayed_jump_in_attcheckno = false; } LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]); @@ -484,13 +486,19 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, LLVMBuildBr(b, b_next); attguaranteedalign = false; + delayed_jump_in_nonnullable = false; + /* add the jump to our attisnull block in start */ + if (delayed_jump_in_attcheckno) + { + LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]); + LLVMBuildBr(b, attstartblocks[attnum]); + delayed_jump_in_attcheckno = false; + } } else { /* nothing to do */ - LLVMBuildBr(b, attcheckalignblocks[attnum]); - LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]); - LLVMBuildBr(b, attcheckalignblocks[attnum]); + delayed_jump_in_nonnullable = true; } LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]); @@ -574,14 +582,40 @@ slot_compile_deform(LLVMJitContext *context, TupleDesc desc, } LLVMBuildBr(b, attstoreblocks[attnum]); - LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]); + if (delayed_jump_in_nonnullable) + { + LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]); + LLVMBuildBr(b, attcheckalignblocks[attnum]); + 
LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]); + LLVMBuildBr(b, attcheckalignblocks[attnum]); + } + if (delayed_jump_in_attcheckno) + { + LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]); + LLVMBuildBr(b, attcheckalignblocks[attnum]); + delayed_jump_in_attcheckno = false; + } } else { LLVMPositionBuilderAtEnd(b, attcheckalignblocks[attnum]); - LLVMBuildBr(b, attalignblocks[attnum]); + LLVMBuildBr(b, attstoreblocks[attnum]); LLVMPositionBuilderAtEnd(b, attalignblocks[attnum]); LLVMBuildBr(b, attstoreblocks[attnum]); + if (delayed_jump_in_nonnullable) + { + LLVMPositionBuilderAtEnd(b, attstartblocks[attnum]); + LLVMBuildBr(b, attstoreblocks[attnum]); + LLVMPositionBuilderAtEnd(b, attisnullblocks[attnum]); + LLVMBuildBr(b, attstoreblocks[attnum]); + } + + if (delayed_jump_in_attcheckno) + { + LLVMPositionBuilderAtEnd(b, attcheckattnoblocks[attnum]); + LLVMBuildBr(b, attstoreblocks[attnum]); + delayed_jump_in_attcheckno = false; + } } LLVMPositionBuilderAtEnd(b, attstoreblocks[attnum]); diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 885b34c27e4..c942e6f4557 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -1224,7 +1224,7 @@ llvm_compile_expr(ExprState *state) { LLVMValueRef v_resnull; LLVMValueRef v_resvalue; - LLVMValueRef v_nullorfalse; + LLVMValueRef v_notnullnorfalse; LLVMBasicBlockRef b_qualfail; b_qualfail = l_bb_before_v(opblocks[opno + 1], @@ -1233,18 +1233,18 @@ llvm_compile_expr(ExprState *state) v_resvalue = l_load(b, TypeDatum, v_resvaluep, ""); v_resnull = l_load(b, TypeStorageBool, v_resnullp, ""); - v_nullorfalse = - LLVMBuildOr(b, - LLVMBuildICmp(b, LLVMIntEQ, v_resnull, - l_sbool_const(1), ""), - LLVMBuildICmp(b, LLVMIntEQ, v_resvalue, - l_datum_const(0), ""), - ""); + v_notnullnorfalse = + LLVMBuildAnd(b, + LLVMBuildICmp(b, LLVMIntNE, v_resnull, + l_sbool_const(1), ""), + LLVMBuildICmp(b, LLVMIntNE, v_resvalue, + l_datum_const(0), ""), + ""); 
LLVMBuildCondBr(b, - v_nullorfalse, - b_qualfail, - opblocks[opno + 1]); + v_notnullnorfalse, + opblocks[opno + 1], + b_qualfail); /* build block handling NULL or false */ LLVMPositionBuilderAtEnd(b, b_qualfail); -- 2.43.0