Thread: [PATCH] Enable using llvm jitlink as an alternative llvm jit linker of old Rtdyld.

This brings the bonus of support jitting on riscv64 (included in this patch)
and other platforms Rtdyld doesn't support, e.g. windows COFF.

Currently, llvm doesn't expose jitlink (ObjectLinkingLayer) via C API, so
a wrapper is added. This also adds minor llvm 15 compat fix that is needed
---
 config/llvm.m4                        |  1 +
 src/backend/jit/llvm/llvmjit.c        | 67 +++++++++++++++++++++++++--
 src/backend/jit/llvm/llvmjit_wrap.cpp | 35 ++++++++++++++
 src/include/jit/llvmjit.h             |  9 ++++
 4 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/config/llvm.m4 b/config/llvm.m4
index 3a75cd8b4d..a31b8b304a 100644
--- a/config/llvm.m4
+++ b/config/llvm.m4
@@ -75,6 +75,7 @@ AC_DEFUN([PGAC_LLVM_SUPPORT],
       engine) pgac_components="$pgac_components $pgac_component";;
       debuginfodwarf) pgac_components="$pgac_components $pgac_component";;
       orcjit) pgac_components="$pgac_components $pgac_component";;
+      jitlink) pgac_components="$pgac_components $pgac_component";;
       passes) pgac_components="$pgac_components $pgac_component";;
       native) pgac_components="$pgac_components $pgac_component";;
       perfjitevents) pgac_components="$pgac_components $pgac_component";;
diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c
index 6c72d43beb..d8b840da8c 100644
--- a/src/backend/jit/llvm/llvmjit.c
+++ b/src/backend/jit/llvm/llvmjit.c
@@ -229,6 +229,11 @@ llvm_release_context(JitContext *context)
 LLVMModuleRef
 llvm_mutable_module(LLVMJitContext *context)
 {
+#ifdef __riscv
+    const char* abiname;
+    const char* target_abi = "target-abi";
+    LLVMMetadataRef abi_metadata;
+#endif
     llvm_assert_in_fatal_section();
 
     /*
@@ -241,6 +246,40 @@ llvm_mutable_module(LLVMJitContext *context)
         context->module = LLVMModuleCreateWithName("pg");
         LLVMSetTarget(context->module, llvm_triple);
         LLVMSetDataLayout(context->module, llvm_layout);
+#ifdef __riscv
+#if __riscv_xlen == 64
+#ifdef __riscv_float_abi_double
+        abiname = "lp64d";
+#elif defined(__riscv_float_abi_single)
+        abiname = "lp64f";
+#else
+        abiname = "lp64";
+#endif
+#elif __riscv_xlen == 32
+#ifdef __riscv_float_abi_double
+        abiname = "ilp32d";
+#elif defined(__riscv_float_abi_single)
+        abiname = "ilp32f";
+#else
+        abiname = "ilp32";
+#endif
+#else
+        elog(ERROR, "unsupported riscv xlen %d", __riscv_xlen);
+#endif
+        /*
+         * set this manually to avoid llvm defaulting to soft float and
+         * resulting in linker error: `can't link double-float modules
+         * with soft-float modules`
+         * we could set this for TargetMachine via MCOptions, but there
+         * is no C API for it
+         * ref:
https://github.com/llvm/llvm-project/blob/afa520ab34803c82587ea6759bfd352579f741b4/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp#L90
+         */
+        abi_metadata = LLVMMDStringInContext2(
+            LLVMGetModuleContext(context->module),
+            abiname, strlen(abiname));
+        LLVMAddModuleFlag(context->module, LLVMModuleFlagBehaviorOverride,
+            target_abi, strlen(target_abi), abi_metadata);
+#endif
     }
 
     return context->module;
@@ -786,6 +825,8 @@ llvm_session_initialize(void)
     char       *error = NULL;
     char       *cpu = NULL;
     char       *features = NULL;
+    LLVMRelocMode reloc=LLVMRelocDefault;
+    LLVMCodeModel codemodel=LLVMCodeModelJITDefault;
     LLVMTargetMachineRef opt0_tm;
     LLVMTargetMachineRef opt3_tm;
 
@@ -820,16 +861,21 @@ llvm_session_initialize(void)
     elog(DEBUG2, "LLVMJIT detected CPU \"%s\", with features \"%s\"",
          cpu, features);
 
+#ifdef __riscv
+    reloc=LLVMRelocPIC;
+    codemodel=LLVMCodeModelMedium;
+#endif
+
     opt0_tm =
         LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features,
                                 LLVMCodeGenLevelNone,
-                                LLVMRelocDefault,
-                                LLVMCodeModelJITDefault);
+                                reloc,
+                                codemodel);
     opt3_tm =
         LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features,
                                 LLVMCodeGenLevelAggressive,
-                                LLVMRelocDefault,
-                                LLVMCodeModelJITDefault);
+                                reloc,
+                                codemodel);
 
     LLVMDisposeMessage(cpu);
     cpu = NULL;
@@ -1112,7 +1158,11 @@ llvm_resolve_symbols(LLVMOrcDefinitionGeneratorRef GeneratorObj, void *Ctx,
                      LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags,
                      LLVMOrcCLookupSet LookupSet, size_t LookupSetSize)
 {
+#if LLVM_VERSION_MAJOR > 14
+    LLVMOrcCSymbolMapPairs symbols = palloc0(sizeof(LLVMOrcCSymbolMapPair) * LookupSetSize);
+#else
     LLVMOrcCSymbolMapPairs symbols = palloc0(sizeof(LLVMJITCSymbolMapPair) * LookupSetSize);
+#endif
     LLVMErrorRef error;
     LLVMOrcMaterializationUnitRef mu;
 
@@ -1160,6 +1210,10 @@ llvm_log_jit_error(void *ctx, LLVMErrorRef error)
 static LLVMOrcObjectLayerRef
 llvm_create_object_layer(void *Ctx, LLVMOrcExecutionSessionRef ES, const char *Triple)
 {
+#if defined(USE_JITLINK)
+    LLVMOrcObjectLayerRef objlayer =
+    LLVMOrcCreateJitlinkObjectLinkingLayer(ES);
+#else
     LLVMOrcObjectLayerRef objlayer =
     LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager(ES);
 
@@ -1179,6 +1233,7 @@ llvm_create_object_layer(void *Ctx, LLVMOrcExecutionSessionRef ES, const char *T
 
         LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(objlayer, l);
     }
+#endif
 #endif
 
     return objlayer;
@@ -1230,7 +1285,11 @@ llvm_create_jit_instance(LLVMTargetMachineRef tm)
      * Symbol resolution support for "special" functions, e.g. a call into an
      * SQL callable function.
      */
+#if LLVM_VERSION_MAJOR > 14
+    ref_gen = LLVMOrcCreateCustomCAPIDefinitionGenerator(llvm_resolve_symbols, NULL, NULL);
+#else
     ref_gen = LLVMOrcCreateCustomCAPIDefinitionGenerator(llvm_resolve_symbols, NULL);
+#endif
     LLVMOrcJITDylibAddGenerator(LLVMOrcLLJITGetMainJITDylib(lljit), ref_gen);
 
     return lljit;
diff --git a/src/backend/jit/llvm/llvmjit_wrap.cpp b/src/backend/jit/llvm/llvmjit_wrap.cpp
index 8f11cc02b2..29f21f1715 100644
--- a/src/backend/jit/llvm/llvmjit_wrap.cpp
+++ b/src/backend/jit/llvm/llvmjit_wrap.cpp
@@ -27,6 +27,10 @@ extern "C"
 #include <llvm/Support/Host.h>
 
 #include "jit/llvmjit.h"
+#ifdef USE_JITLINK
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#endif
 
 
 /*
@@ -48,6 +52,19 @@ char *LLVMGetHostCPUFeatures(void) {
         for (auto &F : HostFeatures)
             Features.AddFeature(F.first(), F.second);
 
+#if defined(__riscv)
+    /* getHostCPUName returns "generic-rv[32|64]", which lacks all features */
+    Features.AddFeature("m", true);
+    Features.AddFeature("a", true);
+    Features.AddFeature("c", true);
+# if defined(__riscv_float_abi_single)
+    Features.AddFeature("f", true);
+# endif
+# if defined(__riscv_float_abi_double)
+    Features.AddFeature("d", true);
+# endif
+#endif
+
     return strdup(Features.getString().c_str());
 }
 #endif
@@ -76,3 +93,21 @@ LLVMGetAttributeCountAtIndexPG(LLVMValueRef F, uint32 Idx)
      */
     return LLVMGetAttributeCountAtIndex(F, Idx);
 }
+
+#ifdef USE_JITLINK
+/*
+ * There is no public C API to create ObjectLinkingLayer for JITLINK, create our own
+ */
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ExecutionSession, LLVMOrcExecutionSessionRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ObjectLayer, LLVMOrcObjectLayerRef)
+
+LLVMOrcObjectLayerRef
+LLVMOrcCreateJitlinkObjectLinkingLayer(LLVMOrcExecutionSessionRef ES)
+{
+    assert(ES && "ES must not be null");
+    auto ObjLinkingLayer = new llvm::orc::ObjectLinkingLayer(*unwrap(ES));
+    ObjLinkingLayer->addPlugin(std::make_unique<llvm::orc::EHFrameRegistrationPlugin>(
+        *unwrap(ES), std::make_unique<llvm::jitlink::InProcessEHFrameRegistrar>()));
+    return wrap(ObjLinkingLayer);
+}
+#endif
diff --git a/src/include/jit/llvmjit.h b/src/include/jit/llvmjit.h
index 4541f9a2c4..85a0cfe5e0 100644
--- a/src/include/jit/llvmjit.h
+++ b/src/include/jit/llvmjit.h
@@ -19,6 +19,11 @@
 
 #include <llvm-c/Types.h>
 
+#if defined(__riscv) && LLVM_VERSION_MAJOR >= 15
+#include <llvm-c/Orc.h>
+#define USE_JITLINK
+/* else use legacy RTDyld */
+#endif
 
 /*
  * File needs to be includable by both C and C++ code, and include other
@@ -134,6 +139,10 @@ extern char *LLVMGetHostCPUFeatures(void);
 
 extern unsigned LLVMGetAttributeCountAtIndexPG(LLVMValueRef F, uint32 Idx);
 
+#ifdef USE_JITLINK
+extern LLVMOrcObjectLayerRef LLVMOrcCreateJitlinkObjectLinkingLayer(LLVMOrcExecutionSessionRef ES);
+#endif
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
-- 
2.37.2




Hi,

I am new to the postgres community and apologise for resending this as the previous one didn't include patch properly and didn't cc reviewers (maybe the reason it has been buried in mailing list for months)

Adding to previous email, this patch exposes its own C API for creating ObjectLinkingLayer in a similar fashion as LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager since orc doesn't expose it yet.

Thanks and really appreciate if someone can offer a review to this and help get it merged.

Cheers,
Alex


On Mon, Aug 29, 2022 at 5:46 PM Alex Fan <alex.fan.q@gmail.com> wrote:
This brings the bonus of support jitting on riscv64 (included in this patch)
and other platforms Rtdyld doesn't support, e.g. windows COFF.

Currently, llvm doesn't expose jitlink (ObjectLinkingLayer) via C API, so
a wrapper is added. This also adds minor llvm 15 compat fix that is needed
---
 config/llvm.m4                        |  1 +
 src/backend/jit/llvm/llvmjit.c        | 67 +++++++++++++++++++++++++--
 src/backend/jit/llvm/llvmjit_wrap.cpp | 35 ++++++++++++++
 src/include/jit/llvmjit.h             |  9 ++++
 4 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/config/llvm.m4 b/config/llvm.m4
index 3a75cd8b4d..a31b8b304a 100644
--- a/config/llvm.m4
+++ b/config/llvm.m4
@@ -75,6 +75,7 @@ AC_DEFUN([PGAC_LLVM_SUPPORT],
       engine) pgac_components="$pgac_components $pgac_component";;
       debuginfodwarf) pgac_components="$pgac_components $pgac_component";;
       orcjit) pgac_components="$pgac_components $pgac_component";;
+      jitlink) pgac_components="$pgac_components $pgac_component";;
       passes) pgac_components="$pgac_components $pgac_component";;
       native) pgac_components="$pgac_components $pgac_component";;
       perfjitevents) pgac_components="$pgac_components $pgac_component";;
diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c
index 6c72d43beb..d8b840da8c 100644
--- a/src/backend/jit/llvm/llvmjit.c
+++ b/src/backend/jit/llvm/llvmjit.c
@@ -229,6 +229,11 @@ llvm_release_context(JitContext *context)
 LLVMModuleRef
 llvm_mutable_module(LLVMJitContext *context)
 {
+#ifdef __riscv
+       const char* abiname;
+       const char* target_abi = "target-abi";
+       LLVMMetadataRef abi_metadata;
+#endif
        llvm_assert_in_fatal_section();

        /*
@@ -241,6 +246,40 @@ llvm_mutable_module(LLVMJitContext *context)
                context->module = LLVMModuleCreateWithName("pg");
                LLVMSetTarget(context->module, llvm_triple);
                LLVMSetDataLayout(context->module, llvm_layout);
+#ifdef __riscv
+#if __riscv_xlen == 64
+#ifdef __riscv_float_abi_double
+               abiname = "lp64d";
+#elif defined(__riscv_float_abi_single)
+               abiname = "lp64f";
+#else
+               abiname = "lp64";
+#endif
+#elif __riscv_xlen == 32
+#ifdef __riscv_float_abi_double
+               abiname = "ilp32d";
+#elif defined(__riscv_float_abi_single)
+               abiname = "ilp32f";
+#else
+               abiname = "ilp32";
+#endif
+#else
+               elog(ERROR, "unsupported riscv xlen %d", __riscv_xlen);
+#endif
+               /*
+                * set this manually to avoid llvm defaulting to soft float and
+                * resulting in linker error: `can't link double-float modules
+                * with soft-float modules`
+                * we could set this for TargetMachine via MCOptions, but there
+                * is no C API for it
+                * ref: https://github.com/llvm/llvm-project/blob/afa520ab34803c82587ea6759bfd352579f741b4/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp#L90
+                */
+               abi_metadata = LLVMMDStringInContext2(
+                       LLVMGetModuleContext(context->module),
+                       abiname, strlen(abiname));
+               LLVMAddModuleFlag(context->module, LLVMModuleFlagBehaviorOverride,
+                       target_abi, strlen(target_abi), abi_metadata);
+#endif
        }

        return context->module;
@@ -786,6 +825,8 @@ llvm_session_initialize(void)
        char       *error = NULL;
        char       *cpu = NULL;
        char       *features = NULL;
+       LLVMRelocMode reloc=LLVMRelocDefault;
+       LLVMCodeModel codemodel=LLVMCodeModelJITDefault;
        LLVMTargetMachineRef opt0_tm;
        LLVMTargetMachineRef opt3_tm;

@@ -820,16 +861,21 @@ llvm_session_initialize(void)
        elog(DEBUG2, "LLVMJIT detected CPU \"%s\", with features \"%s\"",
                 cpu, features);

+#ifdef __riscv
+       reloc=LLVMRelocPIC;
+       codemodel=LLVMCodeModelMedium;
+#endif
+
        opt0_tm =
                LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features,
                                                                LLVMCodeGenLevelNone,
-                                                               LLVMRelocDefault,
-                                                               LLVMCodeModelJITDefault);
+                                                               reloc,
+                                                               codemodel);
        opt3_tm =
                LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features,
                                                                LLVMCodeGenLevelAggressive,
-                                                               LLVMRelocDefault,
-                                                               LLVMCodeModelJITDefault);
+                                                               reloc,
+                                                               codemodel);

        LLVMDisposeMessage(cpu);
        cpu = NULL;
@@ -1112,7 +1158,11 @@ llvm_resolve_symbols(LLVMOrcDefinitionGeneratorRef GeneratorObj, void *Ctx,
                                         LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags,
                                         LLVMOrcCLookupSet LookupSet, size_t LookupSetSize)
 {
+#if LLVM_VERSION_MAJOR > 14
+       LLVMOrcCSymbolMapPairs symbols = palloc0(sizeof(LLVMOrcCSymbolMapPair) * LookupSetSize);
+#else
        LLVMOrcCSymbolMapPairs symbols = palloc0(sizeof(LLVMJITCSymbolMapPair) * LookupSetSize);
+#endif
        LLVMErrorRef error;
        LLVMOrcMaterializationUnitRef mu;

@@ -1160,6 +1210,10 @@ llvm_log_jit_error(void *ctx, LLVMErrorRef error)
 static LLVMOrcObjectLayerRef
 llvm_create_object_layer(void *Ctx, LLVMOrcExecutionSessionRef ES, const char *Triple)
 {
+#if defined(USE_JITLINK)
+       LLVMOrcObjectLayerRef objlayer =
+       LLVMOrcCreateJitlinkObjectLinkingLayer(ES);
+#else
        LLVMOrcObjectLayerRef objlayer =
        LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager(ES);

@@ -1179,6 +1233,7 @@ llvm_create_object_layer(void *Ctx, LLVMOrcExecutionSessionRef ES, const char *T

                LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(objlayer, l);
        }
+#endif
 #endif

        return objlayer;
@@ -1230,7 +1285,11 @@ llvm_create_jit_instance(LLVMTargetMachineRef tm)
         * Symbol resolution support for "special" functions, e.g. a call into an
         * SQL callable function.
         */
+#if LLVM_VERSION_MAJOR > 14
+       ref_gen = LLVMOrcCreateCustomCAPIDefinitionGenerator(llvm_resolve_symbols, NULL, NULL);
+#else
        ref_gen = LLVMOrcCreateCustomCAPIDefinitionGenerator(llvm_resolve_symbols, NULL);
+#endif
        LLVMOrcJITDylibAddGenerator(LLVMOrcLLJITGetMainJITDylib(lljit), ref_gen);

        return lljit;
diff --git a/src/backend/jit/llvm/llvmjit_wrap.cpp b/src/backend/jit/llvm/llvmjit_wrap.cpp
index 8f11cc02b2..29f21f1715 100644
--- a/src/backend/jit/llvm/llvmjit_wrap.cpp
+++ b/src/backend/jit/llvm/llvmjit_wrap.cpp
@@ -27,6 +27,10 @@ extern "C"
 #include <llvm/Support/Host.h>

 #include "jit/llvmjit.h"
+#ifdef USE_JITLINK
+#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#endif


 /*
@@ -48,6 +52,19 @@ char *LLVMGetHostCPUFeatures(void) {
                for (auto &F : HostFeatures)
                        Features.AddFeature(F.first(), F.second);

+#if defined(__riscv)
+       /* getHostCPUName returns "generic-rv[32|64]", which lacks all features */
+       Features.AddFeature("m", true);
+       Features.AddFeature("a", true);
+       Features.AddFeature("c", true);
+# if defined(__riscv_float_abi_single)
+       Features.AddFeature("f", true);
+# endif
+# if defined(__riscv_float_abi_double)
+       Features.AddFeature("d", true);
+# endif
+#endif
+
        return strdup(Features.getString().c_str());
 }
 #endif
@@ -76,3 +93,21 @@ LLVMGetAttributeCountAtIndexPG(LLVMValueRef F, uint32 Idx)
         */
        return LLVMGetAttributeCountAtIndex(F, Idx);
 }
+
+#ifdef USE_JITLINK
+/*
+ * There is no public C API to create ObjectLinkingLayer for JITLINK, create our own
+ */
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ExecutionSession, LLVMOrcExecutionSessionRef)
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ObjectLayer, LLVMOrcObjectLayerRef)
+
+LLVMOrcObjectLayerRef
+LLVMOrcCreateJitlinkObjectLinkingLayer(LLVMOrcExecutionSessionRef ES)
+{
+       assert(ES && "ES must not be null");
+       auto ObjLinkingLayer = new llvm::orc::ObjectLinkingLayer(*unwrap(ES));
+       ObjLinkingLayer->addPlugin(std::make_unique<llvm::orc::EHFrameRegistrationPlugin>(
+               *unwrap(ES), std::make_unique<llvm::jitlink::InProcessEHFrameRegistrar>()));
+       return wrap(ObjLinkingLayer);
+}
+#endif
diff --git a/src/include/jit/llvmjit.h b/src/include/jit/llvmjit.h
index 4541f9a2c4..85a0cfe5e0 100644
--- a/src/include/jit/llvmjit.h
+++ b/src/include/jit/llvmjit.h
@@ -19,6 +19,11 @@

 #include <llvm-c/Types.h>

+#if defined(__riscv) && LLVM_VERSION_MAJOR >= 15
+#include <llvm-c/Orc.h>
+#define USE_JITLINK
+/* else use legacy RTDyld */
+#endif

 /*
  * File needs to be includable by both C and C++ code, and include other
@@ -134,6 +139,10 @@ extern char *LLVMGetHostCPUFeatures(void);

 extern unsigned LLVMGetAttributeCountAtIndexPG(LLVMValueRef F, uint32 Idx);

+#ifdef USE_JITLINK
+extern LLVMOrcObjectLayerRef LLVMOrcCreateJitlinkObjectLinkingLayer(LLVMOrcExecutionSessionRef ES);
+#endif
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
--
2.37.2

Attachment
On Wed, 23 Nov 2022 at 23:13, Alex Fan <alex.fan.q@gmail.com> wrote:
> I am new to the postgres community and apologise for resending this as the previous one didn't include patch properly
anddidn't cc reviewers (maybe the reason it has been buried in mailing list for months)
 

Welcome to the community!

I've not looked at your patch, but I have noticed that you have
assigned some reviewers to the CF entry yourself.  Unless these people
know about that, this is likely a bad choice. People usually opt to
review patches of their own accord rather than because the patch
author put their name on the reviewer list.

There are a few reasons that the patch might not be getting much attention:

1. The CF entry ([1]) states that the patch is "Waiting on Author".
If you've done what you need to do, and are waiting for review, "Needs
review" might be a better state. Currently people browsing the CF app
will assume you need to do more work before it's worth looking at your
patch.
2. The CF entry already has reviewers listed.  People looking for a
patch to review are probably more likely to pick one with no reviewers
listed as they'd expect the existing listed reviewers to be taking
care of reviews for a particular patch. The latter might be unlikely
to happen given you've assigned reviewers yourself without asking them
(at least you didn't ask me after you put me on the list).
3. Target version is 17.  What's the reason for that? The next version is 16.

I'd recommend setting the patch to "Needs review" and removing all the
reviewers that have not confirmed to you that they'll review the
patch.  I'd also leave the target version blank or set it to 16.

There might be a bit more useful information for you in [2].

David

[1] https://commitfest.postgresql.org/40/3857/
[2] https://wiki.postgresql.org/wiki/Submitting_a_Patch



On Thu, Nov 24, 2022 at 12:08 AM David Rowley <dgrowleyml@gmail.com> wrote:
> On Wed, 23 Nov 2022 at 23:13, Alex Fan <alex.fan.q@gmail.com> wrote:
> > I am new to the postgres community and apologise for resending this as the previous one didn't include patch
properlyand didn't cc reviewers (maybe the reason it has been buried in mailing list for months)
 
>
> Welcome to the community!

+1

I don't know enough about LLVM or RISCV to have any strong opinions
here, but I have a couple of questions...  It looks like we have two
different things in this patch:

1.  Optionally use JITLink instead of RuntimeDyld for relocation.
From what I can tell from some quick googling, that is necessary for
RISCV because they haven't got around to doing this yet:

https://reviews.llvm.org/D127842

Independently of that, it seems that
https://llvm.org/docs/JITLink.html is the future and RuntimeDyld will
eventually be obsolete, so one question I have is: why should we do
this only for riscv?

You mentioned that this change might be necessary to support COFF and
thus Windows.  I'm not a Windows user and I think it would be beyond
my pain threshold to try to get this working there by using CI alone,
but I'm just curious... wouldn't
https://github.com/llvm/llvm-project/blob/main/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
work for that already?  (I haven't heard about anyone successfully
using PostgreSQL/LLVM on Windows; it would certainly be cool to hear
some more about what would be needed for that.)

2.  Manually adjust the CPU features and ABI/subtarget.

+#if defined(__riscv)
+    /* getHostCPUName returns "generic-rv[32|64]", which lacks all features */
+    Features.AddFeature("m", true);
+    Features.AddFeature("a", true);
+    Features.AddFeature("c", true);
+# if defined(__riscv_float_abi_single)
+    Features.AddFeature("f", true);
+# endif
+# if defined(__riscv_float_abi_double)
+    Features.AddFeature("d", true);
+# endif
+#endif

I'm trying to understand this, and the ABI name selection logic.
Maybe there are two categories of features here?

The ABI bits, "f" and "d" are not just "which instructions can I
used", but they also affect the ABI (I guess something like: where
floats go in the calling convention), and they have to match the ABI
of the main executable to allow linking to succeed, right?  Probably a
stupid question: wouldn't the subtarget/ABI be the same as the one
that the LLVM library itself was compiled for (which must also match
the postgres executable), and doesn't it know that somewhere?  I guess
I'm confused about why we don't need to deal with this kind of manual
subtarget selection on any other architecture: for PPC it
automatically knows whether to be big endian/little endian, 32 or 64
bit, etc.

Then for "m", "a", "c", I guess these are code generation options -- I
think "c" is compressed instructions for example?  Can we get a
comment to say what they are?  Why do you think that all RISCV chips
have these features?  Perhaps these are features that are part of some
kind of server chip profile (ie features not present in a tiny
microcontroller chip found in a toaster, but expected in any system
that would actually run PostgreSQL) -- in which case can we get a
reference to explain that?

I remembered the specific reason why we have that
LLVMGethostCPUFeatures() call: it's because the list of default
features that would apply otherwise based on CPU "name" alone turned
out to assume that all x86 chips had AVX, but some low end parts
don't, so we have to check for AVX etc presence that way.  But your
patch seems to imply that LLVM is not able to get features reliably
for RISCV -- why not, immaturity or technical reason why it can't?

+    assert(ES && "ES must not be null");

We use our own Assert() macro (capital A).



Hi,

On 2022-11-23 21:13:04 +1100, Alex Fan wrote:
> > @@ -241,6 +246,40 @@ llvm_mutable_module(LLVMJitContext *context)
> >                 context->module = LLVMModuleCreateWithName("pg");
> >                 LLVMSetTarget(context->module, llvm_triple);
> >                 LLVMSetDataLayout(context->module, llvm_layout);
> > +#ifdef __riscv
> > +#if __riscv_xlen == 64
> > +#ifdef __riscv_float_abi_double
> > +               abiname = "lp64d";
> > +#elif defined(__riscv_float_abi_single)
> > +               abiname = "lp64f";
> > +#else
> > +               abiname = "lp64";
> > +#endif
> > +#elif __riscv_xlen == 32
> > +#ifdef __riscv_float_abi_double
> > +               abiname = "ilp32d";
> > +#elif defined(__riscv_float_abi_single)
> > +               abiname = "ilp32f";
> > +#else
> > +               abiname = "ilp32";
> > +#endif
> > +#else
> > +               elog(ERROR, "unsupported riscv xlen %d", __riscv_xlen);
> > +#endif
> > +               /*
> > +                * set this manually to avoid llvm defaulting to soft
> > float and
> > +                * resulting in linker error: `can't link double-float
> > modules
> > +                * with soft-float modules`
> > +                * we could set this for TargetMachine via MCOptions, but
> > there
> > +                * is no C API for it
> > +                * ref:

I think this is something that should go into the llvm code, rather than
postgres.


> > @@ -820,16 +861,21 @@ llvm_session_initialize(void)
> >         elog(DEBUG2, "LLVMJIT detected CPU \"%s\", with features \"%s\"",
> >                  cpu, features);
> >
> > +#ifdef __riscv
> > +       reloc=LLVMRelocPIC;
> > +       codemodel=LLVMCodeModelMedium;
> > +#endif

Same.




> > +#ifdef USE_JITLINK
> > +/*
> > + * There is no public C API to create ObjectLinkingLayer for JITLINK,
> > create our own
> > + */
> > +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ExecutionSession,
> > LLVMOrcExecutionSessionRef)
> > +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ObjectLayer,
> > LLVMOrcObjectLayerRef)

I recommend proposing a patch for adding such an API to LLVM.



Greetings,

Andres Freund



> why should we do this only for riscv?
I originally considered riscv because I only have amd64 and riscv hardware for testing. But afaik, JITLink currently supports arm64 and x86-64 (ELF & MacOS), riscv (both 64bit and 32bit, ELF). i386 seems also supported. No support for ppc or mips exist yet. I am most familiar with riscv and its support has been quite stable. I also know Julia folks have been using orcjit only for arm64 on MacOS for quite a while, but stayed in mcjit for other platforms. clasp also uses orcjit on x86 (ELF & macos) heavily. I'd say It should be safe to switch on x86 and arm64 also.

> that is necessary for RISCV because they haven't got around to doing this yet
I doubt if the runtimedylib patch will eventually be accepted since mcjit with its runtimedylib is in maintenance mode. Users are generally suggested to switch to orcjit.

I am not familiar with Windows either. I realise from your link, RuntimeDyld does have windows COFF support. I have clearly misread something from discord.
There is a recent talk about jitlink windows support that covers a lot of details. I think the situation is that RuntimeDyld support for COFF is not stable and only limited to large code model (RuntimeDyld's constraint), so people tend to use ELF format on windows with RuntimeDyld. JITLINK's recent COFF support is more stable and allows small code model.

The logic of abi and extensions is indeed confusing and llvm backend also has some nuances. I will try to explain it to my best and am very happy to clarify more based on my knowledge if there are more questions.

 'm' 'i' 'a' 'f' 'd' are all extension features and usually are grouped together as 'g' for general. Along with 'c' for compression (and two tiny extension sets Zicsr and Zifencei splitted from 'i' since isa spec 20191213, llvm will still default enable them along with 'i', so not relevant to us) is named rv64gc for 64 bit machine. rv64gc is generally the recommended basic set for linux capable 64bit machines. Some embedded cores without +f +d still work with Linux, but are rare and unlikely want postgresql.
abi_name like 'lp64' 'lp64d' is considered independent from cpu extensions. You can have a "+f +d" machines with lp64 abi, meaning the function body can have +f +d instructions and registers, but the function signature cannot. To set abi explicitly for llvm backend, we need to pass MCOptions.ABIName or a module metadata target-abi.

If abi is missing, before llvm-15, the backend defaults to lp64 on 64bit platform, ignoring any float extension enabled or not. The consensus seemed to be backend should be explicitly configured. After llvm-15, specifically this commit, it chooses lp64d if +d is present to align with clang default. I mostly test on llvm-14 because JITLINK riscv support is complete already except some minor fixes, and notice until now the newly change. But because I test on Gentoo, it has abi lp64 build on riscv64gc, so if abi is not configured this way, I would end up with lp64d enabled by +d extension from riscv64`g`c on a lp64 build.

> your patch seems to imply that LLVM is not able to get features reliably
> for RISCV -- why not, immaturity or technical reason why it can't?
Immaturity. Actually, unimplemented for riscv as you can check here. Because gethostcpuname usually returns generic or generic-rv64, feature list for these is basically empty except 'i'. I may work out a patch for llvm later.

> wouldn't the subtarget/ABI be the same as the one that the LLVM library itself was compiled for
llvm is inherently a multitarget & cross platform compiler backend. It is capable of all subtargets & features for enabled platform(s). The target triple works like you said. There is LLVM_DEFAULT_TARGET_TRIPLE that sets the default to native if no target is specified in runtime, so default triple is reliable. But cpuname, abi, extensions don't follow this logic. The llvm riscv backend devs expect these to be configured explicitly (although there are some default and dependencies, and frontend like clang also have default). Therefore gethostcpuname is needed and feature extensions are derived from known cpuname. In case cpuname from gethostcpuname is not enough, gethostcpufeatures is needed like your example of AVX extension.

> why we don't need to deal with this kind of manual subtarget selection on any other architecture
ppc sets default abi here, so there is no abi issue. Big end or little end is encoded in target triple like ppc64 (big endian), ppc64le (little endian), and a recent riscv64be patch. I guess that is why there are no endian issues.

From: Thomas Munro <thomas.munro@gmail.com>
Sent: Thursday, December 15, 2022 9:59:39 AM
To: David Rowley <dgrowleyml@gmail.com>
Cc: Alex Fan <alex.fan.q@gmail.com>; pgsql-hackers@postgresql.org <pgsql-hackers@postgresql.org>; andres@anarazel.de <andres@anarazel.de>; geidav.pg@gmail.com <geidav.pg@gmail.com>; luc@swarm64.com <luc@swarm64.com>
Subject: Re: [PATCH] Enable using llvm jitlink as an alternative llvm jit linker of old Rtdyld.
 
On Thu, Nov 24, 2022 at 12:08 AM David Rowley <dgrowleyml@gmail.com> wrote:
> On Wed, 23 Nov 2022 at 23:13, Alex Fan <alex.fan.q@gmail.com> wrote:
> > I am new to the postgres community and apologise for resending this as the previous one didn't include patch properly and didn't cc reviewers (maybe the reason it has been buried in mailing list for months)
>
> Welcome to the community!

+1

I don't know enough about LLVM or RISCV to have any strong opinions
here, but I have a couple of questions...  It looks like we have two
different things in this patch:

1.  Optionally use JITLink instead of RuntimeDyld for relocation.
From what I can tell from some quick googling, that is necessary for
RISCV because they haven't got around to doing this yet:

https://reviews.llvm.org/D127842

Independently of that, it seems that
https://llvm.org/docs/JITLink.html is the future and RuntimeDyld will
eventually be obsolete, so one question I have is: why should we do
this only for riscv?

You mentioned that this change might be necessary to support COFF and
thus Windows.  I'm not a Windows user and I think it would be beyond
my pain threshold to try to get this working there by using CI alone,
but I'm just curious... wouldn't
https://github.com/llvm/llvm-project/blob/main/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
work for that already?  (I haven't heard about anyone successfully
using PostgreSQL/LLVM on Windows; it would certainly be cool to hear
some more about what would be needed for that.)

2.  Manually adjust the CPU features and ABI/subtarget.

+#if defined(__riscv)
+    /* getHostCPUName returns "generic-rv[32|64]", which lacks all features */
+    Features.AddFeature("m", true);
+    Features.AddFeature("a", true);
+    Features.AddFeature("c", true);
+# if defined(__riscv_float_abi_single)
+    Features.AddFeature("f", true);
+# endif
+# if defined(__riscv_float_abi_double)
+    Features.AddFeature("d", true);
+# endif
+#endif

I'm trying to understand this, and the ABI name selection logic.
Maybe there are two categories of features here?

The ABI bits, "f" and "d" are not just "which instructions can I
used", but they also affect the ABI (I guess something like: where
floats go in the calling convention), and they have to match the ABI
of the main executable to allow linking to succeed, right?  Probably a
stupid question: wouldn't the subtarget/ABI be the same as the one
that the LLVM library itself was compiled for (which must also match
the postgres executable), and doesn't it know that somewhere?  I guess
I'm confused about why we don't need to deal with this kind of manual
subtarget selection on any other architecture: for PPC it
automatically knows whether to be big endian/little endian, 32 or 64
bit, etc.

Then for "m", "a", "c", I guess these are code generation options -- I
think "c" is compressed instructions for example?  Can we get a
comment to say what they are?  Why do you think that all RISCV chips
have these features?  Perhaps these are features that are part of some
kind of server chip profile (ie features not present in a tiny
microcontroller chip found in a toaster, but expected in any system
that would actually run PostgreSQL) -- in which case can we get a
reference to explain that?

I remembered the specific reason why we have that
LLVMGethostCPUFeatures() call: it's because the list of default
features that would apply otherwise based on CPU "name" alone turned
out to assume that all x86 chips had AVX, but some low end parts
don't, so we have to check for AVX etc presence that way.  But your
patch seems to imply that LLVM is not able to get features reliably
for RISCV -- why not, immaturity or technical reason why it can't?

+    assert(ES && "ES must not be null");

We use our own Assert() macro (capital A).
Attachment
There is discussion in https://github.com/riscv-non-isa/riscv-toolchain-conventions/issues/13 to change the abi default, but not much attention for some time. The consensus seems to be set the abi and extension explicitly.

> I recommend proposing a patch for adding such an API to LLVM.

I would like to try some time later. Jitlink allows lots of flexibility to inspect each linking process, I feel myself don't know enough use cases to propose a good enough c-abi for it.

The thing I am thinking is these patch to llvm will take some time to land especially for abi and extension default. But jitlink and orc for riscv is very mature since llvm-15, and even llvm-14 with two minor patches. It would be good to have these bits, though ugly, so that postgresql jit can work with llvm-15 as most distros are still moving to it.

cheers,
Alex Fan

On Sun, Dec 25, 2022 at 11:02 PM Andres Freund <andres@anarazel.de> wrote:
Hi,

On 2022-11-23 21:13:04 +1100, Alex Fan wrote:
> > @@ -241,6 +246,40 @@ llvm_mutable_module(LLVMJitContext *context)
> >                 context->module = LLVMModuleCreateWithName("pg");
> >                 LLVMSetTarget(context->module, llvm_triple);
> >                 LLVMSetDataLayout(context->module, llvm_layout);
> > +#ifdef __riscv
> > +#if __riscv_xlen == 64
> > +#ifdef __riscv_float_abi_double
> > +               abiname = "lp64d";
> > +#elif defined(__riscv_float_abi_single)
> > +               abiname = "lp64f";
> > +#else
> > +               abiname = "lp64";
> > +#endif
> > +#elif __riscv_xlen == 32
> > +#ifdef __riscv_float_abi_double
> > +               abiname = "ilp32d";
> > +#elif defined(__riscv_float_abi_single)
> > +               abiname = "ilp32f";
> > +#else
> > +               abiname = "ilp32";
> > +#endif
> > +#else
> > +               elog(ERROR, "unsupported riscv xlen %d", __riscv_xlen);
> > +#endif
> > +               /*
> > +                * set this manually to avoid llvm defaulting to soft
> > float and
> > +                * resulting in linker error: `can't link double-float
> > modules
> > +                * with soft-float modules`
> > +                * we could set this for TargetMachine via MCOptions, but
> > there
> > +                * is no C API for it
> > +                * ref:

I think this is something that should go into the llvm code, rather than
postgres.


> > @@ -820,16 +861,21 @@ llvm_session_initialize(void)
> >         elog(DEBUG2, "LLVMJIT detected CPU \"%s\", with features \"%s\"",
> >                  cpu, features);
> >
> > +#ifdef __riscv
> > +       reloc=LLVMRelocPIC;
> > +       codemodel=LLVMCodeModelMedium;
> > +#endif

Same.




> > +#ifdef USE_JITLINK
> > +/*
> > + * There is no public C API to create ObjectLinkingLayer for JITLINK,
> > create our own
> > + */
> > +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ExecutionSession,
> > LLVMOrcExecutionSessionRef)
> > +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(llvm::orc::ObjectLayer,
> > LLVMOrcObjectLayerRef)

I recommend proposing a patch for adding such an API to LLVM.



Greetings,

Andres Freund
Hi Alex,

JITLink came back onto the radar screen: we see that LLVM 20 will
deprecate the RuntimeDyld link layer that we're using.  JITLink would
in fact fix the open bug report we have about LLVM crashing all over
the place on ARM[1], and I can see that would be quite easy to do what
you showed, but we can't use that solution for that problem because it
only works on LLVM 15+ (and maybe doesn't even support all the
architectures until later releases, I haven't followed the details).
So we'll probably finish up backporting that fix for RuntimeDyld,
which seems like it's going to work OK (thanks Anthonin, CCd, for
diagnosing that).  That's all fine and good, but if my crystal ball is
operating correctly, fairly soon we'll have a situation where RHEL is
shipping versions that *only* support JITLink, while other distros are
still shipping versions that need RuntimeDyld because they don't yet
have JITLink or it is not mature enough yet for all architectures.  So
we'll need to support both for a while.  That's all fine, and I can
see that it's going to be pretty easy to do, it's mostly just
LLVMOrcCreateThis() or LLVMOrcCreateThat() with some #ifdef around it,
job done.

The question I have is: is someone looking into getting the C API we
need for that into the LLVM main branch (LLVM 20-to-be)?  I guess I
would prefer to be able to use that, rather than adding more of our
own C++ wrapper code into our tree, if we can, and it seems like now
would be a good time to get ahead of that.

[1]
https://www.postgresql.org/message-id/flat/CAO6_Xqr63qj%3DSx7HY6ZiiQ6R_JbX%2B-p6sTPwDYwTWZjUmjsYBg%40mail.gmail.com