Re: gsoc, store hash index tuple with hash code only - Mailing list pgsql-hackers

From Xiao Meng
Subject Re: gsoc, store hash index tuple with hash code only
Date
Msg-id ded849dd0807160237s113624f6m3abee3387284cced@mail.gmail.com
Whole thread Raw
In response to gsoc, store hash index tuple with hash code only  ("Xiao Meng" <mx.cogito@gmail.com>)
Responses Re: gsoc, store hash index tuple with hash code only  ("Xiao Meng" <mx.cogito@gmail.com>)
List pgsql-hackers
I've just fixed the patch. It works and passes the regression tests ;-)
Here is the new patch. I'll keep the hash codes in sorted order and use
binary search in a later version soon.


diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 6a5c000..1a8dc75 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -129,7 +129,11 @@ hashbuildCallback(Relation index,
     IndexTuple    itup;

     /* form an index tuple and point it at the heap tuple */
+#ifdef HASHVALUE_ONLY
+    itup = _hash_form_tuple(index, values,isnull);
+#else
     itup = index_form_tuple(RelationGetDescr(index), values, isnull);
+#endif
     itup->t_tid = htup->t_self;

     /* Hash indexes don't index nulls, see notes in hashinsert */
@@ -171,7 +175,12 @@ hashinsert(PG_FUNCTION_ARGS)
     IndexTuple    itup;

     /* generate an index tuple */
+#ifdef HASHVALUE_ONLY
+    itup = _hash_form_tuple(rel, values, isnull);
+#else
     itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+#endif
+
     itup->t_tid = *ht_ctid;

     /*
@@ -212,7 +221,11 @@ hashgettuple(PG_FUNCTION_ARGS)
     bool        res;

     /* Hash indexes are never lossy (at the moment anyway) */
-    scan->xs_recheck = false;
+#ifdef HASHVALUE_ONLY
+    scan->xs_recheck = true;
+#else
+     scan->xs_recheck = false;
+#endif

     /*
      * We hold pin but not lock on current buffer while outside the hash AM.
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index 3eb226a..0800006 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -52,9 +52,15 @@ _hash_doinsert(Relation rel, IndexTuple itup)
      */
     if (rel->rd_rel->relnatts != 1)
         elog(ERROR, "hash indexes support only one index key");
+#ifdef HASHVALUE_ONLY
+    datum = index_getattr(itup, 1, _create_hash_desc(), &isnull);
+    Assert(!isnull);
+    hashkey = DatumGetUInt32(datum);
+#else
     datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull);
     Assert(!isnull);
     hashkey = _hash_datum2hashkey(rel, datum);
+#endif

     /* compute item size too */
     itemsz = IndexTupleDSize(*itup);
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index b0b5874..bba64c4 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -785,7 +785,12 @@ _hash_splitbucket(Relation rel,
     OffsetNumber omaxoffnum;
     Page        opage;
     Page        npage;
-    TupleDesc    itupdesc = RelationGetDescr(rel);
+    TupleDesc    itupdesc;
+#ifdef HASHVALUE_ONLY
+    itupdesc = _create_hash_desc();
+#else
+    itupdesc = RelationGetDescr(rel);
+#endif

     /*
      * It should be okay to simultaneously write-lock pages from each bucket,
@@ -854,9 +859,13 @@ _hash_splitbucket(Relation rel,
         itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
         datum = index_getattr(itup, 1, itupdesc, &null);
         Assert(!null);
-
+#ifdef HASHVALUE_ONLY
+        bucket = _hash_hashkey2bucket(DatumGetUInt32(datum),
+                                      maxbucket, highmask, lowmask);
+#else
         bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
                                       maxbucket, highmask, lowmask);
+#endif

         if (bucket == nbucket)
         {
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 258526b..5211e67 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -178,6 +178,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
         hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
                                            cur->sk_subtype);

+    so->hashso_sk_hash = hashkey;
     /*
      * Acquire shared split lock so we can compute the target bucket safely
      * (see README).
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index 41e2eef..81c6829 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -20,7 +20,7 @@
 #include "executor/execdebug.h"
 #include "storage/bufmgr.h"
 #include "utils/lsyscache.h"
-
+#include "catalog/pg_type.h"

 /*
  * _hash_checkqual -- does the index tuple satisfy the scan conditions?
@@ -28,16 +28,31 @@
 bool
 _hash_checkqual(IndexScanDesc scan, IndexTuple itup)
 {
-    TupleDesc    tupdesc = RelationGetDescr(scan->indexRelation);
+    TupleDesc    tupdesc;
     ScanKey        key = scan->keyData;
     int            scanKeySize = scan->numberOfKeys;
+    Datum        datum;
+    bool        isNull;
+    HashScanOpaque      so = scan->opaque;

     IncrIndexProcessed();

+#ifdef HASHVALUE_ONLY
+    tupdesc = _create_hash_desc();
+    datum = index_getattr(itup,
+                          key->sk_attno,
+                          tupdesc,
+                          &isNull);
+    if( so->hashso_sk_hash != DatumGetUInt32(datum) )
+        return false;
+    key++;
+    scanKeySize--;
+#else
+    tupdesc = RelationGetDescr(scan->indexRelation);
+#endif
+
     while (scanKeySize > 0)
     {
-        Datum        datum;
-        bool        isNull;
         Datum        test;

         datum = index_getattr(itup,
@@ -222,3 +237,31 @@ hashoptions(PG_FUNCTION_ARGS)
         PG_RETURN_BYTEA_P(result);
     PG_RETURN_NULL();
 }
+
+/*
+ * _create_hash_desc - build a one-attribute TupleDesc for hash-code-only tuples
+ *
+ * The attribute is int32 (INT4OID), not uint32: pg_type has no UINT4OID.
+ */
+TupleDesc _create_hash_desc()
+{
+    TupleDesc tupdesc = CreateTemplateTupleDesc(1, false);    /* NOTE(review): allocated anew on every call; confirm callers release it */
+    TupleDescInitEntry(tupdesc, 1, "hashcode", INT4OID, -1, 0);    /* attribute 1 is named "hashcode" */
+    return tupdesc;
+}
+
+/*
+ * _hash_form_tuple - form an index tuple that stores the hash code of values[0] in place of the key itself
+ */
+IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull)
+{
+    TupleDesc       hashdesc;
+    IndexTuple      itup;
+    uint32          hashkey;
+
+    hashdesc = _create_hash_desc();    /* descriptor with the single "hashcode" attribute */
+    hashkey = _hash_datum2hashkey(rel, values[0]);    /* NOTE(review): isnull[0] is not checked before hashing -- confirm callers never pass a NULL key */
+    values[0] = UInt32GetDatum(hashkey);    /* NOTE: overwrites the caller's values[0] with the hash code */
+    itup = index_form_tuple(hashdesc, values, isnull);
+    return itup;
+}
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index ab0824d..141f1fc 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -100,6 +100,8 @@ typedef struct HashScanOpaqueData
     /* Current and marked position of the scan */
     ItemPointerData hashso_curpos;
     ItemPointerData hashso_mrkpos;
+    /* Hash value of the scan key */
+    uint32      hashso_sk_hash;
 } HashScanOpaqueData;

 typedef HashScanOpaqueData *HashScanOpaque;
@@ -227,6 +229,10 @@ typedef HashMetaPageData *HashMetaPage;
  */
 #define HASHPROC        1

+/*
+ * store hash value only in the bucket
+ */
+#define HASHVALUE_ONLY

 /* public routines */

@@ -330,6 +336,8 @@ extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
                      uint32 highmask, uint32 lowmask);
 extern uint32 _hash_log2(uint32 num);
 extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
+extern TupleDesc _create_hash_desc();
+extern IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull);

 /* hash.c */
 extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);


--
Best Regards,
Xiao Meng

DKERC, Harbin Institute of Technology, China
Gtalk: mx.cogito@gmail.com
MSN: cnEnder@live.com
http://xiaomeng.yo2.cn

Attachment

pgsql-hackers by date:

Previous
From: cinu
Date:
Subject: postmaster.pid not visible
Next
From: "Xiao Meng"
Date:
Subject: Re: gsoc, store hash index tuple with hash code only