From 4423d726583a9adcfacce477cb98265f900e72fd Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 25 Mar 2025 18:49:15 +1300 Subject: [PATCH v7] Fix query jumbling to account for NULL nodes Previously NULL nodes were ignored. This could cause issues where the computed query ID could match for queries where fields that are next to each other in their Node struct where one field was NULL and the other non-NULL. For example, the Query struct has distinctClause and sortClause next to each other. If someone wrote; SELECT DISTINCT c1 FROM t; and then; SELECT c1 FROM t ORDER BY c1; these would produce the same query ID since in the first query we ignore the NULL sortClause and append the jumble bytes for the distictClause. In the latter query, since we do nothing for the NULL distinctClause then jumble the non-NULL sortClause, and since the node representation stored is the same in both cases, the query IDs are identical. Here we fix this by always jumbling NULL nodes. This produces different jumble bytes in the above queries since the order that we jumble the NULL changes between each of the above two queries. Fixing this without regressing the performance of jumbling was quite tricky, so this commit also contains a few optimizations to improve the jumble performance. Discussion: https://postgr.es/m/aafce7966e234372b2ba876c0193f1e9%40localhost.localdomain --- src/backend/nodes/queryjumblefuncs.c | 159 +++++++++++++++++++++++++-- src/include/nodes/queryjumble.h | 6 + 2 files changed, 153 insertions(+), 12 deletions(-) diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index f8b0f91704b..8d3b1348634 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -58,6 +58,8 @@ bool query_id_squash_values = false; */ bool query_id_enabled = false; +static void FlushPendingNulls(JumbleState *jstate); + static void AppendJumble(JumbleState *jstate, const unsigned char *item, Size size); static void RecordConstLocation(JumbleState *jstate, @@ -134,9 +136,13 @@ JumbleQuery(Query *query) palloc(jstate->clocations_buf_size * sizeof(LocationLen)); jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; + jstate->pending_nulls = 0; /* Compute query ID and mark the Query node with it */ _jumbleNode(jstate, (Node *) query); + if (jstate->pending_nulls > 0) + FlushPendingNulls(jstate); + query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble, jstate->jumble_len, 0)); @@ -170,25 +176,40 @@ EnableQueryId(void) } /* - * AppendJumble: Append a value that is substantive in a given query to - * the current jumble. + * AppendJumbleInternal: Append a value that is substantive in a given query + * to the current jumble. + * + * Note: Callers must ensure that jstate->pending_nulls is zero first by + * calling FlushPendingNulls() when required. Callers must also ensure that + * size > 0. */ -static void -AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) +static pg_attribute_always_inline void +AppendJumbleInternal(JumbleState *jstate, const unsigned char *item, + Size size) { unsigned char *jumble = jstate->jumble; Size jumble_len = jstate->jumble_len; + /* Ensure the caller didn't mess up */ + Assert(size > 0); + + if (likely(size <= JUMBLE_SIZE - jumble_len)) + { + memcpy(jumble + jumble_len, item, size); + jstate->jumble_len += size; + return; + } + /* * Whenever the jumble buffer is full, we hash the current contents and * reset the buffer to contain just that hash value, thus relying on the * hash to summarize everything so far. */ - while (size > 0) + do { Size part_size; - if (jumble_len >= JUMBLE_SIZE) + if (unlikely(jumble_len >= JUMBLE_SIZE)) { uint64 start_hash; @@ -202,10 +223,106 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size) jumble_len += part_size; item += part_size; size -= part_size; - } + } while (size > 0); + jstate->jumble_len = jumble_len; } +/* + * AppendJumbleNull + * For jumbling NULL pointers + */ +static pg_attribute_always_inline void +AppendJumbleNull(JumbleState *jstate) +{ + jstate->pending_nulls++; +} + +/* + * AppendJumble + * Add 'size' bytes of the given jumble 'value' to the jumble state + */ +static pg_noinline void +AppendJumble(JumbleState *jstate, const unsigned char *value, Size size) +{ + if (jstate->pending_nulls > 0) + FlushPendingNulls(jstate); + + AppendJumbleInternal(jstate, value, size); +} + +/* + * AppendJumble8 + * Add the first byte from the given 'value' pointer to the jumble state + */ +static pg_noinline void +AppendJumble8(JumbleState *jstate, const unsigned char *value) +{ + if (jstate->pending_nulls > 0) + FlushPendingNulls(jstate); + + AppendJumbleInternal(jstate, value, 1); +} + +/* + * AppendJumble16 + * Add the first 2 bytes from the given 'value' pointer to the jumble + * state. + */ +static pg_noinline void +AppendJumble16(JumbleState *jstate, const unsigned char *value) +{ + if (jstate->pending_nulls > 0) + FlushPendingNulls(jstate); + + AppendJumbleInternal(jstate, value, 2); +} + +/* + * AppendJumble32 + * Add the first 4 bytes from the given 'value' pointer to the jumble + * state. + */ +static pg_noinline void +AppendJumble32(JumbleState *jstate, const unsigned char *value) +{ + if (jstate->pending_nulls > 0) + FlushPendingNulls(jstate); + + AppendJumbleInternal(jstate, value, 4); +} + +/* + * AppendJumble64 + * Add the first 8 bytes from the given 'value' pointer to the jumble + * state. + */ +static pg_noinline void +AppendJumble64(JumbleState *jstate, const unsigned char *value) +{ + if (jstate->pending_nulls > 0) + FlushPendingNulls(jstate); + + AppendJumbleInternal(jstate, value, 8); +} + +/* + * FlushPendingNulls + * Incorporate the pending_null values into the jumble buffer. + * + * Note: Callers must ensure that there's at least 1 pending NULL. + */ +static pg_attribute_always_inline void +FlushPendingNulls(JumbleState *jstate) +{ + Assert(jstate->pending_nulls > 0); + + AppendJumbleInternal(jstate, + (const unsigned char *) &jstate->pending_nulls, 4); + jstate->pending_nulls = 0; +} + + /* * Record location of constant within query string of query tree that is * currently being walked. @@ -319,19 +436,34 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) } #define JUMBLE_NODE(item) \ - _jumbleNode(jstate, (Node *) expr->item) + _jumbleNode(jstate, (Node *) expr->item); #define JUMBLE_ELEMENTS(list) \ _jumbleElements(jstate, (List *) expr->list) #define JUMBLE_LOCATION(location) \ RecordConstLocation(jstate, expr->location, false) #define JUMBLE_FIELD(item) \ - AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)) +do { \ + if (sizeof(expr->item) == 8) \ + AppendJumble64(jstate, (const unsigned char *) &(expr->item)); \ + else if (sizeof(expr->item) == 4) \ + AppendJumble32(jstate, (const unsigned char *) &(expr->item)); \ + else if (sizeof(expr->item) == 2) \ + AppendJumble16(jstate, (const unsigned char *) &(expr->item)); \ + else if (sizeof(expr->item) == 1) \ + AppendJumble8(jstate, (const unsigned char *) &(expr->item)); \ + else \ + AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item)); \ +} while (0) + +/* XXX what's this used for? */ #define JUMBLE_FIELD_SINGLE(item) \ AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item)) #define JUMBLE_STRING(str) \ do { \ if (expr->str) \ AppendJumble(jstate, (const unsigned char *) (expr->str), strlen(expr->str) + 1); \ + else \ + AppendJumbleNull(jstate); \ } while(0) /* Function name used for the node field attribute custom_query_jumble. */ #define JUMBLE_CUSTOM(nodetype, item) \ @@ -384,7 +516,10 @@ _jumbleNode(JumbleState *jstate, Node *node) Node *expr = node; if (expr == NULL) + { + AppendJumbleNull(jstate); return; + } /* Guard against stack overflow due to overly complex expressions */ check_stack_depth(); @@ -448,15 +583,15 @@ _jumbleList(JumbleState *jstate, Node *node) break; case T_IntList: foreach(l, expr) - JUMBLE_FIELD_SINGLE(lfirst_int(l)); + AppendJumble32(jstate, (const unsigned char *) &lfirst_int(l)); break; case T_OidList: foreach(l, expr) - JUMBLE_FIELD_SINGLE(lfirst_oid(l)); + AppendJumble32(jstate, (const unsigned char *) &lfirst_oid(l)); break; case T_XidList: foreach(l, expr) - JUMBLE_FIELD_SINGLE(lfirst_xid(l)); + AppendJumble32(jstate, (const unsigned char *) &lfirst_xid(l)); break; default: elog(ERROR, "unrecognized list node type: %d", diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 905f66bc0bd..ab49b2b8b93 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -54,6 +54,12 @@ typedef struct JumbleState /* highest Param id we've seen, in order to start normalization correctly */ int highest_extern_param_id; + + /* + * Count of the number of NULL nodes seen since last appending a value. + * These get flushed out to the jumble buffer before subsequent appends + */ + unsigned int pending_nulls; } JumbleState; /* Values for the compute_query_id GUC */ -- 2.43.0