Re: Problem while setting the fpw with SIGHUP - Mailing list pgsql-hackers

From Kyotaro HORIGUCHI
Subject Re: Problem while setting the fpw with SIGHUP
Date
Msg-id 20180412.103430.133595350.horiguchi.kyotaro@lab.ntt.co.jp
Whole thread Raw
In response to Re: Problem while setting the fpw with SIGHUP  (Michael Paquier <michael@paquier.xyz>)
Responses Re: Problem while setting the fpw with SIGHUP
List pgsql-hackers
Hello. Thanks to Heikki for picking this up, and thanks to Michael
for the comment.

# The attached patch differs only in a comment, and is rebased.

At Thu, 12 Apr 2018 05:24:14 +0900, Michael Paquier <michael@paquier.xyz> wrote in
<20180411202414.GA32449@paquier.xyz>
> On Wed, Apr 11, 2018 at 02:09:48PM +0300, Heikki Linnakangas wrote:
> > I think the new behavior where the GUC only takes effect at next checkpoint
> > is OK. It seems quite intuitive.
> > 
> > > [rebased patch version]
> > 
> > Looks good at a quick glance. Assuming no objections from others, I'll take
> > a closer look and commit tomorrow. Thanks!
> 
> Sorry for not following up closely this thread lately.
> 
> +   /*
> +    * If full_page_writes has been turned off, issue XLOG_FPW_CHANGE before
> +    * the flag actually takes effect. No lock is required since checkpointer
> +    * is the only updator of shared fullPageWrites after recovery is
> +    * finished. Both shared and local fullPageWrites do not change before the
> +    * next reading below.
> +    */
> +   if (Insert->fullPageWrites && !fullPageWrites)
> +   {
> +       XLogBeginInsert();
> +       XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
> +       XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
> +   }
> 
> This is not actually true.  If a fallback_promote is used, then
> CreateCheckPoint() is called by the startup process which is in charge
> of issuing the end-of-recovery checkpoint, and not the checkpointer.  So
> I still fail to see how a no-lock approach is fine except if we remove
> fallback_promote?

The checkpointer never calls CreateCheckPoint while
RecoveryInProgress() == true. In other words, the checkpointer is
not an updater of the shared FPW at the time StartupXLOG calls
CreateCheckPoint for fallback_promote.

The comment may be somewhat confusing as it is written there.
The point is that the checkpointer and StartupXLOG are mutually
excluded from updating the shared FPW by the
SharedRecoveryInProgress flag.

| * If full_page_writes has been turned off, issue XLOG_FPW_CHANGE before
| * the flag actually takes effect. Checkpointer never calls this function
| * before StartupXLOG() turns off SharedRecoveryInProgress so there's no
| * window where checkpointer and startup processes - the only updators of
| * the flag - can update shared FPW simultaneously. Thus no lock is
| * required here. Both shared and local fullPageWrites do not change
| * before the next reading below.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
From f6d4857356508fa16dc5d54b92d0177dbeaae3e2 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Date: Fri, 6 Apr 2018 13:57:48 +0900
Subject: [PATCH] Change FPW handling

The GUC full_page_writes currently takes effect immediately. That
creates a race condition between config reload in the checkpointer and
StartupXLOG. But since full page images are meaningful only when they
are attached to all WAL records that cover a checkpoint, there is no
problem if we update the shared FPW only at the REDO point.  On the
other hand, the online backup mechanism on a standby needs to know if
FPW is turned off before the next checkpoint record arrives.

As a result, with this patch, a change of full_page_writes takes
effect at the REDO point, and an additional XLOG_FPW_CHANGE record is
written only when it is turned off. These are sufficient for standby
backup to work properly, reduce complexity, and prevent the race condition.
---
 doc/src/sgml/config.sgml                   |   4 +-
 src/backend/access/transam/xlog.c          | 116 ++++++++---------------------
 src/backend/optimizer/util/clauses.c       |   4 +-
 src/backend/parser/gram.y                  |  78 ++++++++++---------
 src/backend/parser/parse_agg.c             |  10 +++
 src/backend/parser/parse_expr.c            |   5 ++
 src/backend/parser/parse_func.c            |   3 +
 src/backend/parser/parse_utilcmd.c         |  72 +++++++++++-------
 src/backend/postmaster/checkpointer.c      |   6 --
 src/include/access/xlog.h                  |   1 -
 src/include/catalog/pg_control.h           |   2 +-
 src/include/optimizer/clauses.h            |   2 +
 src/include/parser/parse_node.h            |   1 +
 src/include/utils/rel.h                    |   6 ++
 src/test/regress/expected/create_table.out |  36 +++++----
 src/test/regress/sql/create_table.sql      |  21 +++++-
 16 files changed, 192 insertions(+), 175 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5d5f2d23c4..7ea42c25e2 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2598,7 +2598,9 @@ include_dir 'conf.d'
        <para>
         This parameter can only be set in the <filename>postgresql.conf</filename>
         file or on the server command line.
-        The default is <literal>on</literal>.
+
+        The default is <literal>on</literal>. A change to this parameter takes
+        effect at the next checkpoint.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4a47395174..e251cc108b 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -202,14 +202,6 @@ static XLogRecPtr LastRec;
 static XLogRecPtr receivedUpto = 0;
 static TimeLineID receiveTLI = 0;
 
-/*
- * During recovery, lastFullPageWrites keeps track of full_page_writes that
- * the replayed WAL records indicate. It's initialized with full_page_writes
- * that the recovery starting checkpoint record indicates, and then updated
- * each time XLOG_FPW_CHANGE record is replayed.
- */
-static bool lastFullPageWrites;
-
 /*
  * Local copy of SharedRecoveryInProgress variable. True actually means "not
  * known, need to check the shared state".
@@ -6851,11 +6843,7 @@ StartupXLOG(void)
      */
     restoreTwoPhaseData();
 
-    lastFullPageWrites = checkPoint.fullPageWrites;
-
     RedoRecPtr = XLogCtl->RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
-    doPageWrites = lastFullPageWrites;
-
     if (RecPtr < checkPoint.redo)
         ereport(PANIC,
                 (errmsg("invalid redo in checkpoint record")));
@@ -7650,16 +7638,6 @@ StartupXLOG(void)
     /* Pre-scan prepared transactions to find out the range of XIDs present */
     oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
 
-    /*
-     * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
-     * record before resource manager writes cleanup WAL records or checkpoint
-     * record is written.
-     */
-    Insert->fullPageWrites = lastFullPageWrites;
-    LocalSetXLogInsertAllowed();
-    UpdateFullPageWrites();
-    LocalXLogInsertAllowed = -1;
-
     if (InRecovery)
     {
         /*
@@ -7893,6 +7871,13 @@ StartupXLOG(void)
     ControlFile->state = DB_IN_PRODUCTION;
     ControlFile->time = (pg_time_t) time(NULL);
 
+    /*
+     * Set the initial value of shared fullPageWrites. Once
+     * SharedRecoveryInProgress is turned false, checkpointer will update this
+     * value.
+     */
+    XLogCtl->Insert.fullPageWrites = checkPoint.fullPageWrites;
+
     SpinLockAcquire(&XLogCtl->info_lck);
     XLogCtl->SharedRecoveryInProgress = false;
     SpinLockRelease(&XLogCtl->info_lck);
@@ -8754,6 +8739,22 @@ CreateCheckPoint(int flags)
      */
     last_important_lsn = GetLastImportantRecPtr();
 
+    /*
+     * If full_page_writes has been turned off, issue XLOG_FPW_CHANGE before
+     * the flag actually takes effect. Checkpointer never calls this function
+     * before StartupXLOG() turns off SharedRecoveryInProgress so there's no
+     * window where checkpointer and startup processes - the only updators of
+     * the flag - can update shared FPW simultaneously. Thus no lock is
+     * required here. Both shared and local fullPageWrites do not change
+     * before the next reading below.
+     */
+    if (Insert->fullPageWrites && !fullPageWrites)
+    {
+        XLogBeginInsert();
+        XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
+        XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
+    }
+
     /*
      * We must block concurrent insertions while examining insert state to
      * determine the checkpoint REDO pointer.
@@ -8795,6 +8796,15 @@ CreateCheckPoint(int flags)
     else
         checkPoint.PrevTimeLineID = ThisTimeLineID;
 
+    /*
+     * Update shared flag of fullPageWrites. WALInsertLock ensures that this
+     * affects all WAL records exactly from REDO point. As the result a
+     * checkpoint marked as fpw=true is ensured that all WAL records have full
+     * page image.
+     */
+    if (fullPageWrites != Insert->fullPageWrites)
+        Insert->fullPageWrites = fullPageWrites;
+
     checkPoint.fullPageWrites = Insert->fullPageWrites;
 
     /*
@@ -9642,65 +9652,6 @@ XlogChecksums(ChecksumType new_type)
     XLogInsert(RM_XLOG_ID, XLOG_CHECKSUMS);
 }
 
-/*
- * Update full_page_writes in shared memory, and write an
- * XLOG_FPW_CHANGE record if necessary.
- *
- * Note: this function assumes there is no other process running
- * concurrently that could update it.
- */
-void
-UpdateFullPageWrites(void)
-{
-    XLogCtlInsert *Insert = &XLogCtl->Insert;
-
-    /*
-     * Do nothing if full_page_writes has not been changed.
-     *
-     * It's safe to check the shared full_page_writes without the lock,
-     * because we assume that there is no concurrently running process which
-     * can update it.
-     */
-    if (fullPageWrites == Insert->fullPageWrites)
-        return;
-
-    START_CRIT_SECTION();
-
-    /*
-     * It's always safe to take full page images, even when not strictly
-     * required, but not the other round. So if we're setting full_page_writes
-     * to true, first set it true and then write the WAL record. If we're
-     * setting it to false, first write the WAL record and then set the global
-     * flag.
-     */
-    if (fullPageWrites)
-    {
-        WALInsertLockAcquireExclusive();
-        Insert->fullPageWrites = true;
-        WALInsertLockRelease();
-    }
-
-    /*
-     * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
-     * full_page_writes during archive recovery, if required.
-     */
-    if (XLogStandbyInfoActive() && !RecoveryInProgress())
-    {
-        XLogBeginInsert();
-        XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
-
-        XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
-    }
-
-    if (!fullPageWrites)
-    {
-        WALInsertLockAcquireExclusive();
-        Insert->fullPageWrites = false;
-        WALInsertLockRelease();
-    }
-    END_CRIT_SECTION();
-}
-
 /*
  * Check that it's OK to switch to new timeline during recovery.
  *
@@ -10066,9 +10017,6 @@ xlog_redo(XLogReaderState *record)
                 XLogCtl->lastFpwDisableRecPtr = ReadRecPtr;
             SpinLockRelease(&XLogCtl->info_lck);
         }
-
-        /* Keep track of full_page_writes */
-        lastFullPageWrites = fpw;
     }
     else if (info == XLOG_CHECKSUMS)
     {
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index ed6b680ed8..cfb0984100 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -152,8 +152,6 @@ static Node *substitute_actual_parameters(Node *expr, int nargs, List *args,
 static Node *substitute_actual_parameters_mutator(Node *node,
                                      substitute_actual_parameters_context *context);
 static void sql_inline_error_callback(void *arg);
-static Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
-              Oid result_collation);
 static Query *substitute_actual_srf_parameters(Query *expr,
                                  int nargs, List *args);
 static Node *substitute_actual_srf_parameters_mutator(Node *node,
@@ -4842,7 +4840,7 @@ sql_inline_error_callback(void *arg)
  * We use the executor's routine ExecEvalExpr() to avoid duplication of
  * code and ensure we get the same result as the executor would get.
  */
-static Expr *
+Expr *
 evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
               Oid result_collation)
 {
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index dd0c26c11b..2745c4b3da 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -180,6 +180,8 @@ static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
 static List *mergeTableFuncParameters(List *func_args, List *columns);
 static TypeName *TableFuncTypeName(List *columns);
 static RangeVar *makeRangeVarFromAnyName(List *names, int position, core_yyscan_t yyscanner);
+static Node *makePartRangeDatum(PartitionRangeDatumKind kind, Node *value,
+                                int location);
 static void SplitColQualList(List *qualList,
                              List **constraintList, CollateClause **collClause,
                              core_yyscan_t yyscanner);
@@ -472,7 +474,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <node>    columnDef columnOptions
 %type <defelt>    def_elem reloption_elem old_aggr_elem operator_def_elem
 %type <node>    def_arg columnElem where_clause where_or_current_clause
-                a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound
+                a_expr u_expr b_expr b0_expr c_expr c0_expr
+                AexprConst indirection_el opt_slice_bound
                 columnref in_expr having_clause func_table xmltable array_expr
                 ExclusionWhereClause operator_def_arg
 %type <list>    rowsfrom_item rowsfrom_list opt_col_def_list
@@ -585,7 +588,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <partelem>    part_elem
 %type <list>        part_params
 %type <partboundspec> PartitionBoundSpec
-%type <node>        partbound_datum PartitionRangeDatum
+%type <node>        PartitionRangeDatum
 %type <list>        hash_partbound partbound_datum_list range_datum_list
 %type <defelt>        hash_partbound_elem
 
@@ -2804,15 +2807,9 @@ hash_partbound:
             }
         ;
 
-partbound_datum:
-            Sconst            { $$ = makeStringConst($1, @1); }
-            | NumericOnly    { $$ = makeAConst($1, @1); }
-            | NULL_P        { $$ = makeNullAConst(@1); }
-        ;
-
 partbound_datum_list:
-            partbound_datum                        { $$ = list_make1($1); }
-            | partbound_datum_list ',' partbound_datum
+            u_expr                        { $$ = list_make1($1); }
+            | partbound_datum_list ',' u_expr
                                                 { $$ = lappend($1, $3); }
         ;
 
@@ -2825,33 +2822,18 @@ range_datum_list:
 PartitionRangeDatum:
             MINVALUE
                 {
-                    PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
-
-                    n->kind = PARTITION_RANGE_DATUM_MINVALUE;
-                    n->value = NULL;
-                    n->location = @1;
-
-                    $$ = (Node *) n;
+                    $$ = makePartRangeDatum(PARTITION_RANGE_DATUM_MINVALUE,
+                                            NULL, @1);
                 }
             | MAXVALUE
                 {
-                    PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
-
-                    n->kind = PARTITION_RANGE_DATUM_MAXVALUE;
-                    n->value = NULL;
-                    n->location = @1;
-
-                    $$ = (Node *) n;
+                    $$ = makePartRangeDatum(PARTITION_RANGE_DATUM_MAXVALUE,
+                                            NULL, @1);
                 }
-            | partbound_datum
+            | u_expr
                 {
-                    PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
-
-                    n->kind = PARTITION_RANGE_DATUM_VALUE;
-                    n->value = $1;
-                    n->location = @1;
-
-                    $$ = (Node *) n;
+                    $$ = makePartRangeDatum(PARTITION_RANGE_DATUM_VALUE,
+                                            $1, @1);
                 }
         ;
 
@@ -13478,9 +13460,17 @@ a_expr:        c_expr                                    { $$ = $1; }
  * cause trouble in the places where b_expr is used.  For simplicity, we
  * just eliminate all the boolean-keyword-operator productions from b_expr.
  */
-b_expr:        c_expr
-                { $$ = $1; }
-            | b_expr TYPECAST Typename
+b_expr:        c_expr { $$ = $1; }
+            | b0_expr { $$ = $1; }
+        ;
+
+/* u_expr is a subset of b_expr usable along with unreserved keywords */
+u_expr:        c0_expr { $$ = $1; }
+            | b0_expr { $$ = $1; }
+        ;
+
+/* common part of b_expr and u_expr */
+b0_expr:    b_expr TYPECAST Typename
                 { $$ = makeTypeCast($1, $3, @2); }
             | '+' b_expr                    %prec UMINUS
                 { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", NULL, $2, @1); }
@@ -13554,7 +13544,11 @@ b_expr:        c_expr
  * ambiguity to the b_expr syntax.
  */
 c_expr:        columnref                                { $$ = $1; }
-            | AexprConst                            { $$ = $1; }
+            | c0_expr                                { $$ = $1; }
+        ;
+
+/* common part of c_expr and u_expr */
+c0_expr:         AexprConst                            { $$ = $1; }
             | PARAM opt_indirection
                 {
                     ParamRef *p = makeNode(ParamRef);
@@ -16275,6 +16269,18 @@ makeRangeVarFromAnyName(List *names, int position, core_yyscan_t yyscanner)
     return r;
 }
 
+static Node *
+makePartRangeDatum(PartitionRangeDatumKind kind, Node *value, int location)
+{
+    PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
+
+    n->kind = kind;
+    n->value = value;
+    n->location = location;
+
+    return (Node *) n;
+}
+
 /* Separate Constraint nodes from COLLATE clauses in a ColQualList */
 static void
 SplitColQualList(List *qualList,
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 0307738946..4e426f2b28 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -506,6 +506,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
             else
                 err = _("grouping operations are not allowed in EXECUTE parameters");
 
+            break;
+        case EXPR_KIND_PARTITION_BOUND:
+            if (isAgg)
+                err = _("aggregate functions are not allowed in partition bound");
+            else
+                err = _("grouping operations are not allowed in partition bound");
+
             break;
         case EXPR_KIND_TRIGGER_WHEN:
             if (isAgg)
@@ -909,6 +916,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
         case EXPR_KIND_PARTITION_EXPRESSION:
             err = _("window functions are not allowed in partition key expressions");
             break;
+        case EXPR_KIND_PARTITION_BOUND:
+            err = _("window functions are not allowed in partition bound");
+            break;
         case EXPR_KIND_CALL_ARGUMENT:
             err = _("window functions are not allowed in CALL arguments");
             break;
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 38fbe3366f..a7f3d86f75 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1850,6 +1850,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
         case EXPR_KIND_CALL_ARGUMENT:
             err = _("cannot use subquery in CALL argument");
             break;
+        case EXPR_KIND_PARTITION_BOUND:
+            err = _("cannot use subquery in partition bounds");
+            break;
 
             /*
              * There is intentionally no default: case here, so that the
@@ -3474,6 +3477,8 @@ ParseExprKindName(ParseExprKind exprKind)
             return "WHEN";
         case EXPR_KIND_PARTITION_EXPRESSION:
             return "PARTITION BY";
+        case EXPR_KIND_PARTITION_BOUND:
+            return "partition bounds";
         case EXPR_KIND_CALL_ARGUMENT:
             return "CALL";
         case EXPR_KIND_MERGE_WHEN_AND:
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 615aee6d15..fef05388b6 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2306,6 +2306,9 @@ check_srf_call_placement(ParseState *pstate, Node *last_srf, int location)
         case EXPR_KIND_PARTITION_EXPRESSION:
             err = _("set-returning functions are not allowed in partition key expressions");
             break;
+        case EXPR_KIND_PARTITION_BOUND:
+            err = _("set-returning functions are not allowed in partition bounds");
+            break;
         case EXPR_KIND_CALL_ARGUMENT:
             err = _("set-returning functions are not allowed in CALL arguments");
             break;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index f9f9904bad..0112d22d23 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -48,6 +48,7 @@
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
 #include "optimizer/planner.h"
 #include "parser/analyze.h"
 #include "parser/parse_clause.h"
@@ -138,8 +139,9 @@ static void transformColumnType(CreateStmtContext *cxt, ColumnDef *column);
 static void setSchemaName(char *context_schema, char **stmt_schema_name);
 static void transformPartitionCmd(CreateStmtContext *cxt, PartitionCmd *cmd);
 static void validateInfiniteBounds(ParseState *pstate, List *blist);
-static Const *transformPartitionBoundValue(ParseState *pstate, A_Const *con,
-                             const char *colName, Oid colType, int32 colTypmod);
+static Const *transformPartitionBoundValue(ParseState *pstate, Node *con,
+                             const char *colName, Oid colType, int32 colTypmod,
+                             Oid colCollation);
 
 
 /*
@@ -3651,6 +3653,7 @@ transformPartitionBound(ParseState *pstate, Relation parent,
         char       *colname;
         Oid            coltype;
         int32        coltypmod;
+        Oid            colcollation;
 
         if (spec->strategy != PARTITION_STRATEGY_LIST)
             ereport(ERROR,
@@ -3670,17 +3673,19 @@ transformPartitionBound(ParseState *pstate, Relation parent,
         /* Need its type data too */
         coltype = get_partition_col_typid(key, 0);
         coltypmod = get_partition_col_typmod(key, 0);
+        colcollation = get_partition_col_collation(key, 0);
 
         result_spec->listdatums = NIL;
         foreach(cell, spec->listdatums)
         {
-            A_Const    *con = castNode(A_Const, lfirst(cell));
+            Node       *expr = (Node *)lfirst (cell);
             Const       *value;
             ListCell   *cell2;
             bool        duplicate;
 
-            value = transformPartitionBoundValue(pstate, con,
-                                                 colname, coltype, coltypmod);
+            value = transformPartitionBoundValue(pstate, expr,
+                                                 colname, coltype, coltypmod,
+                                                 colcollation);
 
             /* Don't add to the result if the value is a duplicate */
             duplicate = false;
@@ -3740,7 +3745,7 @@ transformPartitionBound(ParseState *pstate, Relation parent,
             char       *colname;
             Oid            coltype;
             int32        coltypmod;
-            A_Const    *con;
+            Oid            colcollation;
             Const       *value;
 
             /* Get the column's name in case we need to output an error */
@@ -3758,13 +3763,15 @@ transformPartitionBound(ParseState *pstate, Relation parent,
             /* Need its type data too */
             coltype = get_partition_col_typid(key, i);
             coltypmod = get_partition_col_typmod(key, i);
+            colcollation = get_partition_col_collation(key, i);
 
             if (ldatum->value)
             {
-                con = castNode(A_Const, ldatum->value);
-                value = transformPartitionBoundValue(pstate, con,
+                value = transformPartitionBoundValue(pstate,
+                                                     ldatum->value,
                                                      colname,
-                                                     coltype, coltypmod);
+                                                     coltype, coltypmod,
+                                                     colcollation);
                 if (value->constisnull)
                     ereport(ERROR,
                             (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -3775,10 +3782,11 @@ transformPartitionBound(ParseState *pstate, Relation parent,
 
             if (rdatum->value)
             {
-                con = castNode(A_Const, rdatum->value);
-                value = transformPartitionBoundValue(pstate, con,
+                value = transformPartitionBoundValue(pstate,
+                                                     rdatum->value,
                                                      colname,
-                                                     coltype, coltypmod);
+                                                     coltype, coltypmod,
+                                                     colcollation);
                 if (value->constisnull)
                     ereport(ERROR,
                             (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -3845,13 +3853,14 @@ validateInfiniteBounds(ParseState *pstate, List *blist)
  * Transform one constant in a partition bound spec
  */
 static Const *
-transformPartitionBoundValue(ParseState *pstate, A_Const *con,
-                             const char *colName, Oid colType, int32 colTypmod)
+transformPartitionBoundValue(ParseState *pstate, Node *val,
+                             const char *colName, Oid colType, int32 colTypmod,
+                             Oid colCollation)
 {
     Node       *value;
 
-    /* Make it into a Const */
-    value = (Node *) make_const(pstate, &con->val, con->location);
+    /* Transform raw parsetree */
+    value = transformExpr(pstate, val, EXPR_KIND_PARTITION_BOUND);
 
     /* Coerce to correct type */
     value = coerce_to_target_type(pstate,
@@ -3867,21 +3876,32 @@ transformPartitionBoundValue(ParseState *pstate, A_Const *con,
                 (errcode(ERRCODE_DATATYPE_MISMATCH),
                  errmsg("specified value cannot be cast to type %s for column \"%s\"",
                         format_type_be(colType), colName),
-                 parser_errposition(pstate, con->location)));
+                 parser_errposition(pstate, exprLocation(val))));
+
+    /* Fix collations after all else */
+    assign_expr_collations(pstate, value);
+
+    /*
+     * Check for conflict between explicit collations. Partition key expression
+     * has precedence over partition bound value.
+     */
+    if (exprCollation(value) != DEFAULT_COLLATION_OID &&
+        colCollation != exprCollation(value))    
+        ereport(ERROR,
+                (errcode(ERRCODE_COLLATION_MISMATCH),
+                 errmsg("collation mismatch between partition key expression (%d) and partition bound value (%d)", colCollation,exprCollation(value)),
+                 parser_errposition(pstate, exprLocation(val))));
+                
 
     /* Simplify the expression, in case we had a coercion */
     if (!IsA(value, Const))
         value = (Node *) expression_planner((Expr *) value);
 
-    /* Fail if we don't have a constant (i.e., non-immutable coercion) */
+    /* Eval if we still don't have a constant (i.e., non-immutable coercion) */
     if (!IsA(value, Const))
-        ereport(ERROR,
-                (errcode(ERRCODE_DATATYPE_MISMATCH),
-                 errmsg("specified value cannot be cast to type %s for column \"%s\"",
-                        format_type_be(colType), colName),
-                 errdetail("The cast requires a non-immutable conversion."),
-                 errhint("Try putting the literal value in single quotes."),
-                 parser_errposition(pstate, con->location)));
-
+        value = (Node *)evaluate_expr((Expr *) value, colType, colTypmod,
+                                      colCollation);
+    
+    Assert(IsA(value, Const));
     return (Const *) value;
 }
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 4b452e7cee..8b87d139d0 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1359,12 +1359,6 @@ UpdateSharedMemoryConfig(void)
     /* update global shmem state for sync rep */
     SyncRepUpdateSyncStandbysDefined();
 
-    /*
-     * If full_page_writes has been changed by SIGHUP, we update it in shared
-     * memory and write an XLOG_FPW_CHANGE record.
-     */
-    UpdateFullPageWrites();
-
     elog(DEBUG2, "checkpointer updated shared memory configuration values");
 }
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f21870c644..6e4648e94b 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -276,7 +276,6 @@ extern void CreateCheckPoint(int flags);
 extern bool CreateRestartPoint(int flags);
 extern void XLogPutNextOid(Oid nextOid);
 extern XLogRecPtr XLogRestorePoint(const char *rpName);
-extern void UpdateFullPageWrites(void);
 extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p);
 extern XLogRecPtr GetRedoRecPtr(void);
 extern XLogRecPtr GetInsertRecPtr(void);
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 33c59f9a63..1710b8ce1e 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -38,7 +38,7 @@ typedef struct CheckPoint
     TimeLineID    ThisTimeLineID; /* current TLI */
     TimeLineID    PrevTimeLineID; /* previous TLI, if this record begins a new
                                  * timeline (equals ThisTimeLineID otherwise) */
-    bool        fullPageWrites; /* current full_page_writes */
+    bool        fullPageWrites; /* true if all covering WALs are having FPI */
     uint32        nextXidEpoch;    /* higher-order bits of nextXid */
     TransactionId nextXid;        /* next free XID */
     Oid            nextOid;        /* next free OID */
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index ba4fa4b68b..4b1a5b96f8 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -85,4 +85,6 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node);
 extern Query *inline_set_returning_function(PlannerInfo *root,
                               RangeTblEntry *rte);
 
+extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod,
+                           Oid result_collation);
 #endif                            /* CLAUSES_H */
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 3fd2151ccb..9175a32c42 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -70,6 +70,7 @@ typedef enum ParseExprKind
     EXPR_KIND_TRIGGER_WHEN,        /* WHEN condition in CREATE TRIGGER */
     EXPR_KIND_POLICY,            /* USING or WITH CHECK expr in policy */
     EXPR_KIND_PARTITION_EXPRESSION, /* PARTITION BY expression */
+    EXPR_KIND_PARTITION_BOUND,     /* partition bound value */
     EXPR_KIND_CALL_ARGUMENT        /* procedure argument in CALL */
 } ParseExprKind;
 
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index ffffde01da..215f5fa06e 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -660,6 +660,12 @@ get_partition_col_typmod(PartitionKey key, int col)
     return key->parttypmod[col];
 }
 
+static inline Oid
+get_partition_col_collation(PartitionKey key, int col)
+{
+    return key->partcollation[col];
+}
+
 /*
  * RelationGetPartitionDesc
  *        Returns partition descriptor for a relation.
diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out
index e724439037..2080a656e4 100644
--- a/src/test/regress/expected/create_table.out
+++ b/src/test/regress/expected/create_table.out
@@ -449,14 +449,6 @@ CREATE TABLE list_parted (
 CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN ('1');
 CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2);
 CREATE TABLE part_null PARTITION OF list_parted FOR VALUES IN (null);
-CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (int '1');
-ERROR:  syntax error at or near "int"
-LINE 1: ... fail_part PARTITION OF list_parted FOR VALUES IN (int '1');
-                                                              ^
-CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int);
-ERROR:  syntax error at or near "::"
-LINE 1: ...fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int);
-                                                                ^
 -- syntax does not allow empty list of values for list partitions
 CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ();
 ERROR:  syntax error at or near ")"
@@ -490,12 +482,8 @@ CREATE TABLE moneyp (
     a money
 ) PARTITION BY LIST (a);
 CREATE TABLE moneyp_10 PARTITION OF moneyp FOR VALUES IN (10);
-ERROR:  specified value cannot be cast to type money for column "a"
-LINE 1: ...EATE TABLE moneyp_10 PARTITION OF moneyp FOR VALUES IN (10);
-                                                                   ^
-DETAIL:  The cast requires a non-immutable conversion.
-HINT:  Try putting the literal value in single quotes.
-CREATE TABLE moneyp_10 PARTITION OF moneyp FOR VALUES IN ('10');
+CREATE TABLE moneyp_11 PARTITION OF moneyp FOR VALUES IN ('11');
+CREATE TABLE moneyp_12 PARTITION OF moneyp FOR VALUES IN (to_char(12, '99')::int);
 DROP TABLE moneyp;
 -- immutable cast should work, though
 CREATE TABLE bigintp (
@@ -683,6 +671,26 @@ ERROR:  modulus for hash partition must be a positive integer
 -- remainder must be greater than or equal to zero and less than modulus
 CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 8);
 ERROR:  remainder for hash partition must be less than modulus
+-- check for collation handling
+CREATE TABLE col_parted (
+    a varchar
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part PARTITION OF col_parted FOR VALUES IN (('a' collate "en_US"));
+ERROR:  collation mismatch between partition key expression (100) and partition bound value (12638)
+LINE 1: ...fail_part PARTITION OF col_parted FOR VALUES IN (('a' collat...
+                                                             ^
+CREATE TABLE success_part PARTITION OF col_parted FOR VALUES IN ('a');
+DROP TABLE col_parted;
+CREATE TABLE col_parted (
+    a varchar collate "en_US"
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part PARTITION OF col_parted FOR VALUES IN (('a' collate "en_GB"));
+ERROR:  collation mismatch between partition key expression (12638) and partition bound value (12631)
+LINE 1: ...fail_part PARTITION OF col_parted FOR VALUES IN (('a' collat...
+                                                             ^
+CREATE TABLE success_part PARTITION OF col_parted FOR VALUES IN ('a');
+CREATE TABLE success_part2 PARTITION OF col_parted FOR VALUES IN (('b' collate "en_US"));
+DROP TABLE col_parted;
 -- check schema propagation from parent
 CREATE TABLE parted (
     a text,
diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql
index 235bef13dc..f739d89a75 100644
--- a/src/test/regress/sql/create_table.sql
+++ b/src/test/regress/sql/create_table.sql
@@ -432,8 +432,6 @@ CREATE TABLE list_parted (
 CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN ('1');
 CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2);
 CREATE TABLE part_null PARTITION OF list_parted FOR VALUES IN (null);
-CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (int '1');
-CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int);
 
 -- syntax does not allow empty list of values for list partitions
 CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ();
@@ -458,7 +456,8 @@ CREATE TABLE moneyp (
     a money
 ) PARTITION BY LIST (a);
 CREATE TABLE moneyp_10 PARTITION OF moneyp FOR VALUES IN (10);
-CREATE TABLE moneyp_10 PARTITION OF moneyp FOR VALUES IN ('10');
+CREATE TABLE moneyp_11 PARTITION OF moneyp FOR VALUES IN ('11');
+CREATE TABLE moneyp_12 PARTITION OF moneyp FOR VALUES IN (to_char(12, '99')::int);
 DROP TABLE moneyp;
 
 -- immutable cast should work, though
@@ -620,6 +619,22 @@ CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 0, REM
 -- remainder must be greater than or equal to zero and less than modulus
 CREATE TABLE fail_part PARTITION OF hash_parted2 FOR VALUES WITH (MODULUS 8, REMAINDER 8);
 
+-- check for collation handling
+CREATE TABLE col_parted (
+    a varchar
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part PARTITION OF col_parted FOR VALUES IN (('a' collate "en_US"));
+CREATE TABLE success_part PARTITION OF col_parted FOR VALUES IN ('a');
+DROP TABLE col_parted;
+
+CREATE TABLE col_parted (
+    a varchar collate "en_US"
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part PARTITION OF col_parted FOR VALUES IN (('a' collate "en_GB"));
+CREATE TABLE success_part PARTITION OF col_parted FOR VALUES IN ('a');
+CREATE TABLE success_part2 PARTITION OF col_parted FOR VALUES IN (('b' collate "en_US"));
+DROP TABLE col_parted;
+
 -- check schema propagation from parent
 
 CREATE TABLE parted (
-- 
2.16.3


pgsql-hackers by date:

Previous
From: Peter Eisentraut
Date:
Subject: Re: Bugs in TOAST handling, OID assignment and redo recovery
Next
From: David Rowley
Date:
Subject: Re: Native partitioning tablespace inheritance