Thread: BUG #16010: Unexpected reordering of WHERE clause operations and SELECT list function calls

BUG #16010: Unexpected reordering of WHERE clause operations and SELECT list function calls

From
PG Bug reporting form
Date:
The following bug has been logged on the website:

Bug reference:      16010
Logged by:          Stephen Kendall
Email address:      spkendall@gmail.com
PostgreSQL version: 12beta3
Operating system:   MacOS
Description:

Expected output is a list of tables where the names contain simple date
strings that fall within a range. Actual result is an error because one or
more table names that do not contain validly formatted dates are passed to
TO_TIMESTAMP(). This should not happen because the table with the invalid
date in its name is in a schema that should be filtered out by a WHERE
clause before the evaluation of the TO_TIMESTAMP().

All but the first query shown below actually work in our production
environment (Amazon RDS 10.6). They don't error there and do return the
expected list of tables. However, that environment is too complex and too
full of proprietary data; I cannot provide reproduction steps for it.

In any event, I believe that the first query after the CREATE TABLE
statements should work, because I think TO_TIMESTAMP() should *not* be run
against rows that fail the first WHERE clause predicate. That query errors
in both environments.

spkmbp:~ spk$ psql12 service=app-12-postgres-postgres 
psql12 (12beta3)
Type "help" for help.

localhost: spk@postgres=# \set VERBOSITY verbose 
localhost: spk@postgres=# set client_min_messages to debug ;
SET
localhost: spk@postgres=# CREATE SCHEMA proc ;
CREATE SCHEMA
localhost: spk@postgres=# CREATE SCHEMA bkup_proc ;
CREATE SCHEMA
localhost: spk@postgres=# CREATE TABLE proc.decide_proc_tst040318 () ;
CREATE TABLE
localhost: spk@postgres=# CREATE TABLE bkup_proc.solution_use_proc_20190730
() ;
CREATE TABLE
localhost: spk@postgres=# SELECT * FROM (
localhost: spk@postgres(# SELECT schemaname, tablename,
current_date-interval'2'month time_frame,
localhost: spk@postgres(#
to_timestamp(regexp_replace(tablename,'[^0-9]*',''),'yyyymmdd') table_date
localhost: spk@postgres(# from pg_tables
localhost: spk@postgres(# where schemaname = 'bkup_proc'
localhost: spk@postgres(# and (tablename ~* 'decide_proc_'
localhost: spk@postgres(#        or tablename ~* 'solution_use_proc_'
localhost: spk@postgres(#        )
localhost: spk@postgres(# ) subq
localhost: spk@postgres-# where table_date >= time_frame
localhost: spk@postgres-# ;
ERROR:  22008: date/time field value out of range: "040318"
LOCATION:  DateTimeParseError, datetime.c:3741
localhost: spk@postgres=# WITH
localhost: spk@postgres-#  tabs AS (
localhost: spk@postgres(# SELECT schemaname, tablename
localhost: spk@postgres(#   FROM pg_tables
localhost: spk@postgres(#  WHERE schemaname = 'bkup_proc'
localhost: spk@postgres(#    AND (tablename ~* 'decide_proc_'
localhost: spk@postgres(#        OR tablename ~* 'solution_use_proc_'
localhost: spk@postgres(#        )
localhost: spk@postgres(# ) 
localhost: spk@postgres-# SELECT * FROM (
localhost: spk@postgres(# SELECT schemaname, tablename,
current_date-interval'2'month time_frame,
localhost: spk@postgres(#
to_timestamp(regexp_replace(tablename,'[^0-9]*',''),'yyyymmdd') table_date
localhost: spk@postgres(#   FROM tabs
localhost: spk@postgres(# ) subq
localhost: spk@postgres-# where table_date >= time_frame
localhost: spk@postgres-# ;
ERROR:  22008: date/time field value out of range: "040318"
LOCATION:  DateTimeParseError, datetime.c:3741
localhost: spk@postgres=# WITH
localhost: spk@postgres-#  tabs AS (
localhost: spk@postgres(# SELECT schemaname, tablename
localhost: spk@postgres(#   FROM pg_tables
localhost: spk@postgres(#  WHERE schemaname = 'bkup_proc'
localhost: spk@postgres(# ) 
localhost: spk@postgres-# ,proc_tabs AS (
localhost: spk@postgres(# SELECT schemaname, tablename
localhost: spk@postgres(#   FROM tabs
localhost: spk@postgres(#  WHERE (tablename ~* 'decide_proc_'
localhost: spk@postgres(#        OR tablename ~* 'solution_use_proc_'
localhost: spk@postgres(#        )
localhost: spk@postgres(# )
localhost: spk@postgres-# SELECT * FROM (
localhost: spk@postgres(# SELECT schemaname, tablename,
current_date-interval'2'month time_frame,
localhost: spk@postgres(#
to_timestamp(regexp_replace(tablename,'[^0-9]*',''),'yyyymmdd') table_date
localhost: spk@postgres(#   FROM proc_tabs
localhost: spk@postgres(# ) subq
localhost: spk@postgres-# where table_date >= time_frame
localhost: spk@postgres-# ;
ERROR:  22008: date/time field value out of range: "040318"
LOCATION:  DateTimeParseError, datetime.c:3741
localhost: spk@postgres=# WITH
localhost: spk@postgres-#  tabs AS (
localhost: spk@postgres(# SELECT schemaname, tablename
localhost: spk@postgres(#   FROM pg_tables
localhost: spk@postgres(#  WHERE schemaname = 'bkup_proc'
localhost: spk@postgres(# ) 
localhost: spk@postgres-# ,proc_tabs AS (
localhost: spk@postgres(# SELECT schemaname, tablename
localhost: spk@postgres(#   FROM tabs
localhost: spk@postgres(#  WHERE (tablename ~* 'decide_proc_'
localhost: spk@postgres(#        OR tablename ~* 'solution_use_proc_'
localhost: spk@postgres(#        )
localhost: spk@postgres(# )
localhost: spk@postgres-# ,proc_tabs_calc AS (
localhost: spk@postgres(# SELECT schemaname, tablename,
current_date-interval'2'month time_frame,
localhost: spk@postgres(#
to_timestamp(regexp_replace(tablename,'[^0-9]*',''),'yyyymmdd') table_date
localhost: spk@postgres(#   FROM proc_tabs
localhost: spk@postgres(# )
localhost: spk@postgres-# SELECT * 
localhost: spk@postgres-#   FROM proc_tabs_calc
localhost: spk@postgres-#  WHERE table_date >= time_frame
localhost: spk@postgres-# ;
ERROR:  22008: date/time field value out of range: "040318"
LOCATION:  DateTimeParseError, datetime.c:3741
localhost: spk@postgres=# WITH
localhost: spk@postgres-#  tabs AS (
localhost: spk@postgres(# SELECT schemaname, tablename
localhost: spk@postgres(#   FROM pg_tables
localhost: spk@postgres(#  WHERE schemaname = 'bkup_proc'
localhost: spk@postgres(# )
localhost: spk@postgres-# ,proc_tabs AS (
localhost: spk@postgres(# SELECT schemaname, tablename,
current_date-interval'2'month time_frame,
localhost: spk@postgres(#
to_timestamp(regexp_replace(tablename,'[^0-9]*',''),'yyyymmdd') table_date
localhost: spk@postgres(#   FROM tabs
localhost: spk@postgres(#  WHERE tablename ~* 'decide_proc_'
localhost: spk@postgres(#        or tablename ~* 'solution_use_proc_'
localhost: spk@postgres(# )
localhost: spk@postgres-# SELECT *
localhost: spk@postgres-#   FROM proc_tabs
localhost: spk@postgres-#  WHERE table_date >= time_frame      
localhost: spk@postgres-#   ;
ERROR:  22008: date/time field value out of range: "040318"
LOCATION:  DateTimeParseError, datetime.c:3741
localhost: spk@postgres=#


PG Bug reporting form <noreply@postgresql.org> writes:
> Expected output is a list of tables where the names contain simple date
> strings that fall within a range. Actual result is an error because one or
> more table names that do not contain validly formatted dates are passed to
> TO_TIMESTAMP(). This should not happen because the table with the invalid
> date in its name is in a schema that should be filtered out by a WHERE
> clause before the evaluation of the TO_TIMESTAMP().

There is not any particular guarantee about the order of evaluation of
WHERE clauses [1], and we're not going to make one because it would
be catastrophic for performance in many real-world cases.  In this case
you might be able to force it by inserting an optimization fence in the
subquery, such as OFFSET 0 or, in v12 and later, a CTE declared AS
MATERIALIZED (so that the "where table_date >= time_frame" clause can't be
pushed down into it); but you'd probably be better off to make the
table-name-to-timestamp conversion expression more robust.  You could
do something like

case when tablename ~ '^[^0-9]*[0-9]{8}$'
then to_timestamp(regexp_replace(tablename,'[^0-9]*',''),'yyyymmdd')
else null
end

Another approach, but one that's also going to cost you performance,
is to wrap expressions that might throw errors into plpgsql functions
that you mark as volatile.  That will discourage the planner from
moving them around ... but it will probably also cripple optimizations
that you still want.

            regards, tom lane

[1] https://www.postgresql.org/docs/current/sql-expressions.html#SYNTAX-EXPRESS-EVAL