Thread: Slow query needs a kick in the pants.

Slow query needs a kick in the pants.

From
"Dann Corbit"
Date:
This query:
 
connxdatasync=# select "a".OID
connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
connxdatasync(#                   "a"."SYS_YYYYMMDD" = "b"."SYS_YYYYMMDD" and
connxdatasync(#                   "a"."SYS_HHMMSSUU" = "b"."SYS_HHMMSSUU" )
connxdatasync-#                   where ( "b".OID is NULL )
connxdatasync-# ;
 
Is abysmally slow.
 
connxdatasync=# \d "CNX_DS2_1_BCHFIL_FILE"
      Table "CNX_DS2_1_BCHFIL_FILE"
  Attribute   |     Type      | Modifier
--------------+---------------+----------
 FILE_KEY     | character(30) |
 SYS_YYYYMMDD | character(8)  |
 SYS_HHMMSSUU | character(8)  |
 CRC          | bigint        | not null
Index: UA4IYKF5LY9402
 
connxdatasync=# explain  VERBOSE
connxdatasync-# select "a".OID
connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
connxdatasync(#                   "a"."SYS_YYYYMMDD" = "b"."SYS_YYYYMMDD" and
connxdatasync(#                   "a"."SYS_HHMMSSUU" = "b"."SYS_HHMMSSUU" )
connxdatasync-#                   where ( "b".OID is NULL )
connxdatasync-# ;
NOTICE:  QUERY DUMP:
 
{ MERGEJOIN :startup_cost 312848.17 :total_cost 351988.93 :rows 973970 :width 80 :qptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 26 :restyp
mod -1 :resname oid :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 65001 :varattno 1 :vartype 26 :vartypmod -1  :varlevelsup
0 :varnoold 1 :varoattno -2}}) :qpqual ({ EXPR :typeOid 16  :opType func :oper { FUNC :funcid 1029 :functype 16 } :args ({ VAR :varno 65000 :varattno 4 :va
rtype 26 :vartypmod -1  :varlevelsup 0 :varnoold 2 :varoattno -2})}) :lefttree { SORT :startup_cost 155655.15 :total_cost 155655.15 :rows 973970 :width 40
:qptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :exp
r { VAR :varno 1 :varattno -2 :vartype 26 :vartypmod -1  :varlevelsup 0 :varnoold 1 :varoattno -2}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 1042 :
restypmod 34 :resname <> :reskey 3 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 1 :vartype 1042 :vartypmod 34  :varlev
elsup 0 :varnoold 1 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 2 :reskeyop 1049 :ressortgroupre
f 0 :resjunk false } :expr { VAR :varno 1 :varattno 2 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 1 :varoattno 2}} { TARGETENTRY :resdom { RESDOM
 :resno 4 :restype 1042 :restypmod 12 :resname <> :reskey 1 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 3 :vartype 10
42 :vartypmod 12  :varlevelsup 0 :varnoold 1 :varoattno 3}}) :qpqual <> :lefttree { SEQSCAN :startup_cost 0.00 :total_cost 22726.70 :rows 973970 :width 40
:qptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :exp
r { VAR :varno 1 :varattno -2 :vartype 26 :vartypmod -1  :varlevelsup 0 :varnoold 1 :varoattno -2}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 1042 :
restypmod 34 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 1 :vartype 1042 :vartypmod 34  :varlevels
up 0 :varnoold 1 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :r
esjunk false } :expr { VAR :varno 1 :varattno 2 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 1 :varoattno 2}} { TARGETENTRY :resdom { RESDOM :resn
o 4 :restype 1042 :restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 3 :vartype 1042 :varty
pmod 12  :varlevelsup 0 :varnoold 1 :varoattno 3}}) :qpqual <> :lefttree <> :righttree <> :extprm () :locprm () :initplan <> :nprm 0  :scanrelid 1 } :right
tree <> :extprm () :locprm () :initplan <> :nprm 0  :keycount 3 } :righttree { SORT :startup_cost 157193.02 :total_cost 157193.02 :rows 983068 :width 40 :q
ptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 1042 :restypmod 34 :resname <> :reskey 3 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :
expr { VAR :varno 2 :varattno 1 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 2 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 104
2 :restypmod 12 :resname <> :reskey 2 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno 2 :vartype 1042 :vartypmod 12  :var
levelsup 0 :varnoold 2 :varoattno 2}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 1 :reskeyop 1049 :ressortgrou
pref 0 :resjunk false } :expr { VAR :varno 2 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 3}} { TARGETENTRY :resdom { RES
DOM :resno 4 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno -2 :vartype 26
:vartypmod -1  :varlevelsup 0 :varnoold 2 :varoattno -2}}) :qpqual <> :lefttree { SEQSCAN :startup_cost 0.00 :total_cost 22938.68 :rows 983068 :width 40 :q
ptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 1042 :restypmod 34 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :exp
r { VAR :varno 2 :varattno 1 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 2 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 1042 :
restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno 2 :vartype 1042 :vartypmod 12  :varlevels
up 0 :varnoold 2 :varoattno 2}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :r
esjunk false } :expr { VAR :varno 2 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 3}} { TARGETENTRY :resdom { RESDOM :resn
o 4 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno -2 :vartype 26 :vartypmo
d -1  :varlevelsup 0 :varnoold 2 :varoattno -2}}) :qpqual <> :lefttree <> :righttree <> :extprm () :locprm () :initplan <> :nprm 0  :scanrelid 2 } :righttr
ee <> :extprm () :locprm () :initplan <> :nprm 0  :keycount 3 } :extprm () :locprm () :initplan <> :nprm 0  :jointype 1 :joinqual <> :mergeclauses ({ EXPR
:typeOid 16  :opType op :oper { OPER :opno 1054 :opid 1048 :opresulttype 16 } :args ({ VAR :varno 65001 :varattno 4 :vartype 1042 :vartypmod 12  :varlevels
up 0 :varnoold 1 :varoattno 3} { VAR :varno 65000 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 3})} { EXPR :typeOid 16  :
opType op :oper { OPER :opno 1054 :opid 1048 :opresulttype 16 } :args ({ VAR :varno 65001 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold
 1 :varoattno 2} { VAR :varno 65000 :varattno 2 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 2})} { EXPR :typeOid 16  :opType op :ope
r { OPER :opno 1054 :opid 1048 :opresulttype 16 } :args ({ VAR :varno 65001 :varattno 2 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 1 :varoattno
1} { VAR :varno 65000 :varattno 1 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 2 :varoattno 1})})}
NOTICE:  QUERY PLAN:
 
Merge Join  (cost=312848.17..351988.93 rows=973970 width=80)
  ->  Sort  (cost=155655.15..155655.15 rows=973970 width=40)
        ->  Seq Scan on CNX_DS2_1_BCHFIL_FILE a  (cost=0.00..22726.70 rows=973970 width=40)
  ->  Sort  (cost=157193.02..157193.02 rows=983068 width=40)
        ->  Seq Scan on CNX_DS_1_BCHFIL_FILE b  (cost=0.00..22938.68 rows=983068 width=40)
Any ideas about how it might be possible to speed it up?
I can add indexes, tag columns or any other schema change that might help.

Re: Slow query needs a kick in the pants.

From
Arjen van der Meijden
Date:
Can't you do something with a query like:
SELECT a.oid
FROM CNX_DS2_1_BCHFIL_FILE a, CNX_DS2_1_BCHFIL_FILE b
WHERE a.FILE_KEY = b.FILE_KEY and ... and b.OID = NULL
 
?
 
I'm absolutely not sure whether this will give the expected results. But my version is probably much faster.
 
Regards,
 
Arjen
-----Oorspronkelijk bericht-----
Van: pgsql-general-owner@postgresql.org [mailto:pgsql-general-owner@postgresql.org] Namens Dann Corbit
Verzonden: vrijdag 28 maart 2003 0:30
Aan: pgsql-general@postgresql.org
Onderwerp: [GENERAL] Slow query needs a kick in the pants.

This query:
 
connxdatasync=# select "a".OID
connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
connxdatasync(#                   "a"."SYS_YYYYMMDD" = "b"."SYS_YYYYMMDD" and
connxdatasync(#                   "a"."SYS_HHMMSSUU" = "b"."SYS_HHMMSSUU" )
connxdatasync-#                   where ( "b".OID is NULL )
connxdatasync-# ;
 
Is abysmally slow.
 
connxdatasync=# \d "CNX_DS2_1_BCHFIL_FILE"
      Table "CNX_DS2_1_BCHFIL_FILE"
  Attribute   |     Type      | Modifier
--------------+---------------+----------
 FILE_KEY     | character(30) |
 SYS_YYYYMMDD | character(8)  |
 SYS_HHMMSSUU | character(8)  |
 CRC          | bigint        | not null
Index: UA4IYKF5LY9402
 
connxdatasync=# explain  VERBOSE
connxdatasync-# select "a".OID
connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
connxdatasync(#                   "a"."SYS_YYYYMMDD" = "b"."SYS_YYYYMMDD" and
connxdatasync(#                   "a"."SYS_HHMMSSUU" = "b"."SYS_HHMMSSUU" )
connxdatasync-#                   where ( "b".OID is NULL )
connxdatasync-# ;
NOTICE:  QUERY DUMP:
 
{ MERGEJOIN :startup_cost 312848.17 :total_cost 351988.93 :rows 973970 :width 80 :qptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 26 :restyp
mod -1 :resname oid :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 65001 :varattno 1 :vartype 26 :vartypmod -1  :varlevelsup
0 :varnoold 1 :varoattno -2}}) :qpqual ({ EXPR :typeOid 16  :opType func :oper { FUNC :funcid 1029 :functype 16 } :args ({ VAR :varno 65000 :varattno 4 :va
rtype 26 :vartypmod -1  :varlevelsup 0 :varnoold 2 :varoattno -2})}) :lefttree { SORT :startup_cost 155655.15 :total_cost 155655.15 :rows 973970 :width 40
:qptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :exp
r { VAR :varno 1 :varattno -2 :vartype 26 :vartypmod -1  :varlevelsup 0 :varnoold 1 :varoattno -2}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 1042 :
restypmod 34 :resname <> :reskey 3 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 1 :vartype 1042 :vartypmod 34  :varlev
elsup 0 :varnoold 1 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 2 :reskeyop 1049 :ressortgroupre
f 0 :resjunk false } :expr { VAR :varno 1 :varattno 2 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 1 :varoattno 2}} { TARGETENTRY :resdom { RESDOM
 :resno 4 :restype 1042 :restypmod 12 :resname <> :reskey 1 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 3 :vartype 10
42 :vartypmod 12  :varlevelsup 0 :varnoold 1 :varoattno 3}}) :qpqual <> :lefttree { SEQSCAN :startup_cost 0.00 :total_cost 22726.70 :rows 973970 :width 40
:qptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :exp
r { VAR :varno 1 :varattno -2 :vartype 26 :vartypmod -1  :varlevelsup 0 :varnoold 1 :varoattno -2}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 1042 :
restypmod 34 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 1 :vartype 1042 :vartypmod 34  :varlevels
up 0 :varnoold 1 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :r
esjunk false } :expr { VAR :varno 1 :varattno 2 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 1 :varoattno 2}} { TARGETENTRY :resdom { RESDOM :resn
o 4 :restype 1042 :restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 1 :varattno 3 :vartype 1042 :varty
pmod 12  :varlevelsup 0 :varnoold 1 :varoattno 3}}) :qpqual <> :lefttree <> :righttree <> :extprm () :locprm () :initplan <> :nprm 0  :scanrelid 1 } :right
tree <> :extprm () :locprm () :initplan <> :nprm 0  :keycount 3 } :righttree { SORT :startup_cost 157193.02 :total_cost 157193.02 :rows 983068 :width 40 :q
ptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 1042 :restypmod 34 :resname <> :reskey 3 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :
expr { VAR :varno 2 :varattno 1 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 2 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 104
2 :restypmod 12 :resname <> :reskey 2 :reskeyop 1049 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno 2 :vartype 1042 :vartypmod 12  :var
levelsup 0 :varnoold 2 :varoattno 2}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 1 :reskeyop 1049 :ressortgrou
pref 0 :resjunk false } :expr { VAR :varno 2 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 3}} { TARGETENTRY :resdom { RES
DOM :resno 4 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno -2 :vartype 26
:vartypmod -1  :varlevelsup 0 :varnoold 2 :varoattno -2}}) :qpqual <> :lefttree { SEQSCAN :startup_cost 0.00 :total_cost 22938.68 :rows 983068 :width 40 :q
ptargetlist ({ TARGETENTRY :resdom { RESDOM :resno 1 :restype 1042 :restypmod 34 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :exp
r { VAR :varno 2 :varattno 1 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 2 :varoattno 1}} { TARGETENTRY :resdom { RESDOM :resno 2 :restype 1042 :
restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno 2 :vartype 1042 :vartypmod 12  :varlevels
up 0 :varnoold 2 :varoattno 2}} { TARGETENTRY :resdom { RESDOM :resno 3 :restype 1042 :restypmod 12 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :r
esjunk false } :expr { VAR :varno 2 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 3}} { TARGETENTRY :resdom { RESDOM :resn
o 4 :restype 26 :restypmod -1 :resname <> :reskey 0 :reskeyop 0 :ressortgroupref 0 :resjunk false } :expr { VAR :varno 2 :varattno -2 :vartype 26 :vartypmo
d -1  :varlevelsup 0 :varnoold 2 :varoattno -2}}) :qpqual <> :lefttree <> :righttree <> :extprm () :locprm () :initplan <> :nprm 0  :scanrelid 2 } :righttr
ee <> :extprm () :locprm () :initplan <> :nprm 0  :keycount 3 } :extprm () :locprm () :initplan <> :nprm 0  :jointype 1 :joinqual <> :mergeclauses ({ EXPR
:typeOid 16  :opType op :oper { OPER :opno 1054 :opid 1048 :opresulttype 16 } :args ({ VAR :varno 65001 :varattno 4 :vartype 1042 :vartypmod 12  :varlevels
up 0 :varnoold 1 :varoattno 3} { VAR :varno 65000 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 3})} { EXPR :typeOid 16  :
opType op :oper { OPER :opno 1054 :opid 1048 :opresulttype 16 } :args ({ VAR :varno 65001 :varattno 3 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold
 1 :varoattno 2} { VAR :varno 65000 :varattno 2 :vartype 1042 :vartypmod 12  :varlevelsup 0 :varnoold 2 :varoattno 2})} { EXPR :typeOid 16  :opType op :ope
r { OPER :opno 1054 :opid 1048 :opresulttype 16 } :args ({ VAR :varno 65001 :varattno 2 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 1 :varoattno
1} { VAR :varno 65000 :varattno 1 :vartype 1042 :vartypmod 34  :varlevelsup 0 :varnoold 2 :varoattno 1})})}
NOTICE:  QUERY PLAN:
 
Merge Join  (cost=312848.17..351988.93 rows=973970 width=80)
  ->  Sort  (cost=155655.15..155655.15 rows=973970 width=40)
        ->  Seq Scan on CNX_DS2_1_BCHFIL_FILE a  (cost=0.00..22726.70 rows=973970 width=40)
  ->  Sort  (cost=157193.02..157193.02 rows=983068 width=40)
        ->  Seq Scan on CNX_DS_1_BCHFIL_FILE b  (cost=0.00..22938.68 rows=983068 width=40)
Any ideas about how it might be possible to speed it up?
I can add indexes, tag columns or any other schema change that might help.

Re: Slow query needs a kick in the pants.

From
"Dann Corbit"
Date:
The other query attempts given to me do not produce the desired results.  Thanks for taking a look anyway.
 
Let me explain the problem set, and what I want to accomplish...
 
I have two tables in all cases.  Each table pair consists of the following columns:
1.  A primary key of one or more columns {with a unique index}
2.  An Oid column {with a unique index}
3.  A 64 bit CRC
 
For both tables, the primary key information will "mostly" match.  I need to know which primary keys are found in the first table but not in the second.  Also, which primary keys are found in the second table but not in the first.
 
One possibility would be to add a tag column and set it where the two tables agree on primary key.  Unfortunately, we are likely to have many millions of rows (and only a few thousand disagreements) and so I would end up rewriting the entire table for both tables (less the tiny difference set) and hence that would be inefficient.   My outer join also stinks up the place.  (Several minutes for a million rows -- unacceptable).
 
Probably, I am going to have to write my own piece of custom software that manipulates the data outside of any database.  The database query approach is just too slow.
 
 

Re: Slow query needs a kick in the pants.

From
Dennis Gearon
Date:
Do it in two stages, find the primary keys first, then do a search on on IN such
and such query, (BUT 'IN' is slow, some others can recommend a faster alternative. )

Just an idea.

Dann Corbit wrote:
> The other query attempts given to me do not produce the desired
> results.  Thanks for taking a look anyway.
>
> Let me explain the problem set, and what I want to accomplish...
>
> I have two tables in all cases.  Each table pair consists of the
> following columns:
> 1.  A primary key of one or more columns {with a unique index}
> 2.  An Oid column {with a unique index}
> 3.  A 64 bit CRC
>
> For both tables, the primary key information will "mostly" match.  I
> need to know which primary keys are found in the first table but not in
> the second.  Also, which primary keys are found in the second table but
> not in the first.
>
> One possibility would be to add a tag column and set it where the two
> tables agree on primary key.  Unfortunately, we are likely to have many
> millions of rows (and only a few thousand disagreements) and so I would
> end up rewriting the entire table for both tables (less the tiny
> difference set) and hence that would be inefficient.   My outer join
> also stinks up the place.  (Several minutes for a million rows --
> unacceptable).
>
> Probably, I am going to have to write my own piece of custom software
> that manipulates the data outside of any database.  The database query
> approach is just too slow.
>
>


Re: Slow query needs a kick in the pants.

From
Greg Stark
Date:
"Dann Corbit" <DCorbit@connx.com> writes:

> I have two tables in all cases.  Each table pair consists of the
> following columns:
> 1.  A primary key of one or more columns {with a unique index}
> 2.  An Oid column {with a unique index}
> 3.  A 64 bit CRC
>
> For both tables, the primary key information will "mostly" match.  I
> need to know which primary keys are found in the first table but not in
> the second.  Also, which primary keys are found in the second table but
> not in the first.

The approach you wrote is how I would write it too. Though from the problem
description it sounds like you want a FULL OUTER JOIN.

You could try timing it with enable_seqscan = off to see if an index scan is
any faster. It will also show you if postgres thinks you're joining precisely
on the primary key. If it's faster you could try lowering random_page cost to
coerce postgres into doing index scans instead of sequential scans + sorts. In
theory sorting the tables should be faster than the index scan but I'm always
skeptical about that.

If it's something you do often but the data rarely changes you could also try
clustering both tables on the primary key index. This won't fundamentally
change things but postgres will notice it (after an analyze) and be more
likely to use the index, and it will make the index scan somewhat faster.

I wonder how you ended up with a database structure like this, it's arguably
denormalized.

--
greg

Re: Slow query needs a kick in the pants.

From
"Dann Corbit"
Date:
Food for optimizer thought...

The same query, ported to SQL*Server, is 20x faster than PostgreSQL.

-----Original Message-----
From: Dann Corbit
Sent: Thursday, March 27, 2003 3:30 PM
To: pgsql-general@postgresql.org
Subject: [GENERAL] Slow query needs a kick in the pants.


This query:

connxdatasync=# select "a".OID
connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
connxdatasync(#                   "a"."SYS_YYYYMMDD" =
"b"."SYS_YYYYMMDD" and
connxdatasync(#                   "a"."SYS_HHMMSSUU" =
"b"."SYS_HHMMSSUU" )
connxdatasync-#                   where ( "b".OID is NULL )
connxdatasync-# ;

Is abysmally slow.

connxdatasync=# \d "CNX_DS2_1_BCHFIL_FILE"
      Table "CNX_DS2_1_BCHFIL_FILE"
  Attribute   |     Type      | Modifier
--------------+---------------+----------
 FILE_KEY     | character(30) |
 SYS_YYYYMMDD | character(8)  |
 SYS_HHMMSSUU | character(8)  |
 CRC          | bigint        | not null
Index: UA4IYKF5LY9402

connxdatasync=# explain  VERBOSE
connxdatasync-# select "a".OID
connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
connxdatasync(#                   "a"."SYS_YYYYMMDD" =
"b"."SYS_YYYYMMDD" and
connxdatasync(#                   "a"."SYS_HHMMSSUU" =
"b"."SYS_HHMMSSUU" )
connxdatasync-#                   where ( "b".OID is NULL )
connxdatasync-# ;
NOTICE:  QUERY DUMP:


Re: Slow query needs a kick in the pants.

From
"Dann Corbit"
Date:
This is the SQL Server plan for the same query.  It may be worth
studying, since it is absurdly faster than what PostgreSQL does.


StmtText
StmtId      NodeId      Parent      PhysicalOp
LogicalOp                      Argument
DefinedValues
EstimateRows             EstimateIO               EstimateCPU
AvgRowSize  TotalSubtreeCost         OutputList
Warnings Type                           Parallel EstimateExecutions

------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
---------------------------------------- ----------- -----------
----------- ------------------------------
------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
-------------------------------
------------------------------------------------------------------
------------------------ ------------------------
------------------------ ----------- ------------------------
------------------------------------------------------------------
-------- ------------------------------ --------
------------------------
select a.crc from CNX_DS2_1_BCHFIL_FILE a left outer join
CNX_DS_1_BCHFIL_FILE b on ( a.FILE_KEY = b.FILE_KEY and a.SYS_YYYYMMDD =
b.SYS_YYYYMMDD and a.SYS_HHMMSSUU = b.SYS_HHMMSSUU ) where ( b.crc is
NULL )                                                  4           1
0           NULL                           NULL
NULL
NULL                                                               1.0
NULL                     NULL                     NULL        121.25754
NULL                                                               NULL
SELECT                         0        NULL
  |--Filter(WHERE:([b].[crc]=NULL))
4           3           1           Filter
Filter                         WHERE:([b].[crc]=NULL)
NULL                                                               1.0
0.0                      0.24271794               125         121.25754
[a].[crc]                                                          NULL
PLAN_ROW                       0        1.0
       |--Hash Match(Left Outer Join, HASH:([a].[FILE_KEY],
[a].[SYS_HHMMSSUU], [a].[SYS_YYYYMMDD])=([b].[FILE_KEY],
[b].[SYS_HHMMSSUU], [b].[SYS_YYYYMMDD]),
RESIDUAL:(([a].[FILE_KEY]=[b].[FILE_KEY] AND
[a].[SYS_HHMMSSUU]=[b].[SYS_HHMMSSUU]) AND [a].[SYS_Y 4           4
3           Hash Match                     Left Outer Join
HASH:([a].[FILE_KEY], [a].[SYS_HHMMSSUU],
[a].[SYS_YYYYMMDD])=([b].[FILE_KEY], [b].[SYS_HHMMSSUU],
[b].[SYS_YYYYMMDD]), RESIDUAL:(([a].[FILE_KEY]=[b].[FILE_KEY] AND
[a].[SYS_HHMMSSUU]=[b].[SYS_HHMMSSUU]) AND
[a].[SYS_YYYYMMDD]=[b].[SYS_YYYYMMDD])  NULL
1348433.0                42.871052                60.077095
125         120.61029                [a].[crc], [b].[crc]
NULL     PLAN_ROW                       0        1.0
            |--Table
Scan(OBJECT:([Scratch].[dbo].[CNX_DS2_1_BCHFIL_FILE] AS [a]))
4           5           4           Table Scan                     Table
Scan                     OBJECT:([Scratch].[dbo].[CNX_DS2_1_BCHFIL_FILE]
AS [a])
[a].[crc], [a].[FILE_KEY], [a].[SYS_HHMMSSUU], [a].[SYS_YYYYMMDD]
973970.0                 7.7205415                1.0714455
77          8.7919874                [a].[crc], [a].[FILE_KEY],
[a].[SYS_HHMMSSUU], [a].[SYS_YYYYMMDD]  NULL     PLAN_ROW
0        1.0
            |--Table Scan(OBJECT:([Scratch].[dbo].[CNX_DS_1_BCHFIL_FILE]
AS [b]))
4           6           4           Table Scan                     Table
Scan                     OBJECT:([Scratch].[dbo].[CNX_DS_1_BCHFIL_FILE]
AS [b])
[b].[crc], [b].[FILE_KEY], [b].[SYS_HHMMSSUU], [b].[SYS_YYYYMMDD]
983068.0                 7.7886896                1.0814533
77          8.8701429                [b].[crc], [b].[FILE_KEY],
[b].[SYS_HHMMSSUU], [b].[SYS_YYYYMMDD]  NULL     PLAN_ROW
0        1.0

> -----Original Message-----
> From: Dann Corbit
> Sent: Friday, March 28, 2003 10:55 PM
> To: pgsql-general@postgresql.org
> Subject: RE: [GENERAL] Slow query needs a kick in the pants.
>
>
> Food for optimizer thought...
>
> The same query, ported to SQL*Server, is 20x faster than PostgreSQL.
>
> -----Original Message-----
> From: Dann Corbit
> Sent: Thursday, March 27, 2003 3:30 PM
> To: pgsql-general@postgresql.org
> Subject: [GENERAL] Slow query needs a kick in the pants.
>
>
> This query:
>
> connxdatasync=# select "a".OID
> connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
> connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
> connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
> connxdatasync(#                   "a"."SYS_YYYYMMDD" =
> "b"."SYS_YYYYMMDD" and
> connxdatasync(#                   "a"."SYS_HHMMSSUU" =
> "b"."SYS_HHMMSSUU" )
> connxdatasync-#                   where ( "b".OID is NULL )
> connxdatasync-# ;
>
> Is abysmally slow.
>
> connxdatasync=# \d "CNX_DS2_1_BCHFIL_FILE"
>       Table "CNX_DS2_1_BCHFIL_FILE"
>   Attribute   |     Type      | Modifier
> --------------+---------------+----------
>  FILE_KEY     | character(30) |
>  SYS_YYYYMMDD | character(8)  |
>  SYS_HHMMSSUU | character(8)  |
>  CRC          | bigint        | not null
> Index: UA4IYKF5LY9402
>
> connxdatasync=# explain  VERBOSE
> connxdatasync-# select "a".OID
> connxdatasync-# from "CNX_DS2_1_BCHFIL_FILE" "a"
> connxdatasync-#         left outer join "CNX_DS_1_BCHFIL_FILE" "b" on
> connxdatasync-#                 ( "a"."FILE_KEY" = "b"."FILE_KEY" and
> connxdatasync(#                   "a"."SYS_YYYYMMDD" =
> "b"."SYS_YYYYMMDD" and
> connxdatasync(#                   "a"."SYS_HHMMSSUU" =
> "b"."SYS_HHMMSSUU" )
> connxdatasync-#                   where ( "b".OID is NULL )
> connxdatasync-# ;
> NOTICE:  QUERY DUMP:
>