Thread: hashjoin chosen over 1000x faster plan

hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

09 October 2007, 20:10:23

I have a situation where a query is running much slower than I would
expect.  The ANALYZE showed that it is hashing some information which
is rarely needed.  When I set enable_hashjoin = off for the
connection the query run in 1/1000 the time.

This isn't a debilitating level of performance, but it would be nice
to clean it up, and we haven't yet come up with a viable solution.

The runs below are after several identical runs to create a fully
cached situation.  Autovacuum is aggressive and there is a nightly
vacuum analyze of the whole database.  This box has 4 x 2 GHz Xeon
CPUs, 6 GB RAM, RAID 5 with 13 spindles on 256 MB BBU controller.

I simplified the original a bit; sorry it's still kinda big.

-Kevin

listen_addresses = '*'
max_connections = 200
shared_buffers = 160MB
temp_buffers = 50MB
work_mem = 10MB
maintenance_work_mem = 160MB
max_fsm_pages = 800000
bgwriter_lru_percent = 20.0
bgwriter_lru_maxpages = 200
bgwriter_all_percent = 10.0
bgwriter_all_maxpages = 600
wal_buffers = 160kB
checkpoint_segments = 10
random_page_cost = 2.0
effective_cache_size = 5GB
redirect_stderr = on
log_line_prefix = '[%m] %p %q<%u %d %r> '
stats_block_level = on
stats_row_level = on
autovacuum = on
autovacuum_naptime = 10s
autovacuum_vacuum_threshold = 1
autovacuum_analyze_threshold = 1
datestyle = 'iso, mdy'
lc_messages = 'C'
lc_monetary = 'C'
lc_numeric = 'C'
lc_time = 'C'
escape_string_warning = off
standard_conforming_strings = on
sql_inheritance = off

bigbird=> select version();
                                       version
-------------------------------------------------------------------------------------
 PostgreSQL 8.2.4 on i686-pc-linux-gnu, compiled by GCC gcc (GCC) 3.3.3 (SuSE Linux)
(1 row)

bigbird=> explain analyze
SELECT
    "CH"."caseNo",
    "CH"."countyNo",
    "CH"."chargeNo",
    "CH"."statuteCite",
    "CH"."sevClsCode",
    "CH"."modSevClsCode",
    "CH"."descr",
    "CH"."offenseDate",
    "CH"."pleaCode",
    "CH"."pleaDate",
    "CH"."chargeSeqNo",
    "CHST"."eventDate" AS "reopEventDate",
    "CTHE"."descr" AS "reopEventDescr"
  FROM "Charge" "CH"
  LEFT OUTER JOIN "CaseHist" "CHST"
      ON ( "CHST"."countyNo" = "CH"."countyNo"
       AND "CHST"."caseNo" = "CH"."caseNo"
       AND "CHST"."histSeqNo" = "CH"."reopHistSeqNo"
         )
  LEFT OUTER JOIN "CaseTypeHistEvent" "CTHE"
      ON ( "CHST"."eventType" = "CTHE"."eventType"
       AND "CHST"."caseType" = "CTHE"."caseType"
       AND "CHST"."countyNo" = "CTHE"."countyNo"
         )
  WHERE (
        ("CH"."caseNo" = '2005CF000001')
    AND ("CH"."countyNo" = 13))
  ORDER BY
    "chargeNo",
    "chargeSeqNo"
;
                                                                                               QUERY PLAN
                             

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=2554.50..2554.52 rows=7 width=146) (actual time=443.068..443.070 rows=3 loops=1)
   Sort Key: "CH"."chargeNo", "CH"."chargeSeqNo"
   ->  Hash Left Join  (cost=2318.91..2554.40 rows=7 width=146) (actual time=443.004..443.039 rows=3 loops=1)
         Hash Cond: ((("CHST"."eventType")::bpchar = ("CTHE"."eventType")::bpchar) AND (("CHST"."caseType")::bpchar =
("CTHE"."caseType")::bpchar))
         ->  Nested Loop Left Join  (cost=0.00..208.13 rows=7 width=131) (actual time=0.062..0.093 rows=3 loops=1)
               ->  Index Scan using "Charge_pkey" on "Charge" "CH"  (cost=0.00..15.37 rows=7 width=112) (actual
time=0.052..0.059rows=3 loops=1) 
                     Index Cond: ((("countyNo")::smallint = 13) AND (("caseNo")::bpchar = '2005CF000001'::bpchar))
               ->  Index Scan using "CaseHist_pkey" on "CaseHist" "CHST"  (cost=0.00..27.46 rows=6 width=41) (actual
time=0.002..0.002rows=0 loops=3) 
                     Index Cond: ((("CHST"."countyNo")::smallint = 13) AND (("CHST"."caseNo")::bpchar =
'2005CF000001'::bpchar)AND (("CHST"."histSeqNo")::smallint = ("CH"."reopHistSeqNo")::smallint)) 
         ->  Hash  (cost=2084.80..2084.80 rows=15607 width=98) (actual time=442.919..442.919 rows=15607 loops=1)
               ->  Subquery Scan "CTHE"  (cost=1630.43..2084.80 rows=15607 width=98) (actual time=331.665..411.390
rows=15607loops=1) 
                     ->  Merge Right Join  (cost=1630.43..1928.73 rows=15607 width=89) (actual time=331.661..391.999
rows=15607loops=1) 
                           Merge Cond: (((d."countyNo")::smallint = "inner"."?column9?") AND ((d."caseType")::bpchar =
"inner"."?column10?")AND ((d."eventType")::bpchar = "inner"."?column11?")) 
                           ->  Index Scan using "CaseTypeHistEventD_pkey" on "CaseTypeHistEventD" d  (cost=0.00..87.77
rows=2051width=21) (actual time=0.026..0.730 rows=434 loops=1) 
                           ->  Sort  (cost=1630.43..1669.45 rows=15607 width=76) (actual time=331.022..341.450
rows=15607loops=1) 
                                 Sort Key: (c."countyNo")::smallint, (b."caseType")::bpchar, (b."eventType")::bpchar
                                 ->  Nested Loop  (cost=0.00..543.41 rows=15607 width=76) (actual time=0.035..47.206
rows=15607loops=1) 
                                       ->  Index Scan using "ControlRecord_pkey" on "ControlRecord" c  (cost=0.00..4.27
rows=1width=2) (actual time=0.010..0.017 rows=1 loops=1) 
                                             Index Cond: (("countyNo")::smallint = 13)
                                       ->  Seq Scan on "CaseTypeHistEventB" b  (cost=0.00..383.07 rows=15607 width=74)
(actualtime=0.019..14.634 rows=15607 loops=1) 
 Total runtime: 444.452 ms
(21 rows)

bigbird=> set enable_hashjoin = off;
SET
bigbird=> [same query]
                                                                                                  QUERY PLAN
                             

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=3497.26..3497.28 rows=7 width=146) (actual time=0.115..0.117 rows=3 loops=1)
   Sort Key: "CH"."chargeNo", "CH"."chargeSeqNo"
   ->  Merge Left Join  (cost=3380.05..3497.17 rows=7 width=146) (actual time=0.091..0.097 rows=3 loops=1)
         Merge Cond: (("outer"."?column16?" = "inner"."?column5?") AND ("outer"."?column17?" = "inner"."?column6?"))
         ->  Sort  (cost=208.23..208.25 rows=7 width=131) (actual time=0.087..0.089 rows=3 loops=1)
               Sort Key: ("CHST"."caseType")::bpchar, ("CHST"."eventType")::bpchar
               ->  Nested Loop Left Join  (cost=0.00..208.13 rows=7 width=131) (actual time=0.053..0.070 rows=3
loops=1)
                     ->  Index Scan using "Charge_pkey" on "Charge" "CH"  (cost=0.00..15.37 rows=7 width=112) (actual
time=0.043..0.048rows=3 loops=1) 
                           Index Cond: ((("countyNo")::smallint = 13) AND (("caseNo")::bpchar =
'2005CF000001'::bpchar))
                     ->  Index Scan using "CaseHist_pkey" on "CaseHist" "CHST"  (cost=0.00..27.46 rows=6 width=41)
(actualtime=0.001..0.001 rows=0 loops=3) 
                           Index Cond: ((("CHST"."countyNo")::smallint = 13) AND (("CHST"."caseNo")::bpchar =
'2005CF000001'::bpchar)AND (("CHST"."histSeqNo")::smallint = ("CH"."reopHistSeqNo")::smallint)) 
         ->  Sort  (cost=3171.82..3210.84 rows=15607 width=98) (never executed)
               Sort Key: ("CTHE"."caseType")::bpchar, ("CTHE"."eventType")::bpchar
               ->  Subquery Scan "CTHE"  (cost=1630.43..2084.80 rows=15607 width=98) (never executed)
                     ->  Merge Right Join  (cost=1630.43..1928.73 rows=15607 width=89) (never executed)
                           Merge Cond: (((d."countyNo")::smallint = "inner"."?column9?") AND ((d."caseType")::bpchar =
"inner"."?column10?")AND ((d."eventType")::bpchar = "inner"."?column11?")) 
                           ->  Index Scan using "CaseTypeHistEventD_pkey" on "CaseTypeHistEventD" d  (cost=0.00..87.77
rows=2051width=21) (never executed) 
                           ->  Sort  (cost=1630.43..1669.45 rows=15607 width=76) (never executed)
                                 Sort Key: (c."countyNo")::smallint, (b."caseType")::bpchar, (b."eventType")::bpchar
                                 ->  Nested Loop  (cost=0.00..543.41 rows=15607 width=76) (never executed)
                                       ->  Index Scan using "ControlRecord_pkey" on "ControlRecord" c  (cost=0.00..4.27
rows=1width=2) (never executed) 
                                             Index Cond: (("countyNo")::smallint = 13)
                                       ->  Seq Scan on "CaseTypeHistEventB" b  (cost=0.00..383.07 rows=15607 width=74)
(neverexecuted) 
 Total runtime: 0.437 ms
(24 rows)

bigbird=> \d "Charge"
                Table "public.Charge"
       Column       |        Type         | Modifiers
--------------------+---------------------+-----------
 caseNo             | "CaseNoT"           | not null
 chargeSeqNo        | "ChargeSeqNoT"      | not null
 countyNo           | "CountyNoT"         | not null
 areSentCondsMet    | boolean             | not null
 caseType           | "CaseTypeT"         | not null
 chargeNo           | "ChargeNoT"         | not null
 descr              | "StatuteDescrT"     | not null
 isPartyTo          | boolean             | not null
 lastChargeModSeqNo | integer             | not null
 lastJdgmtSeqNo     | "JdgmtSeqNoT"       | not null
 ordStatuteFlag     | character(1)        | not null
 plntfAgencyNo      | "PlntfAgencyNoT"    | not null
 statuteAgencyNo    | "PlntfAgencyNoT"    | not null
 statuteCite        | "StatuteCiteT"      | not null
 statuteEffDate     | "DateT"             |
 arrestCaseNo       | "ArrestCaseNoT"     |
 arrestDate         | "DateT"             |
 arrestTrackingNo   | "ArrestTrackingNoT" |
 bookAgencyNo       | "IssAgencyNoT"      |
 bookCaseNo         | "BookCaseNoT"       |
 chargeId           | "ChargeIdT"         |
 dispoCode          | "DispoCodeT"        |
 issAgencyNo        | "IssAgencyNoT"      |
 modSevClsCode      | "SevClsCodeT"       |
 offenseDate        | "DateT"             |
 offenseDateRange   | "OffenseDateRangeT" |
 pleaCode           | "PleaCodeT"         |
 pleaDate           | "DateT"             |
 reopHistSeqNo      | "HistSeqNoT"        |
 sevClsCode         | "SevClsCodeT"       |
 statuteSevSeqNo    | "StatuteSevSeqNoT"  |
 wcisClsCode        | "WcisClsCodeT"      |
 pleaHistSeqNo      | "HistSeqNoT"        |
 chargeStatusCode   | "ChargeStatusCodeT" |
Indexes:
    "Charge_pkey" PRIMARY KEY, btree ("countyNo", "caseNo", "chargeSeqNo")
    "Charge_ArrestTrackingNo" UNIQUE, btree ("arrestTrackingNo", "countyNo", "caseNo", "chargeSeqNo")
    "Charge_OffenseDate" btree ("offenseDate", "countyNo", "issAgencyNo")

bigbird=> \d "CaseHist"
           Table "public.CaseHist"
    Column     |       Type       | Modifiers
---------------+------------------+-----------
 caseNo        | "CaseNoT"        | not null
 histSeqNo     | "HistSeqNoT"     | not null
 countyNo      | "CountyNoT"      | not null
 caseType      | "CaseTypeT"      |
 eventAmt      | "MoneyT"         |
 eventDate     | "DateT"          |
 eventType     | "EventTypeT"     |
 userId        | "UserIdT"        |
 courtRptrCode | "CtofcNoT"       |
 ctofcNo       | "CtofcNoT"       |
 dktTxt        | "TextT"          |
 prevRespCtofc | "CtofcNoT"       |
 tag           | "TagTypeT"       |
 tapeCounterNo | "TapeCounterNoT" |
 tapeLoc       | "TapeLocT"       |
 wcisReported  | "DateT"          |
 weightPd      | "PdCodeT"        |
 weightTime    | "CalDurationT"   |
 sealCtofcNo   | "CtofcNoT"       |
 sccaCaseNo    | "SccaCaseNoT"    |
Indexes:
    "CaseHist_pkey" PRIMARY KEY, btree ("countyNo", "caseNo", "histSeqNo")
    "CaseHist_CaseHistCibRpt" btree ("countyNo", "eventDate", "eventType", "caseType")

bigbird=> \d "CaseTypeHistEvent"
      View "public.CaseTypeHistEvent"
     Column     |     Type      | Modifiers
----------------+---------------+-----------
 caseType       | "CaseTypeT"   |
 eventType      | "EventTypeT"  |
 descr          | "EventDescrT" |
 isActive       | boolean       |
 isKeyEvent     | boolean       |
 isMoneyEnabled | boolean       |
 keyEventSeqNo  | integer       |
 countyNo       | "CountyNoT"   |
View definition:
 SELECT b."caseType", b."eventType", b.descr, b."isActive",
        CASE
            WHEN d."eventType" IS NOT NULL THEN d."isKeyEvent"
            ELSE b."isKeyEvent"
        END AS "isKeyEvent",
        CASE
            WHEN d."eventType" IS NOT NULL THEN d."isMoneyEnabled"
            ELSE b."isMoneyEnabled"
        END AS "isMoneyEnabled", COALESCE(
        CASE
            WHEN d."eventType" IS NOT NULL THEN d."keyEventSeqNo"::smallint
            ELSE b."keyEventSeqNo"::smallint
        END::integer, 0) AS "keyEventSeqNo", c."countyNo"
   FROM ONLY "CaseTypeHistEventB" b
   JOIN ONLY "ControlRecord" c ON 1 = 1
   LEFT JOIN ONLY "CaseTypeHistEventD" d ON d."caseType"::bpchar = b."caseType"::bpchar AND d."eventType"::bpchar =
b."eventType"::bpcharAND d."countyNo"::smallint = c."countyNo"::smallint; 

bigbird=> \d "CaseTypeHistEventB"
      Table "public.CaseTypeHistEventB"
     Column     |      Type      | Modifiers
----------------+----------------+-----------
 caseType       | "CaseTypeT"    | not null
 eventType      | "EventTypeT"   | not null
 descr          | "EventDescrT"  | not null
 isActive       | boolean        | not null
 isKeyEvent     | boolean        | not null
 isMoneyEnabled | boolean        | not null
 keyEventSeqNo  | "KeyEventSeqT" |
Indexes:
    "CaseTypeHistEventB_pkey" PRIMARY KEY, btree ("caseType", "eventType") CLUSTER

bigbird=> \d "CaseTypeHistEventD"
      Table "public.CaseTypeHistEventD"
     Column     |      Type      | Modifiers
----------------+----------------+-----------
 countyNo       | "CountyNoT"    | not null
 caseType       | "CaseTypeT"    | not null
 eventType      | "EventTypeT"   | not null
 isMoneyEnabled | boolean        | not null
 isKeyEvent     | boolean        | not null
 keyEventSeqNo  | "KeyEventSeqT" |
Indexes:
    "CaseTypeHistEventD_pkey" PRIMARY KEY, btree ("countyNo", "caseType", "eventType")
    "CaseTypeHistEventD_CaseType" btree ("caseType", "eventType")

bigbird=> select count(*), count("reopHistSeqNo") from "Charge";
  count   | count
----------+--------
 14041511 | 141720
(1 row)

Re: hashjoin chosen over 1000x faster plan

From

Simon Riggs

Date:

10 October 2007, 06:31:51

On Tue, 2007-10-09 at 15:09 -0500, Kevin Grittner wrote:

> I have a situation where a query is running much slower than I would
> expect.  The ANALYZE showed that it is hashing some information which
> is rarely needed.  When I set enable_hashjoin = off for the
> connection the query run in 1/1000 the time.

Can you confirm the two queries give identical outputs? It isn't clear
to me why the second sort is (never executed) in your second plan, which
I would only expect to see for an inner merge join.

Can you show the details for ControlRecord also.

--
  Simon Riggs
  2ndQuadrant  http://www.2ndQuadrant.com

Re: hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

10 October 2007, 14:15:36

>>> On Wed, Oct 10, 2007 at  1:31 AM, in message
<1191997904.4233.125.camel@ebony.site>, Simon Riggs <simon@2ndquadrant.com>
wrote:
> On Tue, 2007-10-09 at 15:09 -0500, Kevin Grittner wrote:
>
>> I have a situation where a query is running much slower than I would
>> expect.  The ANALYZE showed that it is hashing some information which
>> is rarely needed.  When I set enable_hashjoin = off for the
>> connection the query run in 1/1000 the time.
>
> Can you confirm the two queries give identical outputs?

I checked; the output is identical.

> It isn't clear
> to me why the second sort is (never executed) in your second plan, which
> I would only expect to see for an inner merge join.

I assume that is because there were no rows to sort.  The
CaseTypeHistEvent view is only needed if there is a link to an event
which reopens the charge after it is disposed.  This only happens for
about 1% of the Charge records.

The view is a bit weird, but evolved this way.  Originally, there
was a table by that name which was maintained statewide by our
organization (the Consolidated Court Automation Programs, or CCAP).
Then there was a decision to allow counties to override certain
columns with their own values.  Modifying the central copy and
merging the changes into 72 modified copies was nasty, so we split
the state-maintained portion and the county overrides into two tables
with a B (for Base) and D (for Distributed) suffix, and provided a
view to present the merged form to the existing queries.  That way
only the software to maintain the data needed to be modified, rather
than all the references to it.

There aren't a lot of rows in the distributed table; most counties
take the defaults.  The ControlRecord table is joined to the base to
show one row of the base data per county in the database.  This
performance problem is on the central, consolidated copy of all 72
counties.

> Can you show the details for ControlRecord also.

bigbird=> \d "ControlRecord"
              Table "public.ControlRecord"
       Column       |          Type          | Modifiers
--------------------+------------------------+-----------
 countyNo           | "CountyNoT"            | not null
 dispEventTime      | boolean                | not null
 exportDeletes      | boolean                | not null
 standAloneMode     | boolean                | not null
 sysMailData        | character(1)           | not null
 chargeClsEvent     | "EventTypeT"           |
 checkPrinterDriver | character varying(50)  |
 cofcCtofcNo        | "CtofcNoT"             |
 ctofcNo            | "CtofcNoT"             |
 defaultDaAttyNo    | "AttyNoT"              |
 districtNo         | "DistrictNoT"          |
 dktFee             | "MoneyT"               |
 dotCourtNo         | character(8)           |
 initialTrafCal     | "ActivityTypeT"        |
 maxToPrint         | smallint               |
 postJdgmtStatus    | "StatusCodeT"          |
 rcptPrinterDriver  | character varying(50)  |
 savedTxtFilePath   | character varying(120) |
 scffAmt            | "MoneyT"               |
 scsfAmt            | "MoneyT"               |
 taxWarrantNo       | "CountyNoT"            |
 dorAgencyNo        | character(10)          |
 jurorMailerPrntDrv | character varying(50)  |
 calKioskMessage    | "TextT"                |
 autoAssgnCaseEqual | boolean                | not null
 sectionLimit       | integer                | not null
 sectionBufferLimit | integer                | not null
 calKioskKeyboard   | character(1)           |
 saveCFRdoc         | boolean                |
 showAudioRecTab    | boolean                |
 weekdayStartTime   | "TimeT"                |
 weekdayEndTime     | "TimeT"                |
 saturdayStartTime  | "TimeT"                |
 saturdayEndTime    | "TimeT"                |
 sundayStartTime    | "TimeT"                |
 sundayEndTime      | "TimeT"                |
 reportStorageDays  | integer                |
Indexes:
    "ControlRecord_pkey" PRIMARY KEY, btree ("countyNo")

-Kevin

Re: hashjoin chosen over 1000x faster plan

From

Simon Riggs

Date:

10 October 2007, 17:05:57

On Wed, 2007-10-10 at 09:15 -0500, Kevin Grittner wrote:
> >>> On Wed, Oct 10, 2007 at  1:31 AM, in message
> <1191997904.4233.125.camel@ebony.site>, Simon Riggs <simon@2ndquadrant.com>
> wrote:
> > On Tue, 2007-10-09 at 15:09 -0500, Kevin Grittner wrote:
> >
> >> I have a situation where a query is running much slower than I would
> >> expect.  The ANALYZE showed that it is hashing some information which
> >> is rarely needed.  When I set enable_hashjoin = off for the
> >> connection the query run in 1/1000 the time.
> >
> > Can you confirm the two queries give identical outputs?
>
> I checked; the output is identical.
>
> > It isn't clear
> > to me why the second sort is (never executed) in your second plan, which
> > I would only expect to see for an inner merge join.
>
> I assume that is because there were no rows to sort.  The
> CaseTypeHistEvent view is only needed if there is a link to an event
> which reopens the charge after it is disposed.  This only happens for
> about 1% of the Charge records.

So CHST.EventType is mostly NULL? So the good news is that the default
plan works best when it does actually find a match. So for 1% of cases
you will have an execution time of about 1s, <1ms for the others if you
fiddle with the planner methods.

The planner thinks every row will find a match, yet the actual number is
only 1%. Hmmm, same section of code as last week.

Basically the planner doesn't ever optimise for the possibility of the
never-executed case because even a single row returned would destroy
that assumption.

If we had an Option node in there, we could run the first part of the
plan before deciding whether to do an MJ or an HJ. Doing that would
avoid doing 2 sorts and return even quicker in the common case (about
80% time) without being slower in the slowest.

--
  Simon Riggs
  2ndQuadrant  http://www.2ndQuadrant.com

Re: hashjoin chosen over 1000x faster plan

From

Tom Lane

Date:

10 October 2007, 18:07:40

Simon Riggs <simon@2ndquadrant.com> writes:
> Basically the planner doesn't ever optimise for the possibility of the
> never-executed case because even a single row returned would destroy
> that assumption.

It's worse than that: the outer subplan *does* return some rows.
I suppose that all of them had NULLs in the join keys, which means
that (since 8.1 or so) nodeMergejoin discards them as unmatchable.
Had even one been non-NULL the expensive subplan would have been run.

This seems like too much of a corner case to justify adding a lot of
machinery for.

            regards, tom lane

Re: hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

10 October 2007, 18:30:31

>>> On Wed, Oct 10, 2007 at  1:07 PM, in message <20980.1192039650@sss.pgh.pa.us>,
Tom Lane <tgl@sss.pgh.pa.us> wrote:
> Simon Riggs <simon@2ndquadrant.com> writes:
>> Basically the planner doesn't ever optimise for the possibility of the
>> never-executed case because even a single row returned would destroy
>> that assumption.
>
> It's worse than that: the outer subplan *does* return some rows.
> I suppose that all of them had NULLs in the join keys, which means
> that (since 8.1 or so) nodeMergejoin discards them as unmatchable.
> Had even one been non-NULL the expensive subplan would have been run.

Well, this query is run tens of thousands of times per day by our web
application; less than one percent of those runs would require the
subplan.  (In my initial post I showed counts to demonstrate that 1%
of the rows had a non-NULL value and, while I wouldn't expect the
planner to know this, these tend to be clustered on a lower
percentage of cases.)  If the philosophy of the planner is to go for
the lowest average cost (versus lowest worst case cost) shouldn't it
use the statistics for to look at the percentage of NULLs?

-Kevin

Re: hashjoin chosen over 1000x faster plan

From

Simon Riggs

Date:

10 October 2007, 18:55:06

On Wed, 2007-10-10 at 13:30 -0500, Kevin Grittner wrote:
> >>> On Wed, Oct 10, 2007 at  1:07 PM, in message <20980.1192039650@sss.pgh.pa.us>,
> Tom Lane <tgl@sss.pgh.pa.us> wrote:
> > Simon Riggs <simon@2ndquadrant.com> writes:
> >> Basically the planner doesn't ever optimise for the possibility of the
> >> never-executed case because even a single row returned would destroy
> >> that assumption.
> >
> > It's worse than that: the outer subplan *does* return some rows.
> > I suppose that all of them had NULLs in the join keys, which means
> > that (since 8.1 or so) nodeMergejoin discards them as unmatchable.
> > Had even one been non-NULL the expensive subplan would have been run.
>
> Well, this query is run tens of thousands of times per day by our web
> application; less than one percent of those runs would require the
> subplan.  (In my initial post I showed counts to demonstrate that 1%
> of the rows had a non-NULL value and, while I wouldn't expect the
> planner to know this, these tend to be clustered on a lower
> percentage of cases.)  If the philosophy of the planner is to go for
> the lowest average cost (versus lowest worst case cost) shouldn't it
> use the statistics for to look at the percentage of NULLs?

But the planner doesn't work on probability. It works on a best-guess
selectivity, as known at planning time.

That's why dynamic planning was invented, which we don't do yet. Don't
hold your breath either.

--
  Simon Riggs
  2ndQuadrant  http://www.2ndQuadrant.com

Re: hashjoin chosen over 1000x faster plan

From

Simon Riggs

Date:

10 October 2007, 19:01:18

On Wed, 2007-10-10 at 14:07 -0400, Tom Lane wrote:
> Simon Riggs <simon@2ndquadrant.com> writes:
> > Basically the planner doesn't ever optimise for the possibility of the
> > never-executed case because even a single row returned would destroy
> > that assumption.
>
> It's worse than that: the outer subplan *does* return some rows.
> I suppose that all of them had NULLs in the join keys, which means
> that (since 8.1 or so) nodeMergejoin discards them as unmatchable.
> Had even one been non-NULL the expensive subplan would have been run.

Yup

> This seems like too much of a corner case to justify adding a lot of
> machinery for.

Well, I thought about that and it is pretty common to have root classes
left outer joined to sub-classes, if you are using the One Table per
Subclass object-relational mapping. The joined-subclass mapping within
Hibernate implements this.

Not everybody uses that, but it is an option some people take in some
circumstances. So we should keep it on our radar if we want to extend
our for support complex applications.

--
  Simon Riggs
  2ndQuadrant  http://www.2ndQuadrant.com

Re: hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

10 October 2007, 19:36:29

>>> On Wed, Oct 10, 2007 at  1:54 PM, in message
<1192042492.4233.334.camel@ebony.site>, Simon Riggs <simon@2ndquadrant.com>
wrote:
>
> But the planner doesn't work on probability. It works on a best-guess
> selectivity, as known at planning time.

The point I'm trying to make is that at planning time the
pg_statistic row for this "Charge"."reopHistSeqNo" column showed
stanullfrac as 0.989; it doesn't seem to have taken this into account
when making its guess about how many rows would be joined when it was
compared to the primary key column of the "CaseHist" table.  I'm
suggesting that it might be a good thing if it did.

-Kevin

Re: hashjoin chosen over 1000x faster plan

From

Simon Riggs

Date:

10 October 2007, 19:52:30

On Wed, 2007-10-10 at 14:35 -0500, Kevin Grittner wrote:
> >>> On Wed, Oct 10, 2007 at  1:54 PM, in message
> <1192042492.4233.334.camel@ebony.site>, Simon Riggs <simon@2ndquadrant.com>
> wrote:
> >
> > But the planner doesn't work on probability. It works on a best-guess
> > selectivity, as known at planning time.
>
> The point I'm trying to make is that at planning time the
> pg_statistic row for this "Charge"."reopHistSeqNo" column showed
> stanullfrac as 0.989; it doesn't seem to have taken this into account
> when making its guess about how many rows would be joined when it was
> compared to the primary key column of the "CaseHist" table.  I'm
> suggesting that it might be a good thing if it did.

Understood, it would be a good thing if it did.

It's more complex than you think:

The fast plan is an all-or-nothing plan. It is *only* faster when the
number of matched rows is zero. You know it is zero, but currently the
planner doesn't, nor is it able to make use of the information when it
has it, half thru execution. Even if we could work out the high
probability of it being zero, we would still be left with the decision
of whether to optimise for the zero or for the non-zero.

--
  Simon Riggs
  2ndQuadrant  http://www.2ndQuadrant.com

Re: hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

10 October 2007, 20:12:05

>>> On Wed, Oct 10, 2007 at  2:52 PM, in message
<1192045945.4233.351.camel@ebony.site>, Simon Riggs <simon@2ndquadrant.com>
wrote:
>
> The fast plan is an all-or-nothing plan. It is *only* faster when the
> number of matched rows is zero. You know it is zero, but currently the
> planner doesn't, nor is it able to make use of the information when it
> has it, half thru execution. Even if we could work out the high
> probability of it being zero, we would still be left with the decision
> of whether to optimise for the zero or for the non-zero.

For a different case number which has four charges, two reopened:

 Sort  (cost=2450.27..2450.28 rows=4 width=146) (actual time=463.048..463.052 rows=4 loops=1)
   Sort Key: "CH"."chargeNo", "CH"."chargeSeqNo"
   ->  Hash Left Join  (cost=2318.93..2450.23 rows=4 width=146) (actual time=462.857..462.995 rows=4 loops=1)
         Hash Cond: ((("CHST"."eventType")::bpchar = ("CTHE"."eventType")::bpchar) AND (("CHST"."caseType")::bpchar =
("CTHE"."caseType")::bpchar))
         ->  Nested Loop Left Join  (cost=0.00..115.67 rows=4 width=131) (actual time=0.045..0.165 rows=4 loops=1)
               ->  Index Scan using "Charge_pkey" on "Charge" "CH"  (cost=0.00..10.69 rows=4 width=112) (actual
time=0.036..0.053rows=4 loops=1) 
                     Index Cond: ((("countyNo")::smallint = 13) AND (("caseNo")::bpchar = '2004CF002575'::bpchar))
               ->  Index Scan using "CaseHist_pkey" on "CaseHist" "CHST"  (cost=0.00..26.18 rows=5 width=41) (actual
time=0.018..0.019rows=0 loops=4) 
                     Index Cond: ((("CHST"."countyNo")::smallint = 13) AND (("CHST"."caseNo")::bpchar =
'2004CF002575'::bpchar)AND (("CHST"."histSeqNo")::smallint = ("CH"."reopHistSeqNo")::smallint)) 
         ->  Hash  (cost=2084.82..2084.82 rows=15607 width=98) (actual time=462.780..462.780 rows=15607 loops=1)
               ->  Subquery Scan "CTHE"  (cost=1630.43..2084.82 rows=15607 width=98) (actual time=355.962..433.081
rows=15607loops=1) 
                     ->  Merge Right Join  (cost=1630.43..1928.75 rows=15607 width=89) (actual time=355.960..414.249
rows=15607loops=1) 
                           Merge Cond: (((d."countyNo")::smallint = "inner"."?column9?") AND ((d."caseType")::bpchar =
"inner"."?column10?")AND ((d."eventType")::bpchar = "inner"."?column11?")) 
                           ->  Index Scan using "CaseTypeHistEventD_pkey" on "CaseTypeHistEventD" d  (cost=0.00..87.77
rows=2051width=21) (actual time=0.025..0.713 rows=434 loops=1) 
                           ->  Sort  (cost=1630.43..1669.45 rows=15607 width=76) (actual time=355.320..365.251
rows=15607loops=1) 
                                 Sort Key: (c."countyNo")::smallint, (b."caseType")::bpchar, (b."eventType")::bpchar
                                 ->  Nested Loop  (cost=0.00..543.41 rows=15607 width=76) (actual time=0.035..46.914
rows=15607loops=1) 
                                       ->  Index Scan using "ControlRecord_pkey" on "ControlRecord" c  (cost=0.00..4.27
rows=1width=2) (actual time=0.010..0.019 rows=1 loops=1) 
                                             Index Cond: (("countyNo")::smallint = 13)
                                       ->  Seq Scan on "CaseTypeHistEventB" b  (cost=0.00..383.07 rows=15607 width=74)
(actualtime=0.019..14.069 rows=15607 loops=1) 
 Total runtime: 464.588 ms
(21 rows)

With set enable_hashjoin = off:

 Sort  (cost=3404.68..3404.69 rows=4 width=146) (actual time=448.049..448.053 rows=4 loops=1)
   Sort Key: "CH"."chargeNo", "CH"."chargeSeqNo"
   ->  Merge Left Join  (cost=3287.55..3404.64 rows=4 width=146) (actual time=447.986..448.005 rows=4 loops=1)
         Merge Cond: (("outer"."?column16?" = "inner"."?column5?") AND ("outer"."?column17?" = "inner"."?column6?"))
         ->  Sort  (cost=115.71..115.72 rows=4 width=131) (actual time=0.179..0.182 rows=4 loops=1)
               Sort Key: ("CHST"."caseType")::bpchar, ("CHST"."eventType")::bpchar
               ->  Nested Loop Left Join  (cost=0.00..115.67 rows=4 width=131) (actual time=0.051..0.139 rows=4
loops=1)
                     ->  Index Scan using "Charge_pkey" on "Charge" "CH"  (cost=0.00..10.69 rows=4 width=112) (actual
time=0.040..0.053rows=4 loops=1) 
                           Index Cond: ((("countyNo")::smallint = 13) AND (("caseNo")::bpchar =
'2004CF002575'::bpchar))
                     ->  Index Scan using "CaseHist_pkey" on "CaseHist" "CHST"  (cost=0.00..26.18 rows=5 width=41)
(actualtime=0.013..0.014 rows=0 loops=4) 
                           Index Cond: ((("CHST"."countyNo")::smallint = 13) AND (("CHST"."caseNo")::bpchar =
'2004CF002575'::bpchar)AND (("CHST"."histSeqNo")::smallint = ("CH"."reopHistSeqNo")::smallint)) 
         ->  Sort  (cost=3171.84..3210.86 rows=15607 width=98) (actual time=446.459..446.936 rows=768 loops=1)
               Sort Key: ("CTHE"."caseType")::bpchar, ("CTHE"."eventType")::bpchar
               ->  Subquery Scan "CTHE"  (cost=1630.43..2084.82 rows=15607 width=98) (actual time=322.928..405.654
rows=15607loops=1) 
                     ->  Merge Right Join  (cost=1630.43..1928.75 rows=15607 width=89) (actual time=322.922..381.371
rows=15607loops=1) 
                           Merge Cond: (((d."countyNo")::smallint = "inner"."?column9?") AND ((d."caseType")::bpchar =
"inner"."?column10?")AND ((d."eventType")::bpchar = "inner"."?column11?")) 
                           ->  Index Scan using "CaseTypeHistEventD_pkey" on "CaseTypeHistEventD" d  (cost=0.00..87.77
rows=2051width=21) (actual time=0.024..0.734 rows=434 loops=1) 
                           ->  Sort  (cost=1630.43..1669.45 rows=15607 width=76) (actual time=322.294..332.182
rows=15607loops=1) 
                                 Sort Key: (c."countyNo")::smallint, (b."caseType")::bpchar, (b."eventType")::bpchar
                                 ->  Nested Loop  (cost=0.00..543.41 rows=15607 width=76) (actual time=0.035..45.539
rows=15607loops=1) 
                                       ->  Index Scan using "ControlRecord_pkey" on "ControlRecord" c  (cost=0.00..4.27
rows=1width=2) (actual time=0.010..0.016 rows=1 loops=1) 
                                             Index Cond: (("countyNo")::smallint = 13)
                                       ->  Seq Scan on "CaseTypeHistEventB" b  (cost=0.00..383.07 rows=15607 width=74)
(actualtime=0.019..13.754 rows=15607 loops=1) 
 Total runtime: 449.660 ms
(24 rows)

So in all cases it is faster without the hashjoin; it's just a
question of whether it is 4% faster or 1000 times faster, with a 99+%
chance of being 1000 times faster.

This may get back to a question I've always had about the wisdom of
rounding fractional reads to whole numbers.  You lose information
which might lead to better plan choices.  You can't read half a row,
but you can read one row half the time.

-Kevin

Re: hashjoin chosen over 1000x faster plan

From

Tom Lane

Date:

10 October 2007, 20:33:39

"Kevin Grittner" <Kevin.Grittner@wicourts.gov> writes:
> The point I'm trying to make is that at planning time the
> pg_statistic row for this "Charge"."reopHistSeqNo" column showed
> stanullfrac as 0.989; it doesn't seem to have taken this into account
> when making its guess about how many rows would be joined when it was
> compared to the primary key column of the "CaseHist" table.

It certainly does take nulls into account, but the estimate of resulting
rows was still nonzero; and even if it were zero, I'd be very hesitant
to make it choose a plan that is fast only if there were exactly zero
such rows and is slow otherwise.  Most of the complaints we've had about
issues of this sort involve the opposite problem, ie, the planner is
choosing a plan that works well for few rows but falls down because
reality involves many rows.  "Fast-for-few-rows" plans are usually a lot
more brittle than the alternatives in terms of the penalty you pay for
too many rows, and so putting a thumb on the scales to push it towards a
"fast" corner case sounds pretty unsafe to me.

As Simon notes, the only technically sound way to handle this would
involve run-time plan changeover, which is something we're not nearly
ready to tackle.

            regards, tom lane

Re: hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

10 October 2007, 20:48:59

>>> On Wed, Oct 10, 2007 at  3:32 PM, in message <23650.1192048377@sss.pgh.pa.us>,
Tom Lane <tgl@sss.pgh.pa.us> wrote:
>
> I'd be very hesitant
> to make it choose a plan that is fast only if there were exactly zero
> such rows and is slow otherwise.

I'm not sure why it looks at the slow option at all; it seems like a remaining weakness in the OUTER JOIN
optimizations. If I change the query to use an inner join between the CaseHist table and the view, I get more of what I
wasexpecting for the "slow" option.  This ten times faster, and I can't see why it would not be usable with an outer
join.

bigbird=# explain analyze
bigbird-# SELECT
bigbird-#     "CH"."caseNo",
bigbird-#     "CH"."countyNo",
bigbird-#     "CH"."chargeNo",
bigbird-#     "CH"."statuteCite",
bigbird-#     "CH"."sevClsCode",
bigbird-#     "CH"."modSevClsCode",
bigbird-#     "CH"."descr",
bigbird-#     "CH"."offenseDate",
bigbird-#     "CH"."pleaCode",
bigbird-#     "CH"."pleaDate",
bigbird-#     "CH"."chargeSeqNo",
bigbird-#     "CHST"."eventDate" AS "reopEventDate",
bigbird-#     "CTHE"."descr" AS "reopEventDescr"
bigbird-#   FROM "Charge" "CH"
bigbird-#   LEFT OUTER JOIN "CaseHist" "CHST"
bigbird-#       ON ( "CHST"."countyNo" = "CH"."countyNo"
bigbird(#        AND "CHST"."caseNo" = "CH"."caseNo"
bigbird(#        AND "CHST"."histSeqNo" = "CH"."reopHistSeqNo"
bigbird(#          )
bigbird-#   JOIN "CaseTypeHistEvent" "CTHE"
bigbird-#       ON ( "CHST"."eventType" = "CTHE"."eventType"
bigbird(#        AND "CHST"."caseType" = "CTHE"."caseType"
bigbird(#        AND "CHST"."countyNo" = "CTHE"."countyNo"
bigbird(#          )
bigbird-#   WHERE (
bigbird(#         ("CH"."caseNo" = '2004CF002575')
bigbird(#     AND ("CH"."countyNo" = 13))
bigbird-#   ORDER BY
bigbird-#     "chargeNo",
bigbird-#     "chargeSeqNo"
bigbird-# ;
                                                                                                     QUERY PLAN
                                   

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=126.69..126.70 rows=1 width=168) (actual time=36.854..36.855 rows=2 loops=1)
   Sort Key: "CH"."chargeNo", "CH"."chargeSeqNo"
   ->  Nested Loop Left Join  (cost=0.00..126.68 rows=1 width=168) (actual time=36.465..36.623 rows=2 loops=1)
         Join Filter: ((d."countyNo")::smallint = (c."countyNo")::smallint)
         ->  Nested Loop  (cost=0.00..123.44 rows=1 width=185) (actual time=24.264..24.408 rows=2 loops=1)
               ->  Index Scan using "ControlRecord_pkey" on "ControlRecord" c  (cost=0.00..4.27 rows=1 width=2) (actual
time=9.424..9.427rows=1 loops=1) 
                     Index Cond: (13 = ("countyNo")::smallint)
               ->  Nested Loop  (cost=0.00..119.16 rows=1 width=185) (actual time=14.835..14.975 rows=2 loops=1)
                     ->  Nested Loop  (cost=0.00..115.67 rows=1 width=131) (actual time=8.346..8.463 rows=2 loops=1)
                           ->  Index Scan using "Charge_pkey" on "Charge" "CH"  (cost=0.00..10.69 rows=4 width=112)
(actualtime=5.723..8.228 rows=4 loops=1) 
                                 Index Cond: ((("countyNo")::smallint = 13) AND (("caseNo")::bpchar =
'2004CF002575'::bpchar))
                           ->  Index Scan using "CaseHist_pkey" on "CaseHist" "CHST"  (cost=0.00..26.18 rows=5
width=41)(actual time=0.052..0.053 rows=0 loops=4) 
                                 Index Cond: ((13 = ("CHST"."countyNo")::smallint) AND ('2004CF002575'::bpchar =
("CHST"."caseNo")::bpchar)AND (("CHST"."histSeqNo")::smallint = ("CH"."reopHistSeqNo")::smallint)) 
                     ->  Index Scan using "CaseTypeHistEventB_pkey" on "CaseTypeHistEventB" b  (cost=0.00..3.48 rows=1
width=69)(actual time=3.248..3.250 rows=1 loops=2) 
                           Index Cond: ((("CHST"."caseType")::bpchar = (b."caseType")::bpchar) AND
(("CHST"."eventType")::bpchar= (b."eventType")::bpchar)) 
         ->  Index Scan using "CaseTypeHistEventD_CaseType" on "CaseTypeHistEventD" d  (cost=0.00..3.23 rows=1
width=17)(actual time=6.103..6.103 rows=0 loops=2) 
               Index Cond: (((d."caseType")::bpchar = (b."caseType")::bpchar) AND ((d."eventType")::bpchar =
(b."eventType")::bpchar))
 Total runtime: 46.072 ms
(18 rows)

-Kevin

Re: hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

10 October 2007, 21:03:20

>>> On Wed, Oct 10, 2007 at  3:48 PM, in message
<470CF450.EE98.0025.0@wicourts.gov>, "Kevin Grittner"
<Kevin.Grittner@wicourts.gov> wrote:
>
> This ten times faster

That understates it -- I forgot to get things cached, as I had done
for all the other tests.  When cached, this is sub-millisecond,
although not quite the 1000-fold increase which I get when no matches
are found.

-Kevin

 Sort  (cost=126.70..126.70 rows=1 width=168) (actual time=0.259..0.261 rows=2 loops=1)
   Sort Key: "CH"."chargeNo", "CH"."chargeSeqNo"
   ->  Nested Loop Left Join  (cost=0.00..126.69 rows=1 width=168) (actual time=0.157..0.234 rows=2 loops=1)
         Join Filter: ((d."countyNo")::smallint = (c."countyNo")::smallint)
         ->  Nested Loop  (cost=0.00..123.44 rows=1 width=185) (actual time=0.139..0.203 rows=2 loops=1)
               ->  Index Scan using "ControlRecord_pkey" on "ControlRecord" c  (cost=0.00..4.27 rows=1 width=2) (actual
time=0.024..0.026rows=1 loops=1) 
                     Index Cond: (13 = ("countyNo")::smallint)
               ->  Nested Loop  (cost=0.00..119.17 rows=1 width=185) (actual time=0.109..0.169 rows=2 loops=1)
                     ->  Nested Loop  (cost=0.00..115.67 rows=1 width=131) (actual time=0.087..0.127 rows=2 loops=1)
                           ->  Index Scan using "Charge_pkey" on "Charge" "CH"  (cost=0.00..10.69 rows=4 width=112)
(actualtime=0.038..0.051 rows=4 loops=1) 
                                 Index Cond: ((("countyNo")::smallint = 13) AND (("caseNo")::bpchar =
'2004CF002575'::bpchar))
                           ->  Index Scan using "CaseHist_pkey" on "CaseHist" "CHST"  (cost=0.00..26.18 rows=5
width=41)(actual time=0.014..0.015 rows=0 loops=4) 
                                 Index Cond: ((13 = ("CHST"."countyNo")::smallint) AND ('2004CF002575'::bpchar =
("CHST"."caseNo")::bpchar)AND (("CHST"."histSeqNo")::smallint = ("CH"."reopHistSeqNo")::smallint)) 
                     ->  Index Scan using "CaseTypeHistEventB_pkey" on "CaseTypeHistEventB" b  (cost=0.00..3.48 rows=1
width=69)(actual time=0.015..0.017 rows=1 loops=2) 
                           Index Cond: ((("CHST"."caseType")::bpchar = (b."caseType")::bpchar) AND
(("CHST"."eventType")::bpchar= (b."eventType")::bpchar)) 
         ->  Index Scan using "CaseTypeHistEventD_CaseType" on "CaseTypeHistEventD" d  (cost=0.00..3.23 rows=1
width=17)(actual time=0.011..0.011 rows=0 loops=2) 
               Index Cond: (((d."caseType")::bpchar = (b."caseType")::bpchar) AND ((d."eventType")::bpchar =
(b."eventType")::bpchar))
 Total runtime: 0.605 ms
(18 rows)

Re: hashjoin chosen over 1000x faster plan

From

Tom Lane

Date:

10 October 2007, 22:09:09

"Kevin Grittner" <Kevin.Grittner@wicourts.gov> writes:
> I'm not sure why it looks at the slow option at all; it seems like a remain=
> ing weakness in the OUTER JOIN optimizations.

I think that comes mostly from the fact that you've got non-nullable
targetlist entries in the definition of the CaseTypeHistEvent view.
Those prevent that view from being flattened into the upper query when
it's underneath an outer join, because the current variable-evaluation
rules provide no other way to ensure that the values are forced NULL
when they need to be.  This is something we should fix someday but don't
hold your breath waiting --- it's likely to take some pretty fundamental
rejiggering of the planner's handling of Vars.

            regards, tom lane

Re: hashjoin chosen over 1000x faster plan

From

"Kevin Grittner"

Date:

10 October 2007, 22:09:48

>>> On Wed, Oct 10, 2007 at  3:48 PM, in message
<470CF450.EE98.0025.0@wicourts.gov>, "Kevin Grittner"
<Kevin.Grittner@wicourts.gov> wrote:
> I'm not sure why it looks at the slow option at all; it seems like a
> remaining weakness in the OUTER JOIN optimizations.  If I change the query to
> use an inner join between the CaseHist table and the view, I get more of what
> I was expecting for the "slow" option.

Just to wrap this up (from my perspective), it looks like we're
headed to a workaround of using the underlying "base" table instead
of the view.  We ignore any county override of our description, but
performance is good, and they were reluctant to change it to an inner
join.

-Kevin

SELECT
    "CH"."caseNo",
    "CH"."countyNo",
    "CH"."chargeNo",
    "CH"."statuteCite",
    "CH"."sevClsCode",
    "CH"."modSevClsCode",
    "CH"."descr",
    "CH"."offenseDate",
    "CH"."pleaCode",
    "CH"."pleaDate",
    "CH"."chargeSeqNo",
    "CHST"."eventDate" AS "reopEventDate",
    "CTHE"."descr" AS "reopEventDescr"
  FROM "Charge" "CH"
  LEFT OUTER JOIN "CaseHist" "CHST"
      ON ( "CHST"."countyNo" = "CH"."countyNo"
       AND "CHST"."caseNo" = "CH"."caseNo"
       AND "CHST"."histSeqNo" = "CH"."reopHistSeqNo"
         )
  LEFT OUTER JOIN "CaseTypeHistEventB" "CTHE"
      ON ( "CHST"."eventType" = "CTHE"."eventType"
       AND "CHST"."caseType" = "CTHE"."caseType"
         )
  WHERE (
        ("CH"."caseNo" = '2004CF002575')
    AND ("CH"."countyNo" = 13))
  ORDER BY
    "chargeNo",
    "chargeSeqNo"
;

 Sort  (cost=129.70..129.71 rows=4 width=168) (actual time=0.218..0.220 rows=4 loops=1)
   Sort Key: "CH"."chargeNo", "CH"."chargeSeqNo"
   ->  Nested Loop Left Join  (cost=0.00..129.66 rows=4 width=168) (actual time=0.059..0.190 rows=4 loops=1)
         ->  Nested Loop Left Join  (cost=0.00..115.67 rows=4 width=129) (actual time=0.055..0.139 rows=4 loops=1)
               ->  Index Scan using "Charge_pkey" on "Charge" "CH"  (cost=0.00..10.69 rows=4 width=112) (actual
time=0.046..0.059rows=4 loops=1) 
                     Index Cond: ((("countyNo")::smallint = 13) AND (("caseNo")::bpchar = '2004CF002575'::bpchar))
               ->  Index Scan using "CaseHist_pkey" on "CaseHist" "CHST"  (cost=0.00..26.18 rows=5 width=41) (actual
time=0.013..0.014rows=0 loops=4) 
                     Index Cond: ((("CHST"."countyNo")::smallint = 13) AND (("CHST"."caseNo")::bpchar =
'2004CF002575'::bpchar)AND (("CHST"."histSeqNo")::smallint = ("CH"."reopHistSeqNo")::smallint)) 
         ->  Index Scan using "CaseTypeHistEventB_pkey" on "CaseTypeHistEventB" "CTHE"  (cost=0.00..3.48 rows=1
width=69)(actual time=0.008..0.009 rows=0 loops=4) 
               Index Cond: ((("CHST"."caseType")::bpchar = ("CTHE"."caseType")::bpchar) AND
(("CHST"."eventType")::bpchar= ("CTHE"."eventType")::bpchar)) 
 Total runtime: 0.410 ms
(11 rows)