Thread: WHERE ... IN condition and multiple columns in subquery

WHERE ... IN condition and multiple columns in subquery

From
Alexander Farber
Date:
Hello,

is it please possible to rewrite the SQL query

        SELECT DISTINCT ON (uid)
        uid,
        female,
        given,
                photo,
                place
        FROM words_social
        WHERE uid IN (SELECT player1 FROM games)
                OR uid IN (SELECT player2 FROM games)
        ORDER BY uid, stamp DESC

where first column player1 is fetched in a subquery and then column player2 is fetched from the same table?

I've searched around and it seems that a JOIN should be used here, but can not figure out exactly how.

Thank you
Alex

PS: Below are my tables and the actual CTE query which works well, but I'd like to optimize:

CREATE TABLE words_social (
        sid varchar(255) NOT NULL,

        social integer NOT NULL CHECK (0 <= social AND social <= 6),
        female integer NOT NULL CHECK (female = 0 OR female = 1),
        given  varchar(255) NOT NULL CHECK (given ~ '\S'),
        family varchar(255),
        photo  varchar(255) CHECK (photo ~* '^https?://...'),
        place  varchar(255),
        stamp  integer NOT NULL,              /* only the most recent stamp is used */

        uid integer NOT NULL REFERENCES words_users ON DELETE CASCADE,
        PRIMARY KEY(sid, social)
);

CREATE TABLE words_games (
        gid SERIAL PRIMARY KEY,

        created timestamptz NOT NULL,
        finished timestamptz,

        player1 integer REFERENCES words_users(uid) ON DELETE CASCADE NOT NULL,
        player2 integer REFERENCES words_users(uid) ON DELETE CASCADE,

        played1 timestamptz,
        played2 timestamptz,

        mid integer /* REFERENCES words_moves */,

        score1 integer NOT NULL CHECK (score1 >= 0),
        score2 integer NOT NULL CHECK (score2 >= 0),

        hand1 varchar[7] NOT NULL,
        hand2 varchar[7] NOT NULL,
        pile  varchar[116] NOT NULL,

        letters varchar[15][15] NOT NULL,
        values integer[15][15] NOT NULL,
        bid integer NOT NULL REFERENCES words_boards ON DELETE CASCADE
);

CREATE OR REPLACE FUNCTION words_get_games(in_uid integer)
        RETURNS TABLE (
                out_gid integer,
                out_created integer,
                out_finished integer,
                out_player1 integer,
                out_player2 integer,
                out_played1 integer,
                out_played2 integer,
                out_score1 integer,
                out_score2 integer,
                out_hand1 text,
                out_hand2 text,
                out_letters varchar[15][15],
                out_values integer[15][15],
                out_bid integer,
                out_last_tiles jsonb,
                out_last_score integer,
                out_female1 integer,
                out_female2 integer,
                out_given1 varchar,
                out_given2 varchar,
                out_photo1 varchar,
                out_photo2 varchar,
                out_place1 varchar,
                out_place2 varchar
        ) AS
$func$
        WITH games AS (
                SELECT
                        g.gid,
                        EXTRACT(EPOCH FROM g.created)::int AS created,
                        EXTRACT(EPOCH FROM g.finished)::int AS finished,
                        g.player1,
                        g.player2, -- can be NULL
                        EXTRACT(EPOCH FROM g.played1)::int AS played1,
                        EXTRACT(EPOCH FROM g.played2)::int AS played2,
                        g.score1,
                        g.score2,
                        ARRAY_TO_STRING(g.hand1, '') AS hand1,
                        REGEXP_REPLACE(ARRAY_TO_STRING(g.hand2, ''), '.', '?', 'g') AS hand2,
                        g.letters,
                        g.values,
                        g.bid,
                        m.tiles AS last_tiles,
                        m.score AS last_score
                FROM words_games g LEFT JOIN words_moves m USING(mid)
                WHERE g.player1 = in_uid
                AND (g.finished IS NULL OR g.finished > CURRENT_TIMESTAMP - INTERVAL '1 day')
                UNION SELECT
                        g.gid,
                        EXTRACT(EPOCH FROM g.created)::int AS created,
                        EXTRACT(EPOCH FROM g.finished)::int AS finished,
                        g.player2 AS player1,
                        g.player1 AS player2, -- can not be NULL
                        EXTRACT(EPOCH FROM g.played2)::int AS played1,
                        EXTRACT(EPOCH FROM g.played1)::int AS played2,
                        g.score2 AS score1,
                        g.score1 AS score2,
                        ARRAY_TO_STRING(g.hand2, '') AS hand1,
                        REGEXP_REPLACE(ARRAY_TO_STRING(g.hand1, ''), '.', '?', 'g') AS hand2,
                        g.letters,
                        g.values,
                        g.bid,
                        m.tiles AS last_tiles,
                        m.score AS last_score
                FROM words_games g LEFT JOIN words_moves m USING(mid)
                WHERE g.player2 = in_uid
                AND (g.finished IS NULL OR g.finished > CURRENT_TIMESTAMP - INTERVAL '1 day')
        ),
    social AS (
        SELECT DISTINCT ON (uid)
        uid,
        female,
        given,
                photo,
                place
        FROM words_social
        WHERE uid IN (SELECT player1 FROM games)         /* How to optimize? */
                OR uid IN (SELECT player2 FROM games)
        ORDER BY uid, stamp DESC
    )
    SELECT
                g.gid,
                g.created,
                g.finished,
                g.player1,
                g.player2,
                g.played1,
                g.played2,
                g.score1,
                g.score2,
                g.hand1,
                g.hand2,
                g.letters,
                g.values,
                g.bid,
                g.last_tiles,
                g.last_score,
                s1.female,
                s2.female,
                s1.given,
                s2.given,
                s1.photo,
                s2.photo,
                s1.place,
                s2.place
    FROM games g
    LEFT OUTER JOIN social s1 ON g.player1 = s1.uid
    LEFT OUTER JOIN social s2 ON g.player2 = s2.uid;

$func$ LANGUAGE sql;

Re: WHERE ... IN condition and multiple columns in subquery

From
Tom Lane
Date:
Alexander Farber <alexander.farber@gmail.com> writes:
> is it please possible to rewrite the SQL query

>         SELECT DISTINCT ON (uid)
>         uid,
>         female,
>         given,
>                 photo,
>                 place
>         FROM words_social
>         WHERE uid IN (SELECT player1 FROM games)
>                 OR uid IN (SELECT player2 FROM games)
>         ORDER BY uid, stamp DESC

You could do

    WHERE uid IN (SELECT player1 FROM games UNION SELECT player2 FROM games)

(or possibly UNION ALL would be a trifle faster).  This still reads the
"games" CTE twice; but since you're evaluating that CTE elsewhere in the
query, I think the additional follower node isn't worth trying to get
rid of.

            regards, tom lane


Re: WHERE ... IN condition and multiple columns in subquery

From
Geoff Winkless
Date:
On 28 October 2016 at 12:03, Alexander Farber <alexander.farber@gmail.com> wrote:
is it please possible to rewrite the SQL query

        SELECT DISTINCT ON (uid)
        uid,
        female,
        given,
                photo,
                place
        FROM words_social
        WHERE uid IN (SELECT player1 FROM games)
                OR uid IN (SELECT player2 FROM games)
        ORDER BY uid, stamp DESC

where first column player1 is fetched in a subquery and then column player2 is fetched from the same table?

You could use

​WHERE EXISTS (SELECT FROM games WHERE player1=uid OR player2=uid)

although as Tom says, it's dubious whether that will result in a significant speedup.

Geoff​

Re: WHERE ... IN condition and multiple columns in subquery

From
Alban Hertroys
Date:
On 28 October 2016 at 13:03, Alexander Farber
<alexander.farber@gmail.com> wrote:
> Hello,
>
> is it please possible to rewrite the SQL query
>
>         SELECT DISTINCT ON (uid)
>         uid,
>         female,
>         given,
>                 photo,
>                 place
>         FROM words_social
>         WHERE uid IN (SELECT player1 FROM games)
>                 OR uid IN (SELECT player2 FROM games)
>         ORDER BY uid, stamp DESC
>
> where first column player1 is fetched in a subquery and then column player2
> is fetched from the same table?
>
> I've searched around and it seems that a JOIN should be used here, but can
> not figure out exactly how.
>
> Thank you
> Alex

You mean like this?:

         SELECT DISTINCT ON (uid)
                 uid,
                 female,
                 given,
                 photo,
                 place
         FROM words_social
         JOIN games ON uid IN (player1, player2)
         ORDER BY uid, stamp DESC


Re: WHERE ... IN condition and multiple columns in subquery

From
Alexander Farber
Date:
Thank you for the advices and I have also got few answers at

Regards
Alex