Thread: Adding AVG to a JOIN

Adding AVG to a JOIN

From
Alexander Farber
Date:
Hello,

in PostgreSQL 10.3 I run the following query to find top 10 players with the best ELO rating:

# SELECT
        u.elo,
        u.uid,
        s.given,
        s.photo
FROM words_users u
JOIN words_social s USING (uid)
WHERE u.elo > 1500

-- take the most recent record from words_social (storing user details from social networks)
AND NOT EXISTS (SELECT 1
                FROM words_social x
                WHERE s.uid = x.uid
                AND x.stamp > s.stamp)

-- only show players who where active in the last week
AND EXISTS (SELECT 1
            FROM words_moves
            WHERE played > CURRENT_TIMESTAMP - INTERVAL '1 week'
            AND action IN ('play', 'skip', 'swap', 'resign'))
ORDER BY u.elo DESC
LIMIT 10;

 elo  | uid  |  given   |                                                   photo
------+------+----------+------------------------------------------------------------------------------------------------------------
 2078 | 1201 | Roman    | https://lh6.googleusercontent.com/-kNp75NGW6wo/AAAAAAAAAAI/AAAAAAAAABs/QN7rEc17JNc/photo.jpg
 1952 | 2846 | дана     | https://avt-30.foto.mail.ru/mail/dance1011/_avatarbig?1523746018
 1923 | 2808 | Ириша    | https://avt-24.foto.mail.ru/mail/irusy2277/_avatarbig?1518190793
 1788 | 3479 | наталья  | https://avt-5.foto.mail.ru/mail/leutan/_avatarbig
 1749 | 3404 | ♕ OLGA ♕ | https://avt-30.foto.mail.ru/mail/olgapinsk2/_avatarbig?1484081891
 1733 | 3336 | Надежда  | https://avt-14.foto.mail.ru/mail/katerenyuk78/_avatarbig?1520366579
 1724 | 1765 | ЕЛЕНА    | https://i.mycdn.me/image?id=805029440389&t=0&plc=API&ts=00&aid=1158060544&tkn=*looACpPtImwclHOmPBfnpuashFk
 1717 | 3091 | андрей   | https://avt-11.foto.mail.ru/yandex.ru/maimun-11/_avatarbig?1453033064
 1711 | 3000 | Алекс    | https://avt-20.foto.mail.ru/mail/taa113/_avatarbig?1495430756
 1708 | 3991 | Кузнецов | https://avt-10.foto.mail.ru/inbox/sobaka.58/_avatarbig?1353528572
(10 rows)

The above query works well and quick, but I need to add the average score per move information to it.

Here is such a query for the best player

# SELECT AVG(score) FROM words_moves WHERE uid = 1201;
         avg
---------------------
 18.4803525523319868

However I am not sure, how to "marry" the 2 queries?

I have tried to add words_moves through another JOIN, but that does not work:

# SELECT
        u.elo,
        u.uid,
        AVG(m.score),                -- how to add the player average score?
        s.given,
        s.photo
FROM words_users u
JOIN words_social s USING (uid)
JOIN words_moves m USING (uid)
WHERE u.elo > 1500

AND NOT EXISTS (SELECT 1
                FROM words_social x
                WHERE s.uid = x.uid
                AND x.stamp > s.stamp)

AND EXISTS (SELECT 1
            FROM words_moves
            WHERE played > CURRENT_TIMESTAMP - INTERVAL '1 week'
            AND action IN ('play', 'skip', 'swap', 'resign'))
ORDER BY u.elo DESC
LIMIT 10
;
ERROR:  42803: column "u.elo" must appear in the GROUP BY clause or be used in an aggregate function
LINE 2:         u.elo,
                ^

Please give me some hints, how to approach this.

Thank you!
Alex

P.S: Below are the 3 tables referenced above:

# \d words_users
                                         Table "public.words_users"
    Column     |           Type           | Collation | Nullable |                 Default
---------------+--------------------------+-----------+----------+------------------------------------------
 uid           | integer                  |           | not null | nextval('words_users_uid_seq'::regclass)
 created       | timestamp with time zone |           | not null |
 visited       | timestamp with time zone |           | not null |
 ip            | inet                     |           | not null |
 fcm           | text                     |           |          |
 apns          | text                     |           |          |
 adm           | text                     |           |          |
 motto         | text                     |           |          |
 vip_until     | timestamp with time zone |           |          |
 grand_until   | timestamp with time zone |           |          |
 banned_until  | timestamp with time zone |           |          |
 banned_reason | text                     |           |          |
 elo           | integer                  |           | not null |
 medals        | integer                  |           | not null |
 coins         | integer                  |           | not null |
Indexes:
    "words_users_pkey" PRIMARY KEY, btree (uid)
Check constraints:
    "words_users_banned_reason_check" CHECK (length(banned_reason) > 0)
    "words_users_elo_check" CHECK (elo >= 0)
    "words_users_medals_check" CHECK (medals >= 0)
Referenced by:
    TABLE "words_chat" CONSTRAINT "words_chat_uid_fkey" FOREIGN KEY (uid) REFERENCES words_users(uid) ON DELETE CASCADE
    TABLE "words_games" CONSTRAINT "words_games_player1_fkey" FOREIGN KEY (player1) REFERENCES words_users(uid) ON DELETE CASCADE
    TABLE "words_games" CONSTRAINT "words_games_player2_fkey" FOREIGN KEY (player2) REFERENCES words_users(uid) ON DELETE CASCADE
    TABLE "words_moves" CONSTRAINT "words_moves_uid_fkey" FOREIGN KEY (uid) REFERENCES words_users(uid) ON DELETE CASCADE
    TABLE "words_reviews" CONSTRAINT "words_reviews_author_fkey" FOREIGN KEY (author) REFERENCES words_users(uid) ON DELETE CASCADE
    TABLE "words_reviews" CONSTRAINT "words_reviews_uid_fkey" FOREIGN KEY (uid) REFERENCES words_users(uid) ON DELETE CASCADE
    TABLE "words_scores" CONSTRAINT "words_scores_uid_fkey" FOREIGN KEY (uid) REFERENCES words_users(uid) ON DELETE CASCADE
    TABLE "words_social" CONSTRAINT "words_social_uid_fkey" FOREIGN KEY (uid) REFERENCES words_users(uid) ON DELETE CASCADE

# \d words_social
                Table "public.words_social"
 Column |       Type       | Collation | Nullable | Default
--------+------------------+-----------+----------+---------
 sid    | text             |           | not null |
 social | integer          |           | not null |
 given  | text             |           | not null |
 family | text             |           |          |
 photo  | text             |           |          |
 lat    | double precision |           |          |
 lng    | double precision |           |          |
 stamp  | integer          |           | not null |
 uid    | integer          |           | not null |
Indexes:
    "words_social_pkey" PRIMARY KEY, btree (sid, social)
Check constraints:
    "words_social_given_check" CHECK (given ~ '\S'::text)
    "words_social_photo_check" CHECK (photo ~* '^https?://...'::text)
    "words_social_social_check" CHECK (0 < social AND social <= 64)
Foreign-key constraints:
    "words_social_uid_fkey" FOREIGN KEY (uid) REFERENCES words_users(uid) ON DELETE CASCADE
Referenced by:
    TABLE "words_payments" CONSTRAINT "words_payments_sid_fkey" FOREIGN KEY (sid, social) REFERENCES words_social(sid, social) ON DELETE CASCADE

# \d words_moves
                                      Table "public.words_moves"
 Column  |           Type           | Collation | Nullable |                 Default
---------+--------------------------+-----------+----------+------------------------------------------
 mid     | bigint                   |           | not null | nextval('words_moves_mid_seq'::regclass)
 action  | text                     |           | not null |
 gid     | integer                  |           | not null |
 uid     | integer                  |           | not null |
 played  | timestamp with time zone |           | not null |
 tiles   | jsonb                    |           |          |
 score   | integer                  |           |          |
 letters | text                     |           |          |
Indexes:
    "words_moves_pkey" PRIMARY KEY, btree (mid)
Check constraints:
    "words_moves_score_check" CHECK (score >= 0)
Foreign-key constraints:
    "words_moves_gid_fkey" FOREIGN KEY (gid) REFERENCES words_games(gid) ON DELETE CASCADE
    "words_moves_uid_fkey" FOREIGN KEY (uid) REFERENCES words_users(uid) ON DELETE CASCADE
Referenced by:
    TABLE "words_daily" CONSTRAINT "words_daily_mid_fkey" FOREIGN KEY (mid) REFERENCES words_moves(mid) ON DELETE CASCADE
    TABLE "words_scores" CONSTRAINT "words_scores_mid_fkey" FOREIGN KEY (mid) REFERENCES words_moves(mid) ON DELETE CASCADE



Re: Adding AVG to a JOIN

From
"Daniel Verite"
Date:
    Alexander Farber wrote:

> Here is such a query for the best player
>
> # SELECT AVG(score) FROM words_moves WHERE uid = 1201;
>         avg
> ---------------------
> 18.4803525523319868
>
> However I am not sure, how to "marry" the 2 queries?
>
> I have tried to add words_moves through another JOIN, but that does not
> work:

You may use a correlated subquery in the SELECT clause,
like this:

 SELECT
       u.elo,
       u.uid,
       (SELECT AVG(score) FROM words_moves WHERE uid=u.uid),
       s.given,
       s.photo


Best regards,
--
Daniel Vérité
PostgreSQL-powered mailer: http://www.manitou-mail.org
Twitter: @DanielVerite


Re: Adding AVG to a JOIN

From
Alexander Farber
Date:
Thank you, Daniel -

On Mon, Apr 23, 2018 at 12:15 PM, Daniel Verite <daniel@manitou-mail.org> wrote:

You may use a correlated subquery in the SELECT clause,
like this:

 SELECT
       u.elo,
       u.uid,
       (SELECT AVG(score) FROM words_moves WHERE uid=u.uid),
       s.given,
       s.photo

this has worked great for me:

               SELECT
                        u.elo,
                        (SELECT ROUND(AVG(score), 1) FROM words_moves WHERE uid = u.uid) AS score,
                        s.given,
                        s.photo
                FROM words_users u
                JOIN words_social s USING (uid)
                WHERE u.elo > 1500
                -- take the most recent record from words_social
                AND NOT EXISTS (SELECT 1
                                FROM words_social x
                                WHERE s.uid = x.uid
                                AND x.stamp > s.stamp)
                -- only show players who where active in the last week
                AND EXISTS (SELECT 1
                            FROM words_moves
                            WHERE played > CURRENT_TIMESTAMP - INTERVAL '1 week'
                            AND action IN ('play', 'skip', 'swap', 'resign'))
                ORDER BY u.elo DESC
                LIMIT 10

Is that what is called LATERAL JOIN?

Regards
Alex

Re: Adding AVG to a JOIN

From
Alexander Farber
Date:
Good evening,

On Mon, Apr 23, 2018 at 12:56 PM, Alexander Farber <alexander.farber@gmail.com> wrote:
On Mon, Apr 23, 2018 at 12:15 PM, Daniel Verite <daniel@manitou-mail.org> wrote:

You may use a correlated subquery in the SELECT clause,

               SELECT
                        u.elo,
                        (SELECT ROUND(AVG(score), 1) FROM words_moves WHERE uid = u.uid) AS score,
                        s.given,
                        s.photo
                FROM words_users u
                JOIN words_social s USING (uid)
                WHERE u.elo > 1500
                -- take the most recent record from words_social
                AND NOT EXISTS (SELECT 1
                                FROM words_social x
                                WHERE s.uid = x.uid
                                AND x.stamp > s.stamp)
                -- only show players who where active in the last week
                AND EXISTS (SELECT 1
                            FROM words_moves
                            WHERE played > CURRENT_TIMESTAMP - INTERVAL '1 week'
                            AND action IN ('play', 'skip', 'swap', 'resign'))
                ORDER BY u.elo DESC
                LIMIT 10


thank you all for helping me in adding an average score per move to my SQL JOIN.

Of course I would like to add yet another statistic and now am trying to add the average time per move to the same query, by prepending it a CTE with LAG():

                 WITH cte AS (
                        SELECT
                                m.gid,
                                m.uid,
                                m.played,
                                LAG(m.played) OVER (PARTITION BY m.gid ORDER BY played) AS prev_played
                        FROM words_moves m
                        JOIN words_games g ON (m.gid = g.gid AND m.uid in (g.player1, g.player2))
                        WHERE m.played > CURRENT_TIMESTAMP - interval '1 month'
                ) 
                SELECT
                        u.elo,
                        AVG(c.played - c.prev_played) AS avg_time_per_move,
                        (SELECT ROUND(AVG(score), 1) FROM words_moves WHERE uid = u.uid) AS score,
                        s.given,
                        s.photo
                FROM words_users u 
                JOIN words_social s USING (uid)
                JOIN cte c USING (uid)
                WHERE u.elo > 1500 
                -- take the most recent record from words_social
                AND NOT EXISTS (SELECT 1 
                                FROM words_social x 
                                WHERE s.uid = x.uid 
                                AND x.stamp > s.stamp)
                -- only show players who where active in the last week
                AND EXISTS (SELECT 1
                            FROM words_moves
                            WHERE played > CURRENT_TIMESTAMP - INTERVAL '1 week'
                            AND action IN ('play', 'skip', 'swap', 'resign'))
                ORDER BY u.elo DESC
                LIMIT 10;

but this fails with 

ERROR:  42803: column "u.elo" must appear in the GROUP BY clause or be used in an aggregate function
LINE 15:                         u.elo,
                                 ^

And I don't understand why adding a CTE has caused it, because without the CTE the GROUP BY u.elo was not required...

Regards
Alex


Re: Adding AVG to a JOIN

From
"David G. Johnston"
Date:
On Mon, Apr 23, 2018 at 12:47 PM, Alexander Farber <alexander.farber@gmail.com> wrote:
                SELECT
                        u.elo,
                        AVG(c.played - c.prev_played) AS avg_time_per_move,
                        (SELECT ROUND(AVG(score), 1) FROM words_moves WHERE uid = u.uid) AS score,

And I don't understand why adding a CTE has caused it, because without the CTE the GROUP BY u.elo was not required...


​Adding "AVG(c.played - c.prev_played)" directly to the top-level select statement​ column list is what turned it into a "GROUP BY" query.  When you embedded the "AVG(score)" in a subquery the GROUP BY was limited to just that subquery, and it had no other columns besides the aggregate and so didn't require a GROUP BY clause.

David J.

Re: Adding AVG to a JOIN

From
Alexander Farber
Date:
Thank you for the explanation. I have rearranged my query and it works now (surprisingly fast too) -

On Mon, Apr 23, 2018 at 9:58 PM, David G. Johnston <david.g.johnston@gmail.com> wrote:
On Mon, Apr 23, 2018 at 12:47 PM, Alexander Farber <alexander.farber@gmail.com> wrote:
                SELECT
                        u.elo,
                        AVG(c.played - c.prev_played) AS avg_time_per_move,
                        (SELECT ROUND(AVG(score), 1) FROM words_moves WHERE uid = u.uid) AS score,

And I don't understand why adding a CTE has caused it, because without the CTE the GROUP BY u.elo was not required...


​Adding "AVG(c.played - c.prev_played)" directly to the top-level select statement​ column list is what turned it into a "GROUP BY" query.  When you embedded the "AVG(score)" in a subquery the GROUP BY was limited to just that subquery, and it had no other columns besides the aggregate and so didn't require a GROUP BY clause.


                WITH cte AS (
                        SELECT
                                m.gid,
                                m.uid,
                                m.played,
                                LAG(m.played) OVER (PARTITION BY m.gid ORDER BY played) AS prev_played
                        FROM words_moves m
                        JOIN words_games g ON (m.gid = g.gid AND m.uid in (g.player1, g.player2))
                        WHERE m.played > CURRENT_TIMESTAMP - interval '1 month'
                )
                SELECT
                        u.elo,
                        (SELECT TO_CHAR(AVG(played - prev_played), 'HH24:MI') FROM cte WHERE uid = u.uid) AS avg_time,
                        (SELECT ROUND(AVG(score), 1) FROM words_moves WHERE uid = u.uid) AS avg_score,
                        s.given,
                        s.photo
                FROM words_users u
                JOIN words_social s USING (uid)
                WHERE u.elo > 1500
                -- take the most recent record from words_social
                AND NOT EXISTS (SELECT 1
                                FROM words_social x
                                WHERE s.uid = x.uid
                                AND x.stamp > s.stamp)
                -- only show players who where active in the last week
                AND EXISTS (SELECT 1
                            FROM words_moves
                            WHERE played > CURRENT_TIMESTAMP - INTERVAL '1 week'
                            AND action IN ('play', 'skip', 'swap', 'resign'))
                ORDER BY u.elo DESC
                LIMIT 10;

Best regards
Alex