Thread: Difficulties with LAG-function when calculating overtime
Hi all, I'm having difficulties dealing with values from "previous-rows" (LAG).
I have these variables:
balance: A user's balance of worked hours for month, may be negative if worked too little.
overtime_rate: percent-value used to calculate "bonus hours". Ex: If a user has a balance of 10 hours (meaning 10 hours overtime) and 50% then he gets 5 "extra hours" which is added to the accumulated total.
compensatory_time: A user may have hours logged as compensatory time when having some hours off work.
payout: A number of hours the user gets "payed" (withdraw) when getting compensated for overtime for the current month.
I have this (simplified example) schema:
drop table if exists logged_hours;
create table logged_hours(
date DATE NOT NULL,
balance NUMERIC(100,2) NOT NULL,
overtime_rate INT NOT NULL,
compensatory_time NUMERIC(100,2) NOT NULL,
payout NUMERIC(100,2) NOT NULL
);
create table logged_hours(
date DATE NOT NULL,
balance NUMERIC(100,2) NOT NULL,
overtime_rate INT NOT NULL,
compensatory_time NUMERIC(100,2) NOT NULL,
payout NUMERIC(100,2) NOT NULL
);
INSERT INTO logged_hours(date, balance, overtime_rate, compensatory_time, payout)
VALUES
('2018-01-01', 17.5, 50, 0, 26.25)
, ('2018-02-01', 2.5, 50, 5 , 0)
, ('2018-03-01', 14 , 50, 4 , 3.75)
, ('2018-04-01', -10 , 50, 10 , 0)
;
VALUES
('2018-01-01', 17.5, 50, 0, 26.25)
, ('2018-02-01', 2.5, 50, 5 , 0)
, ('2018-03-01', 14 , 50, 4 , 3.75)
, ('2018-04-01', -10 , 50, 10 , 0)
;
And I'm trying to craft a query to calculate, for each month:
- balance (the "input-value")
- extra overtime
This is GREATEST(balance minus compensatory-time minus "accumulated total from previous period if negative", 0)
- Accumulated balance
balance + extra_overtime - compensatory_time - payout + "accumulated balance from previous period"
Here we see that extra_overtime and accumulated_balance are "inter-related"
The goal is to come up with a query which gives this data (from the above INSERT):
date | balance | basis_for_extra_overtime | extra_overtime (50%) | total_time | payout | compensatory_time | accumulated_balance_after_payout |
2018-01-01 | 17.50 | 17.5 | (17.5 * 0.5) 8.75 | (17.5 + 8.75) 26.25 | 26.25 | 0.00 | (26.26 – 26.25) 0 |
2018-02-01 | 2.50 | 0 | 0 | 2.5 | 0.00 | 5.00 | (2.5 – 5) –2.5 |
2018-03-01 | 14.00 | (14 – 4 – 2.5) 7.5 | (7.5 * 0.5) 3.75 | (14 + 3.75) 17.75 | 3.75 | 4.00 | (17.75 – 3.75 – 4 – 2.5) 7.5 |
2018-04-01 | –10.00 | 0 | 0 | –10 | 0.00 | 10.00 | (–10 – 10 + 7.5) –12.5 |
As we see, the "previous period"'s value for "accumulated_balance_after_payout" is used in the calculation of the fields "basis_for_extra_overtime" (for period 2018-03, because the accumulated balance is negative) and "accumulated_balance_after_payout".
I have this basis-query:
SELECT lh.date , lh.balance -- Basis for extra overtime: balance - compensatory_time - "previous month's" accumulated_balance_after_payout if negative , GREATEST(lh.balance - lh.compensatory_time -- TODO: minus "previous month's" accumulated_balance_after_payout if negative , 0) AS basis_for_extra_overtime , (GREATEST(lh.balance - lh.compensatory_time -- TODO: minus "previous month's" accumulated_balance_after_payout if negative , 0) * lh.overtime_rate/100) as extra_overtime -- balance + extra_overtime , lh.balance -- extra_overtime + (GREATEST(lh.balance - lh.compensatory_time -- TODO: minus "previous month's" accumulated_balance_after_payout if negative , 0) * lh.overtime_rate/100) AS total_time , lh.payout , lh.compensatory_time -- Accumulated balance: (total_time - payout - compensatory_time + "previous month's" accumulated_balance , lh.balance -- extra_overtime + (GREATEST(lh.balance - lh.compensatory_time -- TODO: minus "previous month's" accumulated_balance_after_payout if negative , 0) * lh.overtime_rate/100) - lh.payout - lh.compensatory_time -- TODO: plus "previous month's" accumulated_balance_after_payout AS accumulated_balance_after_payout FROM (SELECT cast(generate_series('2018-01-01' :: DATE, '2018-11-01' :: DATE, '1 month') AS DATE) as start_date) AS q CROSS JOIN logged_hours lh WHERE lh.date = q.start_date ORDER BY lh.date ASC ;
And have tried to use LAG to use value from "previous row" when calculating "basis_for_extra_overtime":
SELECT lh.date , lh.balance -- Basis for extra overtime: balance - compensatory_time - "previous month's" accumulated_balance_after_payout if negative , GREATEST(lh.balance - lh.compensatory_time + LEAST(LAG( lh.balance + (GREATEST(lh.balance - lh.compensatory_time, 0) * lh.overtime_rate/100) - lh.payout - lh.compensatory_time ) OVER (order by lh.date) , 0) , 0) AS basis_for_extra_overtime , (GREATEST(lh.balance - lh.compensatory_time -- TODO: minus "previous month's" accumulated_balance_after_payout if negative , 0) * lh.overtime_rate/100) as extra_overtime -- balance + extra_overtime , lh.balance + (GREATEST(lh.balance - lh.compensatory_time -- TODO: minus "previous month's" accumulated_balance_after_payout if negative , 0) * lh.overtime_rate/100) AS total_time , lh.payout , lh.compensatory_time -- Accumulated balance: (total_time - payout - compensatory_time + "previous month's" accumulated_balance , lh.balance + (GREATEST(lh.balance - lh.compensatory_time , 0) * lh.overtime_rate/100) - lh.payout - lh.compensatory_time -- TODO: plus "previous month's" accumulated_balance_after_payout AS accumulated_balance_after_payout FROM (SELECT cast(generate_series('2018-01-01' :: DATE, '2018-11-01' :: DATE, '1 month') AS DATE) as start_date) AS q CROSS JOIN logged_hours lh WHERE lh.date = q.start_date ORDER BY lh.date ASC ;
In the query above I use LEAST(<expr>, 0) to only add the value if negative, effectively subtracting it, which is what I want.
The problem with this as I've written it above is that it tries to subtract the previous row's accumulated_balance_after_payout, but the calculation of that is not correct because in order to do that we need the previous row's basis_for_extra_overtime, which again is dependent of the previous-previous-row, and it kind of gets difficult from here...
Anyone has a clever way to solve this kinds of issues and craft a query which produces the desired result as in the table above?
Thanks.
--
Andreas Joseph Krogh
On Mon, Nov 19, 2018 at 6:24 AM Andreas Joseph Krogh <andreas@visena.com> wrote:
Anyone has a clever way to solve this kinds of issues and craft a query which produces the desired result as in the table above?
Thinking in terms of theory - you need to calculate the first row and then calculate the next row using data from the first row. Then calculate the third row using data from the second row (you might need to carry-forward some value from the first row so that the third row can see them...). That sounds like the algorithm for iteration which is implemented in SQL via "WITH RECURSIVE".
David J.
På mandag 19. november 2018 kl. 17:08:57, skrev David G. Johnston <david.g.johnston@gmail.com>:
On Mon, Nov 19, 2018 at 6:24 AM Andreas Joseph Krogh <andreas@visena.com> wrote:Anyone has a clever way to solve this kinds of issues and craft a query which produces the desired result as in the table above?Thinking in terms of theory - you need to calculate the first row and then calculate the next row using data from the first row. Then calculate the third row using data from the second row (you might need to carry-forward some value from the first row so that the third row can see them...). That sounds like the algorithm for iteration which is implemented in SQL via "WITH RECURSIVE".David J.
Yea, I kind of figured RECURSIVE CTE was the way foreward...
If anyone has got this working, give me a tip:-)
--
Andreas Joseph Krogh
På mandag 19. november 2018 kl. 17:20:23, skrev Andreas Joseph Krogh <andreas@visena.com>:
På mandag 19. november 2018 kl. 17:08:57, skrev David G. Johnston <david.g.johnston@gmail.com>:On Mon, Nov 19, 2018 at 6:24 AM Andreas Joseph Krogh <andreas@visena.com> wrote:Anyone has a clever way to solve this kinds of issues and craft a query which produces the desired result as in the table above?Thinking in terms of theory - you need to calculate the first row and then calculate the next row using data from the first row. Then calculate the third row using data from the second row (you might need to carry-forward some value from the first row so that the third row can see them...). That sounds like the algorithm for iteration which is implemented in SQL via "WITH RECURSIVE".David J.Yea, I kind of figured RECURSIVE CTE was the way foreward...If anyone has got this working, give me a tip:-)
Got it, thanks for getting me on the right track!
For the archives:
WITH RECURSIVE prev AS ( SELECT lh.date , lh.balance -- Basis for extra overtime: balance - compensatory_time , GREATEST(lh.balance - lh.compensatory_time , 0) AS basis_for_extra_overtime , (GREATEST(lh.balance - lh.compensatory_time , 0) * lh.overtime_rate/100) as extra_overtime -- balance + extra_overtime , lh.balance -- extra_overtime + (GREATEST(lh.balance - lh.compensatory_time , 0) * lh.overtime_rate/100) AS total_time , lh.payout , lh.compensatory_time -- Accumulated balance: (total_time - payout - compensatory_time + "previous month's" accumulated_balance , lh.balance -- extra_overtime + (GREATEST(lh.balance - lh.compensatory_time , 0) * lh.overtime_rate/100) - lh.payout - lh.compensatory_time AS accumulated_balance_after_payout FROM logged_hours lh WHERE lh.date = '2018-01-01' :: DATE UNION ALL SELECT lh.date , lh.balance -- Basis for extra overtime: balance - compensatory_time - "previous month's" accumulated_balance_after_payout if negative , GREATEST(lh.balance - lh.compensatory_time -- minus "previous month's" accumulated_balance_after_payout if negative + LEAST(coalesce(prev.accumulated_balance_after_payout, 0), 0) , 0) AS basis_for_extra_overtime , (GREATEST(lh.balance - lh.compensatory_time -- minus "previous month's" accumulated_balance_after_payout if negative + LEAST(coalesce(prev.accumulated_balance_after_payout, 0), 0) , 0) * lh.overtime_rate/100) as extra_overtime -- balance + extra_overtime , lh.balance -- extra_overtime + (GREATEST(lh.balance - lh.compensatory_time -- minus "previous month's" accumulated_balance_after_payout if negative + LEAST(coalesce(prev.accumulated_balance_after_payout, 0), 0) , 0) * lh.overtime_rate/100) AS total_time , lh.payout , lh.compensatory_time -- Accumulated balance: (total_time - payout - compensatory_time + "previous month's" accumulated_balance , lh.balance -- extra_overtime + (GREATEST(lh.balance - lh.compensatory_time -- minus "previous month's" accumulated_balance_after_payout if negative + LEAST(coalesce(prev.accumulated_balance_after_payout, 0), 0) , 0) * lh.overtime_rate/100) - lh.payout - lh.compensatory_time + coalesce(prev.accumulated_balance_after_payout, 0) AS accumulated_balance_after_payout FROM logged_hours lh JOIN prev ON lh.date = prev.date + '1 MONTH'::INTERVAL ) select * from prev;
Produces the correct result:
date | balance | basis_for_extra_overtime | extra_overtime | total_time | payout | compensatory_time | accumulated_balance_after_payout |
---|---|---|---|---|---|---|---|
2018-01-01 | 17.50 | 17.5 | 8.75 | 26.25 | 26.25 | 0.00 | 0 |
2018-02-01 | 2.50 | 0 | 0 | 2.5 | 0.00 | 5.00 | -2.5 |
2018-03-01 | 14.00 | 7.5 | 3.75 | 17.75 | 3.75 | 4.00 | 7.5 |
2018-04-01 | -10.00 | 0 | 0 | -10 | 0.00 | 10.00 | -12.5 |
--
Andreas Joseph Krogh