Thread: data import via COPY, Rules + Triggers

data import via COPY, Rules + Triggers

From
Tarlika Elisabeth Schmitz
Date:
I have got a database that needs to be populated, first with historical
data, then on a daily basis. Number of entities is small, data
volume huge. Once imported, data will neither be modified nor deleted.

Data come in denormalized CSV formats. There are overlaps between
different CSV formats and existing data will keep on reappearing and
need updating to catch possible changes.


I created interim tables matching the structure of the CSV formats
(about 6 of them). I want to import via COPY and distribute the data to
the "proper" tables via rules + triggers.

I just hit a wall with one of the rules, (see example below): how do I
populate athlete.manager_fk, which is the result of the previous INSERT?





-- interim table
CREATE TABLE zathlete
( id integer NOT NULL, "name" character varying(50) NOT NULL, dad_id integer, dad_name character varying(50),
sponsor_idinteger, sponsor_name character varying(50), manager_name character varying(50), _received timestamp without
timezone NOT NULL
 
)

-- proper tables
CREATE TABLE sponsor
( id integer NOT NULL, "name" character varying(50) NOT NULL, _received timestamp without time zone NOT NULL,
CONSTRAINTsponsor_pkey PRIMARY KEY (id)
 
)

CREATE TABLE manager
( id serial NOT NULL, "name" character varying(50) NOT NULL,_received timestamp without time zone NOT NULL,CONSTRAINT
manager_pkeyPRIMARY KEY (id)
 
)

CREATE TABLE athlete
( id integer NOT NULL, "name" character varying(50) NOT NULL, dad_fk integer, sponsor_fk integer, manager_fk integer,
_receivedtimestamp without time zone NOT NULL, CONSTRAINT athlete_pkey PRIMARY KEY (id), CONSTRAINT manager_athlete_fk
FOREIGNKEY (manager_fk)     REFERENCES manager (id) MATCH SIMPLE     ON UPDATE CASCADE ON DELETE RESTRICT, CONSTRAINT
sponsor_athlete_fkFOREIGN KEY (sponsor_fk)     REFERENCES sponsor (id) MATCH SIMPLE     ON UPDATE CASCADE ON DELETE
RESTRICT,CONSTRAINT dad_athlete_fk FOREIGN KEY (dad_fk)     REFERENCES athlete (id) MATCH SIMPLE     ON UPDATE CASCADE
ONDELETE RESTRICT
 
)


-- rules

CREATE OR REPLACE RULE zathlete_insert_1 AS   ON INSERT TO zathlete   DO ALSO -- INSTEAD once all is working   (INSERT
INTOathlete           (id, name, _received) VALUES           (NEW.dad_id, NEW.dad_name, NEW._received);INSERT INTO
sponsor          (id, name, _received) VALUES           (NEW.sponsor_id, NEW.sponsor_name, NEW._received);INSERT INTO
manager          (name, _received) VALUES           (NEW.manager_name, NEW._received);INSERT INTO athlete
(id,name, dad_fk, sponsor_fk, manager_fk, _received) VALUES           (NEW.id, NEW.name, NEW.dad_id,
NEW.sponsor_id,?????, NEW._received);
 
)





====
System: PostgreSQL 8.3
no of users: 1

-- 

Best Regards,
Tarlika Elisabeth Schmitz


Re: data import via COPY, Rules + Triggers

From
Tarlika Elisabeth Schmitz
Date:
Thank you for your help, Sergey. That certainly works.

I was wondering whether the manager.id could maybe be obtained via
INSERT ... RETURNING? 

-- 

Best Regards,
Tarlika Elisabeth Schmitz



On Thu, 5 May 2011 08:45:32 +0300
sergey kapustin <kapustin.sergey@gmail.com> wrote:

>Try using (select id from manager where name=NEW.manager_name) to get
>the newly inserted manager.
>The "name" column  in "manager" table should have unique constraint -
>this will be good both for performance and consistency.
>
>
>
>CREATE OR REPLACE RULE zathlete_insert_1 AS
>   ON INSERT TO zathlete
>   DO ALSO
>   (
>       INSERT INTO athlete
>           (id, name, _received) VALUES
>           (NEW.dad_id, NEW.dad_name, NEW._received);
>       INSERT INTO sponsor
>           (id, name, _received) VALUES
>           (NEW.sponsor_id, NEW.sponsor_name, NEW._received);
>       INSERT INTO manager
>           (name, _received) VALUES
>           (NEW.manager_name, NEW._received);
>       INSERT INTO athlete
>           (id, name, dad_fk, sponsor_fk, manager_fk, _received) VALUES
>           (NEW.id, NEW.name, NEW.dad_id,
>            NEW.sponsor_id, (select id from manager where
>name=NEW.manager_name), NEW._received);
>)
>;
>
>On Thu, May 5, 2011 at 1:48 AM, Tarlika Elisabeth Schmitz <
>postgresql3@numerixtechnology.de> wrote:
>
>> [...]
>>
>>
>> I created interim tables matching the structure of the CSV formats
>> (about 6 of them). I want to import via COPY and distribute the data
>> to the "proper" tables via rules + triggers.
>>
>> I just hit a wall with one of the rules, (see example below): how do
>> I populate athlete.manager_fk, which is the result of the previous
>> INSERT?
>>
>>
>>
>>
>>
>> -- interim table
>> CREATE TABLE zathlete
>> (
>>  id integer NOT NULL,
>>  "name" character varying(50) NOT NULL,
>>  dad_id integer,
>>  dad_name character varying(50),
>>  sponsor_id integer,
>>  sponsor_name character varying(50),
>>  manager_name character varying(50),
>>  _received timestamp without time zone NOT NULL
>> )
>>
>> -- proper tables
>> CREATE TABLE sponsor
>> (
>>  id integer NOT NULL,
>>  "name" character varying(50) NOT NULL,
>>  _received timestamp without time zone NOT NULL,
>>  CONSTRAINT sponsor_pkey PRIMARY KEY (id)
>> )
>>
>> CREATE TABLE manager
>> (
>>  id serial NOT NULL,
>>  "name" character varying(50) NOT NULL,
>>  _received timestamp without time zone NOT NULL,
>>  CONSTRAINT manager_pkey PRIMARY KEY (id)
>> )
>>
>> CREATE TABLE athlete
>> (
>>  id integer NOT NULL,
>>  "name" character varying(50) NOT NULL,
>>  dad_fk integer,
>>  sponsor_fk integer,
>>  manager_fk integer,
>>  _received timestamp without time zone NOT NULL,
>>  CONSTRAINT athlete_pkey PRIMARY KEY (id),
>>  CONSTRAINT manager_athlete_fk FOREIGN KEY (manager_fk)
>>      REFERENCES manager (id) MATCH SIMPLE
>>      ON UPDATE CASCADE ON DELETE RESTRICT,
>>  CONSTRAINT sponsor_athlete_fk FOREIGN KEY (sponsor_fk)
>>      REFERENCES sponsor (id) MATCH SIMPLE
>>      ON UPDATE CASCADE ON DELETE RESTRICT,
>>  CONSTRAINT dad_athlete_fk FOREIGN KEY (dad_fk)
>>      REFERENCES athlete (id) MATCH SIMPLE
>>      ON UPDATE CASCADE ON DELETE RESTRICT
>> )
>>
>>
>> -- rules
>>
>> CREATE OR REPLACE RULE zathlete_insert_1 AS
>>    ON INSERT TO zathlete
>>    DO ALSO -- INSTEAD once all is working
>>    (
>>        INSERT INTO athlete
>>            (id, name, _received) VALUES
>>            (NEW.dad_id, NEW.dad_name, NEW._received);
>>        INSERT INTO sponsor
>>            (id, name, _received) VALUES
>>            (NEW.sponsor_id, NEW.sponsor_name, NEW._received);
>>        INSERT INTO manager
>>            (name, _received) VALUES
>>            (NEW.manager_name, NEW._received);
>>        INSERT INTO athlete
>>            (id, name, dad_fk, sponsor_fk, manager_fk, _received)
>> VALUES (NEW.id, NEW.name, NEW.dad_id,
>>             NEW.sponsor_id, ?????, NEW._received);
>> )
>>
>>
>>
>>
>>
>> ====
>> System: PostgreSQL 8.3
>> no of users: 1
>>
>> --
>>
>> Best Regards,
>> Tarlika Elisabeth Schmitz





Re: data import via COPY, Rules + Triggers

From
Tarlika Elisabeth Schmitz
Date:
Thank your for your reply, Sergey.
(By the way, I noticed you are not cc'ing/replying to the list.)

On Fri, 6 May 2011 01:45:19 +0300
sergey kapustin <kapustin.sergey@gmail.com> wrote:

>INSERT ... RETURNING will not work in rules i think. You cannot nest
>INSERTs and its not possible to use variables.

And rules are not actioned by the COPY command (which I use to populate
the tables) whereas triggers are.

>you have
>to use conditional statements because you don't want to insert into
>manager table every time you insert new athlete.

That has been taking care of by an insert/update trigger on manager,
which updates a manager record, if necessary, where it exists already.




>I suggest you use plsql function (trigger on zathlete) instead of
>rules. Then you can do something like this -
>
>id_manager:=null
>select into id_manager id from manager where name=NEW.manager_name;
>if not found then
>    insert into manager(name) values (NEW.manager_name);
>    select into id_manager CURRVAL('manager_id_seq');
>end if;
>INSERT INTO athlete (... manager_fk...) VALUES ( ... id_manager...);
>
>good luck!
>
>
>On Thu, May 5, 2011 at 6:21 PM, Tarlika Elisabeth Schmitz <
>postgresql3@numerixtechnology.de> wrote:
>
>> I was wondering whether the manager.id could maybe be obtained via
>> INSERT ... RETURNING?
>>
>> --
>>
>> Best Regards,
>> Tarlika Elisabeth Schmitz
>>
>>
>>
>> On Thu, 5 May 2011 08:45:32 +0300
>> sergey kapustin <kapustin.sergey@gmail.com> wrote:
>>
>> >Try using (select id from manager where name=NEW.manager_name) to
>> >get the newly inserted manager.
>> >The "name" column  in "manager" table should have unique constraint
>> >- this will be good both for performance and consistency.
>> >
>> >
>> >
>> >CREATE OR REPLACE RULE zathlete_insert_1 AS
>> >   ON INSERT TO zathlete
>> >   DO ALSO
>> >   (
>> >       INSERT INTO athlete
>> >           (id, name, _received) VALUES
>> >           (NEW.dad_id, NEW.dad_name, NEW._received);
>> >       INSERT INTO sponsor
>> >           (id, name, _received) VALUES
>> >           (NEW.sponsor_id, NEW.sponsor_name, NEW._received);
>> >       INSERT INTO manager
>> >           (name, _received) VALUES
>> >           (NEW.manager_name, NEW._received);
>> >       INSERT INTO athlete
>> >           (id, name, dad_fk, sponsor_fk, manager_fk, _received)
>> > VALUES (NEW.id, NEW.name, NEW.dad_id,
>> >            NEW.sponsor_id, (select id from manager where
>> >name=NEW.manager_name), NEW._received);
>> >)
>> >;
>> >
>> >On Thu, May 5, 2011 at 1:48 AM, Tarlika Elisabeth Schmitz <
>> >postgresql3@numerixtechnology.de> wrote:
>> >
>> >> [...]
>> >>
>> >>
>> >> I created interim tables matching the structure of the CSV formats
>> >> (about 6 of them). I want to import via COPY and distribute the
>> >> data to the "proper" tables via rules + triggers.
>> >>
>> >> I just hit a wall with one of the rules, (see example below): how
>> >> do I populate athlete.manager_fk, which is the result of the
>> >> previous INSERT?
>> >>
>> >>
>> >>
>> >>
>> >>
>> >> -- interim table
>> >> CREATE TABLE zathlete
>> >> (
>> >>  id integer NOT NULL,
>> >>  "name" character varying(50) NOT NULL,
>> >>  dad_id integer,
>> >>  dad_name character varying(50),
>> >>  sponsor_id integer,
>> >>  sponsor_name character varying(50),
>> >>  manager_name character varying(50),
>> >>  _received timestamp without time zone NOT NULL
>> >> )
>> >>
>> >> -- proper tables
>> >> CREATE TABLE sponsor
>> >> (
>> >>  id integer NOT NULL,
>> >>  "name" character varying(50) NOT NULL,
>> >>  _received timestamp without time zone NOT NULL,
>> >>  CONSTRAINT sponsor_pkey PRIMARY KEY (id)
>> >> )
>> >>
>> >> CREATE TABLE manager
>> >> (
>> >>  id serial NOT NULL,
>> >>  "name" character varying(50) NOT NULL,
>> >>  _received timestamp without time zone NOT NULL,
>> >>  CONSTRAINT manager_pkey PRIMARY KEY (id)
>> >> )
>> >>
>> >> CREATE TABLE athlete
>> >> (
>> >>  id integer NOT NULL,
>> >>  "name" character varying(50) NOT NULL,
>> >>  dad_fk integer,
>> >>  sponsor_fk integer,
>> >>  manager_fk integer,
>> >>  _received timestamp without time zone NOT NULL,
>> >>  CONSTRAINT athlete_pkey PRIMARY KEY (id),
>> >>  CONSTRAINT manager_athlete_fk FOREIGN KEY (manager_fk)
>> >>      REFERENCES manager (id) MATCH SIMPLE
>> >>      ON UPDATE CASCADE ON DELETE RESTRICT,
>> >>  CONSTRAINT sponsor_athlete_fk FOREIGN KEY (sponsor_fk)
>> >>      REFERENCES sponsor (id) MATCH SIMPLE
>> >>      ON UPDATE CASCADE ON DELETE RESTRICT,
>> >>  CONSTRAINT dad_athlete_fk FOREIGN KEY (dad_fk)
>> >>      REFERENCES athlete (id) MATCH SIMPLE
>> >>      ON UPDATE CASCADE ON DELETE RESTRICT
>> >> )
>> >>
>> >>
>> >> -- rules
>> >>
>> >> CREATE OR REPLACE RULE zathlete_insert_1 AS
>> >>    ON INSERT TO zathlete
>> >>    DO ALSO -- INSTEAD once all is working
>> >>    (
>> >>        INSERT INTO athlete
>> >>            (id, name, _received) VALUES
>> >>            (NEW.dad_id, NEW.dad_name, NEW._received);
>> >>        INSERT INTO sponsor
>> >>            (id, name, _received) VALUES
>> >>            (NEW.sponsor_id, NEW.sponsor_name, NEW._received);
>> >>        INSERT INTO manager
>> >>            (name, _received) VALUES
>> >>            (NEW.manager_name, NEW._received);
>> >>        INSERT INTO athlete
>> >>            (id, name, dad_fk, sponsor_fk, manager_fk, _received)
>> >> VALUES (NEW.id, NEW.name, NEW.dad_id,
>> >>             NEW.sponsor_id, ?????, NEW._received);
>> >> )
>> >>
>> >>
>> >>
>> >>
>> >>
>> >> ====
>> >> System: PostgreSQL 8.3
>> >> no of users: 1
>> >>
>> >> --
>> >>
>> >> Best Regards,
>> >> Tarlika Elisabeth Schmitz
>>
>>
>>
>>
>> --
>> Sent via pgsql-sql mailing list (pgsql-sql@postgresql.org)
>> To make changes to your subscription:
>> http://www.postgresql.org/mailpref/pgsql-sql
>>




-- 

Best Regards,
Tarlika Elisabeth Schmitz


Re: data import via COPY, Rules + Triggers

From
Tarlika Elisabeth Schmitz
Date:
On Wed, 4 May 2011 23:48:04 +0100
Tarlika Elisabeth Schmitz <postgresql3@numerixtechnology.de> wrote:

>I have got a database that needs to be populated, first with historical
>data, then on a daily basis.[...]
>Once imported, data will neither be modified nor deleted.
>
>Data come in denormalized CSV formats. [...]
>I created interim tables matching the structure of the CSV formats
>I want to import via COPY and distribute the data to
>the "proper" tables via triggers.

[...]


I am the only user of the system and at the moment I have only one
database user.

I only want the insdert/update triggers to be executed when I am
importing data. There might be situations where I need to intervene
"manually" and fix some data without the triggers being actioned.

How about creating a second user through whom the imports are done?

I could query current_user in the trigger and simply skip the trigger
actions for any other user. 

QUESTION: 
Or could I do something more sophisticated with privileges?

-- 
Tarlika Elisabeth Schmitz