Thread: Backend problem with large objects
I am writing a Postgres interface for Guile. (I know, I am the other one of the two people doing this!) I sent a message at the weekend, but I used mime encoding of the files which I understand some people have difficulty decoding, so here is my report again, in the clear this time. I am having problems with my large object interface. In particular I get the error ERROR: heap_fetch: xinv19073 relation: ReadBuffer(81aeefe) failed from the backend. I have written a small C program to reproduce the problem, and it follows below, along with the output of PQtrace. In summary, the problem is: /* Pseudo-C */ conn = PQconnectdb() PQexec (conn, BEGIN TRANSACTION) oid = lo_creat (conn, INV_READ | WRITE) fd = lo_open(conn oid, INV_READ | INV_WRITE) for (i = 0; i < 5; i++) lo_write(fd, 'X') lo_lseek(fd, 1, 0) lo_write(fd, 'y') lo_lseek(fd, 3, 0) lo_write(fd, 'y') /**** error happens here ****/ lo_close(fd) PQexec (conn, END TRANSACTION) The real C is: #include <stdio.h> #include "libpq-fe.h" #include "libpq/libpq-fs.h" void exec_cmd(PGconn *conn, char *str); main (int argc, char *argv[]) { PGconn *conn; int lobj_fd; char buf[256]; int ret, i; Oid lobj_id; conn = PQconnectdb("dbname=test"); if (PQstatus(conn) != CONNECTION_OK) { fprintf(stderr, "Can't connect to backend.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } exec_cmd(conn, "BEGIN TRANSACTION"); PQtrace (conn, stdout); if ((lobj_id = lo_creat(conn, INV_READ | INV_WRITE)) < 0) { fprintf(stderr, "Can'tcreate lobj.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if ((lobj_fd = lo_open(conn,lobj_id, INV_READ | INV_WRITE)) < 0) { fprintf(stderr, "Can't open lobj.\n"); fprintf(stderr, "ERROR:%s\n", PQerrorMessage(conn)); exit(1); } fprintf(stderr, "lo_open returned fd = %d.\n", lobj_fd); for (i =0; i < 5; i++) { if ((ret = lo_write(conn, lobj_fd, "X", 1)) != 1) { fprintf(stderr, "Can't write lobj.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } } if ((ret = 
lo_lseek(conn,lobj_fd, 1, 0)) != 1) { fprintf(stderr, "error (%d) lseeking in large object.\n", ret); fprintf(stderr,"ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if ((ret = lo_write(conn, lobj_fd, "y", 1)) != 1) { fprintf(stderr, "Can't write lobj.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if((ret = lo_lseek(conn, lobj_fd, 3, 0)) != 3) { fprintf(stderr, "error (%d) lseeking in large object.\n", ret); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if ((ret = lo_write(conn, lobj_fd, "y", 1)) != 1){ fprintf(stderr, "Can't write lobj.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } ret = lo_close(conn, lobj_fd); printf("lo_close returned %d.\n", ret); if (ret) fprintf(stderr, "Error message: %s\n",PQerrorMessage(conn)); PQuntrace(conn); exec_cmd(conn, "END TRANSACTION"); exit(0); } void exec_cmd(PGconn *conn, char *str) { PGresult *res; if ((res = PQexec(conn, str)) == NULL) { fprintf(stderr, "Error executing %s.\n", str); fprintf(stderr, "Errormessage: %s\n", PQerrorMessage(conn)); exit(1); } if (PQresultStatus(res) != PGRES_COMMAND_OK) { fprintf(stderr,"Error executing %s.\n", str); fprintf(stderr, "Error message: %s\n", PQerrorMessage(conn)); PQclear(res); exit(1); } PQclear(res); } Here is a trace-log of the whole affair: To backend> Q To backend> select proname, oid from pg_proc where proname = 'lo_open' or proname = 'lo_close' or proname = 'lo_creat' or proname = 'lo_unlink' or proname = 'lo_lseek' or proname = 'lo_tell' or proname = 'loread' o r proname = 'lowrite' >From backend> P >From backend> "blank" >From backend> T >From backend (#2)> 2 >From backend> "proname" >From backend (#4)> 19 >From backend (#2)> 32 >From backend (#4)> -1 >From backend> "oid" >From backend (#4)> 26 >From backend (#2)> 4 >From backend (#4)> -1 >From backend> D >From backend (1)> À >From backend (#4)> 11 >From backend (7)> lo_open >From backend (#4)> 7 >From backend (3)> 952 >From backend> D >From backend (1)> À 
>From backend (#4)> 12 >From backend (8)> lo_close >From backend (#4)> 7 >From backend (3)> 953 >From backend> D >From backend (1)> À >From backend (#4)> 12 >From backend (8)> lo_creat >From backend (#4)> 7 >From backend (3)> 957 >From backend> D >From backend (1)> À >From backend (#4)> 13 >From backend (9)> lo_unlink >From backend (#4)> 7 >From backend (3)> 964 >From backend> D >From backend (1)> À >From backend (#4)> 12 >From backend (8)> lo_lseek >From backend (#4)> 7 >From backend (3)> 956 >From backend> D >From backend (1)> À >From backend (#4)> 11 >From backend (7)> lo_tell >From backend (#4)> 7 >From backend (3)> 958 >From backend> D >From backend (1)> À >From backend (#4)> 10 >From backend (6)> loread >From backend (#4)> 7 >From backend (3)> 954 >From backend> D >From backend (1)> À >From backend (#4)> 11 >From backend (7)> lowrite >From backend (#4)> 7 >From backend (3)> 955 >From backend> C >From backend> "SELECT" >From backend> Z >From backend> Z To backend> F To backend (4#)> 957 To backend (4#)> 1 To backend (4#)> 4 To backend (4#)> 393216 >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 19201 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 952 To backend (4#)> 2 To backend (4#)> 4 To backend (4#)> 19201 To backend (4#)> 4 To backend (4#)> 393216 >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 0 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 956 To backend (4#)> 3 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 4 To backend (4#)> 0 >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 0 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 955 To backend (4#)> 2 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 1 To backend> X >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 1 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 955 To backend 
(4#)> 2 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 1 To backend> X >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 1 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 955 To backend (4#)> 2 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 1 To backend> X >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 1 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 955 To backend (4#)> 2 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 1 To backend> X >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 1 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 955 To backend (4#)> 2 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 1 To backend> X >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 1 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 956 To backend (4#)> 3 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 4 To backend (4#)> 1 To backend (4#)> 4 To backend (4#)> 0 >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 1 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 955 To backend (4#)> 2 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 1 To backend> y >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 1 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 956 To backend (4#)> 3 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 4 To backend (4#)> 3 To backend (4#)> 4 To backend (4#)> 0 >From backend> V >From backend> G >From backend (#4)> 4 >From backend (#4)> 3 >From backend> 0 >From backend> Z To backend> F To backend (4#)> 955 To backend (4#)> 2 To backend (4#)> 4 To backend (4#)> 0 To backend (4#)> 1 To backend> y >From backend> E >From backend> "ERROR: heap_fetch: xinv19201 relation: ReadBuffer(81aeefe) failed " >From backend> Z
> I am writing a Postgres interface for Guile. (I know, I am the other one > of the two people doing this!) > > I sent a message at the weekend, but I used mime encoding of the files > which I understand some people have difficulty decoding, so here is my > report again, in the clear this time. > > I am having problems with my large object interface. In particular I get > the error > > ERROR: heap_fetch: xinv19073 relation: ReadBuffer(81aeefe) failed > > from the backend. Reproduced here too. Seems very old and known problem of large object (writing into the middle of a large object does not work). --- Tatsuo Ishii
On Tue, 2 Feb 1999, Tatsuo Ishii wrote: > Reproduced here too. Seems very old and known problem of large object > (writing into in the middle of a large object does not work). Many thanks, does this mean it's not likely to be fixed? If so I'll take this to the documentation list, if there is one. But first, can anyone explain what *is* allowed in lo_write after lo_lseek? Is it OK to overwrite a large object for example? I also note that there is no way to truncate a large object without reading the beginning bit and copying it out to another new large object, which involves it going down the wire to the client and then back again. Are there any plans to implement lo_trunc or something? Perhaps this is difficult for the same reason lo_write is difficult inside a large object. Ian
> On Tue, 2 Feb 1999, Tatsuo Ishii wrote: > > > Reproduced here too. Seems very old and known problem of large object > > (writing into in the middle of a large object does not work). > > Many thanks, does this mean it's not likely to be fixed? If so I'll take > this to the documentation list, if there is one. But first, can anyone > explain what *is* allowed in lo_write after lo_lseek? Is it OK to > overwrite a large object for example? Ok. I think I have found the source of the problem. Please apply included patches and try again. > I also note that there is no way to truncate a large object without > reading the beginning bit and copying it out to another new large object, > which involves it going down the wire to the client and then back again. > Are there any plans to implement lo_trunc or something? Perhaps this is > difficult for the same reason lo_write is difficult inside a large object. Seems not too difficult, but I don't have time to do that. --- Tatsuo Ishii ----------------------------- cut here ---------------------------------- *** postgresql-6.4.2/src/backend/storage/large_object/inv_api.c.orig Sun Dec 13 14:08:19 1998 --- postgresql-6.4.2/src/backend/storage/large_object/inv_api.c Thu Feb 4 22:02:43 1999 *************** *** 545,555 **** tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); else { ! if (obj_desc->offset > obj_desc->highbyte) tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); else tuplen = inv_wrold(obj_desc, buf, nbytes - nwritten, tuple, buffer); ! ReleaseBuffer(buffer); } /* move pointers past the amount we just wrote */ --- 545,561 ---- tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); else { ! if (obj_desc->offset > obj_desc->highbyte) { tuplen = inv_wrnew(obj_desc, buf, nbytes- nwritten); + ReleaseBuffer(buffer); + } else tuplen = inv_wrold(obj_desc, buf, nbytes - nwritten, tuple, buffer); ! /* inv_wrold() has already issued WriteBuffer() ! which has decremented local reference counter ! (LocalRefCount). 
So we should not call ! ReleaseBuffer() here. -- Tatsuo 99/2/4 ! ReleaseBuffer(buffer); */ } /* move pointers past the amount we just wrote */
Applied manually. The patch did not apply cleanly, and needed a &tuple in inv_wrold, not tuple. > > On Tue, 2 Feb 1999, Tatsuo Ishii wrote: > > > > > Reproduced here too. Seems very old and known problem of large object > > > (writing into in the middle of a large object does not work). > > > > Many thanks, does this mean it's not likely to be fixed? If so I'll take > > this to the documentation list, if there is one. But first, can anyone > > explain what *is* allowed in lo_write after lo_lseek? Is it OK to > > overwrite a large object for example? > > Ok. I think I have found the source of the problem. Please apply > included patches and try again. > > > I also note that there is no way to truncate a large object without > > reading the beginning bit and copying it out to another new large object, > > which involves it going down the wire to the client and then back again. > > Are there any plans to implement lo_trunc or something? Perhaps this is > > difficult for the same reason lo_write is difficult inside a large object. > > Seems not too difficult, but I don't have time to do that. > --- > Tatsuo Ishii > > ----------------------------- cut here ---------------------------------- > *** postgresql-6.4.2/src/backend/storage/large_object/inv_api.c.orig Sun Dec 13 14:08:19 1998 > --- postgresql-6.4.2/src/backend/storage/large_object/inv_api.c Thu Feb 4 22:02:43 1999 > *************** > *** 545,555 **** > tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); > else > { > ! if (obj_desc->offset > obj_desc->highbyte) > tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); > else > tuplen = inv_wrold(obj_desc, buf, nbytes - nwritten, tuple, buffer); > ! ReleaseBuffer(buffer); > } > > /* move pointers past the amount we just wrote */ > --- 545,561 ---- > tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); > else > { > ! 
if (obj_desc->offset > obj_desc->highbyte) { > tuplen = inv_wrnew(obj_desc, buf, nbytes - nwritten); > + ReleaseBuffer(buffer); > + } > else > tuplen = inv_wrold(obj_desc, buf, nbytes - nwritten, tuple, buffer); > ! /* inv_wrold() has already issued WriteBuffer() > ! which has decremented local reference counter > ! (LocalRefCount). So we should not call > ! ReleaseBuffer() here. -- Tatsuo 99/2/4 > ! ReleaseBuffer(buffer); */ > } > > /* move pointers past the amount we just wrote */ > > -- Bruce Momjian | http://www.op.net/~candle maillist@candle.pha.pa.us | (610) 853-3000+ If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania19026
On Thu, 4 Feb 1999, Bruce Momjian wrote: > Applied manually. The patch did not apply cleanly, and needed a &tuple > in inv_wrold, not tuple. In the 4.6.2 release there are no &tuple arguments to inv_wrold around the patch. Perhaps there is a patch you have applied that I need? Please see below: > > > On Tue, 2 Feb 1999, Tatsuo Ishii wrote: > > > > > Ok. I think I have found the source of the problem. Please apply > > included patches and try again. Many thanks indeed for this. Unfortunately it doesn't completely work: it fixes the problem as reported, but when, instead of writing five characters, one at a time, I write five at once, the backend dies in the same place it did before. Here's the C code slightly modified to reproduce the problem: #include <stdio.h> #include "libpq-fe.h" #include "libpq/libpq-fs.h" void exec_cmd(PGconn *conn, char *str); main (int argc, char *argv[]) { PGconn *conn; int lobj_fd; char buf[256]; int ret, i; Oid lobj_id; conn = PQconnectdb("dbname=test"); if (PQstatus(conn) != CONNECTION_OK) { fprintf(stderr, "Can't connect to backend.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } exec_cmd(conn, "BEGIN TRANSACTION"); PQtrace (conn, stdout); if ((lobj_id = lo_creat(conn, INV_READ | INV_WRITE)) < 0) { fprintf(stderr, "Can'tcreate lobj.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if ((lobj_fd = lo_open(conn,lobj_id, INV_READ | INV_WRITE)) < 0) { fprintf(stderr, "Can't open lobj.\n"); fprintf(stderr, "ERROR:%s\n", PQerrorMessage(conn)); exit(1); } fprintf(stderr, "lo_open returned fd = %d.\n", lobj_fd); /* for (i = 0; i < 5; i++) { */ if ((ret = lo_write(conn, lobj_fd, "XXXXX", 5)) != 5) { fprintf(stderr, "Can't write lobj.\n"); fprintf(stderr,"ERROR: %s\n", PQerrorMessage(conn)); exit(1); } /* } */ if ((ret = lo_lseek(conn, lobj_fd, 1, 0)) != 1) { fprintf(stderr, "error (%d) lseeking in large object.\n", ret); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if ((ret = 
lo_write(conn, lobj_fd, "y", 1))!= 1) { fprintf(stderr, "Can't write lobj.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if ((ret = lo_lseek(conn, lobj_fd, 3, 0)) != 3) { fprintf(stderr, "error (%d) lseeking in large object.\n",ret); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } if ((ret = lo_write(conn, lobj_fd,"y", 1)) != 1) { fprintf(stderr, "Can't write lobj.\n"); fprintf(stderr, "ERROR: %s\n", PQerrorMessage(conn)); exit(1); } ret = lo_close(conn, lobj_fd); printf("lo_close returned %d.\n", ret); if (ret) fprintf(stderr, "Error message: %s\n", PQerrorMessage(conn)); PQuntrace(conn); exec_cmd(conn, "END TRANSACTION"); exit(0); } void exec_cmd(PGconn *conn, char *str) { PGresult *res; if ((res = PQexec(conn, str)) == NULL) { fprintf(stderr, "Error executing %s.\n", str); fprintf(stderr, "Errormessage: %s\n", PQerrorMessage(conn)); exit(1); } if (PQresultStatus(res) != PGRES_COMMAND_OK) { fprintf(stderr,"Error executing %s.\n", str); fprintf(stderr, "Error message: %s\n", PQerrorMessage(conn)); PQclear(res); exit(1); } PQclear(res); }
>On Thu, 4 Feb 1999, Bruce Momjian wrote: > >> Applied manually. The patch did not apply cleanly, and needed a &tuple >> in inv_wrold, not tuple. > >In the 4.6.2 release there are no &tuple arguments to inv_wrold around the >patch. Perhaps there is a patch you have applied that I need? Please see >below: My patches are for 6.4.2. Bruce is talking about current. Oh, I don't know what version of PostgreSQL you are using. >> > > On Tue, 2 Feb 1999, Tatsuo Ishii wrote: >> > > >> > Ok. I think I have found the source of the problem. Please apply >> > included patches and try again. > >Many thanks indeed for this. Unfortunately it doesn't completely work: it >fixes the problem as reported, but when, instead of writing five >characters, one at a time, I write five at once, the backend dies in >the same place it did before. Here's the C code slightly modified to >reproduce the problem: Give me some time. I'm not sure if I could solve new problem, though. -- Tatsuo Ishii
> >Many thanks indeed for this. Unfortunately it doesn't completely work: it > >fixes the problem as reported, but when, instead of writing five > >characters, one at a time, I write five at once, the backend dies in > >the same place it did before. Here's the C code slightly modified to > >reproduce the problem: > > Give me some time. I'm not sure if I could solve new problem, though. > -- > Tatsuo Ishii I think I have fixed the problem you mentioned. Ian, could you apply included patches and test again? Note that those are for 6.4.2 and additions to the previous patches. BTW, lobj strangely added a 0 filled disk block at the head of the heap. As a result, even 1-byte-user-data lobj consumes at least 16384 bytes (2 disk blocks)! Included patches also fix this problem. To Bruce: Thanks for taking care of my previous patches for current. If included patch is ok, I will make one for current. ---------------------------- cut here --------------------------------- *** postgresql-6.4.2/src/backend/storage/large_object/inv_api.c.orig Sun Dec 13 14:08:19 1998 --- postgresql-6.4.2/src/backend/storage/large_object/inv_api.c Fri Feb 12 20:21:05 1999 *************** *** 624,648 **** || obj_desc->offset < obj_desc->lowbyte || !ItemPointerIsValid(&(obj_desc->htid))) { /* initialize scan key if not done */ if (obj_desc->iscan == (IndexScanDesc) NULL) { - ScanKeyData skey; - /* * As scan index may be prematurely closed (on commit), we * must use object currentoffset (was 0) to reinitialize the * entry [ PA ]. 
*/ - ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, - Int32GetDatum(obj_desc->offset)); obj_desc->iscan = index_beginscan(obj_desc->index_r, (bool) 0, (uint16) 1, &skey); } - do { res = index_getnext(obj_desc->iscan, ForwardScanDirection); --- 630,655 ---- || obj_desc->offset < obj_desc->lowbyte || !ItemPointerIsValid(&(obj_desc->htid))) { + ScanKeyData skey; + + ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, + Int32GetDatum(obj_desc->offset)); /* initialize scan key if not done */ if (obj_desc->iscan== (IndexScanDesc) NULL) { /* * As scan index may be prematurely closed(on commit), we * must use object current offset (was 0) to reinitialize the * entry [ PA]. */ obj_desc->iscan = index_beginscan(obj_desc->index_r, (bool) 0, (uint16) 1, &skey); + } else { + index_rescan(obj_desc->iscan, false, &skey); } do { res = index_getnext(obj_desc->iscan,ForwardScanDirection); *************** *** 666,672 **** tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, &res->heap_iptr,buffer); pfree(res); ! } while (tuple == (HeapTuple) NULL); /* remember this tid -- we may need it for later reads/writes */ ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); --- 673,679 ---- tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, &res->heap_iptr,buffer); pfree(res); ! } while (!HeapTupleIsValid(tuple)); /* remember this tid -- we may need it for later reads/writes */ ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); *************** *** 675,680 **** --- 682,691 ---- { tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, &(obj_desc->htid),buffer); + if (!HeapTupleIsValid(tuple)) { + elog(ERROR, + "inv_fetchtup: heap_fetch failed"); + } } /* *************** *** 746,757 **** nblocks = RelationGetNumberOfBlocks(hr); ! if (nblocks > 0) buffer = ReadBuffer(hr, nblocks - 1); ! else buffer = ReadBuffer(hr, P_NEW); ! ! page = BufferGetPage(buffer); /* * If the last page is too small to hold all the data, and it's too --- 757,771 ---- nblocks = RelationGetNumberOfBlocks(hr); ! 
if (nblocks > 0) { buffer = ReadBuffer(hr, nblocks - 1); ! page = BufferGetPage(buffer); ! } ! else { buffer = ReadBuffer(hr, P_NEW); ! page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), 0); ! } /* * If the last page is too small to hold all the data, and it's too *************** *** 865,876 **** nblocks = RelationGetNumberOfBlocks(hr); ! if (nblocks > 0) newbuf = ReadBuffer(hr, nblocks - 1); ! else newbuf = ReadBuffer(hr, P_NEW); - newpage = BufferGetPage(newbuf); freespc = IFREESPC(newpage); /* --- 879,894 ---- nblocks = RelationGetNumberOfBlocks(hr); ! if (nblocks > 0) { newbuf = ReadBuffer(hr, nblocks - 1); ! newpage = BufferGetPage(newbuf); ! } ! else { newbuf = ReadBuffer(hr, P_NEW); + newpage = BufferGetPage(newbuf); + PageInit(newpage, BufferGetPageSize(newbuf), 0); + } freespc = IFREESPC(newpage); /* *************** *** 973,978 **** --- 991,999 ---- WriteBuffer(buffer); if (newbuf != buffer) WriteBuffer(newbuf); + + /* Tuple id is no longer valid */ + ItemPointerSetInvalid(&(obj_desc->htid)); /* done */ return nwritten;
Well, after a delay of a lot of months (sorry, huge personal crises!) I am happy to report that this works now, at least on a cursory test. I'll hit on it a bit harder soon and report back, but hopefully this patch'll be in time for 6.5? Certainly it works better now than before so maybe include the patch anyway, even if there isn't time to do better testing. Cheers, and many many thanks Tatsuo. Ian ---------- Forwarded message ---------- Date: Fri, 12 Feb 1999 23:12:07 +0900 From: Tatsuo Ishii <t-ishii@sra.co.jp> To: t-ishii@sra.co.jp Cc: Ian Grant <I.A.N.Grant@damtp.cam.ac.uk>, Bruce Momjian <maillist@candle.pha.pa.us>, pgsql-hackers@postgreSQL.org Subject: Re: [HACKERS] Backend problem with large objects > >Many thanks indeed for this. Unfortunately it doesn't completely work: it > >fixes the problem as reported, but when, instead of writing five > >characters, one at a time, I write five at once, the backend dies in > >the same place it did before. Here's the C code slightly modified to > >reproduce the problem: > > Give me some time. I'm not sure if I could solve new problem, though. > -- > Tatsuo Ishii I think I have fixed the problem you mentioned. Ian, could you apply included patches and test again? Note that those are for 6.4.2 and additions to the previous patches. BTW, lobj strangely added a 0 filled disk block at the head of the heap. As a result, even 1-byte-user-data lobj consumes at least 16384 bytes (2 disk blocks)! Included patches also fix this problem. To Bruce: Thanks for taking care of my previous patches for current. If included patch is ok, I will make one for current. 
---------------------------- cut here --------------------------------- *** postgresql-6.4.2/src/backend/storage/large_object/inv_api.c.orig Sun Dec 13 14:08:19 1998 --- postgresql-6.4.2/src/backend/storage/large_object/inv_api.c Fri Feb 12 20:21:05 1999 *************** *** 624,648 **** || obj_desc->offset < obj_desc->lowbyte || !ItemPointerIsValid(&(obj_desc->htid))) { /* initialize scan key if not done */ if (obj_desc->iscan == (IndexScanDesc) NULL) { - ScanKeyData skey; - /* * As scan index may be prematurely closed (on commit), we * must use object currentoffset (was 0) to reinitialize the * entry [ PA ]. */ - ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, - Int32GetDatum(obj_desc->offset)); obj_desc->iscan = index_beginscan(obj_desc->index_r, (bool) 0, (uint16) 1, &skey); } - do { res = index_getnext(obj_desc->iscan, ForwardScanDirection); --- 630,655 ---- || obj_desc->offset < obj_desc->lowbyte || !ItemPointerIsValid(&(obj_desc->htid))) { + ScanKeyData skey; + + ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, + Int32GetDatum(obj_desc->offset)); /* initialize scan key if not done */ if (obj_desc->iscan== (IndexScanDesc) NULL) { /* * As scan index may be prematurely closed(on commit), we * must use object current offset (was 0) to reinitialize the * entry [ PA]. */ obj_desc->iscan = index_beginscan(obj_desc->index_r, (bool) 0, (uint16) 1, &skey); + } else { + index_rescan(obj_desc->iscan, false, &skey); } do { res = index_getnext(obj_desc->iscan,ForwardScanDirection); *************** *** 666,672 **** tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, &res->heap_iptr,buffer); pfree(res); ! } while (tuple == (HeapTuple) NULL); /* remember this tid -- we may need it for later reads/writes */ ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); --- 673,679 ---- tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, &res->heap_iptr,buffer); pfree(res); ! 
} while (!HeapTupleIsValid(tuple)); /* remember this tid -- we may need it for later reads/writes */ ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); *************** *** 675,680 **** --- 682,691 ---- { tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, &(obj_desc->htid),buffer); + if (!HeapTupleIsValid(tuple)) { + elog(ERROR, + "inv_fetchtup: heap_fetch failed"); + } } /* *************** *** 746,757 **** nblocks = RelationGetNumberOfBlocks(hr); ! if (nblocks > 0) buffer = ReadBuffer(hr, nblocks - 1); ! else buffer = ReadBuffer(hr, P_NEW); ! ! page = BufferGetPage(buffer); /* * If the last page is too small to hold all the data, and it's too --- 757,771 ---- nblocks = RelationGetNumberOfBlocks(hr); ! if (nblocks > 0) { buffer = ReadBuffer(hr, nblocks - 1); ! page = BufferGetPage(buffer); ! } ! else { buffer = ReadBuffer(hr, P_NEW); ! page = BufferGetPage(buffer); ! PageInit(page, BufferGetPageSize(buffer), 0); ! } /* * If the last page is too small to hold all the data, and it's too *************** *** 865,876 **** nblocks = RelationGetNumberOfBlocks(hr); ! if (nblocks > 0) newbuf = ReadBuffer(hr, nblocks - 1); ! else newbuf = ReadBuffer(hr, P_NEW); - newpage = BufferGetPage(newbuf); freespc = IFREESPC(newpage); /* --- 879,894 ---- nblocks = RelationGetNumberOfBlocks(hr); ! if (nblocks > 0) { newbuf = ReadBuffer(hr, nblocks - 1); ! newpage = BufferGetPage(newbuf); ! } ! else { newbuf = ReadBuffer(hr, P_NEW); + newpage = BufferGetPage(newbuf); + PageInit(newpage, BufferGetPageSize(newbuf), 0); + } freespc = IFREESPC(newpage); /* *************** *** 973,978 **** --- 991,999 ---- WriteBuffer(buffer); if (newbuf != buffer) WriteBuffer(newbuf); + + /* Tuple id is no longer valid */ + ItemPointerSetInvalid(&(obj_desc->htid)); /* done */ return nwritten;
What do we do with this. Is it already done? Looks quite old. > Well, after a delay of a lot of months (sorry, huge personal crises!) I am > happy to report that this works now, at least on a cursory test. I'll hit > on it a bit harder soon and report back, but hopefully this patch'll be in > time for 6.5? Certainly it works better now than before so maybe include > the patch anyway, even if there isn't time to do better testing. > > Cheers, and many many thanks Tatsuo. > > Ian > > ---------- Forwarded message ---------- > Date: Fri, 12 Feb 1999 23:12:07 +0900 > From: Tatsuo Ishii <t-ishii@sra.co.jp> > To: t-ishii@sra.co.jp > Cc: Ian Grant <I.A.N.Grant@damtp.cam.ac.uk>, > Bruce Momjian <maillist@candle.pha.pa.us>, pgsql-hackers@postgreSQL.org > Subject: Re: [HACKERS] Backend problem with large objects > > > >Many thanks indeed for this. Unfortunately it doesn't completely work: it > > >fixes the problem as reported, but when, instead of writing five > > >characters, one at a time, I write five at once, the backend dies in > > >the same place it did before. Here's the C code slightly modified to > > >reproduce the problem: > > > > Give me some time. I'm not sure if I could solve new problem, though. > > -- > > Tatsuo Ishii > > I think I have fixed the problem you mentioned. Ian, could you apply > included patches and test again? Note that those are for 6.4.2 and > additions to the previous patches. > > BTW, lobj strangely added a 0 filled disk block at the head of the > heap. As a result, even 1-byte-user-data lobj consumes at least 16384 > bytes (2 disk blocks)! Included patches also fix this problem. > > To Bruce: > Thanks for taking care of my previous patches for current. If > included patch is ok, I will make one for current. 
> > ---------------------------- cut here --------------------------------- > *** postgresql-6.4.2/src/backend/storage/large_object/inv_api.c.orig Sun Dec 13 14:08:19 1998 > --- postgresql-6.4.2/src/backend/storage/large_object/inv_api.c Fri Feb 12 20:21:05 1999 > *************** > *** 624,648 **** > || obj_desc->offset < obj_desc->lowbyte > || !ItemPointerIsValid(&(obj_desc->htid))) > { > > /* initialize scan key if not done */ > if (obj_desc->iscan == (IndexScanDesc) NULL) > { > - ScanKeyData skey; > - > /* > * As scan index may be prematurely closed (on commit), we > * must use object current offset (was 0) to reinitialize the > * entry [ PA ]. > */ > - ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, > - Int32GetDatum(obj_desc->offset)); > obj_desc->iscan = > index_beginscan(obj_desc->index_r, > (bool) 0, (uint16) 1, > &skey); > } > - > do > { > res = index_getnext(obj_desc->iscan, ForwardScanDirection); > --- 630,655 ---- > || obj_desc->offset < obj_desc->lowbyte > || !ItemPointerIsValid(&(obj_desc->htid))) > { > + ScanKeyData skey; > + > + ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, > + Int32GetDatum(obj_desc->offset)); > > /* initialize scan key if not done */ > if (obj_desc->iscan == (IndexScanDesc) NULL) > { > /* > * As scan index may be prematurely closed (on commit), we > * must use object current offset (was 0) to reinitialize the > * entry [ PA ]. > */ > obj_desc->iscan = > index_beginscan(obj_desc->index_r, > (bool) 0, (uint16) 1, > &skey); > + } else { > + index_rescan(obj_desc->iscan, false, &skey); > } > do > { > res = index_getnext(obj_desc->iscan, ForwardScanDirection); > *************** > *** 666,672 **** > tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, > &res->heap_iptr, buffer); > pfree(res); > ! 
} while (tuple == (HeapTuple) NULL); > > /* remember this tid -- we may need it for later reads/writes */ > ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); > --- 673,679 ---- > tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, > &res->heap_iptr, buffer); > pfree(res); > ! } while (!HeapTupleIsValid(tuple)); > > /* remember this tid -- we may need it for later reads/writes */ > ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); > *************** > *** 675,680 **** > --- 682,691 ---- > { > tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, > &(obj_desc->htid), buffer); > + if (!HeapTupleIsValid(tuple)) { > + elog(ERROR, > + "inv_fetchtup: heap_fetch failed"); > + } > } > > /* > *************** > *** 746,757 **** > > nblocks = RelationGetNumberOfBlocks(hr); > > ! if (nblocks > 0) > buffer = ReadBuffer(hr, nblocks - 1); > ! else > buffer = ReadBuffer(hr, P_NEW); > ! > ! page = BufferGetPage(buffer); > > /* > * If the last page is too small to hold all the data, and it's too > --- 757,771 ---- > > nblocks = RelationGetNumberOfBlocks(hr); > > ! if (nblocks > 0) { > buffer = ReadBuffer(hr, nblocks - 1); > ! page = BufferGetPage(buffer); > ! } > ! else { > buffer = ReadBuffer(hr, P_NEW); > ! page = BufferGetPage(buffer); > ! PageInit(page, BufferGetPageSize(buffer), 0); > ! } > > /* > * If the last page is too small to hold all the data, and it's too > *************** > *** 865,876 **** > > nblocks = RelationGetNumberOfBlocks(hr); > > ! if (nblocks > 0) > newbuf = ReadBuffer(hr, nblocks - 1); > ! else > newbuf = ReadBuffer(hr, P_NEW); > > - newpage = BufferGetPage(newbuf); > freespc = IFREESPC(newpage); > > /* > --- 879,894 ---- > > nblocks = RelationGetNumberOfBlocks(hr); > > ! if (nblocks > 0) { > newbuf = ReadBuffer(hr, nblocks - 1); > ! newpage = BufferGetPage(newbuf); > ! } > ! 
> else { > newbuf = ReadBuffer(hr, P_NEW); > + newpage = BufferGetPage(newbuf); > + PageInit(newpage, BufferGetPageSize(newbuf), 0); > + } > > freespc = IFREESPC(newpage); > > /* > *************** > *** 973,978 **** > --- 991,999 ---- > WriteBuffer(buffer); > if (newbuf != buffer) > WriteBuffer(newbuf); > + > + /* Tuple id is no longer valid */ > + ItemPointerSetInvalid(&(obj_desc->htid)); > > /* done */ > return nwritten; > > -- Bruce Momjian | http://www.op.net/~candle maillist@candle.pha.pa.us | (610) 853-3000 > + If your life is a hard drive, | 830 Blythe Avenue > + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
>What do we do with this. Is it already done? Looks quite old. I believe these have been already in the current source. --- Tatsuo Ishii >> Well, after a delay of a lot of months (sorry, huge personal crises!) I am >> happy to report that this works now, at least on a cursory test. I'll hit >> on it a bit harder soon and report back, but hopefully this patch'll be in >> time for 6.5? Certainly it works better now than before so maybe include >> the patch anyway, even if there isn't time to do better testing. >> >> Cheers, and many many thanks Tatsuo. >> >> Ian >> >> ---------- Forwarded message ---------- >> Date: Fri, 12 Feb 1999 23:12:07 +0900 >> From: Tatsuo Ishii <t-ishii@sra.co.jp> >> To: t-ishii@sra.co.jp >> Cc: Ian Grant <I.A.N.Grant@damtp.cam.ac.uk>, >> Bruce Momjian <maillist@candle.pha.pa.us>, pgsql-hackers@postgreSQL.org >> Subject: Re: [HACKERS] Backend problem with large objects >> >> > >Many thanks indeed for this. Unfortunately it doesn't completely work: it >> > >fixes the problem as reported, but when, instead of writing five >> > >characters, one at a time, I write five at once, the backend dies in >> > >the same place it did before. Here's the C code slightly modified to >> > >reproduce the problem: >> > >> > Give me some time. I'm not sure if I could solve new problem, though. >> > -- >> > Tatsuo Ishii >> >> I think I have fixed the problem you mentioned. Ian, could you apply >> included patches and test again? Note that those are for 6.4.2 and >> additions to the previous patches. >> >> BTW, lobj strangely added a 0 filled disk block at the head of the >> heap. As a result, even 1-byte-user-data lobj consumes at least 16384 >> bytes (2 disk blocks)! Included patches also fix this problem. >> >> To Bruce: >> Thanks for taking care of my previous patches for current. If >> included patch is ok, I will make one for current. 
>> >> ---------------------------- cut here --------------------------------- >> *** postgresql-6.4.2/src/backend/storage/large_object/inv_api.c.orig Sun Dec 13 14:08:19 1998 >> --- postgresql-6.4.2/src/backend/storage/large_object/inv_api.c Fri Feb 12 20:21:05 1999 >> *************** >> *** 624,648 **** >> || obj_desc->offset < obj_desc->lowbyte >> || !ItemPointerIsValid(&(obj_desc->htid))) >> { >> >> /* initialize scan key if not done */ >> if (obj_desc->iscan == (IndexScanDesc) NULL) >> { >> - ScanKeyData skey; >> - >> /* >> * As scan index may be prematurely closed (on commit), we >> * must use object current offset (was 0) to reinitialize the >> * entry [ PA ]. >> */ >> - ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, >> - Int32GetDatum(obj_desc->offset)); >> obj_desc->iscan = >> index_beginscan(obj_desc->index_r, >> (bool) 0, (uint16) 1, >> &skey); >> } >> - >> do >> { >> res = index_getnext(obj_desc->iscan, ForwardScanDirection); >> --- 630,655 ---- >> || obj_desc->offset < obj_desc->lowbyte >> || !ItemPointerIsValid(&(obj_desc->htid))) >> { >> + ScanKeyData skey; >> + >> + ScanKeyEntryInitialize(&skey, 0x0, 1, F_INT4GE, >> + Int32GetDatum(obj_desc->offset)); >> >> /* initialize scan key if not done */ >> if (obj_desc->iscan == (IndexScanDesc) NULL) >> { >> /* >> * As scan index may be prematurely closed (on commit), we >> * must use object current offset (was 0) to reinitialize the >> * entry [ PA ]. >> */ >> obj_desc->iscan = >> index_beginscan(obj_desc->index_r, >> (bool) 0, (uint16) 1, >> &skey); >> + } else { >> + index_rescan(obj_desc->iscan, false, &skey); >> } >> do >> { >> res = index_getnext(obj_desc->iscan, ForwardScanDirection); >> *************** >> *** 666,672 **** >> tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, >> &res->heap_iptr, buffer); >> pfree(res); >> ! 
} while (tuple == (HeapTuple) NULL); >> >> /* remember this tid -- we may need it for later reads/writes */ >> ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); >> --- 673,679 ---- >> tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, >> &res->heap_iptr, buffer); >> pfree(res); >> ! } while (!HeapTupleIsValid(tuple)); >> >> /* remember this tid -- we may need it for later reads/writes */ >> ItemPointerCopy(&tuple->t_ctid, &obj_desc->htid); >> *************** >> *** 675,680 **** >> --- 682,691 ---- >> { >> tuple = heap_fetch(obj_desc->heap_r, SnapshotNow, >> &(obj_desc->htid), buffer); >> + if (!HeapTupleIsValid(tuple)) { >> + elog(ERROR, >> + "inv_fetchtup: heap_fetch failed"); >> + } >> } >> >> /* >> *************** >> *** 746,757 **** >> >> nblocks = RelationGetNumberOfBlocks(hr); >> >> ! if (nblocks > 0) >> buffer = ReadBuffer(hr, nblocks - 1); >> ! else >> buffer = ReadBuffer(hr, P_NEW); >> ! >> ! page = BufferGetPage(buffer); >> >> /* >> * If the last page is too small to hold all the data, and it's too >> --- 757,771 ---- >> >> nblocks = RelationGetNumberOfBlocks(hr); >> >> ! if (nblocks > 0) { >> buffer = ReadBuffer(hr, nblocks - 1); >> ! page = BufferGetPage(buffer); >> ! } >> ! else { >> buffer = ReadBuffer(hr, P_NEW); >> ! page = BufferGetPage(buffer); >> ! PageInit(page, BufferGetPageSize(buffer), 0); >> ! } >> >> /* >> * If the last page is too small to hold all the data, and it's too >> *************** >> *** 865,876 **** >> >> nblocks = RelationGetNumberOfBlocks(hr); >> >> ! if (nblocks > 0) >> newbuf = ReadBuffer(hr, nblocks - 1); >> ! else >> newbuf = ReadBuffer(hr, P_NEW); >> >> - newpage = BufferGetPage(newbuf); >> freespc = IFREESPC(newpage); >> >> /* >> --- 879,894 ---- >> >> nblocks = RelationGetNumberOfBlocks(hr); >> >> ! if (nblocks > 0) { >> newbuf = ReadBuffer(hr, nblocks - 1); >> ! newpage = BufferGetPage(newbuf); >> ! } >> ! 
else { >> newbuf = ReadBuffer(hr, P_NEW); >> + newpage = BufferGetPage(newbuf); >> + PageInit(newpage, BufferGetPageSize(newbuf), 0); >> + } >> >> freespc = IFREESPC(newpage); >> >> /* >> *************** >> *** 973,978 **** >> --- 991,999 ---- >> WriteBuffer(buffer); >> if (newbuf != buffer) >> WriteBuffer(newbuf); >> + >> + /* Tuple id is no longer valid */ >> + ItemPointerSetInvalid(&(obj_desc->htid)); >> >> /* done */ >> return nwritten; >> >> > > >-- > Bruce Momjian | http://www.op.net/~candle > maillist@candle.pha.pa.us | (610) 853-3000 > + If your life is a hard drive, | 830 Blythe Avenue > + Christ can be your backup. | Drexel Hill, Pennsylvania 19026