Thread: File descriptor leakage?
Has anyone seen a problem with postgresql-6.5.1 leaking file descriptors? I am not sure what exact activity causes the problem, but e.g. using pgaccess to inspect the database causes the following: cr@photox% ps ax|grep postgres 425 ?? Ss 1:58.01 /usr/local/pgsql/bin/postmaster -i -S -o -F (postgres 78404 ?? I 0:00.96 /usr/local/pgsql/bin/postgres cr 127.0.0.1 cr idle cr@photox% fstat -p 78404 USER CMD PID FD MOUNT INUM MODE SZ|DV R/W pgsql postgres 78404 root / 2 drwxr-xr-x 512 r pgsql postgres 78404 wd /usr 389050 drwx------ 4096 r pgsql postgres 78404 text /usr 334856 -r-xr-xr-x 1050936 r pgsql postgres 78404 0 / 967 crw-rw-rw- null rw pgsql postgres 78404 1 / 967 crw-rw-rw- null rw pgsql postgres 78404 2 / 967 crw-rw-rw- null rw pgsql postgres 78404 3 /usr 366283 -rw------- 245760 rw pgsql postgres 78404 4 /usr 389846 -rw------- 24576 rw pgsql postgres 78404 5* internet stream tcp ca0ba960 pgsql postgres 78404 6 /usr 389850 -rw------- 139264 rw pgsql postgres 78404 7 /usr 389856 -rw------- 8192 rw pgsql postgres 78404 8 /usr 389841 -rw------- 16384 rw pgsql postgres 78404 9 /usr 389854 -rw------- 8192 rw pgsql postgres 78404 10 /usr 389855 -rw------- 16384 rw pgsql postgres 78404 11 /usr 389830 -rw------- 65536 rw pgsql postgres 78404 12 /usr 389847 -rw------- 147456 rw pgsql postgres 78404 13 /usr 389844 -rw------- 40960 rw pgsql postgres 78404 14 /usr 389845 -rw------- 16384 rw pgsql postgres 78404 15 /usr 366236 -rw------- 8192 rw pgsql postgres 78404 16 /usr 366281 -rw------- 8192 rw pgsql postgres 78404 17 /usr 389823 -rw------- 24576 rw pgsql postgres 78404 18 /usr 389848 -rw------- 385024 rw pgsql postgres 78404 19 /usr 389817 -rw------- 24576 rw pgsql postgres 78404 20 /usr 389815 -rw------- 16384 rw pgsql postgres 78404 21 /usr 389857 -rw------- 8192 rw pgsql postgres 78404 22 /usr 389829 -rw------- 172032 rw pgsql postgres 78404 23 /usr 389828 -rw------- 40960 rw pgsql postgres 78404 24 /usr 389919 -rw------- 0 rw pgsql postgres 78404 25 /usr 366280 
-rw------- 8192 rw pgsql postgres 78404 26 /usr 389821 -rw------- 32768 rw pgsql postgres 78404 27 /usr 389827 -rw------- 139264 rw pgsql postgres 78404 28 /usr 389826 -rw------- 57344 rw pgsql postgres 78404 29 /usr 390476 -rw------- 8192 rw pgsql postgres 78404 30 /usr 390488 -rw------- 8192 rw pgsql postgres 78404 31 /usr 390176 -rw------- 8192 rw pgsql postgres 78404 32 /usr 390547 -rw------- 8192 rw pgsql postgres 78404 33 /usr 389913 -rw------- 8192 rw pgsql postgres 78404 34 /usr 389929 -rw------- 8192 rw pgsql postgres 78404 35 /usr 389853 -rw------- 16384 rw pgsql postgres 78404 36 /usr 389852 -rw------- 16384 rw pgsql postgres 78404 37 /usr 389819 -rw------- 8192 rw pgsql postgres 78404 38 /usr 389818 -rw------- 16384 rw pgsql postgres 78404 39 /usr 390217 -rw------- 8192 rw pgsql postgres 78404 40 /usr 390360 -rw------- 8192 rw pgsql postgres 78404 41 /usr 390361 -rw------- 16384 rw pgsql postgres 78404 42 /usr 390379 -rw------- 32768 rw pgsql postgres 78404 43 /usr 390218 -rw------- 16384 rw pgsql postgres 78404 44 /usr 390234 -rw------- 16384 rw pgsql postgres 78404 45 /usr 390255 -rw------- 16384 rw pgsql postgres 78404 46 /usr 390276 -rw------- 16384 rw pgsql postgres 78404 47 /usr 390297 -rw------- 32768 rw pgsql postgres 78404 48 /usr 389678 -rw------- 32768 rw pgsql postgres 78404 49 /usr 389823 -rw------- 24576 rw pgsql postgres 78404 50 /usr 389856 -rw------- 8192 rw pgsql postgres 78404 51 /usr 389841 -rw------- 16384 rw pgsql postgres 78404 52 /usr 389854 -rw------- 8192 rw pgsql postgres 78404 53 /usr 389855 -rw------- 16384 rw pgsql postgres 78404 54 /usr 389830 -rw------- 65536 rw pgsql postgres 78404 55 /usr 389848 -rw------- 385024 rw pgsql postgres 78404 56 /usr 389815 -rw------- 16384 rw pgsql postgres 78404 57 /usr 389857 -rw------- 8192 rw pgsql postgres 78404 58 /usr 366281 -rw------- 8192 rw pgsql postgres 78404 59 /usr 389828 -rw------- 40960 rw pgsql postgres 78404 60 /usr 389929 -rw------- 8192 rw pgsql postgres 78404 61 /usr 
389853 -rw------- 16384 rw pgsql postgres 78404 62 /usr 389852 -rw------- 16384 rw pgsql postgres 78404 63 /usr 389819 -rw------- 8192 rw pgsql postgres 78404 64 /usr 389818 -rw------- 16384 rw pgsql postgres 78404 65 /usr 390360 -rw------- 8192 rw pgsql postgres 78404 66 /usr 390361 -rw------- 16384 rw pgsql postgres 78404 67 /usr 390379 -rw------- 32768 rw pgsql postgres 78404 68 /usr 389832 -rw------- 0 rw pgsql postgres 78404 69 /usr 389832 -rw------- 0 rw pgsql postgres 78404 70 /usr 389832 -rw------- 0 rw pgsql postgres 78404 71 /usr 389832 -rw------- 0 rw pgsql postgres 78404 72 /usr 389832 -rw------- 0 rw pgsql postgres 78404 73 /usr 389832 -rw------- 0 rw pgsql postgres 78404 74 /usr 389832 -rw------- 0 rw pgsql postgres 78404 75 /usr 389832 -rw------- 0 rw pgsql postgres 78404 76 /usr 389832 -rw------- 0 rw pgsql postgres 78404 77 /usr 389832 -rw------- 0 rw pgsql postgres 78404 78 /usr 389832 -rw------- 0 rw pgsql postgres 78404 79 /usr 389832 -rw------- 0 rw pgsql postgres 78404 80 /usr 389832 -rw------- 0 rw pgsql postgres 78404 81 /usr 389832 -rw------- 0 rw pgsql postgres 78404 82 /usr 389832 -rw------- 0 rw pgsql postgres 78404 83 /usr 389832 -rw------- 0 rw pgsql postgres 78404 84 /usr 389832 -rw------- 0 rw pgsql postgres 78404 85 /usr 389929 -rw------- 8192 rw pgsql postgres 78404 86 /usr 389856 -rw------- 8192 rw pgsql postgres 78404 87 /usr 389841 -rw------- 16384 rw pgsql postgres 78404 88 /usr 389854 -rw------- 8192 rw pgsql postgres 78404 89 /usr 389855 -rw------- 16384 rw pgsql postgres 78404 90 /usr 389830 -rw------- 65536 rw pgsql postgres 78404 91 /usr 389848 -rw------- 385024 rw pgsql postgres 78404 92 /usr 389815 -rw------- 16384 rw pgsql postgres 78404 93 /usr 366281 -rw------- 8192 rw pgsql postgres 78404 94 /usr 389828 -rw------- 40960 rw pgsql postgres 78404 95 /usr 389853 -rw------- 16384 rw pgsql postgres 78404 96 /usr 389852 -rw------- 16384 rw pgsql postgres 78404 97 /usr 389819 -rw------- 8192 rw pgsql postgres 78404 98 /usr 
389818 -rw------- 16384 rw pgsql postgres 78404 99 /usr 390360 -rw------- 8192 rw pgsql postgres 78404 100 /usr 390361 -rw------- 16384 rw pgsql postgres 78404 101 /usr 390379 -rw------- 32768 rw pgsql postgres 78404 102 /usr 389913 -rw------- 8192 rw pgsql postgres 78404 103 /usr 389832 -rw------- 0 rw cr@photox% foreach i (389855 389854 389848 389841 389830 390360) ? echo ${i}: `ls -i /usr/local/pgsql/data/base/cr | grep $i` ? echo ${i}: `fstat -p 78404 |grep $i | wc -l` ? end 389855: 389855 pg_amop 389855: 3 389854: 389854 pg_amproc 389854: 3 389848: 389848 pg_attribute_relid_attnam_index 389848: 3 389841: 389841 pg_index 389841: 3 389830: 389830 pg_operator 389830: 3 390360: 390360 users 390360: 3 What I see is that the backend opens many of the files associated with tables (both user and system) many times. It is not so bad in this example with pgaccess, but for long running processes I have backends with each table file open dozens of times, consuming hundreds of file descriptors per hour of run-time. This is on FreeBSD 3.2-STABLE, using the 6.5.1 release of postgresql from the ports collection. Anyway, is it a known problem? Aside from being careful to not use a particular instance of the backend too long, are there any fixes? Thanks, Cyrus Rahman
Cyrus Rahman <cr@photox.jcmax.com> writes: > Has anyone seen a problem with postgresql-6.5.1 leaking file > descriptors? That's interesting, I thought I'd fixed all the file-descriptor-leakage problems. Guess not :-( In addition to the files you list, there seem to be a whole bunch of descriptors for 389832; can you find out what that is? (Check in the top-level data directory as well as data/base/xxx.) Can you generate a repeatable script that causes a particular file to be opened more than once? This is going to be tough to track down without a test case... regards, tom lane
> -----Original Message----- > From: owner-pgsql-hackers@postgreSQL.org > [mailto:owner-pgsql-hackers@postgreSQL.org]On Behalf Of Tom Lane > Sent: Tuesday, August 31, 1999 10:50 AM > To: Cyrus Rahman > Cc: pgsql-hackers@postgreSQL.org > Subject: Re: [HACKERS] File descriptor leakage? > > > Cyrus Rahman <cr@photox.jcmax.com> writes: > > Has anyone seen a problem with postgresql-6.5.1 leaking file > > descriptors? > > That's interesting, I thought I'd fixed all the file-descriptor-leakage > problems. Guess not :-( > The following may be one of the causes. > -----Original Message----- > From: owner-pgsql-hackers@postgreSQL.org > [mailto:owner-pgsql-hackers@postgreSQL.org]On Behalf Of Vadim Mikheev > Sent: Monday, June 07, 1999 7:49 PM > To: Hiroshi Inoue > Cc: The Hermit Hacker; pgsql-hackers@postgreSQL.org > Subject: Re: [HACKERS] postgresql-v6.5beta2.tar.gz ... > [snip] > > 1. bug in cache invalidation code: when we invalidate relcache > we forget to free MdfdVec in md.c! > > Vacuum invalidates a relation tuple in pg_class and concurrent > xactions invalidate corresponding relcache entry, but don't > free MdfdVec and so allocate new one for the same relation > more and more. Each MdfdVec requires own fd.c:Vfd entry -> below > > 2. fd.c:pg_nofile()->sysconf(_SC_OPEN_MAX) returns in FreeBSD > near total number of files that can be opened in system > (by _all_ users/procs). With total number of opened files > ~ 2000 I can run your test with 10-20 simultaneous > xactions for very short time, -:) > > Should we limit fd.c:no_files to ~ 256? > This is port-specific, of course... I posted a patch about a month ago ([HACKERS] double opens). But yutaka tanida [yutaka@marin.or.jp] reported a bug caused by the patch. I found it's because of calling smgrclose() after smgrclose()/smgrunlink() for the same relation. It seems my old patch has not been applied yet. Here is a new patch. Regards.
Hiroshi Inoue
Inoue@tpf.co.jp

*** utils/cache/relcache.c.orig Mon Jul 26 12:45:15 1999
--- utils/cache/relcache.c Mon Aug 30 15:37:10 1999
***************
*** 1259,1264 ****
--- 1259,1265 ----
  oldcxt = MemoryContextSwitchTo((MemoryContext) CacheCxt);
+ smgrclose(DEFAULT_SMGR, relation);
  RelationCacheDelete(relation);
  FreeTupleDesc(relation->rd_att);
*** storage/smgr/md.c.orig Mon Jul 26 12:45:09 1999
--- storage/smgr/md.c Tue Aug 31 13:44:28 1999
***************
*** 190,195 ****
--- 190,197 ----
  /* finally, clean out the mdfd vector */
  fd = RelationGetFile(reln);
+ if (fd < 0)
+ return SM_SUCCESS;
  Md_fdvec[fd].mdfd_flags = (uint16) 0;
  oldcxt = MemoryContextSwitchTo(MdCxt);
***************
*** 211,216 ****
--- 213,219 ----
  MemoryContextSwitchTo(oldcxt);
  _fdvec_free(fd);
+ reln->rd_fd = -1;
  return SM_SUCCESS;
  }
***************
*** 319,324 ****
--- 322,329 ----
  MemoryContext oldcxt;
  fd = RelationGetFile(reln);
+ if (fd < 0)
+ return SM_SUCCESS;
  oldcxt = MemoryContextSwitchTo(MdCxt);
  #ifndef LET_OS_MANAGE_FILESIZE
***************
*** 370,375 ****
--- 375,381 ----
  MemoryContextSwitchTo(oldcxt);
  _fdvec_free(fd);
+ reln->rd_fd = -1;
  return SM_SUCCESS;
  }
***************
*** 895,900 ****
--- 901,907 ----
  {
  Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE);
+ Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE);
  Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
  Md_fdvec[fdvec].mdfd_flags= MDFD_FREE;
  Md_Free = fdvec;
"Hiroshi Inoue" <Inoue@tpf.co.jp> writes: > I posted a patch about a month ago ([HACKERS] double opens). > But yutaka tanida [yutaka@marin.or.jp] reported a bug caused > by the patch. I found it's because of calling smgrclose() after > smgrclose()/smgrunlink() for the same relation. > It seems my old patch has not been applied yet. > Here is a new patch. I think we ought to hold up 6.5.2 long enough to cram this patch in, but I'm hesitant to stick it in the stable branch without some more testing. Cyrus, can you try it and see if it fixes your problem? regards, tom lane
On Tue, 31 Aug 1999, Tom Lane wrote: > I think we ought to hold up 6.5.2 long enough to cram this patch in, but Let me know when you are ready then...the only ones that I want to keep to a relatively fixed date (or as close to one as possible) are the minor releases (6.5, 6.6, etc.)...the minor-minor releases I have no problems with shifting around as is required... Marc G. Fournier ICQ#7615664 IRC Nick: Scrappy Systems Administrator @ hub.org primary: scrappy@hub.org secondary: scrappy@{freebsd|postgresql}.org