Thread: Postgresql 8.3 beta crash

Postgresql 8.3 beta crash

From
Sheikh Amjad
Date:
Following test case is crashing the postgresql-8.3-beta 

create schema st;
CREATE TABLE ST.STUDENT(   STUDENT_ID     VARCHAR2(10),   NAME         VARCHAR(50) NOT NULL,   NIC         CHAR(11),
DOB         DATE NOT NULL,   GENDER         CHAR(1) NOT NULL,   MAR_STAT      CHAR(1) NOT NULL,   NATION
VARCHAR2(15),  FNAME         VARCHAR2(50) NOT NULL,   GNAME         VARCHAR2(50),   ADDRESS            VARCHAR2(100)
NOTNULL,   POS_CODE     VARCHAR2(8),   PER_TEL     VARCHAR2(15),   EMAIL_ADD     VARCHAR(20),   LAST_DEGREE
VARCHAR(25),  ENT_DATE     DATE NOT NULL);
 
ALTER TABLE ST.STUDENT    ADD CONSTRAINT PK_ST PRIMARY KEY (STUDENT_ID);




Insert into st.student values(0,'Hassan
Mustafa','1887749999','10-Jan-1981','M','S','Pakistani','Mustafa
Kamal',NULL,'H#2,ST#20,
Islamabad',44000,'0300-4329984','hmustafa@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(1,'Faiza
Shah','1887749999','05-Jan-1980','F','S','Pakistani','Muneer
Shah','Sadia Shah','H#6,ST#1 G-10,
Islamabad',44000,'0333-233329984','faiza@hotmail.com','FSC','12-FEB-2005');
Insert into st.student values(2,'Mustafa
Kamal','23388777','23-Aug-1978','M','S','Pakistani','Ali
baig',NULL,'H#12,ST#2 F-6,
Islamabad',44000,'0321-66788800','mkamal@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(3,'Kashif
Parveez','334459999','27-Dec-1975','M','S','Pakistani','Parveez
Haq',NULL,'H#2,ST#20,
Islamabad',44000,'0345-54329984','kp@hotmail.com','BCom','10-JAN-2005');
Insert into st.student values(4,'Ali
Khan','567749999','11-Jan-1981','M','S','Pakistani','S
Kamal',NULL,'H#2,ST#20,
Islamabad',44000,'0399-4329984','skmsss@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(5,'Saadia
Naz','1887749999','10-Jan-1981','S','S','Pakistani','Malik
Awan',NULL,'H#2,ST#20,
Islamabad',44000,'0300-4329984','hmustafa@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(6,'Shah
Rafeeq','1887749999','10-Jan-1982','M','S','Pakistani','Rafeed
Ahmed',NULL,'H#2,ST#20,
Islamabad',44000,'0300-4329984','hmustafa@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(7,'Mohammad
Bilal','1887749999','10-Jun-1981','M','S','Pakistani','Abdul
Hameed',NULL,'H#2,ST#20,
Islamabad',44000,'0300-4329984','hmustafa@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(8,'Bilal
Mustafa','1887749999','10-Feb-1980','M','S','Pakistani','Mustafa
Kamal',NULL,'H#2,ST#20,
Islamabad',44000,'0300-4329984','hmustafa@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(8,'Shahkir Hussain
Naqvi','1887749999','10-Mar-1978','M','S','Pakistani','Mustafa
Kamal',NULL,'H#2,ST#20,
Islamabad',44000,'0300-4329984','hmustafa@hotmail.com','BS','10-JAN-2005');
Insert into st.student values(9,'Amir
Husain','5677499993','20-Jan-1981','M','S','Pakistani','Mustafa
Kamal',NULL,'H#2,ST#20,
Islamabad',44000,'0300-4329984','hmustafa@hotmail.com','BS','10-JAN-2005');





SELECT XMLELEMENT       ( NAME "Program",           XMLAGG            ( XMLELEMENT                ( NAME "Student",
s.name::xml)            )       ) AS "Registered Student"
 
from st.student s ;






Re: Postgresql 8.3 beta crash

From
Heikki Linnakangas
Date:
Sheikh Amjad wrote:
> Following test case is crashing the postgresql-8.3-beta
> create schema st;
>
> CREATE TABLE ST.STUDENT(
>    STUDENT_ID     VARCHAR2(10),
>    NAME         VARCHAR(50) NOT NULL,
>    NIC         CHAR(11),
>    DOB          DATE NOT NULL,
>    GENDER         CHAR(1) NOT NULL,
>    MAR_STAT      CHAR(1) NOT NULL,
>    NATION         VARCHAR2(15),
>    FNAME         VARCHAR2(50) NOT NULL,
>    GNAME         VARCHAR2(50),
>    ADDRESS            VARCHAR2(100) NOT NULL,
>    POS_CODE     VARCHAR2(8),
>    PER_TEL     VARCHAR2(15),

VARCHAR2? That smells like Oracle...

I was able to reproduce this after replacing those VARCHAR2's with
VARCHAR. I added some debugging elog's (attached), and it looks like
libxml2 is trying xml_pfree a pointer we never gave it in any of the
alloc functions. Log attached, last xml_pfree crashes and it's the first
time 853c180 is mentioned.

I guess the next step is to narrow it down to a self-contained test case
and send a bug report to libxml people.

--
   Heikki Linnakangas
   EnterpriseDB   http://www.enterprisedb.com
Index: src/backend/utils/adt/xml.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/utils/adt/xml.c,v
retrieving revision 1.49
diff -c -r1.49 xml.c
*** src/backend/utils/adt/xml.c    13 Oct 2007 20:46:47 -0000    1.49
--- src/backend/utils/adt/xml.c    31 Oct 2007 16:22:16 -0000
***************
*** 1209,1228 ****
  static void *
  xml_palloc(size_t size)
  {
!     return palloc(size);
  }


  static void *
  xml_repalloc(void *ptr, size_t size)
  {
!     return repalloc(ptr, size);
  }


  static void
  xml_pfree(void *ptr)
  {
      pfree(ptr);
  }

--- 1209,1238 ----
  static void *
  xml_palloc(size_t size)
  {
!     void *ptr;
!
!     ptr = palloc(size);
!     elog(LOG, "xml_palloc(%d) = %x", size, ptr);
!     return ptr;
  }


  static void *
  xml_repalloc(void *ptr, size_t size)
  {
!     void *ptr_new;
!
!     ptr_new = repalloc(ptr, size);
!     elog(LOG, "xml_repalloc(%x, %d) = %x", ptr, size, ptr_new);
!
!     return ptr_new;
  }


  static void
  xml_pfree(void *ptr)
  {
+     elog(LOG, "xml_pfree(%x)", ptr);
      pfree(ptr);
  }

***************
*** 1230,1236 ****
  static char *
  xml_pstrdup(const char *string)
  {
!     return pstrdup(string);
  }


--- 1240,1253 ----
  static char *
  xml_pstrdup(const char *string)
  {
!     char *str;
!
!
!     str = pstrdup(string);
!
!     elog(LOG, "xml_pstrdup(%x) = %x", string, str);
!
!     return str;
  }


LOG:  database system was shut down at 2007-10-31 16:23:45 GMT
LOG:  autovacuum launcher started
LOG:  database system is ready to accept connections
LOG:  xml_palloc(200) = 8558714
LOG:  xml_pstrdup(bff98e78) = 8558820
LOG:  xml_palloc(20) = 8558834
LOG:  xml_pstrdup(bff98e78) = 8558860
LOG:  xml_palloc(20) = 855887c
LOG:  xml_pstrdup(bff98e78) = 85588a8
LOG:  xml_palloc(20) = 85588c4
LOG:  xml_pstrdup(bff98e78) = 85588f0
LOG:  xml_palloc(20) = 8558904
LOG:  xml_pstrdup(bff98e78) = 8558930
LOG:  xml_palloc(20) = 855894c
LOG:  xml_pstrdup(bff98e78) = 8558978
LOG:  xml_palloc(20) = 855898c
LOG:  xml_pstrdup(bff98e78) = 85589b8
LOG:  xml_palloc(20) = 85589d4
LOG:  xml_pstrdup(bff98e78) = 8558a00
LOG:  xml_palloc(20) = 8558a14
LOG:  xml_palloc(440) = 8558a40
LOG:  xml_palloc(28) = 8558c4c
LOG:  xml_palloc(2048) = 8558c78
LOG:  xml_palloc(128) = 8559484
LOG:  xml_palloc(20) = 8559510
LOG:  xml_palloc(40) = 855953c
LOG:  xml_palloc(40) = 8559588
LOG:  xml_palloc(40) = 85595d4
LOG:  xml_palloc(88) = 8559620
LOG:  xml_palloc(4) = 85596ac
LOG:  xml_palloc(440) = 85596c0
LOG:  xml_palloc(28) = 85598cc
LOG:  xml_palloc(2048) = 85598f8
LOG:  xml_palloc(128) = 855a104
LOG:  xml_palloc(20) = 855a190
LOG:  xml_palloc(40) = 855a1bc
LOG:  xml_palloc(40) = 855a208
LOG:  xml_palloc(40) = 855a254
LOG:  xml_palloc(36) = 855a2a0
LOG:  xml_palloc(16) = 855a2ec
LOG:  xml_palloc(8194) = 855a6d4
LOG:  xml_palloc(60) = 855a308
LOG:  xml_palloc(88) = 855a354
LOG:  xml_palloc(4) = 855a3e0
LOG:  xml_palloc(60) = 855a3f4
LOG:  xml_palloc(11) = 855a440
LOG:  xml_palloc(24) = 855a45c
LOG:  xml_palloc(37) = 855a488
LOG:  xml_palloc(4) = 855a4d4
LOG:  xml_palloc(1024) = 855c6fc
LOG:  xml_palloc(16) = 855cb08
LOG:  xml_palloc(60) = 855cb24
LOG:  xml_palloc(15) = 855cb70
LOG:  xml_pfree(855a6d4)
LOG:  xml_pfree(855a2ec)
LOG:  xml_pfree(855a2a0)
LOG:  xml_pfree(855a308)
LOG:  xml_pfree(855a254)
LOG:  xml_pfree(855a208)
LOG:  xml_pfree(855a1bc)
LOG:  xml_pfree(855a190)
LOG:  xml_pfree(855a104)
LOG:  xml_pfree(855cb08)
LOG:  xml_pfree(85598f8)
LOG:  xml_pfree(855c6fc)
LOG:  xml_pfree(85598cc)
LOG:  xml_pfree(85596c0)
LOG:  xml_pfree(855cb70)
LOG:  xml_pfree(855cb24)
LOG:  xml_pfree(855a440)
LOG:  xml_pfree(855a3f4)
LOG:  xml_pfree(855a3e0)
LOG:  xml_pfree(855a354)
LOG:  xml_palloc(6) = 855a3e0
LOG:  xml_pfree(85595d4)
LOG:  xml_pfree(8559588)
LOG:  xml_pfree(855953c)
LOG:  xml_pfree(8559510)
LOG:  xml_pfree(8559484)
LOG:  xml_pfree(8558c78)
LOG:  xml_pfree(8558c4c)
LOG:  xml_pfree(8558a40)
LOG:  xml_pfree(8558a00)
LOG:  xml_pfree(8558a14)
LOG:  xml_pfree(85589b8)
LOG:  xml_pfree(85589d4)
LOG:  xml_pfree(8558978)
LOG:  xml_pfree(855898c)
LOG:  xml_pfree(8558930)
LOG:  xml_pfree(855894c)
LOG:  xml_pfree(85588f0)
LOG:  xml_pfree(8558904)
LOG:  xml_pfree(85588a8)
LOG:  xml_pfree(85588c4)
LOG:  xml_pfree(8558860)
LOG:  xml_pfree(855887c)
LOG:  xml_pfree(8558820)
LOG:  xml_pfree(8558834)
LOG:  xml_pfree(8558714)
LOG:  xml_pfree(855a488)
LOG:  xml_pfree(855a4d4)
LOG:  xml_pfree(855a45c)
LOG:  xml_pfree(855a3e0)
LOG:  xml_pfree(8559620)
LOG:  xml_pfree(853c180)
LOG:  server process (PID 13473) was terminated by signal 11: Segmentation fault
LOG:  terminating any other active server processes
LOG:  all server processes terminated; reinitializing
LOG:  database system was interrupted; last known up at 2007-10-31 16:23:55 GMT
LOG:  database system was not properly shut down; automatic recovery in progress
LOG:  record with zero length at 0/70DE00
LOG:  redo is not required
LOG:  autovacuum launcher started
LOG:  database system is ready to accept connections

Re: Postgresql 8.3 beta crash

From
Heikki Linnakangas
Date:
I wrote:
> I was able to reproduce this after replacing those VARCHAR2's with 
> VARCHAR. I added some debugging elog's (attached), and it looks like
> libxml2 is trying xml_pfree a pointer we never gave it in any of the 
> alloc functions. Log attached, last xml_pfree crashes and it's the first
> time 853c180 is mentioned.

Looking closer, I think it's a memory management bug on our end. I 
hadn't looked at the way we use palloc with xml before.

So my current theory is:

In xmlelement(), we use ExecEvalExpr(), which in turn calls xml_parse. 
xml_parse calls xmlCleanupParser(). But when we call ExecEvalExpr(), 
we're in the middle of constructing an xml buffer, so calling 
xmlCleanupBuffer() probably frees something we still need.

Does that sound plausible to you libxml experts out there? If so, how 
about we move the calls to ExecEvalExpr() before we start building the 
xml buffer?

--   Heikki Linnakangas  EnterpriseDB   http://www.enterprisedb.com


Re: Postgresql 8.3 beta crash

From
Tom Lane
Date:
Heikki Linnakangas <heikki@enterprisedb.com> writes:
> So my current theory is:

> In xmlelement(), we use ExecEvalExpr(), which in turn calls xml_parse. 
> xml_parse calls xmlCleanupParser(). But when we call ExecEvalExpr(), 
> we're in the middle of constructing an xml buffer, so calling 
> xmlCleanupBuffer() probably frees something we still need.

No, your first theory is closer to the mark.  What is happening is that
xmlelement neglects to call xml_init, therefore the various stuff
allocated by libxml is allocated using malloc().  Then xml_parse is
called, and it *does* do xml_init(), which calls xmlMemSetup.  Then
when we return to xmlelement and start freeing stuff, libxml tries
to use xml_pfree to free something it got from malloc().

I think that (1) we need a call to xml_init here, and hence also a
PG_TRY block; (2) there is a lot of stuff in xml_init that should be
one-time-only, why does it not have an "already done" flag?
        regards, tom lane


Re: Postgresql 8.3 beta crash

From
Heikki Linnakangas
Date:
Tom Lane wrote:
> Heikki Linnakangas <heikki@enterprisedb.com> writes:
>> So my current theory is:
> 
>> In xmlelement(), we use ExecEvalExpr(), which in turn calls xml_parse. 
>> xml_parse calls xmlCleanupParser(). But when we call ExecEvalExpr(), 
>> we're in the middle of constructing an xml buffer, so calling 
>> xmlCleanupBuffer() probably frees something we still need.
> 
> No, your first theory is closer to the mark.  What is happening is that
> xmlelement neglects to call xml_init, therefore the various stuff
> allocated by libxml is allocated using malloc().  Then xml_parse is
> called, and it *does* do xml_init(), which calls xmlMemSetup.  Then
> when we return to xmlelement and start freeing stuff, libxml tries
> to use xml_pfree to free something it got from malloc().

Oh, yes, you're right.

It still feels unsafe to call ExecEvalExpr while holding on to xml 
structs. It means that it's not safe for external modules to use libxml2 
and call xmlMemSetup or xmlSetGenericErrorFunc themselves.

> I think that (1) we need a call to xml_init here, and hence also a
> PG_TRY block; 

xml_init doesn't actually do anything that would need to be free'd in 
case of error. But yeah, it does seem like a good idea to free the "text 
writer" and "xml buffer" allocated at the beginning of xmlelement(). 
They should be allocated by xml_palloc in the current memory context, 
though, and freed by the memory context reset as usual, but apparently 
we don't trust that for xml document or dtd objects either.

> (2) there is a lot of stuff in xml_init that should be
> one-time-only, why does it not have an "already done" flag?

Hmm. There's the check "sizeof(char) == sizeof(xmlChar)", which in fact 
should be evaluated at compile time (should that actually be an 
#error?). Then there's the call to xmlSetGenericErrorFunc and 
xmlMemSetup which only need to be called once. But it doesn't hurt to 
call them again, and it does protect us in case a dynamically loaded 
module sets them differently. And then there's the call to xmlInitParser 
which does need to be called every time.

--   Heikki Linnakangas  EnterpriseDB   http://www.enterprisedb.com


Re: Postgresql 8.3 beta crash

From
Gregory Stark
Date:
"Heikki Linnakangas" <heikki@enterprisedb.com> writes:

> Hmm. There's the check "sizeof(char) == sizeof(xmlChar)", which in fact should
> be evaluated at compile time (should that actually be an #error?). 

sizeof() isn't expanded by cpp (and cannot be due to cross-compilation) so it
can't be a #error. It could be an autoconf test though.

--  Gregory Stark EnterpriseDB          http://www.enterprisedb.com Ask me about EnterpriseDB's PostGIS support!


Re: Postgresql 8.3 beta crash

From
Tom Lane
Date:
Heikki Linnakangas <heikki@enterprisedb.com> writes:
> Tom Lane wrote:
>> I think that (1) we need a call to xml_init here, and hence also a
>> PG_TRY block; 

> xml_init doesn't actually do anything that would need to be free'd in 
> case of error. But yeah, it does seem like a good idea to free the "text 
> writer" and "xml buffer" allocated at the beginning of xmlelement(). 
> They should be allocated by xml_palloc in the current memory context, 
> though, and freed by the memory context reset as usual, but apparently 
> we don't trust that for xml document or dtd objects either.

Well, xml_init calls xmlInitParser() which needs to be cleaned up.
But since xmlelement doesn't need that, maybe we should factor it
out of xml_init.

As for the try/catch blocks instead of relying on memory context
cleanup, I'm not entirely sure if that's still needed or if it's a
hangover from before we understood how to use xmlMemSetup.  The note
at line 27ff of xml.c implies that libxml keeps static pointers to
allocated things that it thinks will survive indefinitely, so we
may have to have these.  I'm suspicious whether xmlelement doesn't
have a problem if the called expressions error out ...

Peter, any comment on this stuff?
        regards, tom lane


Re: Postgresql 8.3 beta crash

From
Peter Eisentraut
Date:
Tom Lane wrote:
> No, your first theory is closer to the mark.  What is happening is
> that xmlelement neglects to call xml_init, therefore the various
> stuff allocated by libxml is allocated using malloc().  Then
> xml_parse is called, and it *does* do xml_init(), which calls
> xmlMemSetup.  Then when we return to xmlelement and start freeing
> stuff, libxml tries to use xml_pfree to free something it got from
> malloc().

That sounds plausible.

-- 
Peter Eisentraut
http://developer.postgresql.org/~petere/


Re: Postgresql 8.3 beta crash

From
Peter Eisentraut
Date:
Heikki Linnakangas wrote:
> It still feels unsafe to call ExecEvalExpr while holding on to xml
> structs. It means that it's not safe for external modules to use
> libxml2 and call xmlMemSetup or xmlSetGenericErrorFunc themselves.

Well yeah, they shouldn't do that.  I don't think we want to support 
that.

-- 
Peter Eisentraut
http://developer.postgresql.org/~petere/


Re: Postgresql 8.3 beta crash

From
Peter Eisentraut
Date:
Tom Lane wrote:
> Well, xml_init calls xmlInitParser() which needs to be cleaned up.
> But since xmlelement doesn't need that, maybe we should factor it
> out of xml_init.

That could help.

> As for the try/catch blocks instead of relying on memory context
> cleanup, I'm not entirely sure if that's still needed or if it's a
> hangover from before we understood how to use xmlMemSetup.

It's for the xmlCleanupParser().

> The note 
> at line 27ff of xml.c implies that libxml keeps static pointers to
> allocated things that it thinks will survive indefinitely, so we
> may have to have these.  I'm suspicious whether xmlelement doesn't
> have a problem if the called expressions error out ...

Hmm.  That could also be fixed if we separated xml_init() and 
xmlInitParser().
-- 
Peter Eisentraut
http://developer.postgresql.org/~petere/


Re: Postgresql 8.3 beta crash

From
Tom Lane
Date:
Peter Eisentraut <peter_e@gmx.net> writes:
> Tom Lane wrote:
>> The note 
>> at line 27ff of xml.c implies that libxml keeps static pointers to
>> allocated things that it thinks will survive indefinitely, so we
>> may have to have these.  I'm suspicious whether xmlelement doesn't
>> have a problem if the called expressions error out ...

> Hmm.  That could also be fixed if we separated xml_init() and 
> xmlInitParser().

Do we know that only xmlInitParser() sets up such static pointers?

If we do, I wonder whether we could call xmlInitParser once in
TopMemoryContext (or before changing the memory function pointers
at all) and then never do xmlCleanupParser?
        regards, tom lane


Re: Postgresql 8.3 beta crash

From
Tom Lane
Date:
Peter Eisentraut <peter_e@gmx.net> writes:
> Heikki Linnakangas wrote:
>> It still feels unsafe to call ExecEvalExpr while holding on to xml
>> structs. It means that it's not safe for external modules to use
>> libxml2 and call xmlMemSetup or xmlSetGenericErrorFunc themselves.

> Well yeah, they shouldn't do that.  I don't think we want to support 
> that.

I'm with Heikki that it would be cleaner/safer if we could allow that.
The particular case that's bothering me is the idea that something like
Perl could well try to use libxml internally, and if so it'd very likely
call these functions.  Now if Perl thinks it's got sole control, and
tries to (say) re-use the results of xmlInitParser across calls, we're
screwed anyway.  But that's not an argument for designing our own code
in a way that guarantees it can't share libxml with other code.

So I'm thinking that we should continue to call xmlMemSetup,
xmlSetGenericErrorFunc, and xmlInitParser (if needed) at the start of
every XML function, and reorganize the code so that we don't call out
to random other code until we've shut down libxml again.

The main disadvantage I can see of reorganizing like that is that
it will increase xmlelement's transient memory consumption, since it
will need to accumulate all the OutputFunctionCall result strings
before it starts to pass them to libxml.  This probably isn't a huge
problem though.

Has anyone started actively working on this yet?  If not, I can tackle
it.
        regards, tom lane


Re: Postgresql 8.3 beta crash

From
Peter Eisentraut
Date:
Am Mittwoch, 31. Oktober 2007 schrieb Sheikh Amjad:
> Following test case is crashing the postgresql-8.3-beta

> SELECT XMLELEMENT
>         ( NAME "Program",
>             XMLAGG
>              ( XMLELEMENT
>                  ( NAME "Student", s.name::xml )
>              )
>         ) AS "Registered Student"
>
> >from st.student s ;

Btw., I didn't forget this, but I haven't found an extended period of quiet 
time to develop a proper solution.

-- 
Peter Eisentraut
http://developer.postgresql.org/~petere/


Re: Postgresql 8.3 beta crash

From
Tom Lane
Date:
Peter Eisentraut <peter_e@gmx.net> writes:
> Am Mittwoch, 31. Oktober 2007 schrieb Sheikh Amjad:
>> Following test case is crashing the postgresql-8.3-beta

> Btw., I didn't forget this, but I haven't found an extended period of quiet 
> time to develop a proper solution.

I fixed it a few days ago...
        regards, tom lane