Thread: Lonely Patch Seeks Long-Term Commitment to Codebase

Lonely Patch Seeks Long-Term Commitment to Codebase

From
Ben Lamb
Date:
Hi,

I sent a patch to this list earlier this month to improve the speed of
PGunescapeBytea(). IMHO it is handicapped as it can take over a minute
to process a large amount of data, e.g. a 500Kb JPEG.

The attached patch replaces the sscanf function that causes the delay,
the code was borrowed from the PostgreSQL Python interface with
permission from the author.

Please could I have some feedback on whether the patch is likely to be
committed? Has it been rejected? Is it stuck on someone's TODO list? Was
the code unsuitable? Have I neglected some procedure, etc?

Best regards,

Ben Lamb.
--- fe-exec.c    Thu May  8 11:49:34 2003
+++ fe-exec.c.latest    Thu May  8 11:45:17 2003
@@ -180,6 +180,8 @@
     return result;
 }

+#define VAL(CH) ((CH) - '0')
+
 /*
  *        PQunescapeBytea - converts the null terminated string representation
  *        of a bytea, strtext, into binary, filling a buffer. It returns a
@@ -187,101 +189,66 @@
  *        buffer in retbuflen. The pointer may subsequently be used as an
  *        argument to the function free(3). It is the reverse of PQescapeBytea.
  *
- *        The following transformations are reversed:
- *        '\0' == ASCII  0 == \000
- *        '\'' == ASCII 39 == \'
- *        '\\' == ASCII 92 == \\
+ *        The following transformations are made:
+ *        \'   == ASCII 39 == '
+ *        \\   == ASCII 92 == \
+ *        \ooo == a byte whose value = ooo (ooo is an octal number)
+ *        \x   == x (x is any character not matched by the above transformations)
  *
- *        States:
- *        0    normal        0->1->2->3->4
- *        1    \               1->5
- *        2    \0               1->6
- *        3    \00
- *        4    \000
- *        5    \'
- *        6    \\
  */
 unsigned char *
 PQunescapeBytea(unsigned char *strtext, size_t *retbuflen)
 {
-    size_t        buflen;
-    unsigned char *buffer,
-               *sp,
-               *bp;
-    unsigned int state = 0;
+    size_t strtextlen, buflen;
+    unsigned char *buffer, *tmpbuf;
+    int i, j, byte;

-    if (strtext == NULL)
+    if (strtext == NULL) {
         return NULL;
-    buflen = strlen(strtext);    /* will shrink, also we discover if
-                                 * strtext */
-    buffer = (unsigned char *) malloc(buflen);    /* isn't NULL terminated */
+    }
+
+    strtextlen = strlen(strtext);    /* will shrink, also we discover if
+                                     * strtext isn't NULL terminated */
+    buffer = (unsigned char *)malloc(strtextlen);
     if (buffer == NULL)
         return NULL;
-    for (bp = buffer, sp = strtext; *sp != '\0'; bp++, sp++)
+
+    for (i = j = buflen = 0; i < strtextlen;)
     {
-        switch (state)
+        switch (strtext[i])
         {
-            case 0:
-                if (*sp == '\\')
-                    state = 1;
-                *bp = *sp;
-                break;
-            case 1:
-                if (*sp == '\'')    /* state=5 */
-                {                /* replace \' with 39 */
-                    bp--;
-                    *bp = '\'';
-                    buflen--;
-                    state = 0;
-                }
-                else if (*sp == '\\')    /* state=6 */
-                {                /* replace \\ with 92 */
-                    bp--;
-                    *bp = '\\';
-                    buflen--;
-                    state = 0;
-                }
+            case '\\':
+                i++;
+                if (strtext[i] == '\\')
+                    buffer[j++] = strtext[i++];
                 else
                 {
-                    if (isdigit(*sp))
-                        state = 2;
-                    else
-                        state = 0;
-                    *bp = *sp;
+                    if ((isdigit(strtext[i]))   &&
+                        (isdigit(strtext[i+1])) &&
+                        (isdigit(strtext[i+2])))
+                    {
+                        byte = VAL(strtext[i++]);
+                        byte = (byte << 3) + VAL(strtext[i++]);
+                        buffer[j++] = (byte << 3) + VAL(strtext[i++]);
+                    }
                 }
                 break;
-            case 2:
-                if (isdigit(*sp))
-                    state = 3;
-                else
-                    state = 0;
-                *bp = *sp;
-                break;
-            case 3:
-                if (isdigit(*sp))        /* state=4 */
-                {
-                    int            v;

-                    bp -= 3;
-                    sscanf(sp - 2, "%03o", &v);
-                    *bp = v;
-                    buflen -= 3;
-                    state = 0;
-                }
-                else
-                {
-                    *bp = *sp;
-                    state = 0;
-                }
-                break;
+            default:
+                buffer[j++] = strtext[i++];
         }
     }
-    buffer = realloc(buffer, buflen);
-    if (buffer == NULL)
-        return NULL;
+    buflen = j; /* buflen is the length of the unquoted data */
+    tmpbuf = realloc(buffer, buflen);
+
+    if (!tmpbuf)
+    {
+        free(buffer);
+        return 0;
+    }

     *retbuflen = buflen;
-    return buffer;
+    return tmpbuf;
 }

 /* ----------------


Re: Lonely Patch Seeks Long-Term Commitment to Codebase

From
"Matthew T. O'Connor"
Date:
I think Bruce is still working through his backlog of patches.  I know I'm
waiting for my pg_autovacuum patch to be applied which was submitted on May
7th.

Just give him some time, he's been working hard on the win32 port, a worthy
and difficult task IMHO.

Matthew

----- Original Message -----
From: "Ben Lamb" <pgsql-patches@zurgy.org>
To: <pgsql-patches@postgresql.org>
Sent: Wednesday, May 28, 2003 3:26 PM
Subject: [PATCHES] Lonely Patch Seeks Long-Term Commitment to Codebase


> Hi,
>
> I sent a patch to this list earlier this month to improve the speed of
> PGunescapeBytea(). IMHO it is handicapped as it can take over a minute
> to process a large amount of data, e.g. a 500Kb JPEG.
>
> The attached patch replaces the sscanf function that causes the delay,
> the code was borrowed from the PostgreSQL Python interface with
> permission from the author.
>
> Please could I have some feedback on whether the patch is likely to be
> committed? Has it been rejected? Is it stuck on someone's TODO list? Was
> the code unsuitable? Have I neglected some procedure, etc?
>
> Best regards,
>
> Ben Lamb.
>


----------------------------------------------------------------------------
----


> --- fe-exec.c Thu May  8 11:49:34 2003
> +++ fe-exec.c.latest Thu May  8 11:45:17 2003
> @@ -180,6 +180,8 @@
>   return result;
>  }
>
> +#define VAL(CH) ((CH) - '0')
> +
>  /*
>   * PQunescapeBytea - converts the null terminated string representation
>   * of a bytea, strtext, into binary, filling a buffer. It returns a
> @@ -187,101 +189,66 @@
>   * buffer in retbuflen. The pointer may subsequently be used as an
>   * argument to the function free(3). It is the reverse of PQescapeBytea.
>   *
> - * The following transformations are reversed:
> - * '\0' == ASCII  0 == \000
> - * '\'' == ASCII 39 == \'
> - * '\\' == ASCII 92 == \\
> + * The following transformations are made:
> + * \'   == ASCII 39 == '
> + * \\   == ASCII 92 == \
> + * \ooo == a byte whose value = ooo (ooo is an octal number)
> + * \x   == x (x is any character not matched by the above
transformations)
>   *
> - * States:
> - * 0 normal 0->1->2->3->4
> - * 1 \    1->5
> - * 2 \0    1->6
> - * 3 \00
> - * 4 \000
> - * 5 \'
> - * 6 \\
>   */
>  unsigned char *
>  PQunescapeBytea(unsigned char *strtext, size_t *retbuflen)
>  {
> - size_t buflen;
> - unsigned char *buffer,
> -    *sp,
> -    *bp;
> - unsigned int state = 0;
> + size_t strtextlen, buflen;
> + unsigned char *buffer, *tmpbuf;
> + int i, j, byte;
>
> - if (strtext == NULL)
> + if (strtext == NULL) {
>   return NULL;
> - buflen = strlen(strtext); /* will shrink, also we discover if
> - * strtext */
> - buffer = (unsigned char *) malloc(buflen); /* isn't NULL terminated */
> + }
> +
> + strtextlen = strlen(strtext); /* will shrink, also we discover if
> + * strtext isn't NULL terminated */
> + buffer = (unsigned char *)malloc(strtextlen);
>   if (buffer == NULL)
>   return NULL;
> - for (bp = buffer, sp = strtext; *sp != '\0'; bp++, sp++)
> +
> + for (i = j = buflen = 0; i < strtextlen;)
>   {
> - switch (state)
> + switch (strtext[i])
>   {
> - case 0:
> - if (*sp == '\\')
> - state = 1;
> - *bp = *sp;
> - break;
> - case 1:
> - if (*sp == '\'') /* state=5 */
> - { /* replace \' with 39 */
> - bp--;
> - *bp = '\'';
> - buflen--;
> - state = 0;
> - }
> - else if (*sp == '\\') /* state=6 */
> - { /* replace \\ with 92 */
> - bp--;
> - *bp = '\\';
> - buflen--;
> - state = 0;
> - }
> + case '\\':
> + i++;
> + if (strtext[i] == '\\')
> + buffer[j++] = strtext[i++];
>   else
>   {
> - if (isdigit(*sp))
> - state = 2;
> - else
> - state = 0;
> - *bp = *sp;
> + if ((isdigit(strtext[i]))   &&
> + (isdigit(strtext[i+1])) &&
> + (isdigit(strtext[i+2])))
> + {
> + byte = VAL(strtext[i++]);
> + byte = (byte << 3) + VAL(strtext[i++]);
> + buffer[j++] = (byte << 3) + VAL(strtext[i++]);
> + }
>   }
>   break;
> - case 2:
> - if (isdigit(*sp))
> - state = 3;
> - else
> - state = 0;
> - *bp = *sp;
> - break;
> - case 3:
> - if (isdigit(*sp)) /* state=4 */
> - {
> - int v;
>
> - bp -= 3;
> - sscanf(sp - 2, "%03o", &v);
> - *bp = v;
> - buflen -= 3;
> - state = 0;
> - }
> - else
> - {
> - *bp = *sp;
> - state = 0;
> - }
> - break;
> + default:
> + buffer[j++] = strtext[i++];
>   }
>   }
> - buffer = realloc(buffer, buflen);
> - if (buffer == NULL)
> - return NULL;
> + buflen = j; /* buflen is the length of the unquoted data */
> + tmpbuf = realloc(buffer, buflen);
> +
> + if (!tmpbuf)
> + {
> + free(buffer);
> + return 0;
> + }
>
>   *retbuflen = buflen;
> - return buffer;
> + return tmpbuf;
>  }
>
>  /* ----------------
>
>


----------------------------------------------------------------------------
----


>
> ---------------------------(end of broadcast)---------------------------
> TIP 2: you can get off all lists at once with the unregister command
>     (send "unregister YourEmailAddressHere" to majordomo@postgresql.org)
>


Re: Lonely Patch Seeks Long-Term Commitment to Codebase

From
"Andrew Dunstan"
Date:
I think he said he was going to take a break of about a week to catch up on
patches, so I was waiting till that time was over before making a similar
query. I haven't seen a lot of "Thankyou, this patch has been applied"
messages, though.

cheers

andrew

----- Original Message -----
From: "Matthew T. O'Connor" <matthew@zeut.net>
To: "Ben Lamb" <pgsql-patches@zurgy.org>; <pgsql-patches@postgresql.org>
Sent: Wednesday, May 28, 2003 4:30 PM
Subject: Re: [PATCHES] Lonely Patch Seeks Long-Term Commitment to Codebase


> I think Bruce is still working through his backlog of patches.  I know I'm
> waiting for my pg_autovacuum patch to be applied which was submitted on
May
> 7th.
>
> Just give him some time, he's been working hard on the win32 port, a
worthy
> and difficult task IMHO.
>
> Matthew
>
> ----- Original Message -----
> From: "Ben Lamb" <pgsql-patches@zurgy.org>
> To: <pgsql-patches@postgresql.org>
> Sent: Wednesday, May 28, 2003 3:26 PM
> Subject: [PATCHES] Lonely Patch Seeks Long-Term Commitment to Codebase
>
>
> > Hi,
> >
> > I sent a patch to this list earlier this month to improve the speed of
> > PGunescapeBytea(). IMHO it is handicapped as it can take over a minute
> > to process a large amount of data, e.g. a 500Kb JPEG.
> >
> > The attached patch replaces the sscanf function that causes the delay,
> > the code was borrowed from the PostgreSQL Python interface with
> > permission from the author.
> >
> > Please could I have some feedback on whether the patch is likely to be
> > committed? Has it been rejected? Is it stuck on someone's TODO list? Was
> > the code unsuitable? Have I neglected some procedure, etc?
> >
> > Best regards,
> >
> > Ben Lamb.
> >
>
>
> --------------------------------------------------------------------------
--
> ----
>
>
> > --- fe-exec.c Thu May  8 11:49:34 2003
> > +++ fe-exec.c.latest Thu May  8 11:45:17 2003
> > @@ -180,6 +180,8 @@
> >   return result;
> >  }
> >
> > +#define VAL(CH) ((CH) - '0')
> > +
> >  /*
> >   * PQunescapeBytea - converts the null terminated string representation
> >   * of a bytea, strtext, into binary, filling a buffer. It returns a
> > @@ -187,101 +189,66 @@
> >   * buffer in retbuflen. The pointer may subsequently be used as an
> >   * argument to the function free(3). It is the reverse of
PQescapeBytea.
> >   *
> > - * The following transformations are reversed:
> > - * '\0' == ASCII  0 == \000
> > - * '\'' == ASCII 39 == \'
> > - * '\\' == ASCII 92 == \\
> > + * The following transformations are made:
> > + * \'   == ASCII 39 == '
> > + * \\   == ASCII 92 == \
> > + * \ooo == a byte whose value = ooo (ooo is an octal number)
> > + * \x   == x (x is any character not matched by the above
> transformations)
> >   *
> > - * States:
> > - * 0 normal 0->1->2->3->4
> > - * 1 \    1->5
> > - * 2 \0    1->6
> > - * 3 \00
> > - * 4 \000
> > - * 5 \'
> > - * 6 \\
> >   */
> >  unsigned char *
> >  PQunescapeBytea(unsigned char *strtext, size_t *retbuflen)
> >  {
> > - size_t buflen;
> > - unsigned char *buffer,
> > -    *sp,
> > -    *bp;
> > - unsigned int state = 0;
> > + size_t strtextlen, buflen;
> > + unsigned char *buffer, *tmpbuf;
> > + int i, j, byte;
> >
> > - if (strtext == NULL)
> > + if (strtext == NULL) {
> >   return NULL;
> > - buflen = strlen(strtext); /* will shrink, also we discover if
> > - * strtext */
> > - buffer = (unsigned char *) malloc(buflen); /* isn't NULL terminated */
> > + }
> > +
> > + strtextlen = strlen(strtext); /* will shrink, also we discover if
> > + * strtext isn't NULL terminated */
> > + buffer = (unsigned char *)malloc(strtextlen);
> >   if (buffer == NULL)
> >   return NULL;
> > - for (bp = buffer, sp = strtext; *sp != '\0'; bp++, sp++)
> > +
> > + for (i = j = buflen = 0; i < strtextlen;)
> >   {
> > - switch (state)
> > + switch (strtext[i])
> >   {
> > - case 0:
> > - if (*sp == '\\')
> > - state = 1;
> > - *bp = *sp;
> > - break;
> > - case 1:
> > - if (*sp == '\'') /* state=5 */
> > - { /* replace \' with 39 */
> > - bp--;
> > - *bp = '\'';
> > - buflen--;
> > - state = 0;
> > - }
> > - else if (*sp == '\\') /* state=6 */
> > - { /* replace \\ with 92 */
> > - bp--;
> > - *bp = '\\';
> > - buflen--;
> > - state = 0;
> > - }
> > + case '\\':
> > + i++;
> > + if (strtext[i] == '\\')
> > + buffer[j++] = strtext[i++];
> >   else
> >   {
> > - if (isdigit(*sp))
> > - state = 2;
> > - else
> > - state = 0;
> > - *bp = *sp;
> > + if ((isdigit(strtext[i]))   &&
> > + (isdigit(strtext[i+1])) &&
> > + (isdigit(strtext[i+2])))
> > + {
> > + byte = VAL(strtext[i++]);
> > + byte = (byte << 3) + VAL(strtext[i++]);
> > + buffer[j++] = (byte << 3) + VAL(strtext[i++]);
> > + }
> >   }
> >   break;
> > - case 2:
> > - if (isdigit(*sp))
> > - state = 3;
> > - else
> > - state = 0;
> > - *bp = *sp;
> > - break;
> > - case 3:
> > - if (isdigit(*sp)) /* state=4 */
> > - {
> > - int v;
> >
> > - bp -= 3;
> > - sscanf(sp - 2, "%03o", &v);
> > - *bp = v;
> > - buflen -= 3;
> > - state = 0;
> > - }
> > - else
> > - {
> > - *bp = *sp;
> > - state = 0;
> > - }
> > - break;
> > + default:
> > + buffer[j++] = strtext[i++];
> >   }
> >   }
> > - buffer = realloc(buffer, buflen);
> > - if (buffer == NULL)
> > - return NULL;
> > + buflen = j; /* buflen is the length of the unquoted data */
> > + tmpbuf = realloc(buffer, buflen);
> > +
> > + if (!tmpbuf)
> > + {
> > + free(buffer);
> > + return 0;
> > + }
> >
> >   *retbuflen = buflen;
> > - return buffer;
> > + return tmpbuf;
> >  }
> >
> >  /* ----------------
> >
> >
>
>
> --------------------------------------------------------------------------
--
> ----
>
>
> >
> > ---------------------------(end of broadcast)---------------------------
> > TIP 2: you can get off all lists at once with the unregister command
> >     (send "unregister YourEmailAddressHere" to majordomo@postgresql.org)
> >
>
>
> ---------------------------(end of broadcast)---------------------------
> TIP 2: you can get off all lists at once with the unregister command
>     (send "unregister YourEmailAddressHere" to majordomo@postgresql.org)


Re: Lonely Patch Seeks Long-Term Commitment to Codebase

From
Ben Lamb
Date:
On Wed, May 28, 2003 at 04:30:39PM -0400, Matthew T. O'Connor wrote:
> I think Bruce is still working through his backlog of patches.  I know I'm
> waiting for my pg_autovacuum patch to be applied which was submitted on May
> 7th.

Apologies, I'm a newbie and wasn't sure what to expect when I sent the patch to the list.

Thanks,

Ben.

Re: Lonely Patch Seeks Long-Term Commitment to Codebase

From
"Matthew T. O'Connor"
Date:
From: "Ben Lamb" <pgsql-patches@zurgy.org>
> Apologies, I'm a newbie and wasn't sure what to expect when I sent the
patch to the list.

No problem, keep sending patches :-)


Re: Lonely Patch Seeks Long-Term Commitment to Codebase

From
Bruce Momjian
Date:
Matthew T. O'Connor wrote:
> From: "Ben Lamb" <pgsql-patches@zurgy.org>
> > Apologies, I'm a newbie and wasn't sure what to expect when I sent the
> patch to the list.
>
> No problem, keep sending patches :-)

I am working through my email. I am in early April now, so I have a few
weeks to go.  These are the emails/patches that need special attention.

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Re: Lonely Patch Seeks Long-Term Commitment to Codebase

From
Bruce Momjian
Date:
Ben, never mind.  I see this is now identical to the newest version of
the patch you posted.  Sorry.

---------------------------------------------------------------------------

Ben Lamb wrote:
> Hi,
>
> I sent a patch to this list earlier this month to improve the speed of
> PGunescapeBytea(). IMHO it is handicapped as it can take over a minute
> to process a large amount of data, e.g. a 500Kb JPEG.
>
> The attached patch replaces the sscanf function that causes the delay,
> the code was borrowed from the PostgreSQL Python interface with
> permission from the author.
>
> Please could I have some feedback on whether the patch is likely to be
> committed? Has it been rejected? Is it stuck on someone's TODO list? Was
> the code unsuitable? Have I neglected some procedure, etc?
>
> Best regards,
>
> Ben Lamb.

> --- fe-exec.c    Thu May  8 11:49:34 2003
> +++ fe-exec.c.latest    Thu May  8 11:45:17 2003
> @@ -180,6 +180,8 @@
>      return result;
>  }
>
> +#define VAL(CH) ((CH) - '0')
> +
>  /*
>   *        PQunescapeBytea - converts the null terminated string representation
>   *        of a bytea, strtext, into binary, filling a buffer. It returns a
> @@ -187,101 +189,66 @@
>   *        buffer in retbuflen. The pointer may subsequently be used as an
>   *        argument to the function free(3). It is the reverse of PQescapeBytea.
>   *
> - *        The following transformations are reversed:
> - *        '\0' == ASCII  0 == \000
> - *        '\'' == ASCII 39 == \'
> - *        '\\' == ASCII 92 == \\
> + *        The following transformations are made:
> + *        \'   == ASCII 39 == '
> + *        \\   == ASCII 92 == \
> + *        \ooo == a byte whose value = ooo (ooo is an octal number)
> + *        \x   == x (x is any character not matched by the above transformations)
>   *
> - *        States:
> - *        0    normal        0->1->2->3->4
> - *        1    \               1->5
> - *        2    \0               1->6
> - *        3    \00
> - *        4    \000
> - *        5    \'
> - *        6    \\
>   */
>  unsigned char *
>  PQunescapeBytea(unsigned char *strtext, size_t *retbuflen)
>  {
> -    size_t        buflen;
> -    unsigned char *buffer,
> -               *sp,
> -               *bp;
> -    unsigned int state = 0;
> +    size_t strtextlen, buflen;
> +    unsigned char *buffer, *tmpbuf;
> +    int i, j, byte;
>
> -    if (strtext == NULL)
> +    if (strtext == NULL) {
>          return NULL;
> -    buflen = strlen(strtext);    /* will shrink, also we discover if
> -                                 * strtext */
> -    buffer = (unsigned char *) malloc(buflen);    /* isn't NULL terminated */
> +    }
> +
> +    strtextlen = strlen(strtext);    /* will shrink, also we discover if
> +                                     * strtext isn't NULL terminated */
> +    buffer = (unsigned char *)malloc(strtextlen);
>      if (buffer == NULL)
>          return NULL;
> -    for (bp = buffer, sp = strtext; *sp != '\0'; bp++, sp++)
> +
> +    for (i = j = buflen = 0; i < strtextlen;)
>      {
> -        switch (state)
> +        switch (strtext[i])
>          {
> -            case 0:
> -                if (*sp == '\\')
> -                    state = 1;
> -                *bp = *sp;
> -                break;
> -            case 1:
> -                if (*sp == '\'')    /* state=5 */
> -                {                /* replace \' with 39 */
> -                    bp--;
> -                    *bp = '\'';
> -                    buflen--;
> -                    state = 0;
> -                }
> -                else if (*sp == '\\')    /* state=6 */
> -                {                /* replace \\ with 92 */
> -                    bp--;
> -                    *bp = '\\';
> -                    buflen--;
> -                    state = 0;
> -                }
> +            case '\\':
> +                i++;
> +                if (strtext[i] == '\\')
> +                    buffer[j++] = strtext[i++];
>                  else
>                  {
> -                    if (isdigit(*sp))
> -                        state = 2;
> -                    else
> -                        state = 0;
> -                    *bp = *sp;
> +                    if ((isdigit(strtext[i]))   &&
> +                        (isdigit(strtext[i+1])) &&
> +                        (isdigit(strtext[i+2])))
> +                    {
> +                        byte = VAL(strtext[i++]);
> +                        byte = (byte << 3) + VAL(strtext[i++]);
> +                        buffer[j++] = (byte << 3) + VAL(strtext[i++]);
> +                    }
>                  }
>                  break;
> -            case 2:
> -                if (isdigit(*sp))
> -                    state = 3;
> -                else
> -                    state = 0;
> -                *bp = *sp;
> -                break;
> -            case 3:
> -                if (isdigit(*sp))        /* state=4 */
> -                {
> -                    int            v;
>
> -                    bp -= 3;
> -                    sscanf(sp - 2, "%03o", &v);
> -                    *bp = v;
> -                    buflen -= 3;
> -                    state = 0;
> -                }
> -                else
> -                {
> -                    *bp = *sp;
> -                    state = 0;
> -                }
> -                break;
> +            default:
> +                buffer[j++] = strtext[i++];
>          }
>      }
> -    buffer = realloc(buffer, buflen);
> -    if (buffer == NULL)
> -        return NULL;
> +    buflen = j; /* buflen is the length of the unquoted data */
> +    tmpbuf = realloc(buffer, buflen);
> +
> +    if (!tmpbuf)
> +    {
> +        free(buffer);
> +        return 0;
> +    }
>
>      *retbuflen = buflen;
> -    return buffer;
> +    return tmpbuf;
>  }
>
>  /* ----------------
>

>
> ---------------------------(end of broadcast)---------------------------
> TIP 2: you can get off all lists at once with the unregister command
>     (send "unregister YourEmailAddressHere" to majordomo@postgresql.org)

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Re: Lonely Patch Seeks Long-Term Commitment to Codebase

From
Bruce Momjian
Date:
Ben, this patch is slightly different from your previous patch.  Is this
the better version?

---------------------------------------------------------------------------

Ben Lamb wrote:
> Hi,
>
> I sent a patch to this list earlier this month to improve the speed of
> PGunescapeBytea(). IMHO it is handicapped as it can take over a minute
> to process a large amount of data, e.g. a 500Kb JPEG.
>
> The attached patch replaces the sscanf function that causes the delay,
> the code was borrowed from the PostgreSQL Python interface with
> permission from the author.
>
> Please could I have some feedback on whether the patch is likely to be
> committed? Has it been rejected? Is it stuck on someone's TODO list? Was
> the code unsuitable? Have I neglected some procedure, etc?
>
> Best regards,
>
> Ben Lamb.

> --- fe-exec.c    Thu May  8 11:49:34 2003
> +++ fe-exec.c.latest    Thu May  8 11:45:17 2003
> @@ -180,6 +180,8 @@
>      return result;
>  }
>
> +#define VAL(CH) ((CH) - '0')
> +
>  /*
>   *        PQunescapeBytea - converts the null terminated string representation
>   *        of a bytea, strtext, into binary, filling a buffer. It returns a
> @@ -187,101 +189,66 @@
>   *        buffer in retbuflen. The pointer may subsequently be used as an
>   *        argument to the function free(3). It is the reverse of PQescapeBytea.
>   *
> - *        The following transformations are reversed:
> - *        '\0' == ASCII  0 == \000
> - *        '\'' == ASCII 39 == \'
> - *        '\\' == ASCII 92 == \\
> + *        The following transformations are made:
> + *        \'   == ASCII 39 == '
> + *        \\   == ASCII 92 == \
> + *        \ooo == a byte whose value = ooo (ooo is an octal number)
> + *        \x   == x (x is any character not matched by the above transformations)
>   *
> - *        States:
> - *        0    normal        0->1->2->3->4
> - *        1    \               1->5
> - *        2    \0               1->6
> - *        3    \00
> - *        4    \000
> - *        5    \'
> - *        6    \\
>   */
>  unsigned char *
>  PQunescapeBytea(unsigned char *strtext, size_t *retbuflen)
>  {
> -    size_t        buflen;
> -    unsigned char *buffer,
> -               *sp,
> -               *bp;
> -    unsigned int state = 0;
> +    size_t strtextlen, buflen;
> +    unsigned char *buffer, *tmpbuf;
> +    int i, j, byte;
>
> -    if (strtext == NULL)
> +    if (strtext == NULL) {
>          return NULL;
> -    buflen = strlen(strtext);    /* will shrink, also we discover if
> -                                 * strtext */
> -    buffer = (unsigned char *) malloc(buflen);    /* isn't NULL terminated */
> +    }
> +
> +    strtextlen = strlen(strtext);    /* will shrink, also we discover if
> +                                     * strtext isn't NULL terminated */
> +    buffer = (unsigned char *)malloc(strtextlen);
>      if (buffer == NULL)
>          return NULL;
> -    for (bp = buffer, sp = strtext; *sp != '\0'; bp++, sp++)
> +
> +    for (i = j = buflen = 0; i < strtextlen;)
>      {
> -        switch (state)
> +        switch (strtext[i])
>          {
> -            case 0:
> -                if (*sp == '\\')
> -                    state = 1;
> -                *bp = *sp;
> -                break;
> -            case 1:
> -                if (*sp == '\'')    /* state=5 */
> -                {                /* replace \' with 39 */
> -                    bp--;
> -                    *bp = '\'';
> -                    buflen--;
> -                    state = 0;
> -                }
> -                else if (*sp == '\\')    /* state=6 */
> -                {                /* replace \\ with 92 */
> -                    bp--;
> -                    *bp = '\\';
> -                    buflen--;
> -                    state = 0;
> -                }
> +            case '\\':
> +                i++;
> +                if (strtext[i] == '\\')
> +                    buffer[j++] = strtext[i++];
>                  else
>                  {
> -                    if (isdigit(*sp))
> -                        state = 2;
> -                    else
> -                        state = 0;
> -                    *bp = *sp;
> +                    if ((isdigit(strtext[i]))   &&
> +                        (isdigit(strtext[i+1])) &&
> +                        (isdigit(strtext[i+2])))
> +                    {
> +                        byte = VAL(strtext[i++]);
> +                        byte = (byte << 3) + VAL(strtext[i++]);
> +                        buffer[j++] = (byte << 3) + VAL(strtext[i++]);
> +                    }
>                  }
>                  break;
> -            case 2:
> -                if (isdigit(*sp))
> -                    state = 3;
> -                else
> -                    state = 0;
> -                *bp = *sp;
> -                break;
> -            case 3:
> -                if (isdigit(*sp))        /* state=4 */
> -                {
> -                    int            v;
>
> -                    bp -= 3;
> -                    sscanf(sp - 2, "%03o", &v);
> -                    *bp = v;
> -                    buflen -= 3;
> -                    state = 0;
> -                }
> -                else
> -                {
> -                    *bp = *sp;
> -                    state = 0;
> -                }
> -                break;
> +            default:
> +                buffer[j++] = strtext[i++];
>          }
>      }
> -    buffer = realloc(buffer, buflen);
> -    if (buffer == NULL)
> -        return NULL;
> +    buflen = j; /* buflen is the length of the unquoted data */
> +    tmpbuf = realloc(buffer, buflen);
> +
> +    if (!tmpbuf)
> +    {
> +        free(buffer);
> +        return 0;
> +    }
>
>      *retbuflen = buflen;
> -    return buffer;
> +    return tmpbuf;
>  }
>
>  /* ----------------
>

>
> ---------------------------(end of broadcast)---------------------------
> TIP 2: you can get off all lists at once with the unregister command
>     (send "unregister YourEmailAddressHere" to majordomo@postgresql.org)

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073