ISBN/ISSN/ISMN/EAN13 module - Mailing list pgsql-patches

From Jeremy Kronuz
Subject ISBN/ISSN/ISMN/EAN13 module
Date
Msg-id BAY107-F328919B6E12D3501B66606D8340@phx.gbl
Whole thread Raw
Responses Re: ISBN/ISSN/ISMN/EAN13 module  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-patches
> >> I'm reviewing this for addition to contrib/ now.  I notice that there
>is
> >> no clear license statement.  Is it OK to put the following into the
> >> README file?
>
> > Tom, yes. Also, I just put copyright in the files that contain a
>significant
> > amount of code written by me. (i.e. isn.h and isn.c).
>
>Excellent, thanks.
>
> > Also, did you get the 'isn-1_0_beta_20060924' version from my ftp?
>'cause
> > that�s the last one I updated (2006-09-24).  It's at:
> > ftp://ftp.kronuz.com/pub/programming/isn-1_0_beta_20060924.tar.bz2 (just
>in
> > case)
>
>You had submitted two slightly different versions to the mailing list
>awhile back.  I took the one that seemed to have later file dates and
>did some fixes/editorializations on that.  Please look at what I've
>just committed to PG CVS and see if you want to make any adjustments
>--- if so, submit a patch through the usual pgsql-patches channel.
>

Tom, I've checked the version in the cvs and I had made significant changes
from that version. I fixed some major bugs that prevented some ISBN numbers
from working. I've merged the changes made for the cvs with my own, hoping I
didn't miss anything that was already fixed in the cvs... I noticed you
changed some log messages and that you added GET_STR and GET_TEXT. indeed
there was that thing that took me some time to figure in both
ean13_cast_to_text() and isn_cast_to_text() functions, not changing to a
valid TEXT the return value; but I did not know about the other
*_cast_from_text() functions having the problem of the missing GET_STR() (I
was using VARDATA()).

As I changed so much things with the patch (I'm sending the patch to
pgsql-patches too), Could you please check that those two functions
(ean13_cast_to_text() and isn_cast_to_text()) to see if the use of
GET_TEXT() would be a better option from the one I'm using? (I hope I left
all the improved messages okay)

Regards,
Kronuz.

Patch follows next:
diff -ruN oldisn/isn.c isn/isn.c
--- oldisn/isn.c    2006-09-08 23:07:52.000000000 -0500
+++ isn/isn.c    2006-09-09 16:58:43.098148200 -0500
@@ -31,7 +31,7 @@

enum isn_type { INVALID, ANY, EAN13, ISBN, ISMN, ISSN, UPC };

-static const char *isn_names[] = { "ISN", "ISN", "EAN13", "ISBN", "ISMN",
"ISSN", "UPC" };
+static const char *isn_names[] = { "EAN13/UPC/ISxN", "EAN13/UPC/ISxN",
"EAN13", "ISBN", "ISMN", "ISSN", "UPC" };

static bool g_weak = false;
static bool g_initialized = false;
@@ -43,11 +43,11 @@

/***********************************************************************
  **
- **        Routines for ISNs.
+ **        Routines for EAN13/UPC/ISxNs.
  **
  ** Note:
  **  In this code, a normalized string is one that is known to be a valid
- **  ISN number containing only digits and hyphens and with enough space
+ **  ISxN number containing only digits and hyphens and with enough space
  **  to hold the full 13 digits plus the maximum of four hyphens.
  ***********************************************************************/

@@ -217,7 +217,7 @@
}

/*
- * weight_checkdig -- Receives a buffer with a normalized ISN string
number,
+ * weight_checkdig -- Receives a buffer with a normalized ISxN string
number,
  *                     and the length to weight.
  *
  * Returns the weight of the number (the check digit value, 0-10)
@@ -239,7 +239,7 @@


/*
- * checkdig --- Receives a buffer with a normalized ISN string number,
+ * checkdig --- Receives a buffer with a normalized ISxN string number,
  *               and the length to check.
  *
  * Returns the check digit value (0-9)
@@ -267,8 +267,94 @@
}

/*
- * ean2isn --- Convert in-place a normalized EAN13 string to the
corresponding
- *            ISN string number. Assumes the input string is normalized.
+ * ean2isn --- Try to convert an ean13 number to a UPC/ISxN number.
+ *             This doesn't verify for a valid check digit.
+ *
+ * If errorOK is false, ereport a useful error message if the ean13 is bad.
+ * If errorOK is true, just return "false" for bad input.
+ */
+static
+bool ean2isn(ean13 ean, bool errorOK, ean13 *result, enum isn_type accept)
+{
+    enum isn_type type = INVALID;
+
+    char buf[MAXEAN13LEN + 1];
+    char *firstdig, *aux;
+    unsigned digval;
+    unsigned search;
+    ean13 ret = ean;
+
+    ean >>= 1;
+    /* verify it's in the EAN13 range */
+    if(ean > UINT64CONST(9999999999999))
+        goto eantoobig;
+
+    /* convert the number */
+    search = 0;
+    firstdig = aux = buf + 13;
+    *aux = '\0';        /* terminate string; aux points to last digit */
+    do {
+        digval = (unsigned)(ean % 10);        /* get the decimal value */
+        ean /= 10;                                                /* get next digit */
+        *--aux = (char)(digval + '0');            /* convert to ascii and store */
+    } while(ean && search++<12);
+    while(search++<12) *--aux = '0';            /* fill the remaining EAN13 with '0' */
+
+    /* find out the data type: */
+    if(!strncmp("978", buf, 3)) { /* ISBN */
+        type = ISBN;
+    } else if(!strncmp("977", buf, 3)) { /* ISSN */
+        type = ISSN;
+    } else if(!strncmp("9790", buf, 4)) { /* ISMN */
+        type = ISMN;
+    } else if(!strncmp("979", buf, 3)) { /* ISBN-13 */
+        type = ISBN;
+    } else if(*buf == '0') { /* UPC */
+        type = UPC;
+    } else {
+        type = EAN13;
+    }
+    if(accept != ANY && accept != EAN13 && accept != type) goto eanwrongtype;
+
+    *result = ret;
+    return true;
+
+eanwrongtype:
+    if(!errorOK) {
+        if(type!=EAN13) {
+            ereport(ERROR,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("cannot cast EAN13(%s) to %s for number: \"%s\"",
+                        isn_names[type], isn_names[accept], buf)));
+        } else {
+            ereport(ERROR,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("cannot cast %s to %s for number: \"%s\"",
+                        isn_names[type], isn_names[accept], buf)));
+        }
+    }
+    return false;
+
+eantoobig:
+    if(!errorOK) {
+        char    eanbuf[64];
+
+        /*
+         * Format the number separately to keep the machine-dependent
+         * format code out of the translatable message text
+         */
+        snprintf(eanbuf, sizeof(eanbuf), EAN13_FORMAT, ean);
+        ereport(ERROR,
+                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                 errmsg("value \"%s\" is out of range for %s type",
+                        eanbuf, isn_names[type])));
+    }
+    return false;
+}
+
+/*
+ * ean2UPC/ISxN --- Convert in-place a normalized EAN13 string to the
corresponding
+ *                  UPC/ISxN string number. Assumes the input string is
normalized.
  */
static inline
void ean2ISBN(char *isn)
@@ -325,7 +411,8 @@
{
    ean13 ean = 0;    /* current ean */
    while(*num) {
-        ean = 10 * ean + ((*num++) - '0');
+        if(isdigit(*num)) ean = 10 * ean + (*num - '0');
+        num++;
    }
     return (ean<<1); /* also give room to a flag */
}
@@ -336,7 +423,7 @@
  *                the string (maximum MAXEAN13LEN+1 bytes)
  *                This doesn't verify for a valid check digit.
  *
- * If shortType is true, the returned string is in the old ISN short
format.
+ * If shortType is true, the returned string is in the old ISxN short
format.
  * If errorOK is false, ereport a useful error message if the string is
bad.
  * If errorOK is true, just return "false" for bad input.
  */
@@ -369,8 +456,8 @@
        digval = (unsigned)(ean % 10);        /* get the decimal value */
        ean /= 10;                                                /* get next digit */
        *--aux = (char)(digval + '0');            /* convert to ascii and store */
-        if(++search == 1) *--aux = '-';            /* the check digit is always there */
-    } while(ean);
+        if(search == 0) *--aux = '-';            /* the check digit is always there */
+    } while(ean && search++<13);
    while(search++<13) *--aux = '0';            /* fill the remaining EAN13 with '0' */

    /* The string should be in this form: ???DDDDDDDDDDDD-D" */
@@ -409,7 +496,7 @@
        TABLE_index = NULL;
    }

-    /* verify it's a logically valid EAN13/ISN */
+    /* verify it's a logically valid EAN13/UPC/ISxN */
    digval = search;
    search = hyphenate(result+digval, result+digval+2, TABLE, TABLE_index);

@@ -452,8 +539,8 @@
        snprintf(eanbuf, sizeof(eanbuf), EAN13_FORMAT, ean);
        ereport(ERROR,
                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                 errmsg("value \"%s\" is out of range for ISN type",
-                        eanbuf)));
+                 errmsg("value \"%s\" is out of range for %s type",
+                        eanbuf, isn_names[type])));
    }
    return false;
}
@@ -483,7 +570,7 @@
    /* recognize and validate the number: */
    while(*aux2 && length <= 13) {
        last = (*(aux2+1) == '!' || *(aux2+1) == '\0'); /* is the last character
*/
-        digit = isdigit(*aux2); /* is current character a digit? */
+        digit = (isdigit(*aux2)!=0); /* is current character a digit? */
        if(*aux2=='?' && last) /* automagically calculate check digit if it's '?'
*/
            magic = digit = true;
        if(length == 0 &&  (*aux2=='M' || *aux2=='m')) {
@@ -583,19 +670,25 @@
            break;
    }

-    if(!valid && !magic) goto eanbadcheck;
-
+  /* fix the check digit: */
    for(aux1 = buf; *aux1 && *aux1 <= ' '; aux1++);
    aux1[12] = checkdig(aux1, 13) + '0';
    aux1[13] = '\0';

+    if(!valid && !magic) goto eanbadcheck;
+
    *result = str2ean(aux1);
    *result |= valid?0:1;
-
    return true;

eanbadcheck:
-    if(!g_weak) {
+    if(g_weak) { /* weak input mode is activated: */
+      /* set the "invalid-check-digit-on-input" flag */
+        *result = str2ean(aux1);
+        *result |= 1;
+    return true;
+    }
+
        if(!errorOK) {
            if(rcheck == (unsigned)-1) {
                ereport(ERROR,
@@ -610,30 +703,6 @@
            }
        }
        return false;
-    }
-
-    if(accept != EAN13 && accept != ANY && type != accept) goto eanwrongtype;
-
-    /* fix the check digit: */
-    for(aux1 = buf; *aux1 && *aux1 <= ' '; aux1++);
-    aux1[12] = checkdig(aux1, 13) + '0';
-    aux1[13] = '\0';
-    *result = str2ean(aux1);
-
-     /* set the "invalid-check-digit-on-input" flag */
-    *result |= 1;
-
-    /* just warn about the error when there was a real check digit error: */
-    if(check != rcheck) {
-        if(rcheck == (unsigned)-1) {
-            elog(WARNING, "invalid %s number: \"%s\"",
-                isn_names[accept], str);
-        } else {
-            elog(WARNING, "invalid check digit for %s number: \"%s\", should be %c",
-                isn_names[accept], str, (rcheck==10)?('X'):(rcheck+'0'));
-        }
-    }
-    return true;

eaninvalid:
    if(!errorOK)
@@ -647,8 +716,8 @@
    if(!errorOK)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                 errmsg("invalid %s type for number: \"%s\"",
-                        isn_names[accept], str)));
+                 errmsg("cannot cast %s to %s for number: \"%s\"",
+                        isn_names[type], isn_names[accept], str)));
    return false;

eantoobig:
@@ -784,26 +853,97 @@
Datum
ean13_cast_to_text(PG_FUNCTION_ARGS)
{
-    ean13        val = PG_GETARG_EAN13(0);
-    char        buf[MAXEAN13LEN + 1];
+    /* arg is ean13, but easier to leave it as Datum */
+    Datum        arg = PG_GETARG_DATUM(0);
+    char       *s;
+    int            len;
+    text       *result;

-    (void) ean2string(val, false, buf, false);
+    s = DatumGetCString(DirectFunctionCall1(ean13_out, arg));
+    len = strlen(s);
+
+    result = (text *) palloc(VARHDRSZ + len);

-    PG_RETURN_TEXT_P(GET_TEXT(buf));
+    VARATT_SIZEP(result) = len + VARHDRSZ;
+    memcpy(VARDATA(result), s, len);
+
+    pfree(s);
+
+    PG_RETURN_TEXT_P(result);
}

PG_FUNCTION_INFO_V1(isn_cast_to_text);
Datum
isn_cast_to_text(PG_FUNCTION_ARGS)
{
+    /* arg is ean13, but easier to leave it as Datum */
+    Datum        arg = PG_GETARG_DATUM(0);
+    char       *s;
+    int            len;
+    text       *result;
+
+    s = DatumGetCString(DirectFunctionCall1(isn_out, arg));
+    len = strlen(s);
+
+    result = (text *) palloc(VARHDRSZ + len);
+
+    VARATT_SIZEP(result) = len + VARHDRSZ;
+    memcpy(VARDATA(result), s, len);
+
+    pfree(s);
+
+    PG_RETURN_TEXT_P(result);
+}
+
+PG_FUNCTION_INFO_V1(isbn_cast_from_ean13);
+Datum
+isbn_cast_from_ean13(PG_FUNCTION_ARGS)
+{
    ean13        val = PG_GETARG_EAN13(0);
-    char        buf[MAXEAN13LEN + 1];
+    ean13        result;

-    (void) ean2string(val, false, buf, true);
+    (void) ean2isn(val, false, &result, ISBN);
+
+    PG_RETURN_EAN13(result);
+}
+
+PG_FUNCTION_INFO_V1(ismn_cast_from_ean13);
+Datum
+ismn_cast_from_ean13(PG_FUNCTION_ARGS)
+{
+    ean13        val = PG_GETARG_EAN13(0);
+    ean13        result;

-    PG_RETURN_TEXT_P(GET_TEXT(buf));
+    (void) ean2isn(val, false, &result, ISMN);
+
+    PG_RETURN_EAN13(result);
}

+PG_FUNCTION_INFO_V1(issn_cast_from_ean13);
+Datum
+issn_cast_from_ean13(PG_FUNCTION_ARGS)
+{
+    ean13        val = PG_GETARG_EAN13(0);
+    ean13        result;
+
+    (void) ean2isn(val, false, &result, ISSN);
+
+    PG_RETURN_EAN13(result);
+}
+
+PG_FUNCTION_INFO_V1(upc_cast_from_ean13);
+Datum
+upc_cast_from_ean13(PG_FUNCTION_ARGS)
+{
+    ean13        val = PG_GETARG_EAN13(0);
+    ean13        result;
+
+    (void) ean2isn(val, false, &result, UPC);
+
+    PG_RETURN_EAN13(result);
+}
+
+
PG_FUNCTION_INFO_V1(ean13_cast_from_text);
Datum
ean13_cast_from_text(PG_FUNCTION_ARGS)
diff -ruN oldisn/isn.h isn/isn.h
--- oldisn/isn.h    2006-09-08 23:07:52.000000000 -0500
+++ isn/isn.h    2006-09-09 16:35:49.486379700 -0500
@@ -27,8 +27,8 @@

#define EAN13_FORMAT UINT64_FORMAT

-#define PG_GETARG_EAN13(n) PG_GETARG_INT64(n)
-#define PG_RETURN_EAN13(x) PG_RETURN_INT64(x)
+#define PG_GETARG_EAN13(n) PG_GETARG_INT64((int64)n)
+#define PG_RETURN_EAN13(x) PG_RETURN_INT64((int64)x)

extern Datum isn_out(PG_FUNCTION_ARGS);
extern Datum ean13_out(PG_FUNCTION_ARGS);
@@ -45,6 +45,10 @@
extern Datum ismn_cast_from_text(PG_FUNCTION_ARGS);
extern Datum issn_cast_from_text(PG_FUNCTION_ARGS);
extern Datum upc_cast_from_text(PG_FUNCTION_ARGS);
+extern Datum isbn_cast_from_ean13(PG_FUNCTION_ARGS);
+extern Datum ismn_cast_from_ean13(PG_FUNCTION_ARGS);
+extern Datum issn_cast_from_ean13(PG_FUNCTION_ARGS);
+extern Datum upc_cast_from_ean13(PG_FUNCTION_ARGS);

extern Datum is_valid(PG_FUNCTION_ARGS);
extern Datum make_valid(PG_FUNCTION_ARGS);
diff -ruN oldisn/isn.sql.in isn/isn.sql.in
--- oldisn/isn.sql.in    2006-09-08 23:07:52.000000000 -0500
+++ isn/isn.sql.in    2006-09-09 16:03:53.771250000 -0500
@@ -2131,86 +2131,111 @@
-- Type casts:
--
---------------------------------------------------
+CREATE FUNCTION isbn13(ean13)
+RETURNS isbn13
+AS 'MODULE_PATHNAME', 'isbn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION ismn13(ean13)
+RETURNS ismn13
+AS 'MODULE_PATHNAME', 'ismn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION issn13(ean13)
+RETURNS issn13
+AS 'MODULE_PATHNAME', 'issn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION isbn(ean13)
+RETURNS isbn
+AS 'MODULE_PATHNAME', 'isbn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION ismn(ean13)
+RETURNS ismn
+AS 'MODULE_PATHNAME', 'ismn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION issn(ean13)
+RETURNS issn
+AS 'MODULE_PATHNAME', 'issn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION upc(ean13)
+RETURNS upc
+AS 'MODULE_PATHNAME', 'upc_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+
+
CREATE FUNCTION ean13(text)
RETURNS ean13
AS 'MODULE_PATHNAME', 'ean13_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION isbn13(text)
RETURNS isbn13
AS 'MODULE_PATHNAME', 'isbn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION ismn13(text)
RETURNS ismn13
AS 'MODULE_PATHNAME', 'ismn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION issn13(text)
RETURNS issn13
AS 'MODULE_PATHNAME', 'issn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION isbn(text)
RETURNS isbn
AS 'MODULE_PATHNAME', 'isbn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION ismn(text)
RETURNS ismn
AS 'MODULE_PATHNAME', 'ismn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION issn(text)
RETURNS issn
AS 'MODULE_PATHNAME', 'issn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION upc(text)
RETURNS upc
AS 'MODULE_PATHNAME', 'upc_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;

+
CREATE FUNCTION text(ean13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(isbn13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(ismn13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(issn13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(isbn)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(ismn)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(issn)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(upc)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;

+CREATE CAST (ean13 AS isbn13) WITH FUNCTION isbn13(ean13);
+CREATE CAST (ean13 AS isbn) WITH FUNCTION isbn(ean13);
+CREATE CAST (ean13 AS ismn13) WITH FUNCTION ismn13(ean13);
+CREATE CAST (ean13 AS ismn) WITH FUNCTION ismn(ean13);
+CREATE CAST (ean13 AS issn13) WITH FUNCTION issn13(ean13);
+CREATE CAST (ean13 AS issn) WITH FUNCTION issn(ean13);
+CREATE CAST (ean13 AS upc) WITH FUNCTION upc(ean13);
+
CREATE CAST (isbn13 AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (isbn AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (ismn13 AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
@@ -2218,6 +2243,7 @@
CREATE CAST (issn13 AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (issn AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (upc AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
+
CREATE CAST (isbn AS isbn13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (isbn13 AS isbn) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (ismn AS ismn13) WITHOUT FUNCTION AS ASSIGNMENT;



pgsql-patches by date:

Previous
From: Christopher Browne
Date:
Subject: Re: [HACKERS] Fix linking of OpenLDAP libraries
Next
From: Tom Lane
Date:
Subject: Re: ISBN/ISSN/ISMN/EAN13 module