Re: [PATCHES] Chinese GB18030 support is implemented! - Mailing list pgsql-announce
From | Bill Huang |
---|---|
Subject | Re: [PATCHES] Chinese GB18030 support is implemented! |
Date | |
Msg-id | 3CFF151D.8030402@ybb.ne.jp Whole thread Raw |
In response to | Chinese GB18030 support is implemented! (Bill Huang <bill_huanghb@ybb.ne.jp>) |
List | pgsql-announce |
-GB18030 support is also available on the ODBC front-end side. Best Regards, Bill Bill Huang wrote: >Hello, > >As postgresql is widely used in the world, many Chinese users are looking >forward to using such a high-performance database management >system. However, since the new Chinese codepage standard GB18030 is not >completely supported, postgresql's use in China is limited. > >Now I have managed to implement GB18030 support on top of the latest >version, so the following functions are available once the patches are applied. > >-Chinese GB18030 encoding is available on the front-end side, while on >the backend side, EUC_CN or MIC is used. >-Encoding conversion between MIC and GB18030 is implemented. >-GB18030 locale support is available on the front-end side. >-A GB18030 locale test is added. > >Any help with testing these patches and suggestions for GB18030 >support are greatly appreciated. > >Best Regards, >Bill > > >------------------------------------------------------------------------ > >--- postgresql-7.2.1/src/backend/utils/mb/conv.c.org Thu Jun 6 11:52:24 2002 >+++ postgresql-7.2.1/src/backend/utils/mb/conv.c Thu Jun 6 12:20:36 2002 >@@ -502,6 +502,96 @@ > } > > /* >+ * GB18030 ---> MIC >+ * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> >+ */ >+static void >+gb180302mic(unsigned char *gb18030, unsigned char *p, int len) >+{ >+ int c1; >+ int c2; >+ >+ while (len > 0 && (c1 = *gb18030++)) >+ { >+ if (c1 < 0x80) >+ { /* should be ASCII */ >+ len--; >+ *p++ = c1; >+ } >+ else if(c1 >= 0x81 && c1 <= 0xfe) >+ { >+ c2 = *gb18030++; >+ >+ if(c2 >= 0x30 && c2 <= 0x69){ >+ len -= 4; >+ *p++ = c1; >+ *p++ = c2; >+ *p++ = *gb18030++; >+ *p++ = *gb18030++; >+ *p++ = *gb18030++; >+ } >+ else if ((c2 >=0x40 && c2 <= 0x7e) ||(c2 >=0x80 && c2 <= 0xfe)){ >+ len -= 2; >+ *p++ = c1; >+ *p++ = c2; >+ *p++ = *gb18030++; >+ } >+ else{ /*throw the strange code*/ >+ len--; >+ } >+ } >+ } >+ *p = '\0'; >+} >+ >+/* >+ * MIC ---> GB18030 >+ * Added by Bill Huang 
<bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> >+ */ >+static void >+mic2gb18030(unsigned char *mic, unsigned char *p, int len) >+{ >+ int c1; >+ int c2; >+ >+ while (len > 0 && (c1 = *mic)) >+ { >+ len -= pg_mic_mblen(mic++); >+ >+ if (c1 <= 0x7f) /*ASCII*/ >+ { >+ *p++ = c1; >+ } >+ else if (c1 >= 0x81 && c1 <= 0xfe) >+ { >+ c2 = *mic++; >+ >+ if((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfe)){ >+ *p++ = c1; >+ *p++ = c2; >+ } >+ else if(c2 >= 0x30 && c2 <= 0x39){ >+ *p++ = c1; >+ *p++ = c2; >+ *p++ = *mic++; >+ *p++ = *mic++; >+ } >+ else{ >+ mic--; >+ printBogusChar(&mic, &p); >+ mic--; >+ printBogusChar(&mic, &p); >+ } >+ } >+ else{ >+ mic--; >+ printBogusChar(&mic, &p); >+ } >+ } >+ *p = '\0'; >+} >+ >+/* > * EUC_TW ---> MIC > */ > static void >@@ -1583,6 +1673,26 @@ > } > > /* >+ * UTF-8 ---> GB18030 >+ */ >+static void >+utf_to_gb18030(unsigned char *utf, unsigned char *euc, int len) >+ >+{ >+ utf_to_local(utf, euc, ULmapEUC_CN, >+ sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), len); >+} >+ >+/* >+ * GB18030 ---> UTF-8 >+ */ >+static void >+gb18030_to_utf(unsigned char *euc, unsigned char *utf, int len) >+{ >+ local_to_utf(euc, utf, LUmapEUC_CN, >+ sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len); >+} >+/* > * UTF-8 ---> EUC_KR > */ > static void >@@ -1754,6 +1864,9 @@ > PG_BIG5, big52mic, mic2big5, big5_to_utf, utf_to_big5 > }, > { >+ PG_GB18030, gb180302mic, mic2gb18030, gb18030_to_utf, utf_to_gb18030 >+ }, >+ { > PG_WIN1250, win12502mic, mic2win1250, 0, 0 > }, > }; >@@ -1841,6 +1954,9 @@ > PG_BIG5, big52mic, mic2big5, 0, 0 > }, > { >+ PG_GB18030, gb180302mic, mic2gb18030, 0, 0 >+ }, >+ { > PG_WIN1250, win12502mic, mic2win1250, 0, 0 > }, > }; > > >------------------------------------------------------------------------ > >--- postgresql-7.2.1/src/backend/utils/mb/encnames.c.org Mon Jun 3 19:24:10 2002 >+++ postgresql-7.2.1/src/backend/utils/mb/encnames.c Mon Jun 3 19:25:26 2002 >@@ -173,6 +173,9 @@ > { > "windows1251", PG_WIN1251 > 
}, /* Windows-1251; Microsoft */ >+ { >+ "gb18030", PG_GB18030 >+ }, /* GB18030; GB18030 */ > > { > NULL, 0 >@@ -268,6 +271,9 @@ > "BIG5", PG_BIG5 > }, > { >+ "GB18030", PG_GB18030 >+ }, >+ { > "WIN1250", PG_WIN1250 > } > }; > > >------------------------------------------------------------------------ > >--- postgresql-7.2.1/src/interfaces/odbc/multibyte.c.org Wed Jun 5 18:28:30 2002 >+++ postgresql-7.2.1/src/interfaces/odbc/multibyte.c Wed Jun 5 19:48:01 2002 >@@ -48,6 +48,28 @@ > mb_st = 0; > } > break; >+ /* Chinese GB18030 support >+ * By Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> >+ * */ >+ case GB18030: >+ { >+ if (mb_st < 2 && s[i] > 0x81) >+ mb_st = 2; >+ else if (mb_st == 2) >+ if(s[i] >= 0x30 && s[i] <= 0x39) >+ mb_st = 3; >+ else >+ mb_st = 1; >+ else if (mb_st == 3) >+ if(s[i] >= 0x30 && s[i] <= 0x39) >+ mb_st = 1; >+ else >+ mb_st = 3; >+ else >+ mb_st = 0; >+ } >+ break; >+ > default: > mb_st = 0; > } >@@ -87,6 +109,16 @@ > { > multibyte_client_encoding = BIG5; > return ("BIG5"); >+ }/* Chinese GB18030 support. 
>+ * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> >+ */ >+ if (strstr(str, "%27GB18030%27") || >+ strstr(str, "%27gb18030%27") || >+ strstr(str, "'GB18030'") || >+ strstr(str, "'gb18030'") ) >+ { >+ multibyte_client_encoding = GB18030; >+ return ("GB18030"); > } > return ("OTHER"); > } >@@ -127,6 +159,25 @@ > else > multibyte_status = 0; > } >+ break; >+ /*Chinese GB18030 support.Added by Bill Huang <bhuang@redhat.com> <bill_huanghb@ybb.ne.jp>*/ >+ case GB18030: >+ { >+ if (multibyte_status < 2 && s > 0x80) >+ multibyte_status = 2; >+ else if (multibyte_status = 2) >+ if (s >= 0x30 && s <= 0x39) >+ multibyte_status = 3; >+ else >+ multibyte_status = 1; >+ else if (multibyte_status = 3) >+ if (s >= 0x30 && s <= 0x39) >+ multibyte_status = 1; >+ else >+ multibyte_status = 3; >+ else >+ multibyte_status = 0; >+ } > break; > default: > multibyte_status = 0; > > >------------------------------------------------------------------------ > >--- postgresql-7.2.1/src/interfaces/odbc/multibyte.h.org Wed Jun 5 19:51:20 2002 >+++ postgresql-7.2.1/src/interfaces/odbc/multibyte.h Wed Jun 5 19:51:35 2002 >@@ -28,6 +28,7 @@ > #define SJIS 32 /* Shift JIS */ > #define BIG5 33 /* Big5 */ > #define WIN1250 34 /* windows-1250 */ >+#define GB18030 35 /* GB18030 */ > > extern int multibyte_client_encoding; /* Multibyte client encoding. */ > extern int multibyte_status; /* Multibyte charcter status. 
*/ > > >------------------------------------------------------------------------ > >--- postgresql-7.2.1/src/include/mb/pg_wchar.h.org Mon May 27 20:07:58 2002 >+++ postgresql-7.2.1/src/include/mb/pg_wchar.h Mon May 27 20:08:59 2002 >@@ -182,6 +182,7 @@ > /* followings are for client encoding only */ > PG_SJIS, /* Shift JIS */ > PG_BIG5, /* Big5 */ >+ PG_GB18030, /* GB18030 */ > PG_WIN1250, /* windows-1250 */ > > _PG_LAST_ENCODING_ /* mark only */ > > >------------------------------------------------------------------------ > >--- postgresql-7.2.1/src/backend/utils/mb/wchar.c.org Mon May 27 20:02:44 2002 >+++ postgresql-7.2.1/src/backend/utils/mb/wchar.c Mon May 27 20:03:12 2002 >@@ -457,6 +457,33 @@ > return (len); > } > >+/* >+ * GB18030 >+ * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> >+ */ >+static int >+pg_gb18030_mblen(const unsigned char *s) >+{ >+ int len; >+ >+ if (*s <= 0x7f) >+ { /* kanji? */ >+ len = 1; >+ } >+ else >+ { /* should be ASCII */ >+ >+ if((*(s+1) >0x40 && *(s+1) <= 0x7e) >+ || (*(s+1) >= 0x80 && *(s+1) <= 0xfe)) >+ len = 2; >+ else if(*(s+1) >0x30 && *(s+1) <= 0x39) >+ len = 4; >+ else >+ len = 2; >+ } >+ return (len); >+} >+ > pg_wchar_tbl pg_wchar_table[] = { > {pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII */ > {pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */ >@@ -483,6 +510,7 @@ > {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 22; ISO-8859-15 */ > {pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 23; ISO-8859-16 */ > {0, pg_sjis_mblen, 2}, /* 24; PG_SJIS */ >+ {0, pg_gb18030_mblen, 2}, /* 25; PG_GB18030 */ > {0, pg_big5_mblen, 2}, /* 25; PG_BIG5 */ > {pg_latin12wchar_with_len, pg_latin1_mblen, 1} /* 26; PG_WIN1250 */ > }; > > >------------------------------------------------------------------------ > > >---------------------------(end of broadcast)--------------------------- >TIP 2: you can get off all lists at once with the unregister command > (send "unregister 
YourEmailAddressHere" to majordomo@postgresql.org) > > postgresql-7.2.1-conv-gb18030.patch > > Content-Type: > > text/plain > Content-Encoding: > > base64 > > > ------------------------------------------------------------------------ > postgresql-7.2.1-encnames-gb18030.patch > > Content-Type: > > text/plain > Content-Encoding: > > base64 > > > ------------------------------------------------------------------------ > postgresql-7.2.1-multibyte-gb18030.patch > > Content-Type: > > text/plain > Content-Encoding: > > base64 > > > ------------------------------------------------------------------------ > postgresql-7.2.1-multibyteh-gb18030.patch > > Content-Type: > > text/plain > Content-Encoding: > > base64 > > > ------------------------------------------------------------------------ > postgresql-7.2.1-pg-wchar-gb18030.patch > > Content-Type: > > text/plain > Content-Encoding: > > base64 > > > ------------------------------------------------------------------------ > postgresql-7.2.1-wchar-gb18030.patch > > Content-Type: > > text/plain > Content-Encoding: > > base64 > > > ------------------------------------------------------------------------ > Part 1.8 > > Content-Type: > > text/plain > Content-Encoding: > > 8bit > > -- /---------------------------/ 黄 宏彬 (Bill Huang) E-mail:bill_huanghb@ybb.ne.jp Cell phone:090-9979-4631 /---------------------------/
pgsql-announce by date: