PostgreSQL and LDAP - Mailing list pgsql-general

From Holger Marzen
Subject PostgreSQL and LDAP
Date
Msg-id Pine.LNX.4.44.0203061904160.10119-100000@bluebell.marzen.de
Whole thread Raw
List pgsql-general
- Fun with OpenLDAP and PostgreSQL -
Holger Marzen <holger@marzen.de>

What does a novice do with a database as his first project? You name it:
an address database. You can insert dates of births and write an alarm
cron job that helps you not to forget your friends' birthdays. Very
important. And you can try to access it with Perl or PHP and build your
own web application. Great! And you can insert email addresses ... STOP!
Did I type email addresses? Yes, indeed. And where do you need them? In
your browser? No. In your email client.

Unfortunately there is no email client that supports PostgreSQL or ODBC.
But some of them can talk LDAP.

So what did I do? I got OpenLDAP and played around with it. It comes
with it's own database. But I don't want another database, I want - of
course - PostgreSQL.

Fortunatly OpenLDAM supports a "shell backend", an own script or
programm that accesses the database. So I wrote a Perl script that
accesses my PostgreSQL database and returns found email addresses.

One problem arouse: German umlaut characters and the "scharfes S". I
found out that Pine accepted iso-latin1 coded output but Netscape
messenger wanted UTF-8. Both transmitted the chracters in a strange way:
a backslash and to characters that form an hexadecimal value. I could
handle and distinguish it with different "base" parameters:

Pine:
  ldap-servers=localhost "/base=encoding=latin1,o=Marzen,c=DE"

Netscape:
  Server Root: encoding=utf8,o=Marzen,c=DE

(Unfortunately at least the old Linux libc5 Netscape Messenger 4.7
 doesn't work properly. Sometimes it works, sometimes I get an "unknown
 error")

So then we need the slapd.conf. The magic line on my server is:

  database shell
  suffix          "o=Marzen,c=DE"
  search          /usr/local/bin/ldap-adressbook-search.pl

All we still have to setup is the Perl script
ldap-adressbook-search.plldap-adressbook-search.pl. It uses 2 small
program with copied parts from other people to do the UTF conversion,
latin1toutf and utftolatin1.

--latin1toutf---------------------------------------------------------------
/* Read Latin-1 (ISO-8859-1) characters from stdin, convert them
   to UTF-8, and write the converted characters to stdout.
   UTF-8 is defined by RFC 2279.
*/
#include <errno.h>
#include <stdio.h>

int
main (int argc, char** argv)
{
    register int c;
    while ((c = getchar()) != EOF) {
        if ((c & 0x80) == 0) {
            putchar (c);
        } else {
            putchar (0xC0 | (0x03 & (c >> 6)));
            putchar (0x80 | (0x3F & c));
        }
    }
    if ( ! feof (stdin)) {
        errno = ferror (stdin);
        perror (argv[0]);
    }
    return 0;
}

--utf2latin1----------------------------------------------------------------
/* Read UTF-8 characters from stdin, convert them to Latin-1
   (ISO-8859-1), and write the converted characters to stdout.
   UTF-8 is defined by RFC 2279.
*/
#include <errno.h>
#include <stdio.h>

static char UTF8len[64]
/* A map from the most-significant 6 bits of the first byte
   to the total number of bytes in a UTF-8 character.
*/
= {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* erroneous */
   2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6};

int
main (int argc, char** argv)
{
    register int c;
    while ((c = getchar()) != EOF) {
        auto int len = UTF8len [(c >> 2) & 0x3F];
        register unsigned long u;
        switch (len) {
          case 6: u = c & 0x01; break;
          case 5: u = c & 0x03; break;
          case 4: u = c & 0x07; break;
          case 3: u = c & 0x0F; break;
          case 2: u = c & 0x1F; break;
          case 1: u = c & 0x7F; break;
          case 0: /* erroneous: c is the middle of a character. */
                  u = c & 0x3F; len = 5; break;
        }
        while (--len && (c = getchar()) != EOF) {
            if ((c & 0xC0) == 0x80) {
                u = (u << 6) | (c & 0x3F);
            } else { /* unexpected start of a new character */
                ungetc (c, stdin);
                break;
            }
        }
        if (u <= 0xFF) {
            putchar (u);
        } else { /* this character can't be represented in Latin-1 */
            putchar ('?'); /* a reasonable alternative is 0x1A (SUB) */
        }
        if (c == EOF) break;
    }
    if ( ! feof (stdin)) {
        errno = ferror (stdin);
        perror (argv[0]);
    }
    return 0;
}

--ldap-adressbook-search.pl-------------------------------------------------

#!/usr/bin/perl

# Netscape Navigator
# - Umlautcodierung UTF als "\xx\yy"
#
# Pine
# - Umlautcodierung latin1 als "\xx"
#

use DBI;

#-- Hauptteil

$utf8 = 0;
$base = "";
$filter = "";
$data_source = "dbi:Pg:dbname=db1 user=ldap password=xxxxxxxx";
$username = "";
$password = "";

while (<>)
 {
 chomp;
 /^base:/    && do {($tag, $base)   = split};
 /^filter:/  && do {($tag, $filter) = split};
 }

if ($base =~ /encoding=utf8/)
  {
  $utf8 = 1;
  }

$filter =~ s/.*=([^)]*).*/\1/;
$filter =~ s/\*//g;
$f = "";
$i = 0;
$l = length($filter);
while ($i < $l)
  {
  $c = substr($filter,$i,1);
  if ($c ne "\\")
    {
    $f = "${f}${c}";
    $i++;
    }
  else
    {
    $r = $l - $i;
    if ($r > 2)
      {
      $i++;
      $p = pack("C",hex(substr($filter,$i,2)));
      $f = "${f}${p}";
      $i += 2;
      }
    else
      {
      $i++;
      }
    }
  }

$filter = $f;
if ($utf8)
  {
  $filter = `echo -n "$filter" | /usr/local/bin/utftolatin1`;
  print "filter: $filter\n";
  }


#-- Verbinden mit der DB
$dbh = DBI->connect($data_source, $username, $password)
     || die "Can't connect: $DBI::errstr";

$sql_statement = "
SELECT nachname, vorname, email, kommentar from adr where
           (
           lower(nachname)   like lower('%${filter}%') or
           lower(vorname)    like lower('%${filter}%') or
           lower(email)      like lower('%${filter}%') or
           lower(kommentar)  like lower('%${filter}%')
       )
       and email         like '%@%'
           order by nachname, vorname asc;
";

$sth = $dbh->prepare($sql_statement)
     || die "Can't prepare: $DBI::errstr";
$sth->execute
     || print $dberrmsg;

while ( @row = $sth->fetchrow_array )
  {
  $nachname = $row[0];
  $vorname = $row[1];
  $kommentar = $row[3];
  $kommentar =~ s/\n/ /g;
  $kommentar =~ s/:/ /g;

  if ($utf8)
    {
    $nachname = `echo -n "$nachname" | /usr/local/bin/latin1toutf`;
    $vorname = `echo -n "$vorname" | /usr/local/bin/latin1toutf`;
    $kommentar = `echo -n "$kommentar" | /usr/local/bin/latin1toutf`;
    }
  $mail = $row[2];
  chomp($mail);
  chomp($vorname);
  chomp($nachname);
  chomp($kommentar);
  chomp($mail);
  chomp($base);
  print("dn: cn=$vorname $nachname, $base\n");
  print("cn: $vorname $nachname\n");
  print("cn: $kommentar\n");
  print("mail: $mail\n");
  print("\n");
  }

$sth->finish;
$dbh->disconnect;

print("RESULT\ncode: 0\n");
exit 0


pgsql-general by date:

Previous
From: thiemo
Date:
Subject: Re: Compiling problems
Next
From: "Thomas T. Thai"
Date:
Subject: Re: help with getting index scan