#!/usr/bin/perl -pi
#
# Convert Mozilla Thunderbird Address Book to correct LDIF-format
# for importing to OpenLDAP-Servers
#
# This script does:
# * check if an "sn" attribute is available for the entry and
# if not, creates one with the text "not given"
# (inetOrgPerson has this attribute mentioned as mandatory...)
# * some sanity checks on the dn-attribute (including the value for cn,
# etc.) and creates a value for cn if none is given
# * decode BASE64 encoded values to make them available for
# substitutions later on
# * Substitute unicode characters (umlauts, etc.) with the corresponding
# ASCII-values (although disabled, see comment below)
# * remove the mail attribute from the dn-string and instead fills out
# all other relevant information (dc, ou, ...)
# * set up the correct objectclasses for mozillaOrgPerson
# (top -> inetOrgPerson -> mozillaOrgPerson)
# * remove some unsupported attributes (like preferred mail format, etc.)
# * encode previously decoded (and maybe modified) values to BASE64
#
# This script does _NOT_:
# * check if a cn is mentioned more than once
# (I wanted to implement this, but it would lead to confusion and
# frustration, because multiple cn's may contain different data
# and so you have to merge it yourself, over and out)
#
# Invocation of this script:
# ./this_script ldif-file
#
# Basic-Requirements:
# * Perl (who would have guessed, hm?)
# * for BASE64-stuff to work: MIME::Base64 Perl library
#
# by Alexander Griesser [perl@tuxx-home.at]
# 2005-10-04
use MIME::Base64;
# One-time setup. The script runs under -p, i.e. this code is executed for
# every input line, so the globals are only initialised while $sn_exists
# is still undefined (which is the case on the very first line).
unless (defined $sn_exists) {
    # Neither "sn" nor "cn" has been seen yet.
    ($sn_exists, $cn_exists) = (0, 0);

    # Where to store your contacts? This suffix is appended to every
    # rewritten dn, hence the leading comma.
    $basedn = ",ou=Contacts,dc=molkereiow,dc=local";

    # Running number, presumably meant for the autogenerated names of
    # entries that come without a cn.
    $unknown_index = 1;
}
# Make sure every entry carries an "sn" and a "cn" attribute.
#
# _missing_attributes($line) is fed every raw input line. It remembers
# whether the current entry (opened by its dn line) already contained
# "sn" and "cn". On the blank line that terminates the entry it returns
# the attribute lines that still have to be written in front of that blank
# line; for every other line it returns the empty string.
#
# Only entries that were actually opened by a dn line are padded, so
# leading or repeated blank lines no longer produce stray
# "sn: not given" / "cn: ..." lines outside of any entry.
#
# Note: the generated cn needs no encoding/decoding here; it reuses $cn,
# the value that was put into the dn of the same entry, which is what LDAP
# requires (otherwise it complains with LDAP_NAMING_VIOLATION).
sub _missing_attributes {
    my ($line) = @_;
    $line = '' unless defined $line;

    # Non-blank line: just record what we have seen.
    if ($line !~ /^$/) {
        $in_entry  = 1 if $line =~ /^dn:/;
        $sn_exists = 1 if $line =~ /^sn:/;
        $cn_exists = 1 if $line =~ /^cn:/;
        return '';
    }

    # Blank line: the entry (if any) ends here.
    my $missing = '';
    if ($in_entry) {
        $missing .= "sn: not given\n" unless $sn_exists;
        $missing .= "cn: " . (defined $cn ? $cn : '') . "\n" unless $cn_exists;
    }

    # Start over for the next entry.
    $in_entry = $sn_exists = $cn_exists = 0;
    return $missing;
}

# The padding has to be printed before -p prints the blank line itself.
my $missing_attributes = _missing_attributes($_);
print $missing_attributes;
# A line of the form "attr:: <data>" carries a BASE64-encoded value.
# Decode it in place so that the substitutions further down work on the
# plain text; the "::" marker stays on the line.
if (my ($attr, $encoded) = /^(\w+):: (.*)/) {
    $_ = $attr . ":: " . decode_base64($encoded) . "\n";
}
# Rewrite the dn line of an entry.
#
# _rewrite_dn_line($line) returns $line unchanged unless it is a dn line.
# For a dn line it
#   * extracts the cn (the dn may or may not carry a trailing "mail=" part),
#   * invents a unique "NoName <n>" cn when the dn has no usable cn,
#   * replaces commas in the cn by blanks,
#   * returns a new dn line made of the original "dn:" prefix, "cn=<cn>"
#     and $basedn.
# The resulting cn is stored in the global $cn because the rest of the
# script reuses it for the cn attribute of the same entry.
sub _rewrite_dn_line {
    my ($line) = @_;

    # Anything that is not a dn line passes through untouched.
    return $line unless defined $line && $line =~ /^(dn:+\s*)/;
    my $dn_prefix = $1;

    # Work on a copy without the line terminator. chomp (instead of chop)
    # leaves the text intact when the line has no trailing newline.
    my $dn = $line;
    chomp $dn;

    if ($dn =~ /^dn:+\s*cn=([^=]+)(?:,\s*mail=.*)?$/) {
        $cn = $1;
    }
    else {
        # No cn in the dn: provide a unique name for this entry.
        $cn = "NoName " . $unknown_index++;
    }

    # Sanity check on the cn: it may not contain commas.
    $cn =~ s/,/ /g;

    # Build the new dn line.
    return $dn_prefix . "cn=" . $cn . $basedn . "\n";
}

$_ = _rewrite_dn_line($_);
# Keep the cn attribute in line with the dn: whatever value the cn line
# had, it is replaced by $cn (the value worked out from the dn line),
# keeping the original "cn:" / "cn::" prefix.
$_ = "$1$cn\n" if /^(cn:+\s*)/;
# substitute all unicode characters (umlauts and other stuff) with their
# corresponding ascii-values
#
# feel free to complete this list or post another way of how this could be
# done better ;)
#
# When importing with phpldapadmin this is not necessary - does the LDAP server
# itself handle this or is it phpldapadmin??
#
#s/\303\237/\337/g; # ß
#s/\303\244/\344/g; # ä
#s/\303\266/\366/g; # ö
#s/\303\274/\374/g; # ü
#s/\303\204/\304/g; # Ä
#s/\303\226/\326/g; # Ö
#s/\303\234/\334/g; # Ü
#s/\303\241/\341/g; # á
#s/\303\251/\351/g; # é
#s/\303\263/\363/g; # ó
#s/\303\272/\372/g; # ú
# modifytimestamp may not be set by us when importing, so the line goes.
s{^modifytimestamp.*\n}{};

# xmozillausehtmlmail is not supported by the current scheme; drop it too.
s{^xmozillausehtmlmail.*\n}{};

# The URL attributes get their mozillaOrgPerson names.
s{^homeurl:}{mozillaHomeUrl:};
s{^workurl:}{mozillaWorkUrl:};

# Object classes: throw away the ones we do not want ...
s{^objectclass: mozillaAbPersonObsolete.*\n}{};
s{^objectclass: person.*\n}{};
s{^objectclass: organizationalPerson.*\n}{};

# ... and let inetOrgPerson be followed by mozillaOrgPerson.
s{^objectclass: inetOrgPerson.*}{objectclass: inetOrgPerson\nobjectclass: mozillaOrgPerson};
# Encode previously decoded lines ("attr:: <plain value>") to BASE64 again.
#
# _reencode_base64_line($line) returns $line unchanged unless it has the
# "attr:: value" form. Otherwise the value is everything after "attr:: "
# up to (but not including) the final newline. The match uses /s so that
# a value containing newlines (e.g. multi-line notes) is encoded as a
# whole instead of being cut off after its first line.
#
# encode_base64 wraps long output over several lines. To comply with the
# LDIF folding rule every continuation line has to start with a blank, so
# a blank is inserted after each inner newline. The trailing newline of
# the encoded string is removed first and added back by sprintf.
sub _reencode_base64_line {
    my ($line) = @_;

    return $line unless defined $line && $line =~ /\A(\w+):: (.*)\z/s;
    my ($attr, $value) = ($1, $2);

    # Strip the line terminator only; newlines inside the value are data.
    $value =~ s/\n\z//;

    my $encoded = encode_base64($value);
    chomp $encoded;
    $encoded =~ s/\n/\n /g;

    return sprintf("%s:: %s\n", $attr, $encoded);
}

$_ = _reencode_base64_line($_);