/* @(#)unicode.c 1.4 02/06/10 Copyright 2001 J. Schilling */
#ifndef lint
static char sccsid[] =
"@(#)unicode.c 1.4 02/06/10 Copyright 2001 J. Schilling";
#endif
/*
* Routines to convert from/to UNICODE
*
* This is currently a very simple implementation that only
* handles ISO-8859-1 coding. There should be a better solution
* in the future.
*
* Copyright (c) 2001 J. Schilling
*/
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <mconfig.h>
#include <stdio.h>
#include "star.h"
#include <standard.h>
#include <schily.h>
#include "starsubs.h"
EXPORT int to_utf8 __PR((Uchar *to, Uchar *from));
EXPORT int to_utf8l __PR((Uchar *to, Uchar *from, int len));
EXPORT BOOL from_utf8 __PR((Uchar *to, Uchar *from));
EXPORT BOOL from_utf8l __PR((Uchar *to, Uchar *from, int *len));
EXPORT int
to_utf8(to, from)
register Uchar *to;
register Uchar *from;
{
register Uchar *oto = to;
register Uchar c;
while ((c = *from++) != '\0') {
if (c <= 0x7F) {
*to++ = c;
} else if (c <= 0xBF) {
*to++ = 0xC2;
*to++ = c;
} else { /*c <= 0xFF */
*to++ = 0xC3;
*to++ = c & 0xBF;
}
}
*to = '\0';
return (to - oto);
}
EXPORT int
to_utf8l(to, from, len)
register Uchar *to;
register Uchar *from;
register int len;
{
register Uchar *oto = to;
register Uchar c;
while (--len >= 0) {
c = *from++;
if (c <= 0x7F) {
*to++ = c;
} else if (c <= 0xBF) {
*to++ = 0xC2;
*to++ = c;
} else { /*c <= 0xFF */
*to++ = 0xC3;
*to++ = c & 0xBF;
}
}
*to = '\0';
return (to - oto);
}
EXPORT BOOL
from_utf8(to, from)
register Uchar *to;
register Uchar *from;
{
register Uchar c;
register BOOL ret = TRUE;
while ((c = *from++) != '\0') {
if (c <= 0x7F) {
*to++ = c;
} else if (c == 0xC0) {
*to++ = *from++ & 0x7F;
} else if (c == 0xC1) {
*to++ = (*from++ | 0x40) & 0x7F;
} else if (c == 0xC2) {
*to++ = *from++;
} else if (c == 0xC3) {
*to++ = *from++ | 0x40;
} else {
ret = FALSE; /* unknown/illegal UTF-8 char*/
*to++ = '_'; /* use default character */
if (c < 0xE0) {
from++; /* 2 bytes in total */
} else if (c < 0xF0) {
from += 2; /* 3 bytes in total */
} else if (c < 0xF8) {
from += 3; /* 4 bytes in total */
} else if (c < 0xFC) {
from += 4; /* 5 bytes in total */
} else if (c < 0xFE) {
from += 5; /* 6 bytes in total */
} else {
while ((c = *from) != '\0') {
/*
* Test for 7 bit ASCII + non prefix
*/
if (c <= 0xBF)
break;
from++;
}
}
}
}
*to = '\0';
return (ret);
}
EXPORT BOOL
from_utf8l(to, from, lenp)
register Uchar *to;
register Uchar *from;
int *lenp;
{
register Uchar *oto = to;
register Uchar c;
register BOOL ret = TRUE;
register int len = *lenp;
while (--len >= 0) {
c = *from++;
if (c <= 0x7F) {
*to++ = c;
} else if (c == 0xC0) {
*to++ = *from++ & 0x7F;
len--;
} else if (c == 0xC1) {
*to++ = (*from++ | 0x40) & 0x7F;
len--;
} else if (c == 0xC2) {
*to++ = *from++;
len--;
} else if (c == 0xC3) {
*to++ = *from++ | 0x40;
len--;
} else {
ret = FALSE; /* unknown/illegal UTF-8 char*/
*to++ = '_'; /* use default character */
if (c < 0xE0) {
from++; /* 2 bytes in total */
len--;
} else if (c < 0xF0) {
from += 2; /* 3 bytes in total */
len -= 2;
} else if (c < 0xF8) {
from += 3; /* 4 bytes in total */
len -= 3;
} else if (c < 0xFC) {
from += 4; /* 5 bytes in total */
len -= 4;
} else if (c < 0xFE) {
from += 5; /* 6 bytes in total */
len -= 5;
} else {
while (len > 0) {
c = *from;
/*
* Test for 7 bit ASCII + non prefix
*/
if (c <= 0xBF)
break;
from++;
len--;
}
}
}
}
*to = '\0';
*lenp = (to - oto);
return (ret);
}
syntax highlighted by Code2HTML, v. 0.9.1