libxosd-xft/src/utf8.c
2021-06-30 13:01:32 +05:30

204 lines
4.3 KiB
C

/*
Copyright 2021 Dakshinamurthy Karra (dakshinamurthy.karra@jaliansystems.com)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#define isodigit(c) ((c) >= '0' && (c) <= '7')
#define hextobin(c) ((c) >= 'a' && (c) <= 'f' ? (c) - 'a' + 10 : (c) >= 'A' && (c) <= 'F' ? (c) - 'A' + 10 \
: (c) - '0')
#define octtobin(c) ((c) - '0')
#ifndef FALLTHROUGH
#if __GNUC__ < 7
#define FALLTHROUGH ((void)0)
#else
#define FALLTHROUGH __attribute__((__fallthrough__))
#endif
#endif
static inline unsigned char to_uchar(char ch) { return ch; }
int u8_uctomb(char *s, unsigned int uc, int n)
{
if (uc < 0x80)
{
if (n > 0)
{
s[0] = uc;
return 1;
}
/* else return -2, below. */
}
else
{
int count;
if (uc < 0x800)
count = 2;
else if (uc < 0x10000)
{
if (uc < 0xd800 || uc >= 0xe000)
count = 3;
else
return -1;
}
else if (uc < 0x110000)
count = 4;
else
return -1;
if (n >= count)
{
switch (count) /* note: code falls through cases! */
{
case 4:
s[3] = 0x80 | (uc & 0x3f);
uc = uc >> 6;
uc |= 0x10000;
FALLTHROUGH;
case 3:
s[2] = 0x80 | (uc & 0x3f);
uc = uc >> 6;
uc |= 0x800;
FALLTHROUGH;
case 2:
s[1] = 0x80 | (uc & 0x3f);
uc = uc >> 6;
uc |= 0xc0;
/*case 1:*/ s[0] = uc;
}
return count;
}
}
return -2;
}
static int
print_unicode_char(unsigned int code, char **b)
{
char inbuf[8];
memset(inbuf, 0, sizeof(inbuf));
int count;
count = u8_uctomb((char *)inbuf, code, sizeof(inbuf));
if (count < 0)
return -3;
memcpy(*b, inbuf, count);
*b += count;
return count;
}
static int
print_esc(const char *escstart, char **b)
{
const char *p = escstart + 1;
int esc_length; /* Length of \nnn escape. */
if (*p == 'u' || *p == 'U')
{
char esc_char = *p;
unsigned int uni_value;
uni_value = 0;
for (esc_length = (esc_char == 'u' ? 4 : 8), ++p;
esc_length > 0;
--esc_length, ++p)
{
if (!isxdigit(to_uchar(*p)))
return -1;
uni_value = uni_value * 16 + hextobin(*p);
}
/* A universal character name shall not specify a character short
identifier in the range 00000000 through 00000020, 0000007F through
0000009F, or 0000D800 through 0000DFFF inclusive. A universal
character name shall not designate a character in the required
character set. */
if ((uni_value <= 0x9f && uni_value != 0x24 && uni_value != 0x40 && uni_value != 0x60) || (uni_value >= 0xd800 && uni_value <= 0xdfff))
return -2;
print_unicode_char(uni_value, b);
}
else
{
char *c = *b;
int i = 1;
*c++ = '\\';
if (*p)
{
i++;
*c++ = *p++;
}
*b += i;
}
return p - escstart - 1;
}
#define UTF8_BUFFER_LENGTH 4096
char *print_utf8(const char *s)
{
char buffer[UTF8_BUFFER_LENGTH];
char *p = &buffer[0];
while (*s)
{
if (*s == '\\')
{
int ret = print_esc(s, &p);
if (ret < 0)
{
fprintf(stderr, "\nError: %d\n", ret);
s += 1;
}
else
{
s += ret + 1;
}
}
else
{
*p++ = *s++;
}
}
*p = 0;
return strdup(buffer);
}
#ifdef MAIN
int main(int argc, char **argv)
{
for (int i = 1; i < argc; i++)
{
char *s = print_utf8(argv[i]);
puts(s);
free(s);
}
exit(0);
}
#endif
/* vim: foldmethod=marker tabstop=2 shiftwidth=2 expandtab
*/