fiss

Friedel's Initialization and Service Supervision
Log | Files | Refs | LICENSE

rune.c (4071B)


      1 /*
      2  * The authors of this software are Rob Pike and Ken Thompson.
      3  *              Copyright (c) 2002 by Lucent Technologies.
      4  * Permission to use, copy, modify, and distribute this software for any
      5  * purpose without fee is hereby granted, provided that this entire notice
      6  * is included in all copies of any software which is or includes a copy
      7  * or modification of this software and in all copies of the supporting
      8  * documentation for such software.
      9  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
     10  * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
     11  * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
     12  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
     13  */
     14 #include "plan9.h"
     15 #include "utf.h"
     16 
     17 #include <stdarg.h>
     18 #include <string.h>
     19 
     20 enum {
     21 	Bit1 = 7,
     22 	Bitx = 6,
     23 	Bit2 = 5,
     24 	Bit3 = 4,
     25 	Bit4 = 3,
     26 	Bit5 = 2,
     27 
     28 	T1 = ((1 << (Bit1 + 1)) - 1) ^ 0xFF, /* 0000 0000 */
     29 	Tx = ((1 << (Bitx + 1)) - 1) ^ 0xFF, /* 1000 0000 */
     30 	T2 = ((1 << (Bit2 + 1)) - 1) ^ 0xFF, /* 1100 0000 */
     31 	T3 = ((1 << (Bit3 + 1)) - 1) ^ 0xFF, /* 1110 0000 */
     32 	T4 = ((1 << (Bit4 + 1)) - 1) ^ 0xFF, /* 1111 0000 */
     33 	T5 = ((1 << (Bit5 + 1)) - 1) ^ 0xFF, /* 1111 1000 */
     34 
     35 	Rune1 = (1 << (Bit1 + 0 * Bitx)) - 1, /* 0000 0000 0000 0000 0111 1111 */
     36 	Rune2 = (1 << (Bit2 + 1 * Bitx)) - 1, /* 0000 0000 0000 0111 1111 1111 */
     37 	Rune3 = (1 << (Bit3 + 2 * Bitx)) - 1, /* 0000 0000 1111 1111 1111 1111 */
     38 	Rune4 = (1 << (Bit4 + 3 * Bitx)) - 1, /* 0011 1111 1111 1111 1111 1111 */
     39 
     40 	Maskx = (1 << Bitx) - 1, /* 0011 1111 */
     41 	Testx = Maskx ^ 0xFF,    /* 1100 0000 */
     42 
     43 	Bad = Runeerror
     44 };
     45 
     46 int chartorune(Rune* rune, char* str) {
     47 	int  c, c1, c2, c3;
     48 	long l;
     49 
     50 	/*
     51 	 * one character sequence
     52 	 *	00000-0007F => T1
     53 	 */
     54 	c = *(uchar*) str;
     55 	if (c < Tx) {
     56 		*rune = c;
     57 		return 1;
     58 	}
     59 
     60 	/*
     61 	 * two character sequence
     62 	 *	0080-07FF => T2 Tx
     63 	 */
     64 	c1 = *(uchar*) (str + 1) ^ Tx;
     65 	if (c1 & Testx)
     66 		goto bad;
     67 	if (c < T3) {
     68 		if (c < T2)
     69 			goto bad;
     70 		l = ((c << Bitx) | c1) & Rune2;
     71 		if (l <= Rune1)
     72 			goto bad;
     73 		*rune = l;
     74 		return 2;
     75 	}
     76 
     77 	/*
     78 	 * three character sequence
     79 	 *	0800-FFFF => T3 Tx Tx
     80 	 */
     81 	c2 = *(uchar*) (str + 2) ^ Tx;
     82 	if (c2 & Testx)
     83 		goto bad;
     84 	if (c < T4) {
     85 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
     86 		if (l <= Rune2)
     87 			goto bad;
     88 		*rune = l;
     89 		return 3;
     90 	}
     91 
     92 	/*
     93 	 * four character sequence
     94 	 *	10000-10FFFF => T4 Tx Tx Tx
     95 	 */
     96 	if (UTFmax >= 4) {
     97 		c3 = *(uchar*) (str + 3) ^ Tx;
     98 		if (c3 & Testx)
     99 			goto bad;
    100 		if (c < T5) {
    101 			l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
    102 			if (l <= Rune3)
    103 				goto bad;
    104 			if (l > Runemax)
    105 				goto bad;
    106 			*rune = l;
    107 			return 4;
    108 		}
    109 	}
    110 
    111 	/*
    112 	 * bad decoding
    113 	 */
    114 bad:
    115 	*rune = Bad;
    116 	return 1;
    117 }
    118 
    119 int runetochar(char* str, Rune* rune) {
    120 	long c;
    121 
    122 	/*
    123 	 * one character sequence
    124 	 *	00000-0007F => 00-7F
    125 	 */
    126 	c = *rune;
    127 	if (c <= Rune1) {
    128 		str[0] = c;
    129 		return 1;
    130 	}
    131 
    132 	/*
    133 	 * two character sequence
    134 	 *	00080-007FF => T2 Tx
    135 	 */
    136 	if (c <= Rune2) {
    137 		str[0] = T2 | (c >> 1 * Bitx);
    138 		str[1] = Tx | (c & Maskx);
    139 		return 2;
    140 	}
    141 
    142 	/*
    143 	 * three character sequence
    144 	 *	00800-0FFFF => T3 Tx Tx
    145 	 */
    146 	if (c > Runemax)
    147 		c = Runeerror;
    148 	if (c <= Rune3) {
    149 		str[0] = T3 | (c >> 2 * Bitx);
    150 		str[1] = Tx | ((c >> 1 * Bitx) & Maskx);
    151 		str[2] = Tx | (c & Maskx);
    152 		return 3;
    153 	}
    154 
    155 	/*
    156 	 * four character sequence
    157 	 *	010000-1FFFFF => T4 Tx Tx Tx
    158 	 */
    159 	str[0] = T4 | (c >> 3 * Bitx);
    160 	str[1] = Tx | ((c >> 2 * Bitx) & Maskx);
    161 	str[2] = Tx | ((c >> 1 * Bitx) & Maskx);
    162 	str[3] = Tx | (c & Maskx);
    163 	return 4;
    164 }
    165 
    166 int runelen(long c) {
    167 	Rune rune;
    168 	char str[10];
    169 
    170 	rune = c;
    171 	return runetochar(str, &rune);
    172 }
    173 
    174 int runenlen(Rune* r, int nrune) {
    175 	int nb, c;
    176 
    177 	nb = 0;
    178 	while (nrune--) {
    179 		c = *r++;
    180 		if (c <= Rune1)
    181 			nb++;
    182 		else if (c <= Rune2)
    183 			nb += 2;
    184 		else if (c <= Rune3 || c > Runemax)
    185 			nb += 3;
    186 		else
    187 			nb += 4;
    188 	}
    189 	return nb;
    190 }
    191 
    192 int fullrune(char* str, int n) {
    193 	int c;
    194 
    195 	if (n <= 0)
    196 		return 0;
    197 	c = *(uchar*) str;
    198 	if (c < Tx)
    199 		return 1;
    200 	if (c < T3)
    201 		return n >= 2;
    202 	if (UTFmax == 3 || c < T4)
    203 		return n >= 3;
    204 	return n >= 4;
    205 }