Commit fe98d9fa authored by Kurt Zeilenga
Browse files

Initial revision

parent 9fec1299
#
# $Id: MUTTUCData.txt,v 1.3 1999/10/29 00:04:35 mleisher Exp $
#
# Copyright 1999 Computing Research Labs, New Mexico State University
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
#
# Implementation specific character properties.
#
#
# Space, other.
#
0009;;Ss;;;;;;;;;;;;
000A;;Ss;;;;;;;;;;;;
000B;;Ss;;;;;;;;;;;;
000C;;Ss;;;;;;;;;;;;
000D;;Ss;;;;;;;;;;;;
#
# Non-breaking.
#
00A0;;Nb;;;;;;;;;;;;
2007;;Nb;;;;;;;;;;;;
2011;;Nb;;;;;;;;;;;;
FEFF;;Nb;;;;;;;;;;;;
#
# Symmetric.
#
0028;;Sy;;;;;;;;;;;;
0029;;Sy;;;;;;;;;;;;
005B;;Sy;;;;;;;;;;;;
005D;;Sy;;;;;;;;;;;;
007B;;Sy;;;;;;;;;;;;
007D;;Sy;;;;;;;;;;;;
00AB;;Sy;;;;;;;;;;;;
00BB;;Sy;;;;;;;;;;;;
0F3A;;Sy;;;;;;;;;;;;
0F3B;;Sy;;;;;;;;;;;;
0F3C;;Sy;;;;;;;;;;;;
0F3D;;Sy;;;;;;;;;;;;
0F3E;;Sy;;;;;;;;;;;;
0F3F;;Sy;;;;;;;;;;;;
2018;;Sy;;;;;;;;;;;;
2019;;Sy;;;;;;;;;;;;
201A;;Sy;;;;;;;;;;;;
201B;;Sy;;;;;;;;;;;;
201C;;Sy;;;;;;;;;;;;
201D;;Sy;;;;;;;;;;;;
201E;;Sy;;;;;;;;;;;;
201F;;Sy;;;;;;;;;;;;
2039;;Sy;;;;;;;;;;;;
203A;;Sy;;;;;;;;;;;;
2045;;Sy;;;;;;;;;;;;
2046;;Sy;;;;;;;;;;;;
207D;;Sy;;;;;;;;;;;;
207E;;Sy;;;;;;;;;;;;
208D;;Sy;;;;;;;;;;;;
208E;;Sy;;;;;;;;;;;;
2329;;Sy;;;;;;;;;;;;
232A;;Sy;;;;;;;;;;;;
3008;;Sy;;;;;;;;;;;;
3009;;Sy;;;;;;;;;;;;
300A;;Sy;;;;;;;;;;;;
300B;;Sy;;;;;;;;;;;;
300C;;Sy;;;;;;;;;;;;
300D;;Sy;;;;;;;;;;;;
300E;;Sy;;;;;;;;;;;;
300F;;Sy;;;;;;;;;;;;
3010;;Sy;;;;;;;;;;;;
3011;;Sy;;;;;;;;;;;;
3014;;Sy;;;;;;;;;;;;
3015;;Sy;;;;;;;;;;;;
3016;;Sy;;;;;;;;;;;;
3017;;Sy;;;;;;;;;;;;
3018;;Sy;;;;;;;;;;;;
3019;;Sy;;;;;;;;;;;;
301A;;Sy;;;;;;;;;;;;
301B;;Sy;;;;;;;;;;;;
301D;;Sy;;;;;;;;;;;;
301E;;Sy;;;;;;;;;;;;
301F;;Sy;;;;;;;;;;;;
FD3E;;Sy;;;;;;;;;;;;
FD3F;;Sy;;;;;;;;;;;;
FE35;;Sy;;;;;;;;;;;;
FE36;;Sy;;;;;;;;;;;;
FE37;;Sy;;;;;;;;;;;;
FE38;;Sy;;;;;;;;;;;;
FE39;;Sy;;;;;;;;;;;;
FE3A;;Sy;;;;;;;;;;;;
FE3B;;Sy;;;;;;;;;;;;
FE3C;;Sy;;;;;;;;;;;;
FE3D;;Sy;;;;;;;;;;;;
FE3E;;Sy;;;;;;;;;;;;
FE3F;;Sy;;;;;;;;;;;;
FE40;;Sy;;;;;;;;;;;;
FE41;;Sy;;;;;;;;;;;;
FE42;;Sy;;;;;;;;;;;;
FE43;;Sy;;;;;;;;;;;;
FE44;;Sy;;;;;;;;;;;;
FE59;;Sy;;;;;;;;;;;;
FE5A;;Sy;;;;;;;;;;;;
FE5B;;Sy;;;;;;;;;;;;
FE5C;;Sy;;;;;;;;;;;;
FE5D;;Sy;;;;;;;;;;;;
FE5E;;Sy;;;;;;;;;;;;
FF08;;Sy;;;;;;;;;;;;
FF09;;Sy;;;;;;;;;;;;
FF3B;;Sy;;;;;;;;;;;;
FF3D;;Sy;;;;;;;;;;;;
FF5B;;Sy;;;;;;;;;;;;
FF5D;;Sy;;;;;;;;;;;;
FF62;;Sy;;;;;;;;;;;;
FF63;;Sy;;;;;;;;;;;;
#
# Hex digit.
#
0030;;Hd;;;;;;;;;;;;
0031;;Hd;;;;;;;;;;;;
0032;;Hd;;;;;;;;;;;;
0033;;Hd;;;;;;;;;;;;
0034;;Hd;;;;;;;;;;;;
0035;;Hd;;;;;;;;;;;;
0036;;Hd;;;;;;;;;;;;
0037;;Hd;;;;;;;;;;;;
0038;;Hd;;;;;;;;;;;;
0039;;Hd;;;;;;;;;;;;
0041;;Hd;;;;;;;;;;;;
0042;;Hd;;;;;;;;;;;;
0043;;Hd;;;;;;;;;;;;
0044;;Hd;;;;;;;;;;;;
0045;;Hd;;;;;;;;;;;;
0046;;Hd;;;;;;;;;;;;
0061;;Hd;;;;;;;;;;;;
0062;;Hd;;;;;;;;;;;;
0063;;Hd;;;;;;;;;;;;
0064;;Hd;;;;;;;;;;;;
0065;;Hd;;;;;;;;;;;;
0066;;Hd;;;;;;;;;;;;
FF10;;Hd;;;;;;;;;;;;
FF11;;Hd;;;;;;;;;;;;
FF12;;Hd;;;;;;;;;;;;
FF13;;Hd;;;;;;;;;;;;
FF14;;Hd;;;;;;;;;;;;
FF15;;Hd;;;;;;;;;;;;
FF16;;Hd;;;;;;;;;;;;
FF17;;Hd;;;;;;;;;;;;
FF18;;Hd;;;;;;;;;;;;
FF19;;Hd;;;;;;;;;;;;
FF21;;Hd;;;;;;;;;;;;
FF22;;Hd;;;;;;;;;;;;
FF23;;Hd;;;;;;;;;;;;
FF24;;Hd;;;;;;;;;;;;
FF25;;Hd;;;;;;;;;;;;
FF26;;Hd;;;;;;;;;;;;
FF41;;Hd;;;;;;;;;;;;
FF42;;Hd;;;;;;;;;;;;
FF43;;Hd;;;;;;;;;;;;
FF44;;Hd;;;;;;;;;;;;
FF45;;Hd;;;;;;;;;;;;
FF46;;Hd;;;;;;;;;;;;
#
# Quote marks.
#
0022;;Qm;;;;;;;;;;;;
0027;;Qm;;;;;;;;;;;;
00AB;;Qm;;;;;;;;;;;;
00BB;;Qm;;;;;;;;;;;;
2018;;Qm;;;;;;;;;;;;
2019;;Qm;;;;;;;;;;;;
201A;;Qm;;;;;;;;;;;;
201B;;Qm;;;;;;;;;;;;
201C;;Qm;;;;;;;;;;;;
201D;;Qm;;;;;;;;;;;;
201E;;Qm;;;;;;;;;;;;
201F;;Qm;;;;;;;;;;;;
2039;;Qm;;;;;;;;;;;;
203A;;Qm;;;;;;;;;;;;
300C;;Qm;;;;;;;;;;;;
300D;;Qm;;;;;;;;;;;;
300E;;Qm;;;;;;;;;;;;
300F;;Qm;;;;;;;;;;;;
301D;;Qm;;;;;;;;;;;;
301E;;Qm;;;;;;;;;;;;
301F;;Qm;;;;;;;;;;;;
FE41;;Qm;;;;;;;;;;;;
FE42;;Qm;;;;;;;;;;;;
FE43;;Qm;;;;;;;;;;;;
FE44;;Qm;;;;;;;;;;;;
FF02;;Qm;;;;;;;;;;;;
FF07;;Qm;;;;;;;;;;;;
FF62;;Qm;;;;;;;;;;;;
FF63;;Qm;;;;;;;;;;;;
#
# Special Devanagari forms
#
E900;DEVANAGARI KSHA LIGATURE;Lo;0;L;0915 094D 0937;;;;N;;;;;
E901;DEVANAGARI GNYA LIGATURE;Lo;0;L;091C 094D 091E;;;;N;;;;;
E902;DEVANAGARI TTA LIGATURE;Lo;0;L;0924 094D 0924;;;;N;;;;;
E903;DEVANAGARI TRA LIGATURE;Lo;0;L;0924 094D 0930;;;;N;;;;;
E904;DEVANAGARI SHCHA LIGATURE;Lo;0;L;0936 094D 091B;;;;N;;;;;
E905;DEVANAGARI SHRA LIGATURE;Lo;0;L;0936 094D 0930;;;;N;;;;;
E906;DEVANAGARI SHVA LIGATURE;Lo;0;L;0936 094D 0935;;;;N;;;;;
E907;DEVANAGARI KRA LIGATURE;Lo;0;L;;;;;N;;;;;
E908;DEVANAGARI JRA LIGATURE;Lo;0;L;;;;;N;;;;;
E909;DEVANAGARI ZRA LIGATURE;Lo;0;L;;;;;N;;;;;
E90A;DEVANAGARI PHRA LIGATURE;Lo;0;L;;;;;N;;;;;
E90B;DEVANAGARI FRA LIGATURE;Lo;0;L;;;;;N;;;;;
E90C;DEVANAGARI PRA LIGATURE;Lo;0;L;;;;;N;;;;;
E90D;DEVANAGARI SRA LIGATURE;Lo;0;L;;;;;N;;;;;
E90E;DEVANAGARI RU LIGATURE;Lo;0;L;;;;;N;;;;;
E90F;DEVANAGARI RUU LIGATURE;Lo;0;L;;;;;N;;;;;
E915;DEVANAGARI HALF LETTER KA;Lo;0;L;;;;;N;;;;;
E916;DEVANAGARI HALF LETTER KHA;Lo;0;L;;;;;N;;;;;
E917;DEVANAGARI HALF LETTER GA;Lo;0;L;;;;;N;;;;;
E918;DEVANAGARI HALF LETTER GHA;Lo;0;L;;;;;N;;;;;
E919;DEVANAGARI HALF LETTER NGA;Lo;0;L;;;;;N;;;;;
E91A;DEVANAGARI HALF LETTER CA;Lo;0;L;;;;;N;;;;;
E91B;DEVANAGARI HALF LETTER CHA;Lo;0;L;;;;;N;;;;;
E91C;DEVANAGARI HALF LETTER JA;Lo;0;L;;;;;N;;;;;
E91D;DEVANAGARI HALF LETTER JHA;Lo;0;L;;;;;N;;;;;
E91E;DEVANAGARI HALF LETTER NYA;Lo;0;L;;;;;N;;;;;
E91F;DEVANAGARI HALF LETTER TTA;Lo;0;L;;;;;N;;;;;
E920;DEVANAGARI HALF LETTER TTHA;Lo;0;L;;;;;N;;;;;
E921;DEVANAGARI HALF LETTER DDA;Lo;0;L;;;;;N;;;;;
E922;DEVANAGARI HALF LETTER DDHA;Lo;0;L;;;;;N;;;;;
E923;DEVANAGARI HALF LETTER NNA;Lo;0;L;;;;;N;;;;;
E924;DEVANAGARI HALF LETTER TA;Lo;0;L;;;;;N;;;;;
E925;DEVANAGARI HALF LETTER THA;Lo;0;L;;;;;N;;;;;
E926;DEVANAGARI HALF LETTER DA;Lo;0;L;;;;;N;;;;;
E927;DEVANAGARI HALF LETTER DHA;Lo;0;L;;;;;N;;;;;
E928;DEVANAGARI HALF LETTER NA;Lo;0;L;;;;;N;;;;;
E929;DEVANAGARI HALF LETTER NNNA;Lo;0;L;0928 093C;;;;N;;;;;
E92A;DEVANAGARI HALF LETTER PA;Lo;0;L;;;;;N;;;;;
E92B;DEVANAGARI HALF LETTER PHA;Lo;0;L;;;;;N;;;;;
E92C;DEVANAGARI HALF LETTER BA;Lo;0;L;;;;;N;;;;;
E92D;DEVANAGARI HALF LETTER BHA;Lo;0;L;;;;;N;;;;;
E92E;DEVANAGARI HALF LETTER MA;Lo;0;L;;;;;N;;;;;
E92F;DEVANAGARI HALF LETTER YA;Lo;0;L;;;;;N;;;;;
E930;DEVANAGARI HALF LETTER RA;Lo;0;L;;;;;N;;;;;
E931;DEVANAGARI HALF LETTER RRA;Lo;0;L;0930 093C;;;;N;;;;;
E932;DEVANAGARI HALF LETTER LA;Lo;0;L;;;;;N;;;;;
E933;DEVANAGARI HALF LETTER LLA;Lo;0;L;;;;;N;;;;;
E934;DEVANAGARI HALF LETTER LLLA;Lo;0;L;0933 093C;;;;N;;;;;
E935;DEVANAGARI HALF LETTER VA;Lo;0;L;;;;;N;;;;;
E936;DEVANAGARI HALF LETTER SHA;Lo;0;L;;;;;N;;;;;
E937;DEVANAGARI HALF LETTER SSA;Lo;0;L;;;;;N;;;;;
E938;DEVANAGARI HALF LETTER SA;Lo;0;L;;;;;N;;;;;
E939;DEVANAGARI HALF LETTER HA;Lo;0;L;;;;;N;;;;;
E940;DEVANAGARI KKA LIGATURE;Lo;0;L;0915 094D 0915;;;;N;;;;;
E941;DEVANAGARI KTA LIGATURE;Lo;0;L;0915 094D 0924;;;;N;;;;;
E942;DEVANAGARI NGKA LIGATURE;Lo;0;L;0919 094D 0915;;;;N;;;;;
E943;DEVANAGARI NGKHA LIGATURE;Lo;0;L;0919 094D 0916;;;;N;;;;;
E944;DEVANAGARI NGGA LIGATURE;Lo;0;L;0919 094D 0917;;;;N;;;;;
E945;DEVANAGARI NGGHA LIGATURE;Lo;0;L;0919 094D 0918;;;;N;;;;;
E946;DEVANAGARI NYJA LIGATURE;Lo;0;L;091E 094D 091C;;;;N;;;;;
E947;DEVANAGARI DGHA LIGATURE;Lo;0;L;0926 094D 0918;;;;N;;;;;
E948;DEVANAGARI DDA LIGATURE;Lo;0;L;0926 094D 0926;;;;N;;;;;
E949;DEVANAGARI DDHA LIGATURE;Lo;0;L;0926 094D 0927;;;;N;;;;;
E94A;DEVANAGARI DBA LIGATURE;Lo;0;L;0926 094D 092C;;;;N;;;;;
E94B;DEVANAGARI DBHA LIGATURE;Lo;0;L;0926 094D 092D;;;;N;;;;;
E94C;DEVANAGARI DMA LIGATURE;Lo;0;L;0926 094D 092E;;;;N;;;;;
E94D;DEVANAGARI DYA LIGATURE;Lo;0;L;0926 094D 092F;;;;N;;;;;
E94E;DEVANAGARI DVA LIGATURE;Lo;0;L;0926 094D 0935;;;;N;;;;;
E94F;DEVANAGARI TT-TTA LIGATURE;Lo;0;L;091F 094D 091F;;;;N;;;;;
E950;DEVANAGARI TT-TTHA LIGATURE;Lo;0;L;091F 094D 0920;;;;N;;;;;
E951;DEVANAGARI TTH-TTHA LIGATURE;Lo;0;L;0920 094D 0920;;;;N;;;;;
E952;DEVANAGARI DD-GA LIGATURE;Lo;0;L;0921 094D 0917;;;;N;;;;;
E953;DEVANAGARI DD-DDA LIGATURE;Lo;0;L;0921 094D 0921;;;;N;;;;;
E954;DEVANAGARI DD-DDHA LIGATURE;Lo;0;L;0921 094D 0922;;;;N;;;;;
E955;DEVANAGARI NNA LIGATURE;Lo;0;L;0928 094D 0928;;;;N;;;;;
E956;DEVANAGARI HMA LIGATURE;Lo;0;L;0939 094D 092E;;;;N;;;;;
E957;DEVANAGARI HYA LIGATURE;Lo;0;L;0939 094D 092F;;;;N;;;;;
E958;DEVANAGARI HLA LIGATURE;Lo;0;L;0939 094D 0932;;;;N;;;;;
E959;DEVANAGARI HVA LIGATURE;Lo;0;L;0939 094D 0935;;;;N;;;;;
E95A;DEVANAGARI STRA LIGATURE;Lo;0;L;0938 094D 0924 094D 0930;;;;N;;;;;
E970;DEVANAGARI HALF KSHA LIGATURE;Lo;0;L;0915 094D 0937;;;;N;;;;;
E971;DEVANAGARI HALF GNYA LIGATURE;Lo;0;L;091C 094D 091E;;;;N;;;;;
E972;DEVANAGARI HALF TTA LIGATURE;Lo;0;L;0924 094D 0924;;;;N;;;;;
E973;DEVANAGARI HALF TRA LIGATURE;Lo;0;L;0924 094D 0930;;;;N;;;;;
E974;DEVANAGARI HALF SHCHA LIGATURE;Lo;0;L;0936 094D 091B;;;;N;;;;;
E975;DEVANAGARI HALF SHRA LIGATURE;Lo;0;L;0936 094D 0930;;;;N;;;;;
E976;DEVANAGARI HALF SHVA LIGATURE;Lo;0;L;0936 094D 0935;;;;N;;;;;
E97B;DEVANAGARI SIGN RRA-REPHA;Mn;36;L;;;;;N;;;;;
E97C;DEVANAGARI HAR LIGATURE;Lo;0;L;0939 0943;;;;N;;;;;
E97D;DEVANAGARI SIGN EYELASH RA;Lo;0;L;;;;;N;;;;;
E97E;DEVANAGARI SIGN REPHA;Mn;36;L;;;;;N;;;;;
E97F;DEVANAGARI SIGN SUBJOINED RA;Mn;36;L;;;;;N;;;;;
#
# $Id: README,v 1.32 1999/11/29 16:41:05 mleisher Exp $
#
MUTT UCData Package 2.4
-----------------------
This is a package that supports ctype-like operations for Unicode UCS-2 text
(and surrogates), case mapping, decomposition lookup, and provides a
bidirectional reordering algorithm. To use it, you will need to get the
latest "UnicodeData-*.txt" file from the Unicode Web or FTP site.
The character information portion of the package consists of three parts:
1. A program called "ucgendat" which generates five data files from the
UnicodeData-*.txt file. The files are:
A. case.dat - the case mappings.
B. ctype.dat - the character property tables.
C. decomp.dat - the character decompositions.
D. cmbcl.dat - the non-zero combining classes.
E. num.dat - the codes representing numbers.
2. The "ucdata.[ch]" files which implement the functions needed to
check to see if a character matches groups of properties, to map between
upper, lower, and title case, to look up the decomposition of a
character, look up the combining class of a character, and get the number
value of a character.
3. The UCData.java class which provides the same API (with minor changes for
the numbers) and loads the same binary data files as the C code.
A short reference to the functions available is in the "api.txt" file.
Techie Details
==============
The "ucgendat" program parses files from the command line which are all in the
Unicode Character Database (UCDB) format. An additional properties file,
"MUTTUCData.txt", provides some extra properties for some characters.
The program looks for the two character properties fields (2 and 4), the
combining class field (3), the decomposition field (5), the numeric value
field (8), and the case mapping fields (12, 13, and 14). The decompositions
are recursively expanded before being written out.
The decomposition table contains all the canonical decompositions. This means
all decompositions that do not have tags such as "<compat>" or "<font>".
The data is almost all stored as unsigned longs (32-bits assumed) and the
routines that load the data take care of endian swaps when necessary. This
also means that surrogates (>= 0x10000) can be placed in the data files the
"ucgendat" program parses.
The data is written as external files and broken into five parts so it can be
selectively updated at runtime if necessary.
The data files currently generated from the "ucgendat" program total about 56K
in size altogether.
The format of the binary data files is documented in the "format.txt" file.
==========================================================================
The "Pretty Good Bidi Algorithm"
--------------------------------
This routine provides an alternative to the Unicode Bidi algorithm. The
difference is that this version of the PGBA does not handle the explicit
directional codes (LRE, RLE, LRO, RLO, PDF). It should now produce the same
results as the Unicode BiDi algorithm for implicit reordering. Included are
functions for doing cursor motion in both logical and visual order.
This implementation is provided to demonstrate an effective alternate method
for implicit reordering. To make this useful for an application, it probably
needs some changes to the memory allocation and deallocation, as well as data
structure additions for rendering.
Mark Leisher <mleisher@crl.nmsu.edu>
19 November 1999
-----------------------------------------------------------------------------
CHANGES
=======
Version 2.4
-----------
1. Improved some bidi algorithm documentation in the code.
2. Fixed a code mixup that produced a non-working version.
Version 2.3
-----------
1. Fixed a misspelling in the ucpgba.h header file.
2. Fixed a bug which caused trailing weak non-digit sequences to be left out of
the reordered string in the bidi algorithm.
3. Fixed a problem with weak sequences containing non-spacing marks in the
bidi algorithm.
4. Fixed a problem with text runs of the opposite direction of the string
surrounding a weak + neutral text run appearing in the wrong order in the
bidi algorithm.
5. Added a default overall direction parameter to the reordering function for
cases of strings with no strong directional characters in the bidi
algorithm.
6. The bidi API documentation was improved.
7. Added a man page for the bidi API.
Version 2.2
-----------
1. Fixed a problem with the bidi algorithm locating directional section
boundaries.
2. Fixed a problem with the bidi algorithm starting the reordering correctly.
3. Fixed a problem with the bidi algorithm determining end boundaries for LTR
segments.
4. Fixed a problem with the bidi algorithm reordering weak (digits and number
separators) segments.
5. Added automatic switching of symmetrically paired characters when
reversing RTL segments.
6. Added a missing symmetric character to the extra character properties in
MUTTUCData.txt.
7. Added support for doing logical and visual cursor traversal.
Version 2.1
-----------
1. Updated the ucgendat program to handle the Unicode 3.0 character database
properties. The AL and BN bidi properties get marked as strong RTL and
Other Neutral, respectively; the NSM, LRE, RLE, PDF, LRO, and RLO controls
all get marked as Other Neutral.
2. Fixed some problems with testing against signed values in the UCData.java
code and some minor cleanup.
3. Added the "Pretty Good Bidi Algorithm."
Version 2.0
-----------
1. Removed the old Java stuff for a new class that loads directly from the
same data files as the C code does.
2. Fixed a problem with choosing the correct field when mapping case.
3. Adjusted some search routines to start their search in the correct position.
4. Moved the copyright year to 1999.
Version 1.9
-----------
1. Fixed a problem with an incorrect amount of storage being allocated for the
combining class nodes.
2. Fixed an invalid initialization in the number code.
3. Changed the Java template file formatting a bit.
4. Added tables and function for getting decompositions in the Java class.
Version 1.8
-----------
1. Fixed a problem with adding certain ranges.
2. Added two more macros for testing for identifiers.
3. Tested with the UnicodeData-2.1.5.txt file.
Version 1.7
-----------
1. Fixed a problem with looking up decompositions in "ucgendat."
Version 1.6
-----------
1. Added two new properties introduced with UnicodeData-2.1.4.txt.
2. Changed the "ucgendat.c" program a little to automatically align the
property data on a 4-byte boundary when new properties are added.
3. Changed the "ucgendat.c" program to only generate canonical
decompositions.
4. Added two new macros ucisinitialpunct() and ucisfinalpunct() to check for
initial and final punctuation characters.
5. Minor additions and changes to the documentation.
Version 1.5
-----------
1. Changed all file open calls to include binary mode with "b" for DOS/WIN
platforms.
2. Wrapped the unistd.h include so it won't be included when compiled under
Win32.
3. Fixed a bad range check for hex digits in ucgendat.c.
4. Fixed a bad endian swap for combining classes.
5. Added code to make a number table and associated lookup functions.
Functions added are ucnumber(), ucdigit(), and ucgetnumber(). The last
function is to maintain compatibility with John Cowan's "uctype" package.
Version 1.4
-----------
1. Fixed a bug with adding a range.
2. Fixed a bug with inserting a range in order.
3. Fixed incorrectly specified ucisdefined() and ucisundefined() macros.
4. Added the missing unload for the combining class data.
5. Fixed a bad macro placement in ucisweak().
Version 1.3
-----------
1. Bug with case mapping calculations fixed.
2. Bug with empty character property entries fixed.
3. Bug with incorrect type in the combining class lookup fixed.
4. Some corrections done to api.txt.
5. Bug in certain character property lookups fixed.
6. Added a character property table that records the defined characters.
7. Replaced ucisunknown() with ucisdefined() and ucisundefined().
Version 1.2
-----------
1. Added code to ucgendat to generate a combining class table.
2. Fixed an endian problem with the byte count of decompositions.
3. Fixed some minor problems in the "format.txt" file.
4. Removed some bogus "Ss" values from MUTTUCData.txt file.
5. Added API function to get combining class.
6. Changed the open mode to "rb" so binary data files will be opened correctly
on DOS/WIN as well as other platforms.
7. Added the "api.txt" file.
Version 1.1
-----------
1. Added ucisxdigit() which I overlooked.
2. Added UC_LT to the ucisalpha() macro which I overlooked.
3. Changed uciscntrl() to include UC_CF.
4. Added ucisocntrl() and ucfntcntrl() macros.
5. Added a ucisblank() which I overlooked.
6. Added missing properties to ucissymbol() and ucisnumber().
7. Added ucisgraph() and ucisprint().
8. Changed the "Mr" property to "Sy" to mark this subset of mirroring
characters as symmetric to avoid trampling the Unicode/ISO10646 sense of
mirroring.
9. Added another property called "Ss" which includes control characters
traditionally seen as spaces in the isspace() macro.
10. Added a bunch of macros to be API compatible with John Cowan's package.
ACKNOWLEDGEMENTS
================
Thanks go to John Cowan <cowan@locke.ccil.org> for pointing out lots of
missing things and giving me stuff, particularly a bunch of new macros.
Thanks go to Bob Verbrugge <bob_verbrugge@nl.compuware.com> for pointing out
various bugs.
Thanks go to Christophe Pierret <cpierret@businessobjects.com> for pointing
out that file modes need to have "b" for DOS/WIN machines, pointing out
unistd.h is not a Win32 header, and pointing out a problem with ucisalnum().
Thanks go to Kent Johnson <kent@pondview.mv.com> for finding a bug that caused
incomplete decompositions to be generated by the "ucgendat" program.
Thanks go to Valeriy E. Ushakov <uwe@ptc.spbu.ru> for spotting an allocation
error and an initialization error.
This diff is collapsed.
/*
* $Id: UCDataTest.java,v 1.1 1999/08/23 16:14:08 mleisher Exp $
*
* Copyright 1999 Computing Research Labs, New Mexico State University
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
* OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
import java.io.*;
import java.net.*;
import UCData.*;