0027808: Some geometric_tolerances are not imported.
[occt.git] / src / NCollection / NCollection_UtfIterator.lxx
CommitLineData
a174a3c5 1// Created on: 2013-01-28
2// Created by: Kirill GAVRILOV
d5f74e42 3// Copyright (c) 2013-2014 OPEN CASCADE SAS
a174a3c5 4//
973c2be1 5// This file is part of Open CASCADE Technology software library.
6//
d5f74e42 7// This library is free software; you can redistribute it and/or modify it under
8// the terms of the GNU Lesser General Public License version 2.1 as published
973c2be1 9// by the Free Software Foundation, with special exception defined in the file
10// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11// distribution for complete text of the license and disclaimer of any warranty.
12//
13// Alternatively, this file may be used under the terms of Open CASCADE
14// commercial license or contractual agreement.
a174a3c5 15
16// Portions of code are copyrighted by Unicode, Inc.
17//
d94fa32e 18// Copyright (c) 2001-2004 Unicode, Inc.
a174a3c5 19//
20// Disclaimer
21//
22// This source code is provided as is by Unicode, Inc. No claims are
23// made as to fitness for any particular purpose. No warranties of any
24// kind are expressed or implied. The recipient agrees to determine
25// applicability of information provided. If this file has been
26// purchased on magnetic or optical media from Unicode, Inc., the
27// sole remedy for any claim will be exchange of defective media
28// within 90 days of receipt.
29//
30// Limitations on Rights to Redistribute This Code
31//
32// Unicode, Inc. hereby grants the right to freely use the information
33// supplied in this file in the creation of products supporting the
34// Unicode Standard, and to make copies of this file in any form
35// for internal or external distribution as long as this notice
36// remains attached.
37
38//! The first character in a UTF-8 sequence indicates how many bytes
39//! to read (among other things).
40template<typename Type>
41const unsigned char NCollection_UtfIterator<Type>::UTF8_BYTES_MINUS_ONE[256] =
42{
43 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
44 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
45 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
46 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
47 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
48 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
49 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
50 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
51};
52
53//! Magic values subtracted from a buffer value during UTF-8 conversion.
54//! This table contains as many values as there might be trailing bytes
55//! in a UTF-8 sequence.
56template<typename Type>
57const unsigned long NCollection_UtfIterator<Type>::offsetsFromUTF8[6] =
58{
59 0x00000000UL, 0x00003080UL, 0x000E2080UL,
60 0x03C82080UL, 0xFA082080UL, 0x82082080UL
61};
62
63//! The first character in a UTF-8 sequence indicates how many bytes to read.
64template<typename Type>
65const unsigned char NCollection_UtfIterator<Type>::UTF8_FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
66
67// =======================================================================
68// function : readUTF8
69// purpose : Get a UTF-8 character; leave the tracking pointer at the start of the next character.
70// Not protected against invalid UTF-8.
71// =======================================================================
72template<typename Type>
73inline void NCollection_UtfIterator<Type>::readUTF8()
74{
75 // unsigned arithmetic used
76 Standard_Utf8UChar* aPos = (Standard_Utf8UChar* )myPosNext;
77 const unsigned char aBytesToRead = UTF8_BYTES_MINUS_ONE[*aPos];
78 myCharUtf32 = 0;
79 switch (aBytesToRead)
80 {
81 case 5: myCharUtf32 += *aPos++; myCharUtf32 <<= 6; // remember, illegal UTF-8
82 case 4: myCharUtf32 += *aPos++; myCharUtf32 <<= 6; // remember, illegal UTF-8
83 case 3: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
84 case 2: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
85 case 1: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
86 case 0: myCharUtf32 += *aPos++;
87 }
88 myCharUtf32 -= offsetsFromUTF8[aBytesToRead];
89 myPosNext = (Type* )aPos;
90}
91
92// magic numbers
93template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MASK = 0xBF;
94template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MARK = 0x80;
95template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_START = 0xD800;
96template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_END = 0xDBFF;
97template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_START = 0xDC00;
98template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_END = 0xDFFF;
99template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_SHIFT = 10;
100template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_BASE = 0x0010000UL;
101template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_MASK = 0x3FFUL;
102template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_BMP = 0x0000FFFFUL;
103template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_LEGAL = 0x0010FFFFUL;
104
105// =======================================================================
106// function : readUTF16
107// purpose :
108// =======================================================================
109template<typename Type> inline
110void NCollection_UtfIterator<Type>::readUTF16()
111{
112 Standard_Utf32Char aChar = *myPosNext++;
113 // if we have the first half of the surrogate pair
114 if (aChar >= UTF16_SURROGATE_HIGH_START
115 && aChar <= UTF16_SURROGATE_HIGH_END)
116 {
656ec77a 117 const Standard_Utf32Char aChar2 = *myPosNext;
a174a3c5 118 // complete the surrogate pair
119 if (aChar2 >= UTF16_SURROGATE_LOW_START
120 && aChar2 <= UTF16_SURROGATE_LOW_END)
121 {
122 aChar = ((aChar - UTF16_SURROGATE_HIGH_START) << UTF16_SURROGATE_HIGH_SHIFT)
123 + (aChar2 - UTF16_SURROGATE_LOW_START) + UTF16_SURROGATE_LOW_BASE;
124 ++myPosNext;
125 }
126 }
127 myCharUtf32 = aChar;
128}
129
130// =======================================================================
131// function : AdvanceBytesUtf8
132// purpose :
133// =======================================================================
134template<typename Type> inline
135Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf8() const
136{
137 if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
138 && myCharUtf32 <= UTF16_SURROGATE_LOW_END)
139 {
140 // UTF-16 surrogate values are illegal in UTF-32
141 return 0;
142 }
143 else if (myCharUtf32 < Standard_Utf32Char(0x80))
144 {
145 return 1;
146 }
147 else if (myCharUtf32 < Standard_Utf32Char(0x800))
148 {
149 return 2;
150 }
151 else if (myCharUtf32 < Standard_Utf32Char(0x10000))
152 {
153 return 3;
154 }
155 else if (myCharUtf32 <= UTF32_MAX_LEGAL)
156 {
157 return 4;
158 }
159 else
160 {
161 // illegal
162 return 0;
163 }
164}
165
166// =======================================================================
167// function : GetUtf8
168// purpose :
169// =======================================================================
170template<typename Type> inline
171Standard_Utf8Char* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8Char* theBuffer) const
172{
173 // unsigned arithmetic used
174 return (Standard_Utf8Char* )GetUtf8 ((Standard_Utf8UChar* )theBuffer);
175}
176
177// =======================================================================
178// function : GetUtf8
179// purpose :
180// =======================================================================
181template<typename Type> inline
182Standard_Utf8UChar* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8UChar* theBuffer) const
183{
184 Standard_Utf32Char aChar = myCharUtf32;
185 if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
186 && myCharUtf32 <= UTF16_SURROGATE_LOW_END)
187 {
188 // UTF-16 surrogate values are illegal in UTF-32
189 return theBuffer;
190 }
191 else if (myCharUtf32 < Standard_Utf32Char(0x80))
192 {
193 *theBuffer++ = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[1]);
194 return theBuffer;
195 }
196 else if (myCharUtf32 < Standard_Utf32Char(0x800))
197 {
198 *++theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
199 *--theBuffer = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[2]);
200 return theBuffer + 2;
201 }
202 else if (myCharUtf32 < Standard_Utf32Char(0x10000))
203 {
204 theBuffer += 3;
205 *--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
206 *--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
207 *--theBuffer = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[3]);
208 return theBuffer + 3;
209 }
210 else if (myCharUtf32 <= UTF32_MAX_LEGAL)
211 {
212 theBuffer += 4;
213 *--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
214 *--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
215 *--theBuffer = Standard_Utf8UChar((aChar | UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
216 *--theBuffer = Standard_Utf8UChar (aChar | UTF8_FIRST_BYTE_MARK[4]);
217 return theBuffer + 4;
218 }
219 else
220 {
221 // illegal
222 return theBuffer;
223 }
224}
225
226// =======================================================================
227// function : AdvanceBytesUtf16
228// purpose :
229// =======================================================================
230template<typename Type> inline
231Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf16() const
232{
233 if (myCharUtf32 <= UTF32_MAX_BMP) // target is a character <= 0xFFFF
234 {
235 // UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values
236 if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
237 && myCharUtf32 <= UTF16_SURROGATE_LOW_END)
238 {
239 return 0;
240 }
241 else
242 {
243 return Standard_Integer(sizeof(Standard_Utf16Char));
244 }
245 }
246 else if (myCharUtf32 > UTF32_MAX_LEGAL)
247 {
248 // illegal
249 return 0;
250 }
251 else
252 {
253 // target is a character in range 0xFFFF - 0x10FFFF
254 // surrogate pair
255 return Standard_Integer(sizeof(Standard_Utf16Char) * 2);
256 }
257}
258
259// =======================================================================
260// function : GetUtf16
261// purpose :
262// =======================================================================
263template<typename Type> inline
264Standard_Utf16Char* NCollection_UtfIterator<Type>::GetUtf16 (Standard_Utf16Char* theBuffer) const
265{
266 if (myCharUtf32 <= UTF32_MAX_BMP) // target is a character <= 0xFFFF
267 {
268 // UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values
269 if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
270 && myCharUtf32 <= UTF16_SURROGATE_LOW_END)
271 {
272 return theBuffer;
273 }
274 else
275 {
276 *theBuffer++ = Standard_Utf16Char(myCharUtf32);
277 return theBuffer;
278 }
279 }
280 else if (myCharUtf32 > UTF32_MAX_LEGAL)
281 {
282 // illegal
283 return theBuffer;
284 }
285 else
286 {
287 // surrogate pair
288 Standard_Utf32Char aChar = myCharUtf32 - UTF16_SURROGATE_LOW_BASE;
289 *theBuffer++ = Standard_Utf16Char((aChar >> UTF16_SURROGATE_HIGH_SHIFT) + UTF16_SURROGATE_HIGH_START);
290 *theBuffer++ = Standard_Utf16Char((aChar & UTF16_SURROGATE_LOW_MASK) + UTF16_SURROGATE_LOW_START);
291 return theBuffer;
292 }
293}
294
295// =======================================================================
296// function : GetUtf32
297// purpose :
298// =======================================================================
299template<typename Type> inline
300Standard_Utf32Char* NCollection_UtfIterator<Type>::GetUtf32 (Standard_Utf32Char* theBuffer) const
301{
302 *theBuffer++ = myCharUtf32;
303 return theBuffer;
304}
305
306// =======================================================================
307// function : AdvanceBytesUtf
308// purpose :
309// =======================================================================
310template<typename Type> template<typename TypeWrite> inline
311Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf() const
312{
313 switch (sizeof(TypeWrite))
314 {
315 case sizeof(Standard_Utf8Char): return AdvanceBytesUtf8();
316 case sizeof(Standard_Utf16Char): return AdvanceBytesUtf16();
317 case sizeof(Standard_Utf32Char): return AdvanceBytesUtf32();
318 default: return 0; // invalid case
319 }
320}
321
322// =======================================================================
323// function : GetUtf
324// purpose :
325// =======================================================================
326template<typename Type> template<typename TypeWrite> inline
327TypeWrite* NCollection_UtfIterator<Type>::GetUtf (TypeWrite* theBuffer) const
328{
329 switch (sizeof(TypeWrite))
330 {
331 case sizeof(Standard_Utf8Char): return (TypeWrite* )GetUtf8 ((Standard_Utf8UChar* )theBuffer);
332 case sizeof(Standard_Utf16Char): return (TypeWrite* )GetUtf16((Standard_Utf16Char* )theBuffer);
333 case sizeof(Standard_Utf32Char): return (TypeWrite* )GetUtf32((Standard_Utf32Char* )theBuffer);
334 default: return NULL; // invalid case
335 }
336}