a174a3c5 |
1 | // Created on: 2013-01-28 |
2 | // Created by: Kirill GAVRILOV |
d5f74e42 |
3 | // Copyright (c) 2013-2014 OPEN CASCADE SAS |
a174a3c5 |
4 | // |
973c2be1 |
5 | // This file is part of Open CASCADE Technology software library. |
a174a3c5 |
6 | // |
d5f74e42 |
7 | // This library is free software; you can redistribute it and/or modify it under |
8 | // the terms of the GNU Lesser General Public License version 2.1 as published |
973c2be1 |
9 | // by the Free Software Foundation, with special exception defined in the file |
10 | // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT |
11 | // distribution for complete text of the license and disclaimer of any warranty. |
a174a3c5 |
12 | // |
973c2be1 |
13 | // Alternatively, this file may be used under the terms of Open CASCADE |
14 | // commercial license or contractual agreement. |
a174a3c5 |
15 | |
16 | #ifndef _NCollection_UtfString_H__ |
17 | #define _NCollection_UtfString_H__ |
18 | |
cf0786da |
19 | #include <NCollection_UtfIterator.hxx> |
a174a3c5 |
20 | |
2cb44241 |
21 | #include <cstring> |
a174a3c5 |
22 | #include <cstdlib> |
23 | |
24 | //! This template class represents a constant UTF-* string. |
25 | //! The string is stored contiguously in memory, is always NULL-terminated, |
26 | //! and can be used as a standard C-string via the ToCString() method. |
27 | //! |
28 | //! Notice that changing the string is not allowed |
29 | //! and any modifications should produce new string. |
cf0786da |
30 | //! |
31 | //! In comments to this class, the term "Unicode symbol" is used as |
32 | //! a synonym of "Unicode code point". |
a174a3c5 |
33 | template<typename Type> |
34 | class NCollection_UtfString |
35 | { |
36 | |
37 | public: |
38 | |
39 | NCollection_UtfIterator<Type> Iterator() const |
40 | { |
41 | return NCollection_UtfIterator<Type> (myString); |
42 | } |
43 | |
cf0786da |
44 | //! @return the size of the buffer in bytes, excluding NULL-termination symbol |
a174a3c5 |
45 | Standard_Integer Size() const |
46 | { |
47 | return mySize; |
48 | } |
49 | |
50 | //! @return the length of the string in Unicode symbols |
51 | Standard_Integer Length() const |
52 | { |
53 | return myLength; |
54 | } |
55 | |
56 | //! Retrieve Unicode symbol at specified position. |
57 | //! Warning! This is a slow access. Iterator should be used for consecutive parsing. |
58 | //! @param theCharIndex the index of the symbol, should be less than Length() |
59 | //! @return the Unicode symbol value |
60 | Standard_Utf32Char GetChar (const Standard_Integer theCharIndex) const; |
61 | |
62 | //! Retrieve string buffer at specified position. |
63 | //! Warning! This is a slow access. Iterator should be used for consecutive parsing. |
cf0786da |
64 | //! @param theCharIndex the index of the symbol, should be less than Length() |
65 | //! (first symbol of the string has index 0) |
a174a3c5 |
66 | //! @return the pointer to the symbol |
67 | const Type* GetCharBuffer (const Standard_Integer theCharIndex) const; |
68 | |
69 | //! Retrieve Unicode symbol at specified position. |
70 | //! Warning! This is a slow access. Iterator should be used for consecutive parsing. |
71 | Standard_Utf32Char operator[] (const Standard_Integer theCharIndex) const |
72 | { |
73 | return GetChar (theCharIndex); |
74 | } |
75 | |
76 | //! Initialize empty string. |
77 | NCollection_UtfString(); |
78 | |
79 | //! Copy constructor. |
80 | //! @param theCopy string to copy. |
81 | NCollection_UtfString (const NCollection_UtfString& theCopy); |
82 | |
6286195c |
83 | #ifndef OCCT_NO_RVALUE_REFERENCE |
84 | //! Move constructor |
85 | NCollection_UtfString (NCollection_UtfString&& theOther); |
86 | #endif |
87 | |
cf0786da |
88 | //! Copy constructor from UTF-8 string. |
89 | //! @param theCopyUtf8 UTF-8 string to copy |
90 | //! @param theLength optional length limit in Unicode symbols (NOT bytes!) |
91 | //! The string is copied till NULL symbol or, if theLength >0, |
92 | //! till either NULL or theLength-th symbol (whichever comes first). |
a174a3c5 |
93 | NCollection_UtfString (const char* theCopyUtf8, |
94 | const Standard_Integer theLength = -1); |
95 | |
cf0786da |
96 | //! Copy constructor from UTF-16 string. |
97 | //! @param theCopyUtf16 UTF-16 string to copy |
a174a3c5 |
98 | //! @param theLength the length limit in Unicode symbols (NOT bytes!) |
cf0786da |
99 | //! The string is copied till NULL symbol or, if theLength >0, |
100 | //! till either NULL or theLength-th symbol (whichever comes first). |
a174a3c5 |
101 | NCollection_UtfString (const Standard_Utf16Char* theCopyUtf16, |
102 | const Standard_Integer theLength = -1); |
103 | |
cf0786da |
104 | //! Copy constructor from UTF-32 string. |
105 | //! @param theCopyUtf32 UTF-32 string to copy |
a174a3c5 |
106 | //! @param theLength the length limit in Unicode symbols (NOT bytes!) |
cf0786da |
107 | //! The string is copied till NULL symbol or, if theLength >0, |
108 | //! till either NULL or theLength-th symbol (whichever comes first). |
a174a3c5 |
109 | NCollection_UtfString (const Standard_Utf32Char* theCopyUtf32, |
110 | const Standard_Integer theLength = -1); |
111 | |
15173be5 |
112 | #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) || (defined(_MSC_VER) && _MSC_VER >= 1900) |
cf0786da |
113 | //! Copy constructor from wide UTF string. |
114 | //! @param theCopyUtfWide wide UTF string to copy |
a174a3c5 |
115 | //! @param theLength the length limit in Unicode symbols (NOT bytes!) |
cf0786da |
116 | //! The string is copied till NULL symbol or, if theLength >0, |
117 | //! till either NULL or theLength-th symbol (whichever comes first). |
fb0b0531 |
118 | //! |
119 | //! This constructor is undefined if Standard_WideChar is the same type as Standard_Utf16Char. |
a174a3c5 |
120 | NCollection_UtfString (const Standard_WideChar* theCopyUtfWide, |
121 | const Standard_Integer theLength = -1); |
fb0b0531 |
122 | #endif |
a174a3c5 |
123 | |
cf0786da |
124 | //! Copy from Unicode string in UTF-8, UTF-16, or UTF-32 encoding, |
125 | //! determined by size of TypeFrom character type. |
126 | //! @param theStringUtf Unicode string |
a174a3c5 |
127 | //! @param theLength the length limit in Unicode symbols |
cf0786da |
128 | //! The string is copied till NULL symbol or, if theLength >0, |
129 | //! till either NULL or theLength-th symbol (whichever comes first). |
a174a3c5 |
130 | template <typename TypeFrom> |
cf0786da |
131 | inline void FromUnicode (const TypeFrom* theStringUtf, |
132 | const Standard_Integer theLength = -1) |
133 | { |
134 | NCollection_UtfIterator<TypeFrom> anIterRead (theStringUtf); |
135 | if (*anIterRead == 0) |
136 | { |
137 | // special case |
138 | Clear(); |
139 | return; |
140 | } |
141 | fromUnicodeImpl (theStringUtf, theLength, anIterRead); |
142 | } |
a174a3c5 |
143 | |
cf0786da |
144 | //! Copy from multibyte string in current system locale. |
145 | //! @param theString multibyte string |
a174a3c5 |
146 | //! @param theLength the length limit in Unicode symbols |
cf0786da |
147 | //! The string is copied till NULL symbol or, if theLength >0, |
148 | //! till either NULL or theLength-th symbol (whichever comes first). |
a174a3c5 |
149 | void FromLocale (const char* theString, |
150 | const Standard_Integer theLength = -1); |
151 | |
152 | //! Destructor. |
153 | ~NCollection_UtfString(); |
154 | |
155 | //! Compares this string with another one. |
156 | bool IsEqual (const NCollection_UtfString& theCompare) const; |
157 | |
158 | //! Returns the substring. |
159 | //! @param theStart start index (inclusive) of subString |
160 | //! @param theEnd end index (exclusive) of subString |
161 | //! @return the substring |
162 | NCollection_UtfString SubString (const Standard_Integer theStart, |
163 | const Standard_Integer theEnd) const; |
164 | |
165 | //! Returns NULL-terminated Unicode string. |
316ea293 |
166 | //! Should not be modified or deleted! |
a174a3c5 |
167 | //! @return (const Type* ) pointer to string |
168 | const Type* ToCString() const |
169 | { |
170 | return myString; |
171 | } |
172 | |
173 | //! @return copy in UTF-8 format |
174 | const NCollection_UtfString<Standard_Utf8Char> ToUtf8() const; |
175 | |
176 | //! @return copy in UTF-16 format |
177 | const NCollection_UtfString<Standard_Utf16Char> ToUtf16() const; |
178 | |
179 | //! @return copy in UTF-32 format |
180 | const NCollection_UtfString<Standard_Utf32Char> ToUtf32() const; |
181 | |
182 | //! @return copy in wide format (UTF-16 on Windows and UTF-32 on Linux) |
183 | const NCollection_UtfString<Standard_WideChar> ToUtfWide() const; |
184 | |
cf0786da |
185 | //! Converts the string into string in the current system locale. |
a174a3c5 |
186 | //! @param theBuffer output buffer |
187 | //! @param theSizeBytes buffer size in bytes |
188 | //! @return true on success |
189 | bool ToLocale (char* theBuffer, |
190 | const Standard_Integer theSizeBytes) const; |
191 | |
192 | //! @return true if string is empty |
193 | bool IsEmpty() const |
194 | { |
195 | return myString[0] == Type(0); |
196 | } |
197 | |
198 | //! Zero string. |
199 | void Clear(); |
200 | |
201 | public: //! @name assign operators |
202 | |
203 | //! Copy from another string. |
6286195c |
204 | const NCollection_UtfString& Assign (const NCollection_UtfString& theOther); |
205 | |
206 | //! Exchange the data of two strings (without reallocating memory). |
207 | void Swap (NCollection_UtfString& theOther); |
208 | |
209 | //! Copy from another string. |
210 | const NCollection_UtfString& operator= (const NCollection_UtfString& theOther) { return Assign (theOther); } |
211 | |
212 | #ifndef OCCT_NO_RVALUE_REFERENCE |
213 | //! Move assignment operator. |
214 | NCollection_UtfString& operator= (NCollection_UtfString&& theOther) { Swap (theOther); return *this; } |
215 | #endif |
a174a3c5 |
216 | |
217 | //! Copy from UTF-8 NULL-terminated string. |
218 | const NCollection_UtfString& operator= (const char* theStringUtf8); |
219 | |
220 | //! Copy from wchar_t UTF NULL-terminated string. |
221 | const NCollection_UtfString& operator= (const Standard_WideChar* theStringUtfWide); |
222 | |
223 | //! Join strings. |
224 | NCollection_UtfString& operator+= (const NCollection_UtfString& theAppend); |
225 | |
226 | //! Join two strings. |
227 | friend NCollection_UtfString operator+ (const NCollection_UtfString& theLeft, |
228 | const NCollection_UtfString& theRight) |
229 | { |
230 | NCollection_UtfString aSumm; |
231 | strFree (aSumm.myString); |
232 | aSumm.mySize = theLeft.mySize + theRight.mySize; |
233 | aSumm.myLength = theLeft.myLength + theRight.myLength; |
234 | aSumm.myString = strAlloc (aSumm.mySize); |
235 | |
236 | // copy bytes |
237 | strCopy ((Standard_Byte* )aSumm.myString, (const Standard_Byte* )theLeft.myString, theLeft.mySize); |
238 | strCopy ((Standard_Byte* )aSumm.myString + theLeft.mySize, (const Standard_Byte* )theRight.myString, theRight.mySize); |
239 | return aSumm; |
240 | } |
241 | |
242 | public: //! @name compare operators |
243 | |
244 | bool operator== (const NCollection_UtfString& theCompare) const |
245 | { |
246 | return IsEqual (theCompare); |
247 | } |
248 | bool operator!= (const NCollection_UtfString& theCompare) const; |
249 | |
250 | private: //! @name low-level methods |
251 | |
cf0786da |
252 | //! Implementation of copy routine for string of the same type |
253 | void fromUnicodeImpl (const Type* theStringUtf, const Standard_Integer theLength, NCollection_UtfIterator<Type>& theIterator) |
254 | { |
255 | Type* anOldBuffer = myString; // necessary in case of self-copying |
256 | |
257 | // advance to the end |
258 | const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast(); |
259 | for(; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator) {} |
260 | |
261 | mySize = Standard_Integer((Standard_Byte* )theIterator.BufferHere() - (Standard_Byte* )theStringUtf); |
262 | myLength = theIterator.Index(); |
263 | myString = strAlloc (mySize); |
264 | strCopy ((Standard_Byte* )myString, (const Standard_Byte* )theStringUtf, mySize); |
265 | |
266 | strFree (anOldBuffer); |
267 | } |
268 | |
269 | //! Implementation of copy routine for string of other types |
a174a3c5 |
270 | template<typename TypeFrom> |
cf0786da |
271 | void fromUnicodeImpl (typename opencascade::std::enable_if<! opencascade::std::is_same<Type, TypeFrom>::value, const TypeFrom*>::type theStringUtf, |
272 | const Standard_Integer theLength, NCollection_UtfIterator<TypeFrom>& theIterator) |
273 | { |
274 | Type* anOldBuffer = myString; // necessary in case of self-copying |
275 | |
276 | mySize = 0; |
277 | const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast(); |
278 | for (; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator) |
279 | { |
280 | mySize += theIterator.template AdvanceBytesUtf<Type>(); |
281 | } |
282 | myLength = theIterator.Index(); |
283 | |
284 | myString = strAlloc (mySize); |
285 | |
286 | // copy string |
287 | theIterator.Init (theStringUtf); |
288 | Type* anIterWrite = myString; |
289 | for (; *theIterator != 0 && theIterator.Index() < myLength; ++theIterator) |
290 | { |
291 | anIterWrite = theIterator.GetUtf (anIterWrite); |
292 | } |
293 | |
294 | strFree (anOldBuffer); |
295 | } |
a174a3c5 |
296 | |
297 | //! Allocate NULL-terminated string buffer. |
298 | static Type* strAlloc (const Standard_Size theSizeBytes) |
299 | { |
300 | Type* aPtr = (Type* )Standard::Allocate (theSizeBytes + sizeof(Type)); |
301 | if (aPtr != NULL) |
302 | { |
303 | // always NULL-terminate the string |
304 | aPtr[theSizeBytes / sizeof(Type)] = Type(0); |
305 | } |
306 | return aPtr; |
307 | } |
308 | |
309 | //! Release string buffer and nullify the pointer. |
310 | static void strFree (Type*& thePtr) |
311 | { |
547702a1 |
312 | Standard::Free (thePtr); |
a174a3c5 |
313 | } |
314 | |
315 | //! Provides bytes interface to avoid incorrect pointer arithmetics. |
316 | static void strCopy (Standard_Byte* theStrDst, |
317 | const Standard_Byte* theStrSrc, |
318 | const Standard_Integer theSizeBytes) |
319 | { |
2cb44241 |
320 | std::memcpy (theStrDst, theStrSrc, (Standard_Size )theSizeBytes); |
a174a3c5 |
321 | } |
322 | |
323 | //! Compare two Unicode strings per-byte. |
324 | static bool strAreEqual (const Type* theString1, |
325 | const Standard_Integer theSizeBytes1, |
326 | const Type* theString2, |
327 | const Standard_Integer theSizeBytes2) |
328 | { |
329 | return (theSizeBytes1 == theSizeBytes2) |
2cb44241 |
330 | && (std::memcmp (theString1, theString2, (Standard_Size )theSizeBytes1) == 0); |
a174a3c5 |
331 | } |
332 | |
333 | private: //! @name private fields |
334 | |
335 | Type* myString; //!< string buffer |
336 | Standard_Integer mySize; //!< buffer size in bytes, excluding NULL-termination symbol |
337 | Standard_Integer myLength; //!< length of the string in Unicode symbols (cached value, excluding NULL-termination symbol) |
338 | |
339 | }; |
340 | |
341 | typedef NCollection_UtfString<Standard_Utf8Char> NCollection_Utf8String; |
342 | typedef NCollection_UtfString<Standard_Utf16Char> NCollection_Utf16String; |
343 | typedef NCollection_UtfString<Standard_Utf32Char> NCollection_Utf32String; |
344 | typedef NCollection_UtfString<Standard_WideChar> NCollection_UtfWideString; |
345 | |
346 | // template implementation (inline methods) |
347 | #include "NCollection_UtfString.lxx" |
348 | |
349 | #endif // _NCollection_UtfString_H__ |