0032402: Coding Rules - eliminate msvc warning C4668 (symbol is not defined as a...
[occt.git] / src / NCollection / NCollection_UtfString.hxx
CommitLineData
// Created on: 2013-01-28
// Created by: Kirill GAVRILOV
// Copyright (c) 2013-2014 OPEN CASCADE SAS
//
// This file is part of Open CASCADE Technology software library.
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License version 2.1 as published
// by the Free Software Foundation, with special exception defined in the file
// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
// distribution for complete text of the license and disclaimer of any warranty.
//
// Alternatively, this file may be used under the terms of Open CASCADE
// commercial license or contractual agreement.
a174a3c5 15
16#ifndef _NCollection_UtfString_H__
17#define _NCollection_UtfString_H__
18
cf0786da 19#include <NCollection_UtfIterator.hxx>
a174a3c5 20
2cb44241 21#include <cstring>
a174a3c5 22#include <cstdlib>
23
24//! This template class represent constant UTF-* string.
25//! String stored in memory continuously, always NULL-terminated
26//! and can be used as standard C-string using ToCString() method.
27//!
28//! Notice that changing the string is not allowed
29//! and any modifications should produce new string.
cf0786da 30//!
31//! In comments to this class, terms "Unicode symbol" is used as
32//! synonym of "Unicode code point".
a174a3c5 33template<typename Type>
34class NCollection_UtfString
35{
36
37public:
38
39 NCollection_UtfIterator<Type> Iterator() const
40 {
41 return NCollection_UtfIterator<Type> (myString);
42 }
43
cf0786da 44 //! @return the size of the buffer in bytes, excluding NULL-termination symbol
a174a3c5 45 Standard_Integer Size() const
46 {
47 return mySize;
48 }
49
50 //! @return the length of the string in Unicode symbols
51 Standard_Integer Length() const
52 {
53 return myLength;
54 }
55
56 //! Retrieve Unicode symbol at specified position.
57 //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
58 //! @param theCharIndex the index of the symbol, should be lesser than Length()
59 //! @return the Unicode symbol value
60 Standard_Utf32Char GetChar (const Standard_Integer theCharIndex) const;
61
62 //! Retrieve string buffer at specified position.
63 //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
cf0786da 64 //! @param theCharIndex the index of the symbol, should be less than Length()
65 //! (first symbol of the string has index 0)
a174a3c5 66 //! @return the pointer to the symbol
67 const Type* GetCharBuffer (const Standard_Integer theCharIndex) const;
68
69 //! Retrieve Unicode symbol at specified position.
70 //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
71 Standard_Utf32Char operator[] (const Standard_Integer theCharIndex) const
72 {
73 return GetChar (theCharIndex);
74 }
75
76 //! Initialize empty string.
77 NCollection_UtfString();
78
79 //! Copy constructor.
80 //! @param theCopy string to copy.
81 NCollection_UtfString (const NCollection_UtfString& theCopy);
82
6286195c 83#ifndef OCCT_NO_RVALUE_REFERENCE
84 //! Move constructor
85 NCollection_UtfString (NCollection_UtfString&& theOther);
86#endif
87
cf0786da 88 //! Copy constructor from UTF-8 string.
89 //! @param theCopyUtf8 UTF-8 string to copy
90 //! @param theLength optional length limit in Unicode symbols (NOT bytes!)
91 //! The string is copied till NULL symbol or, if theLength >0,
92 //! till either NULL or theLength-th symbol (which comes first).
a174a3c5 93 NCollection_UtfString (const char* theCopyUtf8,
94 const Standard_Integer theLength = -1);
95
cf0786da 96 //! Copy constructor from UTF-16 string.
97 //! @param theCopyUtf16 UTF-16 string to copy
a174a3c5 98 //! @param theLength the length limit in Unicode symbols (NOT bytes!)
cf0786da 99 //! The string is copied till NULL symbol or, if theLength >0,
100 //! till either NULL or theLength-th symbol (which comes first).
a174a3c5 101 NCollection_UtfString (const Standard_Utf16Char* theCopyUtf16,
102 const Standard_Integer theLength = -1);
103
cf0786da 104 //! Copy constructor from UTF-32 string.
105 //! @param theCopyUtf32 UTF-32 string to copy
a174a3c5 106 //! @param theLength the length limit in Unicode symbols (NOT bytes!)
cf0786da 107 //! The string is copied till NULL symbol or, if theLength >0,
108 //! till either NULL or theLength-th symbol (which comes first).
a174a3c5 109 NCollection_UtfString (const Standard_Utf32Char* theCopyUtf32,
110 const Standard_Integer theLength = -1);
111
15173be5 112#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) || (defined(_MSC_VER) && _MSC_VER >= 1900)
cf0786da 113 //! Copy constructor from wide UTF string.
114 //! @param theCopyUtfWide wide UTF string to copy
a174a3c5 115 //! @param theLength the length limit in Unicode symbols (NOT bytes!)
cf0786da 116 //! The string is copied till NULL symbol or, if theLength >0,
117 //! till either NULL or theLength-th symbol (which comes first).
fb0b0531 118 //!
119 //! This constructor is undefined if Standard_WideChar is the same type as Standard_Utf16Char.
a174a3c5 120 NCollection_UtfString (const Standard_WideChar* theCopyUtfWide,
121 const Standard_Integer theLength = -1);
fb0b0531 122#endif
a174a3c5 123
cf0786da 124 //! Copy from Unicode string in UTF-8, UTF-16, or UTF-32 encoding,
125 //! determined by size of TypeFrom character type.
126 //! @param theStringUtf Unicode string
a174a3c5 127 //! @param theLength the length limit in Unicode symbols
cf0786da 128 //! The string is copied till NULL symbol or, if theLength >0,
129 //! till either NULL or theLength-th symbol (which comes first).
a174a3c5 130 template <typename TypeFrom>
cf0786da 131 inline void FromUnicode (const TypeFrom* theStringUtf,
132 const Standard_Integer theLength = -1)
133 {
134 NCollection_UtfIterator<TypeFrom> anIterRead (theStringUtf);
135 if (*anIterRead == 0)
136 {
137 // special case
138 Clear();
139 return;
140 }
141 fromUnicodeImpl (theStringUtf, theLength, anIterRead);
142 }
a174a3c5 143
cf0786da 144 //! Copy from multibyte string in current system locale.
145 //! @param theString multibyte string
a174a3c5 146 //! @param theLength the length limit in Unicode symbols
cf0786da 147 //! The string is copied till NULL symbol or, if theLength >0,
148 //! till either NULL or theLength-th symbol (which comes first).
a174a3c5 149 void FromLocale (const char* theString,
150 const Standard_Integer theLength = -1);
151
152 //! Destructor.
153 ~NCollection_UtfString();
154
155 //! Compares this string with another one.
156 bool IsEqual (const NCollection_UtfString& theCompare) const;
157
158 //! Returns the substring.
159 //! @param theStart start index (inclusive) of subString
160 //! @param theEnd end index (exclusive) of subString
161 //! @return the substring
162 NCollection_UtfString SubString (const Standard_Integer theStart,
163 const Standard_Integer theEnd) const;
164
165 //! Returns NULL-terminated Unicode string.
316ea293 166 //! Should not be modified or deleted!
a174a3c5 167 //! @return (const Type* ) pointer to string
168 const Type* ToCString() const
169 {
170 return myString;
171 }
172
173 //! @return copy in UTF-8 format
174 const NCollection_UtfString<Standard_Utf8Char> ToUtf8() const;
175
176 //! @return copy in UTF-16 format
177 const NCollection_UtfString<Standard_Utf16Char> ToUtf16() const;
178
179 //! @return copy in UTF-32 format
180 const NCollection_UtfString<Standard_Utf32Char> ToUtf32() const;
181
182 //! @return copy in wide format (UTF-16 on Windows and UTF-32 on Linux)
183 const NCollection_UtfString<Standard_WideChar> ToUtfWide() const;
184
cf0786da 185 //! Converts the string into string in the current system locale.
a174a3c5 186 //! @param theBuffer output buffer
187 //! @param theSizeBytes buffer size in bytes
188 //! @return true on success
189 bool ToLocale (char* theBuffer,
190 const Standard_Integer theSizeBytes) const;
191
192 //! @return true if string is empty
193 bool IsEmpty() const
194 {
195 return myString[0] == Type(0);
196 }
197
198 //! Zero string.
199 void Clear();
200
201public: //! @name assign operators
202
203 //! Copy from another string.
6286195c 204 const NCollection_UtfString& Assign (const NCollection_UtfString& theOther);
205
206 //! Exchange the data of two strings (without reallocating memory).
207 void Swap (NCollection_UtfString& theOther);
208
209 //! Copy from another string.
210 const NCollection_UtfString& operator= (const NCollection_UtfString& theOther) { return Assign (theOther); }
211
212#ifndef OCCT_NO_RVALUE_REFERENCE
213 //! Move assignment operator.
214 NCollection_UtfString& operator= (NCollection_UtfString&& theOther) { Swap (theOther); return *this; }
215#endif
a174a3c5 216
217 //! Copy from UTF-8 NULL-terminated string.
218 const NCollection_UtfString& operator= (const char* theStringUtf8);
219
220 //! Copy from wchar_t UTF NULL-terminated string.
221 const NCollection_UtfString& operator= (const Standard_WideChar* theStringUtfWide);
222
223 //! Join strings.
224 NCollection_UtfString& operator+= (const NCollection_UtfString& theAppend);
225
226 //! Join two strings.
227 friend NCollection_UtfString operator+ (const NCollection_UtfString& theLeft,
228 const NCollection_UtfString& theRight)
229 {
230 NCollection_UtfString aSumm;
231 strFree (aSumm.myString);
232 aSumm.mySize = theLeft.mySize + theRight.mySize;
233 aSumm.myLength = theLeft.myLength + theRight.myLength;
234 aSumm.myString = strAlloc (aSumm.mySize);
235
236 // copy bytes
237 strCopy ((Standard_Byte* )aSumm.myString, (const Standard_Byte* )theLeft.myString, theLeft.mySize);
238 strCopy ((Standard_Byte* )aSumm.myString + theLeft.mySize, (const Standard_Byte* )theRight.myString, theRight.mySize);
239 return aSumm;
240 }
241
242public: //! @name compare operators
243
244 bool operator== (const NCollection_UtfString& theCompare) const
245 {
246 return IsEqual (theCompare);
247 }
248 bool operator!= (const NCollection_UtfString& theCompare) const;
249
250private: //! @name low-level methods
251
cf0786da 252 //! Implementation of copy routine for string of the same type
253 void fromUnicodeImpl (const Type* theStringUtf, const Standard_Integer theLength, NCollection_UtfIterator<Type>& theIterator)
254 {
255 Type* anOldBuffer = myString; // necessary in case of self-copying
256
257 // advance to the end
258 const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast();
259 for(; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator) {}
260
261 mySize = Standard_Integer((Standard_Byte* )theIterator.BufferHere() - (Standard_Byte* )theStringUtf);
262 myLength = theIterator.Index();
263 myString = strAlloc (mySize);
264 strCopy ((Standard_Byte* )myString, (const Standard_Byte* )theStringUtf, mySize);
265
266 strFree (anOldBuffer);
267 }
268
269 //! Implementation of copy routine for string of other types
a174a3c5 270 template<typename TypeFrom>
cf0786da 271 void fromUnicodeImpl (typename opencascade::std::enable_if<! opencascade::std::is_same<Type, TypeFrom>::value, const TypeFrom*>::type theStringUtf,
272 const Standard_Integer theLength, NCollection_UtfIterator<TypeFrom>& theIterator)
273 {
274 Type* anOldBuffer = myString; // necessary in case of self-copying
275
276 mySize = 0;
277 const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast();
278 for (; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator)
279 {
280 mySize += theIterator.template AdvanceBytesUtf<Type>();
281 }
282 myLength = theIterator.Index();
283
284 myString = strAlloc (mySize);
285
286 // copy string
287 theIterator.Init (theStringUtf);
288 Type* anIterWrite = myString;
289 for (; *theIterator != 0 && theIterator.Index() < myLength; ++theIterator)
290 {
291 anIterWrite = theIterator.GetUtf (anIterWrite);
292 }
293
294 strFree (anOldBuffer);
295 }
a174a3c5 296
297 //! Allocate NULL-terminated string buffer.
298 static Type* strAlloc (const Standard_Size theSizeBytes)
299 {
300 Type* aPtr = (Type* )Standard::Allocate (theSizeBytes + sizeof(Type));
301 if (aPtr != NULL)
302 {
303 // always NULL-terminate the string
304 aPtr[theSizeBytes / sizeof(Type)] = Type(0);
305 }
306 return aPtr;
307 }
308
309 //! Release string buffer and nullify the pointer.
310 static void strFree (Type*& thePtr)
311 {
547702a1 312 Standard::Free (thePtr);
a174a3c5 313 }
314
315 //! Provides bytes interface to avoid incorrect pointer arithmetics.
316 static void strCopy (Standard_Byte* theStrDst,
317 const Standard_Byte* theStrSrc,
318 const Standard_Integer theSizeBytes)
319 {
2cb44241 320 std::memcpy (theStrDst, theStrSrc, (Standard_Size )theSizeBytes);
a174a3c5 321 }
322
323 //! Compare two Unicode strings per-byte.
324 static bool strAreEqual (const Type* theString1,
325 const Standard_Integer theSizeBytes1,
326 const Type* theString2,
327 const Standard_Integer theSizeBytes2)
328 {
329 return (theSizeBytes1 == theSizeBytes2)
2cb44241 330 && (std::memcmp (theString1, theString2, (Standard_Size )theSizeBytes1) == 0);
a174a3c5 331 }
332
333private: //! @name private fields
334
335 Type* myString; //!< string buffer
336 Standard_Integer mySize; //!< buffer size in bytes, excluding NULL-termination symbol
337 Standard_Integer myLength; //!< length of the string in Unicode symbols (cached value, excluding NULL-termination symbol)
338
339};
340
341typedef NCollection_UtfString<Standard_Utf8Char> NCollection_Utf8String;
342typedef NCollection_UtfString<Standard_Utf16Char> NCollection_Utf16String;
343typedef NCollection_UtfString<Standard_Utf32Char> NCollection_Utf32String;
344typedef NCollection_UtfString<Standard_WideChar> NCollection_UtfWideString;
345
346// template implementation (inline methods)
347#include "NCollection_UtfString.lxx"
348
349#endif // _NCollection_UtfString_H__