1 // Created on: 2013-01-28
2 // Created by: Kirill GAVRILOV
3 // Copyright (c) 2013-2014 OPEN CASCADE SAS
5 // This file is part of Open CASCADE Technology software library.
7 // This library is free software; you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License version 2.1 as published
9 // by the Free Software Foundation, with special exception defined in the file
10 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11 // distribution for complete text of the license and disclaimer of any warranty.
13 // Alternatively, this file may be used under the terms of Open CASCADE
14 // commercial license or contractual agreement.
16 #ifndef _NCollection_UtfString_H__
17 #define _NCollection_UtfString_H__
19 #include <NCollection_UtfIterator.hxx>
24 //! This template class represents a constant UTF-* string.
25 //! The string is stored in memory contiguously, is always NULL-terminated
26 //! and can be used as a standard C-string via the ToCString() method.
28 //! Notice that changing the string is not allowed
29 //! and any modification should produce a new string.
31 //! In comments to this class, the term "Unicode symbol" is used as
32 //! a synonym of "Unicode code point".
33 template<typename Type>
34 class NCollection_UtfString
39 NCollection_UtfIterator<Type> Iterator() const
41 return NCollection_UtfIterator<Type> (myString);
44 //! @return the size of the buffer in bytes, excluding NULL-termination symbol
45 Standard_Integer Size() const
50 //! @return the length of the string in Unicode symbols
51 Standard_Integer Length() const
56 //! Retrieve the Unicode symbol at the specified position.
57 //! Warning! This is a slow access; Iterator() should be used for consecutive parsing.
58 //! @param theCharIndex the index of the symbol, should be less than Length()
59 //! @return the Unicode symbol value (UTF-32 code point)
60 Standard_Utf32Char GetChar (const Standard_Integer theCharIndex) const;
62 //! Retrieve the position within the string buffer of the symbol with the specified index.
63 //! Warning! This is a slow access; Iterator() should be used for consecutive parsing.
64 //! @param theCharIndex the index of the symbol, should be less than Length()
65 //! (first symbol of the string has index 0)
66 //! @return the pointer to the symbol inside the buffer
67 const Type* GetCharBuffer (const Standard_Integer theCharIndex) const;
69 //! Retrieve Unicode symbol at specified position.
70 //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
71 Standard_Utf32Char operator[] (const Standard_Integer theCharIndex) const
73 return GetChar (theCharIndex);
76 //! Default constructor: initializes an empty string.
77 NCollection_UtfString();
//! Copy constructor.
80 //! @param theCopy string to copy
81 NCollection_UtfString (const NCollection_UtfString& theCopy);
83 #ifndef OCCT_NO_RVALUE_REFERENCE
//! Move constructor (declared only when rvalue references are not disabled
//! through OCCT_NO_RVALUE_REFERENCE).
//! @param theOther string to move from
85 NCollection_UtfString (NCollection_UtfString&& theOther);
88 //! Constructor copying a UTF-8 string.
89 //! @param theCopyUtf8 UTF-8 string to copy
90 //! @param theLength optional length limit in Unicode symbols (NOT bytes!)
91 //! The string is copied till the NULL symbol or, if theLength > 0,
92 //! till either NULL or the theLength-th symbol (whichever comes first).
93 NCollection_UtfString (const char* theCopyUtf8,
94 const Standard_Integer theLength = -1);
96 //! Constructor copying a UTF-16 string.
97 //! @param theCopyUtf16 UTF-16 string to copy
98 //! @param theLength optional length limit in Unicode symbols (NOT bytes!)
99 //! The string is copied till the NULL symbol or, if theLength > 0,
100 //! till either NULL or the theLength-th symbol (whichever comes first).
101 NCollection_UtfString (const Standard_Utf16Char* theCopyUtf16,
102 const Standard_Integer theLength = -1);
104 //! Constructor copying a UTF-32 string.
105 //! @param theCopyUtf32 UTF-32 string to copy
106 //! @param theLength optional length limit in Unicode symbols (NOT bytes!)
107 //! The string is copied till the NULL symbol or, if theLength > 0,
108 //! till either NULL or the theLength-th symbol (whichever comes first).
109 NCollection_UtfString (const Standard_Utf32Char* theCopyUtf32,
110 const Standard_Integer theLength = -1);
112 #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) || (defined(_MSC_VER) && _MSC_VER >= 1900)
113 //! Constructor copying a wide-character UTF string.
114 //! @param theCopyUtfWide wide UTF string to copy
115 //! @param theLength optional length limit in Unicode symbols (NOT bytes!)
116 //! The string is copied till the NULL symbol or, if theLength > 0,
117 //! till either NULL or the theLength-th symbol (whichever comes first).
119 //! This constructor is not declared if Standard_WideChar is the same type as Standard_Utf16Char.
120 NCollection_UtfString (const Standard_WideChar* theCopyUtfWide,
121 const Standard_Integer theLength = -1);
124 //! Copy from a Unicode string in UTF-8, UTF-16, or UTF-32 encoding,
125 //! determined by the size of the TypeFrom character type.
126 //! @param theStringUtf Unicode string
127 //! @param theLength the length limit in Unicode symbols
128 //! The string is copied till the NULL symbol or, if theLength > 0,
129 //! till either NULL or the theLength-th symbol (whichever comes first).
130 template <typename TypeFrom>
131 inline void FromUnicode (const TypeFrom* theStringUtf,
132 const Standard_Integer theLength = -1)
// read iterator over the source; it is handed over to fromUnicodeImpl() below
134 NCollection_UtfIterator<TypeFrom> anIterRead (theStringUtf);
// a NULL first symbol means the source holds nothing to copy
135 if (*anIterRead == 0)
// overload resolution selects the same-type or the converting implementation depending on TypeFrom
141 fromUnicodeImpl (theStringUtf, theLength, anIterRead);
144 //! Copy from a multibyte string encoded in the current system locale.
145 //! @param theString multibyte string
146 //! @param theLength the length limit in Unicode symbols
147 //! The string is copied till the NULL symbol or, if theLength > 0,
148 //! till either NULL or the theLength-th symbol (whichever comes first).
149 void FromLocale (const char* theString,
150 const Standard_Integer theLength = -1);
//! Destructor.
153 ~NCollection_UtfString();
155 //! Compares this string with another one (also used by operator==).
//! @param theCompare the string to compare with
//! @return the result of the comparison
156 bool IsEqual (const NCollection_UtfString& theCompare) const;
158 //! Returns a substring as a new string object.
159 //! @param theStart start index (inclusive) of the substring
160 //! @param theEnd end index (exclusive) of the substring
161 //! @return the substring
162 NCollection_UtfString SubString (const Standard_Integer theStart,
163 const Standard_Integer theEnd) const;
165 //! Returns NULL-terminated Unicode string.
166 //! Should not be modified or deleted!
167 //! @return (const Type* ) pointer to string
168 const Type* ToCString() const
//! Converts the string into UTF-8 encoding.
173 //! @return copy in UTF-8 format
174 const NCollection_UtfString<Standard_Utf8Char> ToUtf8() const;
//! Converts the string into UTF-16 encoding.
176 //! @return copy in UTF-16 format
177 const NCollection_UtfString<Standard_Utf16Char> ToUtf16() const;
//! Converts the string into UTF-32 encoding.
179 //! @return copy in UTF-32 format
180 const NCollection_UtfString<Standard_Utf32Char> ToUtf32() const;
//! Converts the string into the platform wide-character encoding.
182 //! @return copy in wide format (UTF-16 on Windows and UTF-32 on Linux)
183 const NCollection_UtfString<Standard_WideChar> ToUtfWide() const;
185 //! Converts the string into a multibyte string in the current system locale.
186 //! @param theBuffer output buffer
187 //! @param theSizeBytes size of the output buffer in bytes
188 //! @return true on success
189 bool ToLocale (char* theBuffer,
190 const Standard_Integer theSizeBytes) const;
//! Emptiness check: the string is empty when the first element of the buffer
//! is the NULL terminator.
192 //! @return true if string is empty
195 return myString[0] == Type(0);
201 public: //! @name assign operators
203 //! Copy the content from another string (also used by the copy assignment operator).
//! @param theOther the string to copy
204 const NCollection_UtfString& Assign (const NCollection_UtfString& theOther);
206 //! Exchange the data of two strings (without reallocating memory).
//! @param theOther the string to exchange the data with
207 void Swap (NCollection_UtfString& theOther);
209 //! Copy from another string.
210 const NCollection_UtfString& operator= (const NCollection_UtfString& theOther) { return Assign (theOther); }
212 #ifndef OCCT_NO_RVALUE_REFERENCE
213 //! Move assignment operator.
214 NCollection_UtfString& operator= (NCollection_UtfString&& theOther) { Swap (theOther); return *this; }
217 //! Copy from a NULL-terminated UTF-8 string.
//! @param theStringUtf8 the UTF-8 string to copy
218 const NCollection_UtfString& operator= (const char* theStringUtf8);
220 //! Copy from a NULL-terminated wide-character (wchar_t) UTF string.
//! @param theStringUtfWide the wide UTF string to copy
221 const NCollection_UtfString& operator= (const Standard_WideChar* theStringUtfWide);
//! Append operator.
//! @param theAppend the string to append
224 NCollection_UtfString& operator+= (const NCollection_UtfString& theAppend);
226 //! Join two strings.
227 friend NCollection_UtfString operator+ (const NCollection_UtfString& theLeft,
228 const NCollection_UtfString& theRight)
230 NCollection_UtfString aSumm;
231 strFree (aSumm.myString);
232 aSumm.mySize = theLeft.mySize + theRight.mySize;
233 aSumm.myLength = theLeft.myLength + theRight.myLength;
234 aSumm.myString = strAlloc (aSumm.mySize);
237 strCopy ((Standard_Byte* )aSumm.myString, (const Standard_Byte* )theLeft.myString, theLeft.mySize);
238 strCopy ((Standard_Byte* )aSumm.myString + theLeft.mySize, (const Standard_Byte* )theRight.myString, theRight.mySize);
242 public: //! @name compare operators
244 bool operator== (const NCollection_UtfString& theCompare) const
246 return IsEqual (theCompare);
//! Inequality operator.
//! @param theCompare the string to compare with
248 bool operator!= (const NCollection_UtfString& theCompare) const;
250 private: //! @name low-level methods
252 //! Implementation of copy routine for string of the same type
253 void fromUnicodeImpl (const Type* theStringUtf, const Standard_Integer theLength, NCollection_UtfIterator<Type>& theIterator)
255 Type* anOldBuffer = myString; // necessary in case of self-copying
257 // advance to the end
258 const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast();
259 for(; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator) {}
261 mySize = Standard_Integer((Standard_Byte* )theIterator.BufferHere() - (Standard_Byte* )theStringUtf);
262 myLength = theIterator.Index();
263 myString = strAlloc (mySize);
264 strCopy ((Standard_Byte* )myString, (const Standard_Byte* )theStringUtf, mySize);
266 strFree (anOldBuffer);
269 //! Implementation of copy routine for string of other types
270 template<typename TypeFrom>
271 void fromUnicodeImpl (typename opencascade::std::enable_if<! opencascade::std::is_same<Type, TypeFrom>::value, const TypeFrom*>::type theStringUtf,
272 const Standard_Integer theLength, NCollection_UtfIterator<TypeFrom>& theIterator)
274 Type* anOldBuffer = myString; // necessary in case of self-copying
277 const Standard_Integer aLengthMax = (theLength > 0) ? theLength : IntegerLast();
278 for (; *theIterator != 0 && theIterator.Index() < aLengthMax; ++theIterator)
280 mySize += theIterator.template AdvanceBytesUtf<Type>();
282 myLength = theIterator.Index();
284 myString = strAlloc (mySize);
287 theIterator.Init (theStringUtf);
288 Type* anIterWrite = myString;
289 for (; *theIterator != 0 && theIterator.Index() < myLength; ++theIterator)
291 anIterWrite = theIterator.GetUtf (anIterWrite);
294 strFree (anOldBuffer);
297 //! Allocate NULL-terminated string buffer.
298 static Type* strAlloc (const Standard_Size theSizeBytes)
300 Type* aPtr = (Type* )Standard::Allocate (theSizeBytes + sizeof(Type));
303 // always NULL-terminate the string
304 aPtr[theSizeBytes / sizeof(Type)] = Type(0);
309 //! Release string buffer and nullify the pointer.
310 static void strFree (Type*& thePtr)
312 Standard::Free (thePtr);
315 //! Provides bytes interface to avoid incorrect pointer arithmetics.
316 static void strCopy (Standard_Byte* theStrDst,
317 const Standard_Byte* theStrSrc,
318 const Standard_Integer theSizeBytes)
320 std::memcpy (theStrDst, theStrSrc, (Standard_Size )theSizeBytes);
323 //! Compare two Unicode strings per-byte.
324 static bool strAreEqual (const Type* theString1,
325 const Standard_Integer theSizeBytes1,
326 const Type* theString2,
327 const Standard_Integer theSizeBytes2)
329 return (theSizeBytes1 == theSizeBytes2)
330 && (std::memcmp (theString1, theString2, (Standard_Size )theSizeBytes1) == 0);
333 private: //! @name private fields
335 Type* myString; //!< NULL-terminated string buffer, allocated with strAlloc()
336 Standard_Integer mySize; //!< buffer size in bytes, excluding NULL-termination symbol
337 Standard_Integer myLength; //!< length of the string in Unicode symbols (cached value, excluding NULL-termination symbol)
// Shortcuts for the string instantiated with each supported character type.
341 typedef NCollection_UtfString<Standard_Utf8Char> NCollection_Utf8String; //!< string of Standard_Utf8Char
342 typedef NCollection_UtfString<Standard_Utf16Char> NCollection_Utf16String; //!< string of Standard_Utf16Char
343 typedef NCollection_UtfString<Standard_Utf32Char> NCollection_Utf32String; //!< string of Standard_Utf32Char
344 typedef NCollection_UtfString<Standard_WideChar> NCollection_UtfWideString; //!< string of Standard_WideChar
346 // template implementation (inline methods)
347 #include "NCollection_UtfString.lxx"
349 #endif // _NCollection_UtfString_H__