1 // Created on: 2013-01-28
2 // Created by: Kirill GAVRILOV
3 // Copyright (c) 2013 OPEN CASCADE SAS
5 // This file is part of Open CASCADE Technology software library.
7 // This library is free software; you can redistribute it and/or modify it
8 // under the terms of the GNU Lesser General Public License version 2.1 as published
9 // by the Free Software Foundation, with special exception defined in the file
10 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11 // distribution for complete text of the license and disclaimer of any warranty.
13 // Alternatively, this file may be used under the terms of Open CASCADE
14 // commercial license or contractual agreement.
16 #ifndef _NCollection_UtfString_H__
17 #define _NCollection_UtfString_H__
19 #include "NCollection_UtfIterator.hxx"
21 #include <Standard.hxx>
26 //! This template class represents a constant UTF-* string.
27 //! The string is stored in memory contiguously, is always NULL-terminated
28 //! and can be used as a standard C-string via the ToCString() method.
30 //! Notice that changing the string is not allowed
31 //! and any modification should produce a new string.
//! @tparam Type the code unit type of the stored buffer (see myString)
32 template<typename Type>
33 class NCollection_UtfString
//! @return an iterator constructed from the internal string buffer
38 NCollection_UtfIterator<Type> Iterator() const
40 return NCollection_UtfIterator<Type> (myString);
43 //! @return the size of the buffer in bytes, excluding the NULL-termination symbol
44 Standard_Integer Size() const
49 //! @return the length of the string in Unicode symbols
50 Standard_Integer Length() const
55 //! Retrieve the Unicode symbol at the specified position.
56 //! Warning! This is a slow access. An iterator should be used for consecutive parsing.
57 //! @param theCharIndex the index of the symbol, should be less than Length()
58 //! @return the Unicode symbol value
59 Standard_Utf32Char GetChar (const Standard_Integer theCharIndex) const;
61 //! Retrieve the string buffer at the specified position.
62 //! Warning! This is a slow access. An iterator should be used for consecutive parsing.
63 //! @param theCharIndex the index of the symbol, should be less than Length()
64 //! @return the pointer to the symbol
65 const Type* GetCharBuffer (const Standard_Integer theCharIndex) const;
67 //! Retrieve the Unicode symbol at the specified position (shorthand for GetChar()).
68 //! Warning! This is a slow access. An iterator should be used for consecutive parsing.
//! @param theCharIndex the index of the symbol, forwarded unchanged to GetChar()
//! @return the value returned by GetChar()
69 Standard_Utf32Char operator[] (const Standard_Integer theCharIndex) const
71 return GetChar (theCharIndex);
74 //! Initialize an empty string.
75 NCollection_UtfString();
//! Copy constructor.
78 //! @param theCopy string to copy.
79 NCollection_UtfString (const NCollection_UtfString& theCopy);
81 //! Construct a copy of a NULL-terminated UTF-8 string.
82 //! @param theCopyUtf8 NULL-terminated UTF-8 string to copy
83 //! @param theLength the length limit in Unicode symbols (NOT bytes!), -1 by default
84 NCollection_UtfString (const char* theCopyUtf8,
85 const Standard_Integer theLength = -1);
87 //! Construct a copy of a NULL-terminated UTF-16 string.
88 //! @param theCopyUtf16 NULL-terminated UTF-16 string to copy
89 //! @param theLength the length limit in Unicode symbols (NOT bytes!), -1 by default
90 NCollection_UtfString (const Standard_Utf16Char* theCopyUtf16,
91 const Standard_Integer theLength = -1);
93 //! Construct a copy of a NULL-terminated UTF-32 string.
94 //! @param theCopyUtf32 NULL-terminated UTF-32 string to copy
95 //! @param theLength the length limit in Unicode symbols (NOT bytes!), -1 by default
96 NCollection_UtfString (const Standard_Utf32Char* theCopyUtf32,
97 const Standard_Integer theLength = -1);
99 //! Construct a copy of a NULL-terminated wide UTF string.
100 //! @param theCopyUtfWide NULL-terminated wide UTF string to copy
101 //! @param theLength the length limit in Unicode symbols (NOT bytes!), -1 by default
102 NCollection_UtfString (const Standard_WideChar* theCopyUtfWide,
103 const Standard_Integer theLength = -1);
105 //! Copy from a NULL-terminated Unicode string.
106 //! @param theStringUtf NULL-terminated Unicode string
107 //! @param theLength the length limit in Unicode symbols, -1 by default
108 template <typename TypeFrom>
109 void FromUnicode (const TypeFrom* theStringUtf,
110 const Standard_Integer theLength = -1);
112 //! Copy from a NULL-terminated multibyte string in the system locale.
113 //! Avoid this function unless it is absolutely necessary.
114 //! @param theString NULL-terminated multibyte string
115 //! @param theLength the length limit in Unicode symbols, -1 by default
116 void FromLocale (const char* theString,
117 const Standard_Integer theLength = -1);
//! Destructor.
120 ~NCollection_UtfString();
122 //! Compare this string with another one.
//! @param theCompare the string to compare with
123 bool IsEqual (const NCollection_UtfString& theCompare) const;
125 //! Return a substring.
126 //! @param theStart start index (inclusive) of the substring
127 //! @param theEnd end index (exclusive) of the substring
128 //! @return the substring
129 NCollection_UtfString SubString (const Standard_Integer theStart,
130 const Standard_Integer theEnd) const;
132 //! Return the NULL-terminated Unicode string.
133 //! The returned buffer should not be modified or deleted!
134 //! @return (const Type* ) pointer to the string
135 const Type* ToCString() const
140 //! @return copy in UTF-8 format
141 const NCollection_UtfString<Standard_Utf8Char> ToUtf8() const;
143 //! @return copy in UTF-16 format
144 const NCollection_UtfString<Standard_Utf16Char> ToUtf16() const;
146 //! @return copy in UTF-32 format
147 const NCollection_UtfString<Standard_Utf32Char> ToUtf32() const;
149 //! @return copy in wide format (UTF-16 on Windows and UTF-32 on Linux)
150 const NCollection_UtfString<Standard_WideChar> ToUtfWide() const;
152 //! Convert the string into a multibyte string.
153 //! Avoid this function unless it is absolutely necessary.
154 //! @param theBuffer output buffer
155 //! @param theSizeBytes buffer size in bytes
156 //! @return true on success
157 bool ToLocale (char* theBuffer,
158 const Standard_Integer theSizeBytes) const;
160 //! @return true if the string is empty
// the string is empty when its very first code unit is already the NULL terminator
163 return myString[0] == Type(0);
169 public: //! @name assign operators
171 //! Copy from another string.
172 const NCollection_UtfString& operator= (const NCollection_UtfString& theOther);
174 //! Copy from a UTF-8 NULL-terminated string.
175 const NCollection_UtfString& operator= (const char* theStringUtf8);
177 //! Copy from a wchar_t UTF NULL-terminated string.
178 const NCollection_UtfString& operator= (const Standard_WideChar* theStringUtfWide);
//! Append operator.
//! NOTE(review): only the declaration is visible here; the definition presumably lives in NCollection_UtfString.lxx - confirm.
181 NCollection_UtfString& operator+= (const NCollection_UtfString& theAppend);
183 //! Join two strings.
//! The result buffer is sized as the sum of both byte sizes and filled with
//! the bytes of theLeft followed by the bytes of theRight.
184 friend NCollection_UtfString operator+ (const NCollection_UtfString& theLeft,
185 const NCollection_UtfString& theRight)
187 NCollection_UtfString aSumm;
// release whatever buffer the default-constructed aSumm holds before replacing it
188 strFree (aSumm.myString);
// both the byte size and the cached symbol count are simple sums of the operands
189 aSumm.mySize = theLeft.mySize + theRight.mySize;
190 aSumm.myLength = theLeft.myLength + theRight.myLength;
// strAlloc() reserves room for the terminator and writes it after aSumm.mySize bytes
191 aSumm.myString = strAlloc (aSumm.mySize);
// copy through byte pointers: the offset theLeft.mySize is expressed in bytes, not in Type units
194 strCopy ((Standard_Byte* )aSumm.myString, (const Standard_Byte* )theLeft.myString, theLeft.mySize);
195 strCopy ((Standard_Byte* )aSumm.myString + theLeft.mySize, (const Standard_Byte* )theRight.myString, theRight.mySize);
199 public: //! @name compare operators
//! Equality operator; forwards to IsEqual().
201 bool operator== (const NCollection_UtfString& theCompare) const
203 return IsEqual (theCompare);
//! Inequality operator (only the declaration is visible here).
205 bool operator!= (const NCollection_UtfString& theCompare) const;
207 private: //! @name low-level methods
209 //! Compute the advance for the specified string.
210 //! @param theStringUtf pointer to the NULL-terminated Unicode string
211 //! @param theLengthMax length limit (to cut the string), set to -1 to compute up to the NULL-termination symbol
212 //! @param theSizeBytes advance in bytes (out)
213 //! @param theLength string length (out)
214 template<typename TypeFrom>
215 static void strGetAdvance (const TypeFrom* theStringUtf,
216 const Standard_Integer theLengthMax,
217 Standard_Integer& theSizeBytes,
218 Standard_Integer& theLength);
220 //! Allocate a NULL-terminated string buffer.
//! Requests theSizeBytes + sizeof(Type) bytes from Standard::Allocate(),
//! i.e. the payload plus one extra code unit for the terminator.
//! @param theSizeBytes size of the string payload in bytes, excluding the terminator
221 static Type* strAlloc (const Standard_Size theSizeBytes)
223 Type* aPtr = (Type* )Standard::Allocate (theSizeBytes + sizeof(Type));
226 // always NULL-terminate the string
// theSizeBytes is in bytes while aPtr is indexed in Type units, hence the division
227 aPtr[theSizeBytes / sizeof(Type)] = Type(0);
232 //! Release the string buffer and nullify the pointer.
//! @param thePtr pointer to the buffer, passed to Standard::Free()
233 static void strFree (Type*& thePtr)
235 Standard::Free (thePtr);
238 //! Provides a byte-level interface to avoid incorrect pointer arithmetic.
//! Implemented with memcpy(), so the source and destination ranges must not overlap.
//! @param theStrDst destination buffer
//! @param theStrSrc source buffer
//! @param theSizeBytes number of bytes to copy
239 static void strCopy (Standard_Byte* theStrDst,
240 const Standard_Byte* theStrSrc,
241 const Standard_Integer theSizeBytes)
243 ::memcpy (theStrDst, theStrSrc, (Standard_Size )theSizeBytes);
246 //! Compare two Unicode strings per-byte.
//! @param theString1 first buffer
//! @param theSizeBytes1 size of the first buffer in bytes
//! @param theString2 second buffer
//! @param theSizeBytes2 size of the second buffer in bytes
//! @return true if both sizes match and the buffers are byte-wise identical
247 static bool strAreEqual (const Type* theString1,
248 const Standard_Integer theSizeBytes1,
249 const Type* theString2,
250 const Standard_Integer theSizeBytes2)
// sizes are checked first, so memcmp() only runs on buffers of equal length
252 return (theSizeBytes1 == theSizeBytes2)
253 && (::memcmp (theString1, theString2, (Standard_Size )theSizeBytes1) == 0);
256 private: //! @name private fields
258 Type* myString; //!< string buffer
259 Standard_Integer mySize; //!< buffer size in bytes, excluding NULL-termination symbol
260 Standard_Integer myLength; //!< length of the string in Unicode symbols (cached value, excluding NULL-termination symbol)
//! Shorthand names for NCollection_UtfString instantiated over each code unit type
//! (Standard_Utf8Char, Standard_Utf16Char, Standard_Utf32Char and Standard_WideChar).
264 typedef NCollection_UtfString<Standard_Utf8Char> NCollection_Utf8String;
265 typedef NCollection_UtfString<Standard_Utf16Char> NCollection_Utf16String;
266 typedef NCollection_UtfString<Standard_Utf32Char> NCollection_Utf32String;
267 typedef NCollection_UtfString<Standard_WideChar> NCollection_UtfWideString;
269 // template implementation (inline methods)
270 #include "NCollection_UtfString.lxx"
272 #endif // _NCollection_UtfString_H__