42cf5bc1 |
1 | // Created on: 1993-02-22 |
2 | // Created by: Mireille MERCIEN |
3 | // Copyright (c) 1993-1999 Matra Datavision |
4 | // Copyright (c) 1999-2014 OPEN CASCADE SAS |
5 | // |
6 | // This file is part of Open CASCADE Technology software library. |
7 | // |
8 | // This library is free software; you can redistribute it and/or modify it under |
9 | // the terms of the GNU Lesser General Public License version 2.1 as published |
10 | // by the Free Software Foundation, with special exception defined in the file |
11 | // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT |
12 | // distribution for complete text of the license and disclaimer of any warranty. |
13 | // |
14 | // Alternatively, this file may be used under the terms of Open CASCADE |
15 | // commercial license or contractual agreement. |
16 | |
17 | #ifndef _TCollection_ExtendedString_HeaderFile |
18 | #define _TCollection_ExtendedString_HeaderFile |
19 | |
20 | #include <Standard.hxx> |
21 | #include <Standard_DefineAlloc.hxx> |
22 | #include <Standard_Handle.hxx> |
23 | |
24 | #include <Standard_PExtCharacter.hxx> |
25 | #include <Standard_Integer.hxx> |
26 | #include <Standard_CString.hxx> |
27 | #include <Standard_Boolean.hxx> |
28 | #include <Standard_ExtString.hxx> |
29 | #include <Standard_Character.hxx> |
30 | #include <Standard_ExtCharacter.hxx> |
31 | #include <Standard_Real.hxx> |
32 | #include <Standard_OStream.hxx> |
33 | #include <Standard_PCharacter.hxx> |
34 | class Standard_NullObject; |
35 | class Standard_OutOfRange; |
36 | class Standard_NumericError; |
37 | class Standard_NegativeValue; |
38 | class TCollection_AsciiString; |
39 | |
40 | |
fb0b0531 |
41 | //! A variable-length sequence of "extended" (UNICODE) characters (16-bit character type). |
42 | //! It provides editing operations with built-in memory management |
43 | //! to make ExtendedString objects easier to use than ordinary extended character arrays. |
44 | //! ExtendedString objects follow "value semantics", that is, they are the actual strings, |
45 | //! not handles to strings, and are copied through assignment. |
46 | //! You may use HExtendedString objects to get handles to strings. |
47 | //! |
48 | //! Beware that class can transparently store UTF-16 string with surrogate pairs |
49 | //! (Unicode symbol represented by two 16-bit code units). |
50 | //! However, surrogate pairs are not considered by the following methods: |
51 | //! - Method ::Length() return the number of 16-bit code units, not the number of Unicode symbols. |
52 | //! - Methods taking/returning symbol index work with 16-bit code units, not true Unicode symbols, |
53 | //! including ::Remove(), ::SetValue(), ::Value(), ::Search(), ::Trunc() and others. |
54 | //! If application needs to process surrogate pairs, NCollection_Utf16Iter class can be used |
55 | //! for iterating through Unicode string (UTF-32 code unit will be returned for each position). |
42cf5bc1 |
56 | class TCollection_ExtendedString |
57 | { |
58 | public: |
59 | |
60 | DEFINE_STANDARD_ALLOC |
61 | |
62 | |
63 | //! Initializes a ExtendedString to an empty ExtendedString. |
64 | Standard_EXPORT TCollection_ExtendedString(); |
65 | |
66 | //! Creation by converting a CString to an extended |
67 | //! string. If <isMultiByte> is true then the string is |
68 | //! treated as having UTF-8 coding. If it is not a UTF-8 |
69 | //! then <isMultiByte> is ignored and each character is |
70 | //! copied to ExtCharacter. |
71 | Standard_EXPORT TCollection_ExtendedString(const Standard_CString astring, const Standard_Boolean isMultiByte = Standard_False); |
72 | |
73 | //! Creation by converting an ExtString to an extended string. |
74 | Standard_EXPORT TCollection_ExtendedString(const Standard_ExtString astring); |
fb0b0531 |
75 | |
15173be5 |
76 | #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) |
fb0b0531 |
77 | //! Initialize from wide-char string considering it as Unicode string |
78 | //! (the size of wide char is a platform-dependent - e.g. on Windows wchar_t is UTF-16). |
79 | //! |
80 | //! This constructor is unavailable if application is built with deprecated msvc option "-Zc:wchar_t-", |
81 | //! since OCCT itself is never built with this option. |
82 | Standard_EXPORT TCollection_ExtendedString (const Standard_WideChar* theStringUtf); |
83 | #endif |
42cf5bc1 |
84 | |
85 | //! Initializes a AsciiString with a single character. |
86 | Standard_EXPORT TCollection_ExtendedString(const Standard_Character aChar); |
87 | |
88 | //! Initializes a ExtendedString with a single character. |
89 | Standard_EXPORT TCollection_ExtendedString(const Standard_ExtCharacter aChar); |
90 | |
91 | //! Initializes a ExtendedString with <length> space allocated. |
92 | //! and filled with <filler>.This is useful for buffers. |
93 | Standard_EXPORT TCollection_ExtendedString(const Standard_Integer length, const Standard_ExtCharacter filler); |
94 | |
95 | //! Initializes an ExtendedString with an integer value |
96 | Standard_EXPORT TCollection_ExtendedString(const Standard_Integer value); |
97 | |
98 | //! Initializes an ExtendedString with a real value |
99 | Standard_EXPORT TCollection_ExtendedString(const Standard_Real value); |
100 | |
101 | //! Initializes a ExtendedString with another ExtendedString. |
102 | Standard_EXPORT TCollection_ExtendedString(const TCollection_ExtendedString& astring); |
6286195c |
103 | |
104 | #ifndef OCCT_NO_RVALUE_REFERENCE |
105 | //! Move constructor |
0f57ab75 |
106 | TCollection_ExtendedString (TCollection_ExtendedString&& theOther) |
6286195c |
107 | : mystring (theOther.mystring), |
108 | mylength (theOther.mylength) |
109 | { |
110 | theOther.mystring = NULL; |
111 | theOther.mylength = 0; |
112 | } |
113 | #endif |
114 | |
42cf5bc1 |
115 | //! Creation by converting an Ascii string to an extended |
116 | //! string. The string is treated as having UTF-8 coding. |
117 | //! If it is not a UTF-8 then each character is copied to ExtCharacter. |
118 | Standard_EXPORT TCollection_ExtendedString(const TCollection_AsciiString& astring); |
119 | |
120 | //! Appends the other extended string to this extended string. |
121 | //! Note that this method is an alias of operator +=. |
122 | //! Example: aString += anotherString |
123 | Standard_EXPORT void AssignCat (const TCollection_ExtendedString& other); |
124 | void operator += (const TCollection_ExtendedString& other) |
125 | { |
126 | AssignCat(other); |
127 | } |
128 | |
129 | //! Appends <other> to me. |
130 | Standard_EXPORT TCollection_ExtendedString Cat (const TCollection_ExtendedString& other) const; |
131 | TCollection_ExtendedString operator + (const TCollection_ExtendedString& other) const |
132 | { |
133 | return Cat(other); |
134 | } |
135 | |
136 | //! Substitutes all the characters equal to aChar by NewChar |
137 | //! in the ExtendedString <me>. |
138 | //! The substitution can be case sensitive. |
139 | //! If you don't use default case sensitive, no matter wether aChar |
140 | //! is uppercase or not. |
141 | Standard_EXPORT void ChangeAll (const Standard_ExtCharacter aChar, const Standard_ExtCharacter NewChar); |
142 | |
143 | //! Removes all characters contained in <me>. |
144 | //! This produces an empty ExtendedString. |
145 | Standard_EXPORT void Clear(); |
146 | |
147 | //! Copy <fromwhere> to <me>. |
148 | //! Used as operator = |
149 | Standard_EXPORT void Copy (const TCollection_ExtendedString& fromwhere); |
150 | void operator = (const TCollection_ExtendedString& fromwhere) |
151 | { |
152 | Copy(fromwhere); |
153 | } |
6286195c |
154 | |
155 | //! Exchange the data of two strings (without reallocating memory). |
156 | Standard_EXPORT void Swap (TCollection_ExtendedString& theOther); |
157 | |
158 | #ifndef OCCT_NO_RVALUE_REFERENCE |
159 | //! Move assignment operator |
160 | TCollection_ExtendedString& operator= (TCollection_ExtendedString&& theOther) { Swap (theOther); return *this; } |
161 | #endif |
162 | |
42cf5bc1 |
163 | //! Frees memory allocated by ExtendedString. |
fb0b0531 |
164 | Standard_EXPORT ~TCollection_ExtendedString(); |
42cf5bc1 |
165 | |
166 | //! Insert a Character at position <where>. |
167 | Standard_EXPORT void Insert (const Standard_Integer where, const Standard_ExtCharacter what); |
168 | |
169 | //! Insert a ExtendedString at position <where>. |
170 | Standard_EXPORT void Insert (const Standard_Integer where, const TCollection_ExtendedString& what); |
fb0b0531 |
171 | |
42cf5bc1 |
172 | //! Returns True if this string contains no characters. |
fb0b0531 |
173 | Standard_Boolean IsEmpty() const { return mylength == 0; } |
174 | |
42cf5bc1 |
175 | //! Returns true if the characters in this extended |
176 | //! string are identical to the characters in the other extended string. |
177 | //! Note that this method is an alias of operator == |
178 | Standard_EXPORT Standard_Boolean IsEqual (const Standard_ExtString other) const; |
179 | Standard_Boolean operator == (const Standard_ExtString other) const |
180 | { |
181 | return IsEqual(other); |
182 | } |
183 | |
184 | //! Returns true if the characters in this extended |
185 | //! string are identical to the characters in the other extended string. |
186 | //! Note that this method is an alias of operator == |
187 | Standard_EXPORT Standard_Boolean IsEqual (const TCollection_ExtendedString& other) const; |
188 | Standard_Boolean operator == (const TCollection_ExtendedString& other) const |
189 | { |
190 | return IsEqual(other); |
191 | } |
192 | |
193 | //! Returns true if there are differences between the |
194 | //! characters in this extended string and the other extended string. |
195 | //! Note that this method is an alias of operator !=. |
196 | Standard_EXPORT Standard_Boolean IsDifferent (const Standard_ExtString other) const; |
197 | Standard_Boolean operator != (const Standard_ExtString other) const |
198 | { |
199 | return IsDifferent(other); |
200 | } |
201 | |
202 | //! Returns true if there are differences between the |
203 | //! characters in this extended string and the other extended string. |
204 | //! Note that this method is an alias of operator !=. |
205 | Standard_EXPORT Standard_Boolean IsDifferent (const TCollection_ExtendedString& other) const; |
206 | Standard_Boolean operator != (const TCollection_ExtendedString& other) const |
207 | { |
208 | return IsDifferent(other); |
209 | } |
210 | |
211 | //! Returns TRUE if <me> is less than <other>. |
212 | Standard_EXPORT Standard_Boolean IsLess (const Standard_ExtString other) const; |
213 | Standard_Boolean operator < (const Standard_ExtString other) const |
214 | { |
215 | return IsLess(other); |
216 | } |
217 | |
218 | //! Returns TRUE if <me> is less than <other>. |
219 | Standard_EXPORT Standard_Boolean IsLess (const TCollection_ExtendedString& other) const; |
220 | Standard_Boolean operator < (const TCollection_ExtendedString& other) const |
221 | { |
222 | return IsLess(other); |
223 | } |
224 | |
225 | //! Returns TRUE if <me> is greater than <other>. |
226 | Standard_EXPORT Standard_Boolean IsGreater (const Standard_ExtString other) const; |
227 | Standard_Boolean operator > (const Standard_ExtString other) const |
228 | { |
229 | return IsGreater(other); |
230 | } |
231 | |
232 | //! Returns TRUE if <me> is greater than <other>. |
233 | Standard_EXPORT Standard_Boolean IsGreater (const TCollection_ExtendedString& other) const; |
234 | Standard_Boolean operator > (const TCollection_ExtendedString& other) const |
235 | { |
236 | return IsGreater(other); |
237 | } |
fb0b0531 |
238 | |
239 | //! Determines whether the beginning of this string instance matches the specified string. |
240 | Standard_EXPORT Standard_Boolean StartsWith (const TCollection_ExtendedString& theStartString) const; |
241 | |
242 | //! Determines whether the end of this string instance matches the specified string. |
243 | Standard_EXPORT Standard_Boolean EndsWith (const TCollection_ExtendedString& theEndString) const; |
244 | |
42cf5bc1 |
245 | //! Returns True if the ExtendedString contains only |
246 | //! "Ascii Range" characters . |
247 | Standard_EXPORT Standard_Boolean IsAscii() const; |
fb0b0531 |
248 | |
249 | //! Returns the number of 16-bit code units |
250 | //! (might be greater than number of Unicode symbols if string contains surrogate pairs). |
42cf5bc1 |
251 | Standard_EXPORT Standard_Integer Length() const; |
252 | |
253 | //! Displays <me> . |
254 | Standard_EXPORT void Print (Standard_OStream& astream) const; |
255 | friend Standard_EXPORT Standard_OStream& operator << (Standard_OStream& astream,const TCollection_ExtendedString& astring); |
256 | |
257 | //! Removes every <what> characters from <me>. |
258 | Standard_EXPORT void RemoveAll (const Standard_ExtCharacter what); |
259 | |
260 | //! Erases <ahowmany> characters from position <where>,<where> included. |
261 | Standard_EXPORT void Remove (const Standard_Integer where, const Standard_Integer ahowmany = 1); |
262 | |
263 | //! Searches a ExtendedString in <me> from the beginning |
264 | //! and returns position of first item <what> matching. |
265 | //! it returns -1 if not found. |
266 | Standard_EXPORT Standard_Integer Search (const TCollection_ExtendedString& what) const; |
267 | |
268 | //! Searches a ExtendedString in another ExtendedString from the |
269 | //! end and returns position of first item <what> matching. |
270 | //! it returns -1 if not found. |
271 | Standard_EXPORT Standard_Integer SearchFromEnd (const TCollection_ExtendedString& what) const; |
272 | |
273 | //! Replaces one character in the ExtendedString at position <where>. |
274 | //! If <where> is less than zero or greater than the length of <me> |
275 | //! an exception is raised. |
276 | Standard_EXPORT void SetValue (const Standard_Integer where, const Standard_ExtCharacter what); |
277 | |
278 | //! Replaces a part of <me> by another ExtendedString see above. |
279 | Standard_EXPORT void SetValue (const Standard_Integer where, const TCollection_ExtendedString& what); |
280 | |
281 | //! Splits this extended string into two sub-strings at position where. |
282 | //! - The second sub-string (from position |
283 | //! where + 1 of this string to the end) is |
284 | //! returned in a new extended string. |
285 | //! - this extended string is modified: its last |
286 | //! characters are removed, it becomes equal to |
287 | //! the first sub-string (from the first character to position where). |
288 | //! Example: |
289 | //! aString contains "abcdefg" |
290 | //! aString.Split(3) gives <me> = "abc" and returns "defg" |
291 | Standard_EXPORT TCollection_ExtendedString Split (const Standard_Integer where); |
292 | |
293 | //! Extracts <whichone> token from <me>. |
294 | //! By default, the <separators> is set to space and tabulation. |
295 | //! By default, the token extracted is the first one (whichone = 1). |
296 | //! <separators> contains all separators you need. |
297 | //! If no token indexed by <whichone> is found, it returns an empty AsciiString. |
298 | //! Example: |
299 | //! aString contains "This is a message" |
300 | //! aString.Token() returns "This" |
301 | //! aString.Token(" ",4) returns "message" |
302 | //! aString.Token(" ",2) returns "is" |
303 | //! aString.Token(" ",9) returns "" |
304 | //! Other separators than space character and tabulation are allowed : |
305 | //! aString contains "1234; test:message , value" |
306 | //! aString.Token("; :,",4) returns "value" |
307 | //! aString.Token("; :,",2) returns "test" |
308 | Standard_EXPORT TCollection_ExtendedString Token (const Standard_ExtString separators, const Standard_Integer whichone = 1) const; |
309 | |
310 | //! Returns pointer to ExtString |
487bf1ce |
311 | Standard_EXPORT Standard_ExtString ToExtString() const; |
fb0b0531 |
312 | |
313 | #ifdef _WIN32 |
314 | //! Returns pointer to string as wchar_t* on Windows platform where wchar_t* is considered as UTF-16 string. |
315 | //! This method is useful to pass string into wide-char system APIs, |
316 | //! and makes sense only on Windows (other systems use UTF-8 and can miss wide-char functions at all). |
317 | const Standard_WideChar* ToWideString() const { return (const Standard_WideChar*)ToExtString(); } |
318 | #endif |
319 | |
42cf5bc1 |
320 | //! Truncates <me> to <ahowmany> characters. |
321 | //! Example: me = "Hello Dolly" -> Trunc(3) -> me = "Hel" |
322 | //! Exceptions |
323 | //! Standard_OutOfRange if ahowmany is greater |
324 | //! than the length of this string. |
325 | Standard_EXPORT void Trunc (const Standard_Integer ahowmany); |
326 | |
327 | //! Returns character at position <where> in <me>. |
328 | //! If <where> is less than zero or greater than the lenght of |
329 | //! <me>, an exception is raised. |
330 | //! Example: |
331 | //! aString contains "Hello" |
332 | //! aString.Value(2) returns 'e' |
333 | //! Exceptions |
334 | //! Standard_OutOfRange if where lies outside |
335 | //! the bounds of this extended string. |
336 | Standard_EXPORT Standard_ExtCharacter Value (const Standard_Integer where) const; |
fb0b0531 |
337 | |
2b2be3fb |
338 | //! Returns a hashed value for the extended string within the range 1 .. theUpper. |
fb0b0531 |
339 | //! Note: if string is ASCII, the computed value is the same as the value computed with the HashCode function on a |
340 | //! TCollection_AsciiString string composed with equivalent ASCII characters. |
2b2be3fb |
341 | //! @param theExtendedString the extended string which hash code is to be computed |
342 | //! @param theUpperBound the upper bound of the range a computing hash code must be within |
343 | //! @return a computed hash code, in the range [1, theUpperBound] |
fb0b0531 |
344 | static Standard_Integer HashCode (const TCollection_ExtendedString& theString, |
2b2be3fb |
345 | const Standard_Integer theUpperBound) |
fb0b0531 |
346 | { |
2b2be3fb |
347 | return ::HashCode (theString.ToExtString(), theUpperBound); |
fb0b0531 |
348 | } |
349 | |
42cf5bc1 |
350 | //! Returns true if the characters in this extended |
351 | //! string are identical to the characters in the other extended string. |
352 | //! Note that this method is an alias of operator ==. |
fb0b0531 |
353 | static Standard_Boolean IsEqual (const TCollection_ExtendedString& theString1, |
354 | const TCollection_ExtendedString& theString2) |
355 | { |
356 | return theString1.IsEqual (theString2); |
357 | } |
358 | |
42cf5bc1 |
359 | //! Converts the internal <mystring> to UTF8 coding and |
360 | //! returns length of the out CString. A memory for the |
361 | //! <theCString> should be allocated before call! |
362 | Standard_EXPORT Standard_Integer ToUTF8CString (Standard_PCharacter& theCString) const; |
363 | |
364 | //! Returns expected CString length in UTF8 coding. |
365 | //! It can be used for memory calculation before converting |
366 | //! to CString containing symbols in UTF8 coding. |
367 | Standard_EXPORT Standard_Integer LengthOfCString() const; |
368 | |
42cf5bc1 |
369 | private: |
370 | |
42cf5bc1 |
371 | //! Returns true if the input CString was successfuly converted |
372 | //! to UTF8 coding |
373 | Standard_EXPORT Standard_Boolean ConvertToUnicode (const Standard_CString astring); |
374 | |
fb0b0531 |
375 | private: |
42cf5bc1 |
376 | |
fb0b0531 |
377 | Standard_PExtCharacter mystring; //!< NULL-terminated string |
378 | Standard_Integer mylength; //!< length in 16-bit code units (excluding terminating NULL symbol) |
42cf5bc1 |
379 | |
380 | }; |
381 | |
2b2be3fb |
382 | //! Computes a hash code for the given extended string, in the range [1, theUpperBound] |
383 | //! @param theExtendedString the extended string which hash code is to be computed |
384 | //! @param theUpperBound the upper bound of the range a computing hash code must be within |
385 | //! @return a computed hash code, in the range [1, theUpperBound] |
386 | inline Standard_Integer HashCode (const TCollection_ExtendedString& theExtendedString, |
387 | const Standard_Integer theUpperBound) |
fb0b0531 |
388 | { |
2b2be3fb |
389 | return TCollection_ExtendedString::HashCode (theExtendedString, theUpperBound); |
fb0b0531 |
390 | } |
42cf5bc1 |
391 | |
392 | #endif // _TCollection_ExtendedString_HeaderFile |