[occt.git] / src / NCollection / NCollection_UtfIterator.lxx

// Created on: 2013-01-28
// Created by: Kirill GAVRILOV
// Copyright (c) 2013-2014 OPEN CASCADE SAS
//
// This file is part of Open CASCADE Technology software library.
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License version 2.1 as published
// by the Free Software Foundation, with special exception defined in the file
// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
// distribution for complete text of the license and disclaimer of any warranty.
//
// Alternatively, this file may be used under the terms of Open CASCADE
// commercial license or contractual agreement.

// Portions of code are copyrighted by Unicode, Inc.
//
// Copyright (c) 2001-2004 Unicode, Inc.
//
// Disclaimer
//
// This source code is provided as is by Unicode, Inc. No claims are
// made as to fitness for any particular purpose. No warranties of any
// kind are expressed or implied. The recipient agrees to determine
// applicability of information provided. If this file has been
// purchased on magnetic or optical media from Unicode, Inc., the
// sole remedy for any claim will be exchange of defective media
// within 90 days of receipt.
//
// Limitations on Rights to Redistribute This Code
//
// Unicode, Inc. hereby grants the right to freely use the information
// supplied in this file in the creation of products supporting the
// Unicode Standard, and to make copies of this file in any form
// for internal or external distribution as long as this notice
// remains attached.

//! Lookup table indexed by the first (lead) byte of a UTF-8 sequence.
//! The value is the number of bytes that follow the lead byte,
//! i.e. the total length of the sequence minus one.
//! Bytes 0x00-0xBF map to 0, so a stray continuation byte (0x80-0xBF)
//! found in lead position is consumed as a one-byte symbol.
//! Bytes 0xF8-0xFF map to 4 and 5 trailing bytes; readUTF8() marks
//! these two cases as illegal UTF-8 but still decodes them.
template<typename Type>
const unsigned char NCollection_UtfIterator<Type>::UTF8_BYTES_MINUS_ONE[256] =
{
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0x1F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 - 0x3F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 - 0x5F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 - 0x7F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 - 0x9F
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xA0 - 0xBF
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xC0 - 0xDF
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5  // 0xE0 - 0xFF
};

//! Magic values subtracted from the accumulated value during UTF-8 decoding.
//! The table is indexed by the number of trailing bytes (0..5) and therefore
//! holds one value per possible UTF-8 sequence length.
//! readUTF8() sums the raw bytes of a sequence, shifting the sum left by 6 bits
//! before each following byte; the entry removes the marker bits of the lead byte
//! and of the continuation bytes (0x80 each) which were summed together with the payload,
//! e.g. for one trailing byte: (0xC0 << 6) + 0x80 = 0x3080.
template<typename Type>
const unsigned long NCollection_UtfIterator<Type>::offsetsFromUTF8[6] =
{
  0x00000000UL, 0x00003080UL, 0x000E2080UL,
  0x03C82080UL, 0xFA082080UL, 0x82082080UL
};

//! Marker bits OR-ed into the first byte of an encoded UTF-8 sequence.
//! The table is indexed by the total number of bytes in the sequence;
//! within this file only the entries 1..4 are used (see GetUtf8()).
template<typename Type>
const unsigned char NCollection_UtfIterator<Type>::UTF8_FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

// =======================================================================
// function : readUTF8
// purpose  : Decode the UTF-8 sequence starting at myPosNext into myCharUtf32
//            and leave myPosNext at the first byte after that sequence.
//            The length of the sequence is taken from its lead byte alone;
//            the input is NOT validated - continuation bytes are not checked
//            and a truncated sequence is read beyond its end.
// =======================================================================
template<typename Type>
inline void NCollection_UtfIterator<Type>::readUTF8()
{
  // the bytes are accessed as unsigned values, so that unsigned arithmetic is used below
  Standard_Utf8UChar* aByteIter = (Standard_Utf8UChar* )myPosNext;
  const unsigned char aNbTrailing = UTF8_BYTES_MINUS_ONE[*aByteIter];

  // sum up the raw bytes, making room for 6 more bits before each following byte;
  // remember that 4 and 5 trailing bytes mean illegal UTF-8, decoded nevertheless
  myCharUtf32 = 0;
  for (unsigned char aNbLeft = aNbTrailing; aNbLeft != 0; --aNbLeft)
  {
    myCharUtf32 += *aByteIter++;
    myCharUtf32 <<= 6;
  }
  myCharUtf32 += *aByteIter++;

  // drop the lead / continuation byte markers accumulated together with the payload bits
  myCharUtf32 -= offsetsFromUTF8[aNbTrailing];
  myPosNext = (Type* )aByteIter;
}

// magic numbers

// GetUtf8() builds each continuation byte as ((value | UTF8_BYTE_MARK) & UTF8_BYTE_MASK):
// the mark sets the top bit and the mask clears the next one, giving the 10xxxxxx bit pattern
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MASK = 0xBF;
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MARK = 0x80;
// inclusive range of the first (high) code unit of a UTF-16 surrogate pair
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_START = 0xD800;
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_END   = 0xDBFF;
// inclusive range of the second (low) code unit of a UTF-16 surrogate pair
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_START  = 0xDC00;
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_END    = 0xDFFF;
// number of bits by which the payload of the high surrogate is shifted within the code point
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_SHIFT = 10;
// offset added to the combined pair by readUTF16() and subtracted from the code point by GetUtf16()
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_BASE   = 0x0010000UL;
// mask selecting the 10 low bits, which GetUtf16() stores into the low surrogate
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_MASK   = 0x3FFUL;
// greatest code point which fits into a single UTF-16 code unit
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_BMP   = 0x0000FFFFUL;
// greatest code point accepted by the GetUtf*() / Advance*() methods; anything above is treated as illegal
template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_LEGAL = 0x0010FFFFUL;

// =======================================================================
// function : readUTF16
// purpose  : Decode one symbol starting at myPosNext into myCharUtf32
//            and leave myPosNext right after the consumed code unit(s).
//            A high surrogate followed by a low surrogate is combined into
//            a single code point; any other code unit is stored unchanged.
// =======================================================================
template<typename Type> inline
void NCollection_UtfIterator<Type>::readUTF16()
{
  const Standard_Utf32Char aLead = *myPosNext++;
  myCharUtf32 = aLead;
  if (aLead < UTF16_SURROGATE_HIGH_START
   || aLead > UTF16_SURROGATE_HIGH_END)
  {
    // not the first half of a surrogate pair - a single code unit is the whole symbol
    return;
  }

  // peek at the next code unit without consuming it yet
  const Standard_Utf32Char aTrail = *myPosNext;
  if (aTrail < UTF16_SURROGATE_LOW_START
   || aTrail > UTF16_SURROGATE_LOW_END)
  {
    // the pair is incomplete - keep the lone high surrogate as the symbol value
    return;
  }

  // complete the surrogate pair
  ++myPosNext;
  myCharUtf32 = Standard_Utf32Char(((aLead - UTF16_SURROGATE_HIGH_START) << UTF16_SURROGATE_HIGH_SHIFT)
                                 + (aTrail - UTF16_SURROGATE_LOW_START) + UTF16_SURROGATE_LOW_BASE);
}

// =======================================================================
// function : AdvanceBytesUtf8
// purpose  : Return the number of bytes (1..4) taken by the current symbol
//            when encoded in UTF-8, or 0 when the symbol cannot be encoded
//            (surrogate value or value above UTF32_MAX_LEGAL).
// =======================================================================
template<typename Type> inline
Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf8() const
{
  // UTF-16 surrogate values are illegal in UTF-32
  const bool isSurrogate = myCharUtf32 >= UTF16_SURROGATE_HIGH_START
                        && myCharUtf32 <= UTF16_SURROGATE_LOW_END;
  if (isSurrogate || myCharUtf32 > UTF32_MAX_LEGAL)
  {
    return 0;
  }

  // legal symbol - the length is defined by the range the value falls into
  if (myCharUtf32 < Standard_Utf32Char(0x80))
  {
    return 1;
  }
  if (myCharUtf32 < Standard_Utf32Char(0x800))
  {
    return 2;
  }
  if (myCharUtf32 < Standard_Utf32Char(0x10000))
  {
    return 3;
  }
  return 4;
}

// =======================================================================
// function : GetUtf8
// purpose  : Overload taking a buffer of (plain) Standard_Utf8Char;
//            forwards to the Standard_Utf8UChar version of the method.
// =======================================================================
template<typename Type> inline
Standard_Utf8Char* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8Char* theBuffer) const
{
  // the encoder relies on unsigned arithmetic, so let it see the buffer as unsigned bytes
  Standard_Utf8UChar* const aBufferEnd = GetUtf8 (reinterpret_cast<Standard_Utf8UChar*> (theBuffer));
  return reinterpret_cast<Standard_Utf8Char*> (aBufferEnd);
}

// =======================================================================
// function : GetUtf8
// purpose  : Write the current symbol into theBuffer encoded in UTF-8
//            and return the pointer right after the written bytes.
//            Nothing is written and theBuffer is returned unchanged
//            when the symbol is illegal (surrogate value or value above
//            UTF32_MAX_LEGAL). No terminating zero is added.
// =======================================================================
template<typename Type> inline
Standard_Utf8UChar* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8UChar* theBuffer) const
{
  // 0 is returned for symbols which cannot be encoded, 1..4 otherwise
  const Standard_Integer aNbBytes = AdvanceBytesUtf8();
  if (aNbBytes == 0)
  {
    return theBuffer;
  }

  // fill in the continuation bytes from the last one down to the second one,
  // each of them taking the 6 lowest bits which are still left
  Standard_Utf32Char aBits = myCharUtf32;
  for (Standard_Integer aByteIter = aNbBytes - 1; aByteIter > 0; --aByteIter)
  {
    theBuffer[aByteIter] = Standard_Utf8UChar((aBits | UTF8_BYTE_MARK) & UTF8_BYTE_MASK);
    aBits >>= 6;
  }

  // the remaining bits go into the first byte together with the length marker
  theBuffer[0] = Standard_Utf8UChar (aBits | UTF8_FIRST_BYTE_MARK[aNbBytes]);
  return theBuffer + aNbBytes;
}

// =======================================================================
// function : AdvanceBytesUtf16
// purpose  : Return the number of bytes taken by the current symbol
//            when encoded in UTF-16 (0 for a symbol which cannot be encoded).
// =======================================================================
template<typename Type> inline
Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf16() const
{
  // number of code units multiplied by the size of a single code unit
  const Standard_Integer aNbCodeUnits = AdvanceCodeUnitsUtf16();
  return Standard_Integer(aNbCodeUnits * sizeof(Standard_Utf16Char));
}

// =======================================================================
// function : AdvanceCodeUnitsUtf16
// purpose  : Return the number of 16-bit code units (1 or 2) taken by
//            the current symbol when encoded in UTF-16, or 0 when the
//            symbol cannot be encoded (surrogate value or value above
//            UTF32_MAX_LEGAL).
// =======================================================================
template<typename Type> inline
Standard_Integer NCollection_UtfIterator<Type>::AdvanceCodeUnitsUtf16() const
{
  if (myCharUtf32 > UTF32_MAX_LEGAL)
  {
    // illegal
    return 0;
  }
  if (myCharUtf32 > UTF32_MAX_BMP)
  {
    // symbol within range 0x10000 - 0x10FFFF is stored as a surrogate pair
    return 2;
  }

  // symbol <= 0xFFFF takes a single code unit, unless it is one of UTF-16 surrogate values,
  // which are illegal in UTF-32 (the reserved values 0xFFFE and 0xFFFF are not rejected here)
  const bool isSurrogate = myCharUtf32 >= UTF16_SURROGATE_HIGH_START
                        && myCharUtf32 <= UTF16_SURROGATE_LOW_END;
  return isSurrogate ? 0 : 1;
}

// =======================================================================
// function : GetUtf16
// purpose  : Write the current symbol into theBuffer encoded in UTF-16
//            and return the pointer right after the written code units.
//            Nothing is written and theBuffer is returned unchanged
//            when the symbol is illegal (surrogate value or value above
//            UTF32_MAX_LEGAL). No terminating zero is added.
// =======================================================================
template<typename Type> inline
Standard_Utf16Char* NCollection_UtfIterator<Type>::GetUtf16 (Standard_Utf16Char* theBuffer) const
{
  if (myCharUtf32 > UTF32_MAX_LEGAL)
  {
    // illegal
    return theBuffer;
  }

  if (myCharUtf32 > UTF32_MAX_BMP)
  {
    // symbol above 0xFFFF is split into a surrogate pair:
    // the upper bits go into the high surrogate, the 10 lowest bits into the low one
    const Standard_Utf32Char anOffset = myCharUtf32 - UTF16_SURROGATE_LOW_BASE;
    theBuffer[0] = Standard_Utf16Char((anOffset >> UTF16_SURROGATE_HIGH_SHIFT) + UTF16_SURROGATE_HIGH_START);
    theBuffer[1] = Standard_Utf16Char((anOffset &  UTF16_SURROGATE_LOW_MASK)   + UTF16_SURROGATE_LOW_START);
    return theBuffer + 2;
  }

  // symbol <= 0xFFFF; UTF-16 surrogate values are illegal in UTF-32
  // (the reserved values 0xFFFE and 0xFFFF are not rejected here)
  if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
   && myCharUtf32 <= UTF16_SURROGATE_LOW_END)
  {
    return theBuffer;
  }

  // single code unit holding the symbol value as is
  *theBuffer = Standard_Utf16Char(myCharUtf32);
  return theBuffer + 1;
}

// =======================================================================
// function : GetUtf32
// purpose  : Write the current symbol into theBuffer as a single UTF-32
//            code unit and return the pointer right after it.
//            The value is stored as is, without any validity check.
// =======================================================================
template<typename Type> inline
Standard_Utf32Char* NCollection_UtfIterator<Type>::GetUtf32 (Standard_Utf32Char* theBuffer) const
{
  *theBuffer = myCharUtf32;
  return theBuffer + 1;
}
Commit	Line	Data
a174a3c5	1	// Created on: 2013-01-28
a174a3c5	2	// Created by: Kirill GAVRILOV
d5f74e42	3	// Copyright (c) 2013-2014 OPEN CASCADE SAS
a174a3c5	4	//
973c2be1	5	// This file is part of Open CASCADE Technology software library.
973c2be1	6	//
d5f74e42	7	// This library is free software; you can redistribute it and/or modify it under
d5f74e42	8	// the terms of the GNU Lesser General Public License version 2.1 as published
973c2be1	9	// by the Free Software Foundation, with special exception defined in the file
	10	// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
	11	// distribution for complete text of the license and disclaimer of any warranty.
	12	//
	13	// Alternatively, this file may be used under the terms of Open CASCADE
	14	// commercial license or contractual agreement.
a174a3c5	15
	16	// Portions of code are copyrighted by Unicode, Inc.
	17	//
d94fa32e	18	// Copyright (c) 2001-2004 Unicode, Inc.
a174a3c5	19	//
	20	// Disclaimer
	21	//
	22	// This source code is provided as is by Unicode, Inc. No claims are
	23	// made as to fitness for any particular purpose. No warranties of any
	24	// kind are expressed or implied. The recipient agrees to determine
	25	// applicability of information provided. If this file has been
	26	// purchased on magnetic or optical media from Unicode, Inc., the
	27	// sole remedy for any claim will be exchange of defective media
	28	// within 90 days of receipt.
	29	//
	30	// Limitations on Rights to Redistribute This Code
	31	//
	32	// Unicode, Inc. hereby grants the right to freely use the information
	33	// supplied in this file in the creation of products supporting the
	34	// Unicode Standard, and to make copies of this file in any form
	35	// for internal or external distribution as long as this notice
	36	// remains attached.
	37
	38	//! The first character in a UTF-8 sequence indicates how many bytes
	39	//! to read (among other things).
	40	template<typename Type>
	41	const unsigned char NCollection_UtfIterator<Type>::UTF8_BYTES_MINUS_ONE[256] =
	42	{
	43	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	44	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	45	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	46	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	47	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	48	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	49	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	50	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
	51	};
	52
	53	//! Magic values subtracted from a buffer value during UTF-8 conversion.
	54	//! This table contains as many values as there might be trailing bytes
	55	//! in a UTF-8 sequence.
	56	template<typename Type>
	57	const unsigned long NCollection_UtfIterator<Type>::offsetsFromUTF8[6] =
	58	{
	59	0x00000000UL, 0x00003080UL, 0x000E2080UL,
	60	0x03C82080UL, 0xFA082080UL, 0x82082080UL
	61	};
	62
	63	//! The first character in a UTF-8 sequence indicates how many bytes to read.
	64	template<typename Type>
	65	const unsigned char NCollection_UtfIterator<Type>::UTF8_FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
	66
	67	// =======================================================================
	68	// function : readUTF8
	69	// purpose : Get a UTF-8 character; leave the tracking pointer at the start of the next character.
	70	// Not protected against invalid UTF-8.
	71	// =======================================================================
	72	template<typename Type>
	73	inline void NCollection_UtfIterator<Type>::readUTF8()
	74	{
	75	// unsigned arithmetic used
	76	Standard_Utf8UChar* aPos = (Standard_Utf8UChar* )myPosNext;
	77	const unsigned char aBytesToRead = UTF8_BYTES_MINUS_ONE[*aPos];
	78	myCharUtf32 = 0;
	79	switch (aBytesToRead)
	80	{
	81	case 5: myCharUtf32 += *aPos++; myCharUtf32 <<= 6; // remember, illegal UTF-8
b1811c1d	82	Standard_FALLTHROUGH
a174a3c5	83	case 4: myCharUtf32 += *aPos++; myCharUtf32 <<= 6; // remember, illegal UTF-8
b1811c1d	84	Standard_FALLTHROUGH
a174a3c5	85	case 3: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
b1811c1d	86	Standard_FALLTHROUGH
a174a3c5	87	case 2: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
b1811c1d	88	Standard_FALLTHROUGH
a174a3c5	89	case 1: myCharUtf32 += *aPos++; myCharUtf32 <<= 6;
b1811c1d	90	Standard_FALLTHROUGH
a174a3c5	91	case 0: myCharUtf32 += *aPos++;
	92	}
	93	myCharUtf32 -= offsetsFromUTF8[aBytesToRead];
	94	myPosNext = (Type* )aPos;
	95	}
	96
	97	// magic numbers
	98	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MASK = 0xBF;
	99	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF8_BYTE_MARK = 0x80;
	100	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_START = 0xD800;
	101	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_END = 0xDBFF;
	102	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_START = 0xDC00;
	103	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_END = 0xDFFF;
	104	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_HIGH_SHIFT = 10;
	105	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_BASE = 0x0010000UL;
	106	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF16_SURROGATE_LOW_MASK = 0x3FFUL;
	107	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_BMP = 0x0000FFFFUL;
	108	template<typename Type> const unsigned long NCollection_UtfIterator<Type>::UTF32_MAX_LEGAL = 0x0010FFFFUL;
	109
	110	// =======================================================================
	111	// function : readUTF16
	112	// purpose :
	113	// =======================================================================
	114	template<typename Type> inline
	115	void NCollection_UtfIterator<Type>::readUTF16()
	116	{
	117	Standard_Utf32Char aChar = *myPosNext++;
	118	// if we have the first half of the surrogate pair
	119	if (aChar >= UTF16_SURROGATE_HIGH_START
	120	&& aChar <= UTF16_SURROGATE_HIGH_END)
	121	{
656ec77a	122	const Standard_Utf32Char aChar2 = *myPosNext;
a174a3c5	123	// complete the surrogate pair
	124	if (aChar2 >= UTF16_SURROGATE_LOW_START
	125	&& aChar2 <= UTF16_SURROGATE_LOW_END)
	126	{
	127	aChar = ((aChar - UTF16_SURROGATE_HIGH_START) << UTF16_SURROGATE_HIGH_SHIFT)
	128	+ (aChar2 - UTF16_SURROGATE_LOW_START) + UTF16_SURROGATE_LOW_BASE;
	129	++myPosNext;
	130	}
	131	}
	132	myCharUtf32 = aChar;
	133	}
	134
	135	// =======================================================================
	136	// function : AdvanceBytesUtf8
	137	// purpose :
	138	// =======================================================================
	139	template<typename Type> inline
	140	Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf8() const
	141	{
	142	if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
	143	&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
	144	{
	145	// UTF-16 surrogate values are illegal in UTF-32
	146	return 0;
	147	}
	148	else if (myCharUtf32 < Standard_Utf32Char(0x80))
	149	{
	150	return 1;
	151	}
	152	else if (myCharUtf32 < Standard_Utf32Char(0x800))
	153	{
	154	return 2;
	155	}
	156	else if (myCharUtf32 < Standard_Utf32Char(0x10000))
	157	{
	158	return 3;
	159	}
	160	else if (myCharUtf32 <= UTF32_MAX_LEGAL)
	161	{
	162	return 4;
	163	}
	164	else
	165	{
	166	// illegal
	167	return 0;
	168	}
	169	}
	170
	171	// =======================================================================
	172	// function : GetUtf8
	173	// purpose :
	174	// =======================================================================
	175	template<typename Type> inline
	176	Standard_Utf8Char* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8Char* theBuffer) const
	177	{
	178	// unsigned arithmetic used
	179	return (Standard_Utf8Char* )GetUtf8 ((Standard_Utf8UChar* )theBuffer);
	180	}
	181
	182	// =======================================================================
	183	// function : GetUtf8
	184	// purpose :
	185	// =======================================================================
	186	template<typename Type> inline
187	Standard_Utf8UChar* NCollection_UtfIterator<Type>::GetUtf8 (Standard_Utf8UChar* theBuffer) const
188	{
189	Standard_Utf32Char aChar = myCharUtf32;
190	if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
191	&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
192	{
193	// UTF-16 surrogate values are illegal in UTF-32
194	return theBuffer;
195	}
196	else if (myCharUtf32 < Standard_Utf32Char(0x80))
197	{
198	*theBuffer++ = Standard_Utf8UChar (aChar \| UTF8_FIRST_BYTE_MARK[1]);
199	return theBuffer;
200	}
201	else if (myCharUtf32 < Standard_Utf32Char(0x800))
202	{
203	*++theBuffer = Standard_Utf8UChar((aChar \| UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
204	*--theBuffer = Standard_Utf8UChar (aChar \| UTF8_FIRST_BYTE_MARK[2]);
205	return theBuffer + 2;
206	}
207	else if (myCharUtf32 < Standard_Utf32Char(0x10000))
208	{
209	theBuffer += 3;
210	*--theBuffer = Standard_Utf8UChar((aChar \| UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
211	*--theBuffer = Standard_Utf8UChar((aChar \| UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
212	*--theBuffer = Standard_Utf8UChar (aChar \| UTF8_FIRST_BYTE_MARK[3]);
213	return theBuffer + 3;
214	}
215	else if (myCharUtf32 <= UTF32_MAX_LEGAL)
216	{
217	theBuffer += 4;
218	*--theBuffer = Standard_Utf8UChar((aChar \| UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
219	*--theBuffer = Standard_Utf8UChar((aChar \| UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
220	*--theBuffer = Standard_Utf8UChar((aChar \| UTF8_BYTE_MARK) & UTF8_BYTE_MASK); aChar >>= 6;
221	*--theBuffer = Standard_Utf8UChar (aChar \| UTF8_FIRST_BYTE_MARK[4]);
222	return theBuffer + 4;
223	}
224	else
225	{
226	// illegal
227	return theBuffer;
228	}
229	}
230
231	// =======================================================================
232	// function : AdvanceBytesUtf16
233	// purpose :
234	// =======================================================================
235	template<typename Type> inline
236	Standard_Integer NCollection_UtfIterator<Type>::AdvanceBytesUtf16() const
fb0b0531	237	{
	238	return AdvanceCodeUnitsUtf16() * sizeof(Standard_Utf16Char);
	239	}
	240
	241	// =======================================================================
	242	// function : AdvanceCodeUnitsUtf16
	243	// purpose :
	244	// =======================================================================
	245	template<typename Type> inline
	246	Standard_Integer NCollection_UtfIterator<Type>::AdvanceCodeUnitsUtf16() const
a174a3c5	247	{
	248	if (myCharUtf32 <= UTF32_MAX_BMP) // target is a character <= 0xFFFF
	249	{
	250	// UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values
	251	if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
	252	&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
	253	{
	254	return 0;
	255	}
	256	else
	257	{
fb0b0531	258	return 1;
a174a3c5	259	}
	260	}
	261	else if (myCharUtf32 > UTF32_MAX_LEGAL)
	262	{
	263	// illegal
	264	return 0;
	265	}
	266	else
	267	{
	268	// target is a character in range 0xFFFF - 0x10FFFF
	269	// surrogate pair
fb0b0531	270	return 2;
a174a3c5	271	}
	272	}
	273
	274	// =======================================================================
	275	// function : GetUtf16
	276	// purpose :
	277	// =======================================================================
	278	template<typename Type> inline
	279	Standard_Utf16Char* NCollection_UtfIterator<Type>::GetUtf16 (Standard_Utf16Char* theBuffer) const
	280	{
	281	if (myCharUtf32 <= UTF32_MAX_BMP) // target is a character <= 0xFFFF
	282	{
	283	// UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values
	284	if (myCharUtf32 >= UTF16_SURROGATE_HIGH_START
	285	&& myCharUtf32 <= UTF16_SURROGATE_LOW_END)
	286	{
	287	return theBuffer;
	288	}
	289	else
	290	{
	291	*theBuffer++ = Standard_Utf16Char(myCharUtf32);
	292	return theBuffer;
	293	}
	294	}
	295	else if (myCharUtf32 > UTF32_MAX_LEGAL)
	296	{
	297	// illegal
	298	return theBuffer;
	299	}
	300	else
	301	{
	302	// surrogate pair
	303	Standard_Utf32Char aChar = myCharUtf32 - UTF16_SURROGATE_LOW_BASE;
	304	*theBuffer++ = Standard_Utf16Char((aChar >> UTF16_SURROGATE_HIGH_SHIFT) + UTF16_SURROGATE_HIGH_START);
	305	*theBuffer++ = Standard_Utf16Char((aChar & UTF16_SURROGATE_LOW_MASK) + UTF16_SURROGATE_LOW_START);
	306	return theBuffer;
	307	}
	308	}
	309
	310	// =======================================================================
	311	// function : GetUtf32
	312	// purpose :
	313	// =======================================================================
	314	template<typename Type> inline
	315	Standard_Utf32Char* NCollection_UtfIterator<Type>::GetUtf32 (Standard_Utf32Char* theBuffer) const
	316	{
	317	*theBuffer++ = myCharUtf32;
	318	return theBuffer;
	319	}