1 // Created on: 1996-09-26
2 // Created by: Arnaud BOUZY
3 // Copyright (c) 1996-1999 Matra Datavision
4 // Copyright (c) 1999-2014 OPEN CASCADE SAS
6 // This file is part of Open CASCADE Technology software library.
8 // This library is free software; you can redistribute it and/or modify it under
9 // the terms of the GNU Lesser General Public License version 2.1 as published
10 // by the Free Software Foundation, with special exception defined in the file
11 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
12 // distribution for complete text of the license and disclaimer of any warranty.
14 // Alternatively, this file may be used under the terms of Open CASCADE
15 // commercial license or contractual agreement.
18 #include <Resource_ConvertUnicode.hxx>
19 #include <Resource_Manager.hxx>
20 #include <Resource_Unicode.hxx>
21 #include <TCollection_AsciiString.hxx>
22 #include <TCollection_ExtendedString.hxx>
23 #include <NCollection_UtfString.hxx>
24 #include <Standard_NotImplemented.hxx>
25 #include "Resource_CodePages.pxx"
26 #include "Resource_GBK.pxx"
27 #include "Resource_Big5.pxx"
// Byte-range predicates for Japanese encodings. In the visible code only
// issjis1 and iseuc are referenced (by the SJIS and EUC converters below).
// NOTE(review): every macro expands its argument more than once, so an
// argument with side effects (for example *p++) must not be passed.
// True when c lies in 0x21..0x7e.
29 #define isjis(c) (((c)>=0x21 && (c)<=0x7e))
// True when c lies in 0xa1..0xfe.
30 #define iseuc(c) (((c)>=0xa1 && (c)<=0xfe))
// True when c lies in 0x81..0x9f or 0xe0..0xef.
31 #define issjis1(c) (((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xef))
// True when c lies in 0x40..0xfc and is not 0x7f
// (presumably the SJIS second-byte range, judging by the name - confirm).
33 #define issjis2(c) ((c)>=0x40 && (c)<=0xfc && (c)!=0x7f)
// True when c lies in 0xa0..0xdf
// (presumably half-width katakana, judging by the name - confirm).
35 #define ishankana(c) ((c)>=0xa0 && (c)<=0xdf)
37 static inline Standard_Boolean isshift (unsigned char c) { return c >= 0x80; }
38 static inline Standard_Boolean isshift (unsigned int c) { return c >= 0x80 && c <= 0xff; }
// Converts the Shift-JIS byte string fromstr to Unicode and adds the resulting
// characters to tostr with AssignCat.
// A byte accepted by issjis1() is handled as the first byte of a two-byte
// character; the other visible branch appends the byte as a single character.
// NOTE(review): the declarations of ph/pl, the pointer increments, the else
// line and the closing braces are not visible in this listing - confirm
// against the complete file.
40 void Resource_Unicode::ConvertSJISToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr)
// Read the input as unsigned bytes so values >= 0x80 compare correctly.
44 unsigned char* currentstr = ((unsigned char*) fromstr);
46 // BIG ENDIAN USED HERE
// Loop until the terminating NUL byte.
47 while(*currentstr != '\0') {
48 if (issjis1(*currentstr)) {
// High (first) byte of the pair.
50 ph = ((unsigned int) *currentstr);
51 // Be careful with first and second !!
// Low (second) byte of the pair (presumably read after currentstr was advanced - confirm).
55 pl = ((unsigned int) *currentstr);
// ph/pl are passed by address and then recombined as high/low byte of one 16-bit character.
58 Resource_sjis_to_unicode(&ph,&pl);
59 Standard_ExtCharacter curcar = ((Standard_ExtCharacter) ((ph << 8) | pl));
60 TCollection_ExtendedString curext(curcar);
61 tostr.AssignCat(curext);
// Single-byte case: the byte is appended as a one-character string.
64 TCollection_ExtendedString curext(((char) *currentstr));
66 tostr.AssignCat(curext);
// Converts the EUC byte string fromstr to Unicode and adds the resulting
// characters to tostr with AssignCat.
// A byte accepted by iseuc() (0xa1..0xfe) is handled as the first byte of a
// two-byte character; the other visible branch appends the byte as a single
// character.
// NOTE(review): the declarations of ph/pl, the pointer increments, the else
// line and the closing braces are not visible in this listing - confirm
// against the complete file.
72 void Resource_Unicode::ConvertEUCToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr)
// Read the input as unsigned bytes so values >= 0x80 compare correctly.
76 unsigned char* currentstr = ((unsigned char*) fromstr);
78 // BIG ENDIAN USED HERE
// Loop until the terminating NUL byte.
79 while(*currentstr != '\0') {
80 if (iseuc(*currentstr)) {
// High (first) byte of the pair.
82 ph = ((unsigned int) *currentstr);
83 // Be careful with first and second !!
// Low (second) byte of the pair (presumably read after currentstr was advanced - confirm).
87 pl = ((unsigned int) *currentstr);
// ph/pl are passed by address and then recombined as high/low byte of one 16-bit character.
90 Resource_euc_to_unicode(&ph,&pl);
91 Standard_ExtCharacter curcar = ((Standard_ExtCharacter) ((ph << 8) | pl));
92 TCollection_ExtendedString curext(curcar);
93 tostr.AssignCat(curext);
// Single-byte case: the byte is appended as a one-character string.
96 TCollection_ExtendedString curext(((char) *currentstr));
98 tostr.AssignCat(curext);
// Converts the GB byte string fromstr to Unicode and adds the resulting
// characters to tostr with AssignCat.
// A byte accepted by isshift() (0x80 or above) is handled as the first byte of
// a two-byte character; the other visible branch appends the byte as a single
// character.
// NOTE(review): the declarations of ph/pl, the pointer increments, the else
// line and the closing braces are not visible in this listing - confirm
// against the complete file.
103 void Resource_Unicode::ConvertGBToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr)
// Read the input as unsigned bytes so values >= 0x80 compare correctly.
107 unsigned char* currentstr = ((unsigned char*) fromstr);
109 // BIG ENDIAN USED HERE
// Loop until the terminating NUL byte.
110 while(*currentstr != '\0') {
111 if (isshift(*currentstr)) {
// High (first) byte of the pair.
113 ph = ((unsigned int) *currentstr);
114 // Be careful with first and second !!
// Low (second) byte of the pair (presumably read after currentstr was advanced - confirm).
118 pl = ((unsigned int) *currentstr);
// ph/pl are passed by address and then recombined as high/low byte of one 16-bit character.
121 Resource_gb_to_unicode(&ph,&pl);
122 Standard_ExtCharacter curcar = ((Standard_ExtCharacter) ((ph << 8) | pl));
123 TCollection_ExtendedString curext(curcar);
124 tostr.AssignCat(curext);
// Single-byte case: the byte is appended as a one-character string.
127 TCollection_ExtendedString curext(((char) *currentstr));
129 tostr.AssignCat(curext);
// Converts the GBK byte string fromstr to Unicode, adding characters to tostr.
// Returns Standard_True when the end of the input is reached; the visible
// error paths return Standard_False.
// The decoder keeps up to three pending bytes (gb1, gb2, gb3) between loop
// iterations and looks code points up in the gbkuni table.
// NOTE(review): several lines of this function (the first "if" of the state
// chain, else lines, pointer increments, state resets, closing braces) are not
// visible in this listing; the comments describe only the visible statements.
134 Standard_Boolean Resource_Unicode::ConvertGBKToUnicode(const Standard_CString fromstr, TCollection_ExtendedString& tostr)
// Read the input as unsigned bytes so range comparisons work for values >= 0x80.
138 unsigned char* currentch = ((unsigned char*) fromstr);
// Pending bytes of a multi-byte sequence; 0x00 means "no byte pending".
139 unsigned int gb1 = 0x00, gb2 = 0x00, gb3 = 0x00;
// Loop until the terminating NUL byte.
141 while(*currentch != '\0') {
// Fourth byte of a four-byte sequence (presumably reached when gb3 is set - confirm):
// it must be an ASCII digit 0x30..0x39.
144 if (!(*currentch >= 0x30 && *currentch <= 0x39))
// Invalid fourth byte: the current byte, gb3 and gb2 are put back into tostr as characters.
// NOTE(review): Insert is called with position 0 here, while ConvertBig5ToUnicode
// inserts at Length()+1 - confirm that position 0 is valid for Insert.
146 TCollection_ExtendedString curext3(((char) *currentch));
147 TCollection_ExtendedString curext2(((char) gb3));
148 TCollection_ExtendedString curext1(((char) gb2));
149 tostr.Insert(0, curext3);
150 tostr.Insert(0, curext2);
151 tostr.Insert(0, curext1);
155 return Standard_False;
// Linear index built from the four bytes: gb1 and gb3 are offset by 0x81,
// gb2 and the current byte by 0x30 (digit range).
158 unsigned int codepnt = ((gb1 - 0x81) * (10 * 126 * 10)) + ((gb2 - 0x30) * (10 * 126)) + ((gb3 - 0x81) * 10) + *currentch - 0x30;
// Table lookup; the resulting 16-bit value is appended as one character.
161 unsigned short uni = gbkuni [codepnt];
162 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)uni);
163 TCollection_ExtendedString curext(curcar);
164 tostr.AssignCat(curext);
169 return Standard_False;
// Third byte of a four-byte sequence: must be in 0x81..0xFE to be stored in gb3.
171 else if (gb2 != 0x00)
173 if (*currentch >= 0x81 && *currentch <= 0xFE)
175 gb3 = (unsigned int)(*currentch);
// Invalid third byte: the current byte and gb2 are put back into tostr, then the conversion fails.
179 TCollection_ExtendedString curext2(((char) *currentch));
180 TCollection_ExtendedString curext1(((char) gb2));
181 tostr.Insert(0, curext2);
182 tostr.Insert(0, curext1);
185 return Standard_False;
// Second byte after a lead byte.
187 else if (gb1 != 0x00)
// A digit 0x30..0x39 here is stored in gb2 (start of a four-byte sequence).
189 if (*currentch >= 0x30 && *currentch <= 0x39)
191 gb2 = (unsigned int)(*currentch);
// Two-byte sequence: compute the table index from lead and trail byte.
196 unsigned int lead = gb1;
197 unsigned int pointer = 0;
// Trail bytes below 0x7F are offset by 0x40, the others by 0x41.
199 unsigned int offset = *currentch < 0x7F ? 0x40 : 0x41;
// Valid trail bytes are 0x40..0x7E and 0x80..0xFE; 190 entries per lead byte.
201 if ((*currentch >= 0x40 && *currentch <= 0x7E) ||
202 (*currentch >= 0x80 && *currentch <= 0xFE))
204 pointer = (lead - 0x81) * 190 + (*currentch - offset);
// Table lookup; the resulting 16-bit value is appended as one character.
208 unsigned short uni = gbkuni [pointer];
209 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)uni);
210 TCollection_ExtendedString curext(curcar);
211 tostr.AssignCat(curext);
// Failure path for a trail byte: a byte <= 0x7F is put back into tostr before failing.
216 if (*currentch <= 0x7F)
219 TCollection_ExtendedString curext(((char) *currentch));
221 tostr.Insert(0, curext);
224 return Standard_False;
// No byte pending: a byte <= 0x7F is appended unchanged.
228 if (*currentch <= 0x7F)
231 TCollection_ExtendedString curext(((char) *currentch));
233 tostr.AssignCat(curext);
// Byte 0x80 is mapped to 0x20AC (the Unicode code point of the euro sign).
235 else if (*currentch == 0x80)
238 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)((0x20 << 8) | 0xAC));
239 TCollection_ExtendedString curext(curcar);
240 tostr.AssignCat(curext);
// Bytes 0x81..0xFE start a multi-byte sequence and are remembered in gb1.
243 else if (*currentch >= 0x81 && *currentch <= 0xFE) {
245 gb1 = (unsigned int)(*currentch);
// Failure return of the no-byte-pending chain (its guarding line is not visible in this listing).
249 return Standard_False;
// End of input reached.
252 return Standard_True;
// Converts the Big5 byte string fromstr to Unicode, adding characters to tostr.
// Returns Standard_True when the end of the input is reached; the visible
// error paths return Standard_False.
// A lead byte (0x81..0xFE) is remembered in big5lead; together with the next
// byte it forms an index into the big5uni table.
// NOTE(review): several lines of this function (conditions selecting the
// special cases, else lines, pointer increments, state resets, closing braces)
// are not visible in this listing; the comments describe only the visible
// statements.
255 Standard_Boolean Resource_Unicode::ConvertBig5ToUnicode(const Standard_CString fromstr, TCollection_ExtendedString& tostr)
// Read the input as unsigned bytes so range comparisons work for values >= 0x80.
259 unsigned char* currentch = ((unsigned char*) fromstr);
// Pending lead byte; 0x00 means "no lead byte pending".
260 unsigned int big5lead = 0x00;
// Loop until the terminating NUL byte.
262 while(*currentch != '\0') {
// A lead byte is pending: the current byte is the trail byte.
263 if (big5lead != 0x00)
265 unsigned int lead = big5lead;
266 unsigned int pointer = 0;
// Trail bytes below 0x7F are offset by 0x40, the others by 0x62.
268 unsigned int offset = *currentch < 0x7F ? 0x40 : 0x62;
// Valid trail bytes are 0x40..0x7E and 0xA1..0xFE; 157 entries per lead byte.
270 if ((*currentch >= 0x40 && *currentch <= 0x7E) ||
271 (*currentch >= 0xA1 && *currentch <= 0xFE))
273 pointer = (lead - 0x81) * 157 + (*currentch - offset);
// New characters are inserted after the current end of tostr (positions aLength+1, aLength+2).
275 Standard_Integer aLength = tostr.Length();
// Four special cases insert a two-character sequence: a base letter 0x00CA or
// 0x00EA followed by a combining mark 0x0304 or 0x030C (the index values that
// select each case are not visible in this listing).
278 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00CA);
279 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x0304);
284 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00CA);
285 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x030C);
290 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00EA);
291 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x0304);
296 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00EA);
297 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x030C);
// General case: look the code point up in the table.
304 unsigned int uni = big5uni [pointer];
// Value inserted directly as one 16-bit character
// (presumably only when it fits in 16 bits - confirm the guarding condition).
307 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)uni);
308 tostr.Insert(aLength+1,curcar);
// Otherwise the value goes through a UTF-32 -> UTF-16 conversion and is
// inserted as two 16-bit units.
// NOTE(review): no matching delete[] for this one-element array is visible in
// this listing, and a single element leaves no room for a terminator - confirm
// how aChar32 is filled, terminated and released.
312 Standard_Utf32Char* aChar32 = new Standard_Utf32Char[1];
314 NCollection_Utf32String aStr32(aChar32);
315 NCollection_Utf16String aStr16 = aStr32.ToUtf16();
317 if (aStr16.Size() != 4) return Standard_False; // not a surrogate pair
318 const Standard_Utf16Char* aChar16 = aStr16.ToCString();
// First unit of the pair.
319 tostr.Insert(aLength+1,(Standard_ExtCharacter)(*aChar16));
// Second unit of the pair (presumably after aChar16 was advanced - confirm).
321 tostr.Insert(aLength+2,(Standard_ExtCharacter)(*aChar16));
// Failure path for a trail byte: a byte <= 0x7F is put back into tostr before failing.
// NOTE(review): Insert is called with position 0 here, while the code above
// inserts at aLength+1 - confirm that position 0 is valid for Insert.
329 if (*currentch <= 0x7F)
332 TCollection_ExtendedString curext(((char) *currentch));
334 tostr.Insert(0, curext);
337 return Standard_False;
// No lead byte pending: a byte <= 0x7F is appended unchanged.
341 if (*currentch <= 0x7F)
344 TCollection_ExtendedString curext(((char) *currentch));
346 tostr.AssignCat(curext);
// Bytes 0x81..0xFE are remembered as the lead byte of a two-byte character.
348 else if (*currentch >= 0x81 && *currentch <= 0xFE) {
350 big5lead = (unsigned int)(*currentch);
// Failure return of the no-lead-byte chain (its guarding line is not visible in this listing).
354 return Standard_False;
// End of input reached.
357 return Standard_True;
// Converts the Unicode string fromstr to Shift-JIS bytes written into tostr,
// whose capacity is maxsize characters.
// Returns Standard_True when the whole input was processed and Standard_False
// on the visible out-of-space paths.
// NOTE(review): the loop header, the declarations of ph/pl, the condition that
// separates two-byte from one-byte output, the counter increments and the
// closing braces are not visible in this listing.
360 Standard_Boolean Resource_Unicode::ConvertUnicodeToSJIS(const TCollection_ExtendedString& fromstr,
361 Standard_PCharacter& tostr,
362 const Standard_Integer maxsize)
// nbtrans: index of the next byte to write in tostr.
364 Standard_Integer nbtrans = 0;
// nbext: index of the next character to read from fromstr, starting at 1.
365 Standard_Integer nbext = 1;
366 Standard_Boolean finished = Standard_False;
367 Standard_ExtCharacter curcar;
369 // BIG ENDIAN USED HERE
// All input characters consumed: terminate the output and stop.
372 if (nbext > fromstr.Length()) {
373 finished = Standard_True;
374 tostr[nbtrans] = '\0';
// Split the current 16-bit character into high byte (ph) and low byte (pl),
// then convert the pair in place.
377 curcar = fromstr.Value(nbext);
379 ph = (((unsigned int) curcar) >> 8) & 0xFF;
380 pl = ((unsigned int) curcar) & 0xFF;
381 Resource_unicode_to_sjis(&ph,&pl);
// Two-byte output is written only while nbtrans is below maxsize-3.
383 if (nbtrans < (maxsize-3)) {
384 tostr[nbtrans] = ((char) ph);
386 tostr[nbtrans] = ((char) pl);
// Not enough room: terminate the output and report failure.
// NOTE(review): ConvertUnicodeToEUC and ConvertUnicodeToGB write the terminator
// at tostr[nbtrans-1] at this point - confirm which index is intended.
390 tostr[nbtrans] = '\0';
392 return Standard_False;
// One-byte output: only the low byte is written.
396 tostr[nbtrans] = ((char) pl);
// Output buffer full: force termination at the last slot and report failure.
399 if (nbtrans >= (maxsize - 1)) {
400 tostr[maxsize-1] = '\0';
401 finished = Standard_True;
402 return Standard_False;
406 return Standard_True;
// Converts the Unicode string fromstr to EUC bytes written into tostr, whose
// capacity is maxsize characters.
// Returns Standard_True when the whole input was processed and Standard_False
// on the visible out-of-space paths.
// NOTE(review): the loop header, the declarations of ph/pl, the condition that
// separates two-byte from one-byte output, the counter increments and the
// closing braces are not visible in this listing.
409 Standard_Boolean Resource_Unicode::ConvertUnicodeToEUC(const TCollection_ExtendedString& fromstr,
410 Standard_PCharacter& tostr,
411 const Standard_Integer maxsize)
// nbtrans: index of the next byte to write in tostr.
413 Standard_Integer nbtrans = 0;
// nbext: index of the next character to read from fromstr, starting at 1.
414 Standard_Integer nbext = 1;
415 Standard_Boolean finished = Standard_False;
416 Standard_ExtCharacter curcar;
418 // BIG ENDIAN USED HERE
// All input characters consumed: terminate the output and stop.
421 if (nbext > fromstr.Length()) {
422 finished = Standard_True;
423 tostr[nbtrans] = '\0';
// Split the current 16-bit character into high byte (ph) and low byte (pl),
// then convert the pair in place.
426 curcar = fromstr.Value(nbext);
428 ph = (((unsigned int) curcar) >> 8) & 0xFF;
429 pl = ((unsigned int) curcar) & 0xFF;
430 Resource_unicode_to_euc(&ph,&pl);
// Two-byte output is written only while nbtrans is below maxsize-3.
432 if (nbtrans < (maxsize-3)) {
433 tostr[nbtrans] = ((char) ph);
435 tostr[nbtrans] = ((char) pl);
// Not enough room: terminate the output and report failure.
// NOTE(review): the index is nbtrans-1 here; if nbtrans can be 0 at this point
// this writes before the start of the buffer - confirm.
439 tostr[nbtrans-1] = '\0';
441 return Standard_False;
// One-byte output: only the low byte is written.
445 tostr[nbtrans] = ((char) pl);
// Output buffer full: force termination at the last slot and report failure.
448 if (nbtrans >= (maxsize - 1)) {
449 tostr[maxsize-1] = '\0';
450 finished = Standard_True;
451 return Standard_False;
455 return Standard_True;
// Converts the Unicode string fromstr to GB bytes written into tostr, whose
// capacity is maxsize characters.
// Returns Standard_True when the whole input was processed and Standard_False
// on the visible out-of-space paths.
// NOTE(review): the loop header, the declarations of ph/pl, the condition that
// separates two-byte from one-byte output, the counter increments and the
// closing braces are not visible in this listing.
458 Standard_Boolean Resource_Unicode::ConvertUnicodeToGB(const TCollection_ExtendedString& fromstr,
459 Standard_PCharacter& tostr,
460 const Standard_Integer maxsize)
// nbtrans: index of the next byte to write in tostr.
462 Standard_Integer nbtrans = 0;
// nbext: index of the next character to read from fromstr, starting at 1.
463 Standard_Integer nbext = 1;
464 Standard_Boolean finished = Standard_False;
465 Standard_ExtCharacter curcar;
467 // BIG ENDIAN USED HERE
// All input characters consumed: terminate the output and stop.
470 if (nbext > fromstr.Length()) {
471 finished = Standard_True;
472 tostr[nbtrans] = '\0';
// Split the current 16-bit character into high byte (ph) and low byte (pl),
// then convert the pair in place.
475 curcar = fromstr.Value(nbext);
477 ph = (((unsigned int) curcar) >> 8) & 0xFF;
478 pl = ((unsigned int) curcar) & 0xFF;
479 Resource_unicode_to_gb(&ph,&pl);
// Two-byte output is written only while nbtrans is below maxsize-3.
481 if (nbtrans < (maxsize-3)) {
482 tostr[nbtrans] = ((char) ph);
484 tostr[nbtrans] = ((char) pl);
// Not enough room: terminate the output and report failure.
// NOTE(review): the index is nbtrans-1 here; if nbtrans can be 0 at this point
// this writes before the start of the buffer - confirm.
488 tostr[nbtrans-1] = '\0';
490 return Standard_False;
// One-byte output: written from curcar directly, not from the converted pl.
// NOTE(review): the cast to char is applied before "& 0xFF", unlike the SJIS
// and EUC variants which store pl - confirm this difference is intended.
494 tostr[nbtrans] = ((char) curcar) & 0xFF;
// Output buffer full: force termination at the last slot and report failure.
497 if (nbtrans >= (maxsize - 1)) {
498 tostr[maxsize-1] = '\0';
499 finished = Standard_True;
500 return Standard_False;
504 return Standard_True;
// Converts the Unicode string fromstr to single-byte characters written into
// tostr, whose capacity is maxsize characters.
// Each input character yields one output byte: either its low byte or a space.
// Returns Standard_True when the whole input was processed and Standard_False
// when the output buffer is full.
// NOTE(review): the loop header, the declarations of ph/pl, the condition that
// chooses between the low byte and ' ', the counter increments and the closing
// braces are not visible in this listing.
507 Standard_Boolean Resource_Unicode::ConvertUnicodeToANSI(const TCollection_ExtendedString& fromstr,
508 Standard_PCharacter& tostr,
509 const Standard_Integer maxsize)
// nbtrans: index of the next byte to write in tostr.
511 Standard_Integer nbtrans = 0;
// nbext: index of the next character to read from fromstr, starting at 1.
512 Standard_Integer nbext = 1;
513 Standard_Boolean finished = Standard_False;
514 Standard_ExtCharacter curcar;
516 // BIG ENDIAN USED HERE
// All input characters consumed: terminate the output and stop.
519 if (nbext > fromstr.Length()) {
520 finished = Standard_True;
521 tostr[nbtrans] = '\0';
// Split the current 16-bit character into high byte (ph) and low byte (pl).
524 curcar = fromstr.Value(nbext);
526 ph = ((unsigned int) curcar) >> 8;
527 pl = ((unsigned int) curcar) & 0xFF;
// Low byte copied as is (presumably when the high byte is zero - confirm).
529 tostr[nbtrans] = ((char) pl);
// Otherwise a space is written in place of the character.
532 tostr[nbtrans] = ' ';
// Output buffer full: force termination at the last slot and report failure.
536 if (nbtrans >= (maxsize - 1)) {
537 tostr[maxsize-1] = '\0';
538 finished = Standard_True;
539 return Standard_False;
542 return Standard_True;
// File-level flag: set to Standard_True once the format has been read or set
// explicitly; ReadFormat() resets it to Standard_False.
545 static Standard_Boolean AlreadyRead = Standard_False;
// Accessor for the current format, kept in a function-local static.
// The return type is a reference, which is what lets SetFormat() assign
// through the call.
// The visible code reads the "FormatType" entry of the "CharSet" resource and
// maps "SJIS", "EUC" and "GB" to the matching format; the remaining visible
// assignments fall back to Resource_ANSI.
// NOTE(review): the test on AlreadyRead, the else lines, the return statement
// and the braces are not visible in this listing - confirm against the
// complete file.
547 static Resource_FormatType& Resource_Current_Format()
// Default format until the resource says otherwise.
549 static Resource_FormatType theformat = Resource_ANSI;
551 AlreadyRead = Standard_True ;
552 Handle(Resource_Manager) mgr = new Resource_Manager("CharSet");
553 if (mgr->Find("FormatType")) {
554 TCollection_AsciiString form = mgr->Value("FormatType");
555 if (form.IsEqual("SJIS")) {
556 theformat = Resource_SJIS;
558 else if (form.IsEqual("EUC")) {
559 theformat = Resource_EUC;
561 else if (form.IsEqual("GB")) {
562 theformat = Resource_GB;
// Fallback assignments (presumably for an unrecognised value - confirm).
565 theformat = Resource_ANSI;
// (presumably for a missing "FormatType" entry - confirm).
569 theformat = Resource_ANSI;
// Sets the current format to typecode.
// AlreadyRead is set first, then the value is assigned through the reference
// returned by Resource_Current_Format().
575 void Resource_Unicode::SetFormat(const Resource_FormatType typecode)
577 AlreadyRead = Standard_True;
578 Resource_Current_Format() = typecode;
// Returns the current format as provided by Resource_Current_Format().
581 Resource_FormatType Resource_Unicode::GetFormat()
583 return Resource_Current_Format();
// Clears the AlreadyRead flag and then calls GetFormat(), discarding its result
// (presumably to force the format to be re-read from the resource - confirm).
587 void Resource_Unicode::ReadFormat()
589 AlreadyRead = Standard_False;
590 Resource_Unicode::GetFormat();
// Converts the C string theFromStr, encoded as theFormat, into the Unicode
// string theToStr by dispatching on the format.
// NOTE(review): the switch header, break statements, braces and part of the
// code-page expression are not visible in this listing; the comments describe
// only the visible statements.
593 void Resource_Unicode::ConvertFormatToUnicode (const Resource_FormatType theFormat,
594 const Standard_CString theFromStr,
595 TCollection_ExtendedString& theToStr)
// Multi-byte Japanese and Chinese formats are delegated to the dedicated converters.
599 case Resource_FormatType_SJIS:
601 ConvertSJISToUnicode (theFromStr, theToStr);
604 case Resource_FormatType_EUC:
606 ConvertEUCToUnicode(theFromStr, theToStr);
609 case Resource_FormatType_GB:
611 ConvertGBToUnicode(theFromStr, theToStr);
// ANSI: the string is built with the second constructor argument set to Standard_False.
614 case Resource_FormatType_ANSI:
616 theToStr = TCollection_ExtendedString(theFromStr, Standard_False);
// Single-byte code pages share one table-driven implementation.
619 case Resource_FormatType_CP1250:
620 case Resource_FormatType_CP1251:
621 case Resource_FormatType_CP1252:
622 case Resource_FormatType_CP1253:
623 case Resource_FormatType_CP1254:
624 case Resource_FormatType_CP1255:
625 case Resource_FormatType_CP1256:
626 case Resource_FormatType_CP1257:
627 case Resource_FormatType_CP1258:
628 case Resource_FormatType_iso8859_1:
629 case Resource_FormatType_iso8859_2:
630 case Resource_FormatType_iso8859_3:
631 case Resource_FormatType_iso8859_4:
632 case Resource_FormatType_iso8859_5:
633 case Resource_FormatType_iso8859_6:
634 case Resource_FormatType_iso8859_7:
635 case Resource_FormatType_iso8859_8:
636 case Resource_FormatType_iso8859_9:
// The table is selected by the distance of theFormat from Resource_FormatType_CP1250,
// so this relies on the enumerators above having consecutive values in table order.
638 const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250;
639 const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex];
// Convert byte by byte until the terminating NUL.
641 for (const char* anInputPntr = theFromStr; *anInputPntr != '\0'; ++anInputPntr)
643 unsigned char anInputChar = (unsigned char)(*anInputPntr);
// Bytes with the high bit set are looked up in the code page using their low
// 7 bits; the alternative for other bytes is not visible in this listing.
644 Standard_ExtCharacter aRes = (anInputChar & 0x80) != 0
645 ? aCodePage[(0x7f & anInputChar)]
651 theToStr.AssignCat(aRes);
// NOTE(review): ConvertBig5ToUnicode and ConvertGBKToUnicode return a
// Standard_Boolean that is not used here, so a failed conversion is not
// reported to the caller.
655 case Resource_FormatType_Big5:
657 ConvertBig5ToUnicode(theFromStr, theToStr);
660 case Resource_FormatType_GBK:
662 ConvertGBKToUnicode(theFromStr, theToStr);
// UTF-8: the string is built with the second constructor argument set to Standard_True.
665 case Resource_FormatType_UTF8:
667 theToStr = TCollection_ExtendedString (theFromStr, Standard_True);
// System locale: decode through NCollection_Utf16String::FromLocale.
670 case Resource_FormatType_SystemLocale:
672 NCollection_Utf16String aString;
673 aString.FromLocale (theFromStr);
674 theToStr = TCollection_ExtendedString (aString.ToCString());
// Converts the Unicode string theFromStr into theFormat, writing the result to
// theToStr, whose capacity is theMaxSize characters.
// Returns the result of the delegated converter, Standard_False when the visible
// size checks fail, and throws Standard_NotImplemented for GBK and Big5.
// NOTE(review): the switch header, braces and some conditions are not visible
// in this listing; the comments describe only the visible statements.
680 Standard_Boolean Resource_Unicode::ConvertUnicodeToFormat(const Resource_FormatType theFormat,
681 const TCollection_ExtendedString& theFromStr,
682 Standard_PCharacter& theToStr,
683 const Standard_Integer theMaxSize)
// Multi-byte and ANSI formats are delegated to the dedicated converters.
687 case Resource_FormatType_SJIS:
689 return ConvertUnicodeToSJIS (theFromStr, theToStr, theMaxSize);
691 case Resource_FormatType_EUC:
693 return ConvertUnicodeToEUC (theFromStr, theToStr, theMaxSize);
695 case Resource_FormatType_GB:
697 return ConvertUnicodeToGB (theFromStr, theToStr, theMaxSize);
699 case Resource_FormatType_ANSI:
701 return ConvertUnicodeToANSI(theFromStr, theToStr, theMaxSize);
// Single-byte code pages share one table-driven implementation.
703 case Resource_FormatType_CP1250:
704 case Resource_FormatType_CP1251:
705 case Resource_FormatType_CP1252:
706 case Resource_FormatType_CP1253:
707 case Resource_FormatType_CP1254:
708 case Resource_FormatType_CP1255:
709 case Resource_FormatType_CP1256:
710 case Resource_FormatType_CP1257:
711 case Resource_FormatType_CP1258:
712 case Resource_FormatType_iso8859_1:
713 case Resource_FormatType_iso8859_2:
714 case Resource_FormatType_iso8859_3:
715 case Resource_FormatType_iso8859_4:
716 case Resource_FormatType_iso8859_5:
717 case Resource_FormatType_iso8859_6:
718 case Resource_FormatType_iso8859_7:
719 case Resource_FormatType_iso8859_8:
720 case Resource_FormatType_iso8859_9:
// Refuse when the buffer is smaller than the character count.
// NOTE(review): a buffer of exactly Length() passes this check, but the last
// slot is reserved for the terminator below, so the final character would be
// dropped - confirm whether "<=" was intended.
722 if (theMaxSize < theFromStr.Length())
724 return Standard_False;
// The table is selected by the distance of theFormat from Resource_FormatType_CP1250,
// so this relies on the enumerators above having consecutive values in table order.
726 const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250;
727 const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex];
// NOTE(review): the loop bound is theMaxSize - 1, not the string length, and
// Value() is called with aToCharInd + 1 - confirm Value() tolerates indices
// beyond Length() when the buffer is larger than the string.
728 for (Standard_Integer aToCharInd = 0; aToCharInd < theMaxSize - 1; ++aToCharInd)
730 Standard_Boolean isFind = Standard_False;
731 Standard_ExtCharacter aFromChar = theFromStr.Value(aToCharInd + 1);
734 // zero value should be handled explicitly to avoid false conversion by
735 // selected code page that may have unused values (encoded as zero)
736 theToStr[aToCharInd] = '\0';
740 // find the character in the code page
// Linear search over the 128 table entries; a match at index i is stored as byte i | 0x80.
741 for (unsigned char anIndCP = 0; aFromChar != 0 && anIndCP < 128; ++anIndCP)
743 if (aCodePage[anIndCP] == aFromChar)
745 theToStr[aToCharInd] = anIndCP | 0x80;
746 isFind = Standard_True;
749 // if character is not found, put '?'
752 theToStr[aToCharInd] = '?';
// Always terminate at the last slot of the buffer.
756 theToStr[theMaxSize - 1] = '\0';
757 return Standard_True;
759 case Resource_FormatType_UTF8:
// NOTE(review): presumably LengthOfCString() does not count the terminating
// NUL; if so, a buffer of exactly that size passes this check but has no room
// for the terminator - confirm.
761 if (theMaxSize < theFromStr.LengthOfCString())
763 return Standard_False;
765 theFromStr.ToUTF8CString (theToStr);
766 return Standard_True;
// System locale: encode through NCollection_Utf16String::ToLocale.
768 case Resource_FormatType_SystemLocale:
770 const NCollection_Utf16String aString (theFromStr.ToExtString());
771 return aString.ToLocale (theToStr, theMaxSize);
// Conversion from Unicode to GBK or Big5 is not available.
// NOTE(review): the message text misspells "Unicode" and states the opposite
// direction ("from GBK and Big5 to ...") from what this function does; it is a
// runtime string and has been left unchanged here.
773 case Resource_FormatType_GBK:
774 case Resource_FormatType_Big5:
776 throw Standard_NotImplemented("Resource_Unicode::ConvertUnicodeToFormat - convert from GBK and Big5 to Unocode is not implemented");
// Fallback return for any format not handled above.
779 return Standard_False;