0029352: Adding support of GBK and Big5 code pages
[occt.git] / src / Resource / Resource_Unicode.cxx
CommitLineData
b311480e 1// Created on: 1996-09-26
2// Created by: Arnaud BOUZY
3// Copyright (c) 1996-1999 Matra Datavision
973c2be1 4// Copyright (c) 1999-2014 OPEN CASCADE SAS
b311480e 5//
973c2be1 6// This file is part of Open CASCADE Technology software library.
b311480e 7//
d5f74e42 8// This library is free software; you can redistribute it and/or modify it under
9// the terms of the GNU Lesser General Public License version 2.1 as published
973c2be1 10// by the Free Software Foundation, with special exception defined in the file
11// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
12// distribution for complete text of the license and disclaimer of any warranty.
b311480e 13//
973c2be1 14// Alternatively, this file may be used under the terms of Open CASCADE
15// commercial license or contractual agreement.
7fd59977 16
42cf5bc1 17
31e026ba 18#include <NCollection_UtfString.hxx>
19#include <Resource_Big5.h>
7fd59977 20#include <Resource_ConvertUnicode.hxx>
31e026ba 21#include <Resource_GBK.h>
7fd59977 22#include <Resource_Manager.hxx>
42cf5bc1 23#include <Resource_Unicode.hxx>
7fd59977 24#include <TCollection_AsciiString.hxx>
42cf5bc1 25#include <TCollection_ExtendedString.hxx>
7fd59977 26
// Byte-class predicates used by the converters below.
// NOTE: these are macros, so the argument is evaluated more than once;
// never pass an expression with side effects (e.g. *p++).

// c lies in 0x21..0x7E
#define isjis(c) ((c) >= 0x21 && (c) <= 0x7E)
// c lies in 0xA1..0xFE
#define iseuc(c) ((c) >= 0xA1 && (c) <= 0xFE)
// c lies in 0x81..0x9F or 0xE0..0xEF (tested as the first byte of a Shift-JIS pair)
#define issjis1(c) (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xEF))

// c lies in 0x40..0xFC, excluding 0x7F
#define issjis2(c) ((c) >= 0x40 && (c) <= 0xFC && (c) != 0x7F)

// c lies in 0xA0..0xDF
#define ishankana(c) ((c) >= 0xA0 && (c) <= 0xDF)
34
35static inline Standard_Boolean isshift (unsigned char c) { return c >= 0x80; }
36static inline Standard_Boolean isshift (unsigned int c) { return c >= 0x80 && c <= 0xff; }
37
38void Resource_Unicode::ConvertSJISToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr)
39{
40 tostr.Clear();
41
42 unsigned char* currentstr = ((unsigned char*) fromstr);
43 unsigned int pl,ph;
44 // BIG INDIAN USED HERE
45 while(*currentstr != '\0') {
46 if (issjis1(*currentstr)) {
47
48 ph = ((unsigned int) *currentstr);
49 // Be Carefull with first and second !!
50
51 currentstr++;
52
53 pl = ((unsigned int) *currentstr);
54 currentstr++;
55
56 Resource_sjis_to_unicode(&ph,&pl);
57 Standard_ExtCharacter curcar = ((Standard_ExtCharacter) ((ph << 8) | pl));
58 TCollection_ExtendedString curext(curcar);
59 tostr.AssignCat(curext);
60 }
61 else {
62 TCollection_ExtendedString curext(((char) *currentstr));
63 currentstr++;
64 tostr.AssignCat(curext);
65 }
66 }
67}
68
69
70void Resource_Unicode::ConvertEUCToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr)
71{
72 tostr.Clear();
73
74 unsigned char* currentstr = ((unsigned char*) fromstr);
75 unsigned int pl,ph;
76 // BIG INDIAN USED HERE
77 while(*currentstr != '\0') {
78 if (iseuc(*currentstr)) {
79
80 ph = ((unsigned int) *currentstr);
81 // Be Carefull with first and second !!
82
83 currentstr++;
84
85 pl = ((unsigned int) *currentstr);
86 currentstr++;
87
88 Resource_euc_to_unicode(&ph,&pl);
89 Standard_ExtCharacter curcar = ((Standard_ExtCharacter) ((ph << 8) | pl));
90 TCollection_ExtendedString curext(curcar);
91 tostr.AssignCat(curext);
92 }
93 else {
94 TCollection_ExtendedString curext(((char) *currentstr));
95 currentstr++;
96 tostr.AssignCat(curext);
97 }
98 }
99}
100
101void Resource_Unicode::ConvertGBToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr)
102{
103 tostr.Clear();
104
105 unsigned char* currentstr = ((unsigned char*) fromstr);
106 unsigned int pl,ph;
107 // BIG INDIAN USED HERE
108 while(*currentstr != '\0') {
109 if (isshift(*currentstr)) {
110
111 ph = ((unsigned int) *currentstr);
112 // Be Carefull with first and second !!
113
114 currentstr++;
115
116 pl = ((unsigned int) *currentstr);
117 currentstr++;
118
119 Resource_gb_to_unicode(&ph,&pl);
120 Standard_ExtCharacter curcar = ((Standard_ExtCharacter) ((ph << 8) | pl));
121 TCollection_ExtendedString curext(curcar);
122 tostr.AssignCat(curext);
123 }
124 else {
125 TCollection_ExtendedString curext(((char) *currentstr));
126 currentstr++;
127 tostr.AssignCat(curext);
128 }
129 }
130}
131
31e026ba 132Standard_Boolean Resource_Unicode::ConvertGBKToUnicode(const Standard_CString fromstr, TCollection_ExtendedString& tostr)
133{
134 tostr.Clear();
135
136 unsigned char* currentch = ((unsigned char*) fromstr);
137 unsigned int gb1 = 0x00, gb2 = 0x00, gb3 = 0x00;
138
139 while(*currentch != '\0') {
140 if (gb3 != 0x00)
141 {
142 if (!(*currentch >= 0x30 && *currentch <= 0x39))
143 {
144 TCollection_ExtendedString curext3(((char) *currentch));
145 TCollection_ExtendedString curext2(((char) gb3));
146 TCollection_ExtendedString curext1(((char) gb2));
147 tostr.Insert(0, curext3);
148 tostr.Insert(0, curext2);
149 tostr.Insert(0, curext1);
150 gb1 = 0;
151 gb2 = 0;
152 gb3 = 0;
153 return Standard_False;
154 }
155
156 unsigned int codepnt = ((gb1 - 0x81) * (10 * 126 * 10)) + ((gb2 - 0x30) * (10 * 126)) + ((gb3 - 0x81) * 10) + *currentch - 0x30;
157 if (codepnt < 23940)
158 {
159 unsigned short uni = gbkuni [codepnt];
160 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)uni);
161 TCollection_ExtendedString curext(curcar);
162 tostr.AssignCat(curext);
163 currentch++;
164 continue;
165 }
166
167 return Standard_False;
168 }
169 else if (gb2 != 0x00)
170 {
171 if (*currentch >= 0x81 && *currentch <= 0xFE)
172 {
173 gb3 = (unsigned int)(*currentch);
174 currentch++;
175 continue;
176 }
177 TCollection_ExtendedString curext2(((char) *currentch));
178 TCollection_ExtendedString curext1(((char) gb2));
179 tostr.Insert(0, curext2);
180 tostr.Insert(0, curext1);
181 gb1 = 0;
182 gb2 = 0;
183 return Standard_False;
184 }
185 else if (gb1 != 0x00)
186 {
187 if (*currentch >= 0x30 && *currentch <= 0x39)
188 {
189 gb2 = (unsigned int)(*currentch);
190 currentch++;
191 continue;
192 }
193
194 unsigned int lead = gb1;
195 unsigned int pointer = 0;
196 gb1 = 0x00;
197 unsigned int offset = *currentch < 0x7F ? 0x40 : 0x41;
198
199 if ((*currentch >= 0x40 && *currentch <= 0x7E) ||
200 (*currentch >= 0x80 && *currentch <= 0xFE))
201 {
202 pointer = (lead - 0x81) * 190 + (*currentch - offset);
203
204 if (pointer < 23940)
205 {
206 unsigned short uni = gbkuni [pointer];
207 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)uni);
208 TCollection_ExtendedString curext(curcar);
209 tostr.AssignCat(curext);
210 currentch++;
211 continue;
212 }
213 }
214 if (*currentch <= 0x7F)
215 {
216 // ASCII symbol
217 TCollection_ExtendedString curext(((char) *currentch));
218 currentch++;
219 tostr.Insert(0, curext);
220 continue;
221 }
222 return Standard_False;
223 }
224 else
225 {
226 if (*currentch <= 0x7F)
227 {
228 // ASCII symbol
229 TCollection_ExtendedString curext(((char) *currentch));
230 currentch++;
231 tostr.AssignCat(curext);
232 }
233 else if (*currentch == 0x80)
234 {
235 // Special symbol
236 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)((0x20 << 8) | 0xAC));
237 TCollection_ExtendedString curext(curcar);
238 tostr.AssignCat(curext);
239 currentch++;
240 }
241 else if (*currentch >= 0x81 && *currentch <= 0xFE) {
242 // Chinese symbol
243 gb1 = (unsigned int)(*currentch);
244 currentch++;
245 }
246 else
247 return Standard_False;
248 }
249 }
250 return Standard_True;
251}
252
253Standard_Boolean Resource_Unicode::ConvertBig5ToUnicode(const Standard_CString fromstr, TCollection_ExtendedString& tostr)
254{
255 tostr.Clear();
256
257 unsigned char* currentch = ((unsigned char*) fromstr);
258 unsigned int big5lead = 0x00;
259
260 while(*currentch != '\0') {
261 if (big5lead != 0x00)
262 {
263 unsigned int lead = big5lead;
264 unsigned int pointer = 0;
265 big5lead = 0x00;
266 unsigned int offset = *currentch < 0x7F ? 0x40 : 0x62;
267
268 if ((*currentch >= 0x40 && *currentch <= 0x7E) ||
269 (*currentch >= 0xA1 && *currentch <= 0xFE))
270 {
271 pointer = (lead - 0x81) * 157 + (*currentch - offset);
272
273 Standard_Integer aLength = tostr.Length();
274 switch (pointer) {
275 case 1133: {
276 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00CA);
277 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x0304);
278 currentch++;
279 continue;
280 }
281 case 1135: {
282 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00CA);
283 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x030C);
284 currentch++;
285 continue;
286 }
287 case 1164: {
288 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00EA);
289 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x0304);
290 currentch++;
291 continue;
292 }
293 case 1166: {
294 tostr.Insert(aLength+1,(Standard_ExtCharacter)0x00EA);
295 tostr.Insert(aLength+2,(Standard_ExtCharacter)0x030C);
296 currentch++;
297 continue;
298 }
299 default: {
300 if (pointer < 19782)
301 {
302 unsigned int uni = big5uni [pointer];
303 if (uni <= 0xFFFF)
304 {
305 Standard_ExtCharacter curcar = ((Standard_ExtCharacter)uni);
306 tostr.Insert(aLength+1,curcar);
307 }
308 else
309 {
310 Standard_Utf32Char* aChar32 = new Standard_Utf32Char[1];
311 aChar32[0] = uni;
312 NCollection_Utf32String aStr32(aChar32);
313 NCollection_Utf16String aStr16 = aStr32.ToUtf16();
314
315 if (aStr16.Size() != 4) return Standard_False; // not a surrogate pair
316 const Standard_Utf16Char* aChar16 = aStr16.ToCString();
317 tostr.Insert(aLength+1,(Standard_ExtCharacter)(*aChar16));
318 aChar16++;
319 tostr.Insert(aLength+2,(Standard_ExtCharacter)(*aChar16));
320 }
321 currentch++;
322 continue;
323 }
324 }
325 }
326 }
327 if (*currentch <= 0x7F)
328 {
329 // ASCII symbol
330 TCollection_ExtendedString curext(((char) *currentch));
331 currentch++;
332 tostr.Insert(0, curext);
333 continue;
334 }
335 return Standard_False;
336 }
337 else
338 {
339 if (*currentch <= 0x7F)
340 {
341 // ASCII symbol
342 TCollection_ExtendedString curext(((char) *currentch));
343 currentch++;
344 tostr.AssignCat(curext);
345 }
346 else if (*currentch >= 0x81 && *currentch <= 0xFE) {
347 // Chinese symbol
348 big5lead = (unsigned int)(*currentch);
349 currentch++;
350 }
351 else
352 return Standard_False;
353 }
354 }
355 return Standard_True;
356}
357
7fd59977 358void Resource_Unicode::ConvertANSIToUnicode(const Standard_CString fromstr,TCollection_ExtendedString& tostr)
359{
360 tostr.Clear();
361
362 TCollection_ExtendedString curext(fromstr);
363 tostr.AssignCat(curext);
364}
365
366Standard_Boolean Resource_Unicode::ConvertUnicodeToSJIS(const TCollection_ExtendedString& fromstr,
367 Standard_PCharacter& tostr,
368 const Standard_Integer maxsize)
369{
370 Standard_Integer nbtrans = 0;
371 Standard_Integer nbext = 1;
372 Standard_Boolean finished = Standard_False;
373 Standard_ExtCharacter curcar;
374 unsigned int pl,ph;
375 // BIG INDIAN USED HERE
376
377 while (!finished) {
378 if (nbext > fromstr.Length()) {
379 finished = Standard_True;
380 tostr[nbtrans] = '\0';
381 }
382 else {
383 curcar = fromstr.Value(nbext);
384 nbext++;
385 ph = (((unsigned int) curcar) >> 8) & 0xFF;
386 pl = ((unsigned int) curcar) & 0xFF;
387 Resource_unicode_to_sjis(&ph,&pl);
388 if (issjis1(ph)) {
389 if (nbtrans < (maxsize-3)) {
390 tostr[nbtrans] = ((char) ph);
391 nbtrans++;
392 tostr[nbtrans] = ((char) pl);
393 nbtrans++;
394 }
395 else {
396 tostr[nbtrans] = '\0';
397 nbtrans = maxsize-1;
398 return Standard_False;
399 }
400 }
401 else {
402 tostr[nbtrans] = ((char) pl);
403 nbtrans++;
404 }
405 if (nbtrans >= (maxsize - 1)) {
406 tostr[maxsize-1] = '\0';
407 finished = Standard_True;
408 return Standard_False;
409 }
410 }
411 }
412 return Standard_True;
413}
414
415Standard_Boolean Resource_Unicode::ConvertUnicodeToEUC(const TCollection_ExtendedString& fromstr,
416 Standard_PCharacter& tostr,
417 const Standard_Integer maxsize)
418{
419 Standard_Integer nbtrans = 0;
420 Standard_Integer nbext = 1;
421 Standard_Boolean finished = Standard_False;
422 Standard_ExtCharacter curcar;
423 unsigned int pl,ph;
424 // BIG INDIAN USED HERE
425
426 while (!finished) {
427 if (nbext > fromstr.Length()) {
428 finished = Standard_True;
429 tostr[nbtrans] = '\0';
430 }
431 else {
432 curcar = fromstr.Value(nbext);
433 nbext++;
434 ph = (((unsigned int) curcar) >> 8) & 0xFF;
435 pl = ((unsigned int) curcar) & 0xFF;
436 Resource_unicode_to_euc(&ph,&pl);
437 if (iseuc(ph)) {
438 if (nbtrans < (maxsize-3)) {
439 tostr[nbtrans] = ((char) ph);
440 nbtrans++;
441 tostr[nbtrans] = ((char) pl);
442 nbtrans++;
443 }
444 else {
445 tostr[nbtrans-1] = '\0';
446 nbtrans = maxsize-1;
447 return Standard_False;
448 }
449 }
450 else {
451 tostr[nbtrans] = ((char) pl);
452 nbtrans++;
453 }
454 if (nbtrans >= (maxsize - 1)) {
455 tostr[maxsize-1] = '\0';
456 finished = Standard_True;
457 return Standard_False;
458 }
459 }
460 }
461 return Standard_True;
462}
463
464Standard_Boolean Resource_Unicode::ConvertUnicodeToGB(const TCollection_ExtendedString& fromstr,
465 Standard_PCharacter& tostr,
466 const Standard_Integer maxsize)
467{
468 Standard_Integer nbtrans = 0;
469 Standard_Integer nbext = 1;
470 Standard_Boolean finished = Standard_False;
471 Standard_ExtCharacter curcar;
472 unsigned int pl,ph;
473 // BIG INDIAN USED HERE
474
475 while (!finished) {
476 if (nbext > fromstr.Length()) {
477 finished = Standard_True;
478 tostr[nbtrans] = '\0';
479 }
480 else {
481 curcar = fromstr.Value(nbext);
482 nbext++;
483 ph = (((unsigned int) curcar) >> 8) & 0xFF;
484 pl = ((unsigned int) curcar) & 0xFF;
485 Resource_unicode_to_gb(&ph,&pl);
486 if (isshift(ph)) {
487 if (nbtrans < (maxsize-3)) {
488 tostr[nbtrans] = ((char) ph);
489 nbtrans++;
490 tostr[nbtrans] = ((char) pl);
491 nbtrans++;
492 }
493 else {
494 tostr[nbtrans-1] = '\0';
495 nbtrans = maxsize-1;
496 return Standard_False;
497 }
498 }
499 else {
500 tostr[nbtrans] = ((char) curcar) & 0xFF;
501 nbtrans++;
502 }
503 if (nbtrans >= (maxsize - 1)) {
504 tostr[maxsize-1] = '\0';
505 finished = Standard_True;
506 return Standard_False;
507 }
508 }
509 }
510 return Standard_True;
511}
512
513Standard_Boolean Resource_Unicode::ConvertUnicodeToANSI(const TCollection_ExtendedString& fromstr,
514 Standard_PCharacter& tostr,
515 const Standard_Integer maxsize)
516{
517 Standard_Integer nbtrans = 0;
518 Standard_Integer nbext = 1;
519 Standard_Boolean finished = Standard_False;
520 Standard_ExtCharacter curcar;
521 unsigned int pl,ph;
522 // BIG INDIAN USED HERE
523
524 while (!finished) {
525 if (nbext > fromstr.Length()) {
526 finished = Standard_True;
527 tostr[nbtrans] = '\0';
528 }
529 else {
530 curcar = fromstr.Value(nbext);
531 nbext++;
532 ph = ((unsigned int) curcar) >> 8;
533 pl = ((unsigned int) curcar) & 0xFF;
534 if (ph == 0) {
535 tostr[nbtrans] = ((char) pl);
536 }
537 else {
538 tostr[nbtrans] = ' ';
539 }
540 nbtrans++;
541 }
542 if (nbtrans >= (maxsize - 1)) {
543 tostr[maxsize-1] = '\0';
544 finished = Standard_True;
545 return Standard_False;
546 }
547 }
548 return Standard_True;
549}
550
551static Standard_Boolean AlreadyRead = Standard_False;
552
553static Resource_FormatType& Resource_Current_Format()
554{
555 static Resource_FormatType theformat = Resource_ANSI;
556 if (!AlreadyRead) {
557 AlreadyRead = Standard_True ;
558 Handle(Resource_Manager) mgr = new Resource_Manager("CharSet");
559 if (mgr->Find("FormatType")) {
560 TCollection_AsciiString form = mgr->Value("FormatType");
561 if (form.IsEqual("SJIS")) {
562 theformat = Resource_SJIS;
563 }
564 else if (form.IsEqual("EUC")) {
565 theformat = Resource_EUC;
566 }
567 else if (form.IsEqual("GB")) {
568 theformat = Resource_GB;
569 }
570 else {
571 theformat = Resource_ANSI;
572 }
573 }
574 else {
575 theformat = Resource_ANSI;
576 }
577 }
578 return theformat;
579}
580
581void Resource_Unicode::SetFormat(const Resource_FormatType typecode)
582{
583 AlreadyRead = Standard_True;
584 Resource_Current_Format() = typecode;
585}
586
587Resource_FormatType Resource_Unicode::GetFormat()
588{
589 return Resource_Current_Format();
590}
591
592
593void Resource_Unicode::ReadFormat()
594{
595 AlreadyRead = Standard_False;
596 Resource_Unicode::GetFormat();
597}
598
599void Resource_Unicode::ConvertFormatToUnicode(const Standard_CString fromstr,
600 TCollection_ExtendedString& tostr)
601{
602 Resource_FormatType theform = Resource_Unicode::GetFormat();
603 switch (theform) {
604 case Resource_SJIS :
605 {
606 ConvertSJISToUnicode(fromstr,tostr);
607 break;
608 }
609 case Resource_EUC :
610 {
611 ConvertEUCToUnicode(fromstr,tostr);
612 break;
613 }
614 case Resource_GB :
615 {
616 ConvertGBToUnicode(fromstr,tostr);
617 break;
618 }
619 case Resource_ANSI :
620 {
621 ConvertANSIToUnicode(fromstr,tostr);
622 break;
623 }
624 }
625}
626
627Standard_Boolean Resource_Unicode::ConvertUnicodeToFormat(const TCollection_ExtendedString& fromstr,
628 Standard_PCharacter& tostr,
629 const Standard_Integer maxsize)
630{
631 Resource_FormatType theform = Resource_Unicode::GetFormat();
632 switch (theform) {
633 case Resource_SJIS :
634 {
635 return ConvertUnicodeToSJIS(fromstr,tostr,maxsize);
636 }
637 case Resource_EUC :
638 {
639 return ConvertUnicodeToEUC(fromstr,tostr,maxsize);
640 }
641 case Resource_GB :
642 {
643 return ConvertUnicodeToGB(fromstr,tostr,maxsize);
644 }
645 case Resource_ANSI :
646 {
647 return ConvertUnicodeToANSI(fromstr,tostr,maxsize);
648 }
649 }
650 return Standard_False;
651}
652