]> OCCT Git - occt.git/commitdiff
0028454: Data Exchange, STEP reader - names with special characters cannot be read
author dpasukhi <dpasukhi@opencascade.com>
Fri, 9 Oct 2020 10:57:30 +0000 (13:57 +0300)
committer bugmaster <bugmaster@opencascade.com>
Thu, 22 Oct 2020 14:54:29 +0000 (17:54 +0300)
- Add support for the control directives ("\X2\", "\X4\", "\X\", "\P*\", "\S\");
- Make the "read.stepcaf.codepage" parameter the basis for conversion inside StepData instead of CAF;
- Rename "read.stepcaf.codepage" to "read.step.codepage";
- Add ISO 8859-1 through ISO 8859-9 code pages for conversion;
- Add the Resource_FormatType_NoConversion format type, which indicates non-conversion behavior;
- Update old test cases that contain control directives.

23 files changed:
src/Resource/FILES
src/Resource/Resource_ANSI.pxx [deleted file]
src/Resource/Resource_CodePages.pxx [new file with mode: 0644]
src/Resource/Resource_FormatType.hxx
src/Resource/Resource_Unicode.cxx
src/STEPCAFControl/STEPCAFControl_Controller.cxx
src/STEPCAFControl/STEPCAFControl_Reader.cxx
src/STEPCAFControl/STEPCAFControl_Reader.hxx
src/STEPControl/STEPControl_Controller.cxx
src/StepData/StepData_StepModel.cxx
src/StepData/StepData_StepModel.hxx
src/StepData/StepData_StepReaderData.cxx
src/StepData/StepData_StepReaderData.hxx
src/StepFile/StepFile_Read.cxx
src/TCollection/TCollection_ExtendedString.cxx
src/TCollection/TCollection_ExtendedString.hxx
tests/bugs/step/bug28454_1 [new file with mode: 0644]
tests/bugs/step/bug28454_2 [new file with mode: 0644]
tests/bugs/step/bug30694
tests/bugs/step/bug31670
tests/bugs/step/bug31670_1
tests/gdt/view/B4
tests/gdt/view/B7

index aa2da19f8fd451c8177e36f7bc8f81501d91f58e..272c4c514485f7782136101b5d81db690852cde4 100755 (executable)
@@ -1,5 +1,5 @@
-Resource_ANSI.pxx
 Resource_Big5.pxx
+Resource_CodePages.pxx
 Resource_ConvertUnicode.c
 Resource_ConvertUnicode.hxx
 Resource_DataMapOfAsciiStringAsciiString.hxx
diff --git a/src/Resource/Resource_ANSI.pxx b/src/Resource/Resource_ANSI.pxx
deleted file mode 100644 (file)
index d02353a..0000000
+++ /dev/null
@@ -1,342 +0,0 @@
-// Copyright (c) 2020 OPEN CASCADE SAS
-//
-// This file is part of Open CASCADE Technology software library.
-//
-// This library is free software; you can redistribute it and/or modify it under
-// the terms of the GNU Lesser General Public License version 2.1 as published
-// by the Free Software Foundation, with special exception defined in the file
-// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
-// distribution for complete text of the license and disclaimer of any warranty.
-//
-// Alternatively, this file may be used under the terms of Open CASCADE
-// commercial license or contractual agreement.
-
-#include <Standard_TypeDef.hxx>
-
-// Code pages ANSI -> UTF16
-static const Standard_ExtCharacter THE_CODEPAGES_ANSI[9][128] =
-{
-  {
-    // code page: cp1250
-    0x20ac, 0x81, 0x201a, 0x83,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x88, 0x2030, 0x160, 0x2039,
-    0x15a, 0x164, 0x17d, 0x179,
-    0x90, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x98, 0x2122, 0x161, 0x203a,
-    0x15b, 0x165, 0x17e, 0x17a,
-    0xa0, 0x2c7, 0x2d8, 0x141,
-    0xa4, 0x104, 0xa6, 0xa7,
-    0xa8, 0xa9, 0x15e, 0xab,
-    0xac, 0xad, 0xae, 0x17b,
-    0xb0, 0xb1, 0x2db, 0x142,
-    0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0x105, 0x15f, 0xbb,
-    0x13d, 0x2dd, 0x13e, 0x17c,
-    0x154, 0xc1, 0xc2, 0x102,
-    0xc4, 0x139, 0x106, 0xc7,
-    0x10c, 0xc9, 0x118, 0xcb,
-    0x11a, 0xcd, 0xce, 0x10e,
-    0x110, 0x143, 0x147, 0xd3,
-    0xd4, 0x150, 0xd6, 0xd7,
-    0x158, 0x16e, 0xda, 0x170,
-    0xdc, 0xdd, 0x162, 0xdf,
-    0x155, 0xe1, 0xe2, 0x103,
-    0xe4, 0x13a, 0x107, 0xe7,
-    0x10d, 0xe9, 0x119, 0xeb,
-    0x11b, 0xed, 0xee, 0x10f,
-    0x111, 0x144, 0x148, 0xf3,
-    0xf4, 0x151, 0xf6, 0xf7,
-    0x159, 0x16f, 0xfa, 0x171,
-    0xfc, 0xfd, 0x163, 0x2d9
-  },
-
-  {
-    // code page: cp1251
-    0x402, 0x403, 0x201a, 0x453,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x20ac, 0x2030, 0x409, 0x2039,
-    0x40a, 0x40c, 0x40b, 0x40f,
-    0x452, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x98, 0x2122, 0x459, 0x203a,
-    0x45a, 0x45c, 0x45b, 0x45f,
-    0xa0, 0x40e, 0x45e, 0x408,
-    0xa4, 0x490, 0xa6, 0xa7,
-    0x401, 0xa9, 0x404, 0xab,
-    0xac, 0xad, 0xae, 0x407,
-    0xb0, 0xb1, 0x406, 0x456,
-    0x491, 0xb5, 0xb6, 0xb7,
-    0x451, 0x2116, 0x454, 0xbb,
-    0x458, 0x405, 0x455, 0x457,
-    0x410, 0x411, 0x412, 0x413,
-    0x414, 0x415, 0x416, 0x417,
-    0x418, 0x419, 0x41a, 0x41b,
-    0x41c, 0x41d, 0x41e, 0x41f,
-    0x420, 0x421, 0x422, 0x423,
-    0x424, 0x425, 0x426, 0x427,
-    0x428, 0x429, 0x42a, 0x42b,
-    0x42c, 0x42d, 0x42e, 0x42f,
-    0x430, 0x431, 0x432, 0x433,
-    0x434, 0x435, 0x436, 0x437,
-    0x438, 0x439, 0x43a, 0x43b,
-    0x43c, 0x43d, 0x43e, 0x43f,
-    0x440, 0x441, 0x442, 0x443,
-    0x444, 0x445, 0x446, 0x447,
-    0x448, 0x449, 0x44a, 0x44b,
-    0x44c, 0x44d, 0x44e, 0x44f
-  },
-
-  {
-    // code page: cp1252
-    0x20ac, 0x81, 0x201a, 0x192,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x160, 0x2039,
-    0x152, 0x8d, 0x17d, 0x8f,
-    0x90, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x161, 0x203a,
-    0x153, 0x9d, 0x17e, 0x178,
-    0xa0, 0xa1, 0xa2, 0xa3,
-    0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0xaa, 0xab,
-    0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3,
-    0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0xb9, 0xba, 0xbb,
-    0xbc, 0xbd, 0xbe, 0xbf,
-    0xc0, 0xc1, 0xc2, 0xc3,
-    0xc4, 0xc5, 0xc6, 0xc7,
-    0xc8, 0xc9, 0xca, 0xcb,
-    0xcc, 0xcd, 0xce, 0xcf,
-    0xd0, 0xd1, 0xd2, 0xd3,
-    0xd4, 0xd5, 0xd6, 0xd7,
-    0xd8, 0xd9, 0xda, 0xdb,
-    0xdc, 0xdd, 0xde, 0xdf,
-    0xe0, 0xe1, 0xe2, 0xe3,
-    0xe4, 0xe5, 0xe6, 0xe7,
-    0xe8, 0xe9, 0xea, 0xeb,
-    0xec, 0xed, 0xee, 0xef,
-    0xf0, 0xf1, 0xf2, 0xf3,
-    0xf4, 0xf5, 0xf6, 0xf7,
-    0xf8, 0xf9, 0xfa, 0xfb,
-    0xfc, 0xfd, 0xfe, 0xff
-  },
-
-  {
-    // code page: cp1253
-    0x20ac, 0x81, 0x201a, 0x192,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x88, 0x2030, 0x8a, 0x2039,
-    0x8c, 0x8d, 0x8e, 0x8f,
-    0x90, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x98, 0x2122, 0x9a, 0x203a,
-    0x9c, 0x9d, 0x9e, 0x9f,
-    0xa0, 0x385, 0x386, 0xa3,
-    0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0x0, 0xab,
-    0xac, 0xad, 0xae, 0x2015,
-    0xb0, 0xb1, 0xb2, 0xb3,
-    0x384, 0xb5, 0xb6, 0xb7,
-    0x388, 0x389, 0x38a, 0xbb,
-    0x38c, 0xbd, 0x38e, 0x38f,
-    0x390, 0x391, 0x392, 0x393,
-    0x394, 0x395, 0x396, 0x397,
-    0x398, 0x399, 0x39a, 0x39b,
-    0x39c, 0x39d, 0x39e, 0x39f,
-    0x3a0, 0x3a1, 0x0, 0x3a3,
-    0x3a4, 0x3a5, 0x3a6, 0x3a7,
-    0x3a8, 0x3a9, 0x3aa, 0x3ab,
-    0x3ac, 0x3ad, 0x3ae, 0x3af,
-    0x3b0, 0x3b1, 0x3b2, 0x3b3,
-    0x3b4, 0x3b5, 0x3b6, 0x3b7,
-    0x3b8, 0x3b9, 0x3ba, 0x3bb,
-    0x3bc, 0x3bd, 0x3be, 0x3bf,
-    0x3c0, 0x3c1, 0x3c2, 0x3c3,
-    0x3c4, 0x3c5, 0x3c6, 0x3c7,
-    0x3c8, 0x3c9, 0x3ca, 0x3cb,
-    0x3cc, 0x3cd, 0x3ce, 0x0
-  },
-
-  {
-    // code page: cp1254
-    0x20ac, 0x81, 0x201a, 0x192,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x160, 0x2039,
-    0x152, 0x8d, 0x8e, 0x8f,
-    0x90, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x161, 0x203a,
-    0x153, 0x9d, 0x9e, 0x178,
-    0xa0, 0xa1, 0xa2, 0xa3,
-    0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0xaa, 0xab,
-    0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3,
-    0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0xb9, 0xba, 0xbb,
-    0xbc, 0xbd, 0xbe, 0xbf,
-    0xc0, 0xc1, 0xc2, 0xc3,
-    0xc4, 0xc5, 0xc6, 0xc7,
-    0xc8, 0xc9, 0xca, 0xcb,
-    0xcc, 0xcd, 0xce, 0xcf,
-    0x11e, 0xd1, 0xd2, 0xd3,
-    0xd4, 0xd5, 0xd6, 0xd7,
-    0xd8, 0xd9, 0xda, 0xdb,
-    0xdc, 0x130, 0x15e, 0xdf,
-    0xe0, 0xe1, 0xe2, 0xe3,
-    0xe4, 0xe5, 0xe6, 0xe7,
-    0xe8, 0xe9, 0xea, 0xeb,
-    0xec, 0xed, 0xee, 0xef,
-    0x11f, 0xf1, 0xf2, 0xf3,
-    0xf4, 0xf5, 0xf6, 0xf7,
-    0xf8, 0xf9, 0xfa, 0xfb,
-    0xfc, 0x131, 0x15f, 0xff
-  },
-
-  {
-    // code page: cp1255
-    0x20ac, 0x81, 0x201a, 0x192,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x8a, 0x2039,
-    0x8c, 0x8d, 0x8e, 0x8f,
-    0x90, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x9a, 0x203a,
-    0x9c, 0x9d, 0x9e, 0x9f,
-    0xa0, 0xa1, 0xa2, 0xa3,
-    0x20aa, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0xd7, 0xab,
-    0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3,
-    0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0xb9, 0xf7, 0xbb,
-    0xbc, 0xbd, 0xbe, 0xbf,
-    0x5b0, 0x5b1, 0x5b2, 0x5b3,
-    0x5b4, 0x5b5, 0x5b6, 0x5b7,
-    0x5b8, 0x5b9, 0x5ba, 0x5bb,
-    0x5bc, 0x5bd, 0x5be, 0x5bf,
-    0x5c0, 0x5c1, 0x5c2, 0x5c3,
-    0x5f0, 0x5f1, 0x5f2, 0x5f3,
-    0x5f4, 0x0, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x0,
-    0x5d0, 0x5d1, 0x5d2, 0x5d3,
-    0x5d4, 0x5d5, 0x5d6, 0x5d7,
-    0x5d8, 0x5d9, 0x5da, 0x5db,
-    0x5dc, 0x5dd, 0x5de, 0x5df,
-    0x5e0, 0x5e1, 0x5e2, 0x5e3,
-    0x5e4, 0x5e5, 0x5e6, 0x5e7,
-    0x5e8, 0x5e9, 0x5ea, 0x0,
-    0x0, 0x200e, 0x200f, 0x0
-  },
-
-  {
-    // code page: cp1256
-    0x20ac, 0x67e, 0x201a, 0x192,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x679, 0x2039,
-    0x152, 0x686, 0x698, 0x688,
-    0x6af, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x6a9, 0x2122, 0x691, 0x203a,
-    0x153, 0x200c, 0x200d, 0x6ba,
-    0xa0, 0x60c, 0xa2, 0xa3,
-    0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0x6be, 0xab,
-    0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3,
-    0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0xb9, 0x61b, 0xbb,
-    0xbc, 0xbd, 0xbe, 0x61f,
-    0x6c1, 0x621, 0x622, 0x623,
-    0x624, 0x625, 0x626, 0x627,
-    0x628, 0x629, 0x62a, 0x62b,
-    0x62c, 0x62d, 0x62e, 0x62f,
-    0x630, 0x631, 0x632, 0x633,
-    0x634, 0x635, 0x636, 0xd7,
-    0x637, 0x638, 0x639, 0x63a,
-    0x640, 0x641, 0x642, 0x643,
-    0xe0, 0x644, 0xe2, 0x645,
-    0x646, 0x647, 0x648, 0xe7,
-    0xe8, 0xe9, 0xea, 0xeb,
-    0x649, 0x64a, 0xee, 0xef,
-    0x64b, 0x64c, 0x64d, 0x64e,
-    0xf4, 0x64f, 0x650, 0xf7,
-    0x651, 0xf9, 0x652, 0xfb,
-    0xfc, 0x200e, 0x200f, 0x6d2
-  },
-
-  {
-    // code page: cp1257
-    0x20ac, 0x81, 0x201a, 0x83,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x88, 0x2030, 0x8a, 0x2039,
-    0x8c, 0xa8, 0x2c7, 0xb8,
-    0x90, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x98, 0x2122, 0x9a, 0x203a,
-    0x9c, 0xaf, 0x2db, 0x9f,
-    0xa0, 0x0, 0xa2, 0xa3,
-    0xa4, 0x0, 0xa6, 0xa7,
-    0xd8, 0xa9, 0x156, 0xab,
-    0xac, 0xad, 0xae, 0xc6,
-    0xb0, 0xb1, 0xb2, 0xb3,
-    0xb4, 0xb5, 0xb6, 0xb7,
-    0xf8, 0xb9, 0x157, 0xbb,
-    0xbc, 0xbd, 0xbe, 0xe6,
-    0x104, 0x12e, 0x100, 0x106,
-    0xc4, 0xc5, 0x118, 0x112,
-    0x10c, 0xc9, 0x179, 0x116,
-    0x122, 0x136, 0x12a, 0x13b,
-    0x160, 0x143, 0x145, 0xd3,
-    0x14c, 0xd5, 0xd6, 0xd7,
-    0x172, 0x141, 0x15a, 0x16a,
-    0xdc, 0x17b, 0x17d, 0xdf,
-    0x105, 0x12f, 0x101, 0x107,
-    0xe4, 0xe5, 0x119, 0x113,
-    0x10d, 0xe9, 0x17a, 0x117,
-    0x123, 0x137, 0x12b, 0x13c,
-    0x161, 0x144, 0x146, 0xf3,
-    0x14d, 0xf5, 0xf6, 0xf7,
-    0x173, 0x142, 0x15b, 0x16b,
-    0xfc, 0x17c, 0x17e, 0x2d9
-  },
-
-  {
-    // code page: cp1258
-    0x20ac, 0x81, 0x201a, 0x192,
-    0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x8a, 0x2039,
-    0x152, 0x8d, 0x8e, 0x8f,
-    0x90, 0x2018, 0x2019, 0x201c,
-    0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x9a, 0x203a,
-    0x153, 0x9d, 0x9e, 0x178,
-    0xa0, 0xa1, 0xa2, 0xa3,
-    0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0xaa, 0xab,
-    0xac, 0xad, 0xae, 0xaf,
-    0xb0, 0xb1, 0xb2, 0xb3,
-    0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0xb9, 0xba, 0xbb,
-    0xbc, 0xbd, 0xbe, 0xbf,
-    0xc0, 0xc1, 0xc2, 0x102,
-    0xc4, 0xc5, 0xc6, 0xc7,
-    0xc8, 0xc9, 0xca, 0xcb,
-    0x300, 0xcd, 0xce, 0xcf,
-    0x110, 0xd1, 0x309, 0xd3,
-    0xd4, 0x1a0, 0xd6, 0xd7,
-    0xd8, 0xd9, 0xda, 0xdb,
-    0xdc, 0x1af, 0x303, 0xdf,
-    0xe0, 0xe1, 0xe2, 0x103,
-    0xe4, 0xe5, 0xe6, 0xe7,
-    0xe8, 0xe9, 0xea, 0xeb,
-    0x301, 0xed, 0xee, 0xef,
-    0x111, 0xf1, 0x323, 0xf3,
-    0xf4, 0x1a1, 0xf6, 0xf7,
-    0xf8, 0xf9, 0xfa, 0xfb,
-    0xfc, 0x1b0, 0x20ab, 0xff
-  }
-};
\ No newline at end of file
diff --git a/src/Resource/Resource_CodePages.pxx b/src/Resource/Resource_CodePages.pxx
new file mode 100644 (file)
index 0000000..9b5a92f
--- /dev/null
@@ -0,0 +1,666 @@
+// Copyright (c) 2020 OPEN CASCADE SAS
+//
+// This file is part of Open CASCADE Technology software library.
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License version 2.1 as published
+// by the Free Software Foundation, with special exception defined in the file
+// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
+// distribution for complete text of the license and disclaimer of any warranty.
+//
+// Alternatively, this file may be used under the terms of Open CASCADE
+// commercial license or contractual agreement.
+
+#include <Standard_TypeDef.hxx>
+
+// Code pages (Windows "ANSI" cp1250-cp1258 and ISO 8859-1..9) -> UTF16
+static const Standard_ExtCharacter THE_CODEPAGES_ANSI[Resource_FormatType_iso8859_9 - Resource_FormatType_CP1250 + 1][128] =
+{
+  {
+    // code page: cp1250
+    0x20ac, 0x81, 0x201a, 0x83,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x88, 0x2030, 0x160, 0x2039,
+    0x15a, 0x164, 0x17d, 0x179,
+    0x90, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x161, 0x203a,
+    0x15b, 0x165, 0x17e, 0x17a,
+    0xa0, 0x2c7, 0x2d8, 0x141,
+    0xa4, 0x104, 0xa6, 0xa7,
+    0xa8, 0xa9, 0x15e, 0xab,
+    0xac, 0xad, 0xae, 0x17b,
+    0xb0, 0xb1, 0x2db, 0x142,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0x105, 0x15f, 0xbb,
+    0x13d, 0x2dd, 0x13e, 0x17c,
+    0x154, 0xc1, 0xc2, 0x102,
+    0xc4, 0x139, 0x106, 0xc7,
+    0x10c, 0xc9, 0x118, 0xcb,
+    0x11a, 0xcd, 0xce, 0x10e,
+    0x110, 0x143, 0x147, 0xd3,
+    0xd4, 0x150, 0xd6, 0xd7,
+    0x158, 0x16e, 0xda, 0x170,
+    0xdc, 0xdd, 0x162, 0xdf,
+    0x155, 0xe1, 0xe2, 0x103,
+    0xe4, 0x13a, 0x107, 0xe7,
+    0x10d, 0xe9, 0x119, 0xeb,
+    0x11b, 0xed, 0xee, 0x10f,
+    0x111, 0x144, 0x148, 0xf3,
+    0xf4, 0x151, 0xf6, 0xf7,
+    0x159, 0x16f, 0xfa, 0x171,
+    0xfc, 0xfd, 0x163, 0x2d9
+  },
+
+  {
+    // code page: cp1251
+    0x402, 0x403, 0x201a, 0x453,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x20ac, 0x2030, 0x409, 0x2039,
+    0x40a, 0x40c, 0x40b, 0x40f,
+    0x452, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x459, 0x203a,
+    0x45a, 0x45c, 0x45b, 0x45f,
+    0xa0, 0x40e, 0x45e, 0x408,
+    0xa4, 0x490, 0xa6, 0xa7,
+    0x401, 0xa9, 0x404, 0xab,
+    0xac, 0xad, 0xae, 0x407,
+    0xb0, 0xb1, 0x406, 0x456,
+    0x491, 0xb5, 0xb6, 0xb7,
+    0x451, 0x2116, 0x454, 0xbb,
+    0x458, 0x405, 0x455, 0x457,
+    0x410, 0x411, 0x412, 0x413,
+    0x414, 0x415, 0x416, 0x417,
+    0x418, 0x419, 0x41a, 0x41b,
+    0x41c, 0x41d, 0x41e, 0x41f,
+    0x420, 0x421, 0x422, 0x423,
+    0x424, 0x425, 0x426, 0x427,
+    0x428, 0x429, 0x42a, 0x42b,
+    0x42c, 0x42d, 0x42e, 0x42f,
+    0x430, 0x431, 0x432, 0x433,
+    0x434, 0x435, 0x436, 0x437,
+    0x438, 0x439, 0x43a, 0x43b,
+    0x43c, 0x43d, 0x43e, 0x43f,
+    0x440, 0x441, 0x442, 0x443,
+    0x444, 0x445, 0x446, 0x447,
+    0x448, 0x449, 0x44a, 0x44b,
+    0x44c, 0x44d, 0x44e, 0x44f
+  },
+
+  {
+    // code page: cp1252
+    0x20ac, 0x81, 0x201a, 0x192,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x160, 0x2039,
+    0x152, 0x8d, 0x17d, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x161, 0x203a,
+    0x153, 0x9d, 0x17e, 0x178,
+    0xa0, 0xa1, 0xa2, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab,
+    0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb,
+    0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb,
+    0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0xd1, 0xd2, 0xd3,
+    0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdd, 0xde, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3,
+    0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb,
+    0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3,
+    0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb,
+    0xfc, 0xfd, 0xfe, 0xff
+  },
+
+  {
+    // code page: cp1253
+    0x20ac, 0x81, 0x201a, 0x192,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x88, 0x2030, 0x8a, 0x2039,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x9a, 0x203a,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x385, 0x386, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0x0, 0xab,
+    0xac, 0xad, 0xae, 0x2015,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0x384, 0xb5, 0xb6, 0xb7,
+    0x388, 0x389, 0x38a, 0xbb,
+    0x38c, 0xbd, 0x38e, 0x38f,
+    0x390, 0x391, 0x392, 0x393,
+    0x394, 0x395, 0x396, 0x397,
+    0x398, 0x399, 0x39a, 0x39b,
+    0x39c, 0x39d, 0x39e, 0x39f,
+    0x3a0, 0x3a1, 0x0, 0x3a3,
+    0x3a4, 0x3a5, 0x3a6, 0x3a7,
+    0x3a8, 0x3a9, 0x3aa, 0x3ab,
+    0x3ac, 0x3ad, 0x3ae, 0x3af,
+    0x3b0, 0x3b1, 0x3b2, 0x3b3,
+    0x3b4, 0x3b5, 0x3b6, 0x3b7,
+    0x3b8, 0x3b9, 0x3ba, 0x3bb,
+    0x3bc, 0x3bd, 0x3be, 0x3bf,
+    0x3c0, 0x3c1, 0x3c2, 0x3c3,
+    0x3c4, 0x3c5, 0x3c6, 0x3c7,
+    0x3c8, 0x3c9, 0x3ca, 0x3cb,
+    0x3cc, 0x3cd, 0x3ce, 0x0
+  },
+
+  {
+    // code page: cp1254
+    0x20ac, 0x81, 0x201a, 0x192,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x160, 0x2039,
+    0x152, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x161, 0x203a,
+    0x153, 0x9d, 0x9e, 0x178,
+    0xa0, 0xa1, 0xa2, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab,
+    0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb,
+    0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb,
+    0xcc, 0xcd, 0xce, 0xcf,
+    0x11e, 0xd1, 0xd2, 0xd3,
+    0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0x130, 0x15e, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3,
+    0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb,
+    0xec, 0xed, 0xee, 0xef,
+    0x11f, 0xf1, 0xf2, 0xf3,
+    0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb,
+    0xfc, 0x131, 0x15f, 0xff
+  },
+
+  {
+    // code page: cp1255
+    0x20ac, 0x81, 0x201a, 0x192,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x8a, 0x2039,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x9a, 0x203a,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3,
+    0x20aa, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xd7, 0xab,
+    0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xf7, 0xbb,
+    0xbc, 0xbd, 0xbe, 0xbf,
+    0x5b0, 0x5b1, 0x5b2, 0x5b3,
+    0x5b4, 0x5b5, 0x5b6, 0x5b7,
+    0x5b8, 0x5b9, 0x5ba, 0x5bb,
+    0x5bc, 0x5bd, 0x5be, 0x5bf,
+    0x5c0, 0x5c1, 0x5c2, 0x5c3,
+    0x5f0, 0x5f1, 0x5f2, 0x5f3,
+    0x5f4, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x5d0, 0x5d1, 0x5d2, 0x5d3,
+    0x5d4, 0x5d5, 0x5d6, 0x5d7,
+    0x5d8, 0x5d9, 0x5da, 0x5db,
+    0x5dc, 0x5dd, 0x5de, 0x5df,
+    0x5e0, 0x5e1, 0x5e2, 0x5e3,
+    0x5e4, 0x5e5, 0x5e6, 0x5e7,
+    0x5e8, 0x5e9, 0x5ea, 0x0,
+    0x0, 0x200e, 0x200f, 0x0
+  },
+
+  {
+    // code page: cp1256
+    0x20ac, 0x67e, 0x201a, 0x192,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x679, 0x2039,
+    0x152, 0x686, 0x698, 0x688,
+    0x6af, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x6a9, 0x2122, 0x691, 0x203a,
+    0x153, 0x200c, 0x200d, 0x6ba,
+    0xa0, 0x60c, 0xa2, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0x6be, 0xab,
+    0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0x61b, 0xbb,
+    0xbc, 0xbd, 0xbe, 0x61f,
+    0x6c1, 0x621, 0x622, 0x623,
+    0x624, 0x625, 0x626, 0x627,
+    0x628, 0x629, 0x62a, 0x62b,
+    0x62c, 0x62d, 0x62e, 0x62f,
+    0x630, 0x631, 0x632, 0x633,
+    0x634, 0x635, 0x636, 0xd7,
+    0x637, 0x638, 0x639, 0x63a,
+    0x640, 0x641, 0x642, 0x643,
+    0xe0, 0x644, 0xe2, 0x645,
+    0x646, 0x647, 0x648, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb,
+    0x649, 0x64a, 0xee, 0xef,
+    0x64b, 0x64c, 0x64d, 0x64e,
+    0xf4, 0x64f, 0x650, 0xf7,
+    0x651, 0xf9, 0x652, 0xfb,
+    0xfc, 0x200e, 0x200f, 0x6d2
+  },
+
+  {
+    // code page: cp1257
+    0x20ac, 0x81, 0x201a, 0x83,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x88, 0x2030, 0x8a, 0x2039,
+    0x8c, 0xa8, 0x2c7, 0xb8,
+    0x90, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x9a, 0x203a,
+    0x9c, 0xaf, 0x2db, 0x9f,
+    0xa0, 0x0, 0xa2, 0xa3,
+    0xa4, 0x0, 0xa6, 0xa7,
+    0xd8, 0xa9, 0x156, 0xab,
+    0xac, 0xad, 0xae, 0xc6,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xf8, 0xb9, 0x157, 0xbb,
+    0xbc, 0xbd, 0xbe, 0xe6,
+    0x104, 0x12e, 0x100, 0x106,
+    0xc4, 0xc5, 0x118, 0x112,
+    0x10c, 0xc9, 0x179, 0x116,
+    0x122, 0x136, 0x12a, 0x13b,
+    0x160, 0x143, 0x145, 0xd3,
+    0x14c, 0xd5, 0xd6, 0xd7,
+    0x172, 0x141, 0x15a, 0x16a,
+    0xdc, 0x17b, 0x17d, 0xdf,
+    0x105, 0x12f, 0x101, 0x107,
+    0xe4, 0xe5, 0x119, 0x113,
+    0x10d, 0xe9, 0x17a, 0x117,
+    0x123, 0x137, 0x12b, 0x13c,
+    0x161, 0x144, 0x146, 0xf3,
+    0x14d, 0xf5, 0xf6, 0xf7,
+    0x173, 0x142, 0x15b, 0x16b,
+    0xfc, 0x17c, 0x17e, 0x2d9
+  },
+
+  {
+    // code page: cp1258
+    0x20ac, 0x81, 0x201a, 0x192,
+    0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x8a, 0x2039,
+    0x152, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c,
+    0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x9a, 0x203a,
+    0x153, 0x9d, 0x9e, 0x178,
+    0xa0, 0xa1, 0xa2, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab,
+    0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb,
+    0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0x102,
+    0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb,
+    0x300, 0xcd, 0xce, 0xcf,
+    0x110, 0xd1, 0x309, 0xd3,
+    0xd4, 0x1a0, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0x1af, 0x303, 0xdf,
+    0xe0, 0xe1, 0xe2, 0x103,
+    0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb,
+    0x301, 0xed, 0xee, 0xef,
+    0x111, 0xf1, 0x323, 0xf3,
+    0xf4, 0x1a1, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb,
+    0xfc, 0x1b0, 0x20ab, 0xff
+  },
+  
+  {
+    // code page: ISO 8859-1
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab,
+    0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb,
+    0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb,
+    0xcc, 0xcd, 0xce, 0xcf,
+    0xd0, 0xd1, 0xd2, 0xd3,
+    0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdd, 0xde, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3,
+    0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb,
+    0xec, 0xed, 0xee, 0xef,
+    0xf0, 0xf1, 0xf2, 0xf3,
+    0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb,
+    0xfc, 0xfd, 0xfe, 0xff
+  },
+  
+  {
+    // code page: ISO 8859-2
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x104, 0x2d8, 0x141,
+    0xa4, 0x13d, 0x15a, 0xa7,
+    0xa8, 0x160, 0x15e, 0x164,
+    0x179, 0xad, 0x17d, 0x17b,
+    0xb0, 0x105, 0x2db, 0x142,
+    0xb4, 0x13e, 0x15b, 0x2c7,
+    0xb8, 0x161, 0x15f, 0x165,
+    0x17a, 0x2dd, 0x17e, 0x17c,
+    0x154, 0xc1, 0xc2, 0x102,
+    0xc4, 0x139, 0x106, 0xc7,
+    0x10c, 0xc9, 0x118, 0xcb,
+    0x11a, 0xcd, 0xce, 0x10e,
+    0x110, 0x143, 0x147, 0xd3,
+    0xd4, 0x150, 0xd6, 0xd7,
+    0x158, 0x16e, 0xda, 0x170,
+    0xdc, 0xdd, 0x162, 0xdf,
+    0x155, 0xe1, 0xe2, 0x103,
+    0xe4, 0x13a, 0x107, 0xe7,
+    0x10d, 0xe9, 0x119, 0xeb,
+    0x11b, 0xed, 0xee, 0x10f,
+    0x111, 0x144, 0x148, 0xf3,
+    0xf4, 0x151, 0xf6, 0xf7,
+    0x159, 0x16f, 0xfa, 0x171,
+    0xfc, 0xfd, 0x163, 0x2d9
+  },
+  
+  {
+    // code page: ISO 8859-3
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x126, 0x2d8, 0xa3,
+    0xa4, 0x0, 0x124, 0xa7,
+    0xa8, 0x130, 0x15e, 0x11e,
+    0x134, 0xad, 0x0, 0x17b,
+    0xb0, 0x127, 0xb2, 0xb3,
+    0xb4, 0xb5, 0x125, 0xb7,
+    0xb8, 0x131, 0x15f, 0x11f,
+    0x135, 0xbd, 0x0, 0x17c,
+    0xc0, 0xc1, 0xc2, 0x0,
+    0xc4, 0x10a, 0x108, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb,
+    0xcc, 0xcd, 0xce, 0xcf,
+    0x0, 0xd1, 0xd2, 0xd3,
+    0xd4, 0x120, 0xd6, 0xd7,
+    0x11c, 0xd9, 0xda, 0xdb,
+    0xdc, 0x16c, 0x15c, 0xdf,
+    0xe0, 0xe1, 0xe2, 0x0,
+    0xe4, 0x10b, 0x109, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb,
+    0xec, 0xed, 0xee, 0xef,
+    0x0, 0xf1, 0xf2, 0xf3,
+    0xf4, 0x121, 0xf6, 0xf7,
+    0x11d, 0xf9, 0xfa, 0xfb,
+    0xfc, 0x16d, 0x15d, 0x2d9
+  },
+  
+  {
+    // code page: ISO 8859-4
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x104, 0x138, 0x156,
+    0xa4, 0x128, 0x13b, 0xa7,
+    0xa8, 0x160, 0x112, 0x122,
+    0x166, 0xad, 0x17d, 0xaf,
+    0xb0, 0x105, 0x2db, 0x157,
+    0xb4, 0x129, 0x13c, 0x2c7,
+    0xb8, 0x161, 0x113, 0x123,
+    0x167, 0x14a, 0x17e, 0x14b,
+    0x100, 0xc1, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0x12e,
+    0x10c, 0xc9, 0x118, 0xcb,
+    0x116, 0xcd, 0xce, 0x12a,
+    0x110, 0x145, 0x14c, 0x136,
+    0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0x172, 0xda, 0xdb,
+    0xdc, 0x168, 0x16a, 0xdf,
+    0x101, 0xe1, 0xe2, 0xe3,
+    0xe4, 0xe5, 0xe6, 0x12f,
+    0x10d, 0xe9, 0x119, 0xeb,
+    0x117, 0xed, 0xee, 0x12b,
+    0x111, 0x146, 0x14d, 0x137,
+    0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0x173, 0xfa, 0xfb,
+    0xfc, 0x169, 0x16b, 0x2d9
+  },
+  
+  {
+    // code page: ISO 8859-5
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x401, 0x402, 0x403,
+    0x404, 0x405, 0x406, 0x407,
+    0x408, 0x409, 0x40a, 0x40b,
+    0x40c, 0xad, 0x40e, 0x40f,
+    0x410, 0x411, 0x412, 0x413,
+    0x414, 0x415, 0x416, 0x417,
+    0x418, 0x419, 0x41a, 0x41b,
+    0x41c, 0x41d, 0x41e, 0x41f,
+    0x420, 0x421, 0x422, 0x423,
+    0x424, 0x425, 0x426, 0x427,
+    0x428, 0x429, 0x42a, 0x42b,
+    0x42c, 0x42d, 0x42e, 0x42f,
+    0x430, 0x431, 0x432, 0x433,
+    0x434, 0x435, 0x436, 0x437,
+    0x438, 0x439, 0x43a, 0x43b,
+    0x43c, 0x43d, 0x43e, 0x43f,
+    0x440, 0x441, 0x442, 0x443,
+    0x444, 0x445, 0x446, 0x447,
+    0x448, 0x449, 0x44a, 0x44b,
+    0x44c, 0x44d, 0x44e, 0x44f,
+    0x2116, 0x451, 0x452, 0x453,
+    0x454, 0x455, 0x456, 0x457,
+    0x458, 0x459, 0x45a, 0x45b,
+    0x45c, 0xa7, 0x45e, 0x45f
+  },
+  
+  {
+    // code page: ISO 8859-6
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x0, 0x0, 0x0,
+    0xa4, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x60c, 0xad, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x61b,
+    0x0, 0x0, 0x0, 0x61f,
+    0x0, 0x621, 0x622, 0x623,
+    0x624, 0x625, 0x626, 0x627,
+    0x628, 0x629, 0x62a, 0x62b,
+    0x62c, 0x62d, 0x62e, 0x62f,
+    0x630, 0x631, 0x632, 0x633,
+    0x634, 0x635, 0x636, 0x637,
+    0x638, 0x639, 0x63a, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x640, 0x641, 0x642, 0x643,
+    0x644, 0x645, 0x646, 0x647,
+    0x648, 0x649, 0x64a, 0x64b,
+    0x64c, 0x64d, 0x64e, 0x64f,
+    0x650, 0x651, 0x652, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0
+  },
+  
+  {
+    // code page: ISO 8859-7
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x2018, 0x2019, 0xa3,
+    0x0, 0x0, 0xa6, 0xa7,
+    0xa8, 0xa9, 0x0, 0xab,
+    0xac, 0xad, 0x0, 0x2015,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0x384, 0x385, 0x386, 0xb7,
+    0x388, 0x389, 0x38a, 0xbb,
+    0x38c, 0xbd, 0x38e, 0x38f,
+    0x390, 0x391, 0x392, 0x393,
+    0x394, 0x395, 0x396, 0x397,
+    0x398, 0x399, 0x39a, 0x39b,
+    0x39c, 0x39d, 0x39e, 0x39f,
+    0x3a0, 0x3a1, 0x0, 0x3a3,
+    0x3a4, 0x3a5, 0x3a6, 0x3a7,
+    0x3a8, 0x3a9, 0x3aa, 0x3ab,
+    0x3ac, 0x3ad, 0x3ae, 0x3af,
+    0x3b0, 0x3b1, 0x3b2, 0x3b3,
+    0x3b4, 0x3b5, 0x3b6, 0x3b7,
+    0x3b8, 0x3b9, 0x3ba, 0x3bb,
+    0x3bc, 0x3bd, 0x3be, 0x3bf,
+    0x3c0, 0x3c1, 0x3c2, 0x3c3,
+    0x3c4, 0x3c5, 0x3c6, 0x3c7,
+    0x3c8, 0x3c9, 0x3ca, 0x3cb,
+    0x3cc, 0x3cd, 0x3ce, 0x0
+  },
+  
+  {
+    // code page: ISO 8859-8
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0x0, 0xa2, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xd7, 0xab,
+    0xac, 0xad, 0xae, 0x203e,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xf7, 0xbb,
+    0xbc, 0xbd, 0xbe, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x0,
+    0x0, 0x0, 0x0, 0x2017,
+    0x5d0, 0x5d1, 0x5d2, 0x5d3,
+    0x5d4, 0x5d5, 0x5d6, 0x5d7,
+    0x5d8, 0x5d9, 0x5da, 0x5db,
+    0x5dc, 0x5dd, 0x5de, 0x5df,
+    0x5e0, 0x5e1, 0x5e2, 0x5e3,
+    0x5e4, 0x5e5, 0x5e6, 0x5e7,
+    0x5e8, 0x5e9, 0x5ea, 0x0,
+    0x0, 0x0, 0x0, 0x0
+  },
+  
+  {
+    // code page: ISO 8859-9
+    0x80, 0x81, 0x82, 0x83,
+    0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b,
+    0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93,
+    0x94, 0x95, 0x96, 0x97,
+    0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9e, 0x9f,
+    0xa0, 0xa1, 0xa2, 0xa3,
+    0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xab,
+    0xac, 0xad, 0xae, 0xaf,
+    0xb0, 0xb1, 0xb2, 0xb3,
+    0xb4, 0xb5, 0xb6, 0xb7,
+    0xb8, 0xb9, 0xba, 0xbb,
+    0xbc, 0xbd, 0xbe, 0xbf,
+    0xc0, 0xc1, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7,
+    0xc8, 0xc9, 0xca, 0xcb,
+    0xcc, 0xcd, 0xce, 0xcf,
+    0x11e, 0xd1, 0xd2, 0xd3,
+    0xd4, 0xd5, 0xd6, 0xd7,
+    0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0x130, 0x15e, 0xdf,
+    0xe0, 0xe1, 0xe2, 0xe3,
+    0xe4, 0xe5, 0xe6, 0xe7,
+    0xe8, 0xe9, 0xea, 0xeb,
+    0xec, 0xed, 0xee, 0xef,
+    0x11f, 0xf1, 0xf2, 0xf3,
+    0xf4, 0xf5, 0xf6, 0xf7,
+    0xf8, 0xf9, 0xfa, 0xfb,
+    0xfc, 0x131, 0x15f, 0xff
+  }
+};
\ No newline at end of file
index 9537f2b93d1c7b3baa7390fd474c249aa7d9b3ea..66f4ccc679817b6658c01860b8e580c40dba071d 100644 (file)
@@ -24,12 +24,12 @@ enum Resource_FormatType
 {
   Resource_FormatType_SJIS,         //!< SJIS (Shift Japanese Industrial Standards) encoding
   Resource_FormatType_EUC,          //!< EUC (Extended Unix Code) multi-byte encoding primarily for Japanese, Korean, and simplified Chinese
-  Resource_FormatType_ANSI,         //!< ANSI encoding (pass through without conversion)
+  Resource_FormatType_NoConversion, //!< format type indicating non-conversion behavior
   Resource_FormatType_GB,           //!< GB (Guobiao) encoding for Simplified Chinese
   Resource_FormatType_UTF8,         //!< multi-byte UTF-8 encoding
   Resource_FormatType_SystemLocale, //!< active system-defined locale; this value is strongly NOT recommended to use
 
-  // non ASCII format types
+  // Windows-native ("ANSI") 8-bit code pages
   Resource_FormatType_CP1250,       //!< cp1250 (Central European) encoding
   Resource_FormatType_CP1251,       //!< cp1251 (Cyrillic) encoding
   Resource_FormatType_CP1252,       //!< cp1252 (Western European) encoding
@@ -40,7 +40,19 @@ enum Resource_FormatType
   Resource_FormatType_CP1257,       //!< cp1257 (Baltic) encoding
   Resource_FormatType_CP1258,       //!< cp1258 (Vietnamese) encoding
 
+  // ISO8859 8-bit code pages
+  Resource_FormatType_iso8859_1,    //!< ISO 8859-1 (Western European) encoding
+  Resource_FormatType_iso8859_2,    //!< ISO 8859-2 (Central European) encoding
+  Resource_FormatType_iso8859_3,    //!< ISO 8859-3 (South European) encoding
+  Resource_FormatType_iso8859_4,    //!< ISO 8859-4 (Northern European) encoding
+  Resource_FormatType_iso8859_5,    //!< ISO 8859-5 (Cyrillic) encoding
+  Resource_FormatType_iso8859_6,    //!< ISO 8859-6 (Arabic) encoding
+  Resource_FormatType_iso8859_7,    //!< ISO 8859-7 (Greek) encoding
+  Resource_FormatType_iso8859_8,    //!< ISO 8859-8 (Hebrew) encoding
+  Resource_FormatType_iso8859_9,    //!< ISO 8859-9 (Turkish) encoding
+
   // old aliases
+  Resource_FormatType_ANSI = Resource_FormatType_NoConversion,
   Resource_SJIS = Resource_FormatType_SJIS,
   Resource_EUC  = Resource_FormatType_EUC,
   Resource_ANSI = Resource_FormatType_ANSI,
index c3e1c7fb96ca8be45ae56d2f97eb3fe8e5161836..ab24ce22ee2ec48a4e2532dd5685defea12cb2b2 100644 (file)
@@ -22,7 +22,7 @@
 #include <TCollection_ExtendedString.hxx>
 #include <NCollection_UtfString.hxx>
 #include <Standard_NotImplemented.hxx>
-#include "Resource_ANSI.pxx"
+#include "Resource_CodePages.pxx"
 #include "Resource_GBK.pxx"
 #include "Resource_Big5.pxx"
 
@@ -625,20 +625,30 @@ void Resource_Unicode::ConvertFormatToUnicode (const Resource_FormatType theForm
     case Resource_FormatType_CP1256:
     case Resource_FormatType_CP1257:
     case Resource_FormatType_CP1258:
+    case Resource_FormatType_iso8859_1:
+    case Resource_FormatType_iso8859_2:
+    case Resource_FormatType_iso8859_3:
+    case Resource_FormatType_iso8859_4:
+    case Resource_FormatType_iso8859_5:
+    case Resource_FormatType_iso8859_6:
+    case Resource_FormatType_iso8859_7:
+    case Resource_FormatType_iso8859_8:
+    case Resource_FormatType_iso8859_9:
     {
       const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250;
       const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex];
       theToStr.Clear();
       for (const char* anInputPntr = theFromStr; *anInputPntr != '\0'; ++anInputPntr)
       {
-        Standard_ExtCharacter aRes = (*anInputPntr & 0x80) != 0
-          ? aCodePage[(0x7f & *anInputPntr)]
-          : *anInputPntr;
-        if (aRes == (Standard_ExtCharacter)0x0)
+        unsigned char anInputChar = (unsigned char)(*anInputPntr);
+        Standard_ExtCharacter aRes = (anInputChar & 0x80) != 0
+          ? aCodePage[(0x7f & anInputChar)]
+          : anInputChar;
+        if (aRes == 0)
         {
           aRes = '?';
         }
-        theToStr.Insert(theToStr.Length() + 1, aRes);
+        theToStr.AssignCat(aRes);
       }
       break;
     }
@@ -689,8 +699,52 @@ Standard_Boolean Resource_Unicode::ConvertUnicodeToFormat(const Resource_FormatT
     case Resource_FormatType_CP1256:
     case Resource_FormatType_CP1257:
     case Resource_FormatType_CP1258:
+    case Resource_FormatType_iso8859_1:
+    case Resource_FormatType_iso8859_2:
+    case Resource_FormatType_iso8859_3:
+    case Resource_FormatType_iso8859_4:
+    case Resource_FormatType_iso8859_5:
+    case Resource_FormatType_iso8859_6:
+    case Resource_FormatType_iso8859_7:
+    case Resource_FormatType_iso8859_8:
+    case Resource_FormatType_iso8859_9:
     {
-      throw Standard_NotImplemented("Resource_Unicode::ConvertUnicodeToFormat - conversion from CP1250 - CP1258 to Unicode is not implemented");
+      // Convert through an 8-bit code page table: THE_CODEPAGES_ANSI holds, per page,
+      // the Unicode values of bytes 0x80..0xFF (zero marks an unused byte).
+      // The output takes one byte per input character plus the terminating zero.
+      if (theMaxSize < 1 || theMaxSize < theFromStr.Length())
+      {
+        return Standard_False;
+      }
+      const int aCodePageIndex = (int)theFormat - (int)Resource_FormatType_CP1250;
+      const Standard_ExtString aCodePage = THE_CODEPAGES_ANSI[aCodePageIndex];
+      // never read past the end of the source string nor write past the buffer
+      const Standard_Integer aNbChars = theFromStr.Length() < theMaxSize - 1
+                                      ? theFromStr.Length()
+                                      : theMaxSize - 1;
+      for (Standard_Integer aToCharInd = 0; aToCharInd < aNbChars; ++aToCharInd)
+      {
+        const Standard_ExtCharacter aFromChar = theFromStr.Value(aToCharInd + 1);
+        if (aFromChar < 0x80)
+        {
+          // 7-bit characters (including zero) are copied as is, mirroring ConvertFormatToUnicode;
+          // they must bypass the table, which describes only the upper half 0x80..0xFF
+          theToStr[aToCharInd] = (char)aFromChar;
+          continue;
+        }
+        // find the character in the code page; if it is not found, put '?'
+        theToStr[aToCharInd] = '?';
+        for (int anIndCP = 0; anIndCP < 128; ++anIndCP)
+        {
+          if (aCodePage[anIndCP] == aFromChar)
+          {
+            theToStr[aToCharInd] = (char)(anIndCP | 0x80);
+            break;
+          }
+        }
+      }
+      theToStr[aNbChars] = '\0';
+      return Standard_True;
     }
     case Resource_FormatType_UTF8:
     {
index c9c1398220cc53cc8fd1e254a70dc71c773651ea..1b5d41463a4cf11e73d8ef59606e6c6003e43cad 100644 (file)
@@ -69,26 +69,5 @@ Standard_Boolean STEPCAFControl_Controller::Init ()
   Interface_Static::Init   ("stepcaf", "read.stepcaf.subshapes.name", '&', "eval On");  // 1
   Interface_Static::SetIVal("read.stepcaf.subshapes.name", 0); // Disabled by default
 
-  // STEP file encoding for names translation
-  // Note: the numbers should be consistent with Resource_FormatType enumeration
-  Interface_Static::Init ("step", "read.stepcaf.codepage", 'e', "");
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "enum 0");
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval SJIS");         // Resource_FormatType_SJIS
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval EUC");          // Resource_FormatType_EUC
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval ANSI");         // Resource_FormatType_ANSI
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval GB");           // Resource_FormatType_GB
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval UTF8");         // Resource_FormatType_UTF8
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval SystemLocale"); // Resource_FormatType_SystemLocale
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1250");       // Resource_FormatType_CP1250
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1251");       // Resource_FormatType_CP1251
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1252");       // Resource_FormatType_CP1252
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1253");       // Resource_FormatType_CP1253
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1254");       // Resource_FormatType_CP1254
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1255");       // Resource_FormatType_CP1255
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1256");       // Resource_FormatType_CP1256
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1257");       // Resource_FormatType_CP1257
-  Interface_Static::Init ("step", "read.stepcaf.codepage", '&', "eval CP1258");       // Resource_FormatType_CP1258
-  Interface_Static::SetCVal ("read.stepcaf.codepage", "UTF8");
-
   return Standard_True;
 }
index 06c7befa6ffe7857930fe3e7a03867361ad017e4..146fed35c848f6c498f17134dc9fa10e34342669 100644 (file)
 #include <Transfer_ActorOfTransientProcess.hxx>
 #include <Bnd_Box.hxx>
 #include <BRepBndLib.hxx>
-#include <Resource_Unicode.hxx>
 
 // skl 21.08.2003 for reading G&DT
 //#include <StepRepr_CompoundItemDefinition.hxx>
@@ -321,8 +320,7 @@ TCollection_AsciiString AddrToString(const TopoDS_Shape& theShape)
 //=======================================================================
 
 STEPCAFControl_Reader::STEPCAFControl_Reader()
-: mySourceCodePage (Resource_FormatType_UTF8),
-  myColorMode(Standard_True),
+: myColorMode(Standard_True),
   myNameMode(Standard_True),
   myLayerMode(Standard_True),
   myPropsMode(Standard_True),
@@ -332,7 +330,6 @@ STEPCAFControl_Reader::STEPCAFControl_Reader()
   myViewMode(Standard_True)
 {
   STEPCAFControl_Controller::Init();
-  mySourceCodePage = (Resource_FormatType )Interface_Static::IVal ("read.stepcaf.codepage");
 }
 
 
@@ -343,8 +340,7 @@ STEPCAFControl_Reader::STEPCAFControl_Reader()
 
 STEPCAFControl_Reader::STEPCAFControl_Reader(const Handle(XSControl_WorkSession)& WS,
   const Standard_Boolean scratch)
-: mySourceCodePage (Resource_FormatType_UTF8),
-  myColorMode(Standard_True),
+: myColorMode(Standard_True),
   myNameMode(Standard_True),
   myLayerMode(Standard_True),
   myPropsMode(Standard_True),
@@ -354,7 +350,6 @@ STEPCAFControl_Reader::STEPCAFControl_Reader(const Handle(XSControl_WorkSession)
   myViewMode(Standard_True)
 {
   STEPCAFControl_Controller::Init();
-  mySourceCodePage = (Resource_FormatType )Interface_Static::IVal ("read.stepcaf.codepage");
   Init(WS, scratch);
 }
 
@@ -386,9 +381,10 @@ void STEPCAFControl_Reader::Init(const Handle(XSControl_WorkSession)& WS,
 //=======================================================================
 TCollection_ExtendedString STEPCAFControl_Reader::convertName (const TCollection_AsciiString& theName) const
 {
-  TCollection_ExtendedString aName;
-  Resource_Unicode::ConvertFormatToUnicode (mySourceCodePage, theName.ToCString(), aName);
-  return aName;
+  // If source code page is not a NoConversion
+  // the string is treated as having UTF-8 coding,
+  // else each character is copied to ExtCharacter.
+  return TCollection_ExtendedString (theName, SourceCodePage() != Resource_FormatType_NoConversion);
 }
 
 //=======================================================================
@@ -4787,6 +4783,26 @@ Standard_Boolean STEPCAFControl_Reader::GetNameMode() const
   return myNameMode;
 }
 
+//=======================================================================
+//function : SourceCodePage
+//purpose  : Returns the source code page stored in the STEP model of myReader
+//=======================================================================
+
+Resource_FormatType STEPCAFControl_Reader::SourceCodePage() const
+{
+  return myReader.StepModel()->SourceCodePage();
+}
+
+//=======================================================================
+//function : SetSourceCodePage
+//purpose  : Forwards the given source code page to the STEP model of myReader
+//=======================================================================
+
+void STEPCAFControl_Reader::SetSourceCodePage(Resource_FormatType theCode)
+{
+  myReader.StepModel()->SetSourceCodePage(theCode);
+}
+
 //=======================================================================
 //function : SetLayerMode
 //purpose  : 
index 737179cc5a9a19a275ed598fe124aa926cfd4319..8bddd9602eae58cb5c454adb35b39d0caf696781 100644 (file)
@@ -142,11 +142,11 @@ public:
   Standard_EXPORT Standard_Boolean GetNameMode() const;
 
   //! Return the encoding of STEP file for converting names into UNICODE.
-  //! Initialized from "read.stepcaf.codepage" variable by constructor, which is Resource_UTF8 by default.
-  Resource_FormatType SourceCodePage() const { return mySourceCodePage; }
+  //! Initialized from "read.step.codepage" variable by constructor, which is Resource_UTF8 by default.
+  Standard_EXPORT Resource_FormatType SourceCodePage() const;
 
   //! Return the encoding of STEP file for converting names into UNICODE.
-  void SetSourceCodePage (Resource_FormatType theCode) { mySourceCodePage = theCode; }
+  Standard_EXPORT void SetSourceCodePage (Resource_FormatType theCode);
 
   //! Set LayerMode for indicate read Layers or not.
   Standard_EXPORT void SetLayerMode (const Standard_Boolean layermode);
@@ -301,7 +301,6 @@ private:
 
   STEPControl_Reader myReader;
   NCollection_DataMap<TCollection_AsciiString, Handle(STEPCAFControl_ExternFile)> myFiles;
-  Resource_FormatType mySourceCodePage;
   Standard_Boolean myColorMode;
   Standard_Boolean myNameMode;
   Standard_Boolean myLayerMode;
index ec1559ee3cbf55cfeaa635f25287d1d3521c2b0a..c8df0600a11479e2e9de7858230656a747c98ce5 100644 (file)
@@ -217,6 +217,38 @@ STEPControl_Controller::STEPControl_Controller ()
     Interface_Static::Init("step", "read.step.root.transformation", '&', "eval OFF");
     Interface_Static::SetCVal("read.step.root.transformation", "ON");
 
+    // STEP file encoding for names translation
+    // Note: the numbers should be consistent with Resource_FormatType enumeration
+    Interface_Static::Init("step", "read.step.codepage", 'e', "");
+    Interface_Static::Init("step", "read.step.codepage", '&', "enum 0");
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval SJIS");         // Resource_FormatType_SJIS
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval EUC");          // Resource_FormatType_EUC
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval NoConversion"); // Resource_FormatType_NoConversion
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval GB");           // Resource_FormatType_GB
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval UTF8");         // Resource_FormatType_UTF8
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval SystemLocale"); // Resource_FormatType_SystemLocale
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1250");       // Resource_FormatType_CP1250
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1251");       // Resource_FormatType_CP1251
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1252");       // Resource_FormatType_CP1252
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1253");       // Resource_FormatType_CP1253
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1254");       // Resource_FormatType_CP1254
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1255");       // Resource_FormatType_CP1255
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1256");       // Resource_FormatType_CP1256
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1257");       // Resource_FormatType_CP1257
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval CP1258");       // Resource_FormatType_CP1258
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-1");    // Resource_FormatType_iso8859_1
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-2");    // Resource_FormatType_iso8859_2
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-3");    // Resource_FormatType_iso8859_3
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-4");    // Resource_FormatType_iso8859_4
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-5");    // Resource_FormatType_iso8859_5
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-6");    // Resource_FormatType_iso8859_6
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-7");    // Resource_FormatType_iso8859_7
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-8");    // Resource_FormatType_iso8859_8
+    Interface_Static::Init("step", "read.step.codepage", '&', "eval iso8859-9");    // Resource_FormatType_iso8859_9
+    Interface_Static::SetCVal("read.step.codepage", "UTF8");
+
+    Standard_STATIC_ASSERT((int)Resource_FormatType_iso8859_9 - (int)Resource_FormatType_CP1250 == 17); // "Error: Invalid Codepage Enumeration"
+
     init = Standard_True;
   }
 
index c68965372e3d226b69ab4c192c9b1ef0dd3a7b06..187e99c0e6560ede8cc0526b2fb5f8f2a2a32744 100644 (file)
 #include <StepData_StepModel.hxx>
 #include <StepData_StepWriter.hxx>
 #include <TCollection_HAsciiString.hxx>
+#include <Interface_Static.hxx>
 
 #include <stdio.h>
 IMPLEMENT_STANDARD_RTTIEXT(StepData_StepModel,Interface_InterfaceModel)
 
 // Entete de fichier : liste d entites
-StepData_StepModel::StepData_StepModel ()  {  }
+// Default constructor: initializes the source code page from the Interface_Static value "read.step.codepage"
+StepData_StepModel::StepData_StepModel () :mySourceCodePage((Resource_FormatType)Interface_Static::IVal("read.step.codepage"))
+{}
 
 
 Handle(Standard_Transient) StepData_StepModel::Entity
index db70abdc349057a0442b6db033102cc9e3ff4326..7a291054beb89df638da18722eb578a58ac24587 100644 (file)
@@ -20,6 +20,7 @@
 #include <Interface_EntityList.hxx>
 #include <TColStd_HArray1OfInteger.hxx>
 #include <Interface_InterfaceModel.hxx>
+#include <Resource_FormatType.hxx>
 
 class Standard_NoSuchObject;
 class Standard_Transient;
@@ -97,7 +98,12 @@ public:
   //! same form as for PrintLabel
   Standard_EXPORT Handle(TCollection_HAsciiString) StringLabel (const Handle(Standard_Transient)& ent) const Standard_OVERRIDE;
 
+  //! Return the encoding of STEP file for converting names into UNICODE.
+  //! Initialized from "read.step.codepage" variable by constructor, which is Resource_UTF8 by default.
+  Resource_FormatType SourceCodePage() const { return mySourceCodePage; }
 
+  //! Set the encoding of STEP file for converting names into UNICODE.
+  void SetSourceCodePage (Resource_FormatType theCode) { mySourceCodePage = theCode; }
 
 
   DEFINE_STANDARD_RTTIEXT(StepData_StepModel,Interface_InterfaceModel)
@@ -112,6 +118,7 @@ private:
 
   Interface_EntityList theheader;
   Handle(TColStd_HArray1OfInteger) theidnums;
+  Resource_FormatType mySourceCodePage;
 
 
 };
index 56018e249aa3f214caf20ff15b232f0568d25fd3..ecc36a0e3c70b3d038d3068f898b6b9975222456 100644 (file)
@@ -18,6 +18,7 @@
 #include <Interface_FileParameter.hxx>
 #include <Interface_HArray1OfHAsciiString.hxx>
 #include <Interface_Macros.hxx>
+#include <Interface_Static.hxx>
 #include <Interface_ParamList.hxx>
 #include <Message.hxx>
 #include <Message_Messenger.hxx>
@@ -37,6 +38,8 @@
 #include <StepData_StepModel.hxx>
 #include <StepData_StepReaderData.hxx>
 #include <TCollection_AsciiString.hxx>
+#include <TCollection_ExtendedString.hxx>
+#include <NCollection_UtfIterator.hxx>
 #include <TCollection_HAsciiString.hxx>
 #include <TColStd_Array1OfInteger.hxx>
 #include <TColStd_HArray1OfInteger.hxx>
@@ -46,6 +49,7 @@
 #include <TColStd_IndexedMapOfInteger.hxx>
 #include <TColStd_SequenceOfInteger.hxx>
 #include <StepData_UndefinedEntity.hxx>
+#include <Resource_Unicode.hxx>
 
 #include <stdio.h>
 IMPLEMENT_STANDARD_RTTIEXT(StepData_StepReaderData, Interface_FileReaderData)
@@ -61,7 +65,6 @@ static char txtmes[200];  // plus commode que redeclarer partout
 
 
 static Standard_Boolean initstr = Standard_False;
-
 #define Maxlst 64
 //static TCollection_AsciiString subl[Maxlst];          // Maxlst : minimum 10
 
@@ -69,36 +72,231 @@ static Standard_Integer acceptvoid = 0;
 
 // ----------  Fonctions Utilitaires  ----------
 
+//! Returns the numeric value (0..15) of a hexadecimal digit character.
+//! Accepts '0'-'9', 'A'-'F' and 'a'-'f'; any other character yields 0.
+static Standard_Integer convertCharacterTo16bit(const unsigned char theCharacter)
+{
+  switch (theCharacter)
+  {
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    case 'A': case 'a': return 10;
+    case 'B': case 'b': return 11;
+    case 'C': case 'c': return 12;
+    case 'D': case 'd': return 13;
+    case 'E': case 'e': return 14;
+    case 'F': case 'f': return 15;
+    default : return 0; // not a hexadecimal digit
+  }
+}
+
 //=======================================================================
-//function : CleanText
+//function : cleanText
 //purpose  : 
 //=======================================================================
-static void CleanText(const Handle(TCollection_HAsciiString)& val)
+
+void StepData_StepReaderData::cleanText(const Handle(TCollection_HAsciiString)& theVal) const
 {
-  Standard_Integer n = val->Length();    // avant reduction
-  val->Remove(n);
-  val->Remove(1);
-  //  Ne pas oublier de traiter les caracteres speciaux
+  Standard_Integer n = theVal->Length();    // string size before reduction
+  theVal->Remove(n);
+  theVal->Remove(1);
+  // Don't forget to treat the special characters
   for (Standard_Integer i = n - 2; i > 0; i--) {
-    char uncar = val->Value(i);
-    if (uncar == '\n')
-      {      val->Remove(i);      if (i < n-2) uncar = val->Value(i);  }
-    if (uncar == '\'' && i < n - 2) {
-      if (val->Value(i + 1) == '\'') { val->Remove(i + 1);    continue; }
+    char aChar = theVal->Value(i);
+    if (aChar == '\n')
+    { theVal->Remove(i);      if (i < n-2) aChar = theVal->Value(i);  }
+    if (aChar == '\'' && i < n - 2) {
+      if (theVal->Value(i + 1) == '\'') { theVal->Remove(i + 1);    continue; }
+    }
+    if (aChar == '\\' && i < n - 3) {
+      if (theVal->Value(i + 2) == '\\') {
+        if (theVal->Value(i + 1) == 'N')
+             {  theVal->SetValue(i,'\n');    theVal->Remove(i+1,2);  continue;  }
+        if (theVal->Value(i + 1) == 'T')
+             {  theVal->SetValue(i,'\t');    theVal->Remove(i+1,2);  continue;  }
+      }
+    }
+  }
+
+  // pass through without conversion the control directives
+  if (mySourceCodePage == Resource_FormatType_NoConversion)
+    return;
+
+  Standard_Integer aFirstCharInd = 1; // begin index of substring to conversion before the control directives
+  Standard_Integer aLastCharInd = 1; // end index of substring to conversion before the control directives
+  TCollection_ExtendedString aTempExtString; // string for characters within control directives
+  TCollection_ExtendedString anOutputExtString; // string for conversion in UTF-8
+  Resource_FormatType aLocalFormatType = Resource_FormatType_iso8859_1; // a code page for a "\S\" control directive
+  for (Standard_Integer i = 1; i <= theVal->Length(); ++i)
+  {
+    unsigned char aChar = theVal->Value(i);
+    if (aChar != '\\' || (theVal->Length() - i) < 3) // does not contain the control directive
+    {
+      continue;
     }
-    if (uncar == '\\' && i < n - 2) {
-      if (val->Value(i + 1) == '\\') { val->Remove(i + 1);    continue; }
+    Standard_Integer aLocalLastCharInd = i - 1;
+    Standard_Boolean isConverted = Standard_False;
+    // Encoding ISO 8859 characters within a string;
+    // ("\P{N}\") control directive;
+    // indicates code page for ("\S\") control directive;
+    // {N}: "A", "B", "C", "D", "E", "F", "G", "H", "I";
+    // "A" identifies ISO 8859-1; "B" identifies ISO 8859-2, etc.
+    if (theVal->Value(i + 1) == 'P' && theVal->Length() - i > 3 && theVal->Value(i + 3) == '\\')
+    {
+      Standard_Character aPageId = UpperCase (theVal->Value(i + 2));
+      if (aPageId >= 'A' && aPageId <= 'I')
+      {
+        aLocalFormatType = (Resource_FormatType)(Resource_FormatType_iso8859_1 + (aPageId - 'A'));
+      }
+      else
+      {
+        thecheck->AddWarning("String control directive \\P*\\ with an unsupported symbol in place of *");
+      }
+
+      isConverted = Standard_True;
+      i += 3;
+    }
+    // Encoding ISO 8859 characters within a string;
+    // ("\S\") control directive;
+    // converts followed a LATIN CODEPOINT character.
+    else if (theVal->Value(i + 1) == 'S' && theVal->Length() - i > 2 && theVal->Value(i + 2) == '\\')
+    {
+      Standard_Character aResChar = theVal->Value(i + 3) | 0x80;
+      const char aStrForCovert[2] = { aResChar, '\0' };
+      Resource_Unicode::ConvertFormatToUnicode(aLocalFormatType, aStrForCovert, aTempExtString);
+      isConverted = Standard_True;
+      i += 3;
     }
-    else if (uncar == '\\' && i < n - 3) {
-      if (val->Value(i + 2) == '\\') {
-        if (val->Value(i + 1) == 'N')
-         {  val->SetValue(i,'\n');    val->Remove(i+1,2);  continue;  }
-        if (val->Value(i + 1) == 'T')
-         {  val->SetValue(i,'\t');    val->Remove(i+1,2);  continue;  }
+    // Encoding U+0000 to U+00FF in a string
+    // ("\X\") control directive;
+    // converts followed two hexadecimal character.
+    else if (theVal->Value(i + 1) == 'X' && theVal->Length() - i > 3 && theVal->Value(i + 2) == '\\')
+    {
+      Standard_Character aResChar = (char)convertCharacterTo16bit(theVal->Value(i + 3));
+      aResChar = (aResChar << 4) | (char)convertCharacterTo16bit(theVal->Value(i + 4));
+      const char aStrForCovert[2] = { aResChar, '\0' };
+      aTempExtString = TCollection_ExtendedString(aStrForCovert, Standard_False); // pass through without conversion
+      isConverted = Standard_True;
+      i += 4;
+    }
+    // Encoding ISO 10646 characters within a string
+    // ("\X{N}\") control directive;
+    // {N}: "0", "2", "4";
+    // "\X2\" or "\X4\" converts followed a hexadecimal character sequence;
+    // "\X0\" indicate the end of the "\X2\" or "\X4\".
+    else if (theVal->Value(i + 1) == 'X' && theVal->Length() - i > 2 && theVal->Value(i + 3) == '\\')
+    {
+      Standard_Integer aFirstInd = i + 3;
+      Standard_Integer aLastInd = i;
+      Standard_Boolean isClosed = Standard_False;
+      for (; i <= theVal->Length() && !isClosed; ++i) // find the end of the "\X2\" or "\X4\" by an external "i"
+      {
+        if (theVal->Length() - i > 2 && theVal->Value(i) == '\\' && theVal->Value(i + 1) == 'X' && theVal->Value(i + 2) == '0' && theVal->Value(i + 3) == '\\')
+        {
+          aLastInd = i - 1;
+          i = i + 2;
+          isClosed = Standard_True;
+        }
+      }
+      if (!isClosed) // "\X0\" not exists
+      {
+        aLastInd = theVal->Length();
+      }
+      TCollection_AsciiString aBitString;
+      aBitString = TCollection_AsciiString(theVal->ToCString() + aFirstInd, aLastInd - aFirstInd);
+      aBitString.UpperCase(); // make valid for conversion into 16-bit
+      // "\X2\" control directive;
+      // followed by multiples of four or three hexadecimal characters. 
+      // Encoding in UTF-16
+      if (theVal->Value(aFirstInd - 1) == '2' && theVal->Length() - aFirstInd > 3)
+      {
+        Standard_Integer anIterStep = (aBitString.Length() % 4 == 0) ? 4 : 3;
+        if (aBitString.Length() % anIterStep)
+        {
+          aTempExtString.AssignCat('?');
+          thecheck->AddWarning("String control directive \\X2\\ is followed by number of digits not multiple of 4");
+        }
+        else
+        {
+          Standard_Integer aStrLen = aBitString.Length() / anIterStep;
+          Standard_Utf16Char aUtfCharacter = '\0';
+          for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen * anIterStep; ++aCharInd)
+          {
+            aUtfCharacter |= convertCharacterTo16bit(aBitString.Value(aCharInd));
+            if (aCharInd % anIterStep == 0)
+            {
+              aTempExtString.AssignCat(aUtfCharacter);
+              aUtfCharacter = '\0';
+            }
+            aUtfCharacter = aUtfCharacter << 4;
+          }
+        }
+      }
+      // "\X4\" control directive;
+      // followed by multiples of eight hexadecimal characters. 
+      // Encoding in UTF-32
+      else if (theVal->Value(aFirstInd - 1) == '4' && theVal->Length() - aFirstInd  > 7)
+      {
+        if (aBitString.Length() % 8)
+        {
+          aTempExtString.AssignCat('?');
+          thecheck->AddWarning("String control directive \\X4\\ is followed by number of digits not multiple of 8");
+        }
+        else
+        {
+          Standard_Integer aStrLen = aBitString.Length() / 8;
+          Standard_Utf32Char aUtfCharacter[2] = {'\0', '\0'};
+          for (Standard_Integer aCharInd = 1; aCharInd <= aStrLen * 8; ++aCharInd)
+          {
+            aUtfCharacter[0] |= convertCharacterTo16bit(aBitString.Value(aCharInd));
+            if (aCharInd % 8 == 0)
+            {
+              NCollection_Utf32Iter aUtfIter(aUtfCharacter);
+              Standard_Utf16Char aStringBuffer[3];
+              Standard_Utf16Char* aUtfPntr = aUtfIter.GetUtf16(aStringBuffer);
+              *aUtfPntr++ = '\0';
+              TCollection_ExtendedString aUtfString(aStringBuffer);
+              aTempExtString.AssignCat(aUtfString);
+              aUtfCharacter[0] = '\0';
+            }
+            aUtfCharacter[0] = aUtfCharacter[0] << 4;
+          }
         }
       }
+      isConverted = Standard_True;
+    }
+    if (isConverted) // find the control directive
+    {
+      TCollection_ExtendedString anExtString;
+      if (aFirstCharInd <= aLocalLastCharInd)
+      {
+        Resource_Unicode::ConvertFormatToUnicode(mySourceCodePage, theVal->SubString(aFirstCharInd, aLocalLastCharInd)->ToCString(), anExtString);
+      }
+      anOutputExtString.AssignCat(anExtString);
+      anOutputExtString.AssignCat(aTempExtString);
+      aFirstCharInd = i + 1;
+      aLastCharInd = aFirstCharInd;
+      aTempExtString.Clear();
     }
   }
+  if (aLastCharInd <= theVal->Length())
+  {
+    Resource_Unicode::ConvertFormatToUnicode(mySourceCodePage, theVal->ToCString() + aLastCharInd - 1, aTempExtString);
+    anOutputExtString.AssignCat(aTempExtString);
+  }
+  theVal->Clear();
+  TCollection_AsciiString aTmpString(anOutputExtString, 0);
+  theVal->AssignCat(aTmpString.ToCString());
+}
+
 
 //  -------------  METHODES  -------------
 
@@ -109,9 +307,9 @@ static void CleanText(const Handle(TCollection_HAsciiString)& val)
 
 StepData_StepReaderData::StepData_StepReaderData
 (const Standard_Integer nbheader, const Standard_Integer nbtotal,
-  const Standard_Integer nbpar)
+  const Standard_Integer nbpar, const Resource_FormatType theSourceCodePage)
   : Interface_FileReaderData(nbtotal, nbpar), theidents(1, nbtotal),
-  thetypes(1, nbtotal) //, themults (1,nbtotal)
+  thetypes(1, nbtotal), mySourceCodePage(theSourceCodePage) //, themults (1,nbtotal)
 {
   //  char textnum[10];
   thenbscop = 0;  thenbents = 0;  thelastn = 0;  thenbhead = nbheader;
@@ -564,7 +762,9 @@ Standard_Integer StepData_StepReaderData::ReadSub(const Standard_Integer numsub,
     case 6: {
       if (FT != Interface_ParamText) { kod = 0; break; }
       Handle(TCollection_HAsciiString) txt = new TCollection_HAsciiString(str);
-      CleanText(txt);  hst->SetValue(ip, txt);  break;
+      cleanText(txt);
+      hst->SetValue(ip, txt);
+      break;
     }
     case 7: {
       Handle(Standard_Transient) ent = BoundEntity(FP.EntityNumber());
@@ -636,7 +836,9 @@ Standard_Integer StepData_StepReaderData::ReadSub(const Standard_Integer numsub,
     case Interface_ParamLogical: break;
     case Interface_ParamText: {
       Handle(TCollection_HAsciiString) txt = new TCollection_HAsciiString(str);
-      CleanText(txt);  htr->SetValue(ip, txt);  break;
+      cleanText(txt);
+      htr->SetValue(ip, txt);
+      break;
     }
     case Interface_ParamSub: {
       Handle(Standard_Transient) sub;
@@ -714,7 +916,9 @@ Standard_Boolean StepData_StepReaderData::ReadField(const Standard_Integer num,
   case Interface_ParamVoid:  break;
   case Interface_ParamText:
     txt = new TCollection_HAsciiString(str);
-    CleanText(txt);  fild.Set(txt);  break;
+    cleanText(txt);
+    fild.Set(txt);
+    break;
   case Interface_ParamEnum:
     if (!strcmp(str, ".T.")) fild.SetLogical(StepData_LTrue);
     else if (!strcmp(str, ".F.")) fild.SetLogical(StepData_LFalse);
@@ -841,7 +1045,7 @@ Standard_Boolean StepData_StepReaderData::ReadAny(const Standard_Integer num,
   case Interface_ParamLogical: break;
   case Interface_ParamText: {
     Handle(TCollection_HAsciiString) txt = new TCollection_HAsciiString(str);
-    CleanText(txt);
+    cleanText(txt);
 
     // PDN May 2000: for reading SOURCE_ITEM (external references)
     if (!val.IsNull()) {
@@ -1242,7 +1446,7 @@ Standard_Boolean StepData_StepReaderData::ReadString(const Standard_Integer num,
         CleanText (val);
       }*/
       val = new TCollection_HAsciiString(FP.CValue());
-      CleanText(val);
+      cleanText(val);
     } else {
       if (acceptvoid && FP.ParamType() == Interface_ParamVoid) warn = Standard_True;
       errmess = new String("Parameter n0.%d (%s) not a quoted String");
index 5e830520f3450c1972cd6027ffaf4a95ac0ae9eb..5cc8619e6461bd21d5c97388e80e11a1b686c7f4 100644 (file)
@@ -19,6 +19,7 @@
 
 #include <Standard.hxx>
 #include <Standard_Type.hxx>
+#include <Resource_FormatType.hxx>
 
 #include <TColStd_Array1OfInteger.hxx>
 #include <Interface_IndexedMapOfAsciiString.hxx>
@@ -63,7 +64,7 @@ public:
   //! creation time, because it contains arrays)
   //! nbheader is nb of records for Header, nbtotal for Header+Data
   //! and nbpar gives the total count of parameters
-  Standard_EXPORT StepData_StepReaderData(const Standard_Integer nbheader, const Standard_Integer nbtotal, const Standard_Integer nbpar);
+  Standard_EXPORT StepData_StepReaderData(const Standard_Integer nbheader, const Standard_Integer nbtotal, const Standard_Integer nbpar, const Resource_FormatType theSourceCodePage = Resource_FormatType_UTF8);
   
   //! Fills the fields of a record
   Standard_EXPORT void SetRecord (const Standard_Integer num, const Standard_CString ident, const Standard_CString type, const Standard_Integer nbpar);
@@ -349,6 +350,16 @@ private:
   //! If found, returns its EntityNumber, else returns Zero.
   Standard_EXPORT Standard_Integer FindEntityNumber (const Standard_Integer num, const Standard_Integer id) const;
 
+  //! Prepare string to use in OCCT exchange structure.
+  //! If code page is Resource_FormatType_NoConversion,
+  //! clean only special characters without conversion;
+  //! else convert a string to UTF8 using the code page
+  //! and handle the control directives.
+  Standard_EXPORT void cleanText(const Handle(TCollection_HAsciiString)& theVal) const;
+
+private:
+
+
   TColStd_Array1OfInteger theidents;
   TColStd_Array1OfInteger thetypes;
   Interface_IndexedMapOfAsciiString thenametypes;
@@ -358,6 +369,7 @@ private:
   Standard_Integer thenbhead;
   Standard_Integer thenbscop;
   Handle(Interface_Check) thecheck;
+  Resource_FormatType mySourceCodePage;
 
 
 };
index 1d564e7c4457b8de484077b1816d235af736b936..20983510068484f1b8c4600ddbf62157a9d139c6 100644 (file)
@@ -143,7 +143,7 @@ static Standard_Integer StepFile_Read (const char* theName,
   Standard_Integer nbhead, nbrec, nbpar;
   lir_file_nbr (&nbhead,&nbrec,&nbpar);  // renvoi par lex/yacc
   Handle(StepData_StepReaderData) undirec =
-    new StepData_StepReaderData(nbhead,nbrec,nbpar);  // creation tableau de records
+    new StepData_StepReaderData(nbhead,nbrec,nbpar, stepmodel->SourceCodePage());  // creation tableau de records
 
   for ( Standard_Integer nr = 1; nr <= nbrec; nr ++) {
     int nbarg; char* ident; char* typrec = 0;
index 4b8d391f614f88465d0db7b6c600fa2e0c554413..00c60225ad51ef559b5e4df3f614802ab5156b2b 100644 (file)
@@ -258,12 +258,13 @@ TCollection_ExtendedString::TCollection_ExtendedString
 //  Create an extendedstring from an AsciiString 
 //---------------------------------------------------------------------------
 TCollection_ExtendedString::TCollection_ExtendedString
-                                (const TCollection_AsciiString& theString)
+                                (const TCollection_AsciiString& theString,
+                                 const Standard_Boolean isMultiByte)
 {
   mylength = nbSymbols (theString.ToCString());
   mystring = allocateExtChars (mylength);
   mystring[mylength] = 0;
-  if (ConvertToUnicode (theString.ToCString()))
+  if (isMultiByte && ConvertToUnicode (theString.ToCString()))
   {
     return;
   }
@@ -304,6 +305,20 @@ void TCollection_ExtendedString::AssignCat (const TCollection_ExtendedString& th
   mystring[mylength] = 0;
 }
 
+// ----------------------------------------------------------------------------
+//  AssignCat (single UTF-16 character; a null character is ignored)
+// ----------------------------------------------------------------------------
+void TCollection_ExtendedString::AssignCat(const Standard_Utf16Char theChar)
+{
+  if (theChar == '\0')
+  {
+    return; // a null character is never appended
+  }
+  mystring = reallocateExtChars(mystring, mylength + 1);
+  mystring[mylength++] = theChar; // store the character, then advance the length
+  mystring[mylength] = '\0';      // keep the buffer null-terminated
+}
+
 // ----------------------------------------------------------------------------
 // Cat
 // ----------------------------------------------------------------------------
index 3b429d2e3eedc3497bb58623286e983e3e12c338..3e16ca49dcd18465b8b734c1ce4a5be6efb50aae 100644 (file)
@@ -114,8 +114,9 @@ public:
 
   //! Creation by converting an Ascii string to an extended
   //! string. The string is treated as having UTF-8 coding.
-  //! If it is not a UTF-8 then each character is copied to ExtCharacter.
-  Standard_EXPORT TCollection_ExtendedString(const TCollection_AsciiString& astring);
+  //! If it is not valid UTF-8, or isMultiByte is false,
+  //! each character is copied to ExtCharacter.
+  Standard_EXPORT TCollection_ExtendedString(const TCollection_AsciiString& astring, const Standard_Boolean isMultiByte = Standard_True);
   
   //! Appends the other extended string to this extended string.
   //! Note that this method is an alias of operator +=.
@@ -125,6 +126,9 @@ void operator += (const TCollection_ExtendedString& other)
 {
   AssignCat(other);
 }
+
+  //! Appends the UTF-16 character to this extended string; a null character is ignored.
+  Standard_EXPORT void AssignCat (const Standard_Utf16Char theChar);
   
   //! Appends <other> to me.
   Standard_EXPORT TCollection_ExtendedString Cat (const TCollection_ExtendedString& other) const;
diff --git a/tests/bugs/step/bug28454_1 b/tests/bugs/step/bug28454_1
new file mode 100644 (file)
index 0000000..0c85633
--- /dev/null
@@ -0,0 +1,122 @@
+puts "================"
+puts "0028454: Data Exchange - Names with Special Characters Cannot Be Read from STEP or IGES Files"
+puts "Target encodings: ISO 8859-1, ISO 8859-2, ISO 8859-3, ISO 8859-4, ISO 8859-5, ISO 8859-6, ISO 8859-7, ISO 8859-8, ISO 8859-9"
+puts "Test case:"
+puts "1) Creates a temporary STEP file-template using WriteStep."
+puts "2) Reads generated template and replaces @tmp_name@ entity in it with target language characters using Tcl."
+puts "3) Generates 2 STEP files in UTF-8 and ISO 8859-(N) encodings (converted by Tcl)."
+puts "4) Reads generated files using StepRead and validates entity name."
+puts "================"
+puts ""
+
+# Returns the content of the file thePath, read with a single "read" call
+# sized by the file size.
+proc fileToString { thePath } {
+  set aChannel [open $thePath r]
+  set aContent [read $aChannel [file size $thePath]]
+  close $aChannel
+  return $aContent
+}
+
+# Writes theContent (followed by the newline added by "puts") to the file thePath,
+# using LF line ends and the encoding theCodePage.
+proc fileFromString { thePath theContent theCodePage } {
+  set aChannel [open $thePath w]
+  fconfigure $aChannel -encoding $theCodePage -translation lf
+  puts $aChannel $theContent
+  close $aChannel
+}
+
+# Replaces theNameFrom by theNameTo in the content of the template file thePathFrom
+# and writes the result twice: as UTF-8 to theUtfPathTo and, encoded with theCodePage,
+# to "<theCpPathTo><theCodePage>.stp". Then reads the files into documents U, A and ISO
+# and compares the name at label 0:1:1:1 with theNameTo, printing "Error: ..." on failure.
+proc fileCreateAndCompare { thePathFrom theUtfPathTo theCpPathTo theNameFrom theNameTo theCodePage } {
+  set aTemplate [fileToString $thePathFrom]
+  regsub -all -- $theNameFrom $aTemplate $theNameTo aContent
+  set aCpPath "${theCpPathTo}${theCodePage}.stp"
+  fileFromString $theUtfPathTo $aContent "utf-8"
+  fileFromString $aCpPath $aContent $theCodePage
+
+  # U and A are read while the parameter is UTF8, ISO after switching it to theCodePage
+  param read.step.codepage UTF8
+  ReadStep U $theUtfPathTo
+  ReadStep A $aCpPath
+  param read.step.codepage $theCodePage
+  ReadStep ISO $aCpPath
+
+  # A is the control sample: if it yields the expected name, the test itself is broken
+  if { [GetName U   0:1:1:1] != "$theNameTo" } { puts "Error: unable to read UTF-8  STEP" }
+  if { [GetName ISO 0:1:1:1] != "$theNameTo" } { puts "Error: unable to read $theCodePage STEP" }
+  if { [GetName A   0:1:1:1] == "$theNameTo" } { puts "Error: broken test case" }
+  catch { Close A }
+  catch { Close U }
+  catch { Close ISO }
+}
+
+pload XDE OCAF MODELING VISUALIZATION
+
+# Placeholder name stored in the template and paths of the generated STEP files;
+# aTmpFileISO8859N is only a prefix, the code page name and ".stp" are appended later.
+set aTmpNameTmpl "@tmp_name@"
+set aTmpFileTmpl "${imagedir}/${casename}-tmp.stp"
+set aTmpFileUtf8 "${imagedir}/${casename}-tmp-utf8.stp"
+set aTmpFileISO8859N "${imagedir}/${casename}-tmp-"
+
+# Entity name = "Test" (ASCII, the same in every encoding) + a word in the target language.
+# All words are written as valid UTF-8 byte sequences, so decoding does not depend on
+# the lenient handling of malformed input by "encoding convertfrom".
+
+# multi-encoding
+set anEngName [encoding convertfrom utf-8 "\x54\x65\x73\x74"]
+# ISO 8859-1
+set iso8859_1 [encoding convertfrom utf-8 "\x50\x72\x6f\x62\xc3\xad\x68\xc3\xa1"]
+# ISO 8859-2
+set iso8859_2 [encoding convertfrom utf-8 "\x50\x72\x6f\x62\xc3\xad\x68\xc3\xa1"]
+# ISO 8859-3
+set iso8859_3 [encoding convertfrom utf-8 "\xc3\x96\x6c\xc3\xa7\x65\x6b"]
+# ISO 8859-4
+set iso8859_4 [encoding convertfrom utf-8 "\x6d\xc4\x93\xc4\xa3\x69\x6e\x69\x65\x74"]
+# ISO 8859-5
+set iso8859_5 [encoding convertfrom utf-8 "\xD0\xa2\xD0\xB5\xD1\x81\xD1\x82"]
+# ISO 8859-6
+set iso8859_6 [encoding convertfrom utf-8 "\xd8\xa7\xd8\xae\xd8\xaa\xd8\xa8\xd8\xa7\xd8\xb1"]
+# ISO 8859-7
+set iso8859_7 [encoding convertfrom utf-8 "\xce\xb4\xcf\x80\xce\xa8\xce\xae"]
+# ISO 8859-8
+set iso8859_8 [encoding convertfrom utf-8 "\xc2\xb1\xd7\xa4\xd7\x9e\xd7\x9c\xd7\xa9"]
+# ISO 8859-9
+set iso8859_9 [encoding convertfrom utf-8 "\xc4\x9f\xc5\x9f\xc4\x9e\xc5\x9e\xc3\x86"]
+
+# Template document: a box named with the placeholder, written to the template STEP file
+box b 1 2 3
+catch { Close A }
+catch { Close T }
+catch { Close U }
+catch { Close CP }
+
+XNewDoc   T
+XAddShape T b 0
+XSetColor T b 1 0 0
+SetName   T 0:1:1:1 $aTmpNameTmpl
+GetName   T 0:1:1:1
+WriteStep T $aTmpFileTmpl
+
+# The same check for every code page: title, Tcl encoding name, target-language word
+foreach {aTitle aCodePage aLocalName} [list \
+  "ISO 8859-1" "iso8859-1" $iso8859_1 \
+  "ISO 8859-2" "iso8859-2" $iso8859_2 \
+  "ISO 8859-3" "iso8859-3" $iso8859_3 \
+  "ISO 8859-4" "iso8859-4" $iso8859_4 \
+  "ISO 8859-5" "iso8859-5" $iso8859_5 \
+  "ISO 8859-6" "iso8859-6" $iso8859_6 \
+  "ISO 8859-7" "iso8859-7" $iso8859_7 \
+  "ISO 8859-8" "iso8859-8" $iso8859_8 \
+  "ISO 8859-9" "iso8859-9" $iso8859_9 \
+] {
+  puts $aTitle
+  set aName "$anEngName $aLocalName"
+  fileCreateAndCompare $aTmpFileTmpl $aTmpFileUtf8 $aTmpFileISO8859N $aTmpNameTmpl $aName $aCodePage
+}
\ No newline at end of file
diff --git a/tests/bugs/step/bug28454_2 b/tests/bugs/step/bug28454_2
new file mode 100644 (file)
index 0000000..102f4b0
--- /dev/null
@@ -0,0 +1,78 @@
+# Texts that name STEP control directives are written in braces: inside double quotes
+# Tcl backslash substitution would drop the backslashes from the printed message.
+puts "================"
+puts "0028454: Data Exchange - Names with Special Characters Cannot Be Read from STEP or IGES Files"
+puts {Target control directives: (\X\), (\S\), (\P*\), (\X0\), (\X2\), (\X4\)}
+puts "================"
+puts ""
+
+pload OCAF
+catch {Close D}
+
+# Read File
+ReadStep D [locate_data_file bug28454_directives.stp]
+
+# Checking: each name read from the file is compared with the expected decoded string
+puts {# STEP string: 'AaBbCc\X2\00C4\X0\\X2\00E4\X0\\X2\00D6\X0\\X2\00F6\X0\\X2\00DC\X0\\X2\00FC\X0\\X2\00DF\X0\*,.-;:_'}
+if { [GetName D 0:1:1:1] != "AaBbCcÄäÖöÜüß*,.-;:_" } {
+  puts {Error:  name has been lost on conversion \X2\ directives}
+}
+puts {# STEP string: '\X2\03C0\X0\'}
+if { [GetName D 0:1:1:2] != "π" } {
+  puts {Error:  name has been lost on conversion \X2\ directive}
+}
+puts {# STEP string: 'Expression: \X2\03B103B2\X0\\\X2\03B3\X0\*\X2\03C0\X0\'}
+if { [GetName D 0:1:1:3] != "Expression: αβ\\γ*π" } {
+  puts {Error:  name has been lost on conversion \X2\ directive}
+}
+puts {# STEP string: '\X4\0001F7CA\X0\'}
+# Note hack with conversion to utf-8, needed to get the strings comparison work for Unicode symbols above BMP
+if { [encoding convertto utf-8 [GetName D 0:1:1:4]] !=
+     [encoding convertto utf-8 "🟊"] } {
+  puts {Error:  name has been lost on conversion \X4\ directive}
+}
+puts {# STEP string: 'Star, arrow, faces: \X4\0001F7CA0001f80A0001f6370001f926\X0\ end'}
+# Note hack with conversion to utf-8, needed to get the strings comparison work for Unicode symbols above BMP
+if { [encoding convertto utf-8 [GetName D 0:1:1:5]] !=
+     [encoding convertto utf-8 "Star, arrow, faces: 🟊🠊😷🤦 end"] } {
+  puts {Error:  name has been lost on conversion \X4\ directive}
+}
+puts {# STEP string: '\S\Drger'}
+if { [GetName D 0:1:1:6] != "Ärger" } {
+  puts {Error:  name has been lost on conversion \S\ directive}
+}
+puts {# STEP string: 'h\S\ttel'}
+if { [GetName D 0:1:1:7] != "hôtel" } {
+  puts {Error:  name has been lost on conversion \S\ directive}
+}
+puts {# STEP string: '\PE\\S\*\S\U\S\b'}
+if { [GetName D 0:1:1:8] != "Њет" } {
+  puts {Error:  name has been lost on conversion \PE\ and \S\ directives}
+}
+puts {# STEP string: 'see \X\A7 4.1'}
+if { [GetName D 0:1:1:9] != "see § 4.1" } {
+  puts {Error:  name has been lost on conversion \X\ directive}
+}
+puts {# STEP string: 'line one\X\0Aline two'}
+if { [GetName D 0:1:1:10] != "line one\nline two" } {
+  puts {Error:  name has been lost on conversion \X\ directives}
+}
+puts {# STEP string: 'Expression: \X\B13\X2\03C0\X0\*\X2\03C0\X0\+12'}
+if { [GetName D 0:1:1:11] != "Expression: ±3π*π+12" } {
+  puts {Error:  name has been lost on conversion \X2\ and \X\ directives}
+}
+puts {# STEP string: 'Expression: \X2\03B1\X0\\PC\*\X2\03B2\X0\\S\w\X2\03B3'}
+if { [GetName D 0:1:1:12] != "Expression: α*β÷γ" } {
+  puts {Error:  name has been lost on conversion \X2\,\PC\ and \S\ directives}
+}
+puts {# STEP string: 'T\PE\\S\5C\S\D'}
+if { [GetName D 0:1:1:13] != "TЕCТ" } {
+  puts {Error:  name has been lost on conversion \S\ and \PE\ directives}
+}
+puts {# STEP string: 'A\S\DaBbCc\X2\0C4\X0\'h\S\t\X2\0E4\X0\\X4\0001F7CA\X0\\X\0A\X4\0001F7CA\X0\*,.-;:_\X2\0D6\X0\\PE\\S\5'}
+if { [encoding convertto utf-8 [GetName D 0:1:1:14]] !=
+     [encoding convertto utf-8 "AÄaBbCcÄ'hôä🟊\n🟊*,.-;:_ÖЕ"] } {
+  puts {Error:  name has been lost on conversion \X\,\X2\,\X4\ and \S\ directive}
+}
+puts {# STEP string: '\X2\3B13B23B3\X0\'}
+if { [GetName D 0:1:1:15] != "αβγ" } {
+  puts {Error:  name has been lost on conversion \X2\ directive}
+}
\ No newline at end of file
index 04942ac3fab2e881a68783e1f2f92496d3804442..9dc8c4d45ddf04be50936875e9fc86a36b8fb42d 100644 (file)
@@ -46,10 +46,10 @@ regsub -all -- $aTmpNameTmpl [fileToString "$aTmpFileTmpl"] "$aName" aContent
 fileFromString "$aTmpFileUtf8" "$aContent" "utf-8"
 fileFromString "$aTmpFileGb"   "$aContent" "gb2312"
 
-param read.stepcaf.codepage UTF8
+param read.step.codepage UTF8
 ReadStep U "$aTmpFileUtf8"
 ReadStep A "$aTmpFileGb"
-param read.stepcaf.codepage GB
+param read.step.codepage GB
 ReadStep G "$aTmpFileGb"
 
 if { [GetName U 0:1:1:1] != "$aName" } { puts "Error: unable to read UTF-8  STEP" }
index dadeb88fe532fd8628bc79563a005903dc2ce85e..3165439687b5e33b14ffe5726ced76cf6b97b9b1 100644 (file)
@@ -6,7 +6,7 @@ puts ""
 pload OCAF
 
 # Read file
-param read.stepcaf.codepage CP1251
+param read.step.codepage CP1251
 ReadStep D [locate_data_file bug31670_russian.stp]
 
 # Checking 
index 91c1ed262a30eff3485c179fa75b700def7eb6b9..a98cb37a684ef70fadd2b285d115a557585bafe7 100644 (file)
@@ -29,10 +29,10 @@ proc fileCreateAndCompare { thePathFrom theUtfPathTo theCpPathTo theNameFrom the
   fileFromString "$theUtfPathTo" "$aContent" "utf-8"
   fileFromString "$theCpPathTo" "$aContent" "$aCodePage"
   
-  param read.stepcaf.codepage UTF8
+  param read.step.codepage UTF8
   ReadStep U "$theUtfPathTo"
   ReadStep A "$theCpPathTo"
-  param read.stepcaf.codepage "$theCodePage"
+  param read.step.codepage "$theCodePage"
   ReadStep CP "$theCpPathTo"
   
   if { [GetName U  0:1:1:1] != "$theNameTo" } { puts "Error: unable to read UTF-8  STEP" }
index 2c3a5ab984bcbc6f16ec6b0a3ec3495f097d892f..254f86fc984e2f74eb17222bdb73303c05235e90 100644 (file)
@@ -1,5 +1,5 @@
 set filename bug28315_part_step-pmi.stp
 
 set ref_data {
-D_First 0:1:7:1 0 27 0 "Nyn\\X2\\11b\\X0\\j\\X2\\161\\X0\\S\\m pohled modelu" parallel 13.0973082 1.62908566 -570.319287 0.0 0.0 -1.0 0 1 0 0.0 342.494354 342.494354
+D_First 0:1:7:1 0 27 0 "Nynější pohled modelu" parallel 13.0973082 1.62908566 -570.319287 0.0 0.0 -1.0 0 1 0 0.0 342.494354 342.494354
 }
index b585a6327fe62a7f5f74e78f7c2c97e56de1f82a..960c1b1695ce20eeb96181405efa29585aa09262 100644 (file)
@@ -3,7 +3,7 @@ set filename bug30315.stp
 set ref_data {
 D_First 0:1:7:1 0 0 0 "ALL" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
 D_First 0:1:7:2 0 0 0 "CPC Process" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
-D_First 0:1:7:3 0 0 0 "CPC D\\X2\\00E9\\X0\\tente" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
+D_First 0:1:7:3 0 0 0 "CPC Détente" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
 D_First 0:1:7:4 0 0 0 "REF A/B/C" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
 D_First 0:1:7:5 0 0 0 "CTF A/B/C" parallel 0 0 1.9593266248700001 0 0 -1 1 -5.5511151231299994e-017 0 0 1000 1000
 }