[gdal-dev] GDAL/OGR patch, codepage conversion on windows
OKI Miyuki
oki at hunes.co.jp
Tue May 29 03:26:04 PDT 2012
Hi all,
I made a patch for OGR's cpl_recode_stub.cpp
to enable code page conversion on the Windows platform.
It handles code page conversions such as: "CP932" to "UTF-8"
"CP1252" to "UTF-8"
...
and
"CP932" from "UTF-8"
"CP1252" from "UTF-8"
...
cheers,
Miyuki OKI
----------------------------------------------------------------------------------
*** port/cpl_recode_stub.cpp Wed Jan 4 16:03:15 2012
--- ../../gdal-1.9.0/port/cpl_recode_stub.cpp Tue May 29 19:03:28 2012
***************
*** 46,51 ****
--- 46,56 ----
const char* src, unsigned srclen);
static int utf8test(const char* src, unsigned srclen);
+ #ifdef _WIN32
+ char* code_to_code( const char* src,
+ unsigned src_code_page, unsigned dst_code_page );
+ #endif
+
#ifdef FUTURE_NEEDS
static const char* utf8fwd(const char* p, const char* start, const char* end);
static const char* utf8back(const char* p, const char* start, const char*end);
***************
*** 128,133 ****
--- 133,165 ----
return pszResult;
}
+ #ifdef _WIN32
+ /* ---------------------------------------------------------------------*/
+ /* CPXXX to UTF8 */
+ /* ---------------------------------------------------------------------*/
+ if( strncmp(pszSrcEncoding,"CP",2) == 0
+ && strcmp(pszDstEncoding,CPL_ENC_UTF8) == 0 )
+ {
+ int nCode = atoi( pszSrcEncoding + 2 );
+ if( nCode > 0 ) {
+ return code_to_code( pszSource, nCode, 65001 );
+ }
+ }
+
+ /* ---------------------------------------------------------------------*/
+ /* UTF8 to CPXXX */
+ /* ---------------------------------------------------------------------*/
+ if( strcmp(pszSrcEncoding,CPL_ENC_UTF8) == 0
+ && strncmp(pszDstEncoding,"CP",2) == 0 )
+ {
+ int nCode = atoi( pszDstEncoding + 2 );
+ if( nCode > 0 ) {
+ return code_to_code( pszSource, 65001, nCode );
+ }
+ }
+
+ #endif
+
/* -------------------------------------------------------------------- */
/* Anything else to UTF-8 is treated as ISO8859-1 to UTF-8 with */
/* a one-time warning. */
***************
*** 993,998 ****
--- 1025,1105 ----
return count;
}
+ #ifdef _WIN32
+ /************************************************************************/
+ /* code_to_code() */
+ /************************************************************************/
+
+ /* Convert a byte stream in one code page (i.e. a normal C string)
+ to a byte stream in another code page (also a normal C string).
+
+ \a src is the source C string (including its null terminator).
+ \a src_code_page is the code page of the source C string.
+ \a dst_code_page is the code page of the destination C string.
+
+ UTF7 65000
+ UTF8 65001
+ OEM-US 437
+ OEM-ARABIC 720
+ OEM-GREEK 737
+ OEM-BALTIC 775
+ OEM-MLATIN1 850
+ OEM-LATIN2 852
+ OEM-CYRILLIC 855
+ OEM-TURKISH 857
+ OEM-MLATIN1P 858
+ OEM-HEBREW 862
+ OEM-RUSSIAN 866
+
+ THAI 874
+ SJIS 932
+ GBK 936
+ KOREA 949
+ BIG5 950
+
+ EUROPE 1250
+ CYRILLIC 1251
+ LATIN1 1252
+ GREEK 1253
+ TURKISH 1254
+ HEBREW 1255
+ ARABIC 1256
+ BALTIC 1257
+ VIETNAM 1258
+
+ ISO-LATIN1 28591
+ ISO-LATIN2 28592
+ ISO-LATIN3 28593
+ ISO-BALTIC 28594
+ ISO-CYRILLIC 28595
+ ISO-ARABIC 28596
+ ISO-HEBREW 28598
+ ISO-TURKISH 28599
+ ISO-LATIN9 28605
+
+ ISO-2022-JP 50220
+
+ */
+
+ # include <windows.h>
+
+ /* Return a CPLCalloc()-allocated copy of the NUL-terminated string src. */
+ static char* code_to_code_dup( const char* src ) {
+     size_t n = strlen( src ) + 1;
+     char* copy = (char*)CPLCalloc( sizeof(char), n );
+     memcpy( copy, src, n );
+     return copy;
+ }
+
+ /* Convert the NUL-terminated string src from code page src_code_page to
+    code page dst_code_page, going through UTF-16 with the Win32
+    MultiByteToWideChar() / WideCharToMultiByte() pair.
+
+    Returns NULL when src is NULL. Otherwise the result is always a buffer
+    obtained from CPLCalloc() that the caller releases with CPLFree(). When
+    both code pages are equal, or when either conversion step fails (for
+    example an unknown code page), the result is an unmodified copy of src
+    rather than the caller's own pointer or an empty string, so ownership
+    of the return value never depends on the path taken. */
+ char* code_to_code( const char* src, unsigned src_code_page, unsigned dst_code_page ) {
+     if( NULL == src ) return NULL;
+     if( src_code_page == dst_code_page ) return code_to_code_dup( src );
+
+     /* With a source length of -1 the returned count includes the */
+     /* terminating NUL, so 0 can only mean that the call failed.  */
+     int wlen = MultiByteToWideChar( src_code_page, 0, src, -1, NULL, 0 );
+     if( wlen <= 0 ) return code_to_code_dup( src );
+
+     /* CPLCalloc() zero-fills, so the extra element keeps tbuf terminated. */
+     wchar_t* tbuf = (wchar_t*)CPLCalloc( sizeof(wchar_t), wlen + 1 );
+     if( MultiByteToWideChar( src_code_page, 0, src, -1, tbuf, wlen ) == 0 ) {
+         CPLFree( tbuf );
+         return code_to_code_dup( src );
+     }
+
+     int len = WideCharToMultiByte( dst_code_page, 0, tbuf, -1, NULL, 0, NULL, NULL );
+     if( len <= 0 ) {
+         CPLFree( tbuf );
+         return code_to_code_dup( src );
+     }
+
+     char* result = (char*)CPLCalloc( sizeof(char), len + 1 );
+     if( WideCharToMultiByte( dst_code_page, 0, tbuf, -1, result, len, NULL, NULL ) == 0 ) {
+         CPLFree( result );
+         CPLFree( tbuf );
+         return code_to_code_dup( src );
+     }
+
+     CPLFree( tbuf );
+     return result;
+ }
+
+ #endif
+
+
/*
** For now we disable the rest which is locale() related. We may need
** parts of it later.
More information about the gdal-dev
mailing list