[gdal-dev] GDAL/OGR patch, codepage conversion on windows

OKI Miyuki oki at hunes.co.jp
Tue May 29 03:26:04 PDT 2012


Hi all,

   I made a patch for OGR's cpl_recode_stub.cpp
   to enable codepage conversion on the Windows platform.

CODEPAGE  Something like this  "CP932"  to "UTF-8"
                                                  "CP1252" to "UTF-8"
                                                  ...
and
                                                  "CP932"  from "UTF-8"
                                                  "CP1252" from "UTF-8"
                                                  ...

cheers,
Miyuki OKI

----------------------------------------------------------------------------------
*** port/cpl_recode_stub.cpp	Wed Jan  4 16:03:15 2012
--- ../../gdal-1.9.0/port/cpl_recode_stub.cpp	Tue May 29 19:03:28 2012
***************
*** 46,51 ****
--- 46,56 ----
                            const char* src, unsigned srclen);
  static int utf8test(const char* src, unsigned srclen);
  
+ #ifdef _WIN32 /* code page recoding helper; only built on Windows */
+ char* code_to_code( const char* src,
+                            unsigned src_code_page, unsigned dst_code_page );
+ #endif /* _WIN32 */
+ 
  #ifdef FUTURE_NEEDS
  static const char* utf8fwd(const char* p, const char* start, const char* end);
  static const char* utf8back(const char* p, const char* start, const char*end);
***************
*** 128,133 ****
--- 133,165 ----
          return pszResult;
      }
  
+ #ifdef _WIN32
+ /* ---------------------------------------------------------------------*/
+ /*      CPXXX to UTF8                                                   */
+ /* ---------------------------------------------------------------------*/
+    if( strncmp(pszSrcEncoding,"CP",2) == 0 
+        && strcmp(pszDstEncoding,CPL_ENC_UTF8) == 0 )
+    {
+        int nCode = atoi( pszSrcEncoding + 2 );
+        if( nCode > 0 ) {
+           return code_to_code( pszSource, nCode, 65001 );
+        }
+    }
+ 
+ /* ---------------------------------------------------------------------*/
+ /*      UTF8 to CPXXX                                                   */
+ /* ---------------------------------------------------------------------*/
+    if( strcmp(pszSrcEncoding,CPL_ENC_UTF8) == 0
+        && strncmp(pszDstEncoding,"CP",2) == 0 )
+    {
+         int nCode = atoi( pszDstEncoding + 2 );
+         if( nCode > 0 ) {
+             return code_to_code( pszSource, 65001, nCode );
+         }
+    }
+ 
+ #endif
+ 
  /* -------------------------------------------------------------------- */
  /*      Anything else to UTF-8 is treated as ISO8859-1 to UTF-8 with    */
  /*      a one-time warning.                                             */
***************
*** 993,998 ****
--- 1025,1105 ----
    return count;
  }
  
+ #ifdef _WIN32
+ /************************************************************************/
+ /*                             code_to_code()                           */
+ /************************************************************************/
+ 
+ /* Convert a CODEPAGE (i.e. normal c-string) byte stream
+      to another CODEPAGE (i.e. normal c-string) byte stream.
+ 
+     \a src is the source c-string byte stream (including a null terminator).
+     \a src_code_page is the code page of the source c-string.
+     \a dst_code_page is the code page of the destination c-string.
+ 
+    UTF7          65000
+    UTF8          65001
+    OEM-US          437
+    OEM-ARABIC      720
+    OEM-GREEK       737
+    OEM-BALTIC      775
+    OEM-MLATIN1     850
+    OEM-LATIN2      852
+    OEM-CYRILLIC    855
+    OEM-TURKISH     857
+    OEM-MLATIN1P    858
+    OEM-HEBREW      862
+    OEM-RUSSIAN     866
+ 
+    THAI            874
+    SJIS            932
+    GBK             936
+    KOREA           949
+    BIG5            950
+ 
+    EUROPE         1250
+    CYRILLIC       1251
+    LATIN1         1252
+    GREEK          1253
+    TURKISH        1254
+    HEBREW         1255
+    ARABIC         1256
+    BALTIC         1257
+    VIETNAM        1258
+ 
+    ISO-LATIN1    28591
+    ISO-LATIN2    28592
+    ISO-LATIN3    28593
+    ISO-BALTIC    28594
+    ISO-CYRILLIC  28595
+    ISO-ARABIC    28596
+    ISO-HEBREW    28598
+    ISO-TURKISH   28599
+    ISO-LATIN9    28605
+ 
+    ISO-2022-JP   50220
+ 
+ */
+ 
+ # include <windows.h>
+ 
+ /* Recode NUL-terminated src between Windows code pages. The result is always a fresh CPLFree()-able string (NULL only for NULL src); if the API rejects a code page, src is copied unchanged. */
+ char* code_to_code( const char* src, unsigned src_code_page, unsigned dst_code_page ) {
+   if( NULL == src || src_code_page == dst_code_page )  return src ? CPLStrdup( src ) : NULL; /* never hand back the caller's own pointer */
+   int wlen = MultiByteToWideChar( src_code_page, 0, src, -1, NULL, 0 ); /* size in wchar_t, NUL included; 0 on failure */
+   if( wlen <= 0 )  return CPLStrdup( src ); /* source code page not usable: pass the text through */
+   wchar_t* tbuf = (wchar_t*)CPLCalloc(sizeof(wchar_t),wlen+1);
+   MultiByteToWideChar( src_code_page, 0, src, -1, tbuf, wlen+1 );
+   int len = WideCharToMultiByte( dst_code_page, 0, tbuf, -1, NULL, 0, NULL, NULL ); /* size in bytes, NUL included; 0 on failure */
+   char* result = ( len > 0 ) ? (char*)CPLCalloc(sizeof(char),len+1) : CPLStrdup( src ); /* destination code page not usable: pass the text through */
+   if( len > 0 )  WideCharToMultiByte( dst_code_page, 0, tbuf, -1, result, len+1, NULL, NULL );
+   CPLFree(tbuf);
+   return result;
+ }
+ 
+ #endif
+ 
+ 
  /*
  ** For now we disable the rest which is locale() related.  We may need 
  ** parts of it later. 



More information about the gdal-dev mailing list