Windows のターミナルでは SJIS の文字以外は文字化けすることから、Windows で Haskellを使う場合は常にSJISへの変換が必要になります。
Windowsは内部ではユニコード(UTF16)が使われています。ユーザが実際に触れるのは ShiftJIS ですから、Windowsはにはユニコード(UTF16)←→ShiftJISを変換するAPIが存在し、常にShiftJIS←→UTF16の変換が行われているのです。
そのAPI がMultiByteToWideChar。これを呼び出せば ShiftJIS → UTF16 の変換が可能です。
Haskellは UCS4 を使用していますが、日本語で使用する分についてはUTF16とUCS4は同じなので問題ないはずです。
- WideCharToMultiByte UTF16 → ShiftJIS
- Unihan data for U+65E5(日)
- sjis.hs
-- ghc --make sjis.hs bstr.c -loleaut32 -o sjis.exe {-# LANGUAGE ForeignFunctionInterface #-} import Data.Char (chr,ord) import Numeric (showHex) import Codec.Binary.UTF8.String (encode,decode) import Foreign.C.String (CString, CWString,withCString, withCWString, peekCString, peekCWString) import Foreign.Marshal.Alloc (free) import System.IO.Unsafe (unsafePerformIO) sjisHanKana = map chr [0xa1..0xdf] utf8Kanji = "日本語" sjisPutStrLn = putStrLn.utf16ToShiftJis main = do sjisPutStrLn utf8Kanji -- => 日本語 print $ map (flip showHex "".ord) utf8Kanji -- テキストは utf8 で記述しますが、コンパイルされると UCS4になります。 -- => ["65e5","672c","8a9e"] print $ map (flip showHex "") $ encode utf8Kanji -- => utf8 はこれ。 -- ["e6","97","a5","e6","9c","ac","e8","aa","9e"] putStrLn sjisHanKana -- => 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚ (putStrLn.utf16ToShiftJis.shiftJisToUtf16) sjisHanKana -- => 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚ print $ map (flip showHex "".ord) $ shiftJisToUtf16 sjisHanKana -- => -- ["ff61","ff62","ff63","ff64","ff65","ff66","ff67","ff68","ff69","ff6a", -- "ff6b","ff6c","ff6d","ff6e","ff6f","ff70","ff71","ff72","ff73","ff74", -- "ff75","ff76","ff77","ff78","ff79","ff7a","ff7b","ff7c","ff7d","ff7e", -- "ff7f","ff80","ff81","ff82","ff83","ff84","ff85","ff86","ff87","ff88", -- "ff89","ff8a","ff8b","ff8c","ff8d","ff8e","ff8f","ff90","ff91","ff92", -- "ff93","ff94","ff95","ff96","ff97","ff98","ff99","ff9a","ff9b","ff9c", -- "ff9d","ff9e","ff9f"] shiftJisToUtf16 :: String -> String shiftJisToUtf16 = unsafePerformIO.shiftJisToUtf16' utf16ToShiftJis :: String -> String utf16ToShiftJis = unsafePerformIO.utf16ToShiftJis' shiftJisToUtf16' :: String -> IO String shiftJisToUtf16' string = withCString string c_CStringToBSTR >>= peekCWString utf16ToShiftJis' :: String -> IO String utf16ToShiftJis' string = withCWString string c_BSTRtoCString >>= peekCString -- C の関数を呼ぶための定義 foreign import ccall unsafe "CStringToBSTR" c_CStringToBSTR :: CString -> IO CWString foreign import ccall unsafe "BSTRtoCString" c_BSTRtoCString :: CWString -> IO CString
以前やった「COM を学ぶ(7) : HaskellでExcel」で BSTRに変換してからCOMに渡していました。
- bstr.c
#include <stdio.h> #include <malloc.h> #include <windows.h> BSTR CStringToBSTR(char* cstring ){ int cstringlen, out_size; BSTR wstr; cstringlen = strlen(cstring); out_size = MultiByteToWideChar(CP_ACP, 0, cstring, cstringlen, NULL, 0); wstr = SysAllocStringLen(NULL, out_size); MultiByteToWideChar(CP_ACP, 0, cstring, cstringlen, wstr, out_size); return wstr; } char* BSTRtoCString(BSTR bstr){ int out_size; char *cstring; out_size = WideCharToMultiByte(CP_ACP, 0, (OLECHAR*)bstr, -1, NULL, 0, NULL, NULL); cstring = (char*)malloc((out_size+1) * sizeof(char)); WideCharToMultiByte(CP_ACP, 0, (OLECHAR*)bstr, -1, cstring, out_size, NULL, NULL); return cstring; }