Haskell から MultiByteToWideChar を呼び ShiftJIS←→UTF16 変換する

Windows のターミナルでは SJIS の文字以外は文字化けすることから、WindowsHaskellを使う場合は常にSJISへの変換が必要になります。
Windowsは内部ではユニコード(UTF16)が使われています。ユーザが実際に触れるのは ShiftJIS ですから、Windowsはにはユニコード(UTF16)←→ShiftJISを変換するAPIが存在し、常にShiftJIS←→UTF16の変換が行われているのです。
そのAPIMultiByteToWideChar。これを呼び出せば ShiftJIS → UTF16 の変換が可能です。
Haskellは UCS4 を使用していますが、日本語で使用する分についてはUTF16とUCS4は同じなので問題ないはずです。

-- ghc --make sjis.hs bstr.c -loleaut32 -o sjis.exe

{-# LANGUAGE ForeignFunctionInterface #-}

import Data.Char                 (chr,ord)
import Numeric                   (showHex)
import Codec.Binary.UTF8.String  (encode,decode)
import Foreign.C.String          (CString, CWString,withCString, withCWString, peekCString, peekCWString)
import Foreign.Marshal.Alloc     (free)
import System.IO.Unsafe          (unsafePerformIO)

sjisHanKana = map chr [0xa1..0xdf]
utf8Kanji   = "日本語"

sjisPutStrLn = putStrLn.utf16ToShiftJis

main = do
  sjisPutStrLn utf8Kanji                       -- => 日本語
  print $ map (flip showHex "".ord) utf8Kanji
  -- テキストは utf8 で記述しますが、コンパイルされると UCS4になります。
  -- => ["65e5","672c","8a9e"]
  print $ map (flip showHex "") $ encode utf8Kanji
  -- => utf8 はこれ。
  -- ["e6","97","a5","e6","9c","ac","e8","aa","9e"]

  putStrLn sjisHanKana
  -- => 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚
  (putStrLn.utf16ToShiftJis.shiftJisToUtf16) sjisHanKana
  -- => 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚
  print $ map (flip showHex "".ord) $ shiftJisToUtf16 sjisHanKana
  -- => 
  -- ["ff61","ff62","ff63","ff64","ff65","ff66","ff67","ff68","ff69","ff6a",
  --  "ff6b","ff6c","ff6d","ff6e","ff6f","ff70","ff71","ff72","ff73","ff74",
  --  "ff75","ff76","ff77","ff78","ff79","ff7a","ff7b","ff7c","ff7d","ff7e",
  --  "ff7f","ff80","ff81","ff82","ff83","ff84","ff85","ff86","ff87","ff88",
  --  "ff89","ff8a","ff8b","ff8c","ff8d","ff8e","ff8f","ff90","ff91","ff92",
  --  "ff93","ff94","ff95","ff96","ff97","ff98","ff99","ff9a","ff9b","ff9c",
  --  "ff9d","ff9e","ff9f"]

shiftJisToUtf16 :: String -> String
shiftJisToUtf16 = unsafePerformIO.shiftJisToUtf16'

utf16ToShiftJis :: String -> String
utf16ToShiftJis = unsafePerformIO.utf16ToShiftJis'

shiftJisToUtf16' :: String -> IO String
shiftJisToUtf16' string = withCString string c_CStringToBSTR >>= peekCWString

utf16ToShiftJis' :: String -> IO String
utf16ToShiftJis' string = withCWString string c_BSTRtoCString >>= peekCString

-- C の関数を呼ぶための定義
foreign import ccall unsafe "CStringToBSTR"  c_CStringToBSTR   :: CString -> IO CWString
foreign import ccall unsafe "BSTRtoCString"  c_BSTRtoCString   :: CWString -> IO CString

以前やった「COM を学ぶ(7) : HaskellでExcel」で BSTRに変換してからCOMに渡していました。

  • bstr.c
#include <stdio.h>
#include <malloc.h>
#include <windows.h>

BSTR CStringToBSTR(char* cstring ){
    int    cstringlen, out_size;
    BSTR   wstr;

    cstringlen = strlen(cstring);
    out_size   = MultiByteToWideChar(CP_ACP, 0, cstring, cstringlen, NULL, 0);
    wstr       = SysAllocStringLen(NULL, out_size);
    MultiByteToWideChar(CP_ACP, 0, cstring, cstringlen, wstr, out_size);
    return wstr;
}

char* BSTRtoCString(BSTR bstr){
    int    out_size;
    char   *cstring;
    out_size = WideCharToMultiByte(CP_ACP, 0, (OLECHAR*)bstr, -1, NULL, 0, NULL, NULL);
    cstring  = (char*)malloc((out_size+1) * sizeof(char));
    WideCharToMultiByte(CP_ACP, 0, (OLECHAR*)bstr, -1, cstring, out_size, NULL, NULL);
    return cstring;
}