Cinnamon で半角カナを全角カナに変換したくない場合は直接 nkf を呼び出す。

Windows環境でHaskellを使っています。文字コード変換にはCinnamonを重宝しています。
Cinnamonは内部で nkf を使用しています。半角カナを変換せずそのまま残すには nkf のオプションに x(-x)を含めなければならないのですが、Cinnamonでは指定されていないため、文字コード変換の際に半角カナは全角カナに変換されてしまいます。(参照: nkf - ネットワーク用漢字コード変換フィルタ v2.0.7)

  • CinnamonのtoJis、toEuc、toSjis、toUtf8、toUtf16 の定義
-- Conversion helpers as defined in Cinnamon. Each one is just 'nkf' partially
-- applied to a fixed option string. None of the option strings contains the
-- @x@ flag, so (as the surrounding text explains) half-width kana are turned
-- into full-width kana during conversion.
-- NOTE(review): the option meanings given below follow the nkf manual
-- (-m0 = no MIME decoding, -j/-e/-s/-w/-w16 = output encoding) — confirm
-- against the nkf version bundled with Cinnamon.

-- | Convert the input to JIS (ISO-2022-JP) using nkf options @-m0 -j@.
toJis :: String -> String
toJis = nkf "-m0 -j"
 
-- | Convert the input to EUC-JP using nkf options @-m0 -e@.
toEuc :: String -> String
toEuc = nkf "-m0 -e"
 
-- | Convert the input to Shift_JIS using nkf options @-m0 -s@.
toSjis :: String -> String
toSjis = nkf "-m0 -s"
 
-- | Convert the input to UTF-8 using nkf options @-m0 -w@.
toUtf8 :: String -> String
toUtf8 = nkf "-m0 -w"
 
-- | Convert the input to UTF-16 using nkf options @-m0 -w16@.
toUtf16 :: String -> String
toUtf16 = nkf "-m0 -w16"
  • 半角カナを全角カナに変換したくない場合は直接 nkf を呼び出します。
import Data.Char                 (chr,ord)
import Numeric                   (showHex)
import Cinnamon.Ucs              (sjisToUcs4,ucs4ToSjis)
import Cinnamon.Nkf              (nkf)
import Codec.Binary.UTF8.String  (encode,decode)

-- | Every code point from 0xA1 through 0xDF, one per 'Char'. The demo below
-- treats this as the Shift_JIS single-byte half-width katakana range.
sjisHanKana :: String
sjisHanKana = ['\xa1' .. '\xdf']

-- | Run nkf with the option string @-m0xSw80@ on a string holding one byte
-- per 'Char'. The @x@ flag is what keeps half-width kana from being widened.
-- NOTE(review): per the nkf manual, @S@ = Shift_JIS input and @w80@ = UTF-8
-- output without BOM — confirm against the bundled nkf.
sjisToUtf8 :: String -> String
sjisToUtf8 sjisBytes = nkf "-m0xSw80" sjisBytes

-- | Run nkf with the option string @-m0xW8s@; the reverse direction of
-- 'sjisToUtf8', again with @x@ so half-width kana are left alone.
-- NOTE(review): per the nkf manual, @W8@ = UTF-8 input and @s@ = Shift_JIS
-- output — confirm against the bundled nkf.
utf8ToSjis :: String -> String
utf8ToSjis utf8Bytes = nkf "-m0xW8s" utf8Bytes

-- | Half-width-kana-preserving replacement for Cinnamon's @sjisToUcs4@:
-- convert the input with 'sjisToUtf8', reinterpret each resulting 'Char' as a
-- byte, and UTF-8-decode those bytes into Unicode characters.
sjisToUcs4' :: String -> String
sjisToUcs4' sjis = decode utf8Bytes
  where
    utf8Bytes = map (fromIntegral . ord) (sjisToUtf8 sjis)

-- | Half-width-kana-preserving replacement for Cinnamon's @ucs4ToSjis@:
-- UTF-8-encode the Unicode string, pack each byte into a 'Char', and hand the
-- result to 'utf8ToSjis'.
ucs4ToSjis' :: String -> String
ucs4ToSjis' ucs = utf8ToSjis utf8Chars
  where
    utf8Chars = map (chr . fromIntegral) (encode ucs)

-- | Demo: round-trip the half-width kana string through Cinnamon's own
-- converters and through the primed variants defined above, printing the
-- text and the hexadecimal code points at each step.
main :: IO ()
main = do
  putStrLn sjisHanKana
  -- => 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚

  -- Converting the SJIS half-width kana string to UCS-4 with Cinnamon's
  -- converters turns it into full-width kana.
  putStrLn (ucs4ToSjis (sjisToUcs4 sjisHanKana))
  -- => 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネ
  --    ノハヒフヘホマミムメモヤユヨラリルレロワン゛゜

  print (hexCodes (sjisToUcs4 sjisHanKana))
  {-
  ["3002","300c","300d","3001","30fb","30f2","30a1","30a3","30a5","30a7","30a9",
   "30e3","30e5","30e7","30c3","30fc","30a2","30a4","30a6","30a8","30aa","30ab",
   "30ad","30af","30b1","30b3","30b5","30b7","30b9","30bb","30bd","30bf","30c1",
   "30c4","30c6","30c8","30ca","30cb","30cc","30cd","30ce","30cf","30d2","30d5",
   "30d8","30db","30de","30df","30e0","30e1","30e2","30e4","30e6","30e8","30e9",
   "30ea","30eb","30ec","30ed","30ef","30f3","309b","309c"]-}

  -- With the hand-written sjisToUcs4' / ucs4ToSjis', the kana stay half-width.
  let ucs4HanKana = sjisToUcs4' sjisHanKana
  print (hexCodes ucs4HanKana)
  {-
  ["ff61","ff62","ff63","ff64","ff65","ff66","ff67","ff68","ff69","ff6a","ff6b",
   "ff6c","ff6d","ff6e","ff6f","ff70","ff71","ff72","ff73","ff74","ff75","ff76",
   "ff77","ff78","ff79","ff7a","ff7b","ff7c","ff7d","ff7e","ff7f","ff80","ff81",
   "ff82","ff83","ff84","ff85","ff86","ff87","ff88","ff89","ff8a","ff8b","ff8c",
   "ff8d","ff8e","ff8f","ff90","ff91","ff92","ff93","ff94","ff95","ff96","ff97",
   "ff98","ff99","ff9a","ff9b","ff9c","ff9d","ff9e","ff9f"] -}

  putStrLn (ucs4ToSjis' ucs4HanKana)
  -- => 。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚
  where
    -- Render each character's code point as lowercase hexadecimal.
    hexCodes cs = [showHex (ord c) "" | c <- cs]
  • 全角英数字を半角に変換する。
import Data.Char                 (chr,ord)
import Numeric                   (showHex)
import Cinnamon.Nkf              (nkf)
import Codec.Binary.UTF8.String  (decodeString, encodeString)

-- | Every code point from U+FF10 through U+FF5A (full-width digits, letters
-- and the punctuation between them), in order.
zenString :: String
zenString = ['\xff10' .. '\xff5a']

-- | Convert full-width alphanumerics in a Unicode 'String' to half-width:
-- UTF-8-encode the input with 'encodeString', run nkf with the option string
-- @-m0W8Z0@, and decode the output back with 'decodeString'.
-- NOTE(review): per the nkf manual, @W8@ = UTF-8 input and @Z0@ = convert
-- JIS X 0208 alphanumerics to ASCII — confirm against the bundled nkf.
zenToHan :: String -> String
zenToHan zen = decodeString (nkf "-m0W8Z0" (encodeString zen))

-- | 'zenString' after being passed through 'zenToHan'.
hanString :: String
hanString = zenToHan zenString

-- | Demo: print the full-width sample, its half-width conversion, and the
-- hexadecimal code points of the converted string.
main :: IO ()
main = do
    putStrLn zenString
    -- => 0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ
    --   [\]^_`abcdefghijklmnopqrstuvwxyz

    putStrLn hanString
    -- => 0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz
    print [showHex (ord c) "" | c <- hanString]
    -- => ["30","31","32","33","34","35","36","37","38","39","3a","3b","3c","3d","3e","3f",
    -- "40","41","42","43","44","45","46","47","48","49","4a","4b","4c","4d","4e","4f",
    -- "50","51","52","53","54","55","56","57","58","59","5a","5b","5c","5d","5e","5f",
    -- "60","61","62","63","64","65","66","67","68","69","6a","6b","6c","6d","6e","6f",
    -- "70","71","72","73","74","75","76","77","78","79","7a"]
  • SJIS 全角英数字・スペースを半角に変換する。
-- | Shift_JIS variant: run nkf with the option string @-m0SZ1@ directly on a
-- Shift_JIS byte string to turn full-width alphanumerics and spaces into
-- their half-width forms.
-- NOTE(review): per the nkf manual, @S@ = Shift_JIS input and @Z1@ = like
-- @Z0@ but also converting the full-width space — confirm against the
-- bundled nkf.
zenToHan :: String -> String
zenToHan sjis = nkf "-m0SZ1" sjis