# TOML
#
# Character Encoding - TOML
#
# A character encoding is a system of rules for converting characters and
# symbols into byte sequences that computers can process. Many schemes exist,
# ranging from international standards such as ASCII and UTF-8, to
# Japanese-specific encodings such as Shift_JIS and EUC-JP, to
# country-specific code pages. Although UTF-8, which is based on Unicode, is
# now widely adopted as the global standard, understanding the other encoding
# schemes remains important for maintaining compatibility with legacy systems.
#
# Keywords: character code, Unicode, UTF-8, character set,
# internationalization, text processing

# Appends the UTF-8 record to the `items` array of tables. Like every other
# record visible in this section it carries seven keys: code, slug, name,
# description, category, ianaName and mibEnum (six strings and one integer).
# NOTE(review): ianaName/mibEnum presumably mirror the IANA Character Sets
# registry (registered name and MIBenum number) — confirm before editing.
[[items]]
code = "utf-8"
slug = "utf-8"
name = "UTF-8"
description = "A variable-length character encoding that represents Unicode using 1 to 4 bytes."
category = "Unicode"
ianaName = "UTF-8"
mibEnum = 106

# UTF-16 record, filed under the "Unicode" category.
# NOTE(review): this entry names plain "UTF-16" and says nothing about byte
# order. No UTF-16BE/UTF-16LE records are visible in this section — confirm
# whether they are intentionally left out or live elsewhere.
[[items]]
code = "utf-16"
slug = "utf-16"
name = "UTF-16"
description = "A character encoding that represents Unicode in 16-bit units."
category = "Unicode"
ianaName = "UTF-16"
mibEnum = 1015

# UTF-32 record, filed under the "Unicode" category. The description states
# the fixed code-unit size both in bits and in bytes.
[[items]]
code = "utf-32"
slug = "utf-32"
name = "UTF-32"
description = "A character encoding that represents Unicode in fixed-length 32 bits (4 bytes)."
category = "Unicode"
ianaName = "UTF-32"
mibEnum = 1017

# US-ASCII record; the only entry in this section that uses the "ASCII"
# category.
[[items]]
code = "us-ascii"
slug = "us-ascii"
name = "US-ASCII"
description = "A basic character encoding that defines 128 characters in 7 bits."
category = "ASCII"
ianaName = "US-ASCII"
mibEnum = 3

# ISO-8859-1 record. `name` is the display form and appends the common alias
# in parentheses ("Latin-1"); `ianaName` stays the bare identifier.
[[items]]
code = "iso-8859-1"
slug = "iso-8859-1"
name = "ISO-8859-1 (Latin-1)"
description = "An 8-bit character encoding for Western European languages."
category = "ISO-8859"
ianaName = "ISO-8859-1"
mibEnum = 4

# ISO-8859-2 record (Central European). As with the other ISO-8859 entries,
# the parenthesised alias ("Latin-2") appears only in `name`.
[[items]]
code = "iso-8859-2"
slug = "iso-8859-2"
name = "ISO-8859-2 (Latin-2)"
description = "An 8-bit character encoding for Central European languages."
category = "ISO-8859"
ianaName = "ISO-8859-2"
mibEnum = 5

# ISO-8859-5 record. Categorised by standard family ("ISO-8859") rather than
# by script, even though the file also has a separate "Cyrillic" category.
# The parenthetical in `name` gives the script instead of a Latin-N alias.
[[items]]
code = "iso-8859-5"
slug = "iso-8859-5"
name = "ISO-8859-5 (Cyrillic)"
description = "An 8-bit character encoding for Cyrillic script."
category = "ISO-8859"
ianaName = "ISO-8859-5"
mibEnum = 8

# ISO-8859-7 record. The parenthetical in `name` gives the script ("Greek");
# the description narrows it to Modern Greek.
[[items]]
code = "iso-8859-7"
slug = "iso-8859-7"
name = "ISO-8859-7 (Greek)"
description = "An 8-bit character encoding for Modern Greek."
category = "ISO-8859"
ianaName = "ISO-8859-7"
mibEnum = 10

# ISO-8859-15 record. The description defines it relative to ISO-8859-1, so
# keep the two entries' wording in step if either is edited.
[[items]]
code = "iso-8859-15"
slug = "iso-8859-15"
name = "ISO-8859-15 (Latin-9)"
description = "A revised version of ISO-8859-1 that includes the Euro sign."
category = "ISO-8859"
ianaName = "ISO-8859-15"
mibEnum = 111

# Shift_JIS record. This is the only record in this section where `slug`
# differs from `code`: `code` keeps the underscore ("shift_jis") while `slug`
# uses a hyphen ("shift-jis"). Do not derive one from the other blindly.
[[items]]
code = "shift_jis"
slug = "shift-jis"
name = "Shift_JIS"
description = "A Japanese character encoding standardly used on Windows and Macintosh."
category = "Japanese"
ianaName = "Shift_JIS"
mibEnum = 17

# EUC-JP record, filed under the "Japanese" category. Its description follows
# the same "used on Unix-like systems" wording as the EUC-KR entry.
[[items]]
code = "euc-jp"
slug = "euc-jp"
name = "EUC-JP"
description = "A Japanese character encoding used on Unix-like systems."
category = "Japanese"
ianaName = "EUC-JP"
mibEnum = 18

# ISO-2022-JP record, filed under the "Japanese" category. Described as the
# encoding for e-mail in 7-bit environments; ISO-2022-KR uses parallel wording.
[[items]]
code = "iso-2022-jp"
slug = "iso-2022-jp"
name = "ISO-2022-JP"
description = "An encoding for Japanese email in 7-bit environments."
category = "Japanese"
ianaName = "ISO-2022-JP"
mibEnum = 39

# GB2312 record (Simplified Chinese), filed under the shared "Chinese"
# category. The GBK entry's description refers back to this one.
[[items]]
code = "gb2312"
slug = "gb2312"
name = "GB2312"
description = "A basic character encoding for Simplified Chinese."
category = "Chinese"
ianaName = "GB2312"
mibEnum = 2025

# GBK record, filed under the "Chinese" category. The description positions
# it as an extension of GB2312.
[[items]]
code = "gbk"
slug = "gbk"
name = "GBK"
description = "A Chinese character encoding that extends GB2312."
category = "Chinese"
ianaName = "GBK"
mibEnum = 113

# GB18030 record, filed under the "Chinese" category. The description notes
# that it can represent all Unicode characters, although the entry is not
# placed in the "Unicode" category.
[[items]]
code = "gb18030"
slug = "gb18030"
name = "GB18030"
description = "China's current national standard, capable of representing all Unicode characters."
category = "Chinese"
ianaName = "GB18030"
mibEnum = 114

# Big5 record (Traditional Chinese). It shares the single "Chinese" category
# with the Simplified Chinese entries; the file has no separate category per
# script variant.
[[items]]
code = "big5"
slug = "big5"
name = "Big5"
description = "A Traditional Chinese character encoding used in Taiwan and Hong Kong."
category = "Chinese"
ianaName = "Big5"
mibEnum = 2026

# EUC-KR record, filed under the "Korean" category. Its description follows
# the same "used on Unix-like systems" wording as the EUC-JP entry.
[[items]]
code = "euc-kr"
slug = "euc-kr"
name = "EUC-KR"
description = "A Korean character encoding used on Unix-like systems."
category = "Korean"
ianaName = "EUC-KR"
mibEnum = 38

# ISO-2022-KR record, filed under the "Korean" category. Described as the
# encoding for e-mail in 7-bit environments; ISO-2022-JP uses parallel wording.
[[items]]
code = "iso-2022-kr"
slug = "iso-2022-kr"
name = "ISO-2022-KR"
description = "An encoding for Korean email in 7-bit environments."
category = "Korean"
ianaName = "ISO-2022-KR"
mibEnum = 37

# KOI8-R record (Russian). It is categorised by script ("Cyrillic"), unlike
# ISO-8859-5, which sits under its standard family instead.
[[items]]
code = "koi8-r"
slug = "koi8-r"
name = "KOI8-R"
description = "An 8-bit character encoding for Russian Cyrillic."
category = "Cyrillic"
ianaName = "KOI8-R"
mibEnum = 2084

# KOI8-U record (Ukrainian), filed under the "Cyrillic" category. Its
# description uses the same wording as the KOI8-R entry.
[[items]]
code = "koi8-u"
slug = "koi8-u"
name = "KOI8-U"
description = "An 8-bit character encoding for Ukrainian Cyrillic."
category = "Cyrillic"
ianaName = "KOI8-U"
mibEnum = 2088

# Windows-1252 record; the only entry in this section that uses the
# "Windows Code Page" category. `ianaName` is all lower-case here and equals
# `code`, while `name` is the capitalised display form.
# NOTE(review): the lower-case spelling presumably follows the IANA registry
# entry — confirm before "normalising" its case.
[[items]]
code = "windows-1252"
slug = "windows-1252"
name = "Windows-1252"
description = "An 8-bit encoding for Western European languages used on Microsoft Windows."
category = "Windows Code Page"
ianaName = "windows-1252"
mibEnum = 2252