JSON
Character Encoding - JSON
A character encoding is a set of rules for converting characters and symbols into byte sequences that computers can process. Many schemes exist, ranging from international standards such as ASCII and UTF-8, to Japanese-specific encodings such as Shift_JIS and EUC-JP, to country-specific code pages. Although the Unicode-based UTF-8 is now widely adopted as the global standard, understanding the other encoding schemes remains important for maintaining compatibility with legacy systems.
character code
Unicode
UTF-8
character set
internationalization
text processing
[
{
"code": "utf-8",
"slug": "utf-8",
"name": "UTF-8",
"description": "A variable-length character encoding that represents Unicode using 1 to 4 bytes.",
"category": "Unicode",
"ianaName": "UTF-8",
"mibEnum": 106
},
{
"code": "utf-16",
"slug": "utf-16",
"name": "UTF-16",
"description": "A variable-length character encoding that represents Unicode using one or two 16-bit code units (2 or 4 bytes).",
"category": "Unicode",
"ianaName": "UTF-16",
"mibEnum": 1015
},
{
"code": "utf-32",
"slug": "utf-32",
"name": "UTF-32",
"description": "A character encoding that represents Unicode in fixed-length 32 bits (4 bytes).",
"category": "Unicode",
"ianaName": "UTF-32",
"mibEnum": 1017
},
{
"code": "us-ascii",
"slug": "us-ascii",
"name": "US-ASCII",
"description": "A basic character encoding that defines 128 characters in 7 bits.",
"category": "ASCII",
"ianaName": "US-ASCII",
"mibEnum": 3
},
{
"code": "iso-8859-1",
"slug": "iso-8859-1",
"name": "ISO-8859-1 (Latin-1)",
"description": "An 8-bit character encoding for Western European languages.",
"category": "ISO-8859",
"ianaName": "ISO-8859-1",
"mibEnum": 4
},
{
"code": "iso-8859-2",
"slug": "iso-8859-2",
"name": "ISO-8859-2 (Latin-2)",
"description": "An 8-bit character encoding for Central European languages.",
"category": "ISO-8859",
"ianaName": "ISO-8859-2",
"mibEnum": 5
},
{
"code": "iso-8859-5",
"slug": "iso-8859-5",
"name": "ISO-8859-5 (Cyrillic)",
"description": "An 8-bit character encoding for Cyrillic script.",
"category": "ISO-8859",
"ianaName": "ISO-8859-5",
"mibEnum": 8
},
{
"code": "iso-8859-7",
"slug": "iso-8859-7",
"name": "ISO-8859-7 (Greek)",
"description": "An 8-bit character encoding for Modern Greek.",
"category": "ISO-8859",
"ianaName": "ISO-8859-7",
"mibEnum": 10
},
{
"code": "iso-8859-15",
"slug": "iso-8859-15",
"name": "ISO-8859-15 (Latin-9)",
"description": "A revised version of ISO-8859-1 that includes the Euro sign.",
"category": "ISO-8859",
"ianaName": "ISO-8859-15",
"mibEnum": 111
},
{
"code": "shift_jis",
"slug": "shift-jis",
"name": "Shift_JIS",
"description": "A Japanese character encoding used as the standard on Windows and Macintosh.",
"category": "Japanese",
"ianaName": "Shift_JIS",
"mibEnum": 17
},
{
"code": "euc-jp",
"slug": "euc-jp",
"name": "EUC-JP",
"description": "A Japanese character encoding used on Unix-like systems.",
"category": "Japanese",
"ianaName": "EUC-JP",
"mibEnum": 18
},
{
"code": "iso-2022-jp",
"slug": "iso-2022-jp",
"name": "ISO-2022-JP",
"description": "An encoding for Japanese email in 7-bit environments.",
"category": "Japanese",
"ianaName": "ISO-2022-JP",
"mibEnum": 39
},
{
"code": "gb2312",
"slug": "gb2312",
"name": "GB2312",
"description": "A basic character encoding for Simplified Chinese.",
"category": "Chinese",
"ianaName": "GB2312",
"mibEnum": 2025
},
{
"code": "gbk",
"slug": "gbk",
"name": "GBK",
"description": "A Chinese character encoding that extends GB2312.",
"category": "Chinese",
"ianaName": "GBK",
"mibEnum": 113
},
{
"code": "gb18030",
"slug": "gb18030",
"name": "GB18030",
"description": "China's current national standard, capable of representing all Unicode characters.",
"category": "Chinese",
"ianaName": "GB18030",
"mibEnum": 114
},
{
"code": "big5",
"slug": "big5",
"name": "Big5",
"description": "A Traditional Chinese character encoding used in Taiwan and Hong Kong.",
"category": "Chinese",
"ianaName": "Big5",
"mibEnum": 2026
},
{
"code": "euc-kr",
"slug": "euc-kr",
"name": "EUC-KR",
"description": "A Korean character encoding used on Unix-like systems.",
"category": "Korean",
"ianaName": "EUC-KR",
"mibEnum": 38
},
{
"code": "iso-2022-kr",
"slug": "iso-2022-kr",
"name": "ISO-2022-KR",
"description": "An encoding for Korean email in 7-bit environments.",
"category": "Korean",
"ianaName": "ISO-2022-KR",
"mibEnum": 37
},
{
"code": "koi8-r",
"slug": "koi8-r",
"name": "KOI8-R",
"description": "An 8-bit character encoding for Russian Cyrillic.",
"category": "Cyrillic",
"ianaName": "KOI8-R",
"mibEnum": 2084
},
{
"code": "koi8-u",
"slug": "koi8-u",
"name": "KOI8-U",
"description": "An 8-bit character encoding for Ukrainian Cyrillic.",
"category": "Cyrillic",
"ianaName": "KOI8-U",
"mibEnum": 2088
},
{
"code": "windows-1252",
"slug": "windows-1252",
"name": "Windows-1252",
"description": "An 8-bit encoding for Western European languages used on Microsoft Windows.",
"category": "Windows Code Page",
"ianaName": "windows-1252",
"mibEnum": 2252
}
]