JSON

Character Encoding - JSON

A character encoding is a set of rules for converting characters and symbols into byte sequences that computers can process. Many schemes exist, ranging from international standards such as ASCII and UTF-8, to Japanese-specific encodings such as Shift_JIS and EUC-JP, to country-specific code pages. Although UTF-8, which is based on Unicode, is now widely adopted as the global standard, understanding the other encoding schemes remains important for maintaining compatibility with legacy systems.

character code Unicode UTF-8 character set internationalization text processing
[
  {
    "code": "utf-8",
    "slug": "utf-8",
    "name": "UTF-8",
    "description": "A variable-length character encoding that represents Unicode using 1 to 4 bytes.",
    "category": "Unicode",
    "ianaName": "UTF-8",
    "mibEnum": 106
  },
  {
    "code": "utf-16",
    "slug": "utf-16",
    "name": "UTF-16",
    "description": "A variable-length character encoding that represents Unicode using one or two 16-bit code units.",
    "category": "Unicode",
    "ianaName": "UTF-16",
    "mibEnum": 1015
  },
  {
    "code": "utf-32",
    "slug": "utf-32",
    "name": "UTF-32",
    "description": "A character encoding that represents Unicode in fixed-length 32 bits (4 bytes).",
    "category": "Unicode",
    "ianaName": "UTF-32",
    "mibEnum": 1017
  },
  {
    "code": "us-ascii",
    "slug": "us-ascii",
    "name": "US-ASCII",
    "description": "A basic character encoding that defines 128 characters in 7 bits.",
    "category": "ASCII",
    "ianaName": "US-ASCII",
    "mibEnum": 3
  },
  {
    "code": "iso-8859-1",
    "slug": "iso-8859-1",
    "name": "ISO-8859-1 (Latin-1)",
    "description": "An 8-bit character encoding for Western European languages.",
    "category": "ISO-8859",
    "ianaName": "ISO-8859-1",
    "mibEnum": 4
  },
  {
    "code": "iso-8859-2",
    "slug": "iso-8859-2",
    "name": "ISO-8859-2 (Latin-2)",
    "description": "An 8-bit character encoding for Central European languages.",
    "category": "ISO-8859",
    "ianaName": "ISO-8859-2",
    "mibEnum": 5
  },
  {
    "code": "iso-8859-5",
    "slug": "iso-8859-5",
    "name": "ISO-8859-5 (Cyrillic)",
    "description": "An 8-bit character encoding for Cyrillic script.",
    "category": "ISO-8859",
    "ianaName": "ISO-8859-5",
    "mibEnum": 8
  },
  {
    "code": "iso-8859-7",
    "slug": "iso-8859-7",
    "name": "ISO-8859-7 (Greek)",
    "description": "An 8-bit character encoding for Modern Greek.",
    "category": "ISO-8859",
    "ianaName": "ISO-8859-7",
    "mibEnum": 10
  },
  {
    "code": "iso-8859-15",
    "slug": "iso-8859-15",
    "name": "ISO-8859-15 (Latin-9)",
    "description": "A revised version of ISO-8859-1 that includes the Euro sign.",
    "category": "ISO-8859",
    "ianaName": "ISO-8859-15",
    "mibEnum": 111
  },
  {
    "code": "shift_jis",
    "slug": "shift-jis",
    "name": "Shift_JIS",
    "description": "A Japanese character encoding commonly used as the standard on Windows and Macintosh.",
    "category": "Japanese",
    "ianaName": "Shift_JIS",
    "mibEnum": 17
  },
  {
    "code": "euc-jp",
    "slug": "euc-jp",
    "name": "EUC-JP",
    "description": "A Japanese character encoding used on Unix-like systems.",
    "category": "Japanese",
    "ianaName": "EUC-JP",
    "mibEnum": 18
  },
  {
    "code": "iso-2022-jp",
    "slug": "iso-2022-jp",
    "name": "ISO-2022-JP",
    "description": "An encoding for Japanese email in 7-bit environments.",
    "category": "Japanese",
    "ianaName": "ISO-2022-JP",
    "mibEnum": 39
  },
  {
    "code": "gb2312",
    "slug": "gb2312",
    "name": "GB2312",
    "description": "A basic character encoding for Simplified Chinese.",
    "category": "Chinese",
    "ianaName": "GB2312",
    "mibEnum": 2025
  },
  {
    "code": "gbk",
    "slug": "gbk",
    "name": "GBK",
    "description": "A Chinese character encoding that extends GB2312.",
    "category": "Chinese",
    "ianaName": "GBK",
    "mibEnum": 113
  },
  {
    "code": "gb18030",
    "slug": "gb18030",
    "name": "GB18030",
    "description": "China's current national standard, capable of representing all Unicode characters.",
    "category": "Chinese",
    "ianaName": "GB18030",
    "mibEnum": 114
  },
  {
    "code": "big5",
    "slug": "big5",
    "name": "Big5",
    "description": "A Traditional Chinese character encoding used in Taiwan and Hong Kong.",
    "category": "Chinese",
    "ianaName": "Big5",
    "mibEnum": 2026
  },
  {
    "code": "euc-kr",
    "slug": "euc-kr",
    "name": "EUC-KR",
    "description": "A Korean character encoding used on Unix-like systems.",
    "category": "Korean",
    "ianaName": "EUC-KR",
    "mibEnum": 38
  },
  {
    "code": "iso-2022-kr",
    "slug": "iso-2022-kr",
    "name": "ISO-2022-KR",
    "description": "An encoding for Korean email in 7-bit environments.",
    "category": "Korean",
    "ianaName": "ISO-2022-KR",
    "mibEnum": 37
  },
  {
    "code": "koi8-r",
    "slug": "koi8-r",
    "name": "KOI8-R",
    "description": "An 8-bit character encoding for Russian Cyrillic.",
    "category": "Cyrillic",
    "ianaName": "KOI8-R",
    "mibEnum": 2084
  },
  {
    "code": "koi8-u",
    "slug": "koi8-u",
    "name": "KOI8-U",
    "description": "An 8-bit character encoding for Ukrainian Cyrillic.",
    "category": "Cyrillic",
    "ianaName": "KOI8-U",
    "mibEnum": 2088
  },
  {
    "code": "windows-1252",
    "slug": "windows-1252",
    "name": "Windows-1252",
    "description": "An 8-bit encoding for Western European languages used on Microsoft Windows.",
    "category": "Windows Code Page",
    "ianaName": "windows-1252",
    "mibEnum": 2252
  }
]