YAML

Character Encoding - YAML

A character encoding is a system of rules for converting characters and symbols into byte sequences that computers can process. Various schemes exist, from international standards like ASCII and UTF-8, to Japanese-specific encodings like Shift_JIS and EUC-JP, to country-specific code pages. While UTF-8, which is based on Unicode, is now widely adopted as the global standard, understanding the various encoding schemes remains important for maintaining compatibility with legacy systems.

character code, Unicode, UTF-8, character set, internationalization, text processing
# Character-encoding catalogue: a sequence of mappings, one per encoding.
# Each entry in this section carries the same seven keys, in this order:
#   code        - lowercase identifier of the encoding
#   slug        - identical to `code` for every entry here except shift_jis,
#                 whose slug is "shift-jis" (presumably a URL-safe form;
#                 confirm with the consumer)
#   name        - display name, sometimes with an alias in parentheses
#   description - one-sentence summary of the encoding
#   category    - grouping label; values seen in this section: Unicode, ASCII,
#                 ISO-8859, Japanese, Chinese, Korean, Cyrillic,
#                 Windows Code Page
#   ianaName    - charset name; NOTE(review): looks like the IANA preferred
#                 MIME name - confirm against the registry
#   mibEnum     - unquoted integer; NOTE(review): looks like the IANA MIBenum
#                 of the charset - confirm against the registry
# Entries are grouped by `category`; keep new entries next to their group.

# Unicode encodings
- code: "utf-8"
  slug: "utf-8"
  name: "UTF-8"
  description: "A variable-length character encoding that represents Unicode using 1 to 4 bytes."
  category: "Unicode"
  ianaName: "UTF-8"
  mibEnum: 106
- code: "utf-16"
  slug: "utf-16"
  name: "UTF-16"
  description: "A character encoding that represents Unicode in 16-bit units."
  category: "Unicode"
  ianaName: "UTF-16"
  mibEnum: 1015
- code: "utf-32"
  slug: "utf-32"
  name: "UTF-32"
  description: "A character encoding that represents Unicode in fixed-length 32 bits (4 bytes)."
  category: "Unicode"
  ianaName: "UTF-32"
  mibEnum: 1017
# ASCII
- code: "us-ascii"
  slug: "us-ascii"
  name: "US-ASCII"
  description: "A basic character encoding that defines 128 characters in 7 bits."
  category: "ASCII"
  ianaName: "US-ASCII"
  mibEnum: 3
# ISO-8859 family
- code: "iso-8859-1"
  slug: "iso-8859-1"
  name: "ISO-8859-1 (Latin-1)"
  description: "An 8-bit character encoding for Western European languages."
  category: "ISO-8859"
  ianaName: "ISO-8859-1"
  mibEnum: 4
- code: "iso-8859-2"
  slug: "iso-8859-2"
  name: "ISO-8859-2 (Latin-2)"
  description: "An 8-bit character encoding for Central European languages."
  category: "ISO-8859"
  ianaName: "ISO-8859-2"
  mibEnum: 5
- code: "iso-8859-5"
  slug: "iso-8859-5"
  name: "ISO-8859-5 (Cyrillic)"
  description: "An 8-bit character encoding for Cyrillic script."
  category: "ISO-8859"
  ianaName: "ISO-8859-5"
  mibEnum: 8
- code: "iso-8859-7"
  slug: "iso-8859-7"
  name: "ISO-8859-7 (Greek)"
  description: "An 8-bit character encoding for Modern Greek."
  category: "ISO-8859"
  ianaName: "ISO-8859-7"
  mibEnum: 10
- code: "iso-8859-15"
  slug: "iso-8859-15"
  name: "ISO-8859-15 (Latin-9)"
  description: "A revised version of ISO-8859-1 that includes the Euro sign."
  category: "ISO-8859"
  ianaName: "ISO-8859-15"
  mibEnum: 111
# Japanese encodings
# Note: this is the only entry in this section whose slug differs from its
# code (underscore in `code`, hyphen in `slug`).
- code: "shift_jis"
  slug: "shift-jis"
  name: "Shift_JIS"
  description: "A Japanese character encoding standardly used on Windows and Macintosh."
  category: "Japanese"
  ianaName: "Shift_JIS"
  mibEnum: 17
- code: "euc-jp"
  slug: "euc-jp"
  name: "EUC-JP"
  description: "A Japanese character encoding used on Unix-like systems."
  category: "Japanese"
  ianaName: "EUC-JP"
  mibEnum: 18
- code: "iso-2022-jp"
  slug: "iso-2022-jp"
  name: "ISO-2022-JP"
  description: "An encoding for Japanese email in 7-bit environments."
  category: "Japanese"
  ianaName: "ISO-2022-JP"
  mibEnum: 39
# Chinese encodings (Simplified and Traditional)
- code: "gb2312"
  slug: "gb2312"
  name: "GB2312"
  description: "A basic character encoding for Simplified Chinese."
  category: "Chinese"
  ianaName: "GB2312"
  mibEnum: 2025
- code: "gbk"
  slug: "gbk"
  name: "GBK"
  description: "A Chinese character encoding that extends GB2312."
  category: "Chinese"
  ianaName: "GBK"
  mibEnum: 113
- code: "gb18030"
  slug: "gb18030"
  name: "GB18030"
  description: "China's current national standard, capable of representing all Unicode characters."
  category: "Chinese"
  ianaName: "GB18030"
  mibEnum: 114
- code: "big5"
  slug: "big5"
  name: "Big5"
  description: "A Traditional Chinese character encoding used in Taiwan and Hong Kong."
  category: "Chinese"
  ianaName: "Big5"
  mibEnum: 2026
# Korean encodings
- code: "euc-kr"
  slug: "euc-kr"
  name: "EUC-KR"
  description: "A Korean character encoding used on Unix-like systems."
  category: "Korean"
  ianaName: "EUC-KR"
  mibEnum: 38
- code: "iso-2022-kr"
  slug: "iso-2022-kr"
  name: "ISO-2022-KR"
  description: "An encoding for Korean email in 7-bit environments."
  category: "Korean"
  ianaName: "ISO-2022-KR"
  mibEnum: 37
# Cyrillic (KOI8) encodings; ISO-8859-5 above is filed under ISO-8859
- code: "koi8-r"
  slug: "koi8-r"
  name: "KOI8-R"
  description: "An 8-bit character encoding for Russian Cyrillic."
  category: "Cyrillic"
  ianaName: "KOI8-R"
  mibEnum: 2084
- code: "koi8-u"
  slug: "koi8-u"
  name: "KOI8-U"
  description: "An 8-bit character encoding for Ukrainian Cyrillic."
  category: "Cyrillic"
  ianaName: "KOI8-U"
  mibEnum: 2088
# Windows code pages
- code: "windows-1252"
  slug: "windows-1252"
  name: "Windows-1252"
  description: "An 8-bit encoding for Western European languages used on Microsoft Windows."
  category: "Windows Code Page"
  ianaName: "windows-1252"
  mibEnum: 2252