XML

Character Encoding Standards - XML

Character encoding standards are specifications for representing characters as digital data in computers. Various schemes exist, including ASCII, UTF-8, UTF-16, Shift_JIS, and EUC-JP, each with a different character set, byte structure, and compatibility characteristics. While UTF-8 has become the most widely adopted international standard, legacy encodings such as Shift_JIS and EUC-JP are still used in Japanese computing environments.

character encoding Unicode UTF-8 ASCII Shift_JIS EUC-JP charset internationalization
<?xml version="1.0" encoding="UTF-8"?>
<items>
  <!-- ASCII record: 7-bit, single-byte encoding of 128 characters; ASCII-compatible, no Japanese support, usage "legacy". -->
  <item>
    <code>ASCII</code>
    <slug>ascii</slug>
    <name>ASCII</name>
    <description>American Standard Code for Information Interchange. Represents alphanumeric characters and symbols in 7 bits.</description>
    <asciiCompatible>true</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "fixed length (1 byte, 7 bits used)". -->
    <byteStructure>固定長(1バイト、7ビット使用)</byteStructure>
    <japaneseSupport>false</japaneseSupport>
    <maxCharacters>128</maxCharacters>
    <usage>legacy</usage>
    <yearIntroduced>1963</yearIntroduced>
  </item>
  <!-- UTF-8 record: variable-length Unicode encoding; ASCII-compatible, supports Japanese, usage "standard". -->
  <item>
    <code>UTF-8</code>
    <slug>utf-8</slug>
    <name>UTF-8</name>
    <description>Variable-length Unicode encoding. ASCII-compatible and represents characters worldwide.</description>
    <asciiCompatible>true</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "variable length (1 to 4 bytes)". -->
    <byteStructure>可変長(1〜4バイト)</byteStructure>
    <japaneseSupport>true</japaneseSupport>
    <!-- 1114112 equals 0x110000; presumably this counts Unicode code points rather than assigned characters. TODO confirm intent. -->
    <maxCharacters>1114112</maxCharacters>
    <usage>standard</usage>
    <yearIntroduced>1993</yearIntroduced>
  </item>
  <!-- UTF-16 record: Unicode encoding marked as not ASCII-compatible; supports Japanese, usage "system". -->
  <item>
    <code>UTF-16</code>
    <slug>utf-16</slug>
    <name>UTF-16</name>
    <!-- NOTE(review): "16-bit" here presumably refers to the code unit size, since byteStructure below allows 2 or 4 bytes per character. Confirm wording. -->
    <description>16-bit Unicode encoding. Widely used in Windows and Java.</description>
    <asciiCompatible>false</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "variable length (2 or 4 bytes)". -->
    <byteStructure>可変長(2または4バイト)</byteStructure>
    <japaneseSupport>true</japaneseSupport>
    <!-- Same maxCharacters value as the UTF-8 and UTF-32 records. -->
    <maxCharacters>1114112</maxCharacters>
    <usage>system</usage>
    <yearIntroduced>1996</yearIntroduced>
  </item>
  <!-- UTF-32 record: fixed-length 32-bit Unicode encoding; not ASCII-compatible, supports Japanese, usage "internal". -->
  <item>
    <code>UTF-32</code>
    <slug>utf-32</slug>
    <name>UTF-32</name>
    <description>Fixed-length 32-bit Unicode encoding. Used for internal processing.</description>
    <asciiCompatible>false</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "fixed length (4 bytes)". -->
    <byteStructure>固定長(4バイト)</byteStructure>
    <japaneseSupport>true</japaneseSupport>
    <!-- Same maxCharacters value as the UTF-8 and UTF-16 records. -->
    <maxCharacters>1114112</maxCharacters>
    <usage>internal</usage>
    <!-- NOTE(review): same year as the UTF-16 record; UTF-32 may have been standardized under that name later. TODO confirm against the Unicode version history. -->
    <yearIntroduced>1996</yearIntroduced>
  </item>
  <!-- Shift_JIS record: Japanese encoding marked as not ASCII-compatible; usage "legacy". Note the slug uses a hyphen while code and name use an underscore. -->
  <item>
    <code>Shift_JIS</code>
    <slug>shift-jis</slug>
    <name>Shift_JIS</name>
    <description>Legacy Japanese encoding. Widely used in Windows systems.</description>
    <asciiCompatible>false</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "variable length (1 to 2 bytes)". -->
    <byteStructure>可変長(1〜2バイト)</byteStructure>
    <japaneseSupport>true</japaneseSupport>
    <!-- Round figure; presumably an approximation rather than an exact repertoire size. TODO confirm. -->
    <maxCharacters>10000</maxCharacters>
    <usage>legacy</usage>
    <!-- NOTE(review): 1978 may be the year of the underlying JIS character set standard rather than of the Shift_JIS encoding itself. TODO confirm. -->
    <yearIntroduced>1978</yearIntroduced>
  </item>
  <!-- EUC-JP record: Japanese encoding described as used on Unix/Linux; ASCII-compatible, usage "legacy". -->
  <item>
    <code>EUC-JP</code>
    <slug>euc-jp</slug>
    <name>EUC-JP</name>
    <description>Japanese encoding used in Unix/Linux. ASCII-compatible.</description>
    <asciiCompatible>true</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "variable length (1 to 3 bytes)". -->
    <byteStructure>可変長(1〜3バイト)</byteStructure>
    <japaneseSupport>true</japaneseSupport>
    <!-- Round figure; presumably an approximation rather than an exact repertoire size. TODO confirm. -->
    <maxCharacters>11000</maxCharacters>
    <usage>legacy</usage>
    <yearIntroduced>1988</yearIntroduced>
  </item>
  <!-- ISO-2022-JP record: 7-bit Japanese encoding that switches character sets with escape sequences; described as used in email, usage "legacy". -->
  <item>
    <code>ISO-2022-JP</code>
    <slug>iso-2022-jp</slug>
    <name>ISO-2022-JP</name>
    <description>7-bit escape sequence Japanese encoding. Used in email.</description>
    <!-- NOTE(review): marked true although the encoding relies on escape sequences (see description), so a byte's meaning depends on the current mode. Confirm which definition of ASCII compatibility this dataset uses. -->
    <asciiCompatible>true</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "7-bit variable length (uses escape sequences)". -->
    <byteStructure>7ビット可変長(エスケープシーケンス使用)</byteStructure>
    <japaneseSupport>true</japaneseSupport>
    <!-- Round figure, same value as the Shift_JIS record; presumably an approximation. TODO confirm. -->
    <maxCharacters>10000</maxCharacters>
    <usage>legacy</usage>
    <yearIntroduced>1983</yearIntroduced>
  </item>
  <!-- GB2312 record: national standard encoding for Simplified Chinese; no Japanese support, usage "legacy".
       asciiCompatible is true: in the usual EUC-CN byte form, both bytes of a two-byte character have the
       high bit set, so bytes 0x00 to 0x7F always stand for ASCII. This is the same property for which the
       EUC-JP record is marked true. -->
  <item>
    <code>GB2312</code>
    <slug>gb2312</slug>
    <name>GB2312</name>
    <description>National standard encoding for Simplified Chinese.</description>
    <asciiCompatible>true</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "variable length (1 to 2 bytes)". -->
    <byteStructure>可変長(1〜2バイト)</byteStructure>
    <japaneseSupport>false</japaneseSupport>
    <maxCharacters>7445</maxCharacters>
    <usage>legacy</usage>
    <yearIntroduced>1980</yearIntroduced>
  </item>
  <!-- Big5 record: Traditional Chinese encoding described as used in Taiwan and Hong Kong; marked as not ASCII-compatible, no Japanese support, usage "legacy". -->
  <item>
    <code>Big5</code>
    <slug>big5</slug>
    <name>Big5</name>
    <description>Traditional Chinese encoding used in Taiwan and Hong Kong.</description>
    <asciiCompatible>false</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "variable length (1 to 2 bytes)". -->
    <byteStructure>可変長(1〜2バイト)</byteStructure>
    <japaneseSupport>false</japaneseSupport>
    <!-- Round figure; presumably an approximation rather than an exact repertoire size. TODO confirm. -->
    <maxCharacters>13000</maxCharacters>
    <usage>legacy</usage>
    <yearIntroduced>1984</yearIntroduced>
  </item>
  <!-- Windows-1252 record: single-byte Western European encoding of 256 characters; ASCII-compatible, no Japanese support, usage "legacy". -->
  <item>
    <code>Windows-1252</code>
    <slug>windows-1252</slug>
    <name>Windows-1252</name>
    <description>Western European encoding used in Windows.</description>
    <asciiCompatible>true</asciiCompatible>
    <!-- byteStructure is written in Japanese; English gloss: "fixed length (1 byte)". -->
    <byteStructure>固定長(1バイト)</byteStructure>
    <japaneseSupport>false</japaneseSupport>
    <maxCharacters>256</maxCharacters>
    <usage>legacy</usage>
    <yearIntroduced>1992</yearIntroduced>
  </item>
</items>