XML
Character Encoding Standards - XML
Character encoding standards are specifications for representing characters as digital data in computers. Various schemes exist, including ASCII, UTF-8, UTF-16, Shift_JIS, and EUC-JP, each with different character sets, byte structures, and compatibility characteristics. Although UTF-8 has become the most widely adopted international standard, legacy encodings such as Shift_JIS and EUC-JP are still used in Japanese computing environments.
character encoding
Unicode
UTF-8
ASCII
Shift_JIS
EUC-JP
charset
internationalization
<?xml version="1.0" encoding="UTF-8"?>
<items>
<item>
  <!-- ASCII record. byteStructure is Japanese text meaning
       "fixed length (1 byte, 7 bits used)". -->
  <code>ASCII</code>
  <slug>ascii</slug>
  <name>ASCII</name>
  <description>American Standard Code for Information Interchange. Represents alphanumeric characters and symbols in 7 bits.</description>
  <asciiCompatible>true</asciiCompatible>
  <byteStructure>固定長(1バイト、7ビット使用)</byteStructure>
  <japaneseSupport>false</japaneseSupport>
  <maxCharacters>128</maxCharacters>
  <usage>legacy</usage>
  <yearIntroduced>1963</yearIntroduced>
</item>
<item>
  <!-- UTF-8 record. byteStructure is Japanese text meaning
       "variable length (1 to 4 bytes)". -->
  <code>UTF-8</code>
  <slug>utf-8</slug>
  <name>UTF-8</name>
  <description>Variable-length Unicode encoding. ASCII-compatible and represents characters worldwide.</description>
  <asciiCompatible>true</asciiCompatible>
  <byteStructure>可変長(1〜4バイト)</byteStructure>
  <japaneseSupport>true</japaneseSupport>
  <maxCharacters>1114112</maxCharacters>
  <usage>standard</usage>
  <yearIntroduced>1993</yearIntroduced>
</item>
<item>
  <!-- UTF-16 record. byteStructure is Japanese text meaning
       "variable length (2 or 4 bytes)". -->
  <code>UTF-16</code>
  <slug>utf-16</slug>
  <name>UTF-16</name>
  <description>16-bit Unicode encoding. Widely used in Windows and Java.</description>
  <asciiCompatible>false</asciiCompatible>
  <byteStructure>可変長(2または4バイト)</byteStructure>
  <japaneseSupport>true</japaneseSupport>
  <maxCharacters>1114112</maxCharacters>
  <usage>system</usage>
  <yearIntroduced>1996</yearIntroduced>
</item>
<item>
  <!-- UTF-32 record. byteStructure is Japanese text meaning
       "fixed length (4 bytes)".
       NOTE(review): yearIntroduced is identical to the UTF-16 record;
       confirm the UTF-32 date against the Unicode Standard history. -->
  <code>UTF-32</code>
  <slug>utf-32</slug>
  <name>UTF-32</name>
  <description>Fixed-length 32-bit Unicode encoding. Used for internal processing.</description>
  <asciiCompatible>false</asciiCompatible>
  <byteStructure>固定長(4バイト)</byteStructure>
  <japaneseSupport>true</japaneseSupport>
  <maxCharacters>1114112</maxCharacters>
  <usage>internal</usage>
  <yearIntroduced>1996</yearIntroduced>
</item>
<item>
  <!-- Shift_JIS record. byteStructure is Japanese text meaning
       "variable length (1 to 2 bytes)". -->
  <code>Shift_JIS</code>
  <slug>shift-jis</slug>
  <name>Shift_JIS</name>
  <description>Legacy Japanese encoding. Widely used in Windows systems.</description>
  <asciiCompatible>false</asciiCompatible>
  <byteStructure>可変長(1〜2バイト)</byteStructure>
  <japaneseSupport>true</japaneseSupport>
  <maxCharacters>10000</maxCharacters>
  <usage>legacy</usage>
  <yearIntroduced>1978</yearIntroduced>
</item>
<item>
  <!-- EUC-JP record. byteStructure is Japanese text meaning
       "variable length (1 to 3 bytes)". -->
  <code>EUC-JP</code>
  <slug>euc-jp</slug>
  <name>EUC-JP</name>
  <description>Japanese encoding used in Unix/Linux. ASCII-compatible.</description>
  <asciiCompatible>true</asciiCompatible>
  <byteStructure>可変長(1〜3バイト)</byteStructure>
  <japaneseSupport>true</japaneseSupport>
  <maxCharacters>11000</maxCharacters>
  <usage>legacy</usage>
  <yearIntroduced>1988</yearIntroduced>
</item>
<item>
  <!-- ISO-2022-JP record. byteStructure is Japanese text meaning
       "7-bit variable length (uses escape sequences)". -->
  <code>ISO-2022-JP</code>
  <slug>iso-2022-jp</slug>
  <name>ISO-2022-JP</name>
  <description>7-bit escape sequence Japanese encoding. Used in email.</description>
  <asciiCompatible>true</asciiCompatible>
  <byteStructure>7ビット可変長(エスケープシーケンス使用)</byteStructure>
  <japaneseSupport>true</japaneseSupport>
  <maxCharacters>10000</maxCharacters>
  <usage>legacy</usage>
  <yearIntroduced>1983</yearIntroduced>
</item>
<item>
  <!-- GB2312 record. byteStructure is Japanese text meaning
       "variable length (1 to 2 bytes)".
       That 1 to 2 byte layout is the EUC-CN encoding of GB2312: bytes
       0x00-0x7F are plain ASCII, and both bytes of a two-byte character
       lie in 0xA1-0xFE, so an ASCII byte never occurs inside a multi-byte
       character. asciiCompatible is therefore true, on the same basis as
       the EUC-JP record. -->
  <code>GB2312</code>
  <slug>gb2312</slug>
  <name>GB2312</name>
  <description>National standard encoding for Simplified Chinese.</description>
  <asciiCompatible>true</asciiCompatible>
  <byteStructure>可変長(1〜2バイト)</byteStructure>
  <japaneseSupport>false</japaneseSupport>
  <maxCharacters>7445</maxCharacters>
  <usage>legacy</usage>
  <yearIntroduced>1980</yearIntroduced>
</item>
<item>
  <!-- Big5 record. byteStructure is Japanese text meaning
       "variable length (1 to 2 bytes)". -->
  <code>Big5</code>
  <slug>big5</slug>
  <name>Big5</name>
  <description>Traditional Chinese encoding used in Taiwan and Hong Kong.</description>
  <asciiCompatible>false</asciiCompatible>
  <byteStructure>可変長(1〜2バイト)</byteStructure>
  <japaneseSupport>false</japaneseSupport>
  <maxCharacters>13000</maxCharacters>
  <usage>legacy</usage>
  <yearIntroduced>1984</yearIntroduced>
</item>
<item>
  <!-- Windows-1252 record. byteStructure is Japanese text meaning
       "fixed length (1 byte)". -->
  <code>Windows-1252</code>
  <slug>windows-1252</slug>
  <name>Windows-1252</name>
  <description>Western European encoding used in Windows.</description>
  <asciiCompatible>true</asciiCompatible>
  <byteStructure>固定長(1バイト)</byteStructure>
  <japaneseSupport>false</japaneseSupport>
  <maxCharacters>256</maxCharacters>
  <usage>legacy</usage>
  <yearIntroduced>1992</yearIntroduced>
</item>
</items>