# TOML
#
# Character Encoding Standards - TOML
#
# Character encoding standards are specifications for representing characters as digital data in computers. Various schemes exist, including ASCII, UTF-8, UTF-16, Shift_JIS, and EUC-JP, each with a different character set, byte structure, and compatibility characteristics. UTF-8 has become the widely adopted international standard, but legacy encodings such as Shift_JIS and EUC-JP are still used in Japanese computing environments.
#
# Keywords: character encoding, Unicode, UTF-8, ASCII, Shift_JIS, EUC-JP, charset, internationalization
# ASCII: single-byte, 7-bit character set.
[[items]]
# Identity
name = "ASCII"
code = "ASCII"
slug = "ascii"
description = "American Standard Code for Information Interchange. Represents alphanumeric characters and symbols in 7 bits."

# History and status
yearIntroduced = 1963
usage = "legacy"

# Encoding properties
byteStructure = "固定長(1バイト、7ビット使用)"  # "fixed length (1 byte, 7 bits used)"
maxCharacters = 128  # 2^7 code points
asciiCompatible = true
japaneseSupport = false

# UTF-8: variable-length Unicode encoding.
[[items]]
# Identity
name = "UTF-8"
code = "UTF-8"
slug = "utf-8"
description = "Variable-length Unicode encoding. ASCII-compatible and represents characters worldwide."

# History and status
yearIntroduced = 1993
usage = "standard"

# Encoding properties
byteStructure = "可変長(1〜4バイト)"  # "variable length (1-4 bytes)"
maxCharacters = 1_114_112  # 0x110000, i.e. code points U+0000 through U+10FFFF
asciiCompatible = true
japaneseSupport = true

# UTF-16: Unicode encoding built from 16-bit units.
[[items]]
# Identity
name = "UTF-16"
code = "UTF-16"
slug = "utf-16"
description = "16-bit Unicode encoding. Widely used in Windows and Java."

# History and status
yearIntroduced = 1996
usage = "system"

# Encoding properties
byteStructure = "可変長(2または4バイト)"  # "variable length (2 or 4 bytes)"
maxCharacters = 1_114_112  # 0x110000, i.e. code points U+0000 through U+10FFFF
asciiCompatible = false
japaneseSupport = true

# UTF-32: fixed-width Unicode encoding, four bytes per character.
[[items]]
# Identity
name = "UTF-32"
code = "UTF-32"
slug = "utf-32"
description = "Fixed-length 32-bit Unicode encoding. Used for internal processing."

# History and status
# NOTE(review): this is the same year as the UTF-16 entry; UTF-32 may have been
# standardized later than UTF-16 - confirm against the Unicode Standard history.
yearIntroduced = 1996
usage = "internal"

# Encoding properties
byteStructure = "固定長(4バイト)"  # "fixed length (4 bytes)"
maxCharacters = 1_114_112  # 0x110000, i.e. code points U+0000 through U+10FFFF
asciiCompatible = false
japaneseSupport = true

# Shift_JIS: legacy one- or two-byte Japanese encoding.
[[items]]
# Identity
name = "Shift_JIS"
code = "Shift_JIS"
slug = "shift-jis"
description = "Legacy Japanese encoding. Widely used in Windows systems."

# History and status
# NOTE(review): 1978 matches the first JIS C 6226 character-set edition;
# Shift_JIS itself is usually dated to the early 1980s - confirm which is meant.
yearIntroduced = 1978
usage = "legacy"

# Encoding properties
byteStructure = "可変長(1〜2バイト)"  # "variable length (1-2 bytes)"
maxCharacters = 10_000  # round figure; presumably an approximation
asciiCompatible = false
japaneseSupport = true

# EUC-JP: Japanese encoding associated with Unix/Linux systems.
[[items]]
# Identity
name = "EUC-JP"
code = "EUC-JP"
slug = "euc-jp"
description = "Japanese encoding used in Unix/Linux. ASCII-compatible."

# History and status
yearIntroduced = 1988
usage = "legacy"

# Encoding properties
byteStructure = "可変長(1〜3バイト)"  # "variable length (1-3 bytes)"
maxCharacters = 11_000  # round figure; presumably an approximation
asciiCompatible = true
japaneseSupport = true

# ISO-2022-JP: 7-bit Japanese encoding that switches character sets with
# escape sequences.
[[items]]
# Identity
name = "ISO-2022-JP"
code = "ISO-2022-JP"
slug = "iso-2022-jp"
description = "7-bit escape sequence Japanese encoding. Used in email."

# History and status
# NOTE(review): RFC 1468, which specifies ISO-2022-JP, was published in 1993;
# 1983 may refer to the JIS X 0208-1983 character set instead - confirm.
yearIntroduced = 1983
usage = "legacy"

# Encoding properties
byteStructure = "7ビット可変長(エスケープシーケンス使用)"  # "7-bit variable length (uses escape sequences)"
maxCharacters = 10_000  # round figure; presumably an approximation
asciiCompatible = true
japaneseSupport = true

# GB2312: national standard character set for Simplified Chinese.
[[items]]
code = "GB2312"
slug = "gb2312"
name = "GB2312"
description = "National standard encoding for Simplified Chinese."
# In its usual EUC-CN byte form, bytes 0x00-0x7F are plain ASCII and both bytes
# of a two-byte character lie in 0xA1-0xFE. That is the same property that
# makes the EUC-JP entry in this file ASCII-compatible, so the flag is true.
asciiCompatible = true
byteStructure = "可変長(1〜2バイト)"  # "variable length (1-2 bytes)"
japaneseSupport = false
maxCharacters = 7445
usage = "legacy"
yearIntroduced = 1980

# Big5: Traditional Chinese encoding.
[[items]]
# Identity
name = "Big5"
code = "Big5"
slug = "big5"
description = "Traditional Chinese encoding used in Taiwan and Hong Kong."

# History and status
yearIntroduced = 1984
usage = "legacy"

# Encoding properties
byteStructure = "可変長(1〜2バイト)"  # "variable length (1-2 bytes)"
maxCharacters = 13_000  # round figure; presumably an approximation
asciiCompatible = false
japaneseSupport = false

# Windows-1252: single-byte Western European code page.
[[items]]
# Identity
name = "Windows-1252"
code = "Windows-1252"
slug = "windows-1252"
description = "Western European encoding used in Windows."

# History and status
yearIntroduced = 1992
usage = "legacy"

# Encoding properties
byteStructure = "固定長(1バイト)"  # "fixed length (1 byte)"
maxCharacters = 256  # 2^8 byte values
asciiCompatible = true
japaneseSupport = false