INI

Character Encoding - INI

Character encoding is a system of rules for converting characters and symbols into byte sequences that computers can process. Many schemes exist, ranging from international standards such as ASCII and UTF-8, to Japanese-specific encodings such as Shift_JIS and EUC-JP, to country-specific code pages. Although UTF-8, an encoding of Unicode, is now widely adopted as the global standard, understanding the other encoding schemes remains important for maintaining compatibility with legacy systems.

character code Unicode UTF-8 character set internationalization text processing
; UTF-8 entry in the Unicode category. The section suffix, code and slug are
; all the lowercase form of ianaName.
[item.utf-8]
code=utf-8
slug=utf-8
name=UTF-8
description=A variable-length character encoding that represents Unicode using 1 to 4 bytes.
category=Unicode
; NOTE(review): ianaName/mibEnum look like the name and MIBenum number from
; the IANA Character Sets registry - confirm with the consumer of this file.
ianaName=UTF-8
mibEnum=106

; UTF-16 entry in the Unicode category; code and slug are the lowercase form
; of ianaName.
[item.utf-16]
code=utf-16
slug=utf-16
name=UTF-16
description=A character encoding that represents Unicode in 16-bit units.
category=Unicode
ianaName=UTF-16
mibEnum=1015

; UTF-32 entry; grouped with UTF-8 and UTF-16 under category=Unicode.
[item.utf-32]
code=utf-32
slug=utf-32
name=UTF-32
description=A character encoding that represents Unicode in fixed-length 32 bits (4 bytes).
category=Unicode
ianaName=UTF-32
mibEnum=1017

; US-ASCII entry; filed under its own category (ASCII) rather than ISO-8859.
[item.us-ascii]
code=us-ascii
slug=us-ascii
name=US-ASCII
description=A basic character encoding that defines 128 characters in 7 bits.
category=ASCII
ianaName=US-ASCII
mibEnum=3

; ISO-8859-1 entry. Only `name` carries the Latin-1 alias; the identifier
; keys (code, slug, ianaName) do not.
[item.iso-8859-1]
code=iso-8859-1
slug=iso-8859-1
name=ISO-8859-1 (Latin-1)
description=An 8-bit character encoding for Western European languages.
category=ISO-8859
ianaName=ISO-8859-1
mibEnum=4

; ISO-8859-2 entry. Only `name` carries the Latin-2 alias; the identifier
; keys (code, slug, ianaName) do not.
[item.iso-8859-2]
code=iso-8859-2
slug=iso-8859-2
name=ISO-8859-2 (Latin-2)
description=An 8-bit character encoding for Central European languages.
category=ISO-8859
ianaName=ISO-8859-2
mibEnum=5

; ISO-8859-5 entry. Filed under category=ISO-8859, whereas the KOI8-R and
; KOI8-U entries use category=Cyrillic.
[item.iso-8859-5]
code=iso-8859-5
slug=iso-8859-5
name=ISO-8859-5 (Cyrillic)
description=An 8-bit character encoding for Cyrillic script.
category=ISO-8859
ianaName=ISO-8859-5
mibEnum=8

; ISO-8859-7 entry. The parenthesised part of `name` is the script (Greek),
; not a Latin-N alias as in the ISO-8859-1/-2/-15 entries.
[item.iso-8859-7]
code=iso-8859-7
slug=iso-8859-7
name=ISO-8859-7 (Greek)
description=An 8-bit character encoding for Modern Greek.
category=ISO-8859
ianaName=ISO-8859-7
mibEnum=10

; ISO-8859-15 entry. `name` carries the Latin-9 alias, and the description
; presents it as a revision of ISO-8859-1.
[item.iso-8859-15]
code=iso-8859-15
slug=iso-8859-15
name=ISO-8859-15 (Latin-9)
description=A revised version of ISO-8859-1 that includes the Euro sign.
category=ISO-8859
ianaName=ISO-8859-15
mibEnum=111

; Shift_JIS entry in the Japanese category.
; `code` uses an underscore (shift_jis) while the section name and `slug`
; use a hyphen (shift-jis).
; NOTE(review): keep both spellings in mind if either is used as a lookup key.
[item.shift-jis]
code=shift_jis
slug=shift-jis
name=Shift_JIS
description=A Japanese character encoding standardly used on Windows and Macintosh.
category=Japanese
ianaName=Shift_JIS
mibEnum=17

; EUC-JP entry in the Japanese category; section suffix, code and slug are
; all euc-jp.
[item.euc-jp]
code=euc-jp
slug=euc-jp
name=EUC-JP
description=A Japanese character encoding used on Unix-like systems.
category=Japanese
ianaName=EUC-JP
mibEnum=18

; ISO-2022-JP entry in the Japanese category; described as an email encoding
; for 7-bit environments.
[item.iso-2022-jp]
code=iso-2022-jp
slug=iso-2022-jp
name=ISO-2022-JP
description=An encoding for Japanese email in 7-bit environments.
category=Japanese
ianaName=ISO-2022-JP
mibEnum=39

; GB2312 entry in the Chinese category; the GBK entry describes itself as an
; extension of this encoding.
[item.gb2312]
code=gb2312
slug=gb2312
name=GB2312
description=A basic character encoding for Simplified Chinese.
category=Chinese
ianaName=GB2312
mibEnum=2025

; GBK entry in the Chinese category; described as an extension of GB2312.
[item.gbk]
code=gbk
slug=gbk
name=GBK
description=A Chinese character encoding that extends GB2312.
category=Chinese
ianaName=GBK
mibEnum=113

; GB18030 entry in the Chinese category; the description marks it as the
; current national standard.
[item.gb18030]
code=gb18030
slug=gb18030
name=GB18030
description=China's current national standard, capable of representing all Unicode characters.
category=Chinese
ianaName=GB18030
mibEnum=114

; Big5 entry. Shares category=Chinese with the GB* entries, although its
; description is about Traditional rather than Simplified Chinese.
[item.big5]
code=big5
slug=big5
name=Big5
description=A Traditional Chinese character encoding used in Taiwan and Hong Kong.
category=Chinese
ianaName=Big5
mibEnum=2026

; EUC-KR entry in the Korean category; section suffix, code and slug are all
; euc-kr.
[item.euc-kr]
code=euc-kr
slug=euc-kr
name=EUC-KR
description=A Korean character encoding used on Unix-like systems.
category=Korean
ianaName=EUC-KR
mibEnum=38

; ISO-2022-KR entry in the Korean category; its description mirrors the
; ISO-2022-JP one (email in 7-bit environments) for Korean.
[item.iso-2022-kr]
code=iso-2022-kr
slug=iso-2022-kr
name=ISO-2022-KR
description=An encoding for Korean email in 7-bit environments.
category=Korean
ianaName=ISO-2022-KR
mibEnum=37

; KOI8-R entry. Filed under category=Cyrillic, whereas the ISO-8859-5
; (Cyrillic) entry is filed under category=ISO-8859.
[item.koi8-r]
code=koi8-r
slug=koi8-r
name=KOI8-R
description=An 8-bit character encoding for Russian Cyrillic.
category=Cyrillic
ianaName=KOI8-R
mibEnum=2084

; KOI8-U entry in the Cyrillic category; per the descriptions, the Ukrainian
; counterpart of the KOI8-R entry.
[item.koi8-u]
code=koi8-u
slug=koi8-u
name=KOI8-U
description=An 8-bit character encoding for Ukrainian Cyrillic.
category=Cyrillic
ianaName=KOI8-U
mibEnum=2088

; Windows-1252 entry in the "Windows Code Page" category.
; ianaName is written in lowercase (windows-1252), unlike the capitalised
; display `name`; here ianaName is identical to code and slug.
[item.windows-1252]
code=windows-1252
slug=windows-1252
name=Windows-1252
description=An 8-bit encoding for Western European languages used on Microsoft Windows.
category=Windows Code Page
ianaName=windows-1252
mibEnum=2252