---
test case: Empty but with header
#
# The header-style encoding string names ISO-8859-1 and the buffer holds a
# single \x00 byte. The expected buffer is the input itself (*buffer alias).
#
in:
  encoding: 'text/html; charset=ISO-8859-1'
  buffer: &buffer
    - '\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1
#
# charset=ISO-8859-1 is given explicitly; bytes A1 B1 C1 D1 E1 F1 FF are
# expected to come back as the text ¡±ÁÑáñÿ.
# NOTE(review): this is the only expected buffer written in double quotes, so
# the YAML parser itself decodes \x00 here, whereas the single-quoted buffers
# in the other cases keep the escape text literally - confirm the test reader
# treats both forms the same.
#
in:
  encoding: 'text/html; charset=ISO-8859-1'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - "¡±ÁÑáñÿ\x00"
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 without ;
#
# No ';' separates the media type from charset=..., and the expected
# encoding is WINDOWS-1252 instead of ISO-8859-1 (presumably the default
# used when no charset parameter is recognised - confirm in the code under
# test).
#
in:
  encoding: 'text/html charset=ISO-8859-1'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 charset with spaces
#
# Extra spaces surround the media type, the ';', the '=' and the charset
# value; ISO-8859-1 is still the expected encoding.
#
in:
  encoding: ' text/html ; charset = ISO-8859-1 '
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 charset empty
#
# The encoding string ends with ';' and carries no parameter after it; the
# expected encoding is WINDOWS-1252.
#
in:
  encoding: 'text/html;'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 charset special symbol
#
# The parameter after ';' has a value but no name ('=ISO-8859-1'); the
# expected encoding is WINDOWS-1252, not ISO-8859-1.
#
in:
  encoding: 'text/html;=ISO-8859-1'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 charset special symbol
#
# The parameter after ';' has a value but no name ('=ISO-8859-1'); the
# expected encoding is WINDOWS-1252, not ISO-8859-1.
# NOTE(review): name, input and expectations are identical to the test case
# directly before this one - looks like a copy/paste duplicate; consider
# removing it or varying the input.
#
in:
  encoding: 'text/html;=ISO-8859-1'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 quoted
#
# The charset value is wrapped in double quotes; the expected encoding is
# the bare name without the quotes.
#
in:
  encoding: 'text/html; charset="ISO-8859-1"'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 quoted/unquote
#
# The quoted charset value itself contains backslash-escaped quotes; the
# expected encoding keeps those inner quotes around the name.
#
in:
  encoding: 'text/html; charset="\"ISO-8859-1\""'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: '"ISO-8859-1"'
  return: SUCCEED
---
test case: ISO-8859-1 quoted/unquote last
#
# The quoted charset value ends with an escaped backslash (two backslashes
# right before the closing quote); the expected encoding keeps a single
# trailing backslash.
#
in:
  encoding: 'text/html; charset="ISO-8859-1\\"'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'ISO-8859-1\'
  return: SUCCEED
---
test case: ISO-8859-1 quoted but no matching quote
#
# The charset value opens a double quote that is never closed; the expected
# encoding is WINDOWS-1252, not ISO-8859-1.
#
in:
  encoding: 'text/html; charset="ISO-8859-1'
  buffer: &buffer
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: No encoding but is utf-8
#
# The encoding string is empty and the buffer holds the text directly rather
# than as \x escapes. The expected encoding is UTF-8 and the buffer is
# expected back unchanged (*buffer alias).
#
in:
  encoding: ''
  buffer: &buffer
    - '¡±ÁÑáñÿ\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
### html
---
test case: ISO-8859-1 charset
#
# The encoding string carries no charset, yet ISO-8859-1 is the expected
# encoding and bytes A1..FF are expected back as ¡±ÁÑáñÿ.
# NOTE(review): nothing in the buffer text below declares a charset, although
# the case name suggests the document should - confirm the document markup
# in this fixture is intact.
#
in:
  encoding: 'text/html'
  buffer: &buffer
    - '
      zabbix
      The content of the document......\xA1\xB1\xC1\xD1\xE1\xF1\xFF
      \x00'
out:
  expected_result_buffer:
    - '
      zabbix
      The content of the document......¡±ÁÑáñÿ
      \x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 http-equiv content
#
# The encoding string carries no charset, yet ISO-8859-1 is the expected
# encoding and bytes A1..FF are expected back as ¡±ÁÑáñÿ.
# NOTE(review): the buffer text below contains no http-equiv declaration,
# although the case name refers to one - confirm the document markup in this
# fixture is intact.
#
in:
  encoding: 'text/html'
  buffer: &buffer
    - '
      zabbix
      The content of the document......\xA1\xB1\xC1\xD1\xE1\xF1\xFF
      \x00'
out:
  expected_result_buffer:
    - '
      zabbix
      The content of the document......¡±ÁÑáñÿ
      \x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 http-equiv content invalid charset supplied in header
#
# The encoding string claims UTF-8, but the buffer bytes A1 B1 C1 D1 E1 F1 FF
# are not a valid UTF-8 sequence; the expected return is FAIL with encoding
# UTF-8.
# NOTE(review): an expected_result_buffer is still listed although the case
# expects FAIL - presumably it is not compared; confirm in the test driver.
#
in:
  encoding: 'text/html;charset=UTF-8'
  buffer: &buffer
    - '
      zabbix
      The content of the document......\xA1\xB1\xC1\xD1\xE1\xF1\xFF
      \x00'
out:
  expected_result_buffer:
    - '
      zabbix
      The content of the document......¡±ÁÑáñÿ
      \x00'
  encoding: 'UTF-8'
  return: FAIL
---
test case: meta cut off
# NOTE(review): this case looks truncated. The single-quoted buffer literal
# opened below is never closed and no out: section follows it; instead the
# text runs straight into the commentary and in:/out: sections of a UTF-16LE
# BOM case whose own '---' separator and 'test case:' line are not present.
# Restore both cases from the original fixture before relying on them.
in:
encoding: 'text/html'
buffer: &buffer
- '
zabbix
zabbix
zabbix
zabbix
zabbix
zabbix
zabbix
zabbix
\n
\n
\n
zabbix\n
zabbix
zabbix
zabbix
zabbix
zabbix
\n
\n
\n
zabbix\n
zabbix
zabbix
zabbix
/tmp/bytes
# iconv /tmp/bytes -f UTF-16LE -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# efbb bf11 11
#
in:
encoding: ''
buffer:
- '\xFF\xFE\x11\x00\x11\x00\x00'
out:
encoding: 'UTF-16LE'
expected_result_buffer:
- '\x11\x11\x00'
return: SUCCEED
---
test case: utf16 LE noBOM with encoding
#
# There is no BOM, but the data is still expected to decode correctly because
# the encoding is provided: the UTF-16LE input 11 00 11 00 is expected to
# become the bytes 11 11.
#
in:
  encoding: 'text/html; charset=UTF-16LE'
  buffer:
    - '\x11\x00\x11\x00\x00'
out:
  encoding: 'UTF-16LE'
  expected_result_buffer:
    - '\x11\x11\x00'
  return: SUCCEED
---
test case: utf16 LE bom is removed empty text
#
# The encoding string is empty and the buffer holds only the bytes FF FE
# followed by \x00. The expected encoding is UTF-16LE and the expected buffer
# is empty, i.e. nothing is left once the BOM is dropped.
#
in:
  encoding: ''
  buffer:
    - '\xFF\xFE\x00'
out:
  encoding: 'UTF-16LE'
  expected_result_buffer:
    - ''
  return: SUCCEED
---
test case: utf16 BE bom is removed
#
# utf-16_BE conversion to utf-8
# echo -n -e '\xfe\xff\x00\x11\x00\x11' > /tmp/bytes
# iconv /tmp/bytes -f UTF-16BE -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# efbb bf11 11
#
# The iconv output above still starts with the BOM (EF BB BF in UTF-8); the
# expected buffer below does not contain it. The encoding string is empty
# and the expected encoding is UTF-16BE.
#
in:
  encoding: ''
  buffer:
    - '\xFE\xFF\x00\x11\x00\x11\x00'
out:
  encoding: 'UTF-16BE'
  expected_result_buffer:
    - '\x11\x11\x00'
  return: SUCCEED
---
test case: utf16 BE noBOM with encoding
#
# The buffer has no BOM; UTF-16BE is named in the encoding string. The input
# 00 11 00 11 is expected to become the bytes 11 11.
#
in:
  encoding: 'text/html; charset=UTF-16BE'
  buffer:
    - '\x00\x11\x00\x11\x00'
out:
  encoding: 'UTF-16BE'
  expected_result_buffer:
    - '\x11\x11\x00'
  return: SUCCEED
---
test case: utf16 BE bom is removed empty text
#
# The encoding string is empty and the buffer holds only the bytes FE FF
# followed by \x00. The expected encoding is UTF-16BE and the expected buffer
# is empty, i.e. nothing is left once the BOM is dropped.
#
in:
  encoding: ''
  buffer:
    - '\xFE\xFF\x00'
out:
  encoding: 'UTF-16BE'
  expected_result_buffer:
    - ''
  return: SUCCEED
---
test case: utf32 LE bom is removed incorrect guess conversion to utf16
#
# utf32 conversion to utf-8
# utf32 without encoding is guessed as utf16:
# 11 in utf8 is represented in utf32 as \x11\x00\x00\x00
# \xFF\xFE\x00\x00 is utf-32 BOM
#
# So that,
# echo -n -e '\xFF\xFE\x00\x00\x11\x00\x00\x00' > /tmp/bytes
# iconv /tmp/bytes -f UTF-32 -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# 11
#
# However, if encoding is not specified, we can confuse it with utf-16 LE
# which also has \xFF\xFE at the beginning, so the result is different:
# 00 11 00
#
# which then reads as an empty string (the output starts with a 00 byte)
#
in:
  encoding: ''
  buffer:
    - '\xFF\xFE\x00\x00\x11\x00\x00\x00\x00'
out:
  encoding: 'UTF-16LE'
  expected_result_buffer:
    - ''
  return: SUCCEED
...