---
test case: Empty but with header
# The buffer holds a single '\x00' byte and the header value names
# ISO-8859-1: the output must equal the input and the header charset is
# the expected detected encoding.
in:
  encoding: 'text/html; charset=ISO-8859-1'
  buffer: &buffer
    - '\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1
# Baseline: charset=ISO-8859-1 is given in the header value and the high-bit
# input bytes are expected back as the corresponding UTF-8 characters.
in:
  encoding: 'text/html; charset=ISO-8859-1'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - "¡±ÁÑáñÿ\x00"
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 without ;
# No ';' separates the media type from the charset parameter. The expected
# encoding is WINDOWS-1252 rather than ISO-8859-1, i.e. the charset parameter
# is evidently not recognised in this form.
in:
  encoding: 'text/html charset=ISO-8859-1'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 charset with spaces
# Whitespace surrounds the media type, the ';', the '=' and the charset value;
# ISO-8859-1 is still the expected encoding.
in:
  encoding: ' text/html ; charset = ISO-8859-1 '
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 charset empty
# The header value ends right after ';' with no charset parameter at all;
# WINDOWS-1252 is the expected encoding for the high-bit input bytes.
in:
  encoding: 'text/html;'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 charset special symbol
# The parameter has a value but no name ('=ISO-8859-1' directly after ';');
# WINDOWS-1252 is the expected encoding, not ISO-8859-1.
in:
  encoding: 'text/html;=ISO-8859-1'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 charset special symbol
# Nameless parameter ('=ISO-8859-1' directly after ';'); WINDOWS-1252 is the
# expected encoding.
# NOTE(review): this case has the same name, header value, buffer and
# expectations as the case immediately before it - consider dropping it or
# changing it to cover a different malformed parameter.
in:
  encoding: 'text/html;=ISO-8859-1'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: ISO-8859-1 quoted
# The charset value is wrapped in double quotes; the expected encoding is the
# value without the quotes.
in:
  encoding: 'text/html; charset="ISO-8859-1"'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 quoted/unquote
# The quoted charset value contains backslash-escaped double quotes (the
# backslashes are literal here because the YAML scalar is single-quoted).
# The expected encoding keeps those inner quotes: "ISO-8859-1".
in:
  encoding: 'text/html; charset="\"ISO-8859-1\""'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: '"ISO-8859-1"'
  return: SUCCEED
---
test case: ISO-8859-1 quoted/unquote last
# The quoted charset value ends with an escaped backslash ('\\' before the
# closing quote); the expected encoding ends with a single literal backslash.
in:
  encoding: 'text/html; charset="ISO-8859-1\\"'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'ISO-8859-1\'
  return: SUCCEED
---
test case: ISO-8859-1 quoted but no matching quote
# The charset value opens a double quote that is never closed; WINDOWS-1252
# is the expected encoding, not ISO-8859-1.
in:
  encoding: 'text/html; charset="ISO-8859-1'
  buffer:
    - '\xA1\xB1\xC1\xD1\xE1\xF1\xFF\x00'
out:
  expected_result_buffer:
    - '¡±ÁÑáñÿ\x00'
  encoding: 'WINDOWS-1252'
  return: SUCCEED
---
test case: No encoding but is utf-8
# Empty header value and a buffer written as UTF-8 text in this file: the
# output must equal the input and UTF-8 is the expected encoding.
in:
  encoding: ''
  buffer: &buffer
    - '¡±ÁÑáñÿ\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
### html
---
test case: ISO-8859-1 charset
# The header value carries no charset; the document declares it through
# <meta charset="iso-8859-1" />. The high-bit bytes in the body are expected
# back as UTF-8 characters and ISO-8859-1 is the expected encoding.
in:
  encoding: 'text/html'
  buffer:
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta name="description"
        content="line1
        line2,
        line3." />
  <meta name="keywords" content="W3C, HTML, CSS, SVG, Web standards">
  <meta charset="iso-8859-1" />
</head>
<body>

The content of the document......\xA1\xB1\xC1\xD1\xE1\xF1\xFF

</body>
</html>\x00'
out:
  expected_result_buffer:
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta name="description"
        content="line1
        line2,
        line3." />
  <meta name="keywords" content="W3C, HTML, CSS, SVG, Web standards">
  <meta charset="iso-8859-1" />
</head>
<body>

The content of the document......¡±ÁÑáñÿ

</body>
</html>\x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 http-equiv content
# The header value carries no charset; the document declares it through a
# <meta ... http-equiv="Content-Type"> tag whose content attribute holds
# charset=ISO-8859-1. The body bytes are expected back as UTF-8 characters.
in:
  encoding: 'text/html'
  buffer:
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
  <meta name="description"
        content="line1
        line2,
        line3." />
  <meta name="keywords" content="W3C, HTML, CSS, SVG, Web standards">
</head>
<body>

The content of the document......\xA1\xB1\xC1\xD1\xE1\xF1\xFF

</body>
</html>\x00'
out:
  expected_result_buffer:
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
  <meta name="description"
        content="line1
        line2,
        line3." />
  <meta name="keywords" content="W3C, HTML, CSS, SVG, Web standards">
</head>
<body>

The content of the document......¡±ÁÑáñÿ

</body>
</html>\x00'
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: ISO-8859-1 http-equiv content invalid charset supplied in header
# The header value says UTF-8 while the <meta> tag says ISO-8859-1 and the
# body bytes are ISO-8859-1. The expected encoding is the header one (UTF-8)
# and the expected return is FAIL.
# NOTE(review): presumably FAIL is because the body bytes are not valid
# UTF-8 - confirm against the test driver.
in:
  encoding: 'text/html;charset=UTF-8'
  buffer:
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
  <meta name="description"
        content="line1
        line2,
        line3." />
  <meta name="keywords" content="W3C, HTML, CSS, SVG, Web standards">
</head>
<body>

The content of the document......\xA1\xB1\xC1\xD1\xE1\xF1\xFF

</body>
</html>\x00'
out:
  expected_result_buffer:
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type">
  <meta name="description"
        content="line1
        line2,
        line3." />
  <meta name="keywords" content="W3C, HTML, CSS, SVG, Web standards">
</head>
<body>

The content of the document......¡±ÁÑáñÿ

</body>
</html>\x00'
  encoding: 'UTF-8'
  return: FAIL
---
test case: meta cut off
# The buffer ends right after '<meta' with no attributes; the output must
# equal the input and UTF-8 is the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
---
test case: meta cut off attribute name
# The buffer ends after the attribute name 'content' (no '=' and no value);
# the output must equal the input and UTF-8 is the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
---
test case: meta cut off attribute name after equal
# The buffer ends right after 'content=' with no attribute value; the output
# must equal the input and UTF-8 is the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content=\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
---
test case: meta attribute name without value
# 'content' appears as a bare attribute with no value, followed by an
# unrelated http-equiv and an unquoted charset=iso-8859-1; ISO-8859-1 is
# still the expected encoding. The buffer has no high-bit bytes, so the
# output equals the input.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content http-equiv="foo" charset=iso-8859-1\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value in double quote
# content/http-equiv values are double-quoted and the buffer ends before the
# tag is closed; ISO-8859-1 is the expected encoding and the output equals
# the input.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value in double quote with spaces
# Same shape as a double-quoted content/http-equiv pair, but with spaces
# around every '=' and inside the quoted content value; ISO-8859-1 is the
# expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content = " text/html; charset = ISO-8859-1 " http-equiv = "Content-Type"\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute charset in double quote with spaces
# The charset attribute has spaces around '=' and padding spaces inside the
# double-quoted value; the expected encoding is the value without padding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta charset = " ISO-8859-1 "\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value in double quote not terminated
# The http-equiv value opens a double quote that is never closed before the
# buffer ends; UTF-8 is the expected encoding even though the content
# attribute names ISO-8859-1.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
---
test case: meta attribute value in single quote
# content/http-equiv values use single quotes, so the YAML scalar itself is
# double-quoted (YAML processes the \n and \x00 escapes here). ISO-8859-1 is
# the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - "<!DOCTYPE html>\n
<html>\n
<head>\n
  <title>zabbix</title>\n
  <meta content='text/html; charset=ISO-8859-1' http-equiv='Content-Type'\x00"
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value charset with spaces
# The charset attribute has spaces around '=' and a lower-case value; the
# expected encoding is reported in upper case.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta charset = "iso-8859-1"\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value charset two times
# The same charset attribute is repeated twice in one <meta> tag; ISO-8859-1
# is still the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta charset = "iso-8859-1" charset = "iso-8859-1"\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: http-eq and http-eq
# The content/http-equiv attribute pair is repeated twice in one <meta> tag;
# ISO-8859-1 is still the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" http-equiv="Content-Type"\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: http-eq and meta attribute value charset
# One <meta> tag carries both an http-equiv content value naming UTF-8 and,
# after it, a charset attribute naming iso-8859-1; the charset attribute's
# value is the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=UTF-8" http-equiv="Content-Type" charset="iso-8859-1"\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value charset and http-eq
# One <meta> tag carries a charset attribute naming iso-8859-1 first and an
# http-equiv content value naming UTF-8 after it; the charset attribute's
# value is still the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta charset="iso-8859-1" content="text/html; charset=UTF-8" http-equiv="Content-Type" \x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value in single quote with spaces
# Single-quoted content/http-equiv values with spaces around every '='; the
# YAML scalar is double-quoted, so YAML processes the \n and \x00 escapes.
# ISO-8859-1 is the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - "<!DOCTYPE html>\n
<html>\n
<head>\n
  <title>zabbix</title>\n
  <meta content = 'text/html; charset = ISO-8859-1' http-equiv = 'Content-Type'\x00"
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value in single quote not terminated
# The http-equiv value opens a single quote that is never closed before the
# buffer ends; UTF-8 is the expected encoding even though the content
# attribute names ISO-8859-1.
in:
  encoding: 'text/html'
  buffer: &buffer
    - "<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content='text/html; charset=ISO-8859-1' http-equiv='Content-Type\x00"
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
---
test case: meta attribute value unquoted
# The http-equiv value (Content-Type) is not quoted and runs up to the end of
# the buffer; ISO-8859-1 from the content attribute is the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content="text/html; charset=ISO-8859-1" http-equiv=Content-Type\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: meta attribute value unquoted with spaces
# Unquoted http-equiv value with spaces around '=' and a trailing space
# before the end of the buffer; ISO-8859-1 is the expected encoding.
in:
  encoding: 'text/html'
  buffer: &buffer
    - '<!DOCTYPE html>
<html>
<head>
  <title>zabbix</title>
  <meta content= "text/html; charset = ISO-8859-1" http-equiv = Content-Type \x00'
out:
  expected_result_buffer: *buffer
  encoding: 'ISO-8859-1'
  return: SUCCEED
---
test case: HTML in JSON
# A JSON body (header value application/json) whose string value contains
# '<meta charset=' followed by backslash-escaped quotes (\x5c is a
# backslash). The expected encoding is a single backslash and the output
# must equal the input.
# NOTE(review): presumably the backslash is taken as the start of an
# unquoted attribute value that ends at the following '"' - confirm.
in:
  encoding: 'application/json'
  buffer: &buffer
    - '{"id": "<meta charset=\x5c"iso-8859-1\x5c"}\x00'
out:
  expected_result_buffer: *buffer
  encoding: '\'
  return: SUCCEED
---
test case: empty text nothing happens
# Empty input and empty header value: the output must equal the (empty)
# input and UTF-8 is the expected encoding.
# The buffer is a one-element sequence anchored as &buffer and aliased in
# the expectation, the same shape the other cases in this file use.
in:
  encoding: ''
  buffer: &buffer
    - ''
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
---
test case: bom is not added to plain data utf8 assumed
# Plain \x11 bytes with no byte-order mark and an empty header value: the
# output must equal the input (nothing is prepended) and UTF-8 is the
# expected encoding.
in:
  encoding: ''
  buffer: &buffer
    - '\x11\x11\x11\x11\x11\x11\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: SUCCEED
---
test case: utf8 bom is removed
# The input starts with the UTF-8 byte-order mark (EF BB BF) and the header
# value is empty; the expected output is the same bytes without the BOM.
in:
  encoding: ''
  buffer:
    - '\xEF\xBB\xBF\x11\x11\x11\x11\x11\x11\x00'
out:
  expected_result_buffer:
    - '\x11\x11\x11\x11\x11\x11\x00'
  encoding: 'UTF-8'
  return: SUCCEED

---
test case: utf8 bom is removed2
# Negative check: the expected buffer aliases the input, BOM included, and
# the expected return is FAIL.
# NOTE(review): presumably FAIL reflects the actual output (BOM stripped)
# differing from this expected buffer - confirm against the test driver.
in:
  encoding: ''
  buffer: &buffer
    - '\xEF\xBB\xBF\x11\x11\x11\x11\x11\x11\x00'
out:
  expected_result_buffer: *buffer
  encoding: 'UTF-8'
  return: FAIL
---
test case: utf8 bom is removed with encoding specified
# UTF-8 byte-order mark at the start of the input with charset=UTF-8 given
# explicitly in the header value; the expected output has the BOM removed.
in:
  encoding: 'text/html; charset=UTF-8'
  buffer:
    - '\xEF\xBB\xBF\x11\x11\x11\x11\x11\x11\x00'
out:
  expected_result_buffer:
    - '\x11\x11\x11\x11\x11\x11\x00'
  encoding: 'UTF-8'
  return: SUCCEED

---
test case: utf8 bom is removed empty text
# The input is only the UTF-8 byte-order mark followed by '\x00'; the
# expected output is empty.
in:
  encoding: ''
  buffer:
    - '\xEF\xBB\xBF\x00'
out:
  expected_result_buffer:
    - ''
  encoding: 'UTF-8'
  return: SUCCEED

---
test case: utf16 LE bom is removed
#
# UTF-16LE input starting with the FF FE byte-order mark, empty header value.
# Reference conversion to UTF-8 with iconv (which keeps the BOM as EF BB BF):
# echo -n -e '\xff\xfe\x11\x00\x11\x00' > /tmp/bytes
# iconv /tmp/bytes -f UTF-16LE -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# efbb bf11 11
# The expected buffer below is that output without the leading BOM.
#
in:
  encoding: ''
  buffer:
    - '\xFF\xFE\x11\x00\x11\x00\x00'
out:
  expected_result_buffer:
    - '\x11\x11\x00'
  encoding: 'UTF-16LE'
  return: SUCCEED

---
test case: utf16 LE noBOM with encoding
#
# The input has no BOM, but charset=UTF-16LE is provided in the header value,
# so the data is still expected to be decoded correctly.
#
in:
  encoding: 'text/html; charset=UTF-16LE'
  buffer:
    - '\x11\x00\x11\x00\x00'
out:
  expected_result_buffer:
    - '\x11\x11\x00'
  encoding: 'UTF-16LE'
  return: SUCCEED

---
test case: utf16 LE bom is removed empty text
# The input is only the UTF-16LE byte-order mark (FF FE) followed by '\x00';
# the expected output is empty and UTF-16LE is the expected encoding.
in:
  encoding: ''
  buffer:
    - '\xFF\xFE\x00'
out:
  expected_result_buffer:
    - ''
  encoding: 'UTF-16LE'
  return: SUCCEED

---
test case: utf16 BE bom is removed
#
# UTF-16BE input starting with the FE FF byte-order mark, empty header value.
# Reference conversion to UTF-8 with iconv (which keeps the BOM as EF BB BF):
# echo -n -e '\xfe\xff\x00\x11\x00\x11' > /tmp/bytes
# iconv /tmp/bytes -f UTF-16BE -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# efbb bf11 11
# The expected buffer below is that output without the leading BOM.
#
in:
  encoding: ''
  buffer:
    - '\xFE\xFF\x00\x11\x00\x11\x00'
out:
  expected_result_buffer:
    - '\x11\x11\x00'
  encoding: 'UTF-16BE'
  return: SUCCEED

---
test case: utf16 BE noBOM with encoding
# The input has no BOM, but charset=UTF-16BE is provided in the header value;
# the expected output is the decoded \x11 \x11 bytes.
in:
  encoding: 'text/html; charset=UTF-16BE'
  buffer:
    - '\x00\x11\x00\x11\x00'
out:
  expected_result_buffer:
    - '\x11\x11\x00'
  encoding: 'UTF-16BE'
  return: SUCCEED

---
test case: utf16 BE bom is removed empty text
# The input is only the UTF-16BE byte-order mark (FE FF) followed by '\x00';
# the expected output is empty and UTF-16BE is the expected encoding.
in:
  encoding: ''
  buffer:
    - '\xFE\xFF\x00'
out:
  expected_result_buffer:
    - ''
  encoding: 'UTF-16BE'
  return: SUCCEED
---
test case: utf32 LE bom is removed incorrect guess conversion to utf16
#
# UTF-32 input with an empty header value is guessed as UTF-16.
#
# The UTF-8 byte 11 is represented in UTF-32 as \x11\x00\x00\x00, and
# \xFF\xFE\x00\x00 is the UTF-32 BOM, so a proper UTF-32 conversion gives:
# echo -n -e '\xFF\xFE\x00\x00\x11\x00\x00\x00' > /tmp/bytes
# iconv /tmp/bytes -f UTF-32 -t UTF-8 -o /tmp/utf8
# xxd /tmp/utf8
# 11
#
# However, when the encoding is not specified the data can be confused with
# UTF-16LE, whose BOM also begins with \xFF\xFE, so the result is different:
# 00 11 00
#
# That result starts with a zero byte and therefore reads as an empty string.
#
in:
  encoding: ''
  buffer:
    - '\xFF\xFE\x00\x00\x11\x00\x00\x00\x00'
out:
  expected_result_buffer:
    - ''
  encoding: 'UTF-16LE'
  return: SUCCEED
...