/* ** Zabbix ** Copyright (C) 2001-2023 Zabbix SIA ** ** This program is free software; you can redistribute it and/or modify ** it under the terms of the GNU General Public License as published by ** the Free Software Foundation; either version 2 of the License, or ** (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ** GNU General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this program; if not, write to the Free Software ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. **/ #include "http.h" #include "../sysinfo.h" #include "zbxstr.h" #include "zbxnum.h" #include "zbxtime.h" #include "zbxregexp.h" #include "zbxhttp.h" #include "zbxcomms.h" #define HTTP_SCHEME_STR "http://" #ifndef HAVE_LIBCURL #define ZBX_MAX_WEBPAGE_SIZE (1 * 1024 * 1024) #else #define HTTPS_SCHEME_STR "https://" typedef struct { char *data; size_t allocated; size_t offset; } http_response_t; #endif static int detect_url(const char *host) { char *p; int ret = FAIL; if (NULL != strpbrk(host, "/@#?[]")) return SUCCEED; if (NULL != (p = strchr(host, ':')) && NULL == strchr(++p, ':')) ret = SUCCEED; return ret; } static int process_url(const char *host, const char *port, const char *path, char **url, char **error) { char *p, *delim; int scheme_found = 0; /* port and path parameters must be empty */ if ((NULL != port && '\0' != *port) || (NULL != path && '\0' != *path)) { *error = zbx_strdup(*error, "Parameters \"path\" and \"port\" must be empty if URL is specified in \"host\"."); return FAIL; } /* allow HTTP(S) scheme only */ #ifdef HAVE_LIBCURL if (0 == zbx_strncasecmp(host, HTTP_SCHEME_STR, ZBX_CONST_STRLEN(HTTP_SCHEME_STR)) || 0 == zbx_strncasecmp(host, HTTPS_SCHEME_STR, ZBX_CONST_STRLEN(HTTPS_SCHEME_STR))) #else if (0 == zbx_strncasecmp(host, HTTP_SCHEME_STR, ZBX_CONST_STRLEN(HTTP_SCHEME_STR))) #endif { scheme_found = 1; } else if (NULL != (p = strstr(host, "://")) && (NULL == (delim = strpbrk(host, "/?#")) || delim > p)) { *error = zbx_dsprintf(*error, "Unsupported scheme: %.*s.", (int)(p - host), host); return FAIL; } if (NULL != (p = strchr(host, '#'))) *url = zbx_dsprintf(*url, "%s%.*s", (0 == scheme_found ? HTTP_SCHEME_STR : ""), (int)(p - host), host); else *url = zbx_dsprintf(*url, "%s%s", (0 == scheme_found ? HTTP_SCHEME_STR : ""), host); return SUCCEED; } static int check_common_params(const char *host, const char *path, char **error) { const char *wrong_chr, URI_PROHIBIT_CHARS[] = {0x1,0x2,0x3,0x4,0x5,0x6,0x7,0x8,0x9,0xA,0xB,0xC,0xD,0xE,\ 0xF,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,0x7F,0}; if (NULL == host || '\0' == *host) { *error = zbx_strdup(*error, "Invalid first parameter."); return FAIL; } if (NULL != (wrong_chr = strpbrk(host, URI_PROHIBIT_CHARS))) { *error = zbx_dsprintf(NULL, "Incorrect hostname expression. Check hostname part after: %.*s.", (int)(wrong_chr - host), host); return FAIL; } if (NULL != path && NULL != (wrong_chr = strpbrk(path, URI_PROHIBIT_CHARS))) { *error = zbx_dsprintf(NULL, "Incorrect path expression. Check path part after: %.*s.", (int)(wrong_chr - path), path); return FAIL; } return SUCCEED; } #ifdef HAVE_LIBCURL static size_t curl_write_cb(void *ptr, size_t size, size_t nmemb, void *userdata) { size_t r_size = size * nmemb; http_response_t *response; response = (http_response_t*)userdata; zbx_str_memcpy_alloc(&response->data, &response->allocated, &response->offset, (const char *)ptr, r_size); return r_size; } static size_t curl_ignore_cb(void *ptr, size_t size, size_t nmemb, void *userdata) { ZBX_UNUSED(ptr); ZBX_UNUSED(userdata); return size * nmemb; } static int curl_page_get(char *url, int timeout, char **buffer, char **error) { CURLcode err; http_response_t page = {0}; CURL *easyhandle; int ret = SYSINFO_RET_FAIL; if (NULL == (easyhandle = curl_easy_init())) { *error = zbx_strdup(*error, "Cannot initialize cURL library."); return SYSINFO_RET_FAIL; } if (CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_USERAGENT, "Zabbix " ZABBIX_VERSION)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_SSL_VERIFYPEER, 0L)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_SSL_VERIFYHOST, 0L)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_FOLLOWLOCATION, 0L)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_URL, url)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_WRITEFUNCTION, NULL != buffer ? curl_write_cb : curl_ignore_cb)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_WRITEDATA, &page)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_HEADER, 1L)) || (NULL != sysinfo_get_config_source_ip() && CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_INTERFACE, sysinfo_get_config_source_ip()))) || CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_TIMEOUT, (long)timeout)) || CURLE_OK != (err = curl_easy_setopt(easyhandle, ZBX_CURLOPT_ACCEPT_ENCODING, ""))) { *error = zbx_dsprintf(*error, "Cannot set cURL option: %s.", curl_easy_strerror(err)); goto out; } #if LIBCURL_VERSION_NUM >= 0x071304 /* CURLOPT_PROTOCOLS is supported starting with version 7.19.4 (0x071304) */ /* CURLOPT_PROTOCOLS was deprecated in favor of CURLOPT_PROTOCOLS_STR starting with version 7.85.0 (0x075500) */ # if LIBCURL_VERSION_NUM >= 0x075500 if (CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_PROTOCOLS_STR, "HTTP,HTTPS"))) # else if (CURLE_OK != (err = curl_easy_setopt(easyhandle, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS))) # endif { *error = zbx_dsprintf(*error, "Cannot set allowed protocols: %s", curl_easy_strerror(err)); goto out; } #endif if (CURLE_OK == (err = curl_easy_perform(easyhandle))) { if (NULL != buffer) *buffer = page.data; ret = SYSINFO_RET_OK; } else { zbx_free(page.data); *error = zbx_dsprintf(*error, "Cannot perform cURL request: %s.", curl_easy_strerror(err)); } out: curl_easy_cleanup(easyhandle); return ret; } static int get_http_page(const char *host, const char *path, const char *port, int timeout, char **buffer, char **error) { char *url = NULL; int ret; if (SUCCEED != check_common_params(host, path, error)) return SYSINFO_RET_FAIL; if (SUCCEED == detect_url(host)) { /* URL detected */ if (SUCCEED != process_url(host, port, path, &url, error)) return SYSINFO_RET_FAIL; } else { /* URL is not detected - compose URL using host, port and path */ unsigned short port_n = ZBX_DEFAULT_HTTP_PORT; if (NULL != port && '\0' != *port) { if (SUCCEED != zbx_is_ushort(port, &port_n)) { *error = zbx_strdup(*error, "Invalid third parameter."); return SYSINFO_RET_FAIL; } } if (NULL != strchr(host, ':')) url = zbx_dsprintf(url, HTTP_SCHEME_STR "[%s]:%u/", host, port_n); else url = zbx_dsprintf(url, HTTP_SCHEME_STR "%s:%u/", host, port_n); if (NULL != path) url = zbx_strdcat(url, path + ('/' == *path ? 1 : 0)); } if (SUCCEED != zbx_http_punycode_encode_url(&url)) { *error = zbx_strdup(*error, "Cannot encode domain name into punycode."); ret = SYSINFO_RET_FAIL; goto out; } ret = curl_page_get(url, timeout, buffer, error); out: zbx_free(url); return ret; } #else static char *find_port_sep(char *host, size_t len) { int in_ipv6 = 0; for (; 0 < len--; host++) { if (0 == in_ipv6) { if (':' == *host) return host; else if ('[' == *host) in_ipv6 = 1; } else if (']' == *host) in_ipv6 = 0; } return NULL; } static int get_http_page(const char *host, const char *path, const char *port, int timeout, char **buffer, char **error) { char *url = NULL, *hostname = NULL, *path_loc = NULL; int ret = SYSINFO_RET_OK, ipv6_host_found = 0; unsigned short port_num; zbx_socket_t s; if (SUCCEED != check_common_params(host, path, error)) return SYSINFO_RET_FAIL; if (SUCCEED == detect_url(host)) { /* URL detected */ char *p, *p_host, *au_end; size_t authority_len; if (SUCCEED != process_url(host, port, path, &url, error)) return SYSINFO_RET_FAIL; p_host = url + ZBX_CONST_STRLEN(HTTP_SCHEME_STR); if (0 == (authority_len = strcspn(p_host, "/?"))) { *error = zbx_dsprintf(*error, "Invalid or missing host in URL."); ret = SYSINFO_RET_FAIL; goto out; } if (NULL != memchr(p_host, '@', authority_len)) { *error = zbx_strdup(*error, "Unsupported URL format."); ret = SYSINFO_RET_FAIL; goto out; } au_end = &p_host[authority_len - 1]; if (NULL != (p = find_port_sep(p_host, authority_len))) { char *port_str; int port_len = (int)(au_end - p); if (0 < port_len) { port_str = zbx_dsprintf(NULL, "%.*s", port_len, p + 1); if (SUCCEED != zbx_is_ushort(port_str, &port_num)) ret = SYSINFO_RET_FAIL; else hostname = zbx_dsprintf(hostname, "%.*s", (int)(p - p_host), p_host); zbx_free(port_str); } else ret = SYSINFO_RET_FAIL; } else { port_num = ZBX_DEFAULT_HTTP_PORT; hostname = zbx_dsprintf(hostname, "%.*s", (int)(au_end - p_host + 1), p_host); } if (SYSINFO_RET_OK != ret) { *error = zbx_dsprintf(*error, "URL using bad/illegal format."); goto out; } if ('[' == *hostname) { zbx_ltrim(hostname, "["); zbx_rtrim(hostname, "]"); ipv6_host_found = 1; } if ('\0' == *hostname) { *error = zbx_dsprintf(*error, "Invalid or missing host in URL."); ret = SYSINFO_RET_FAIL; goto out; } path_loc = zbx_strdup(path_loc, '\0' != p_host[authority_len] ? &p_host[authority_len] : "/"); } else { /* URL is not detected */ if (NULL == port || '\0' == *port) { port_num = ZBX_DEFAULT_HTTP_PORT; } else if (FAIL == zbx_is_ushort(port, &port_num)) { *error = zbx_strdup(*error, "Invalid third parameter."); ret = SYSINFO_RET_FAIL; goto out; } path_loc = zbx_strdup(path_loc, (NULL != path ? path : "/")); hostname = zbx_strdup(hostname, host); if (NULL != strchr(hostname, ':')) ipv6_host_found = 1; } if (SUCCEED != zbx_http_punycode_encode_url(&hostname)) { *error = zbx_strdup(*error, "Cannot encode domain name into punycode."); ret = SYSINFO_RET_FAIL; goto out; } if (SUCCEED == (ret = zbx_tcp_connect(&s, sysinfo_get_config_source_ip(), hostname, port_num, timeout, ZBX_TCP_SEC_UNENCRYPTED, NULL, NULL))) { char *request = NULL; request = zbx_dsprintf(request, "GET %s%s HTTP/1.1\r\n" "Host: %s%s%s\r\n" "Connection: close\r\n" "\r\n", ('/' != *path_loc ? "/" : ""), path_loc, (1 == ipv6_host_found ? "[" : ""), hostname, (1 == ipv6_host_found ? "]" : "")); if (SUCCEED == (ret = zbx_tcp_send_raw(&s, request))) { if (SUCCEED == (ret = zbx_tcp_recv_raw(&s))) { if (NULL != buffer) { *buffer = (char*)zbx_malloc(*buffer, ZBX_MAX_WEBPAGE_SIZE); zbx_strlcpy(*buffer, s.buffer, ZBX_MAX_WEBPAGE_SIZE); } } } zbx_free(request); zbx_tcp_close(&s); } if (SUCCEED != ret) { *error = zbx_dsprintf(NULL, "HTTP get error: %s", zbx_socket_strerror()); ret = SYSINFO_RET_FAIL; } else ret = SYSINFO_RET_OK; out: zbx_free(url); zbx_free(path_loc); zbx_free(hostname); return ret; } #endif int web_page_get(AGENT_REQUEST *request, AGENT_RESULT *result) { char *hostname, *path_str, *port_str, *buffer = NULL, *error = NULL; int ret; if (3 < request->nparam) { SET_MSG_RESULT(result, zbx_strdup(NULL, "Too many parameters.")); return SYSINFO_RET_FAIL; } hostname = get_rparam(request, 0); path_str = get_rparam(request, 1); port_str = get_rparam(request, 2); if (SYSINFO_RET_OK == (ret = get_http_page(hostname, path_str, port_str, request->timeout, &buffer, &error))) { zbx_rtrim(buffer, "\r\n"); SET_TEXT_RESULT(result, buffer); } else SET_MSG_RESULT(result, error); return ret; } int web_page_perf(AGENT_REQUEST *request, AGENT_RESULT *result) { char *hostname, *path_str, *port_str, *error = NULL; double start_time; int ret; if (3 < request->nparam) { SET_MSG_RESULT(result, zbx_strdup(NULL, "Too many parameters.")); return SYSINFO_RET_FAIL; } hostname = get_rparam(request, 0); path_str = get_rparam(request, 1); port_str = get_rparam(request, 2); start_time = zbx_time(); if (SYSINFO_RET_OK == (ret = get_http_page(hostname, path_str, port_str, request->timeout, NULL, &error))) SET_DBL_RESULT(result, zbx_time() - start_time); else SET_MSG_RESULT(result, error); return ret; } int web_page_regexp(AGENT_REQUEST *request, AGENT_RESULT *result) { char *hostname, *path_str, *port_str, *buffer = NULL, *error = NULL, *regexp, *length_str; const char *output; int length, ret; if (6 < request->nparam) { SET_MSG_RESULT(result, zbx_strdup(NULL, "Too many parameters.")); return SYSINFO_RET_FAIL; } if (4 > request->nparam) { SET_MSG_RESULT(result, zbx_strdup(NULL, "Invalid number of parameters.")); return SYSINFO_RET_FAIL; } hostname = get_rparam(request, 0); path_str = get_rparam(request, 1); port_str = get_rparam(request, 2); regexp = get_rparam(request, 3); length_str = get_rparam(request, 4); output = get_rparam(request, 5); if (NULL == length_str || '\0' == *length_str) length = ZBX_MAX_UINT31_1; else if (FAIL == zbx_is_uint31_1(length_str, &length)) { SET_MSG_RESULT(result, zbx_strdup(NULL, "Invalid fifth parameter.")); return SYSINFO_RET_FAIL; } /* by default return the matched part of web page */ if (NULL == output || '\0' == *output) output = "\\0"; if (SYSINFO_RET_OK == (ret = get_http_page(hostname, path_str, port_str, request->timeout, &buffer, &error))) { char *ptr = NULL, *str; for (str = buffer; ;) { char *newline; if (NULL != (newline = strchr(str, '\n'))) { if (str != newline && '\r' == newline[-1]) newline[-1] = '\0'; else *newline = '\0'; } if (SUCCEED == zbx_regexp_sub(str, regexp, output, &ptr) && NULL != ptr) break; if (NULL != newline) str = newline + 1; else break; } if (NULL != ptr) { if ((size_t)length < zbx_strlen_utf8(ptr)) ptr[zbx_strlen_utf8_nchars(ptr, length)] = '\0'; SET_STR_RESULT(result, ptr); } else SET_STR_RESULT(result, zbx_strdup(NULL, "")); zbx_free(buffer); } else SET_MSG_RESULT(result, error); return ret; }