/*
** Copyright (C) 2001-2025 Zabbix SIA
**
** This program is free software: you can redistribute it and/or modify it under the terms of
** the GNU Affero General Public License as published by the Free Software Foundation, version 3.
**
** This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
** without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
** See the GNU Affero General Public License for more details.
**
** You should have received a copy of the GNU Affero General Public License along with this program.
** If not, see .
**/
#include "zbxregexp.h"
#include "zbxstr.h"
#include "zbxtime.h"
#ifdef HAVE_PCRE_H
#ifdef HAVE_PCRE2_H
#error "cannot use both pcre and pcre2 at the same time!"
#endif
#define ZBX_REGEXP_MULTILINE PCRE_MULTILINE
#ifdef PCRE_NO_AUTO_CAPTURE
#define ZBX_REGEXP_NO_AUTO_CAPTURE PCRE_NO_AUTO_CAPTURE
#endif
#define ZBX_REGEXP_CASELESS PCRE_CASELESS
#endif
#if !defined(HAVE_PCRE_H) && !defined(HAVE_PCRE2_H)
#error "must use pcre or pcre2!"
#endif
#ifdef HAVE_PCRE2_H
# define ZBX_REGEXP_MULTILINE PCRE2_MULTILINE
# ifdef PCRE2_NO_AUTO_CAPTURE
# define ZBX_REGEXP_NO_AUTO_CAPTURE PCRE2_NO_AUTO_CAPTURE
# endif
# define ZBX_REGEXP_CASELESS PCRE2_CASELESS
# ifdef PCRE2_MATCH_INVALID_UTF
# define ZBX_REGEXP_COMPILE_FLAGS (PCRE2_MATCH_INVALID_UTF | PCRE2_UTF)
# else
# define ZBX_REGEXP_COMPILE_FLAGS (PCRE2_UTF)
# endif
#endif
struct zbx_regexp
{
#ifdef HAVE_PCRE_H
pcre *pcre_regexp;
struct pcre_extra *extra;
#endif
#ifdef HAVE_PCRE2_H
pcre2_code *pcre2_regexp;
pcre2_match_context *match_ctx;
#endif
};
/* maps to ovector of pcre_exec() */
typedef struct
{
int rm_so;
int rm_eo;
}
zbx_regmatch_t;
#define ZBX_REGEXP_GROUPS_MAX 10 /* Max number of supported capture groups in regular expressions. */
/* Group \0 contains the matching part of string, groups \1 ...\9 */
/* contain captured groups (substrings). */
ZBX_PTR_VECTOR_IMPL(expression, zbx_expression_t *)
typedef struct
{
zbx_regmatch_t groups[ZBX_REGEXP_GROUPS_MAX];
}
zbx_match_t;
ZBX_PTR_VECTOR_DECL(match, zbx_match_t *)
ZBX_PTR_VECTOR_IMPL(match, zbx_match_t *)
#ifdef HAVE_PCRE2_H
static void zbx_match_free(zbx_match_t *match)
{
zbx_free(match);
}
#endif
#if defined(HAVE_PCRE2_H)
static char *decode_pcre2_compile_error(int error_code, PCRE2_SIZE error_offset, int flags)
{
/* 120 code units buffer is recommended in "man pcre2api" */
#define BUF_SIZE (120 * PCRE2_CODE_UNIT_WIDTH / 8)
int ret;
char buf[BUF_SIZE];
if (0 > (ret = pcre2_get_error_message(error_code, (PCRE2_UCHAR *)buf, sizeof(buf))))
return zbx_dsprintf(NULL, "pcre2_get_error_message(%d, ...) failed with error %d", error_code, ret);
return zbx_dsprintf(NULL, "%s, position %zu, flags:0x%x", buf, (size_t)error_offset, (unsigned int)flags);
#undef BUF_SIZE
}
#endif
/******************************************************************************
* *
* Purpose: compiles a regular expression *
* *
* Parameters: *
* pattern - [IN] regular expression as a text string. Empty *
* string ("") is allowed, it will match everything. *
* NULL is not allowed. *
* flags - [IN] regexp compilation parameters passed to pcre_compile *
* or pcre2_compile. *
* ZBX_REGEXP_CASELESS, ZBX_REGEXP_NO_AUTO_CAPTURE, *
* ZBX_REGEXP_MULTILINE. *
* regexp - [OUT] compiled regexp. Can be NULL if only regexp *
* compilation is checked, Cleanup in caller. *
* err_msg - [OUT] dynamically allocated error message. Can be NULL to *
* discard the error message. *
* *
* Return value: SUCCEED or FAIL *
* *
******************************************************************************/
static int regexp_compile(const char *pattern, int flags, zbx_regexp_t **regexp, char **err_msg)
{
#ifdef HAVE_PCRE_H
const char *err_msg_static = NULL;
int error_offset = -1;
pcre *pcre_regexp;
#endif
#ifdef HAVE_PCRE2_H
pcre2_code *pcre2_regexp;
int error = 0;
PCRE2_SIZE error_offset = 0;
#endif
#ifdef ZBX_REGEXP_NO_AUTO_CAPTURE
/* If ZBX_REGEXP_NO_AUTO_CAPTURE bit is set in 'flags' but regular expression contains references to numbered */
/* capturing groups then reset ZBX_REGEXP_NO_AUTO_CAPTURE bit. */
/* Otherwise the regular expression might not compile. */
if (0 != (flags & ZBX_REGEXP_NO_AUTO_CAPTURE))
{
const char *pstart = pattern, *offset;
while (NULL != (offset = strchr(pstart, '\\')))
{
offset++;
if (('1' <= *offset && *offset <= '9') || 'g' == *offset)
{
flags ^= ZBX_REGEXP_NO_AUTO_CAPTURE;
break;
}
if (*offset == '\\')
offset++;
pstart = offset;
}
}
#endif
#ifdef HAVE_PCRE_H
if (NULL == (pcre_regexp = pcre_compile(pattern, flags, &err_msg_static, &error_offset, NULL)))
{
if (NULL != err_msg)
{
*err_msg = zbx_dsprintf(*err_msg, "%s, position %d, flags:0x%x", err_msg_static, error_offset,
(unsigned int)flags);
}
return FAIL;
}
if (NULL != regexp)
{
struct pcre_extra *extra;
if (NULL == (extra = pcre_study(pcre_regexp, 0, &err_msg_static)) && NULL != err_msg_static)
{
if (NULL != err_msg)
{
*err_msg = zbx_dsprintf(*err_msg, "pcre_study() error: %s, flags:0x%x", err_msg_static,
(unsigned int)flags);
}
pcre_free(pcre_regexp);
return FAIL;
}
*regexp = (zbx_regexp_t *)zbx_malloc(NULL, sizeof(zbx_regexp_t));
(*regexp)->pcre_regexp = pcre_regexp;
(*regexp)->extra = extra;
}
else
pcre_free(pcre_regexp);
#endif
#ifdef HAVE_PCRE2_H
*err_msg = NULL;
if (NULL == (pcre2_regexp = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
ZBX_REGEXP_COMPILE_FLAGS | flags, &error, &error_offset, NULL)))
{
*err_msg = decode_pcre2_compile_error(error, error_offset, flags);
return FAIL;
}
if (NULL != regexp)
{
pcre2_match_context *match_ctx;
if (NULL == (match_ctx = pcre2_match_context_create(NULL)))
{
pcre2_code_free(pcre2_regexp);
*err_msg = zbx_strdup(*err_msg, "cannot create pcre2 match context");
return FAIL;
}
*regexp = (zbx_regexp_t *)zbx_malloc(NULL, sizeof(zbx_regexp_t));
(*regexp)->pcre2_regexp = pcre2_regexp;
(*regexp)->match_ctx = match_ctx;
}
else
pcre2_code_free(pcre2_regexp);
#endif
return SUCCEED;
}
/******************************************************************************
* *
* Purpose: Compile a regular expression with default options. Capture groups *
* are disabled by default (if PCRE_NO_AUTO_CAPTURE is supported). *
* If you need to compile a regular expression that contains capture *
* groups use function zbx_regexp_compile_ext() instead. *
* *
* Parameters: *
* pattern - [IN] regular expression as a text string. Empty *
* string ("") is allowed, it will match everything. *
* NULL is not allowed. *
* regexp - [OUT] compiled regular expression. *
* err_msg - [OUT] error message if any. *
* *
* Return value: SUCCEED or FAIL *
* *
******************************************************************************/
int zbx_regexp_compile(const char *pattern, zbx_regexp_t **regexp, char **err_msg)
{
#ifdef ZBX_REGEXP_NO_AUTO_CAPTURE
return regexp_compile(pattern, ZBX_REGEXP_MULTILINE | ZBX_REGEXP_NO_AUTO_CAPTURE, regexp, err_msg);
#else
return regexp_compile(pattern, ZBX_REGEXP_MULTILINE, regexp, err_msg);
#endif
}
/******************************************************************************
* *
* Purpose: Compile a regular expression with no or specified regular *
* expression compilation parameters. *
* *
* Parameters: *
* pattern - [IN] regular expression as a text string. Empty *
* string ("") is allowed, it will match everything. *
* NULL is not allowed. *
* regexp - [OUT] compiled regular expression. *
* flags - [IN] regexp compilation parameters passed to pcre_compile. *
* ZBX_REGEXP_CASELESS, ZBX_REGEXP_NO_AUTO_CAPTURE, *
* ZBX_REGEXP_MULTILINE. *
* err_msg - [OUT] error message if any. *
* *
******************************************************************************/
int zbx_regexp_compile_ext(const char *pattern, zbx_regexp_t **regexp, int flags, char **err_msg)
{
return regexp_compile(pattern, flags, regexp, err_msg);
}
/****************************************************************************************************
* *
* Purpose: wrapper for zbx_regexp_compile. Caches and reuses the last used regexp. *
* *
****************************************************************************************************/
static int regexp_prepare(const char *pattern, int flags, zbx_regexp_t **regexp, char **err_msg)
{
static ZBX_THREAD_LOCAL zbx_regexp_t *curr_regexp = NULL;
static ZBX_THREAD_LOCAL char *curr_pattern = NULL;
static ZBX_THREAD_LOCAL int curr_flags = 0;
int ret = SUCCEED;
if (NULL == curr_regexp || 0 != strcmp(curr_pattern, pattern) || curr_flags != flags)
{
if (NULL != curr_regexp)
{
zbx_regexp_free(curr_regexp);
zbx_free(curr_pattern);
}
curr_regexp = NULL;
curr_pattern = NULL;
curr_flags = 0;
if (SUCCEED == regexp_compile(pattern, flags, &curr_regexp, err_msg))
{
curr_pattern = zbx_strdup(curr_pattern, pattern);
curr_flags = flags;
}
else
ret = FAIL;
}
*regexp = curr_regexp;
return ret;
}
/* calculate recursion limit, PCRE man page suggests to reckon on about 500 bytes per recursion */
/* but to be on the safe side - reckon on 800 bytes and do not set limit higher than 100000 */
#define REGEXP_RECURSION_STEP 800
#define REGEXP_RECURSION_LIMIT 100000
static ZBX_THREAD_LOCAL unsigned long rxp_stacklimit = 0;
/****************************************************************************************************
* *
* Purpose: initialize regular expression execution environment *
* *
****************************************************************************************************/
void zbx_init_regexp_env(void)
{
#ifdef HAVE_STACKSIZE
/* get stack size if configured, otherwise it will use default process stack size */
if (REGEXP_RECURSION_LIMIT * REGEXP_RECURSION_STEP < (rxp_stacklimit = HAVE_STACKSIZE * ZBX_KIBIBYTE))
rxp_stacklimit = REGEXP_RECURSION_LIMIT * REGEXP_RECURSION_STEP;
#endif
}
static unsigned long int compute_recursion_limit(void)
{
if (0 == rxp_stacklimit)
{
#if !defined(_WINDOWS) && !defined(__MINGW32__)
# define REGEXP_RECURSION_DEFAULT 10000 /* if stack size cannot be retrieved then assume ~8 MB */
struct rlimit rlim;
if (0 == getrlimit(RLIMIT_STACK, &rlim))
{
if (REGEXP_RECURSION_LIMIT * REGEXP_RECURSION_STEP < (rxp_stacklimit = rlim.rlim_cur))
rxp_stacklimit = REGEXP_RECURSION_LIMIT * REGEXP_RECURSION_STEP;
}
if (0 == rxp_stacklimit)
#else
# define REGEXP_RECURSION_DEFAULT 2000 /* assume ~1 MB stack and ~500 bytes per recursion */
#endif
rxp_stacklimit = REGEXP_RECURSION_DEFAULT * REGEXP_RECURSION_STEP;
}
return rxp_stacklimit / REGEXP_RECURSION_STEP;
#undef REGEXP_RECURSION_DEFAULT
}
#undef REGEXP_RECURSION_LIMIT
#undef REGEXP_RECURSION_STEP
#if defined(HAVE_PCRE2_H)
static char *decode_pcre2_match_error(int error_code)
{
/* 120 code units buffer is recommended in "man pcre2api" */
const size_t err_msg_size = 120 * PCRE2_CODE_UNIT_WIDTH / 8;
char *err_msg = (char *)zbx_malloc(NULL, err_msg_size);
int ret;
if (0 > (ret = pcre2_get_error_message(error_code, (PCRE2_UCHAR *)err_msg, err_msg_size)))
{
zbx_snprintf(err_msg, err_msg_size, "pcre2_get_error_message(%d, ...) failed with error %d",
error_code, ret);
}
return err_msg;
}
#endif
/***********************************************************************************
* *
* Purpose: wrapper for pcre_exec() and pcre2_match(), searches for a given *
* pattern, specified by regexp, in the string *
* *
* Parameters: *
* string - [IN] string to be matched against 'regexp' *
* regexp - [IN] precompiled regular expression *
* flags - [IN] execution flags for matching *
* count - [IN] count of elements in matches array *
* matches - [OUT] matches (can be NULL if matching results are *
* not required) *
* err_msg - [OUT] dynamically allocated error message (can be NULL). *
* offset - [IN] offset in the string at which to start matching *
* *
* Return value: ZBX_REGEXP_MATCH - successful match *
* ZBX_REGEXP_NO_MATCH - no match *
* FAIL - error occurred *
* *
***********************************************************************************/
static int regexp_exec(const char *string, const zbx_regexp_t *regexp, int flags, int count,
zbx_regmatch_t *matches, char **err_msg, int offset)
{
#ifdef HAVE_PCRE_H
#define MATCHES_BUFF_SIZE (ZBX_REGEXP_GROUPS_MAX * 3) /* see pcre_exec() in "man pcreapi" why 3 */
int result, r;
static ZBX_THREAD_LOCAL int matches_buff[MATCHES_BUFF_SIZE];
int *ovector = NULL;
int ovecsize = 3 * count; /* see pcre_exec() in "man pcreapi" why 3 */
struct pcre_extra extra, *pextra;
if (ZBX_REGEXP_GROUPS_MAX < count)
ovector = (int *)zbx_malloc(NULL, (size_t)ovecsize * sizeof(int));
else
ovector = matches_buff;
if (NULL == regexp->extra)
{
pextra = &extra;
pextra->flags = 0;
}
else
pextra = regexp->extra;
#if defined(PCRE_EXTRA_MATCH_LIMIT) && defined(PCRE_EXTRA_MATCH_LIMIT_RECURSION)
pextra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
pextra->match_limit = 1000000;
pextra->match_limit_recursion = compute_recursion_limit();
#endif
/* see "man pcreapi" about pcre_exec() return value and 'ovector' size and layout */
if (0 <= (r = pcre_exec(regexp->pcre_regexp, pextra, string, (int)strlen(string), flags,
offset, ovector, ovecsize)))
{
if (NULL != matches)
memcpy(matches, ovector, (size_t)((0 < r) ? MIN(r, count) : count) * sizeof(zbx_regmatch_t));
result = ZBX_REGEXP_MATCH;
}
else if (PCRE_ERROR_NOMATCH == r)
{
result = ZBX_REGEXP_NO_MATCH;
}
else
{
if (NULL != err_msg)
{
*err_msg = zbx_dsprintf(NULL, "pcre_exec() returned %d. See PCRE library documentation or"
" \"man pcreapi\", section \"Error return values from pcre_exec()\" for explanation"
" or /usr/include/pcre.h", r);
}
result = FAIL;
}
if (ZBX_REGEXP_GROUPS_MAX < count)
zbx_free(ovector);
return result;
#undef MATCHES_BUFF_SIZE
#endif
#ifdef HAVE_PCRE2_H
int result, r, i;
pcre2_match_data *match_data = NULL;
PCRE2_SIZE *ovector = NULL;
pcre2_set_match_limit(regexp->match_ctx, 1000000);
pcre2_set_recursion_limit(regexp->match_ctx, (uint32_t)compute_recursion_limit());
match_data = pcre2_match_data_create((uint32_t)count, NULL);
if (NULL == match_data)
{
zabbix_log(LOG_LEVEL_WARNING, "%s() cannot create pcre2 match data of size %d", __func__, count);
result = FAIL;
}
else
{
#ifdef PCRE2_MATCH_INVALID_UTF
flags |= PCRE2_NO_UTF_CHECK;
#endif
if (0 <= (r = pcre2_match(regexp->pcre2_regexp, (PCRE2_SPTR)string, PCRE2_ZERO_TERMINATED, offset,
flags, match_data, regexp->match_ctx)))
{
if (NULL != matches)
{
ovector = pcre2_get_ovector_pointer(match_data);
/* have to copy this way because pcre2 ovector uses 8 byte integers, *
* but we want to keep it compatible with existing matches structure, *
* which uses 4 byte integers */
for (i = 0; i < ((0 < r) ? MIN(r, count) : count); i++)
{
matches[i].rm_so = (int)ovector[i*2];
matches[i].rm_eo = (int)ovector[i*2+1];
}
}
result = ZBX_REGEXP_MATCH;
}
else if (PCRE2_ERROR_NOMATCH == r)
{
result = ZBX_REGEXP_NO_MATCH;
}
else
{
if (NULL != err_msg)
*err_msg = decode_pcre2_match_error(r);
result = FAIL;
}
pcre2_match_data_free(match_data);
}
return result;
#endif
}
/******************************************************************************
* *
* Purpose: wrapper for pcre_free *
* *
* Parameters: regexp - [IN] compiled regular expression *
* *
******************************************************************************/
void zbx_regexp_free(zbx_regexp_t *regexp)
{
#ifdef HAVE_PCRE_H
/* pcre_free_study() was added to the API for release 8.20 while extra was available before */
#ifdef PCRE_CONFIG_JIT
pcre_free_study(regexp->extra);
#else
pcre_free(regexp->extra);
#endif
pcre_free(regexp->pcre_regexp);
#endif
#ifdef HAVE_PCRE2_H
pcre2_code_free(regexp->pcre2_regexp);
pcre2_match_context_free(regexp->match_ctx);
#endif
zbx_free(regexp);
}
/******************************************************************************
* *
* Purpose: checks if string matches a precompiled regular expression without *
* returning matching groups *
* *
* Parameters: string - [IN] string to be matched *
* regexp - [IN] precompiled regular expression *
* *
* Return value: 0 - successful match *
* nonzero - no match *
* *
* Comments: use this function for better performance if many strings need to *
* be matched against the same regular expression *
* *
******************************************************************************/
int zbx_regexp_match_precompiled(const char *string, const zbx_regexp_t *regexp)
{
return (ZBX_REGEXP_MATCH == regexp_exec(string, regexp, 0, 0, NULL, NULL, 0)) ? 0 : -1;
}
/******************************************************************************
* *
* Purpose: checks if string matches a precompiled regular expression without *
* returning matching groups *
* *
* Parameters: string - [IN] string to be matched *
* regexp - [IN] precompiled regular expression *
* err_msg - [OUT] dynamically allocated error message *
* *
* Return value: ZBX_REGEXP_MATCH - successful match *
* ZBX_REGEXP_NO_MATCH - no match *
* FAIL - error occurred *
* *
* Comments: use this function for better performance if many strings need to *
* be matched against the same regular expression *
* *
******************************************************************************/
int zbx_regexp_match_precompiled2(const char *string, const zbx_regexp_t *regexp, char **err_msg)
{
return regexp_exec(string, regexp, 0, 0, NULL, err_msg, 0);
}
/****************************************************************************************************
* *
* Purpose: compiles and executes a regex pattern *
* *
* Parameters: *
* string - [IN] string to be matched against 'pattern' *
* pattern - [IN] regular expression pattern *
* flags - [IN] execution flags for matching *
* len - [OUT] length of matched string, *
* 0 in case of no match or *
* FAIL if an error occurred. *
* *
* Return value: pointer to the matched substring or null *
* *
* Comments: Note, that although the input 'string' was const, the return is not, as the caller *
* owns it and can modify it. This is similar to strstr() and strcasestr() functions. *
* We may need to find a way how to silence the resulting '-Wcast-qual' warning. *
* *
****************************************************************************************************/
static char *zbx_regexp(const char *string, const char *pattern, int flags, int *len)
{
char *error = NULL, *c = NULL;
zbx_regmatch_t match;
zbx_regexp_t *regexp = NULL;
if (NULL != len)
*len = FAIL;
if (SUCCEED != regexp_prepare(pattern, flags, ®exp, &error))
{
zbx_free(error);
return NULL;
}
if (NULL != string)
{
int r;
if (ZBX_REGEXP_MATCH == (r = regexp_exec(string, regexp, 0, 1, &match, NULL, 0)))
{
c = (char *)string + match.rm_so;
if (NULL != len)
*len = match.rm_eo - match.rm_so;
}
else if (ZBX_REGEXP_NO_MATCH == r && NULL != len)
*len = 0;
}
return c;
}
/****************************************************************************************************
* *
* Purpose: compiles and executes a regex pattern *
* *
* Parameters: *
* string - [IN] string to be matched against 'pattern' *
* pattern - [IN] regular expression pattern *
* flags - [IN] execution flags for matching *
* matched_pos - [OUT] pointer to the matched substring, can be NULL *
* len - [OUT] pointer to length of matched string, can be NULL *
* err_msg - [OUT] error message. Deallocate in caller. *
* *
* Return value: if success: *
* ZBX_REGEXP_MATCH or *
* ZBX_REGEXP_NO_MATCH *
* if errors: *
* ZBX_REGEXP_COMPILE_FAIL or *
* ZBX_REGEXP_RUNTIME_FAIL with error message in 'err_msg' *
* *
****************************************************************************************************/
static int zbx_regexp2(const char *string, const char *pattern, int flags, char **matched_pos, int *len,
char **err_msg)
{
zbx_regmatch_t match;
zbx_regexp_t *regexp = NULL;
int r;
if (SUCCEED != regexp_prepare(pattern, flags, ®exp, err_msg))
return ZBX_REGEXP_COMPILE_FAIL;
/* 'regexp' ownership was taken by regexp_prepare(), do not cleanup */
if (ZBX_REGEXP_MATCH == (r = regexp_exec(string, regexp, 0, 1, &match, err_msg, 0)))
{
if (NULL != matched_pos)
*matched_pos = (char *)(uintptr_t)string + match.rm_so;
if (NULL != len)
*len = match.rm_eo - match.rm_so;
return ZBX_REGEXP_MATCH;
}
if (ZBX_REGEXP_NO_MATCH == r)
{
if (NULL != len)
*len = 0;
return ZBX_REGEXP_NO_MATCH;
}
return ZBX_REGEXP_RUNTIME_FAIL;
}
/*************************************************************************************************
* *
* Comments: Note, that although the input 'string' was const, the return is not, as the caller *
* owns it and can modify it. This is similar to strstr() and strcasestr() functions. *
* We may need to find a way how to silence the resulting '-Wcast-qual' warning. *
* *
*************************************************************************************************/
char *zbx_regexp_match(const char *string, const char *pattern, int *len)
{
return zbx_regexp(string, pattern, ZBX_REGEXP_MULTILINE, len);
}
/******************************************************************************
* *
* Purpose: zbx_strncpy_alloc with maximum allocated memory limit. *
* *
* Parameters: str - [IN/OUT] destination buffer pointer *
* alloc_len - [IN/OUT] already allocated memory *
* offset - [IN/OUT] offset for writing *
* src - [IN] copied string *
* n - [IN] maximum number of bytes to copy *
* limit - [IN] maximum number of bytes to be allocated *
* *
******************************************************************************/
static void strncpy_alloc(char **str, size_t *alloc_len, size_t *offset, const char *src, size_t n, size_t limit)
{
if (0 != limit && *offset + n > limit)
n = (limit > *offset) ? (limit - *offset) : 0;
zbx_strncpy_alloc(str, alloc_len, offset, src, n);
}
/*********************************************************************************
* *
* Purpose: Constructs a string from the specified template and regexp match. *
* *
* Parameters: text - [IN] the input string. *
* output_template - [IN] the output string template. The output *
* string is constructed from template by *
* replacing \ sequences with the captured *
* regexp group. *
* If the output template is NULL or contains *
* empty string then a copy of the whole *
* input string is returned. *
* match - [IN] the captured group data *
* nmatch - [IN] the number of items in captured group data *
* limit - [IN] size limit for memory allocation *
* 0 means no limit *
* group_check - [IN] check if pattern matches but does not *
* contain group to capture and return NULL *
* *
* Return value: Allocated string containing output value *
* *
*********************************************************************************/
static char *regexp_sub_replace(const char *text, const char *output_template, zbx_regmatch_t *match, int nmatch,
size_t limit, zbx_regexp_group_check_t group_check)
{
char *ptr = NULL;
const char *pstart = output_template, *pgroup;
size_t size = 0, offset = 0;
int group_index;
if (NULL == output_template || '\0' == *output_template)
return zbx_strdup(NULL, text);
while (NULL != (pgroup = strchr(pstart, '\\')))
{
switch (*(++pgroup))
{
case '\\':
strncpy_alloc(&ptr, &size, &offset, pstart, (size_t)(pgroup - pstart), limit);
pstart = pgroup + 1;
continue;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
strncpy_alloc(&ptr, &size, &offset, pstart, (size_t)(pgroup - pstart - 1), limit);
group_index = *pgroup - '0';
if (group_index < nmatch && -1 != match[group_index].rm_so)
{
strncpy_alloc(&ptr, &size, &offset, text + match[group_index].rm_so,
(size_t)(match[group_index].rm_eo - match[group_index].rm_so),
limit);
}
else if (ZBX_REGEXP_GROUP_CHECK_ENABLE == group_check)
{
zbx_free(ptr);
goto out;
}
pstart = pgroup + 1;
continue;
default:
strncpy_alloc(&ptr, &size, &offset, pstart, (size_t)(pgroup - pstart), limit);
pstart = pgroup;
}
if (0 != limit && offset >= limit)
break;
}
if ('\0' != *pstart)
strncpy_alloc(&ptr, &size, &offset, pstart, strlen(pstart), limit);
out:
if (NULL != ptr)
{
if (0 != limit && offset >= limit)
{
size = offset;
offset--;
/* ensure that the string is not cut in the middle of UTF-8 sequence */
if (0x80 <= (0xc0 & ptr[offset]))
{
while (0x80 == (0xc0 & ptr[offset]) && 0 < offset)
offset--;
if (zbx_utf8_char_len(&ptr[offset]) != size - offset)
ptr[offset] = '\0';
}
}
/* Some regexp and output template combinations can produce invalid UTF-8 sequences. */
/* For example, regexp "(.)(.)" and output template "\1 \2" produce a valid UTF-8 sequence */
/* for single-byte UTF-8 characters and invalid sequence for multi-byte characters. */
/* Using (*UTF) modifier (e.g. "(*UTF)(.)(.)") solves the problem for multi-byte characters */
/* but it is up to user to add the modifier. To prevent producing invalid UTF-8 sequences do */
/* output sanitization. */
zbx_replace_invalid_utf8(ptr);
}
return ptr;
}
/*********************************************************************************
* *
* Purpose: Test if a string matches the specified regular expression. If yes *
* then create a return value by substituting '\' sequences in *
* output template with the captured groups. *
* *
* Parameters: string - [IN] the string to parse *
* pattern - [IN] the regular expression *
* output_template - [IN] the output string template. The output *
* string is constructed from template by *
* replacing \ sequences with the captured *
* regexp group. *
* If output template is NULL or contains *
* empty string then the whole input string *
* is used as output value. *
* flags - [IN] the pcre_compile() function flags. *
* See pcre_compile() manual. *
* group_check - [IN] check if pattern matches but does not *
* contain group to capture *
* out - [OUT] the output value if the input string *
* matches the specified regular expression *
* or NULL otherwise *
* *
* Return value: SUCCEED - the regular expression match was done *
* FAIL - failed to compile regexp *
* *
*********************************************************************************/
static int regexp_sub(const char *string, const char *pattern, const char *output_template, int flags,
zbx_regexp_group_check_t group_check, char **out)
{
char *error = NULL;
zbx_regexp_t *regexp = NULL;
zbx_regmatch_t match[ZBX_REGEXP_GROUPS_MAX];
unsigned int i;
if (NULL == string)
{
zbx_free(*out);
return SUCCEED;
}
#ifdef ZBX_REGEXP_NO_AUTO_CAPTURE
/* no subpatterns without an output template */
if (NULL == output_template || '\0' == *output_template)
flags |= ZBX_REGEXP_NO_AUTO_CAPTURE;
#endif
if (FAIL == regexp_prepare(pattern, flags, ®exp, &error))
{
zbx_free(error);
return FAIL;
}
zbx_free(*out);
/* -1 is special pcre value for unused patterns */
for (i = 0; i < ARRSIZE(match); i++)
match[i].rm_so = match[i].rm_eo = -1;
if (ZBX_REGEXP_MATCH == regexp_exec(string, regexp, 0, ZBX_REGEXP_GROUPS_MAX, match, NULL, 0))
*out = regexp_sub_replace(string, output_template, match, ZBX_REGEXP_GROUPS_MAX, 0, group_check);
return SUCCEED;
}
/*********************************************************************************
* *
* Purpose: Test if a string matches the specified regular expression. If yes *
* then create a return value by substituting '\' sequences in *
* output template with the captured groups. *
* *
* Parameters: string - [IN] the string to parse *
* pattern - [IN] the regular expression *
* output_template - [IN] the output string template. The output *
* string is constructed from template by *
* replacing \ sequences with the captured *
* regexp group. *
* If output template is NULL or contains *
* empty string then the whole input string *
* is used as output value. *
* flags - [IN] the pcre_compile() function flags. *
* See pcre_compile() manual. *
* out - [OUT] the output value if the input string *
* matches the specified regular expression *
* or NULL otherwise *
* err_msg - [OUT] error message. Deallocate in caller. *
* *
* Return value: if success: *
* ZBX_REGEXP_MATCH or *
* ZBX_REGEXP_NO_MATCH *
* if errors: *
* ZBX_REGEXP_COMPILE_FAIL or *
* ZBX_REGEXP_RUNTIME_FAIL with error message in 'err_msg' *
* *
*********************************************************************************/
static int regexp_sub2(const char *string, const char *pattern, const char *output_template, int flags, char **out,
char **err_msg)
{
zbx_regexp_t *regexp = NULL;
zbx_regmatch_t match[ZBX_REGEXP_GROUPS_MAX];
unsigned int i;
int ret;
#ifdef ZBX_REGEXP_NO_AUTO_CAPTURE
/* no subpatterns without an output template */
if (NULL == output_template || '\0' == *output_template)
flags |= ZBX_REGEXP_NO_AUTO_CAPTURE;
#endif
if (SUCCEED != regexp_prepare(pattern, flags, ®exp, err_msg))
return ZBX_REGEXP_COMPILE_FAIL;
zbx_free(*out);
/* -1 is special pcre value for unused patterns */
for (i = 0; i < ARRSIZE(match); i++)
match[i].rm_so = match[i].rm_eo = -1;
/* 'regexp' ownership was taken by regexp_prepare(), do not cleanup */
if (ZBX_REGEXP_MATCH == (ret = regexp_exec(string, regexp, 0, ZBX_REGEXP_GROUPS_MAX, match, err_msg, 0)))
{
*out = regexp_sub_replace(string, output_template, match, ZBX_REGEXP_GROUPS_MAX, 0,
ZBX_REGEXP_GROUP_CHECK_DISABLE);
}
if (FAIL == ret)
ret = ZBX_REGEXP_RUNTIME_FAIL;
return ret; /* ZBX_REGEXP_MATCH, ZBX_REGEXP_NO_MATCH or ZBX_REGEXP_RUNTIME_FAIL */
}
/*********************************************************************************
* *
* Purpose: Test if a string matches precompiled regular expression. If yes *
* then create a return value by substituting '\' sequences in *
* output template with the captured groups. *
* *
* Parameters: string - [IN] the string to parse *
* regexp - [IN] the precompiled regular expression *
* output_template - [IN] the output string template. The output *
* string is constructed from template by *
* replacing \ sequences with the captured *
* regexp group. *
* If output template is NULL or contains *
* empty string then the whole input string *
* is used as output value. *
* limit - [IN] size limit for memory allocation *
* 0 means no limit *
* out - [OUT] the output value if the input string *
* matches the specified regular expression *
* or NULL otherwise *
* *
* Return value: SUCCEED - the regular expression match was done *
* FAIL - failed to match *
* *
* Comments: Multiline match is performed *
* *
*********************************************************************************/
int zbx_mregexp_sub_precompiled(const char *string, const zbx_regexp_t *regexp, const char *output_template,
size_t limit, char **out)
{
zbx_regmatch_t match[ZBX_REGEXP_GROUPS_MAX];
unsigned int i;
zbx_free(*out);
/* -1 is special pcre value for unused patterns */
for (i = 0; i < ARRSIZE(match); i++)
match[i].rm_so = match[i].rm_eo = -1;
if (ZBX_REGEXP_MATCH == regexp_exec(string, regexp, 0, ZBX_REGEXP_GROUPS_MAX, match, NULL, 0) &&
NULL != (*out = regexp_sub_replace(string, output_template, match, ZBX_REGEXP_GROUPS_MAX,
limit, ZBX_REGEXP_GROUP_CHECK_DISABLE)))
{
return SUCCEED;
}
return FAIL;
}
/*********************************************************************************
* *
* Purpose: Test if a string matches the specified regular expression. If yes *
* then all matches in incoming string are replaced with values based *
* on repleacement template. For each match replacement value created *
* by substituting '\' sequences in output template with the mtach *
* captured groups. *
* *
* Parameters: string - [IN] the string to replace *
* pattern - [IN] the regular expression *
* repl_template - [IN] the repleacement template used to *
* construct replacement string for each *
* match. If output template is NULL then *
* empty string is used as template. *
* out - [OUT] the output string with replaced matches *
* *
* Return value: SUCCEED - the regular expression match was done *
* FAIL - failed to compile regexp *
* *
* Comments: This function performs case sensitive match *
* *
*********************************************************************************/
int zbx_regexp_repl(const char *string, const char *pattern, const char *output_template, char **out)
{
#ifdef HAVE_PCRE2_H
#define ZBX_REGEX_REPL_TIMEOUT 3 /* Regex matches processing timeout in seconds */
zbx_regexp_t *regexp = NULL;
int mi, shift = 0, ret = FAIL, len = strlen(string);
char *out_str, *error = NULL;
zbx_vector_match_t matches;
size_t i;
double starttime = zbx_time();
zabbix_log(LOG_LEVEL_DEBUG, "In %s() len:%d", __func__, len);
if ('\0' == *pattern)
{
*out = zbx_strdup(*out, string);
return SUCCEED;
}
if (FAIL == regexp_prepare(pattern, ZBX_REGEXP_MULTILINE, ®exp, &error))
{
zbx_free(error);
return FAIL;
}
zbx_vector_match_create(&matches);
/* collect all matches */
for (;;)
{
zbx_match_t *match;
match = zbx_malloc(NULL, sizeof(zbx_match_t));
/* -1 is special pcre value for unused patterns */
for (i = 0; i < ARRSIZE(match->groups); i++)
match->groups[i].rm_so = match->groups[i].rm_eo = -1;
if (ZBX_REGEXP_MATCH != regexp_exec(string, regexp, 0, ZBX_REGEXP_GROUPS_MAX, match->groups, NULL,
shift))
{
zbx_free(match);
break;
}
shift = match->groups[0].rm_eo;
zbx_vector_match_append(&matches, match);
if (shift >= len)
break;
if (shift == match->groups[0].rm_so)
shift++;
if (ZBX_REGEX_REPL_TIMEOUT < zbx_time() - starttime)
{
zabbix_log(LOG_LEVEL_DEBUG, "timeout after %d matches %s()",
matches.values_num, __func__);
goto out;
}
}
zabbix_log(LOG_LEVEL_DEBUG, "replacing:%d matches %s()", matches.values_num, __func__);
out_str = zbx_strdup(NULL, string);
/* create pattern based string for each match and relplace matched string with this string */
for (mi = matches.values_num - 1; 0 <= mi; mi--)
{
zbx_regmatch_t *groups = matches.values[mi]->groups;
char *replace, *ptr;
if ('\0' == *output_template)
{
replace = zbx_strdup(NULL, output_template);
}
else
{
replace = regexp_sub_replace(string, output_template, groups, ZBX_REGEXP_GROUPS_MAX,
MAX_EXECUTE_OUTPUT_LEN, ZBX_REGEXP_GROUP_CHECK_DISABLE);
}
if (NULL != replace)
{
size_t replen = strlen(replace), outlen = strlen(out_str), length = outlen + replen + 1,
eo = (size_t)groups[0].rm_eo;
if (MAX_EXECUTE_OUTPUT_LEN <= length)
{
zabbix_log(LOG_LEVEL_DEBUG, "macro function output exceeded limit of %d Kb",
MAX_EXECUTE_OUTPUT_LEN / ZBX_KIBIBYTE);
zbx_free(out_str);
zbx_free(replace);
goto out;
}
ptr = (char *)zbx_malloc(NULL, length);
if (0 != (size_t)groups[0].rm_so)
memcpy(ptr, out_str, (size_t)groups[0].rm_so);
if (0 != replen)
memcpy(ptr + groups[0].rm_so, replace, replen);
memcpy(ptr + groups[0].rm_so + replen, out_str + eo, outlen - eo + 1);
zbx_free(out_str);
out_str = ptr;
zbx_free(replace);
}
else
{
zbx_free(out_str);
goto out;
}
}
ret = SUCCEED;
zbx_free(*out);
*out = out_str;
out:
zbx_vector_match_clear_ext(&matches, zbx_match_free);
zbx_vector_match_destroy(&matches);
zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s", __func__, zbx_result_string(ret));
return ret;
#undef ZBX_REGEX_REPL_TIMEOUT
#else
ZBX_UNUSED(string);
ZBX_UNUSED(pattern);
ZBX_UNUSED(output_template);
ZBX_UNUSED(out);
return FAIL;
#endif
}
/*********************************************************************************
* *
* Purpose: Test if a string matches the specified regular expression. If yes *
* then create a return value by substituting '\' sequences in *
* output template with the captured groups. *
* *
* Parameters: string - [IN] the string to parse *
* pattern - [IN] the regular expression *
* output_template - [IN] the output string template. The output *
* string is constructed from template by *
* replacing \ sequences with the captured *
* regexp group. *
* out - [OUT] the output value if the input string *
* matches the specified regular expression *
* or NULL otherwise *
* *
* Return value: SUCCEED - the regular expression match was done *
* FAIL - failed to compile regexp *
* *
* Comments: This function performs case sensitive match *
* *
*********************************************************************************/
int zbx_regexp_sub(const char *string, const char *pattern, const char *output_template, char **out)
{
return regexp_sub(string, pattern, output_template, ZBX_REGEXP_MULTILINE, ZBX_REGEXP_GROUP_CHECK_DISABLE, out);
}
/*********************************************************************************
* *
* Purpose: This function is similar to zbx_regexp_sub() with exception that *
* multiline matches are accepted. *
* *
*********************************************************************************/
int zbx_mregexp_sub(const char *string, const char *pattern, const char *output_template,
zbx_regexp_group_check_t group_check, char **out)
{
return regexp_sub(string, pattern, output_template, 0, group_check, out);
}
/*********************************************************************************
* *
* Purpose: This function is similar to zbx_regexp_sub() with exception that *
* case insensitive matches are accepted. *
* *
*********************************************************************************/
int zbx_iregexp_sub(const char *string, const char *pattern, const char *output_template, char **out)
{
return regexp_sub(string, pattern, output_template, ZBX_REGEXP_CASELESS, ZBX_REGEXP_GROUP_CHECK_DISABLE, out);
}
/******************************************************************************
* *
* Purpose: frees expression data retrieved by DCget_expressions function or *
* prepared with zbx_add_regexp_ex() function calls *
* *
* Parameters: expressions - [IN] a vector of expression data pointers *
* *
******************************************************************************/
void zbx_regexp_clean_expressions(zbx_vector_expression_t *expressions)
{
int i;
for (i = 0; i < expressions->values_num; i++)
{
zbx_expression_t *regexp = expressions->values[i];
zbx_free(regexp->name);
zbx_free(regexp->expression);
zbx_free(regexp);
}
zbx_vector_expression_clear(expressions);
}
void zbx_add_regexp_ex(zbx_vector_expression_t *regexps, const char *name, const char *expression,
int expression_type, char exp_delimiter, int case_sensitive)
{
zbx_expression_t *regexp;
regexp = zbx_malloc(NULL, sizeof(zbx_expression_t));
regexp->name = zbx_strdup(NULL, name);
regexp->expression = zbx_strdup(NULL, expression);
regexp->expression_type = expression_type;
regexp->exp_delimiter = exp_delimiter;
regexp->case_sensitive = case_sensitive;
zbx_vector_expression_append(regexps, regexp);
}
/**********************************************************************************
* *
* Purpose: Test if the string matches regular expression with the specified *
* case sensitivity option and allocates output variable to store the *
* result if necessary. *
* *
* Parameters: string - [IN] the string to check *
* pattern - [IN] the regular expression *
* case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive match. *
* ZBX_CASE_SENSITIVE - case sensitive match. *
* output_template - [IN] the output string template. The output *
* string is constructed from the template by *
* replacing \ sequences with the captured *
* regexp group. *
* If output_template is NULL the whole *
* matched string is returned. *
* output - [OUT] a reference to the variable where allocated *
* memory containing the resulting value *
* (substitution) is stored. *
* Specify NULL to skip output value creation. *
* *
* Return value: ZBX_REGEXP_MATCH - the string matches the specified regular *
* expression *
* ZBX_REGEXP_NO_MATCH - the string does not match the regular *
* expression *
* FAIL - the string is NULL or the specified *
* regular expression is invalid *
* *
**********************************************************************************/
static int regexp_match_ex_regsub(const char *string, const char *pattern, int case_sensitive,
const char *output_template, char **output)
{
int regexp_flags = ZBX_REGEXP_MULTILINE, ret = FAIL;
if (ZBX_IGNORE_CASE == case_sensitive)
regexp_flags |= ZBX_REGEXP_CASELESS;
if (NULL == output)
{
if (NULL == zbx_regexp(string, pattern, regexp_flags, &ret))
{
if (FAIL != ret)
ret = ZBX_REGEXP_NO_MATCH;
}
else
ret = ZBX_REGEXP_MATCH;
}
else
{
if (SUCCEED == regexp_sub(string, pattern, output_template, regexp_flags,
ZBX_REGEXP_GROUP_CHECK_DISABLE, output))
{
ret = (NULL != *output ? ZBX_REGEXP_MATCH : ZBX_REGEXP_NO_MATCH);
}
else
ret = FAIL;
}
return ret;
}
/**********************************************************************************
* *
* Purpose: Test if the string matches regular expression with the specified *
* case sensitivity option and allocates output variable to store the *
* result if necessary. *
* *
* Parameters: string - [IN] the string to check *
* pattern - [IN] the regular expression *
* case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive match. *
* ZBX_CASE_SENSITIVE - case sensitive match. *
* output_template - [IN] the output string template. The output *
* string is constructed from the template by *
* replacing \ sequences with the captured *
* regexp group. *
* If output_template is NULL the whole *
* matched string is returned. *
* output - [OUT] a reference to the variable where allocated *
* memory containing the resulting value *
* (substitution) is stored. *
* Specify NULL to skip output value creation. *
* err_msg - [OUT] dynamically allocated error message *
* *
* Return value: ZBX_REGEXP_MATCH - the string matches the specified regular *
* expression *
* ZBX_REGEXP_NO_MATCH - the string does not match the regular *
* expression *
* If errors: *
* ZBX_REGEXP_COMPILE_FAIL or *
* ZBX_REGEXP_RUNTIME_FAIL with error message in 'err_msg' *
* *
**********************************************************************************/
static int regexp_match_ex_regsub2(const char *string, const char *pattern, int case_sensitive,
const char *output_template, char **output, char **err_msg)
{
int regexp_flags = ZBX_REGEXP_MULTILINE, ret;
char *err_msg_local = NULL;
if (ZBX_IGNORE_CASE == case_sensitive)
regexp_flags |= ZBX_REGEXP_CASELESS;
if (NULL == output)
ret = zbx_regexp2(string, pattern, regexp_flags, NULL, NULL, &err_msg_local);
else
ret = regexp_sub2(string, pattern, output_template, regexp_flags, output, &err_msg_local);
if (ZBX_REGEXP_MATCH == ret || ZBX_REGEXP_NO_MATCH == ret)
return ret;
if (NULL != err_msg)
{
*err_msg = zbx_dsprintf(*err_msg, "%s regular expression: %s", (ZBX_REGEXP_COMPILE_FAIL == ret) ?
"Invalid" : "Error occurred while matching", err_msg_local);
}
zbx_free(err_msg_local);
return ret;
}
/**********************************************************************************
* *
* Purpose: Test if the string contains substring with the specified case *
* sensitivity option. *
* *
* Parameters: string - [IN] the string to check *
* pattern - [IN] the substring to search *
* case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive search *
* ZBX_CASE_SENSITIVE - case sensitive search *
* *
* Return value: ZBX_REGEXP_MATCH - string contains the specified substring *
* ZBX_REGEXP_NO_MATCH - string does not contain the substring *
* *
**********************************************************************************/
static int regexp_match_ex_substring(const char *string, const char *pattern, int case_sensitive)
{
const char *ptr = NULL;
switch (case_sensitive)
{
case ZBX_CASE_SENSITIVE:
ptr = strstr(string, pattern);
break;
case ZBX_IGNORE_CASE:
ptr = zbx_strcasestr(string, pattern);
break;
}
return (NULL != ptr ? ZBX_REGEXP_MATCH : ZBX_REGEXP_NO_MATCH);
}
/**********************************************************************************
* *
* Purpose: Test if the string contains a substring from list with the specified *
* delimiter and case sensitivity option. *
* *
* Parameters: string - [IN] the string to check *
* pattern - [IN] the substring list *
* case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive search *
* ZBX_CASE_SENSITIVE - case sensitive search *
* delimiter - [IN] the delimiter separating items in the *
* substring list *
* *
* Return value: ZBX_REGEXP_MATCH - string contains a substring from the list *
* ZBX_REGEXP_NO_MATCH - string does not contain any substrings *
* from the list *
* *
**********************************************************************************/
static int regexp_match_ex_substring_list(const char *string, char *pattern, int case_sensitive, char delimiter)
{
int ret = ZBX_REGEXP_NO_MATCH;
char *s, *c;
for (s = pattern; '\0' != *s && ZBX_REGEXP_MATCH != ret;)
{
if (NULL != (c = strchr(s, delimiter)))
*c = '\0';
ret = regexp_match_ex_substring(string, s, case_sensitive);
if (NULL != c)
{
*c = delimiter;
s = ++c;
}
else
break;
}
return ret;
}
/**********************************************************************************
* *
* Purpose: Test if the string matches regular expression with the specified *
* case sensitivity option and allocates output variable to store the *
* result if necessary. *
* *
* Parameters: regexps - [IN] the global regular expression array *
* string - [IN] the string to check *
* pattern - [IN] the regular expression or global regular *
* expression name (@). *
* case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive match *
* ZBX_CASE_SENSITIVE - case sensitive match *
* output_template - [IN] the output string template. For regular *
* expressions (type Result is TRUE) output *
* string is constructed from the template by *
* replacing '\' sequences with the *
* captured regexp group. *
* If output_template is NULL then the whole *
* matched string is returned. *
* output - [OUT] a reference to the variable where allocated *
* memory containing the resulting value *
* (substitution) is stored. *
* Specify NULL to skip output value creation. *
* *
* Return value: ZBX_REGEXP_MATCH - the string matches the specified regular *
* expression *
* ZBX_REGEXP_NO_MATCH - the string does not match the specified *
* regular expression *
* FAIL - invalid regular expression *
* *
* Comments: For regular expressions and global regular expressions with 'Result *
* is TRUE' type the 'output_template' substitution result is stored *
* into 'output' variable. For other global regular expression types *
* the whole string is stored into 'output' variable. *
* *
**********************************************************************************/
/* regular expressions */
#define EXPRESSION_TYPE_INCLUDED 0
#define EXPRESSION_TYPE_ANY_INCLUDED 1
#define EXPRESSION_TYPE_NOT_INCLUDED 2
#define EXPRESSION_TYPE_TRUE 3
#define EXPRESSION_TYPE_FALSE 4
int zbx_regexp_sub_ex(const zbx_vector_expression_t *regexps, const char *string, const char *pattern,
int case_sensitive, const char *output_template, char **output)
{
int i, ret = FAIL;
char *output_accu; /* accumulator for 'output' when looping over global regexp subexpressions */
if (NULL == pattern || '\0' == *pattern)
{
/* always match when no pattern is specified */
ret = ZBX_REGEXP_MATCH;
goto out;
}
if ('@' != *pattern) /* not a global regexp */
{
ret = regexp_match_ex_regsub(string, pattern, case_sensitive, output_template, output);
goto out;
}
pattern++;
output_accu = NULL;
for (i = 0; i < regexps->values_num; i++) /* loop over global regexp subexpressions */
{
const zbx_expression_t *regexp = regexps->values[i];
if (0 != strcmp(regexp->name, pattern))
continue;
switch (regexp->expression_type)
{
case EXPRESSION_TYPE_TRUE:
if (NULL != output)
{
char *output_tmp = NULL;
if (ZBX_REGEXP_MATCH == (ret = regexp_match_ex_regsub(string,
regexp->expression, regexp->case_sensitive, output_template,
&output_tmp)))
{
zbx_free(output_accu);
output_accu = output_tmp;
}
}
else
{
ret = regexp_match_ex_regsub(string, regexp->expression, regexp->case_sensitive,
NULL, NULL);
}
break;
case EXPRESSION_TYPE_FALSE:
ret = regexp_match_ex_regsub(string, regexp->expression, regexp->case_sensitive,
NULL, NULL);
if (FAIL != ret) /* invert output value */
ret = (ZBX_REGEXP_MATCH == ret ? ZBX_REGEXP_NO_MATCH : ZBX_REGEXP_MATCH);
break;
case EXPRESSION_TYPE_INCLUDED:
ret = regexp_match_ex_substring(string, regexp->expression, regexp->case_sensitive);
break;
case EXPRESSION_TYPE_NOT_INCLUDED:
ret = regexp_match_ex_substring(string, regexp->expression, regexp->case_sensitive);
/* invert output value */
ret = (ZBX_REGEXP_MATCH == ret ? ZBX_REGEXP_NO_MATCH : ZBX_REGEXP_MATCH);
break;
case EXPRESSION_TYPE_ANY_INCLUDED:
ret = regexp_match_ex_substring_list(string, regexp->expression, regexp->case_sensitive,
regexp->exp_delimiter);
break;
default:
THIS_SHOULD_NEVER_HAPPEN;
ret = FAIL;
}
if (FAIL == ret || ZBX_REGEXP_NO_MATCH == ret)
{
zbx_free(output_accu);
break;
}
}
if (ZBX_REGEXP_MATCH == ret && NULL != output_accu)
{
*output = output_accu;
return ZBX_REGEXP_MATCH;
}
out:
if (ZBX_REGEXP_MATCH == ret && NULL != output && NULL == *output)
{
/* Handle output value allocation for global regular expression types */
/* that cannot perform output_template substitution (practically */
/* all global regular expression types except EXPRESSION_TYPE_TRUE). */
size_t offset = 0, size = 0;
zbx_strcpy_alloc(output, &size, &offset, string);
}
return ret;
}
/**********************************************************************************
* *
* Purpose: Test if the string matches regular expression with the specified *
* case sensitivity option and allocates output variable to store the *
* result if necessary. *
* *
* Parameters: regexps - [IN] the global regular expression array *
* string - [IN] the string to check *
* pattern - [IN] the regular expression or global regular *
* expression name (@). *
* case_sensitive - [IN] ZBX_IGNORE_CASE - case insensitive match *
* ZBX_CASE_SENSITIVE - case sensitive match *
* output_template - [IN] the output string template. For regular *
* expressions (type Result is TRUE) output *
* string is constructed from the template by *
* replacing '\' sequences with the *
* captured regexp group. *
* If output_template is NULL then the whole *
* matched string is returned. *
* output - [OUT] a reference to the variable where allocated *
* memory containing the resulting value *
* (substitution) is stored. *
* Specify NULL to skip output value creation. *
* err_msg - [OUT] dynamically allocated error message *
* *
* Return value: ZBX_REGEXP_MATCH - the string matches the specified regular *
* expression *
* ZBX_REGEXP_NO_MATCH - the string does not match the specified *
* regular expression *
* If errors: *
* ZBX_REGEXP_COMPILE_FAIL or *
* ZBX_REGEXP_RUNTIME_FAIL with error message in 'err_msg' *
* *
* Comments: For regular expressions and global regular expressions with 'Result *
* is TRUE' type the 'output_template' substitution result is stored *
* into 'output' variable. For other global regular expression types *
* the whole string is stored into 'output' variable. *
* *
**********************************************************************************/
int zbx_regexp_sub_ex2(const zbx_vector_expression_t *regexps, const char *string, const char *pattern,
int case_sensitive, const char *output_template, char **output, char **err_msg)
{
int i, ret = ZBX_REGEXP_NO_MATCH;
char *output_accu = NULL; /* accumulator for 'output' when looping over global regexp subexpressions */
if (NULL == pattern || '\0' == *pattern)
{
/* always match when no pattern is specified */
ret = ZBX_REGEXP_MATCH;
goto out;
}
if ('@' != *pattern) /* not a global regexp */
{
ret = regexp_match_ex_regsub2(string, pattern, case_sensitive, output_template, output, err_msg);
goto out;
}
pattern++;
for (i = 0; i < regexps->values_num; i++) /* loop over global regexp subexpressions */
{
const zbx_expression_t *regexp = (const zbx_expression_t *)regexps->values[i];
if (0 != strcmp(regexp->name, pattern))
continue;
switch (regexp->expression_type)
{
case EXPRESSION_TYPE_TRUE:
if (NULL != output)
{
char *output_tmp = NULL;
if (ZBX_REGEXP_MATCH == (ret = regexp_match_ex_regsub2(string,
regexp->expression, regexp->case_sensitive, output_template,
&output_tmp, err_msg)))
{
zbx_free(output_accu);
output_accu = output_tmp;
}
}
else
{
ret = regexp_match_ex_regsub2(string, regexp->expression,
regexp->case_sensitive, NULL, NULL, err_msg);
}
if (ZBX_REGEXP_COMPILE_FAIL == ret || ZBX_REGEXP_RUNTIME_FAIL == ret)
{
zbx_free(output_accu);
return ret;
}
break;
case EXPRESSION_TYPE_FALSE:
ret = regexp_match_ex_regsub2(string, regexp->expression, regexp->case_sensitive,
NULL, NULL, err_msg);
if (ZBX_REGEXP_MATCH == ret) /* invert output value */
{
ret = ZBX_REGEXP_NO_MATCH;
}
else if (ZBX_REGEXP_NO_MATCH == ret)
{
ret = ZBX_REGEXP_MATCH;
}
else if (ZBX_REGEXP_COMPILE_FAIL == ret || ZBX_REGEXP_RUNTIME_FAIL == ret)
{
zbx_free(output_accu);
return ret;
}
break;
case EXPRESSION_TYPE_INCLUDED:
ret = regexp_match_ex_substring(string, regexp->expression, regexp->case_sensitive);
break;
case EXPRESSION_TYPE_NOT_INCLUDED:
ret = regexp_match_ex_substring(string, regexp->expression, regexp->case_sensitive);
/* invert output value */
ret = (ZBX_REGEXP_MATCH == ret ? ZBX_REGEXP_NO_MATCH : ZBX_REGEXP_MATCH);
break;
case EXPRESSION_TYPE_ANY_INCLUDED:
ret = regexp_match_ex_substring_list(string, regexp->expression, regexp->case_sensitive,
regexp->exp_delimiter);
break;
default:
zabbix_log(LOG_LEVEL_WARNING, "%s() Invalid regular expression_type: %d, name:'%s',"
" expression:'%s'", __func__, regexp->expression_type, regexp->name,
regexp->expression);
if (NULL != err_msg)
{
*err_msg = zbx_dsprintf(*err_msg, "Invalid regular expression type: %d",
regexp->expression_type);
}
zbx_free(output_accu);
THIS_SHOULD_NEVER_HAPPEN;
return ZBX_REGEXP_COMPILE_FAIL; /* to make it NOTSUPPORTED */
}
if (ZBX_REGEXP_NO_MATCH == ret)
{
zbx_free(output_accu);
break;
}
}
if (ZBX_REGEXP_MATCH == ret && NULL != output_accu)
{
*output = output_accu;
return ZBX_REGEXP_MATCH;
}
out:
if (ZBX_REGEXP_MATCH == ret && NULL != output && NULL == *output)
{
/* Handle output value allocation for global regular expression types */
/* that cannot perform output_template substitution (practically */
/* all global regular expression types except EXPRESSION_TYPE_TRUE). */
size_t offset = 0, size = 0;
zbx_strcpy_alloc(output, &size, &offset, string);
}
return ret;
}
#undef EXPRESSION_TYPE_INCLUDED
#undef EXPRESSION_TYPE_ANY_INCLUDED
#undef EXPRESSION_TYPE_NOT_INCLUDED
#undef EXPRESSION_TYPE_TRUE
#undef EXPRESSION_TYPE_FALSE
int zbx_regexp_match_ex(const zbx_vector_expression_t *regexps, const char *string, const char *pattern,
int case_sensitive)
{
return zbx_regexp_sub_ex(regexps, string, pattern, case_sensitive, NULL, NULL);
}
int zbx_global_regexp_exists(const char *name, const zbx_vector_expression_t *regexps)
{
int i;
for (i = 0; i < regexps->values_num; i++)
{
const zbx_expression_t *regexp = regexps->values[i];
if (0 == strcmp(regexp->name, name))
return SUCCEED;
}
return FAIL;
}
/**********************************************************************************
* *
* Purpose: calculate a string size after symbols escaping *
* *
* Parameters: string - [IN] the string to check *
* *
* Return value: new size of the string *
* *
**********************************************************************************/
static size_t zbx_regexp_escape_stringsize(const char *string)
{
size_t len = 0;
const char *sptr;
if (NULL == string)
return 0;
for (sptr = string; '\0' != *sptr; sptr++)
{
switch (*sptr)
{
case '.':
case '\\':
case '+':
case '*':
case '?':
case '[':
case '^':
case ']':
case '$':
case '(':
case ')':
case '{':
case '}':
case '=':
case '!':
case '>':
case '<':
case '|':
case ':':
case '-':
case '#':
len += 2;
break;
default:
len++;
}
}
return len;
}
/**********************************************************************************
* *
* Purpose: replace . \ + * ? [ ^ ] $ ( ) { } = ! < > | : - symbols in string *
* with combination of \ and escaped symbol *
* *
* Parameters: p - [IN/OUT] buffer for new string after update *
* string - [IN] the string to update *
* *
**********************************************************************************/
static void zbx_regexp_escape_string(char *p, const char *string)
{
const char *sptr;
for (sptr = string; '\0' != *sptr; sptr++)
{
switch (*sptr)
{
case '.':
case '\\':
case '+':
case '*':
case '?':
case '[':
case '^':
case ']':
case '$':
case '(':
case ')':
case '{':
case '}':
case '=':
case '!':
case '>':
case '<':
case '|':
case ':':
case '-':
case '#':
*p++ = '\\';
*p++ = *sptr;
break;
default:
*p++ = *sptr;
}
}
return;
}
/**********************************************************************************
* *
* Purpose: escaping of symbols for using in regular expression *
* *
* Parameters: string - [IN/OUT] the string to update *
* *
**********************************************************************************/
void zbx_regexp_escape(char **string)
{
size_t size;
char *buffer;
if (0 == (size = zbx_regexp_escape_stringsize(*string)))
return;
buffer = zbx_malloc(NULL, size + 1);
buffer[size] = '\0';
zbx_regexp_escape_string(buffer, *string);
zbx_free(*string);
*string = buffer;
}
/**********************************************************************************
* *
* Purpose: remove repeated wildcard characters from the expression *
* *
* Parameters: str - [IN/OUT] the string to update *
* *
**********************************************************************************/
void zbx_wildcard_minimize(char *str)
{
char *p1, *p2;
int w = 0;
for (p1 = p2 = str; '\0' != *p2; p2++)
{
if ('*' == *p2)
{
if (0 != w)
continue;
w = 1;
}
else
w = 0;
*p1 = *p2;
p1++;
}
*p1 = '\0';
}
/******************************************************************************
* *
* Purpose: Matches string value to specified wildcard. *
* Asterisk (*) characters match to any characters of any length. *
* *
* Parameters: value - [IN] string to match *
* wildcard - [IN] wildcard string expression *
* *
* Return value: 1 - value match the wildcard *
* 0 - otherwise *
* *
******************************************************************************/
int zbx_wildcard_match(const char *value, const char *wildcard)
{
const char *s_pivot = value, *w_pivot = wildcard;
while ('\0' != *value && '*' != *wildcard)
{
if (*value++ != *wildcard++)
return 0;
}
while ('\0' != *value)
{
if ('*' == *wildcard)
{
wildcard++;
if ('\0' == *wildcard)
return 1;
w_pivot = wildcard;
s_pivot = value + 1;
}
else if (*value == *wildcard)
{
value++;
wildcard++;
}
else
{
wildcard = w_pivot;
value = s_pivot++;
}
}
while ('*' == *wildcard)
wildcard++;
return '\0' == *wildcard;
}