/*
** Zabbix
** Copyright (C) 2001-2023 Zabbix SIA
**
** This program is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 2 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
**/

#include "zbxfile.h"

/******************************************************************************
 *                                                                            *
 * Purpose: find the CR and LF byte sequences and their size in bytes for     *
 *          the given encoding                                                *
 *                                                                            *
 * Parameters: encoding - [IN] encoding name, compared case-insensitively;    *
 *                        "" (empty string) or an unrecognized name means a   *
 *                        single-byte character set                           *
 *             cr       - [OUT] carriage return in the detected encoding      *
 *             lf       - [OUT] line feed in the detected encoding            *
 *             szbyte   - [OUT] size of CR and of LF in bytes (1, 2 or 4)     *
 *                                                                            *
 ******************************************************************************/
void	zbx_find_cr_lf_szbyte(const char *encoding, const char **cr, const char **lf, size_t *szbyte)
{
	/* every recognized multi-byte encoding name with its CR, LF and their size in bytes */
	static const struct
	{
		const char	*name;
		const char	*cr;
		const char	*lf;
		size_t		size;
	}
	known[] =
	{
		/* 2 bytes, little-endian */
		{"UNICODE",		"\r\0",		"\n\0",		2},
		{"UNICODELITTLE",	"\r\0",		"\n\0",		2},
		{"UTF-16",		"\r\0",		"\n\0",		2},
		{"UTF-16LE",		"\r\0",		"\n\0",		2},
		{"UTF16",		"\r\0",		"\n\0",		2},
		{"UTF16LE",		"\r\0",		"\n\0",		2},
		{"UCS-2",		"\r\0",		"\n\0",		2},
		{"UCS-2LE",		"\r\0",		"\n\0",		2},
		/* 2 bytes, big-endian */
		{"UNICODEBIG",		"\0\r",		"\0\n",		2},
		{"UNICODEFFFE",		"\0\r",		"\0\n",		2},
		{"UTF-16BE",		"\0\r",		"\0\n",		2},
		{"UTF16BE",		"\0\r",		"\0\n",		2},
		{"UCS-2BE",		"\0\r",		"\0\n",		2},
		/* 4 bytes, little-endian */
		{"UTF-32",		"\r\0\0\0",	"\n\0\0\0",	4},
		{"UTF-32LE",		"\r\0\0\0",	"\n\0\0\0",	4},
		{"UTF32",		"\r\0\0\0",	"\n\0\0\0",	4},
		{"UTF32LE",		"\r\0\0\0",	"\n\0\0\0",	4},
		/* 4 bytes, big-endian */
		{"UTF-32BE",		"\0\0\0\r",	"\0\0\0\n",	4},
		{"UTF32BE",		"\0\0\0\r",	"\0\0\0\n",	4}
	};
	size_t	k;

	/* single-byte character set unless a known multi-byte encoding is recognized below */
	*cr = "\r";
	*lf = "\n";
	*szbyte = 1;

	if ('\0' == *encoding)
		return;

	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++)
	{
		if (0 != strcasecmp(encoding, known[k].name))
			continue;

		*cr = known[k].cr;
		*lf = known[k].lf;
		*szbyte = known[k].size;
		return;
	}
}
/****************************************************************************** * * * Purpose: Read one text line from a file descriptor into buffer * * * * Parameters: fd - [IN] file descriptor to read from * * buf - [OUT] buffer to read into * * count - [IN] buffer size in bytes * * encoding - [IN] pointer to a text string describing encoding. * * See function zbx_find_cr_lf_szbyte() for supported * * encodings. * * "" (empty string) means a single-byte character set.* * * * Return value: On success, the number of bytes read is returned (0 (zero) * * indicates end of file). * * On error, -1 (ZBX_READ_ERR) is returned and errno is set * * appropriately. * * If the wrong decoding is detected, -2 * * (ZBX_READ_WRONG_ENCODING) is returned. * * * * Comments: Reading stops after a newline. If the newline is read, it is * * stored into the buffer. * * * * Note: This function is left for testing purposes. * * * ******************************************************************************/ int zbx_read_text_line_from_file(int fd, char *buf, size_t count, const char *encoding) { size_t i, szbyte; ssize_t nbytes; const char *cr, *lf; zbx_offset_t offset; if ((zbx_offset_t)-1 == (offset = zbx_lseek(fd, 0, SEEK_CUR))) return ZBX_READ_ERR; if (0 >= (nbytes = read(fd, buf, count))) return (int)nbytes; zbx_find_cr_lf_szbyte(encoding, &cr, &lf, &szbyte); /* nbytes can be smaller than szbyte. If the target file was encoded in UTF-8 and contained a single */ /* character, but the target encoding was mistakenly set to UTF-32. Then nbytes will be 1 and szbyte */ /* will be 4. 
Similarly, if bytes read produces a remainder that does not fit szbyte - we can safely */ /* assume the file contains the encoding different from the one provided to us.*/ if ((size_t)nbytes < szbyte || ((size_t)nbytes % szbyte != 0)) return ZBX_READ_WRONG_ENCODING; for (i = 0; i <= (size_t)nbytes - szbyte; i += szbyte) { if (0 == memcmp(&buf[i], lf, szbyte)) /* LF (Unix) */ { i += szbyte; break; } if (0 == memcmp(&buf[i], cr, szbyte)) /* CR (Mac) */ { /* CR+LF (Windows) ? */ if (i < (size_t)nbytes - szbyte && 0 == memcmp(&buf[i + szbyte], lf, szbyte)) i += szbyte; i += szbyte; break; } } if ((zbx_offset_t)-1 == zbx_lseek(fd, offset + (zbx_offset_t)i, SEEK_SET)) return ZBX_READ_ERR; return (int)i; } int zbx_is_regular_file(const char *path) { zbx_stat_t st; if (0 == zbx_stat(path, &st) && 0 != S_ISREG(st.st_mode)) return SUCCEED; return FAIL; } #if !(defined(_WINDOWS) || defined(__MINGW32__)) int zbx_get_file_time(const char *path, int sym, zbx_file_time_t *time) { zbx_stat_t buf; if (0 != sym) { if (0 != lstat(path, &buf)) return FAIL; } else { if (0 != zbx_stat(path, &buf)) return FAIL; } time->access_time = (zbx_fs_time_t)buf.st_atime; time->modification_time = (zbx_fs_time_t)buf.st_mtime; time->change_time = (zbx_fs_time_t)buf.st_ctime; return SUCCEED; } char *zbx_fgets(char *buffer, int size, FILE *fp) { char *s; do { errno = 0; s = fgets(buffer, size, fp); } while (EINTR == errno && NULL == s); return s; } /****************************************************************************** * * * Purpose: call write in a loop, iterating until all the data is written. 
* * * * Parameters: fd - [IN] descriptor * * buf - [IN] buffer to write * * n - [IN] bytes count to write * * * * Return value: SUCCEED - n bytes successfully written * * FAIL - less than n bytes are written * * * ******************************************************************************/ int zbx_write_all(int fd, const char *buf, size_t n) { while (0 < n) { ssize_t ret; if (-1 != (ret = write(fd, buf, n))) { buf += ret; n -= (size_t)ret; } else if (EINTR != errno) return FAIL; } return SUCCEED; } #endif /* not _WINDOWS */ char *zbx_find_buf_newline(char *p, char **p_next, const char *p_end, const char *cr, const char *lf, size_t szbyte) { if (1 == szbyte) /* single-byte character set */ { for (; p < p_end; p++) { /* detect NULL byte and replace it with '?' character */ if (0x0 == *p) { *p = '?'; continue; } if (0xd < *p || 0xa > *p) continue; if (0xa == *p) /* LF (Unix) */ { *p_next = p + 1; return p; } if (0xd == *p) /* CR (Mac) */ { if (p < p_end - 1 && 0xa == *(p + 1)) /* CR+LF (Windows) */ { *p_next = p + 2; return p; } *p_next = p + 1; return p; } } return (char *)NULL; } else { while (p <= p_end - szbyte) { /* detect NULL byte in UTF-16 encoding and replace it with '?' 
character */ if (2 == szbyte && 0x0 == *p && 0x0 == *(p + 1)) { if (0x0 == *cr) /* Big-endian */ p[1] = '?'; else /* Little-endian */ *p = '?'; } if (0 == memcmp(p, lf, szbyte)) /* LF (Unix) */ { *p_next = p + szbyte; return p; } if (0 == memcmp(p, cr, szbyte)) /* CR (Mac) */ { if (p <= p_end - szbyte - szbyte && 0 == memcmp(p + szbyte, lf, szbyte)) { /* CR+LF (Windows) */ *p_next = p + szbyte + szbyte; return p; } *p_next = p + szbyte; return p; } p += szbyte; } return (char *)NULL; } } /* Helper context for zbx_buf_readln */ struct buf_read_save { zbx_offset_t offset; /* offset in file where buffer is read from */ char *p_start; /* start position of current line */ char *p_end; /* end position of data */ char *p_next; /* position of next line after newline */ const char *cr, *lf; /* pointers to newline characters */ size_t szbyte; /* size of newline characters */ }; /******************************************************************************* * * * Purpose: reads file line-by-line in a buffered manner * * * * Parameters: * * fd - [IN] file descriptor to read from * * buf - [IN] buffer to read into * * bufsz - [IN] buffer size in bytes. Must divisible by 4. * * encoding - [IN] pointer to a text string describing encoding. * * See function zbx_find_cr_lf_szbyte() for supported * * encodings. * * "" (empty string) means a single-byte character set. * * value - [OUT] resulting pointer to start of line * * saveptr - [IN/OUT] pointer to context. This pointer should be NULL * * on first call of the function. Caller must free * * this pointer after usage. * * * * Comment: This function does not add NULL character at end of line. * * * * Return value: On success, number of bytes read is returned (0 (zero) * * indicated end of file). * * On error, -1 (ZBX_READ_ERR) is returned and errno is set * * appropriately. * * If the wrong decoding is detected, -2 * * (ZBX_READ_WRONG_ENCODING) is returned. 
* * * ******************************************************************************/ ssize_t zbx_buf_readln(int fd, char *buf, size_t bufsz, const char *encoding, char **value, void **saveptr) { char *p_nl; struct buf_read_save *save = (struct buf_read_save *)*saveptr; if (NULL == *saveptr) { ssize_t nbytes; *saveptr = malloc(sizeof(struct buf_read_save)); save = (struct buf_read_save *)*saveptr; memset(save, 0, sizeof(*save)); zbx_find_cr_lf_szbyte(encoding, &save->cr, &save->lf, &save->szbyte); read_buf: /* refill buffer */ if ((zbx_offset_t)-1 == (save->offset = zbx_lseek(fd, save->offset, SEEK_SET))) return ZBX_READ_ERR; /* cannot set position to 0 */ if (0 >= (nbytes = read(fd, buf, bufsz))) return nbytes; /* nbytes can be smaller than szbyte. If the target file was encoded in UTF-8 and contained a single */ /* character, but the target encoding was mistakenly set to UTF-32. Then nbytes will be 1 and szbyte */ /* will be 4. Similarly, if bytes read produces a remainder that does not fit szbyte - we can safely */ /* assume the file contains the encoding different from the one provided to us.*/ if ((size_t)nbytes < save->szbyte || ((size_t)nbytes % save->szbyte != 0)) return ZBX_READ_WRONG_ENCODING; save->p_start = buf; /* beginning of current line */ save->p_end = buf + (size_t)nbytes; /* no data from this position */ } else save->p_start = save->p_next; /* jump to next line */ while (NULL == (p_nl = zbx_find_buf_newline(save->p_start, &save->p_next, save->p_end, save->cr, save->lf, save->szbyte))) { /* incomplete line */ /* Note. 
This logic should work the same as zbx_read_text_line_from_file */ if (save->p_end != save->p_next) { char tmp; /* test for EOF */ if (0 == read(fd, &tmp, 1)) { p_nl = save->p_end - 1; save->p_next = save->p_end; break; /* line end with EOF - just return line */ } } if ((ssize_t)bufsz == save->p_end - save->p_start) { p_nl = save->p_end - 1; save->p_next = save->p_end; break; /* line is split but it is bigger than buffer - just return line */ } /* read next buffer window from start of this line */ save->offset += save->p_start - buf; goto read_buf; } *value = save->p_start; return p_nl - save->p_start + 1; }