2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
#ifndef PCRE_INTERNAL_H
|
|
|
|
#define PCRE_INTERNAL_H
|
|
|
|
|
|
|
|
|
|
|
|
/* Debug switch. DEBUG is only defined here when the "#if 0" below is changed
to "#if 1"; it may equally be defined before this point by the build. */

#if 0
#define DEBUG
#endif

/* DPRINTF(p) takes ONE fully parenthesized printf() argument list, for example
DPRINTF(("count=%d\n", n)). When DEBUG is defined it expands to "printf p";
otherwise it expands to nothing, so its arguments are never evaluated. Any
previous definition of DPRINTF is discarded first. */

#undef DPRINTF
#ifdef DEBUG
#define DPRINTF(p) printf p
#else
#define DPRINTF(p)
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <setjmp.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
|
|
/* pcre_uint16: an unsigned integer type whose maximum value is 65535, chosen
at preprocessing time from the <limits.h> maxima. Compilation stops with an
error when neither unsigned short nor unsigned int qualifies. */

#if USHRT_MAX == 65535
typedef unsigned short pcre_uint16;
#elif UINT_MAX == 65535
typedef unsigned int pcre_uint16;
#else
#error Cannot determine a type for 16-bit unsigned integers
#endif
|
|
|
|
|
|
|
|
/* pcre_uint32: an unsigned integer type whose maximum value is 4294967295,
chosen at preprocessing time from the <limits.h> maxima. The constant carries a
U suffix so that it is unsigned in every C dialect (an unsuffixed 4294967295
draws a diagnostic from C90 compilers). Compilation stops with an error when
neither unsigned int nor unsigned long qualifies. */

#if UINT_MAX == 4294967295U
typedef unsigned int pcre_uint32;
#elif ULONG_MAX == 4294967295U
typedef unsigned long int pcre_uint32;
#else
#error Cannot determine a type for 32-bit unsigned integers
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* Shorthand for unsigned char; used throughout this header for subject,
pattern and table bytes. */
typedef unsigned char uschar;
|
|
|
|
|
|
|
|
|
|
|
|
/* All-ones 32-bit value. NOTE(review): from the name this is presumably a
sentinel that no real character code can equal - confirm at the use sites. */
#define NOTACHAR 0xffffffff
|
|
|
|
|
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
/* Codes stored in an "nltype" field. IS_NEWLINE() and WAS_NEWLINE() test for
NLTYPE_FIXED to decide between comparing against the literal nl[] bytes and
calling _pcre_is_newline() / _pcre_was_newline(). */

#define NLTYPE_FIXED    0
#define NLTYPE_ANY      1
#define NLTYPE_ANYCRLF  2
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
|
|
|
/* IS_NEWLINE(p): non-zero when a newline sequence starts at p.

The macro depends on names that must exist where it is expanded: NLBLOCK (a
pointer to a structure with nltype, nllen and nl[] members), PSEND (the name of
that structure's end-of-subject member) and a variable called utf8.

For any nltype other than NLTYPE_FIXED the decision is delegated to
_pcre_is_newline(), which is also handed the address of nllen. For NLTYPE_FIXED
the one or two bytes in nl[] are compared directly, after checking that nllen
bytes are still available before PSEND. */

#define IS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) < NLBLOCK->PSEND && \
     _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
       utf8)) \
    : \
    ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
     (p)[0] == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || (p)[1] == NLBLOCK->nl[1]) \
    ) \
  )
|
|
|
|
|
|
|
|
|
|
|
|
/* WAS_NEWLINE(p): non-zero when a newline sequence ends immediately before p.

The macro depends on names that must exist where it is expanded: NLBLOCK (a
pointer to a structure with nltype, nllen and nl[] members), PSSTART (the name
of that structure's start-of-subject member) and a variable called utf8.

For any nltype other than NLTYPE_FIXED the decision is delegated to
_pcre_was_newline(), which is also handed the address of nllen. For
NLTYPE_FIXED the nllen bytes preceding p are compared with nl[], after checking
that p is at least nllen bytes past PSSTART. */

#define WAS_NEWLINE(p) \
  ((NLBLOCK->nltype != NLTYPE_FIXED)? \
    ((p) > NLBLOCK->PSSTART && \
     _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
       &(NLBLOCK->nllen), utf8)) \
    : \
    ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
     (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
     (NLBLOCK->nllen == 1 || (p)[-NLBLOCK->nllen+1] == NLBLOCK->nl[1]) \
    ) \
  )
|
|
|
|
|
|
|
|
|
|
|
|
/* Subject-string pointer types. When CUSTOM_SUBJECT_PTR is defined, both
PCRE_SPTR and USPTR become that type; otherwise PCRE_SPTR is "const char *" and
USPTR is "const unsigned char *". These are defined before pcre.h is included
further down. */

#ifdef CUSTOM_SUBJECT_PTR
#define PCRE_SPTR CUSTOM_SUBJECT_PTR
#define USPTR CUSTOM_SUBJECT_PTR
#else
#define PCRE_SPTR const char *
#define USPTR const unsigned char *
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#include "pcre.h"
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef VPCOMPAT
|
|
|
|
/* VPCOMPAT build: redirect the string/memory functions used by this library
to underscore-prefixed equivalents (_strlen, _memcpy, ...), which must be
supplied by the environment. */

#define strlen(s)        _strlen(s)
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
#define memcmp(s,c,n)    _memcmp(s,c,n)
#define memcpy(d,s,n)    _memcpy(d,s,n)
#define memmove(d,s,n)   _memmove(d,s,n)
#define memset(s,c,n)    _memset(s,c,n)
|
2022-09-04 17:29:16 +08:00
|
|
|
#else
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
|
|
|
#ifndef HAVE_MEMMOVE
|
2022-09-04 17:29:16 +08:00
|
|
|
#undef memmove
|
2022-08-29 11:36:29 +08:00
|
|
|
#ifdef HAVE_BCOPY
|
|
|
|
#define memmove(a, b, c) bcopy(b, a, c)
|
2022-09-04 17:29:16 +08:00
|
|
|
#else
|
2022-08-29 11:36:29 +08:00
|
|
|
/* Fallback used when neither memmove() nor bcopy() is available: copy n bytes
from s to d, giving the correct result even when the two regions overlap.

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    d

When the destination lies above the source the bytes are copied from the last
to the first, so that source bytes are read before they can be overwritten;
otherwise they are copied from the first to the last. */

static void *
pcre_memmove(void *d, const void *s, size_t n)
{
unsigned char *dest = (unsigned char *)d;
const unsigned char *src = (const unsigned char *)s;
size_t i;

if (dest > src)
  {
  for (i = n; i > 0; --i) dest[i - 1] = src[i - 1];
  }
else
  {
  for (i = 0; i < n; ++i) dest[i] = src[i];
  }

return d;
}
|
|
|
|
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
2022-09-04 17:29:16 +08:00
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#endif
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
|
|
|
#if LINK_SIZE == 2
|
|
|
|
|
|
|
|
/* LINK_SIZE == 2: a link value occupies two bytes, most significant first.

PUT(a,n,d) stores d at a[n..n+1]; GET(a,n) reads the value back. Every
argument is parenthesized and PUT is one complete expression, so pointer
expressions such as "code + 1" are safe as the first argument. Arguments are
evaluated more than once, so they must not have side effects. */

#define PUT(a,n,d)   \
  (((a)[n] = (d) >> 8), \
   ((a)[(n)+1] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

#define MAX_PATTERN_SIZE (1 << 16)
|
|
|
|
|
|
|
|
|
|
|
|
#elif LINK_SIZE == 3
|
|
|
|
|
|
|
|
/* LINK_SIZE == 3: a link value occupies three bytes, most significant first.

PUT(a,n,d) stores d at a[n..n+2]; GET(a,n) reads the value back. Every
argument is parenthesized and PUT is one complete expression, so pointer
expressions such as "code + 1" are safe as the first argument. Arguments are
evaluated more than once, so they must not have side effects. */

#define PUT(a,n,d)   \
  (((a)[n] = (d) >> 16), \
   ((a)[(n)+1] = (d) >> 8), \
   ((a)[(n)+2] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])

#define MAX_PATTERN_SIZE (1 << 24)
|
|
|
|
|
|
|
|
|
|
|
|
#elif LINK_SIZE == 4
|
|
|
|
|
|
|
|
/* LINK_SIZE == 4: a link value occupies four bytes, most significant first.

PUT(a,n,d) stores d at a[n..n+3]; GET(a,n) reads the value back. Every
argument is parenthesized and PUT is one complete expression, so pointer
expressions such as "code + 1" are safe as the first argument. Arguments are
evaluated more than once, so they must not have side effects. Note that
MAX_PATTERN_SIZE is 1 << 30 here, not 1 << 32. */

#define PUT(a,n,d)   \
  (((a)[n] = (d) >> 24), \
   ((a)[(n)+1] = (d) >> 16), \
   ((a)[(n)+2] = (d) >> 8), \
   ((a)[(n)+3] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])

#define MAX_PATTERN_SIZE (1 << 30)
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
#error LINK_SIZE must be either 2, 3, or 4
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* PUTINC(a,n,d): store a link value with PUT() and then advance the pointer a
by LINK_SIZE. The whole expansion is parenthesized so that it is a single
expression whose value is the advanced pointer. a must be a modifiable
pointer lvalue. */
#define PUTINC(a,n,d)   (PUT(a,n,d), (a) += LINK_SIZE)
|
|
|
|
|
|
|
|
|
|
|
|
/* Two-byte values, most significant byte first, independent of LINK_SIZE.

PUT2(a,n,d) stores d at a[n..n+1]; GET2(a,n) reads it back; PUT2INC() stores
and then advances the pointer a by 2.

PUT2 is a single comma expression. The earlier form separated its two
assignments with ';', so "if (x) PUT2(a,n,d);" guarded only the first store
and PUT2INC() could not be used as a single statement either. Arguments are
evaluated more than once, so they must not have side effects. */

#define PUT2(a,n,d)   \
  ((a)[n] = (d) >> 8, \
   (a)[(n)+1] = (d) & 255)

#define GET2(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

#define PUT2INC(a,n,d)  (PUT2(a,n,d), (a) += 2)
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef SUPPORT_UTF8
|
|
|
|
/* Character-fetch macros for builds WITHOUT UTF-8 support: every character is
exactly one byte, so each macro is a plain load and/or increment. GETCHARLEN
leaves len untouched.

Each expansion ends with its own ';', matching the statement form of the UTF-8
versions. The pointer argument is parenthesized where it is only read, so an
expression such as "eptr + 1" is dereferenced as a whole; for NEXTCHAR,
GETCHARINC and GETCHARINCTEST it must be a modifiable lvalue. */

#define NEXTCHAR(p) (p)++;
#define GETCHAR(c, eptr) c = *(eptr);
#define GETCHARTEST(c, eptr) c = *(eptr);
#define GETCHARINC(c, eptr) c = *(eptr)++;
#define GETCHARINCTEST(c, eptr) c = *(eptr)++;
#define GETCHARLEN(c, eptr, len) c = *(eptr);
|
|
|
|
|
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
#else
|
|
|
|
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
/* NEXTCHAR(p): advance p by one byte and then, when the variable utf8 (which
must be in scope at the point of use) is non-zero, past every following byte of
the form 10xxxxxx.

This expands to TWO statements (an increment and an if), so it must not be
used as the body of an unbraced if/else/loop. p must be a modifiable lvalue. */

#define NEXTCHAR(p) \
  (p)++; \
  if (utf8) { while((*(p) & 0xc0) == 0x80) (p)++; }
|
|
|
|
|
|
|
|
|
|
|
|
/* GETCHAR(c, eptr): load the character at eptr into c without moving eptr.

A first byte below 0xc0 is the result as it stands. Otherwise
_pcre_utf8_table4[] (indexed by the low six bits of the first byte) gives the
number of additional bytes, _pcre_utf8_table3[] gives the mask for the data
bits of the first byte, and each additional byte contributes its low six bits.
No check is made that the additional bytes exist or are well formed.

This expands to two statements (an assignment and an if), so it must not be
used as the body of an unbraced if/else/loop. c should be an unsigned int or
int variable. */

#define GETCHAR(c, eptr) \
  c = *(eptr); \
  if (c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= ((eptr)[gcii] & 0x3f) << gcss; \
      } \
    }
|
|
|
|
|
|
|
|
|
|
|
|
/* GETCHARTEST(c, eptr): as GETCHAR(), but multi-byte decoding happens only
when the variable utf8 (which must be in scope at the point of use) is
non-zero; otherwise c is simply the byte at eptr. eptr is not moved.

This expands to two statements (an assignment and an if), so it must not be
used as the body of an unbraced if/else/loop. */

#define GETCHARTEST(c, eptr) \
  c = *(eptr); \
  if (utf8 && c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= ((eptr)[gcii] & 0x3f) << gcss; \
      } \
    }
|
|
|
|
|
|
|
|
|
|
|
|
/* GETCHARINC(c, eptr): load the character at eptr into c and advance eptr
past every byte of that character. Decoding is the same as in GETCHAR():
_pcre_utf8_table4[] supplies the number of additional bytes for a first byte
of 0xc0 or above and _pcre_utf8_table3[] the mask for its data bits.

eptr must be a modifiable lvalue. This expands to two statements (an
assignment and an if), so it must not be used as the body of an unbraced
if/else/loop. */

#define GETCHARINC(c, eptr) \
  c = *eptr++; \
  if (c >= 0xc0) \
    { \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }
|
|
|
|
|
|
|
|
|
|
|
|
/* GETCHARINCTEST(c, eptr): as GETCHARINC(), but multi-byte decoding happens
only when the variable utf8 (which must be in scope at the point of use) is
non-zero; otherwise one byte is loaded and eptr advances by one.

eptr must be a modifiable lvalue. This expands to two statements (an
assignment and an if), so it must not be used as the body of an unbraced
if/else/loop. */

#define GETCHARINCTEST(c, eptr) \
  c = *eptr++; \
  if (utf8 && c >= 0xc0) \
    { \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }
|
|
|
|
|
|
|
|
|
|
|
|
/* GETCHARLEN(c, eptr, len): load the character at eptr into c without moving
eptr, and ADD the number of additional bytes (bytes after the first) to len.
len is left unchanged for a first byte below 0xc0, so a caller that wants the
total byte length starts with len set to 1. Decoding is the same as in
GETCHAR().

This expands to two statements (an assignment and an if), so it must not be
used as the body of an unbraced if/else/loop. */

#define GETCHARLEN(c, eptr, len) \
  c = *(eptr); \
  if (c >= 0xc0) \
    { \
    int gcii; \
    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= ((eptr)[gcii] & 0x3f) << gcss; \
      } \
    len += gcaa; \
    }
|
|
|
|
|
|
|
|
|
|
|
|
/* BACKCHAR(eptr): while eptr points at a byte of the form 10xxxxxx, step it
back one byte; it does nothing when *eptr is not such a byte. The expansion is
a bare while statement with no trailing semicolon, and no lower bound is
checked. */
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* Supply offsetof() only when the implementation's <stddef.h> has not already
defined it: the byte offset of a member, computed from the address of that
member in an object imagined at address zero. */

#ifndef offsetof
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* The three public option bits from pcre.h that PCRE_IMS groups together. */

#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)

/* Private flag bits. Each is a distinct single bit and all fit in 16 bits.
NOTE(review): presumably these live in the 16-bit "flags" field of real_pcre -
confirm against the compile and exec code. */

#define PCRE_NOPARTIAL     0x0001
#define PCRE_FIRSTSET      0x0002
#define PCRE_REQCHSET      0x0004
#define PCRE_STARTLINE     0x0008
#define PCRE_JCHANGED      0x0010
#define PCRE_HASCRORLF     0x0020

/* Flag bit for study data. NOTE(review): presumably held in the "options"
field of pcre_study_data - confirm. */

#define PCRE_STUDY_MAPPED   0x01
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
|
|
|
/* Masks built from the public option bits declared in pcre.h.

PCRE_NEWLINE_BITS collects the newline-convention bits. The PUBLIC_*_OPTIONS
masks list the option bits named for each entry point (by their names: compile,
exec, DFA exec and study); PUBLIC_STUDY_OPTIONS is zero, i.e. it names no
option bits. */

#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
                           PCRE_NEWLINE_ANYCRLF)

#define PUBLIC_OPTIONS \
  (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
   PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
   PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
   PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)

#define PUBLIC_EXEC_OPTIONS \
  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
   PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)

#define PUBLIC_DFA_EXEC_OPTIONS \
  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
   PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)

#define PUBLIC_STUDY_OPTIONS 0   /* None defined */
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
/* Identification value; its four bytes 0x50 0x43 0x52 0x45 are the ASCII
codes of "PCRE". NOTE(review): presumably what real_pcre.magic_number is
checked against - confirm. */

#define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */

/* Negative values for a "required byte" slot. The names suggest "not yet
determined" and "known to be absent" - confirm at the use sites. */

#define REQ_UNSET (-2)
#define REQ_NONE  (-1)

/* NOTE(review): a limit of 1000; the name suggests it bounds how far a
required byte is searched for - confirm at the use sites. */

#define REQ_BYTE_MAX 1000
|
|
|
|
|
|
|
|
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
variable-length repeat, or anything other than literal characters. */
|
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
/* Both flags lie above the low eight bits, so they can be OR-ed onto a byte
value without disturbing it. */

#define REQ_CASELESS 0x0100
#define REQ_VARY     0x0200
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
|
|
|
/* Boolean type and its two values; BOOL is a plain int. */

typedef int BOOL;

#define FALSE   0
#define TRUE    1
|
|
|
|
|
|
|
|
|
|
|
|
/* Character values for the escapes named by each macro's suffix. Each one is
defined only when the build has not already supplied a value. ESC_e is given
numerically (27) because C has no standard escape sequence for it; the others
use the compiler's own '\f', '\n', '\r' and '\t'. The tab macro is called
ESC_tee rather than ESC_t. */

#ifndef ESC_e
#define ESC_e 27
#endif

#ifndef ESC_f
#define ESC_f '\f'
#endif

#ifndef ESC_n
#define ESC_n '\n'
#endif

#ifndef ESC_r
#define ESC_r '\r'
#endif

#ifndef ESC_tee
#define ESC_tee '\t'
#endif
|
|
|
|
|
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
/* Small consecutive codes with a PT_ prefix. NOTE(review): presumably the
property-type operand used with OP_PROP/OP_NOTPROP and the "type" field of
ucp_type_table - confirm. */

#define PT_ANY        0
#define PT_LAMP       1
#define PT_GC         2
#define PT_PC         3
#define PT_SC         4
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
/* Two single-bit flags with an XCL_ prefix. NOTE(review): presumably for the
flag byte of an OP_XCLASS item - confirm. */

#define XCL_NOT    0x01
#define XCL_MAP    0x02

/* Consecutive codes starting at zero. NOTE(review): presumably the item types
inside an OP_XCLASS list, with XCL_END as terminator - confirm. */

#define XCL_END       0
#define XCL_SINGLE    1
#define XCL_RANGE     2
#define XCL_PROP      3
#define XCL_NOTPROP   4
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
/* These are escaped items that aren't just an encoding of a particular data
|
|
|
|
value such as \n. They must have non-zero values, as check_escape() returns
|
|
|
|
their negation. Also, they must appear in the same order as in the opcode
|
|
|
|
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
|
|
|
corresponds to "." rather than an escape sequence. The final one must be
|
|
|
|
ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc).
|
|
|
|
There are two tests in the code for an escape greater than ESC_b and less than
|
|
|
|
ESC_Z to detect the types that may be repeated. These are the types that
|
|
|
|
consume characters. If any new escapes are put in between that don't consume a
|
|
|
|
character, that code will have to change. */
|
|
|
|
|
|
|
|
/* Values run consecutively from ESC_A == 1 to ESC_REF == 27. ESC_dum1 is the
placeholder that keeps the list aligned with OP_ANY in the opcode table. */

enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
       ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
       ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
|
|
|
|
|
|
|
|
|
|
|
|
/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to
|
|
|
|
OP_EOD must correspond in order to the list of escapes immediately above.
|
|
|
|
|
|
|
|
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
|
|
|
|
that follow must also be updated to match. There is also a table called
|
|
|
|
"coptable" in pcre_dfa_exec.c that must be updated. */
|
|
|
|
|
|
|
|
/* Opcode values run consecutively from OP_END == 0 to OP_ACCEPT == 109 (110
opcodes). OP_NAME_LIST and OP_LENGTHS must each have exactly one entry per
opcode, in this order. The blank lines below only group related names; they do
not affect the values. */

enum {
  OP_END,            /* 0 */

  /* Values 1 to 23 correspond, in order, to ESC_A .. ESC_z. */

  OP_SOD,
  OP_SOM,
  OP_SET_SOM,
  OP_NOT_WORD_BOUNDARY,
  OP_WORD_BOUNDARY,
  OP_NOT_DIGIT,
  OP_DIGIT,
  OP_NOT_WHITESPACE,
  OP_WHITESPACE,
  OP_NOT_WORDCHAR,
  OP_WORDCHAR,
  OP_ANY,            /* 12: lines up with ESC_dum1 */
  OP_ANYBYTE,
  OP_NOTPROP,
  OP_PROP,
  OP_ANYNL,
  OP_NOT_HSPACE,
  OP_HSPACE,
  OP_NOT_VSPACE,
  OP_VSPACE,
  OP_EXTUNI,
  OP_EODN,
  OP_EOD,            /* 23 */

  OP_OPT,
  OP_CIRC,
  OP_DOLL,
  OP_CHAR,
  OP_CHARNC,
  OP_NOT,

  OP_STAR,           /* 30 */
  OP_MINSTAR,
  OP_PLUS,
  OP_MINPLUS,
  OP_QUERY,
  OP_MINQUERY,

  OP_UPTO,
  OP_MINUPTO,
  OP_EXACT,

  OP_POSSTAR,
  OP_POSPLUS,
  OP_POSQUERY,
  OP_POSUPTO,

  OP_NOTSTAR,        /* 43 */
  OP_NOTMINSTAR,
  OP_NOTPLUS,
  OP_NOTMINPLUS,
  OP_NOTQUERY,
  OP_NOTMINQUERY,

  OP_NOTUPTO,
  OP_NOTMINUPTO,
  OP_NOTEXACT,

  OP_NOTPOSSTAR,
  OP_NOTPOSPLUS,
  OP_NOTPOSQUERY,
  OP_NOTPOSUPTO,

  OP_TYPESTAR,       /* 56 */
  OP_TYPEMINSTAR,
  OP_TYPEPLUS,
  OP_TYPEMINPLUS,
  OP_TYPEQUERY,
  OP_TYPEMINQUERY,

  OP_TYPEUPTO,
  OP_TYPEMINUPTO,
  OP_TYPEEXACT,

  OP_TYPEPOSSTAR,
  OP_TYPEPOSPLUS,
  OP_TYPEPOSQUERY,
  OP_TYPEPOSUPTO,

  OP_CRSTAR,         /* 69 */
  OP_CRMINSTAR,
  OP_CRPLUS,
  OP_CRMINPLUS,
  OP_CRQUERY,
  OP_CRMINQUERY,
  OP_CRRANGE,
  OP_CRMINRANGE,

  OP_CLASS,          /* 77 */
  OP_NCLASS,         /* 78 Like OP_CLASS, but the bitmap was created from a
                     negative class - the difference is relevant only when a
                     UTF-8 character > 255 is encountered. */

  OP_XCLASS,

  OP_REF,            /* 80 */
  OP_RECURSE,
  OP_CALLOUT,

  OP_ALT,            /* 83 */
  OP_KET,
  OP_KETRMAX,
  OP_KETRMIN,

  OP_ASSERT,         /* 87 */
  OP_ASSERT_NOT,
  OP_ASSERTBACK,
  OP_ASSERTBACK_NOT,
  OP_REVERSE,

  /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
  as there's a test for >= ONCE for a subpattern that isn't an assertion. */

  OP_ONCE,           /* 92 */
  OP_BRA,
  OP_CBRA,
  OP_COND,

  /* These three must follow the previous three (BRA, CBRA, COND), in the same
  order. There's a check for >= SBRA to distinguish the two sets. */

  OP_SBRA,           /* 96 */
  OP_SCBRA,
  OP_SCOND,

  OP_CREF,           /* 99 */
  OP_RREF,
  OP_DEF,

  OP_BRAZERO,        /* 102 */
  OP_BRAMINZERO,

  OP_PRUNE,          /* 104 */
  OP_SKIP,
  OP_THEN,
  OP_COMMIT,

  OP_FAIL,           /* 108 */
  OP_ACCEPT          /* 109 */
};
|
|
|
|
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
/* Printable names for the opcodes, one string per opcode in opcode order
(110 entries), written as a comma-separated initializer list without braces. */

#define OP_NAME_LIST \
  "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \
  "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \
  "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \
  "extuni",  "\\Z", "\\z",                                        \
  "Opt", "^", "$", "char", "charnc", "not",                       \
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
  "*+","++", "?+", "{",                                           \
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
  "*+","++", "?+", "{",                                           \
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
  "*+","++", "?+", "{",                                           \
  "*", "*?", "+", "+?", "?", "??", "{", "{",                      \
  "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \
  "AssertB", "AssertB not", "Reverse",                            \
  "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
  "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero",    \
  "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT"
|
|
|
|
|
|
|
|
|
|
|
|
/* One number per opcode, in opcode order (110 entries), written as a
comma-separated initializer list without braces; by its name, the length of
each opcode item. LINK_SIZE is expanded where the macro is used. The entry for
OP_XCLASS is 0. The trailing comments give the opcodes each row covers. */

#define OP_LENGTHS \
  1,                             /* OP_END                                 */ \
  1, 1, 1, 1, 1,                 /* OP_SOD .. OP_WORD_BOUNDARY             */ \
  1, 1, 1, 1, 1, 1,              /* OP_NOT_DIGIT .. OP_WORDCHAR            */ \
  1, 1,                          /* OP_ANY, OP_ANYBYTE                     */ \
  3, 3, 1,                       /* OP_NOTPROP, OP_PROP, OP_ANYNL          */ \
  1, 1, 1, 1, 1,                 /* OP_NOT_HSPACE .. OP_EXTUNI             */ \
  1, 1, 2, 1, 1,                 /* OP_EODN, OP_EOD, OP_OPT, OP_CIRC, OP_DOLL */ \
  2,                             /* OP_CHAR                                */ \
  2,                             /* OP_CHARNC                              */ \
  2,                             /* OP_NOT                                 */ \
                                 /* OP_STAR .. OP_POSUPTO                  */ \
  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??                    */ \
  4, 4, 4,                       /* upto, minupto, exact                   */ \
  2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \
                                 /* OP_NOTSTAR .. OP_NOTPOSUPTO            */ \
  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??                    */ \
  4, 4, 4,                       /* upto, minupto, exact                   */ \
  2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \
                                 /* OP_TYPESTAR .. OP_TYPEPOSUPTO          */ \
  2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??                    */ \
  4, 4, 4,                       /* upto, minupto, exact                   */ \
  2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \
                                 /* OP_CRSTAR .. OP_CRMINRANGE             */ \
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \
  5, 5,                          /* OP_CRRANGE, OP_CRMINRANGE              */ \
 33,                             /* OP_CLASS                               */ \
 33,                             /* OP_NCLASS                              */ \
  0,                             /* OP_XCLASS                              */ \
  3,                             /* OP_REF                                 */ \
  1+LINK_SIZE,                   /* OP_RECURSE                             */ \
  2+2*LINK_SIZE,                 /* OP_CALLOUT                             */ \
  1+LINK_SIZE,                   /* OP_ALT                                 */ \
  1+LINK_SIZE,                   /* OP_KET                                 */ \
  1+LINK_SIZE,                   /* OP_KETRMAX                             */ \
  1+LINK_SIZE,                   /* OP_KETRMIN                             */ \
  1+LINK_SIZE,                   /* OP_ASSERT                              */ \
  1+LINK_SIZE,                   /* OP_ASSERT_NOT                          */ \
  1+LINK_SIZE,                   /* OP_ASSERTBACK                          */ \
  1+LINK_SIZE,                   /* OP_ASSERTBACK_NOT                      */ \
  1+LINK_SIZE,                   /* OP_REVERSE                             */ \
  1+LINK_SIZE,                   /* OP_ONCE                                */ \
  1+LINK_SIZE,                   /* OP_BRA                                 */ \
  3+LINK_SIZE,                   /* OP_CBRA                                */ \
  1+LINK_SIZE,                   /* OP_COND                                */ \
  1+LINK_SIZE,                   /* OP_SBRA                                */ \
  3+LINK_SIZE,                   /* OP_SCBRA                               */ \
  1+LINK_SIZE,                   /* OP_SCOND                               */ \
  3,                             /* OP_CREF                                */ \
  3,                             /* OP_RREF                                */ \
  1,                             /* OP_DEF                                 */ \
  1, 1,                          /* OP_BRAZERO, OP_BRAMINZERO              */ \
  1, 1, 1, 1,                    /* OP_PRUNE, OP_SKIP, OP_THEN, OP_COMMIT  */ \
  1, 1                           /* OP_FAIL, OP_ACCEPT                     */
|
|
|
|
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
/* Largest value that fits in two bytes. NOTE(review): presumably the OP_RREF
operand meaning "any recursion" rather than a specific group - confirm. */
#define RREF_ANY  0xffff
|
|
|
|
|
|
|
|
|
|
|
|
/* Error numbers: ERRn has the value n, for n from 0 to 63. */

enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
       ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
       ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
       ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
       ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
       ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
       ERR60, ERR61, ERR62, ERR63 };
|
|
|
|
|
|
|
|
/* The real format of the start of the pcre block; the index of names and the
|
|
|
|
code vector run on as long as necessary after the end. We store an explicit
|
|
|
|
offset to the name table so that if a regex is compiled on one host, saved, and
|
|
|
|
then run on another where the size of pointers is different, all might still
|
|
|
|
be well. For the case of compiled-on-4 and run-on-8, we include an extra
|
|
|
|
pointer that is always NULL. For future-proofing, a few dummy fields were
|
|
|
|
originally included - even though you can never get this planning right - but
|
|
|
|
there is only one left now.
|
|
|
|
|
|
|
|
NOTE NOTE NOTE:
|
|
|
|
Because people can now save and re-use compiled patterns, any additions to this
|
|
|
|
structure should be made at the end, and something earlier (e.g. a new
|
|
|
|
flag in the options or one of the dummy fields) should indicate that the new
|
|
|
|
fields are present. Currently PCRE always sets the dummy fields to zero.
|
|
|
|
NOTE NOTE NOTE:
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct real_pcre {
|
|
|
|
pcre_uint32 magic_number;
|
2022-09-04 17:29:16 +08:00
|
|
|
pcre_uint32 size;
|
|
|
|
pcre_uint32 options;
|
|
|
|
pcre_uint16 flags;
|
|
|
|
pcre_uint16 dummy1;
|
2022-08-29 11:36:29 +08:00
|
|
|
pcre_uint16 top_bracket;
|
|
|
|
pcre_uint16 top_backref;
|
|
|
|
pcre_uint16 first_byte;
|
|
|
|
pcre_uint16 req_byte;
|
2022-09-04 17:29:16 +08:00
|
|
|
pcre_uint16 name_table_offset;
|
|
|
|
pcre_uint16 name_entry_size;
|
|
|
|
pcre_uint16 name_count;
|
|
|
|
pcre_uint16 ref_count;
|
2022-08-29 11:36:29 +08:00
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
const unsigned char *tables;
|
|
|
|
const unsigned char *nullpad;
|
2022-08-29 11:36:29 +08:00
|
|
|
} real_pcre;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct pcre_study_data {
|
2022-09-04 17:29:16 +08:00
|
|
|
pcre_uint32 size;
|
2022-08-29 11:36:29 +08:00
|
|
|
pcre_uint32 options;
|
|
|
|
uschar start_bits[32];
|
|
|
|
} pcre_study_data;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct compile_data {
|
2022-09-04 17:29:16 +08:00
|
|
|
const uschar *lcc;
|
|
|
|
const uschar *fcc;
|
|
|
|
const uschar *cbits;
|
|
|
|
const uschar *ctypes;
|
|
|
|
const uschar *start_workspace;
|
|
|
|
const uschar *start_code;
|
|
|
|
const uschar *start_pattern;
|
|
|
|
const uschar *end_pattern;
|
|
|
|
uschar *hwm;
|
|
|
|
uschar *name_table;
|
|
|
|
int names_found;
|
|
|
|
int name_entry_size;
|
|
|
|
int bracount;
|
|
|
|
int final_bracount;
|
|
|
|
int top_backref;
|
|
|
|
unsigned int backref_map;
|
|
|
|
int external_options;
|
|
|
|
int external_flags;
|
|
|
|
int req_varyopt;
|
|
|
|
BOOL had_accept;
|
|
|
|
int nltype;
|
|
|
|
int nllen;
|
|
|
|
uschar nl[4];
|
2022-08-29 11:36:29 +08:00
|
|
|
} compile_data;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct branch_chain {
|
|
|
|
struct branch_chain *outer;
|
|
|
|
uschar *current;
|
|
|
|
} branch_chain;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct recursion_info {
|
2022-09-04 17:29:16 +08:00
|
|
|
struct recursion_info *prevrec;
|
|
|
|
int group_num;
|
|
|
|
const uschar *after_call;
|
|
|
|
USPTR save_start;
|
|
|
|
int *offset_save;
|
|
|
|
int saved_max;
|
2022-08-29 11:36:29 +08:00
|
|
|
} recursion_info;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct eptrblock {
|
|
|
|
struct eptrblock *epb_prev;
|
|
|
|
USPTR epb_saved_eptr;
|
|
|
|
} eptrblock;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct match_data {
|
2022-09-04 17:29:16 +08:00
|
|
|
unsigned long int match_call_count;
|
|
|
|
unsigned long int match_limit;
|
|
|
|
unsigned long int match_limit_recursion;
|
|
|
|
int *offset_vector;
|
|
|
|
int offset_end;
|
|
|
|
int offset_max;
|
|
|
|
int nltype;
|
|
|
|
int nllen;
|
|
|
|
uschar nl[4];
|
|
|
|
const uschar *lcc;
|
|
|
|
const uschar *ctypes;
|
|
|
|
BOOL offset_overflow;
|
|
|
|
BOOL notbol;
|
|
|
|
BOOL noteol;
|
|
|
|
BOOL utf8;
|
|
|
|
BOOL endonly;
|
|
|
|
BOOL notempty;
|
|
|
|
BOOL partial;
|
|
|
|
BOOL hitend;
|
|
|
|
BOOL bsr_anycrlf;
|
|
|
|
const uschar *start_code;
|
|
|
|
USPTR start_subject;
|
|
|
|
USPTR end_subject;
|
|
|
|
USPTR start_match_ptr;
|
|
|
|
USPTR end_match_ptr;
|
|
|
|
int end_offset_top;
|
|
|
|
int capture_last;
|
|
|
|
int start_offset;
|
|
|
|
eptrblock *eptrchain;
|
|
|
|
int eptrn;
|
|
|
|
recursion_info *recursive;
|
|
|
|
void *callout_data;
|
2022-08-29 11:36:29 +08:00
|
|
|
} match_data;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct dfa_match_data {
|
2022-09-04 17:29:16 +08:00
|
|
|
const uschar *start_code;
|
|
|
|
const uschar *start_subject;
|
|
|
|
const uschar *end_subject;
|
|
|
|
const uschar *tables;
|
|
|
|
int moptions;
|
|
|
|
int poptions;
|
|
|
|
int nltype;
|
|
|
|
int nllen;
|
|
|
|
uschar nl[4];
|
|
|
|
void *callout_data;
|
2022-08-29 11:36:29 +08:00
|
|
|
} dfa_match_data;
|
|
|
|
|
|
|
|
|
|
|
|
/* Single-bit flags with a ctype_ prefix; they can be OR-ed together in one
byte. Bits 0x20 and 0x40 are not assigned. */

#define ctype_space   0x01
#define ctype_letter  0x02
#define ctype_digit   0x04
#define ctype_xdigit  0x08
#define ctype_word    0x10
#define ctype_meta    0x80
|
|
|
|
|
|
|
|
|
|
|
|
/* Offsets with a cbit_ prefix, spaced 32 apart; cbit_length (320) is the total
for the ten entries. NOTE(review): presumably byte offsets of 32-byte (256-bit)
class bitmaps within the cbits table - confirm. */

#define cbit_space     0
#define cbit_xdigit   32
#define cbit_digit    64
#define cbit_upper    96
#define cbit_lower   128
#define cbit_word    160
#define cbit_graph   192
#define cbit_print   224
#define cbit_punct   256
#define cbit_cntrl   288
#define cbit_length  320
|
|
|
|
|
2022-08-29 11:36:29 +08:00
|
|
|
|
|
|
|
/* Offsets of four consecutive sub-tables and the overall length: 256 bytes at
lcc_offset, 256 at fcc_offset, cbit_length at cbits_offset and 256 at
ctypes_offset. */

#define lcc_offset      0
#define fcc_offset    256
#define cbits_offset  512
#define ctypes_offset (cbits_offset + cbit_length)
#define tables_length (ctypes_offset + 256)
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
pcre_uint16 name_offset;
|
|
|
|
pcre_uint16 type;
|
|
|
|
pcre_uint16 value;
|
|
|
|
} ucp_type_table;
|
|
|
|
|
|
|
|
|
|
|
|
extern const int _pcre_utf8_table1[];
|
|
|
|
extern const int _pcre_utf8_table2[];
|
|
|
|
extern const int _pcre_utf8_table3[];
|
|
|
|
extern const uschar _pcre_utf8_table4[];
|
|
|
|
|
|
|
|
extern const int _pcre_utf8_table1_size;
|
|
|
|
|
|
|
|
extern const ucp_type_table _pcre_utt[];
|
|
|
|
extern const int _pcre_utt_size;
|
|
|
|
|
|
|
|
extern const uschar _pcre_default_tables[];
|
|
|
|
|
|
|
|
extern const uschar _pcre_OP_lengths[];
|
|
|
|
|
|
|
|
|
|
|
|
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
|
|
|
int *, BOOL);
|
|
|
|
extern int _pcre_ord2utf8(int, uschar *);
|
|
|
|
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
|
|
|
const pcre_study_data *, pcre_study_data *);
|
|
|
|
extern int _pcre_valid_utf8(const uschar *, int);
|
|
|
|
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
|
|
|
int *, BOOL);
|
|
|
|
extern BOOL _pcre_xclass(int, const uschar *);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
2022-09-04 17:29:16 +08:00
|
|
|
|