This commit is contained in:
eglwang 2022-09-04 17:29:16 +08:00
parent d85fed9260
commit 963338e33d
18 changed files with 1247 additions and 1205 deletions

View File

@ -1,15 +1,16 @@

/* #define PCRE_STATIC */
/*
*
* General-purpose additional utility functions.
* The L flag, also known as re.LOCALE in Python, is not available here.
* Wrong results may be returned by re_sub-like functions when 'repl' contains '\', for example '\\\\1'.
*
* 4/9/2022
*/
#include <stdio.h>
#include <string.h>
#include "pcre.h"
#include "cre.h"
/// @brief get the number of groups in a re pattern
/// @param re: re pattern
/// @param out_groups_number : from 0,1,2,3,4...
/// @return an array pointer; free it after use
int *_re_get_vec_table(pcre *re, int *out_groups_number)
{
int brackets_number = 0;
@ -25,18 +26,9 @@ int *_re_get_vec_table(pcre *re, int *out_groups_number)
return vec;
}
/*************************************************************************
* (https?)://((\w+\.)+)(\w+)
* hihsid dii https://www.baidu.com, http://glwang.com
*************************************************************************/
int *pcre_match(const char *_pat, const char *s, int len, int *out_vec_number, int opt)
{
int *vec = NULL;
// int group_n = 0;
//int rc;
// int start_offset = 0;
pcre *re = re_get_match_re(_pat, opt);
if (!re)
return NULL;
@ -67,9 +59,9 @@ match:
}
if (rc <= 0)
goto e_er;
if (vec[0] == vec[1]) // a empty match
if (vec[0] == vec[1])
{
start_offset++; // advace a position
start_offset++;
if (start_offset >= len)
goto e_er;
goto match;
@ -84,9 +76,6 @@ e_er:
int *pcre_fullmatch(const char *_pat, const char *s, int len, int *out_vec_number, int opt)
{
int *vec = NULL;
// int group_n = 0;
//int rc;
// int start_offset = 0;
opt &= ~PCRE_MULTILINE;
pcre *re = re_get_fullmatch_re(_pat, opt);
if (!re)
@ -118,9 +107,9 @@ match:
}
if (rc <= 0)
goto e_er;
if (vec[0] == vec[1]) // a empty match
if (vec[0] == vec[1])
{
start_offset++; // advace a position
start_offset++;
if (start_offset >= len)
goto e_er;
goto match;
@ -252,9 +241,9 @@ match:
}
if (rc <= 0)
goto e_er;
if (vec[0] == vec[1]) // a empty match
if (vec[0] == vec[1])
{
start_offset++; // advace a position
start_offset++;
if (start_offset >= len)
goto e_er;
goto match;
@ -277,17 +266,10 @@ int **re_searchall(const char *pat, const char *s, int len, int *out_number, int
pcre_free(re);
return res;
}
/// @brief find all match in a string
/// @param re: re pattern
/// @param s : string searching in
/// @param out_number : the number of matches
/// @return a vector table, vrc[n] is the nth matchs,
/// vrc[group_n][i*2] - vrc[group_n][i*2+1] is the begining-offset and ending-offset of group i.
/// Use re_free_searchall() to free the memory
int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_vec_number, int opt)
{
int start_offset = 0;
int **vecs = NULL; // to store vec
int **vecs = NULL;
int vec_cap = 4;
int vec_n = 0;
int *vec = NULL;
@ -304,7 +286,8 @@ int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_
*out_vec_number = group_n;
group_n *= 3;
}
if (!vec){
if (!vec)
{
goto e_er;
}
int rc;
@ -319,14 +302,13 @@ int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_
}
if (rc <= 0)
goto e_er;
if (vec[0] == vec[1]) // a empty match
if (vec[0] == vec[1])
{
start_offset++; // advace a position
start_offset++;
if (start_offset >= len)
goto e_er;
goto match;
}
//to sotre vec
if (!vecs)
{
vecs = (int **)malloc(sizeof(int *) * vec_cap);
@ -334,32 +316,28 @@ int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_
goto e_er;
}
if (vec_n >= vec_cap) // need to recap this list
if (vec_n >= vec_cap)
{
vec_cap *= 2;
void *p = realloc(vecs, vec_cap * sizeof(int *));
if (!p)
goto e_er;
// if (p != vecs) // move data
// {
// memmove(p, vecs, vec_n * sizeof(int*));
vecs = (int **)p;
// }
}
vecs[vec_n++] = vec;
start_offset = vec[1];
}
e_er:
if (vec)
free(vec); // the latest vec table
free(vec);
if (!vecs)
return NULL;
for (int j = 0; j < vec_n; j++)
{
if (vecs[j])
free((void *)(vecs[j])); // free vec table
free((void *)(vecs[j]));
}
free(vecs); // free the table list
free(vecs);
return NULL;
}
void re_free_searchall(int **vecs, int n)
@ -369,9 +347,9 @@ void re_free_searchall(int **vecs, int n)
for (int j = 0; j < n; j++)
{
if (vecs[j])
free((void *)(vecs[j])); // free vec table
free((void *)(vecs[j]));
}
free(vecs); // free the table list
free(vecs);
}
/* The following functions return string(s) allocated on the heap, which must be freed after use. */
@ -438,9 +416,9 @@ match:
}
if (rc <= 0)
goto e_er;
if (vec[0] == vec[1]) // a empty match
if (vec[0] == vec[1])
{
start_offset++; // advace a position
start_offset++;
if (start_offset >= len)
goto e_er;
goto match;
@ -498,9 +476,9 @@ void re_free_findall(char **ss, int n)
for (int j = 0; j < n; j++)
{
if (ss[j])
free((void *)(ss[j])); // free vec table
free((void *)(ss[j]));
}
free(ss); // free the table list
free(ss);
}
char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt)
@ -514,30 +492,20 @@ char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt)
pcre_free(re);
return res;
}
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt)
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt, int *out_repl_times)
{
const char *error;
int erroffset;
pcre *re = pcre_compile(pat, opt, &error, &erroffset, NULL);
if (!re)
return NULL;
char *res = re_subn2(re, to, s, len, n, opt);
char *res = re_subn2(re, to, s, len, n, opt, out_repl_times);
pcre_free(re);
return res;
}
/// @brief substitute a string with a pattern expression, given replacement limit
/// @param re : re pattern for matching
/// @param to : re pattern to replacement
/// @param s : string searching in
/// @param len : length of <s>
/// @param n : the replacement number
/// @return if no replacement, return s exactly, otherwise return a new string, free it after using
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt, int *out_repl_times)
{
int group_n = 0;
// int group_n2 = 0;
// int *vec = NULL;
// int *vec2 = NULL;
pcre *re2 = NULL;
int vcs1_n = 0, vcs2_n = 0;
int **vcs1 = re_searchall2(re, s, len, &vcs1_n, &group_n, opt);
@ -545,11 +513,8 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
int match_limit = 0;
if (!vcs1_n)
{
//no match, no replacement
return (char *)s;
}
//to determine '\\' and group like: '\group_n'
//3 groups, 0, 1, 2->\\, 3->\group_n, if any
const char *p2 = "(\\\\\\\\|\\\\\\d{1,2})";
int erroffset;
const char *error;
@ -559,76 +524,55 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
re2 = pcre_compile(p2, 0, &error, &erroffset, NULL);
if (!re2)
goto exit_error;
//match <to>
len_to = strlen(to);
vcs2 = re_searchall2(re2, to, len_to, &vcs2_n, NULL, 0);
//if (!vcs2)
//{
// //goto exit_error;
// vcs2_n = 0;
//}
pcre_free(re2);
re2 = NULL;
//note that re2 is no use after this, onece we get vcs2
remain_length2 = len_to; // the remain length in 'to' exclude from all '\\' and all '\n'
remain_length2 = len_to;
for (int i = 0; i < vcs2_n; i++)
{
int *vc = vcs2[i]; // (0,1)->'\\'or'\n', (2,3)->'\\', (4,5)->'\n', (6,7,8)
int *vc = vcs2[i];
int vc0 = vc[0] + 1;
if (to[vc0] == '\\')
{
vc[2] = 0;
remain_length2 -= 2;
}
else // \n,
else
{
int wanted_number = 0;
//vc[1]--;
int l_n = vc[1] - vc0;
if (l_n == 1)
{
wanted_number = to[vc0] - '0';
remain_length2 -= 2;
}
else // if(l_n==2)
else
{
wanted_number = (to[vc0] - '0') * 10 + to[vc0 + 1] - '0';
remain_length2 -= 3;
}
if (wanted_number <= 0 || wanted_number >= group_n)
goto exit_error;
//store it in vc[2]
vc[2] = wanted_number;
}
}
//now that vcs2 stores data of which group is used in replacement
//Nx9, N is the number of groups used in every one replcaement,
//while vcs2[2] is the exact group number used in replacement
//parse 'to'
//get the remian size
match_limit = n ? (n <= vcs1_n ? n : vcs1_n) : vcs1_n;
remain_size = len + remain_length2 * match_limit;
//match times
for (int i = 0; i < match_limit; i++)
{
int *vc = vcs1[i];
//vc[1]-vc[0] is the match sequence which need to be replaced, while the following are groups
remain_size -= vc[1] - vc[0];
// the replcaements
// 'to' e.g.: \\ \1, \2, ....\x
for (int j = 0; j < vcs2_n; j++)
{
int *v2 = vcs2[j];
if (v2[2])
{
//replaced to a group
remain_size += GetGroupLen(vc, v2[2]);
}
else
{
//replaced to a '/'
remain_size++;
}
}
@ -655,13 +599,11 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
int to_group_at = vc[to_group * 2];
int to_group_end = vc[to_group * 2 + 1];
int g_l = to_group_end - to_group_at;
//replaced to a group
memcpy(new_s + pi, s + to_group_at, g_l);
pi += g_l;
}
else
{
//replaced to a '/'
new_s[pi++] = '\\';
}
m_start = v2[1];
@ -669,9 +611,10 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
m_len = len_to - m_start;
memcpy(new_s + pi, to + m_start, m_len);
pi += m_len;
// end of one match
qi = vc[1];
}
if (out_repl_times)
*out_repl_times = match_limit;
if (vcs1)
re_free_searchall(vcs1, vcs1_n);
if (vcs2)
@ -692,13 +635,7 @@ exit_error:
pcre_free(re2);
return NULL;
}
/// @brief substitute a string with a pattern expression
/// @param re : re pattern for matching
/// @param to : re pattern to replacement
/// @param s : string searching in
/// @param len : length of <s>
/// @return if no replacement, return s exactly, otherwise return a new string, free it after using
char *re_sub2(pcre *re, const char *to, const char *s, int len, int opt)
{
return re_subn2(re, to, s, len, 0, opt);
return re_subn2(re, to, s, len, 0, opt, NULL);
}

View File

@ -42,9 +42,9 @@ void re_free_findall(char **ss, int n);
char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt);
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt);
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt, int *out_repl_times);
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt);
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt, int *out_repl_times);
char *re_sub2(pcre *re, const char *to, const char *s, int len, int opt);
#endif

View File

@ -2,63 +2,13 @@
#ifndef _PCRE_H
#define _PCRE_H
/* The current PCRE version information. */
#define PCRE_MAJOR @PCRE_MAJOR@
#define PCRE_MINOR @PCRE_MINOR@
#define PCRE_PRERELEASE @PCRE_PRERELEASE@
#define PCRE_DATE @PCRE_DATE@
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE_STATIC)
# ifndef PCRE_EXP_DECL
# define PCRE_EXP_DECL extern __declspec(dllimport)
# endif
# ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN __declspec(dllimport)
# endif
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE_EXP_DECL
# ifdef __cplusplus
# define PCRE_EXP_DECL extern "C"
# else
# define PCRE_EXP_DECL extern
# endif
#endif
#ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN
# endif
#endif
/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options */
#define PCRE_CASELESS 0x00000001
#define PCRE_MULTILINE 0x00000002
@ -87,19 +37,19 @@ extern "C" {
#define PCRE_NEWLINE_ANYCRLF 0x00500000
#define PCRE_BSR_ANYCRLF 0x00800000
#define PCRE_BSR_UNICODE 0x01000000
#define PCRE_ONLY_ASCII 0x02000000
/* Exec-time and get/set-time error codes */
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_UNKNOWN_NODE (-5)
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_CALLOUT (-9)
#define PCRE_ERROR_BADUTF8 (-10)
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
#define PCRE_ERROR_PARTIAL (-12)
@ -112,17 +62,16 @@ extern "C" {
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_NULLWSLIMIT (-22)
#define PCRE_ERROR_BADNEWLINE (-23)
/* Request types for pcre_fullinfo() */
#define PCRE_INFO_OPTIONS 0
#define PCRE_INFO_SIZE 1
#define PCRE_INFO_CAPTURECOUNT 2
#define PCRE_INFO_BACKREFMAX 3
#define PCRE_INFO_FIRSTBYTE 4
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
#define PCRE_INFO_FIRSTCHAR 4
#define PCRE_INFO_FIRSTTABLE 5
#define PCRE_INFO_LASTLITERAL 6
#define PCRE_INFO_NAMEENTRYSIZE 7
@ -134,8 +83,6 @@ extern "C" {
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
@ -147,8 +94,6 @@ compatible. */
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
@ -156,109 +101,67 @@ these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such as way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
typedef struct pcre_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
unsigned long int flags;
void *study_data;
unsigned long int match_limit;
void *callout_data;
const unsigned char *tables;
unsigned long int match_limit_recursion;
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. */
typedef struct pcre_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------------------------------------------------------ */
int version;
int callout_number;
int *offset_vector;
PCRE_SPTR subject;
int subject_length;
int start_match;
int current_position;
int capture_top;
int capture_last;
void *callout_data;
int pattern_position;
int next_item_length;
} pcre_callout_block;
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
#ifndef VPCOMPAT
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
#else /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
#endif /* VPCOMPAT */
/* Exported PCRE functions */
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
// PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
// int *, int, const char *, char *, int);
// PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
// int);
// PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
// const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
// PCRE_EXP_DECL void pcre_free_substring(const char *);
// PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
// PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
// int *, int, const char *, const char **);
// PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
// PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
// char **, char **);
// PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
// const char **);
// PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
// const char ***);
// PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
// PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
// PCRE_EXP_DECL int pcre_refcount(pcre *, int);
// PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
#ifdef __cplusplus
} /* extern "C" */
void *(*pcre_malloc)(size_t);
void (*pcre_free)(void *);
void *(*pcre_stack_malloc)(size_t);
void (*pcre_stack_free)(void *);
int (*pcre_callout)(pcre_callout_block *);
#else
void *pcre_malloc(size_t);
void pcre_free(void *);
void *pcre_stack_malloc(size_t);
void pcre_stack_free(void *);
int pcre_callout(pcre_callout_block *);
#endif
#endif /* End of pcre.h */
pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,25 +1,3 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file contains character tables that are used when no external tables
are passed to PCRE by the application that calls it. The tables are used only
for characters whose code values are less than 256.
This is a default version of the tables that assumes ASCII encoding. A program
called dftables (which is distributed with PCRE) can be used to build
alternative versions of this file. This is necessary if you are running in an
EBCDIC environment, or if you want to default to a different encoding, for
example ISO-8859-1. When dftables is run, it creates these tables in the
current locale. If PCRE is configured with --enable-rebuild-chartables, this
happens automatically.
The following #includes are present because without the gcc 4.x may remove the
array definition from the final binary if PCRE is built into a static library
and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
#include "re_config.h"
#include "pcre_internal.h"

View File

@ -5744,7 +5744,7 @@ Returns: pointer to compiled data block, or NULL on error,
with errorptr and erroroffset set
*/
PCRE_EXP_DEFN pcre *
pcre *
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
{
@ -5752,7 +5752,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}
PCRE_EXP_DEFN pcre *
pcre *
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
{

View File

@ -4306,7 +4306,7 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
PCRE_EXP_DEFN int
int
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)

View File

@ -23,7 +23,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
int
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{

View File

@ -11,11 +11,11 @@ differently, and global variables are not used (see pcre.in). */
#include "pcre_internal.h"
#ifndef VPCOMPAT
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
void *(*pcre_malloc)(size_t) = malloc;
void (*pcre_free)(void *) = free;
void *(*pcre_stack_malloc)(size_t) = malloc;
void (*pcre_stack_free)(void *) = free;
int (*pcre_callout)(pcre_callout_block *) = NULL;
#endif
/* End of pcre_globals.c */

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,3 @@
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#include "re_config.h"
#include "pcre_internal.h"

View File

@ -1,7 +1,3 @@
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
#include "re_config.h"
#include "pcre_internal.h"

View File

@ -1,10 +1,4 @@
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcretest program, which
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
#include "re_config.h"
#include "pcre_internal.h"

View File

@ -1,9 +1,4 @@
/* This module contains an internal function that tests a compiled pattern to
see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#include "re_config.h"
#include "pcre_internal.h"

View File

@ -1,8 +1,4 @@
/* This module contains an internal function for validating UTF-8 character
strings. */
#include "re_config.h"
#include "pcre_internal.h"

View File

@ -1,9 +1,4 @@
/* This module contains an internal function that is used to match an extended
class (one that contains characters whose values are > 255). It is used by both
pcre_exec() and pcre_def_exec(). */
#include "re_config.h"
#include "pcre_internal.h"

File diff suppressed because it is too large Load Diff

View File

@ -1,44 +1,77 @@
from PikaObj import *
A: int
ASCII: int
I: int
IGNORECASE: int
M: int
MULTILINE: int
S: int
DOTALL: int
# Unlike Python, there is no 'UNICODE' flag here,
# because this version only supports UTF-8 characters
def __init__(): ...
class Pattern:
def __init__(self):
pass
def __del__(self):
pass
def findall(self, subject: str, *flags) -> list:
pass
def sub(self, repl: str, subjet: str, *flags) -> str:
def sub(self, repl: str, subjet: str, *count__flags) -> str:
pass
def subn(self, repl: str, subjet: str, *count__flags) -> list:
pass
def match(self, subject: str, *flags) -> Match:
pass
def fullmatch(self, subject: str, *flags) -> Match:
pass
def search(self, subject: str, *flags) -> Match:
pass
def split(self, subject: str, *maxsplit__flags) -> list:
pass
class Match:
def __init__(self):
pass
def __del__(self):
pass
def group(self, n: int) -> str:
pass
def groups(self) -> list:
pass
def span(self, group_n: int) -> list:
def group(self, *n) -> str:
pass
def groups(self) -> list:
pass
# ! may return a wrong offset when the subject contains wide characters, such as Chinese
# this function returns the exact memory offset between the beginning of the string and the target substring
def span(self, *group_n) -> list:
pass
def findall(pattern: str, subject: str, *flags) -> list: ...
def sub(pattern: str, repl: str, subjet: str, *flags) -> str: ...
# def sub(pattern, repl, string, count=0, flags=0)
def sub(pattern: str, repl: str, subjet: str, *count__flags) -> str: ...
def match(pattern: str, subject: str, *flags) -> Match: ...
def fullmatch(pattern: str, subject: str, *flags) -> Match: ...
def search(pattern: str, subject: str, *flags) -> Match: ...
def compile(pattern: str) -> Pattern: ...
def compile(pattern: str, *flags) -> Pattern: ...
def escape(pattern: str) -> str: ...
# def subn(pattern, repl, string, count=0, flags=0)
def subn(pattern: str, repl: str, subjet: str, *count__flags) -> list: ...
# def finditer(pattern: str, subject: str, *flags):
def split(pattern: str, subject: str, *maxsplit__flags) -> list: ...

View File

@ -93,14 +93,16 @@ This module prototype are likes this:
# flags
A: int
ASCII: int
I: int
IGNORECASE:int
IGNORECASE: int
M: int
MULTILINE:int
MULTILINE: int
S: int
DOTALL: int
class Pattern():
class Pattern:
def __init__(self):
pass
@ -110,40 +112,53 @@ class Pattern():
def findall(self, subject: str, *flags) -> list:
pass
def sub(self, repl: str, subjet: str, *flags) -> str:
def sub(self, repl: str, subjet: str, *count__flags) -> str:
pass
def subn(self, repl: str, subjet: str, *count__flags) -> list:
pass
def match(self, subject: str, *flags) -> Match:
pass
def fullmatch(self, subject: str, *flags) -> Match:
pass
def search(self, subject: str, *flags) -> Match:
pass
def split(self, subject: str, *maxsplit__flags) -> list:
pass
class Match():
class Match:
def __init__(self):
pass
def __del__(self):
pass
def group(self, n: int) -> str:
def group(self, *n) -> str:
pass
def groups(self) -> list:
pass
def span(self, group_n: int) -> list:
def span(self, *group_n) -> list:
pass
def findall(pattern: str, subject: str, *flags) -> list:...
def sub(pattern: str, repl: str, subjet: str, *flags) -> str: ...
def findall(pattern: str, subject: str, *flags) -> list: ...
# def sub(pattern, repl, string, count=0, flags=0)
def sub(pattern: str, repl: str, subjet: str, *count__flags) -> str: ...
def match(pattern: str, subject: str, *flags) -> Match: ...
def fullmatch(pattern: str, subject: str, *flags) -> Match: ...
def search(pattern: str, subject: str, *flags) -> Match: ...
def compile(pattern: str) -> Pattern: ...
def compile(pattern: str, *flags) -> Pattern: ...
def escape(pattern: str) -> str: ...
# def subn(pattern, repl, string, count=0, flags=0)
def subn(pattern: str, repl: str, subjet: str, *count__flags) -> list: ...
# def finditer(pattern: str, subject: str, *flags):
def split(pattern: str, subject: str, *maxsplit__flags) -> list: ...
```