mirror of
https://gitee.com/Lyon1998/pikapython.git
synced 2025-01-29 17:22:56 +08:00
re V2
This commit is contained in:
parent
d85fed9260
commit
963338e33d
141
package/re/cre.c
141
package/re/cre.c
@ -1,15 +1,16 @@
|
||||
|
||||
/* #define PCRE_STATIC */
|
||||
|
||||
/*
|
||||
*
|
||||
* Generally additional utility functions.
|
||||
* L flag, also known as re.LOCALE in Python is not available here.
|
||||
* Wrong results may be returned by re_sub-like functions when 'repl' contains '\', for example '\\\\1'.
|
||||
*
|
||||
* 4/9/2022
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "pcre.h"
|
||||
#include "cre.h"
|
||||
|
||||
/// @brief get the number of groups in a re pattern
|
||||
/// @param re: re pattern
|
||||
/// @param out_groups_number : from 0,1,2,3,4...
|
||||
/// @return an array pointer; free it after use
|
||||
int *_re_get_vec_table(pcre *re, int *out_groups_number)
|
||||
{
|
||||
int brackets_number = 0;
|
||||
@ -25,18 +26,9 @@ int *_re_get_vec_table(pcre *re, int *out_groups_number)
|
||||
return vec;
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
|
||||
* (https?)://((\w+\.)+)(\w+)
|
||||
* hihsid dii https://www.baidu.com, http://glwang.com
|
||||
*************************************************************************/
|
||||
|
||||
int *pcre_match(const char *_pat, const char *s, int len, int *out_vec_number, int opt)
|
||||
{
|
||||
int *vec = NULL;
|
||||
// int group_n = 0;
|
||||
//int rc;
|
||||
// int start_offset = 0;
|
||||
pcre *re = re_get_match_re(_pat, opt);
|
||||
if (!re)
|
||||
return NULL;
|
||||
@ -67,9 +59,9 @@ match:
|
||||
}
|
||||
if (rc <= 0)
|
||||
goto e_er;
|
||||
if (vec[0] == vec[1]) // a empty match
|
||||
if (vec[0] == vec[1])
|
||||
{
|
||||
start_offset++; // advace a position
|
||||
start_offset++;
|
||||
if (start_offset >= len)
|
||||
goto e_er;
|
||||
goto match;
|
||||
@ -84,9 +76,6 @@ e_er:
|
||||
int *pcre_fullmatch(const char *_pat, const char *s, int len, int *out_vec_number, int opt)
|
||||
{
|
||||
int *vec = NULL;
|
||||
// int group_n = 0;
|
||||
//int rc;
|
||||
// int start_offset = 0;
|
||||
opt &= ~PCRE_MULTILINE;
|
||||
pcre *re = re_get_fullmatch_re(_pat, opt);
|
||||
if (!re)
|
||||
@ -118,9 +107,9 @@ match:
|
||||
}
|
||||
if (rc <= 0)
|
||||
goto e_er;
|
||||
if (vec[0] == vec[1]) // a empty match
|
||||
if (vec[0] == vec[1])
|
||||
{
|
||||
start_offset++; // advace a position
|
||||
start_offset++;
|
||||
if (start_offset >= len)
|
||||
goto e_er;
|
||||
goto match;
|
||||
@ -252,9 +241,9 @@ match:
|
||||
}
|
||||
if (rc <= 0)
|
||||
goto e_er;
|
||||
if (vec[0] == vec[1]) // a empty match
|
||||
if (vec[0] == vec[1])
|
||||
{
|
||||
start_offset++; // advace a position
|
||||
start_offset++;
|
||||
if (start_offset >= len)
|
||||
goto e_er;
|
||||
goto match;
|
||||
@ -277,17 +266,10 @@ int **re_searchall(const char *pat, const char *s, int len, int *out_number, int
|
||||
pcre_free(re);
|
||||
return res;
|
||||
}
|
||||
/// @brief find all match in a string
|
||||
/// @param re: re pattern
|
||||
/// @param s : string searching in
|
||||
/// @param out_number : the number of matches
|
||||
/// @return a vector table, vrc[n] is the nth match,
|
||||
/// vrc[group_n][i*2] - vrc[group_n][i*2+1] are the beginning offset and ending offset of group i.
|
||||
/// Use re_free_searchall() to free the memory
|
||||
int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_vec_number, int opt)
|
||||
{
|
||||
int start_offset = 0;
|
||||
int **vecs = NULL; // to store vec
|
||||
int **vecs = NULL;
|
||||
int vec_cap = 4;
|
||||
int vec_n = 0;
|
||||
int *vec = NULL;
|
||||
@ -304,7 +286,8 @@ int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_
|
||||
*out_vec_number = group_n;
|
||||
group_n *= 3;
|
||||
}
|
||||
if (!vec){
|
||||
if (!vec)
|
||||
{
|
||||
goto e_er;
|
||||
}
|
||||
int rc;
|
||||
@ -319,14 +302,13 @@ int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_
|
||||
}
|
||||
if (rc <= 0)
|
||||
goto e_er;
|
||||
if (vec[0] == vec[1]) // a empty match
|
||||
if (vec[0] == vec[1])
|
||||
{
|
||||
start_offset++; // advace a position
|
||||
start_offset++;
|
||||
if (start_offset >= len)
|
||||
goto e_er;
|
||||
goto match;
|
||||
}
|
||||
//to sotre vec
|
||||
if (!vecs)
|
||||
{
|
||||
vecs = (int **)malloc(sizeof(int *) * vec_cap);
|
||||
@ -334,32 +316,28 @@ int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_
|
||||
goto e_er;
|
||||
}
|
||||
|
||||
if (vec_n >= vec_cap) // need to recap this list
|
||||
if (vec_n >= vec_cap)
|
||||
{
|
||||
vec_cap *= 2;
|
||||
void *p = realloc(vecs, vec_cap * sizeof(int *));
|
||||
if (!p)
|
||||
goto e_er;
|
||||
// if (p != vecs) // move data
|
||||
// {
|
||||
// memmove(p, vecs, vec_n * sizeof(int*));
|
||||
vecs = (int **)p;
|
||||
// }
|
||||
}
|
||||
vecs[vec_n++] = vec;
|
||||
start_offset = vec[1];
|
||||
}
|
||||
e_er:
|
||||
if (vec)
|
||||
free(vec); // the latest vec table
|
||||
free(vec);
|
||||
if (!vecs)
|
||||
return NULL;
|
||||
for (int j = 0; j < vec_n; j++)
|
||||
{
|
||||
if (vecs[j])
|
||||
free((void *)(vecs[j])); // free vec table
|
||||
free((void *)(vecs[j]));
|
||||
}
|
||||
free(vecs); // free the table list
|
||||
free(vecs);
|
||||
return NULL;
|
||||
}
|
||||
void re_free_searchall(int **vecs, int n)
|
||||
@ -369,9 +347,9 @@ void re_free_searchall(int **vecs, int n)
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
if (vecs[j])
|
||||
free((void *)(vecs[j])); // free vec table
|
||||
free((void *)(vecs[j]));
|
||||
}
|
||||
free(vecs); // free the table list
|
||||
free(vecs);
|
||||
}
|
||||
|
||||
/* the following functions return (a) string in heap, which means it need to be freed after using*/
|
||||
@ -438,9 +416,9 @@ match:
|
||||
}
|
||||
if (rc <= 0)
|
||||
goto e_er;
|
||||
if (vec[0] == vec[1]) // a empty match
|
||||
if (vec[0] == vec[1])
|
||||
{
|
||||
start_offset++; // advace a position
|
||||
start_offset++;
|
||||
if (start_offset >= len)
|
||||
goto e_er;
|
||||
goto match;
|
||||
@ -498,9 +476,9 @@ void re_free_findall(char **ss, int n)
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
if (ss[j])
|
||||
free((void *)(ss[j])); // free vec table
|
||||
free((void *)(ss[j]));
|
||||
}
|
||||
free(ss); // free the table list
|
||||
free(ss);
|
||||
}
|
||||
|
||||
char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt)
|
||||
@ -514,30 +492,20 @@ char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt)
|
||||
pcre_free(re);
|
||||
return res;
|
||||
}
|
||||
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt)
|
||||
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt, int *out_repl_times)
|
||||
{
|
||||
const char *error;
|
||||
int erroffset;
|
||||
pcre *re = pcre_compile(pat, opt, &error, &erroffset, NULL);
|
||||
if (!re)
|
||||
return NULL;
|
||||
char *res = re_subn2(re, to, s, len, n, opt);
|
||||
char *res = re_subn2(re, to, s, len, n, opt, out_repl_times);
|
||||
pcre_free(re);
|
||||
return res;
|
||||
}
|
||||
/// @brief substitute a string with a pattern expression, given replacement limit
|
||||
/// @param re : re pattern for matching
|
||||
/// @param to : re pattern to replacement
|
||||
/// @param s : string searching in
|
||||
/// @param len : length of <s>
|
||||
/// @param n : the replacement number
|
||||
/// @return if no replacement, return s exactly, otherwise return a new string, free it after using
|
||||
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
|
||||
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt, int *out_repl_times)
|
||||
{
|
||||
int group_n = 0;
|
||||
// int group_n2 = 0;
|
||||
// int *vec = NULL;
|
||||
// int *vec2 = NULL;
|
||||
pcre *re2 = NULL;
|
||||
int vcs1_n = 0, vcs2_n = 0;
|
||||
int **vcs1 = re_searchall2(re, s, len, &vcs1_n, &group_n, opt);
|
||||
@ -545,11 +513,8 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
|
||||
int match_limit = 0;
|
||||
if (!vcs1_n)
|
||||
{
|
||||
//no match, no replacement
|
||||
return (char *)s;
|
||||
}
|
||||
//to determine '\\' and group like: '\group_n'
|
||||
//3 groups, 0, 1, 2->\\, 3->\group_n, if any
|
||||
const char *p2 = "(\\\\\\\\|\\\\\\d{1,2})";
|
||||
int erroffset;
|
||||
const char *error;
|
||||
@ -559,76 +524,55 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
|
||||
re2 = pcre_compile(p2, 0, &error, &erroffset, NULL);
|
||||
if (!re2)
|
||||
goto exit_error;
|
||||
//match <to>
|
||||
len_to = strlen(to);
|
||||
vcs2 = re_searchall2(re2, to, len_to, &vcs2_n, NULL, 0);
|
||||
//if (!vcs2)
|
||||
//{
|
||||
// //goto exit_error;
|
||||
// vcs2_n = 0;
|
||||
//}
|
||||
|
||||
pcre_free(re2);
|
||||
re2 = NULL;
|
||||
//note that re2 is no use after this, onece we get vcs2
|
||||
remain_length2 = len_to; // the remain length in 'to' exclude from all '\\' and all '\n'
|
||||
remain_length2 = len_to;
|
||||
for (int i = 0; i < vcs2_n; i++)
|
||||
{
|
||||
int *vc = vcs2[i]; // (0,1)->'\\'or'\n', (2,3)->'\\', (4,5)->'\n', (6,7,8)
|
||||
int *vc = vcs2[i];
|
||||
int vc0 = vc[0] + 1;
|
||||
if (to[vc0] == '\\')
|
||||
{
|
||||
vc[2] = 0;
|
||||
remain_length2 -= 2;
|
||||
}
|
||||
else // \n,
|
||||
else
|
||||
{
|
||||
int wanted_number = 0;
|
||||
//vc[1]--;
|
||||
int l_n = vc[1] - vc0;
|
||||
if (l_n == 1)
|
||||
{
|
||||
wanted_number = to[vc0] - '0';
|
||||
remain_length2 -= 2;
|
||||
}
|
||||
else // if(l_n==2)
|
||||
else
|
||||
{
|
||||
wanted_number = (to[vc0] - '0') * 10 + to[vc0 + 1] - '0';
|
||||
remain_length2 -= 3;
|
||||
}
|
||||
if (wanted_number <= 0 || wanted_number >= group_n)
|
||||
goto exit_error;
|
||||
//store it in vc[2]
|
||||
vc[2] = wanted_number;
|
||||
}
|
||||
}
|
||||
//now that vcs2 stores data of which group is used in replacement
|
||||
//Nx9, N is the number of groups used in every one replcaement,
|
||||
//while vcs2[2] is the exact group number used in replacement
|
||||
|
||||
//parse 'to'
|
||||
//get the remian size
|
||||
match_limit = n ? (n <= vcs1_n ? n : vcs1_n) : vcs1_n;
|
||||
remain_size = len + remain_length2 * match_limit;
|
||||
//match times
|
||||
for (int i = 0; i < match_limit; i++)
|
||||
{
|
||||
int *vc = vcs1[i];
|
||||
//vc[1]-vc[0] is the match sequence which need to be replaced, while the following are groups
|
||||
remain_size -= vc[1] - vc[0];
|
||||
// the replcaements
|
||||
// 'to' e.g.: \\ \1, \2, ....\x
|
||||
for (int j = 0; j < vcs2_n; j++)
|
||||
{
|
||||
int *v2 = vcs2[j];
|
||||
if (v2[2])
|
||||
{
|
||||
//replaced to a group
|
||||
remain_size += GetGroupLen(vc, v2[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
//replaced with a '\'
|
||||
remain_size++;
|
||||
}
|
||||
}
|
||||
@ -655,13 +599,11 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
|
||||
int to_group_at = vc[to_group * 2];
|
||||
int to_group_end = vc[to_group * 2 + 1];
|
||||
int g_l = to_group_end - to_group_at;
|
||||
//replaced to a group
|
||||
memcpy(new_s + pi, s + to_group_at, g_l);
|
||||
pi += g_l;
|
||||
}
|
||||
else
|
||||
{
|
||||
//replaced with a '\'
|
||||
new_s[pi++] = '\\';
|
||||
}
|
||||
m_start = v2[1];
|
||||
@ -669,9 +611,10 @@ char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
|
||||
m_len = len_to - m_start;
|
||||
memcpy(new_s + pi, to + m_start, m_len);
|
||||
pi += m_len;
|
||||
// end of one match
|
||||
qi = vc[1];
|
||||
}
|
||||
if (out_repl_times)
|
||||
*out_repl_times = match_limit;
|
||||
if (vcs1)
|
||||
re_free_searchall(vcs1, vcs1_n);
|
||||
if (vcs2)
|
||||
@ -692,13 +635,7 @@ exit_error:
|
||||
pcre_free(re2);
|
||||
return NULL;
|
||||
}
|
||||
/// @brief substitute a string with a pattern expression
|
||||
/// @param re : re pattern for matching
|
||||
/// @param to : re pattern to replacement
|
||||
/// @param s : string searching in
|
||||
/// @param len : length of <s>
|
||||
/// @return if no replacement, return s exactly, otherwise return a new string, free it after using
|
||||
char *re_sub2(pcre *re, const char *to, const char *s, int len, int opt)
|
||||
{
|
||||
return re_subn2(re, to, s, len, 0, opt);
|
||||
return re_subn2(re, to, s, len, 0, opt, NULL);
|
||||
}
|
||||
|
@ -42,9 +42,9 @@ void re_free_findall(char **ss, int n);
|
||||
|
||||
char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt);
|
||||
|
||||
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt);
|
||||
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt, int *out_repl_times);
|
||||
|
||||
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt);
|
||||
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt, int *out_repl_times);
|
||||
|
||||
char *re_sub2(pcre *re, const char *to, const char *s, int len, int opt);
|
||||
#endif
|
@ -2,63 +2,13 @@
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR @PCRE_MAJOR@
|
||||
#define PCRE_MINOR @PCRE_MINOR@
|
||||
#define PCRE_PRERELEASE @PCRE_PRERELEASE@
|
||||
#define PCRE_DATE @PCRE_DATE@
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options */
|
||||
|
||||
#define PCRE_CASELESS 0x00000001
|
||||
#define PCRE_MULTILINE 0x00000002
|
||||
@ -87,19 +37,19 @@ extern "C" {
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000
|
||||
#define PCRE_BSR_UNICODE 0x01000000
|
||||
#define PCRE_ONLY_ASCII 0x02000000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5)
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_CALLOUT (-9)
|
||||
#define PCRE_ERROR_BADUTF8 (-10)
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
||||
#define PCRE_ERROR_PARTIAL (-12)
|
||||
@ -112,17 +62,16 @@ extern "C" {
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22)
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTCHAR 4
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
@ -134,8 +83,6 @@ extern "C" {
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
@ -147,8 +94,6 @@ compatible. */
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
#define PCRE_CONFIG_BSR 8
|
||||
|
||||
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
@ -156,109 +101,67 @@ these bits, just add new ones on the end, in order to remain compatible. */
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such as way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
|
||||
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bits for which fields are set */
|
||||
void *study_data; /* Opaque data from pcre_study() */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
unsigned long int flags;
|
||||
void *study_data;
|
||||
unsigned long int match_limit;
|
||||
void *callout_data;
|
||||
const unsigned char *tables;
|
||||
unsigned long int match_limit_recursion;
|
||||
} pcre_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
PCRE_SPTR subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------------------------------------------------------ */
|
||||
int version;
|
||||
int callout_number;
|
||||
int *offset_vector;
|
||||
PCRE_SPTR subject;
|
||||
int subject_length;
|
||||
int start_match;
|
||||
int current_position;
|
||||
int capture_top;
|
||||
int capture_last;
|
||||
void *callout_data;
|
||||
int pattern_position;
|
||||
int next_item_length;
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
// PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
// int *, int, const char *, char *, int);
|
||||
// PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
// int);
|
||||
// PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
// const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
// PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
// PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
// PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
// int *, int, const char *, const char **);
|
||||
// PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
// PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
// char **, char **);
|
||||
// PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
// const char **);
|
||||
// PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
// const char ***);
|
||||
// PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
|
||||
// PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
// PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
// PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
void *(*pcre_malloc)(size_t);
|
||||
void (*pcre_free)(void *);
|
||||
void *(*pcre_stack_malloc)(size_t);
|
||||
void (*pcre_stack_free)(void *);
|
||||
int (*pcre_callout)(pcre_callout_block *);
|
||||
#else
|
||||
void *pcre_malloc(size_t);
|
||||
void pcre_free(void *);
|
||||
void *pcre_stack_malloc(size_t);
|
||||
void pcre_stack_free(void *);
|
||||
int pcre_callout(pcre_callout_block *);
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
||||
pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1,25 +1,3 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #includes are present because without the gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#include "re_config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
@ -5744,7 +5744,7 @@ Returns: pointer to compiled data block, or NULL on error,
|
||||
with errorptr and erroroffset set
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN pcre *
|
||||
pcre *
|
||||
pcre_compile(const char *pattern, int options, const char **errorptr,
|
||||
int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
@ -5752,7 +5752,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
|
||||
}
|
||||
|
||||
|
||||
PCRE_EXP_DEFN pcre *
|
||||
pcre *
|
||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
||||
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
|
@ -4306,7 +4306,7 @@ Returns: > 0 => success; value is the number of elements filled in
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
int
|
||||
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount)
|
||||
|
@ -23,7 +23,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
|
@ -11,11 +11,11 @@ differently, and global variables are not used (see pcre.in). */
|
||||
#include "pcre_internal.h"
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
void *(*pcre_malloc)(size_t) = malloc;
|
||||
void (*pcre_free)(void *) = free;
|
||||
void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
void (*pcre_stack_free)(void *) = free;
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,13 +1,3 @@
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#include "re_config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
@ -1,7 +1,3 @@
|
||||
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#include "re_config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
@ -1,10 +1,4 @@
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#include "re_config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
@ -1,9 +1,4 @@
|
||||
|
||||
/* This module contains an internal function that tests a compiled pattern to
|
||||
see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#include "re_config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
@ -1,8 +1,4 @@
|
||||
|
||||
/* This module contains an internal function for validating UTF-8 character
|
||||
strings. */
|
||||
|
||||
|
||||
#include "re_config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
@ -1,9 +1,4 @@
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_def_exec(). */
|
||||
|
||||
|
||||
#include "re_config.h"
|
||||
#include "pcre_internal.h"
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,44 +1,77 @@
|
||||
from PikaObj import *
|
||||
|
||||
A: int
|
||||
ASCII: int
|
||||
I: int
|
||||
IGNORECASE: int
|
||||
M: int
|
||||
MULTILINE: int
|
||||
S: int
|
||||
DOTALL: int
|
||||
# here, unlike in Python, there is no 'UNICODE' flag,
|
||||
# because this version only supports UTF-8 characters
|
||||
|
||||
|
||||
def __init__(): ...
|
||||
|
||||
|
||||
class Pattern:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __del__(self):
|
||||
pass
|
||||
|
||||
def findall(self, subject: str, *flags) -> list:
|
||||
pass
|
||||
def sub(self, repl: str, subjet: str, *flags) -> str:
|
||||
|
||||
def sub(self, repl: str, subjet: str, *count__flags) -> str:
|
||||
pass
|
||||
|
||||
def subn(self, repl: str, subjet: str, *count__flags) -> list:
|
||||
pass
|
||||
|
||||
def match(self, subject: str, *flags) -> Match:
|
||||
pass
|
||||
|
||||
def fullmatch(self, subject: str, *flags) -> Match:
|
||||
pass
|
||||
|
||||
def search(self, subject: str, *flags) -> Match:
|
||||
pass
|
||||
|
||||
def split(self, subject: str, *maxsplit__flags) -> list:
|
||||
pass
|
||||
|
||||
|
||||
class Match:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __del__(self):
|
||||
pass
|
||||
def group(self, n: int) -> str:
|
||||
pass
|
||||
def groups(self) -> list:
|
||||
pass
|
||||
def span(self, group_n: int) -> list:
|
||||
|
||||
def group(self, *n) -> str:
|
||||
pass
|
||||
|
||||
def groups(self) -> list:
|
||||
pass
|
||||
# ! may return a wrong offset when the subject contains wide characters, such as Chinese
|
||||
# this function returns the exact memory offset between the beginning of the string and the target substring
|
||||
def span(self, *group_n) -> list:
|
||||
pass
|
||||
|
||||
|
||||
def findall(pattern: str, subject: str, *flags) -> list: ...
|
||||
def sub(pattern: str, repl: str, subjet: str, *flags) -> str: ...
|
||||
# def sub(pattern, repl, string, count=0, flags=0)
|
||||
def sub(pattern: str, repl: str, subjet: str, *count__flags) -> str: ...
|
||||
def match(pattern: str, subject: str, *flags) -> Match: ...
|
||||
def fullmatch(pattern: str, subject: str, *flags) -> Match: ...
|
||||
def search(pattern: str, subject: str, *flags) -> Match: ...
|
||||
def compile(pattern: str) -> Pattern: ...
|
||||
def compile(pattern: str, *flags) -> Pattern: ...
|
||||
|
||||
def escape(pattern: str) -> str: ...
|
||||
# def subn(pattern, repl, string, count=0, flags=0)
|
||||
def subn(pattern: str, repl: str, subjet: str, *count__flags) -> list: ...
|
||||
# def finditer(pattern: str, subject: str, *flags):
|
||||
def split(pattern: str, subject: str, *maxsplit__flags) -> list: ...
|
||||
|
@ -93,14 +93,16 @@ This module prototype are likes this:
|
||||
|
||||
# flags
|
||||
|
||||
A: int
|
||||
ASCII: int
|
||||
I: int
|
||||
IGNORECASE:int
|
||||
IGNORECASE: int
|
||||
M: int
|
||||
MULTILINE:int
|
||||
MULTILINE: int
|
||||
S: int
|
||||
DOTALL: int
|
||||
|
||||
|
||||
class Pattern():
|
||||
class Pattern:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@ -110,40 +112,53 @@ class Pattern():
|
||||
def findall(self, subject: str, *flags) -> list:
|
||||
pass
|
||||
|
||||
def sub(self, repl: str, subjet: str, *flags) -> str:
|
||||
def sub(self, repl: str, subjet: str, *count__flags) -> str:
|
||||
pass
|
||||
|
||||
def subn(self, repl: str, subjet: str, *count__flags) -> list:
|
||||
pass
|
||||
|
||||
def match(self, subject: str, *flags) -> Match:
|
||||
pass
|
||||
|
||||
def fullmatch(self, subject: str, *flags) -> Match:
|
||||
pass
|
||||
|
||||
def search(self, subject: str, *flags) -> Match:
|
||||
pass
|
||||
|
||||
def split(self, subject: str, *maxsplit__flags) -> list:
|
||||
pass
|
||||
|
||||
class Match():
|
||||
|
||||
class Match:
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def __del__(self):
|
||||
pass
|
||||
|
||||
def group(self, n: int) -> str:
|
||||
def group(self, *n) -> str:
|
||||
pass
|
||||
|
||||
def groups(self) -> list:
|
||||
pass
|
||||
|
||||
def span(self, group_n: int) -> list:
|
||||
def span(self, *group_n) -> list:
|
||||
pass
|
||||
|
||||
|
||||
def findall(pattern: str, subject: str, *flags) -> list:...
|
||||
def sub(pattern: str, repl: str, subjet: str, *flags) -> str: ...
|
||||
def findall(pattern: str, subject: str, *flags) -> list: ...
|
||||
# def sub(pattern, repl, string, count=0, flags=0)
|
||||
def sub(pattern: str, repl: str, subjet: str, *count__flags) -> str: ...
|
||||
def match(pattern: str, subject: str, *flags) -> Match: ...
|
||||
def fullmatch(pattern: str, subject: str, *flags) -> Match: ...
|
||||
def search(pattern: str, subject: str, *flags) -> Match: ...
|
||||
def compile(pattern: str) -> Pattern: ...
|
||||
def compile(pattern: str, *flags) -> Pattern: ...
|
||||
def escape(pattern: str) -> str: ...
|
||||
# def subn(pattern, repl, string, count=0, flags=0)
|
||||
def subn(pattern: str, repl: str, subjet: str, *count__flags) -> list: ...
|
||||
# def finditer(pattern: str, subject: str, *flags):
|
||||
def split(pattern: str, subject: str, *maxsplit__flags) -> list: ...
|
||||
|
||||
```
|
||||
|
Loading…
x
Reference in New Issue
Block a user