!107 为pikaScript 开发的正则表达式模块

Merge pull request !107 from eglwang/master
This commit is contained in:
李昂 2022-08-29 07:57:24 +00:00 committed by Gitee
commit 446621fba2
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
25 changed files with 15365 additions and 0 deletions

214
package/re/config.h Normal file
View File

@ -0,0 +1,214 @@
/* Define to 1 if you have the `bcopy' function. */
#ifndef HAVE_BCOPY
#define HAVE_BCOPY 0
#endif
/* Define to 1 if you have the <bits/type_traits.h> header file. */
/* #undef HAVE_BITS_TYPE_TRAITS_H */
/* Define to 1 if you have the <bzlib.h> header file. */
#ifndef HAVE_BZLIB_H
#define HAVE_BZLIB_H 0
#endif
/* Define to 1 if you have the <dirent.h> header file. */
#ifndef HAVE_DIRENT_H
#define HAVE_DIRENT_H 0
#endif
/* Define to 1 if you have the <dlfcn.h> header file. */
#ifndef HAVE_DLFCN_H
#define HAVE_DLFCN_H 0
#endif
/* Define to 1 if you have the <inttypes.h> header file. */
#ifndef HAVE_INTTYPES_H
#define HAVE_INTTYPES_H 1
#endif
/* Define to 1 if you have the <limits.h> header file. */
#ifndef HAVE_LIMITS_H
#define HAVE_LIMITS_H 1
#endif
/* Define to 1 if the system has the type `long long'. */
#ifndef HAVE_LONG_LONG
#define HAVE_LONG_LONG 1
#endif
/* Define to 1 if you have the `memmove' function. */
#ifndef HAVE_MEMMOVE
#define HAVE_MEMMOVE 1
#endif
/* Define to 1 if you have the <memory.h> header file. */
#ifndef HAVE_MEMORY_H
#define HAVE_MEMORY_H 1
#endif
/* Define to 1 if you have the <readline/history.h> header file. */
#ifndef HAVE_READLINE_HISTORY_H
#define HAVE_READLINE_HISTORY_H 0
#endif
/* Define to 1 if you have the <readline/readline.h> header file. */
#ifndef HAVE_READLINE_READLINE_H
#define HAVE_READLINE_READLINE_H 0
#endif
/* Define to 1 if you have the <stdint.h> header file. */
#ifndef HAVE_STDINT_H
#define HAVE_STDINT_H 1
#endif
/* Define to 1 if you have the <stdlib.h> header file. */
#ifndef HAVE_STDLIB_H
#define HAVE_STDLIB_H 1
#endif
/* Define to 1 if you have the `strerror' function. */
#ifndef HAVE_STRERROR
#define HAVE_STRERROR 1
#endif
/* Define to 1 if you have the <string> header file. */
#ifndef HAVE_STRING
#define HAVE_STRING 0
#endif
/* Define to 1 if you have the <strings.h> header file. */
#ifndef HAVE_STRINGS_H
#define HAVE_STRINGS_H 0
#endif
/* Define to 1 if you have the <string.h> header file. */
#ifndef HAVE_STRING_H
#define HAVE_STRING_H 1
#endif
/* Define to 1 if you have the `strtoll' function. */
#ifndef HAVE_STRTOLL
#define HAVE_STRTOLL 1
#endif
/* Define to 1 if you have the `strtoq' function. */
#ifndef HAVE_STRTOQ
#define HAVE_STRTOQ 0
#endif
/* Define to 1 if you have the <sys/stat.h> header file. */
#ifndef HAVE_SYS_STAT_H
#define HAVE_SYS_STAT_H 0
#endif
/* Define to 1 if you have the <sys/types.h> header file. */
#ifndef HAVE_SYS_TYPES_H
#define HAVE_SYS_TYPES_H 0
#endif
/* Define to 1 if you have the <type_traits.h> header file. */
/* #undef HAVE_TYPE_TRAITS_H */
/* Define to 1 if you have the <unistd.h> header file. */
#ifndef HAVE_UNISTD_H
#define HAVE_UNISTD_H 0
#endif
/* Define to 1 if the system has the type `unsigned long long'. */
#ifndef HAVE_UNSIGNED_LONG_LONG
#define HAVE_UNSIGNED_LONG_LONG 1
#endif
/* Define to 1 if you have the <windows.h> header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the <zlib.h> header file. */
#ifndef HAVE_ZLIB_H
#define HAVE_ZLIB_H 0
#endif
/* Define to 1 if you have the `_strtoi64' function. */
/* #undef HAVE__STRTOI64 */
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. On systems that support it,
"configure" can be used to override this default. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#endif
/* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. On systems that
support it, "configure" can be used to override this default. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit
the depth of recursive calls of match() more strictly, in order to restrict
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
match(). To have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. On systems that support it,
"configure" can be used to override the default. */
#ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_COUNT
#define MAX_NAME_COUNT 10000
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_SIZE
#define MAX_NAME_SIZE 32
#endif
/* The value of NEWLINE determines the newline character sequence. On systems
that support it, "configure" can be used to override the default, which is
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
(ANYCRLF). */
#ifndef NEWLINE
#define NEWLINE 10
#endif
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE
requires three integers per substring, whereas the POSIX interface provides
only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
"configure" can be used to override this default. */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10
#endif
/* Define to 1 if you have the ANSI C header files. */
#ifndef STDC_HEADERS
#define STDC_HEADERS 1
#endif
/* Define to enable support for the UTF-8 Unicode encoding.
   It is defined unconditionally here (no #ifndef guard, unlike the other
   settings in this file), so UTF-8 support is always compiled in. */
#define SUPPORT_UTF8
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */

701
package/re/cre.c Normal file
View File

@ -0,0 +1,701 @@

/* #define PCRE_STATIC */
#include <stdio.h>
#include <string.h>
#include "pcre.h"
#include "cre.h"
/// @brief Allocate an offset vector sized for every group of a compiled pattern.
/// @param re : compiled pattern that is queried for its capture count
/// @param out_groups_number : optional; receives the capture count plus one
///        (entry 0 stands for the whole match)
/// @return a malloc()ed array of 3 * (capture count + 1) ints, or NULL when the
///         allocation fails; free() it after use
int *_re_get_vec_table(pcre *re, int *out_groups_number)
{
    int capture_count = 0;
    int groups;

    pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &capture_count);
    groups = capture_count + 1;
    if (out_groups_number != NULL)
        *out_groups_number = groups;
    /* three ints are reserved per group, as passed to pcre_exec() by the callers */
    return (int *)malloc((groups * 3) * sizeof(int));
}
/*************************************************************************
 * Compile <_pat> with re_get_match_re() and run it against <s> through
 * re_match2(). The compiled pattern is released before returning.
 *   example pattern : (https?)://((\w+\.)+)(\w+)
 *   example subject : hihsid dii https://www.baidu.com, http://glwang.com
 * Returns the offset vector from re_match2() (free() it after use), or
 * NULL when the pattern cannot be compiled or nothing matches.
 *************************************************************************/
int *pcre_match(const char *_pat, const char *s, int len, int *out_vec_number, int opt)
{
    pcre *compiled = re_get_match_re(_pat, opt);
    int *offsets;

    if (compiled == NULL)
        return NULL;
    offsets = re_match2(compiled, s, len, out_vec_number, opt);
    pcre_free(compiled);
    return offsets;
}
/// @brief Run a compiled pattern against <s> and return the first non-empty match.
/// @param re : compiled pattern
/// @param s : subject string
/// @param len : length of <s>
/// @param out_vec_number : optional; receives the number of groups (captures + 1)
/// @param opt : not used by this function
/// @return a malloc()ed offset vector (3 ints reserved per group), or NULL when there
///         is no match, on a PCRE error, or when the allocation fails; free() it after use
int *re_match2(pcre *re, const char *s, int len, int *out_vec_number, int opt)
{
    int groups = 0;
    int offset = 0;
    int vec_size;
    int status;
    int *offsets = _re_get_vec_table(re, &groups);

    if (out_vec_number != NULL)
        *out_vec_number = groups;
    if (offsets == NULL)
        return NULL;
    vec_size = groups * 3;

    for (;;)
    {
        status = pcre_exec(re, NULL, s, len, offset, 0, offsets, vec_size);
        if (status <= 0)
            break; /* PCRE_ERROR_NOMATCH or any other failure */
        if (offsets[0] != offsets[1])
            return offsets; /* a non-empty match was found */
        /* empty match: retry one position further on */
        if (++offset >= len)
            break;
    }
    free(offsets);
    return NULL;
}
/// @brief Compile <_pat> with re_get_fullmatch_re() and run it through re_fullmatch2().
/// @param _pat : pattern text
/// @param s : subject string
/// @param len : length of <s>
/// @param out_vec_number : optional; receives the number of groups (captures + 1)
/// @param opt : PCRE compile options; PCRE_MULTILINE is cleared before compiling
/// @return the offset vector from re_fullmatch2() (free() it after use), or NULL when
///         the pattern cannot be compiled or nothing matches
int *pcre_fullmatch(const char *_pat, const char *s, int len, int *out_vec_number, int opt)
{
    pcre *compiled;
    int *offsets;

    opt &= ~PCRE_MULTILINE;
    compiled = re_get_fullmatch_re(_pat, opt);
    if (compiled == NULL)
        return NULL;
    offsets = re_fullmatch2(compiled, s, len, out_vec_number, opt);
    pcre_free(compiled);
    return offsets;
}
/// @brief Run a compiled (full-match) pattern against <s> and return the first non-empty match.
/// @param re : compiled pattern
/// @param s : subject string
/// @param len : length of <s>
/// @param out_vec_number : optional; receives the number of groups (captures + 1)
/// @param opt : only has PCRE_MULTILINE cleared locally; it is not passed to pcre_exec()
/// @return a malloc()ed offset vector (3 ints reserved per group), or NULL when there
///         is no match, on a PCRE error, or when the allocation fails; free() it after use
int *re_fullmatch2(pcre *re, const char *s, int len, int *out_vec_number, int opt)
{
    int groups = 0;
    int offset = 0;
    int vec_size;
    int status;
    int *offsets = _re_get_vec_table(re, &groups);

    if (out_vec_number != NULL)
        *out_vec_number = groups;
    if (offsets == NULL)
        return NULL;
    vec_size = groups * 3;
    opt &= ~PCRE_MULTILINE; /* kept from the original; opt is not read afterwards */

    do
    {
        status = pcre_exec(re, NULL, s, len, offset, 0, offsets, vec_size);
        if (status <= 0)
            break; /* PCRE_ERROR_NOMATCH or any other failure */
        if (offsets[0] != offsets[1])
            return offsets; /* a non-empty match was found */
        offset++; /* empty match: advance one position and retry */
    } while (offset < len);

    free(offsets);
    return NULL;
}
pcre *re_get_match_re(const char *_pat, int opt)
{
const char *pat = _pat;
if (!*pat)
{
return NULL;
}
if (*pat != '^')
{
int pat_len = strlen(_pat);
char *p = (char *)pcre_malloc(pat_len + 2);
if (!p)
return NULL;
*p = '^';
memcpy(p + 1, _pat, pat_len + 1);
pat = p;
}
const char *error;
int erroffset;
pcre *re = pcre_compile(pat, opt, &error, &erroffset, NULL);
if (pat != _pat)
free((void *)pat);
return re;
}
pcre *re_get_fullmatch_re(const char *_pat, int opt)
{
const char *pat = _pat;
if (!*pat)
{
return NULL;
}
int prefix = 0, suffix = 0;
if (*pat != '^')
{
prefix = 1;
}
int pat_len = strlen(_pat);
if (_pat[pat_len - 1] != '$')
suffix = 1;
else
{
int n = pat_len - 2;
int i = 0;
while (_pat[n] == '\\')
{
i++;
n--;
}
if (i % 2)
{
suffix = 1;
}
}
int dn = prefix + suffix;
if (dn)
{
char *q = (char *)malloc(pat_len + dn + 1);
if (!q)
return NULL;
pat = q;
if (prefix)
{
*q = '^';
q++;
}
memcpy(q, _pat, pat_len);
q += pat_len;
if (suffix)
{
*q = '$';
q++;
}
*q = '\0';
}
const char *error;
int erroffset;
pcre *re = pcre_compile(pat, opt, &error, &erroffset, NULL);
if (pat != _pat)
free((void *)pat);
return re;
}
/* The following functions return a vector/table allocated on the heap; the caller must free it after use. */
/// @brief Compile <pat> as given and search <s> with re_search2().
/// @param pat : pattern text
/// @param s : subject string
/// @param len : length of <s>
/// @param out_vec_number : optional; receives the number of groups (captures + 1)
/// @param opt : options passed to pcre_compile() and forwarded to re_search2()
/// @return the offset vector from re_search2() (free() it after use), or NULL when
///         the pattern cannot be compiled or nothing matches
int *pcre_search(const char *pat, const char *s, int len, int *out_vec_number, int opt)
{
    const char *err_text;
    int err_pos;
    int *offsets;
    pcre *compiled = pcre_compile(pat, opt, &err_text, &err_pos, NULL);

    if (compiled == NULL)
        return NULL;
    offsets = re_search2(compiled, s, len, out_vec_number, opt);
    pcre_free(compiled);
    return offsets;
}
/// @brief Search <s> with a compiled pattern and return the first non-empty match.
/// @param re : compiled pattern
/// @param s : subject string
/// @param len : length of <s>
/// @param out_vec_number : optional; receives the number of groups (captures + 1)
/// @param opt : not used by this function
/// @return a malloc()ed offset vector (3 ints reserved per group), or NULL when there
///         is no match, on a PCRE error, or when the allocation fails; free() it after use
int *re_search2(pcre *re, const char *s, int len, int *out_vec_number, int opt)
{
    int groups = 0;
    int *offsets = _re_get_vec_table(re, &groups);
    int vec_size = groups * 3;
    int offset;

    if (out_vec_number != NULL)
        *out_vec_number = groups;
    if (offsets == NULL)
        return NULL;

    /* offset is the position where pcre_exec() starts looking; it only moves
       forward when the match found there is empty */
    for (offset = 0;;)
    {
        int status = pcre_exec(re, NULL, s, len, offset, 0, offsets, vec_size);
        if (status <= 0)
            break; /* PCRE_ERROR_NOMATCH or any other failure */
        if (offsets[0] != offsets[1])
            return offsets; /* a non-empty match was found */
        offset += 1;
        if (offset >= len)
            break;
    }
    free(offsets);
    return NULL;
}
/// @brief Compile <pat> and collect every match in <s> with re_searchall2().
/// @param pat : pattern text
/// @param s : subject string
/// @param len : length of <s>
/// @param out_number : receives the number of matches
/// @param out_vec_number : optional; receives the number of groups (captures + 1)
/// @param opt : options passed to pcre_compile() and forwarded to re_searchall2()
/// @return the table from re_searchall2() (release it with re_free_searchall()),
///         or NULL when the pattern cannot be compiled or nothing is returned
int **re_searchall(const char *pat, const char *s, int len, int *out_number, int *out_vec_number, int opt)
{
    const char *err_text;
    int err_pos;
    int **table;
    pcre *compiled = pcre_compile(pat, opt, &err_text, &err_pos, NULL);

    if (compiled == NULL)
        return NULL;
    table = re_searchall2(compiled, s, len, out_number, out_vec_number, opt);
    pcre_free(compiled);
    return table;
}
/// @brief Find all non-empty, non-overlapping matches of a compiled pattern in a string.
/// @param re : compiled pattern
/// @param s : string to search in
/// @param len : length of <s>
/// @param out_number : optional; receives the number of matches (0 on error)
/// @param out_vec_number : optional; receives the number of groups (captures + 1)
/// @param opt : not used by this function
/// @return a table of offset vectors: vecs[m] belongs to the m-th match, and
///         vecs[m][i*2] .. vecs[m][i*2+1] are the start and end offsets of group i.
///         NULL is returned when there is no match or on error.
///         Use re_free_searchall() to free the table.
int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_vec_number, int opt)
{
    int start_offset = 0;
    int **vecs = NULL; // the collected offset vectors
    int vec_cap = 4;
    int vec_n = 0;
    int *vec = NULL; // vector being filled; not yet owned by vecs
    int group_n = 0; // ints per offset vector, 0 until the first allocation
    int rc;          // declared up front: a label may not be followed by a declaration before C23

    if (out_number)
        *out_number = 0;
    for (;;)
    {
        if (group_n)
            vec = (int *)malloc(group_n * sizeof(int));
        else
        {
            vec = _re_get_vec_table(re, &group_n);
            if (out_vec_number)
                *out_vec_number = group_n;
            group_n *= 3;
        }
        if (!vec)
            goto e_er;

        for (;;)
        {
            rc = pcre_exec(re, NULL, s, len, start_offset, 0, vec, group_n);
            if (rc == PCRE_ERROR_NOMATCH)
                goto done;
            if (rc <= 0)
                goto e_er;
            if (vec[0] != vec[1])
                break; // a non-empty match
            // Empty match: advance one position. Running off the end of the
            // subject simply ends the search; it used to be handled as an error,
            // which threw away every match collected so far.
            start_offset++;
            if (start_offset >= len)
                goto done;
        }

        // store vec in the table
        if (!vecs)
        {
            vecs = (int **)malloc(sizeof(int *) * vec_cap);
            if (!vecs)
                goto e_er;
        }
        if (vec_n >= vec_cap) // the table is full, grow it
        {
            void *p = realloc(vecs, (vec_cap * 2) * sizeof(int *));
            if (!p)
                goto e_er;
            vec_cap *= 2;
            vecs = (int **)p;
        }
        vecs[vec_n++] = vec;
        start_offset = vec[1]; // continue right after this match
        vec = NULL;            // ownership moved into vecs
    }
done:
    free(vec); // the unused scratch vector
    if (out_number)
        *out_number = vec_n;
    return vecs;
e_er:
    free(vec); // the latest vector, never stored in vecs
    if (!vecs)
        return NULL;
    for (int j = 0; j < vec_n; j++)
        free(vecs[j]);
    free(vecs);
    return NULL;
}
/// @brief Release a table of offset vectors.
/// @param vecs : table to release; NULL is accepted and ignored
/// @param n : number of entries in <vecs>
void re_free_searchall(int **vecs, int n)
{
    int idx = 0;

    if (vecs == NULL)
        return;
    while (idx < n)
    {
        free(vecs[idx]); /* free(NULL) is a no-op, so empty slots need no check */
        idx++;
    }
    free(vecs); /* finally the table itself */
}
/* The following functions return string(s) allocated on the heap; the caller must free them after use. */
/// @brief Copy the whole-match text of every offset vector into its own string.
/// @param s : subject string the offsets refer to
/// @param vecs : table of offset vectors; vecs[j][0] and vecs[j][1] are the start and
///        end offsets of match j
/// @param n : number of entries in <vecs>
/// @return a malloc()ed array of <n> NUL-terminated malloc()ed strings, or NULL when
///         <vecs> is NULL, <n> is not positive, or an allocation fails.
///         Release it with re_free_findall().
char **_re_extract_substring(const char *s, int **vecs, int n)
{
    int done = 0; /* number of strings stored so far */
    char **res;

    if (!vecs || n <= 0)
        return NULL;
    /* malloc() is used because both the error path below and re_free_findall()
       release this memory with free(); the original allocated it with
       pcre_malloc(). */
    res = (char **)malloc(sizeof(char *) * n);
    if (!res)
        return NULL;
    for (int j = 0; j < n; j++)
    {
        int *v = vecs[j];
        int sub_len = v[1] - v[0];
        char *p = (char *)malloc(sub_len + 1);
        if (!p)
            goto e_er;
        memcpy(p, s + v[0], sub_len);
        p[sub_len] = 0;
        res[done++] = p;
    }
    return res;
e_er:
    for (int i = 0; i < done; i++)
        free(res[i]);
    free(res);
    return NULL;
}
/// @brief Compile <pat> and return a copy of the first non-empty match in <s>.
/// @param pat : pattern text
/// @param s : subject string
/// @param len : length of <s>
/// @param opt : options passed to pcre_compile() and forwarded to re_find2()
/// @return the string from re_find2() (free() it after use), or NULL when the
///         pattern cannot be compiled or nothing matches
char *re_find(const char *pat, const char *s, int len, int opt)
{
    const char *err_text;
    int err_pos;
    char *found;
    pcre *compiled = pcre_compile(pat, opt, &err_text, &err_pos, NULL);

    if (compiled == NULL)
        return NULL;
    found = re_find2(compiled, s, len, opt);
    pcre_free(compiled);
    return found;
}
/// @brief Return a copy of the first non-empty match of a compiled pattern in <s>.
/// @param re : compiled pattern
/// @param s : subject string
/// @param len : length of <s>
/// @param opt : not used by this function
/// @return a malloc()ed, NUL-terminated copy of the matched text, or NULL when there
///         is no match, on a PCRE error, or when an allocation fails; free() it after use
char *re_find2(pcre *re, const char *s, int len, int opt)
{
    int groups = 0;
    int offset = 0;
    int vec_size;
    int found_len;
    char *copy = NULL;
    int *offsets = _re_get_vec_table(re, &groups);

    if (offsets == NULL)
        return NULL;
    vec_size = groups * 3;

    for (;;)
    {
        int status = pcre_exec(re, NULL, s, len, offset, 0, offsets, vec_size);
        if (status <= 0)
            goto done; /* PCRE_ERROR_NOMATCH or any other failure */
        if (offsets[0] != offsets[1])
            break; /* a non-empty match was found */
        /* empty match: retry one position further on */
        if (++offset >= len)
            goto done;
    }

    /* the match is known to be non-empty here, so no zero-length check is needed */
    found_len = offsets[1] - offsets[0];
    copy = (char *)malloc(found_len + 1);
    if (copy != NULL)
    {
        memcpy(copy, s + offsets[0], found_len);
        copy[found_len] = 0;
    }
done:
    free(offsets);
    return copy;
}
/// @brief Compile <pat> and return copies of every match in <s>.
/// @param pat : pattern text
/// @param s : subject string
/// @param len : length of <s>
/// @param out_number : receives the number of matches
/// @param opt : options passed to pcre_compile() and forwarded to re_findall2()
/// @return the string array from re_findall2() (release it with re_free_findall()),
///         or NULL when the pattern cannot be compiled or nothing is returned
char **pcre_findall(const char *pat, const char *s, int len, int *out_number, int opt)
{
    const char *err_text;
    int err_pos;
    char **found;
    pcre *compiled = pcre_compile(pat, opt, &err_text, &err_pos, NULL);

    if (compiled == NULL)
        return NULL;
    found = re_findall2(compiled, s, len, out_number, opt);
    pcre_free(compiled);
    return found;
}
/// @brief Return copies of every match of a compiled pattern in <s>.
/// @param re : compiled pattern
/// @param s : subject string
/// @param len : length of <s>
/// @param out_number : optional; receives the number of strings returned
///        (0 when NULL is returned)
/// @param opt : forwarded to re_searchall2()
/// @return an array of malloc()ed strings, one per match, or NULL when nothing
///         matched or an error occurred. Release it with re_free_findall().
char **re_findall2(pcre *re, const char *s, int len, int *out_number, int opt)
{
    /* The count is kept in a local so that a NULL <out_number> is never
       dereferenced and so that it is defined even when the search fails. */
    int match_n = 0;
    int vec_groups = 0;
    int **vecs;
    char **res = NULL;

    vecs = re_searchall2(re, s, len, &match_n, &vec_groups, opt);
    if (vecs)
    {
        res = _re_extract_substring(s, vecs, match_n);
        re_free_searchall(vecs, match_n);
    }
    if (out_number)
        *out_number = res ? match_n : 0;
    return res;
}
/// @brief Release an array of strings.
/// @param ss : array to release; NULL is accepted and ignored
/// @param n : number of entries in <ss>
void re_free_findall(char **ss, int n)
{
    int idx = 0;

    if (ss == NULL)
        return;
    while (idx < n)
    {
        free(ss[idx]); /* free(NULL) is a no-op, so empty slots need no check */
        idx++;
    }
    free(ss); /* finally the array itself */
}
/// @brief Compile <pat> and substitute through re_sub2().
/// @param pat : pattern text
/// @param to : replacement template
/// @param s : subject string
/// @param len : length of <s>
/// @param opt : options passed to pcre_compile() and forwarded to re_sub2()
/// @return NULL when the pattern cannot be compiled; otherwise whatever re_sub2() returns
char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt)
{
    const char *err_text;
    int err_pos;
    char *replaced;
    pcre *compiled = pcre_compile(pat, opt, &err_text, &err_pos, NULL);

    if (compiled == NULL)
        return NULL;
    replaced = re_sub2(compiled, to, s, len, opt);
    pcre_free(compiled);
    return replaced;
}
/// @brief Compile <pat> and substitute through re_subn2().
/// @param pat : pattern text
/// @param to : replacement template
/// @param s : subject string
/// @param len : length of <s>
/// @param n : replacement limit forwarded to re_subn2()
/// @param opt : options passed to pcre_compile() and forwarded to re_subn2()
/// @return NULL when the pattern cannot be compiled; otherwise whatever re_subn2() returns
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt)
{
    const char *err_text;
    int err_pos;
    char *replaced;
    pcre *compiled = pcre_compile(pat, opt, &err_text, &err_pos, NULL);

    if (compiled == NULL)
        return NULL;
    replaced = re_subn2(compiled, to, s, len, n, opt);
    pcre_free(compiled);
    return replaced;
}
/// @brief Substitute matches of a compiled pattern, with an optional replacement limit.
/// @param re : compiled pattern to search for
/// @param to : replacement template; two consecutive backslashes produce one backslash,
///        and a backslash followed by one or two digits inserts that capture group
/// @param s : string to search in
/// @param len : length of <s>
/// @param n : maximum number of replacements; 0 or a negative value replaces every match
/// @param opt : forwarded to re_searchall2()
/// @return <s> itself (not a copy) when nothing matched; NULL on error (out of memory,
///         or <to> refers to group 0 or to a group the pattern does not have);
///         otherwise a new malloc()ed string, free it after use
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt)
{
    int group_n = 0;
    pcre *re2 = NULL;
    int vcs1_n = 0, vcs2_n = 0;
    int **vcs1 = re_searchall2(re, s, len, &vcs1_n, &group_n, opt);
    int **vcs2 = NULL;
    int match_limit = 0;
    if (!vcs1_n)
    {
        // no match, no replacement
        return (char *)s;
    }
    // finds the escapes inside <to>: a doubled backslash, or a backslash
    // followed by one or two digits (a group reference)
    const char *p2 = "(\\\\\\\\|\\\\\\d{1,2})";
    int erroffset;
    const char *error;
    int len_to, remain_size, remain_length2, pi = 0, qi = 0;
    char *new_s = NULL;
    re2 = pcre_compile(p2, 0, &error, &erroffset, NULL);
    if (!re2)
        goto exit_error;
    // match <to>
    len_to = strlen(to);
    // a NULL result leaves vcs2_n at 0, i.e. <to> is copied literally
    vcs2 = re_searchall2(re2, to, len_to, &vcs2_n, NULL, 0);
    // re2 is of no further use once vcs2 is known
    pcre_free(re2);
    re2 = NULL;
    remain_length2 = len_to; // length of <to> without its escape sequences
    for (int i = 0; i < vcs2_n; i++)
    {
        int *vc = vcs2[i]; // vc[0]..vc[1] spans one escape sequence in <to>
        int vc0 = vc[0] + 1; // index of the character after the leading backslash
        if (to[vc0] == '\\')
        {
            vc[2] = 0; // 0 marks "emit a single backslash"
            remain_length2 -= 2;
        }
        else // a group reference
        {
            int wanted_number = 0;
            int l_n = vc[1] - vc0;
            if (l_n == 1)
            {
                wanted_number = to[vc0] - '0';
                remain_length2 -= 2;
            }
            else // two digits
            {
                wanted_number = (to[vc0] - '0') * 10 + to[vc0 + 1] - '0';
                remain_length2 -= 3;
            }
            if (wanted_number <= 0 || wanted_number >= group_n)
                goto exit_error;
            // vc[2] is reused to remember the referenced group number
            vc[2] = wanted_number;
        }
    }
    // A non-positive n means "replace all". A negative n used to produce a
    // negative limit, which under-sized new_s and overflowed it in the final copy.
    match_limit = (n > 0 && n < vcs1_n) ? n : vcs1_n;
    // size of the result: the subject, plus the literal part of <to> per replacement
    remain_size = len + remain_length2 * match_limit;
    for (int i = 0; i < match_limit; i++)
    {
        int *vc = vcs1[i];
        // vc[0]..vc[1] is the matched text that gets replaced
        remain_size -= vc[1] - vc[0];
        // add what every escape in <to> expands to
        for (int j = 0; j < vcs2_n; j++)
        {
            int *v2 = vcs2[j];
            if (v2[2])
            {
                // expands to a group
                remain_size += GetGroupLen(vc, v2[2]);
            }
            else
            {
                // expands to a single backslash
                remain_size++;
            }
        }
    }
    new_s = (char *)malloc(remain_size + 1);
    if (!new_s)
        goto exit_error;
    for (int i = 0; i < match_limit; i++)
    {
        int *vc = vcs1[i];
        // copy the untouched text between the previous match and this one
        memcpy(new_s + pi, s + qi, vc[0] - qi);
        pi += vc[0] - qi;
        int m_start = 0, m_len = 0;
        for (int j = 0; j < vcs2_n; j++)
        {
            int *v2 = vcs2[j];
            // literal part of <to> in front of this escape
            m_len = v2[0] - m_start;
            memcpy(new_s + pi, to + m_start, m_len);
            pi += m_len;
            int to_group = v2[2];
            if (to_group)
            {
                int to_group_at = vc[to_group * 2];
                int to_group_end = vc[to_group * 2 + 1];
                int g_l = to_group_end - to_group_at;
                // expands to a group; nothing is copied for an empty one
                if (g_l > 0)
                {
                    memcpy(new_s + pi, s + to_group_at, g_l);
                    pi += g_l;
                }
            }
            else
            {
                // expands to a single backslash
                new_s[pi++] = '\\';
            }
            m_start = v2[1];
        }
        // literal tail of <to> after the last escape
        m_len = len_to - m_start;
        memcpy(new_s + pi, to + m_start, m_len);
        pi += m_len;
        // end of one match
        qi = vc[1];
    }
    if (vcs1)
        re_free_searchall(vcs1, vcs1_n);
    if (vcs2)
        re_free_searchall(vcs2, vcs2_n);
    // copy whatever follows the last replaced match
    len -= qi;
    if (len)
        memcpy(new_s + pi, s + qi, len);
    pi += len;
    new_s[pi] = '\0';
    return new_s;
exit_error:
    if (vcs1)
        re_free_searchall(vcs1, vcs1_n);
    if (vcs2)
        re_free_searchall(vcs2, vcs2_n);
    if (re2)
        pcre_free(re2);
    return NULL;
}
/// @brief Replace every match of a compiled pattern in a string.
/// @param re : compiled pattern to search for
/// @param to : replacement template
/// @param s : string to search in
/// @param len : length of <s>
/// @param opt : forwarded to re_subn2()
/// @return the result of re_subn2() called with a replacement limit of 0
char *re_sub2(pcre *re, const char *to, const char *s, int len, int opt)
{
    const int no_limit = 0;
    char *replaced = re_subn2(re, to, s, len, no_limit, opt);

    return replaced;
}

50
package/re/cre.h Normal file
View File

@ -0,0 +1,50 @@
#ifndef CRE_H
#define CRE_H
#include "pcre.h"
/* Length of group <n> in offset vector <vc>: entry 2n+1 minus entry 2n.
   Both arguments are parenthesized so that expressions such as `v + 2`
   can be passed safely. */
#define GetGroupLen(vc, n) ((vc)[(n) * 2 + 1] - (vc)[(n) * 2])
/* Allocates the offset vector used by the matching functions below. */
int *_re_get_vec_table(pcre *re, int *out_groups_number);
/* match / fullmatch: the pcre_* form takes pattern text, the *2 form takes a
   compiled pattern. */
int *pcre_match(const char *_pat, const char *s, int len, int *out_vec_number, int opt);
int *re_match2(pcre *re, const char *s, int len, int *out_vec_number, int opt);
int *pcre_fullmatch(const char *_pat, const char *s, int len, int *out_vec_number, int opt);
int *re_fullmatch2(pcre *re, const char *s, int len, int *out_vec_number, int opt);
/* Pattern compilers used by pcre_match() and pcre_fullmatch(). */
pcre *re_get_match_re(const char *_pat, int opt);
pcre *re_get_fullmatch_re(const char *_pat, int opt);
/* search / searchall: return offset vectors. */
int *pcre_search(const char *pat, const char *s, int len, int *out_vec_number, int opt);
int *re_search2(pcre *re, const char *s, int len, int *out_vec_number, int opt);
int **re_searchall(const char *pat, const char *s, int len, int *out_number, int *out_vec_number, int opt);
int **re_searchall2(pcre *re, const char *s, int len, int *out_number, int *out_vec_number, int opt);
void re_free_searchall(int **vecs, int n);
/* find / findall: return copies of the matched text. */
char **_re_extract_substring(const char *s, int **vecs, int n);
char *re_find(const char *pat, const char *s, int len, int opt);
char *re_find2(pcre *re, const char *s, int len, int opt);
char **pcre_findall(const char *pat, const char *s, int len, int *out_number, int opt);
char **re_findall2(pcre *re, const char *s, int len, int *out_number, int opt);
void re_free_findall(char **ss, int n);
/* sub / subn: substitution; <n> is the replacement limit. */
char *pcre_sub(const char *pat, const char *to, const char *s, int len, int opt);
char *pcre_subn(const char *pat, const char *to, const char *s, int len, int n, int opt);
char *re_subn2(pcre *re, const char *to, const char *s, int len, int n, int opt);
char *re_sub2(pcre *re, const char *to, const char *s, int len, int opt);
#endif

264
package/re/pcre.h Normal file
View File

@ -0,0 +1,264 @@
#ifndef _PCRE_H
#define _PCRE_H
/* The current PCRE version information. */
/* NOTE(review): the values below are @NAME@ placeholders rather than version
   numbers or a date; they look like unsubstituted build-template markers, so
   any code that expands these macros would not compile -- confirm and fill in
   the real version. */
#define PCRE_MAJOR @PCRE_MAJOR@
#define PCRE_MINOR @PCRE_MINOR@
#define PCRE_PRERELEASE @PCRE_PRERELEASE@
#define PCRE_DATE @PCRE_DATE@
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE_STATIC)
# ifndef PCRE_EXP_DECL
# define PCRE_EXP_DECL extern __declspec(dllimport)
# endif
# ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN __declspec(dllimport)
# endif
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE_EXP_DECL
# ifdef __cplusplus
# define PCRE_EXP_DECL extern "C"
# else
# define PCRE_EXP_DECL extern
# endif
#endif
#ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN
# endif
#endif
/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options */
#define PCRE_CASELESS 0x00000001
#define PCRE_MULTILINE 0x00000002
#define PCRE_DOTALL 0x00000004
#define PCRE_EXTENDED 0x00000008
#define PCRE_ANCHORED 0x00000010
#define PCRE_DOLLAR_ENDONLY 0x00000020
#define PCRE_EXTRA 0x00000040
#define PCRE_NOTBOL 0x00000080
#define PCRE_NOTEOL 0x00000100
#define PCRE_UNGREEDY 0x00000200
#define PCRE_NOTEMPTY 0x00000400
#define PCRE_UTF8 0x00000800
#define PCRE_NO_AUTO_CAPTURE 0x00001000
#define PCRE_NO_UTF8_CHECK 0x00002000
#define PCRE_AUTO_CALLOUT 0x00004000
#define PCRE_PARTIAL 0x00008000
#define PCRE_DFA_SHORTEST 0x00010000
#define PCRE_DFA_RESTART 0x00020000
#define PCRE_FIRSTLINE 0x00040000
#define PCRE_DUPNAMES 0x00080000
#define PCRE_NEWLINE_CR 0x00100000
#define PCRE_NEWLINE_LF 0x00200000
#define PCRE_NEWLINE_CRLF 0x00300000
#define PCRE_NEWLINE_ANY 0x00400000
#define PCRE_NEWLINE_ANYCRLF 0x00500000
#define PCRE_BSR_ANYCRLF 0x00800000
#define PCRE_BSR_UNICODE 0x01000000
/* Exec-time and get/set-time error codes */
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8 (-10)
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
#define PCRE_ERROR_BADCOUNT (-15)
#define PCRE_ERROR_DFA_UITEM (-16)
#define PCRE_ERROR_DFA_UCOND (-17)
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE (-23)
/* Request types for pcre_fullinfo() */
#define PCRE_INFO_OPTIONS 0
#define PCRE_INFO_SIZE 1
#define PCRE_INFO_CAPTURECOUNT 2
#define PCRE_INFO_BACKREFMAX 3
#define PCRE_INFO_FIRSTBYTE 4
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
#define PCRE_INFO_FIRSTTABLE 5
#define PCRE_INFO_LASTLITERAL 6
#define PCRE_INFO_NAMEENTRYSIZE 7
#define PCRE_INFO_NAMECOUNT 8
#define PCRE_INFO_NAMETABLE 9
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
#define PCRE_INFO_OKPARTIAL 12
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
#define PCRE_CONFIG_LINK_SIZE 2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
#define PCRE_CONFIG_MATCH_LIMIT 4
#define PCRE_CONFIG_STACKRECURSE 5
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such as way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
/* Optional extra data for pcre_exec(). The flags field is a combination of the
   PCRE_EXTRA_* bits defined earlier in this header. */
typedef struct pcre_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. */
/* Data block handed to the pcre_callout function pointer declared below. The
   fields are grouped by the block version that introduced them. */
typedef struct pcre_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------------------------------------------------------ */
} pcre_callout_block;
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
/* Memory-management and callout hooks. Without VPCOMPAT they are declared as
pointers to functions; with VPCOMPAT the same five names are declared as
ordinary functions instead. */
#ifndef VPCOMPAT
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
#else /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
#endif /* VPCOMPAT */
/* Exported PCRE functions */
/* Only pcre_compile(), pcre_compile2(), pcre_exec() and pcre_fullinfo() are
declared in this copy of the header; the remaining prototypes below are
commented out. */
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
// PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
// int *, int, const char *, char *, int);
// PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
// int);
// PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
// const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
// PCRE_EXP_DECL void pcre_free_substring(const char *);
// PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
// PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
// int *, int, const char *, const char **);
// PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
// PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
// char **, char **);
// PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
// const char **);
// PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
// const char ***);
// PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
// PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
// PCRE_EXP_DECL int pcre_refcount(pcre *, int);
// PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcre.h */

View File

@ -0,0 +1,195 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file contains character tables that are used when no external tables
are passed to PCRE by the application that calls it. The tables are used only
for characters whose code values are less than 256.
This is a default version of the tables that assumes ASCII encoding. A program
called dftables (which is distributed with PCRE) can be used to build
alternative versions of this file. This is necessary if you are running in an
EBCDIC environment, or if you want to default to a different encoding, for
example ISO-8859-1. When dftables is run, it creates these tables in the
current locale. If PCRE is configured with --enable-rebuild-chartables, this
happens automatically.
The following #includes are present because without them gcc 4.x may remove the
array definition from the final binary if PCRE is built into a static library
and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
#include "config.h"
#include "pcre_internal.h"
/* Default character tables, stored back to back in a single array:
  - 256 bytes: lower casing table
  - 256 bytes: case flipping table
  - 320 bytes: ten 32-byte class bit maps
  - 256 bytes: per-character type flags */
const unsigned char _pcre_default_tables[] = {
/* This table is a lower casing table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table is a case flipping table. */
0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39,
40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,
216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */
/* space: characters 9-13 and 32 */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* xdigit: 0-9, A-F, a-f */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* digit: 0-9 */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* upper: A-Z */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* lower: a-z */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* word: 0-9, A-Z, '_', a-z */
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* graph: characters 33-126 */
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* print: characters 32-126 */
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* punct */
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* cntrl: characters 0-31 and 127 */
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
/* This table identifies various classes of character by individual bits:
0x01 white space character
0x02 letter
0x04 decimal digit
0x08 hexadecimal digit
0x10 alphanumeric or '_'
0x80 regular expression metacharacter or binary zero
*/
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of pcre_chartables.c */

6179
package/re/pcre_compile.c Normal file

File diff suppressed because it is too large Load Diff

4898
package/re/pcre_exec.c Normal file

File diff suppressed because it is too large Load Diff

123
package/re/pcre_fullinfo.c Normal file
View File

@ -0,0 +1,123 @@
/* This module contains the external function pcre_fullinfo(), which returns
information about a compiled pattern. */
#include "config.h"
#include "pcre_internal.h"
/*************************************************
* Return info about compiled pattern *
*************************************************/
/* This is a newer "info" function which has an extensible interface so
that additional items can be added compatibly.
Arguments:
argument_re points to compiled code
extra_data points to extra data, or NULL
what what information is required
where where to put the information
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
  void *where)
{
/* Local blocks that receive byte-flipped copies when the pattern turns out to
have been compiled with the opposite byte order. */
real_pcre flipped_re;
pcre_study_data flipped_study;
const real_pcre *re = (const real_pcre *)argument_re;
const pcre_study_data *study = NULL;

if (where == NULL || re == NULL) return PCRE_ERROR_NULL;

/* Study data is only consulted when the caller flags it as present. */
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
  study = (const pcre_study_data *)extra_data->study_data;

/* A magic number mismatch is either an opposite-endian pattern or garbage. */
if (re->magic_number != MAGIC_NUMBER)
  {
  re = _pcre_try_flipped(re, &flipped_re, study, &flipped_study);
  if (re == NULL) return PCRE_ERROR_BADMAGIC;
  if (study != NULL) study = &flipped_study;
  }

/* Each request stores its answer through "where", using the type that
belongs to that request. */
switch (what)
  {
  case PCRE_INFO_OPTIONS:
  *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
  break;

  case PCRE_INFO_SIZE:
  *((size_t *)where) = re->size;
  break;

  case PCRE_INFO_STUDYSIZE:
  if (study == NULL)
    *((size_t *)where) = 0;
  else
    *((size_t *)where) = study->size;
  break;

  case PCRE_INFO_CAPTURECOUNT:
  *((int *)where) = re->top_bracket;
  break;

  case PCRE_INFO_BACKREFMAX:
  *((int *)where) = re->top_backref;
  break;

  /* The first byte if one is set; otherwise -1 when PCRE_STARTLINE is set,
  else -2. */
  case PCRE_INFO_FIRSTBYTE:
  if ((re->flags & PCRE_FIRSTSET) != 0)
    *((int *)where) = re->first_byte;
  else if ((re->flags & PCRE_STARTLINE) != 0)
    *((int *)where) = -1;
  else
    *((int *)where) = -2;
  break;

  /* Hand back the bit vector that lives in the caller's study block rather
  than the one in the local (flipped) copy. */
  case PCRE_INFO_FIRSTTABLE:
  if (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)
    *((const uschar **)where) =
      ((const pcre_study_data *)extra_data->study_data)->start_bits;
  else
    *((const uschar **)where) = NULL;
  break;

  case PCRE_INFO_LASTLITERAL:
  if ((re->flags & PCRE_REQCHSET) != 0)
    *((int *)where) = re->req_byte;
  else
    *((int *)where) = -1;
  break;

  case PCRE_INFO_NAMEENTRYSIZE:
  *((int *)where) = re->name_entry_size;
  break;

  case PCRE_INFO_NAMECOUNT:
  *((int *)where) = re->name_count;
  break;

  case PCRE_INFO_NAMETABLE:
  *((const uschar **)where) = (const uschar *)re + re->name_table_offset;
  break;

  case PCRE_INFO_DEFAULT_TABLES:
  *((const uschar **)where) = (const uschar *)(_pcre_default_tables);
  break;

  case PCRE_INFO_OKPARTIAL:
  *((int *)where) = ((re->flags & PCRE_NOPARTIAL) != 0)? 0 : 1;
  break;

  case PCRE_INFO_JCHANGED:
  *((int *)where) = ((re->flags & PCRE_JCHANGED) != 0)? 1 : 0;
  break;

  case PCRE_INFO_HASCRORLF:
  *((int *)where) = ((re->flags & PCRE_HASCRORLF) != 0)? 1 : 0;
  break;

  default:
  return PCRE_ERROR_BADOPTION;
  }

return 0;
}
/* End of pcre_fullinfo.c */

21
package/re/pcre_globals.c Normal file
View File

@ -0,0 +1,21 @@
/* This module contains global variables that are exported by the PCRE library.
PCRE is thread-clean and doesn't use any global variables in the normal sense.
However, it calls memory allocation and freeing functions via the four
indirections below, and it can optionally do callouts, using the fifth
indirection. These values can be changed by the caller, but are shared between
all threads. However, when compiling for Virtual Pascal, things are done
differently, and global variables are not used (see pcre.in). */
#include "config.h"
#include "pcre_internal.h"
/* Definitions of the five indirections. They are only defined here when
VPCOMPAT is not set. The four allocation hooks are initialised to the C
library's malloc and free; the callout hook is initialised to NULL. */
#ifndef VPCOMPAT
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
#endif
/* End of pcre_globals.c */

1083
package/re/pcre_internal.h Normal file

File diff suppressed because it is too large Load Diff

122
package/re/pcre_newline.c Normal file
View File

@ -0,0 +1,122 @@
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#include "config.h"
#include "pcre_internal.h"
/*************************************************
* Check for newline at given position *
*************************************************/
/* It is guaranteed that the initial value of ptr is less than the end of the
string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
  int *lenptr, BOOL utf8)
{
int ch;

if (utf8) { GETCHAR(ch, ptr); } else ch = *ptr;

/* LF and CR are newlines for both NLTYPE_ANYCRLF and NLTYPE_ANY, so they are
handled before looking at the type. */
if (ch == 0x000a)                      /* LF */
  {
  *lenptr = 1;
  return TRUE;
  }

if (ch == 0x000d)                      /* CR, possibly the start of CRLF */
  {
  if (ptr < endptr - 1 && ptr[1] == 0x0a)
    *lenptr = 2;
  else
    *lenptr = 1;
  return TRUE;
  }

/* Nothing else counts as a newline for NLTYPE_ANYCRLF. */
if (type == NLTYPE_ANYCRLF) return FALSE;

/* NLTYPE_ANY */
switch(ch)
  {
  case 0x000b:                         /* VT */
  case 0x000c:                         /* FF */
  *lenptr = 1;
  return TRUE;

  case 0x0085:                         /* NEL */
  if (utf8) *lenptr = 2; else *lenptr = 1;
  return TRUE;

  case 0x2028:                         /* LS */
  case 0x2029:                         /* PS */
  *lenptr = 3;
  return TRUE;

  default:
  return FALSE;
  }
}
/*************************************************
* Check for newline at previous position *
*************************************************/
/* It is guaranteed that the initial value of ptr is greater than the start of
the string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
  int *lenptr, BOOL utf8)
{
int ch;

/* Step back to the previous character and fetch it. */
ptr--;
#ifdef SUPPORT_UTF8
if (utf8)
  {
  BACKCHAR(ptr);
  GETCHAR(ch, ptr);
  }
else
#endif  /* SUPPORT_UTF8 */
ch = *ptr;

/* LF and CR behave the same for NLTYPE_ANYCRLF and NLTYPE_ANY. An LF that is
preceded by CR is reported as a two-byte newline. */
if (ch == 0x000a)                      /* LF */
  {
  if (ptr > startptr && ptr[-1] == 0x0d)
    *lenptr = 2;
  else
    *lenptr = 1;
  return TRUE;
  }

if (ch == 0x000d)                      /* CR */
  {
  *lenptr = 1;
  return TRUE;
  }

/* Nothing else counts as a newline for NLTYPE_ANYCRLF. */
if (type == NLTYPE_ANYCRLF) return FALSE;

/* NLTYPE_ANY */
switch(ch)
  {
  case 0x000b:                         /* VT */
  case 0x000c:                         /* FF */
  *lenptr = 1;
  return TRUE;

  case 0x0085:                         /* NEL */
  if (utf8) *lenptr = 2; else *lenptr = 1;
  return TRUE;

  case 0x2028:                         /* LS */
  case 0x2029:                         /* PS */
  *lenptr = 3;
  return TRUE;

  default:
  return FALSE;
  }
}
/* End of pcre_newline.c */

View File

@ -0,0 +1,43 @@
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
#include "config.h"
#include "pcre_internal.h"
/*************************************************
* Convert character value to UTF-8 *
*************************************************/
/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 0 to 6 bytes.
Arguments:
cvalue the character value
buffer pointer to buffer for result - at least 6 bytes long
Returns: number of characters placed in the buffer
*/
int
_pcre_ord2utf8(int cvalue, uschar *buffer)
{
#ifdef SUPPORT_UTF8
register int extra = 0;   /* number of bytes that follow the first one */
register int pos;

/* Find the first breakpoint in _pcre_utf8_table1 that can hold the value. */
while (extra < _pcre_utf8_table1_size && cvalue > _pcre_utf8_table1[extra])
  extra++;

/* Fill the trailing bytes from last to first, six data bits at a time. */
for (pos = extra; pos > 0; pos--)
  {
  buffer[pos] = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }

/* The first byte combines the length indicator with the remaining bits. */
buffer[0] = _pcre_utf8_table2[extra] | cvalue;
return extra + 1;
#else
return 0;   /* Keep compiler happy; this function won't ever be */
#endif      /* called when SUPPORT_UTF8 is not defined. */
}
/* End of pcre_ord2utf8.c */

47
package/re/pcre_tables.c Normal file
View File

@ -0,0 +1,47 @@
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcretest program, which
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
#include "config.h"
#include "pcre_internal.h"
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in pcre_internal.h. */
/* One entry per opcode; the initialiser list comes from the OP_LENGTHS macro. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
/*************************************************
* Tables for UTF-8 support *
*************************************************/
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. */
#ifdef SUPPORT_UTF8
/* Entry i is the largest value that fits in an (i+1)-byte sequence. */
const int _pcre_utf8_table1[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
/* Number of entries in _pcre_utf8_table1. */
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
/* These are the indicator bits and the mask for the data bits to set in the
first byte of a character, indexed by the number of additional bytes. */
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Table of the number of extra bytes, indexed by the first byte masked with
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
/* Indices 0x00-0x1f give 1, 0x20-0x2f give 2, 0x30-0x37 give 3, 0x38-0x3b give
4 and 0x3c-0x3f give 5. */
const uschar _pcre_utf8_table4[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
#endif

View File

@ -0,0 +1,95 @@
/* This module contains an internal function that tests a compiled pattern to
see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#include "config.h"
#include "pcre_internal.h"
/*************************************************
* Flip bytes in an integer *
*************************************************/
/* This function is called when the magic number in a regex doesn't match, in
order to flip its bytes to see if we are dealing with a pattern that was
compiled on a host of different endianness. If so, this function is used to
flip other byte values.
Arguments:
value the number to flip
n the number of bytes to flip (assumed to be 2 or 4)
Returns: the flipped value
*/
static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Pick out the individual bytes, least significant first. */
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

/* Two-byte quantity: swap the low two bytes and drop everything above. */
if (n == 2) return (b0 << 8) | b1;

/* Any other width is treated as four bytes: reverse the low four bytes. */
b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
/*************************************************
* Test for a byte-flipped compiled regex *
*************************************************/
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
is, it was compiled on a system of opposite endianness. The function is called
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
we flip all the relevant values into a different data block, and return it.
Arguments:
re points to the regex
study points to study data, or NULL
internal_re points to a new regex block
internal_study points to a new study block
Returns: the new block if it is indeed a byte-flipped regex
NULL if it is not
*/
real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
  const pcre_study_data *study, pcre_study_data *internal_study)
{
/* Byte-flip one named field of "src" into the same field of "dst". The plain
form relies on the implicit conversion from byteflip()'s unsigned long result;
the U16 form narrows explicitly. */
#define FLIP_FIELD(dst, src, field) \
  (dst)->field = byteflip((src)->field, sizeof((src)->field))
#define FLIP_FIELD_U16(dst, src, field) \
  (dst)->field = (pcre_uint16)byteflip((src)->field, sizeof((src)->field))

/* Only a magic number that matches after flipping marks a flipped regex. */
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
  return NULL;

/* Start from a straight copy so that the fields which are not flipped below
carry over unchanged. */
*internal_re = *re;

FLIP_FIELD(internal_re, re, size);
FLIP_FIELD(internal_re, re, options);
FLIP_FIELD_U16(internal_re, re, flags);
FLIP_FIELD_U16(internal_re, re, top_bracket);
FLIP_FIELD_U16(internal_re, re, top_backref);
FLIP_FIELD_U16(internal_re, re, first_byte);
FLIP_FIELD_U16(internal_re, re, req_byte);
FLIP_FIELD_U16(internal_re, re, name_table_offset);
FLIP_FIELD_U16(internal_re, re, name_entry_size);
FLIP_FIELD_U16(internal_re, re, name_count);

/* The study block, when supplied, gets the same treatment. */
if (study != NULL)
  {
  *internal_study = *study;
  FLIP_FIELD(internal_study, study, size);
  FLIP_FIELD(internal_study, study, options);
  }

#undef FLIP_FIELD
#undef FLIP_FIELD_U16

return internal_re;
}
/* End of pcre_tryflipped.c */

View File

@ -0,0 +1,120 @@
/* This module contains an internal function for validating UTF-8 character
strings. */
#include "config.h"
#include "pcre_internal.h"
/*************************************************
* Validate a UTF-8 string *
*************************************************/
/* This function is called (optionally) at the start of compile or match, to
validate that a supposed UTF-8 string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying
an invalid string are then undefined.
Originally, this function checked according to RFC 2279, allowing for values in
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd800 to 0xdfff is excluded.
Arguments:
string points to the string
length length of string, or -1 if the string is zero-terminated
Returns: < 0 if the string is a valid UTF-8 string
>= 0 otherwise; the value is the offset of the bad byte
*/
int
_pcre_valid_utf8(const uschar *string, int length)
{
#ifdef SUPPORT_UTF8
register const uschar *p;

/* A negative length means the string is zero-terminated: measure it. */
if (length < 0)
  {
  p = string;
  while (*p != 0) p++;
  length = p - string;
  }

for (p = string; length-- > 0; p++)
  {
  register int extra;         /* bytes expected after the first one */
  register int lead = *p;     /* first byte of the current character */

  if (lead < 128) continue;               /* single-byte character */
  if (lead < 0xc0) return p - string;     /* stray continuation byte */

  /* Sequences longer than four bytes, or running past the end of the string,
  are rejected. */
  extra = _pcre_utf8_table4[lead & 0x3f];
  if (extra > 3 || length < extra) return p - string;
  length -= extra;

  /* The second byte must have the form 10xx xxxx. */
  p++;
  if ((*p & 0xc0) != 0x80) return p - string;

  /* Per-length checks for overlong encodings, for the excluded range 0xd800
  to 0xdfff, and for values above 0x0010ffff. */
  if (extra == 1)
    {
    /* xx00 000x in the first byte is an overlong two-byte sequence. */
    if ((lead & 0x3e) == 0) return p - string;
    continue;   /* no further bytes to check */
    }
  else if (extra == 2)
    {
    /* 1110 0000, xx0x xxxx is overlong; 1110 1101, 101x xxxx and above in
    the second byte is 0xd800 - 0xdfff. */
    if ((lead == 0xe0 && (*p & 0x20) == 0) ||
        (lead == 0xed && *p >= 0xa0))
      return p - string;
    }
  else if (extra == 3)
    {
    /* 1111 0000, xx00 xxxx is overlong; anything beyond f4 8f bf bf is
    greater than 0x0010ffff. */
    if ((lead == 0xf0 && (*p & 0x30) == 0) ||
        (lead > 0xf4) ||
        (lead == 0xf4 && *p > 0x8f))
      return p - string;
    }

  /* Five- and six-byte forms never get this far because of the length test
  above, so they need no overlong checks here. */

  /* Every byte after the second must also have the form 10xx xxxx. */
  for (extra--; extra > 0; extra--)
    {
    p++;
    if ((*p & 0xc0) != 0x80) return p - string;
    }
  }
#endif

return -1;
}
/* End of pcre_valid_utf8.c */

106
package/re/pcre_xclass.c Normal file
View File

@ -0,0 +1,106 @@
/* This module contains an internal function that is used to match an extended
class (one that contains characters whose values are > 255). It is used by both
pcre_exec() and pcre_dfa_exec(). */
#include "config.h"
#include "pcre_internal.h"
/*************************************************
* Match character against an XCLASS *
*************************************************/
/* This function is called to match a character against an extended class that
might contain values > 255.
Arguments:
c the character
data points to the flag byte of the XCLASS data
Returns: TRUE if character matches, else FALSE
*/
BOOL
_pcre_xclass(int c, const uschar *data)
{
const BOOL negated = (*data & XCL_NOT) != 0;
const BOOL has_map = (*data & XCL_MAP) != 0;

/* Values below 256 are first tried against the bit map when there is one. A
miss is not final: the list that follows may hold ranges that start below
256. */
if (has_map && c < 256 && (data[1 + c/8] & (1 << (c&7))) != 0)
  return !negated;   /* char found */

/* Step over the flag byte and, if present, the 32-byte map. */
data += has_map? 33 : 1;

/* Walk the list of single characters, ranges and (with UCP support) property
tests until the terminator is reached. */
for (;;)
  {
  int lo, hi;
  int item = *data++;

  if (item == XCL_END) break;

  if (item == XCL_SINGLE)
    {
    GETCHARINC(lo, data);
    if (c == lo) return !negated;
    }
  else if (item == XCL_RANGE)
    {
    GETCHARINC(lo, data);
    GETCHARINC(hi, data);
    if (c >= lo && c <= hi) return !negated;
    }
#ifdef SUPPORT_UCP
  else  /* XCL_PROP & XCL_NOTPROP */
    {
    int chartype, script;
    int category = _pcre_ucp_findprop(c, &chartype, &script);
    const BOOL positive = (item == XCL_PROP);
    BOOL is_lamp;

    switch(*data)
      {
      case PT_ANY:
      if (positive) return !negated;
      break;

      case PT_LAMP:
      is_lamp = (chartype == ucp_Lu || chartype == ucp_Ll ||
                 chartype == ucp_Lt);
      if (is_lamp == positive) return !negated;
      break;

      case PT_GC:
      if ((data[1] == category) == positive) return !negated;
      break;

      case PT_PC:
      if ((data[1] == chartype) == positive) return !negated;
      break;

      case PT_SC:
      if ((data[1] == script) == positive) return !negated;
      break;

      /* Not expected to happen; keeps compilers quiet about a missing
      default. */
      default:
      return FALSE;
      }

    data += 2;   /* step over the two bytes of the property item */
    }
#endif  /* SUPPORT_UCP */
  }

return negated;   /* char did not match */
}
/* End of pcre_xclass.c */

626
package/re/re-api-adapter.c Normal file
View File

@ -0,0 +1,626 @@
#include "re.h"
#include "TinyObj.h"
#include <stdio.h>
#include <stdlib.h>
#include "BaseObj.h"
#include "PikaStdData_List.h"
#include "PikaStdData_Tuple.h"
#include "cre.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "re_Match.h"
#include "re_Pattern.h"
void re_Match___init__args(PikaObj *self, char *sub, int *vec, int ven);
/* Publish the regex flag constants as integer attributes of `self`. */
void pre_init_re(PikaObj *self)
{
    /* Attribute name / PCRE option pairs, in the order they are registered. */
    static const struct
    {
        char *name;
        int value;
    } flag_attrs[] = {
        {"I", PCRE_CASELESS},
        {"M", PCRE_MULTILINE},
        {"IGNORECASE", PCRE_CASELESS},
        {"MULTILINE", PCRE_MULTILINE},
        {"DOTALL", PCRE_DOTALL},
    };
    const int count = (int)(sizeof(flag_attrs) / sizeof(flag_attrs[0]));

    for (int idx = 0; idx < count; idx++)
    {
        obj_setInt(self, flag_attrs[idx].name, flag_attrs[idx].value);
    }
}
/* Append the subject slice [start, end) to `list` as a string.
 * `buf` is caller-owned scratch space of at least subject_len + 1 bytes.
 * An unset group (negative offsets) or any span that does not lie inside the
 * subject is appended as an empty string. */
static void findall_append_span(PikaObj *list, const char *subject, int subject_len,
                                int start, int end, char *buf)
{
    int len = 0;
    if (start >= 0 && end >= start && end <= subject_len)
        len = end - start;
    if (len > 0)
        memcpy(buf, subject + start, len);
    buf[len] = 0;
    /* The argument is released right after the append, as the scratch buffer
     * is reused for the next span. */
    Arg *str_arg = arg_newStr(buf);
    PikaStdData_List_append(list, str_arg);
    arg_deinit(str_arg);
}

/* Find all matches of `pattern` in `subject`.
 *
 * val holds optional integer flags that are OR-ed together; PCRE_UTF8 is
 * always added.
 *
 * Returns a list. When re_searchall() reports one bracket per match, each
 * element is the matched string; otherwise each element is a list with one
 * string per bracket. Returns NULL (with an error code set) if a flag
 * argument is not an integer. If the search fails with a negative match
 * count, or scratch memory cannot be allocated, an error code is set and the
 * empty list is returned.
 */
PikaObj *re_findall(PikaObj *self, char *pattern, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    flags |= PCRE_UTF8;

    PikaObj *list = newNormalObj(New_PikaStdData_List);
    PikaStdData_List___init__(list);

    int subject_len = strlen(subject);
    int m_n = -1;
    int brackets = -1;
    int **vcs = re_searchall(pattern, subject, subject_len, &m_n, &brackets, flags);
    if (!vcs)
    {
        if (m_n < 0)
            obj_setErrorCode(self, -__LINE__);
        return list;
    }

    /* One scratch buffer big enough for any slice of the subject. Sizing it
     * from group 0 alone is not enough: a group can lie outside the overall
     * match (for example a capture inside a lookahead). */
    char *buf = malloc(subject_len + 1);
    if (!buf)
    {
        obj_setErrorCode(self, -__LINE__);
        re_free_searchall(vcs, m_n);
        return list;
    }

    for (int i = 0; i < m_n; i++)
    {
        int *v = vcs[i];
        if (brackets == 1)
        {
            /* Single bracket: the result is a flat list of strings. */
            findall_append_span(list, subject, subject_len, v[0], v[1], buf);
            continue;
        }

        /* Several brackets: one sub-list per match. */
        PikaObj *sub_list = newNormalObj(New_PikaStdData_List);
        PikaStdData_List___init__(sub_list);
        for (int j = 0; j < brackets; j++)
        {
            findall_append_span(sub_list, subject, subject_len,
                                v[j * 2], v[j * 2 + 1], buf);
        }
        Arg *sub_arg = arg_newRef(sub_list);
        PikaStdData_List_append(list, sub_arg);
        arg_deinit(sub_arg);
    }

    free(buf);
    re_free_searchall(vcs, m_n);
    return list;
}
/* Run pcre_match() with `pattern` against `subject`.
 *
 * val holds optional integer flags that are OR-ed together; PCRE_UTF8 is
 * always added.
 *
 * Returns a new re.Match object that takes ownership of the offset vector,
 * or NULL when there is no match. NULL with an error code set means a flag
 * argument was not an integer or pcre_match() left a negative count.
 */
PikaObj *re_match(PikaObj *self, char *pattern, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    flags |= PCRE_UTF8;

    int ven = -1;
    int *vec = pcre_match(pattern, subject, strlen(subject), &ven, flags);
    if (!vec)
    {
        if (ven < 0)
            obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    /* Create the Match object only once there is a result to wrap, so the
     * no-match and error paths above do not leak an unused object. */
    PikaObj *m = newNormalObj(New_re_Match);
    re_Match___init__args(m, subject, vec, ven);
    return m;
}
/* Run pcre_fullmatch() with `pattern` against `subject`.
 *
 * val holds optional integer flags that are OR-ed together; PCRE_UTF8 is
 * always added.
 *
 * Returns a new re.Match object that takes ownership of the offset vector,
 * or NULL when there is no match. NULL with an error code set means a flag
 * argument was not an integer or pcre_fullmatch() left a negative count.
 */
PikaObj *re_fullmatch(PikaObj *self, char *pattern, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    flags |= PCRE_UTF8;

    int ven = -1;
    int *vec = pcre_fullmatch(pattern, subject, strlen(subject), &ven, flags);
    if (!vec)
    {
        if (ven < 0)
            obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    /* Create the Match object only once there is a result to wrap, so the
     * no-match and error paths above do not leak an unused object. */
    PikaObj *m = newNormalObj(New_re_Match);
    re_Match___init__args(m, subject, vec, ven);
    return m;
}
/* Run pcre_search() with `pattern` against `subject`.
 *
 * val holds optional integer flags that are OR-ed together; PCRE_UTF8 is
 * always added.
 *
 * Returns a new re.Match object that takes ownership of the offset vector,
 * or NULL when there is no match. NULL with an error code set means a flag
 * argument was not an integer or pcre_search() left a negative count.
 */
PikaObj *re_search(PikaObj *self, char *pattern, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    flags |= PCRE_UTF8;

    int ven = -1;
    int *vec = pcre_search(pattern, subject, strlen(subject), &ven, flags);
    if (!vec)
    {
        if (ven < 0)
            obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    /* Create the Match object only once there is a result to wrap, so the
     * no-match and error paths above do not leak an unused object. */
    PikaObj *m = newNormalObj(New_re_Match);
    re_Match___init__args(m, subject, vec, ven);
    return m;
}
/* Substitute `repl` for matches of `pattern` in `subjet` via pcre_sub().
 *
 * val holds optional integer flags that are OR-ed together; PCRE_UTF8 is
 * always added.
 *
 * The result is stored in this object's "_b" attribute and the stored copy
 * is returned. Returns NULL with an error code set if a flag argument is not
 * an integer or pcre_sub() fails.
 */
char *re_sub(PikaObj *self, char *pattern, char *repl, char *subjet, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    int length = strlen(subjet);
    flags |= PCRE_UTF8;
    char *s = pcre_sub(pattern, repl, subjet, length, flags);
    if (!s)
    {
        obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    /* The attribute keeps its own copy of the text, so no intermediate
     * duplicate is needed. */
    obj_setStr(self, "_b", s);

    /* pcre_sub() hands back the subject pointer itself when it has nothing
     * new to return; any other pointer is a heap buffer we must release. */
    if (s != subjet)
        free(s);

    return obj_getStr(self, "_b");
}
/* Compile `pattern` with default options and wrap it in a re.Pattern object.
 *
 * The compiled pcre block is stored in the new object's "_re" attribute.
 * Returns NULL with an error code set when compilation fails.
 */
PikaObj *re_compile(PikaObj *self, char *pattern)
{
    const char *error;
    int erroffset;
    pcre *re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
    if (!re)
    {
        /* The PCRE error offset is a position in the pattern and may be 0,
         * so it cannot double as an error code. Use the negative line number
         * like the other functions in this file. */
        obj_setErrorCode(self, -__LINE__);
        return NULL;
    }
    PikaObj *m = newNormalObj(New_re_Pattern);
    obj_setPtr(m, "_re", re);
    return m;
}
/* Match.__del__: release the offset vector kept in the "_vec" slot, if any. */
void re_Match___del__(PikaObj *self)
{
    void *offsets = obj_getPtr(self, "_vec");
    if (offsets != NULL)
    {
        free(offsets);
    }
}
/*
 * Match.__init__: give a Match object empty defaults for its internal slots
 * ("_vec", "_b", "_ven", "_s").  Does nothing when "_vec" already exists, so
 * an object filled in by re_Match___init__args() is left untouched.
 */
void re_Match___init__(PikaObj *self)
{
    if (obj_isArgExist(self, "_vec"))
    {
        return;
    }
    obj_setPtr(self, "_vec", NULL);
    obj_setStr(self, "_b", "");
    obj_setInt(self, "_ven", 0);
    obj_setStr(self, "_s", "");
}
/*
 * Fill a Match object with the result of a successful match.
 *
 *   sub - subject string; stored in the "_s" slot
 *   vec - offset vector; the pointer is stored in "_vec"
 *   ven - value stored in "_ven"; the accessors use it as the upper bound
 *         for valid group indices
 *
 * The "_b" slot (scratch string used by group()) is reset to "".
 */
void re_Match___init__args(PikaObj *self, char *sub, int *vec, int ven)
{
    /* string slots */
    obj_setStr(self, "_s", sub);
    obj_setStr(self, "_b", "");
    /* match result */
    obj_setInt(self, "_ven", ven);
    obj_setPtr(self, "_vec", vec);
}
/*
 * Match.group(n)
 *
 * Returns the text of group `n`, i.e. the subject bytes between
 * vec[2n] and vec[2n + 1].  The text is stored in the "_b" slot and the
 * returned pointer refers to that stored copy; a zero-length group yields
 * the literal "".
 *
 * Returns NULL when the object holds no offset vector or subject, when the
 * temporary buffer cannot be allocated, or (with an error code set) when
 * `n` is outside [0, _ven).
 */
char *re_Match_group(PikaObj *self, int n)
{
    int *offsets = obj_getPtr(self, "_vec");
    if (offsets == NULL)
    {
        return NULL;
    }
    char *subject = obj_getStr(self, "_s");
    if (subject == NULL)
    {
        return NULL;
    }
    int group_count = obj_getInt(self, "_ven");
    if (n < 0 || n >= group_count)
    {
        obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    int start = offsets[n * 2];
    int size = offsets[n * 2 + 1] - start;
    if (size == 0)
    {
        return "";
    }

    /* Build a NUL-terminated copy of the slice; obj_setStr() keeps its own
     * copy, so the temporary can be released right away. */
    char *slice = (char *)malloc(size + 1);
    if (slice == NULL)
    {
        return NULL;
    }
    memcpy(slice, subject + start, size);
    slice[size] = 0;
    obj_setStr(self, "_b", slice);
    free(slice);
    return obj_getStr(self, "_b");
}
/*
 * Match.groups()
 *
 * Returns a new list holding one string per offset pair in "_vec", for
 * indices 0 .. _ven - 1 (index 0 is included).  Pairs with no positive
 * length produce "".
 *
 * An empty list is returned when the object has no offset vector, no subject
 * or a zero "_ven".  NULL is returned when the scratch buffer cannot be
 * allocated.
 */
PikaObj *re_Match_groups(PikaObj *self)
{
    int *vec = obj_getPtr(self, "_vec");
    char *s = vec ? obj_getStr(self, "_s") : NULL;
    int ven = s ? obj_getInt(self, "_ven") : 0;

    /* One scratch buffer, sized for the longest group, is allocated before
     * the list exists.  An allocation failure can therefore return NULL
     * without leaking a half-built list (the per-group malloc used before
     * leaked the list on failure). */
    int max_len = 0;
    for (int i = 0; i < ven; i++)
    {
        int len = vec[i * 2 + 1] - vec[i * 2];
        if (len > max_len)
            max_len = len;
    }
    char *buf = NULL;
    if (max_len > 0)
    {
        buf = (char *)malloc(max_len + 1);
        if (!buf)
            return NULL;
    }

    PikaObj *list = newNormalObj(New_PikaStdData_List);
    PikaStdData_List___init__(list);
    for (int i = 0; i < ven; i++)
    {
        Arg *str_arg1;
        int len = vec[i * 2 + 1] - vec[i * 2];
        if (len > 0)
        {
            memcpy(buf, s + vec[i * 2], len);
            buf[len] = 0;
            /* arg_newStr() copies, so the buffer can be reused next round */
            str_arg1 = arg_newStr(buf);
        }
        else
        {
            /* empty span (a negative length is treated the same way
             * instead of being passed to memcpy) */
            str_arg1 = arg_newStr("");
        }
        PikaStdData_List_append(list, str_arg1);
        arg_deinit(str_arg1);
    }
    free(buf);
    return list;
}
/*
 * Match.span(group_n)
 *
 * Returns a new two-element list [start, end] taken from
 * vec[2 * group_n] and vec[2 * group_n + 1].
 *
 * An empty list is returned when the object has no offset vector.  When
 * `group_n` is outside [0, _ven) an error code is set on `self` and the
 * (empty) list is returned.
 */
PikaObj *re_Match_span(PikaObj *self, int group_n)
{
    PikaObj *list = newNormalObj(New_PikaStdData_List);
    PikaStdData_List___init__(list);
    int *vec = obj_getPtr(self, "_vec");
    if (!vec)
        return list;
    int ven = obj_getInt(self, "_ven");
    /* Reject negative indices as well: they would read before the start of
     * `vec`.  This mirrors the range check in re_Match_group(). */
    if (!ven || group_n < 0 || group_n >= ven)
    {
        obj_setErrorCode(self, -__LINE__);
        return list;
    }
    Arg *spos = arg_newInt(vec[group_n * 2]);
    Arg *epos = arg_newInt(vec[group_n * 2 + 1]);
    PikaStdData_List_append(list, spos);
    PikaStdData_List_append(list, epos);
    arg_deinit(spos);
    arg_deinit(epos);
    return list;
}
/* Pattern.__del__: release the compiled pattern kept in "_re" via pcre_free(). */
void re_Pattern___del__(PikaObj *self)
{
    pcre *compiled = (pcre *)obj_getPtr(self, "_re");
    if (compiled != NULL)
    {
        pcre_free(compiled);
    }
}
/*
 * Pattern.__init__: create the internal slots "_re" (NULL) and "_b" ("")
 * unless "_re" is already present on the object.
 */
void re_Pattern___init__(PikaObj *self)
{
    if (obj_isArgExist(self, "_re"))
    {
        return;
    }
    obj_setPtr(self, "_re", NULL);
    obj_setStr(self, "_b", "");
}
/*
 * Pattern.findall(subject, *flags)
 *
 * Every element of `val` must be an int; the values are OR-ed together with
 * PCRE_UTF8 and handed to re_searchall2() with the compiled pattern stored
 * in "_re".  re_searchall2() yields `m_n` offset vectors and reports in
 * `brackets` how many offset pairs each vector carries.
 *
 * Shape of the result:
 *   - brackets == 1: a flat list with one string per match.
 *   - otherwise:     a list of lists; each inner list holds `brackets`
 *                    strings, one per offset pair of that match.
 *
 * Returns NULL when a flag is not an int (error code set) or when the
 * object has no "_re" slot.  Otherwise the list is returned; it is empty
 * when re_searchall2() returns NULL (error code set if `m_n` stayed
 * negative) and may be shorter than `m_n` if a buffer allocation fails.
 */
PikaObj *re_Pattern_findall(PikaObj *self, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    if (!obj_isArgExist(self, "_re"))
        return NULL;
    pcre *re = obj_getPtr(self, "_re");
    PikaObj *list = newNormalObj(New_PikaStdData_List);
    PikaStdData_List___init__(list);
    flags |= PCRE_UTF8;
    int m_n = -1;
    int brackets = 0;
    int **vcs = re_searchall2(re, subject, strlen(subject), &m_n, &brackets, flags);
    if (!vcs)
    {
        if (m_n < 0)
            obj_setErrorCode(self, -__LINE__);
        return list;
    }

    for (int i = 0; i < m_n; i++)
    {
        int *v = vcs[i];

        /* Size the scratch buffer for the longest span of this match.  The
         * whole-match span (pair 0) is not necessarily the longest one: a
         * group captured inside a lookahead can extend past the end of the
         * match, which overflowed the buffer when it was sized from pair 0
         * alone. */
        int max_len = 0;
        for (int j = 0; j < brackets; j++)
        {
            int len = v[j * 2 + 1] - v[j * 2];
            if (len > max_len)
                max_len = len;
        }
        char *b = (char *)malloc(max_len + 1);
        if (!b)
            break; /* return what has been collected so far */

        /* With a single pair the strings go straight into the result list;
         * otherwise they are collected in a per-match sub-list. */
        PikaObj *target = list;
        if (brackets != 1)
        {
            target = newNormalObj(New_PikaStdData_List);
            PikaStdData_List___init__(target);
        }
        for (int j = 0; j < brackets; j++)
        {
            int start = v[j * 2];
            int len = v[j * 2 + 1] - start;
            if (len > 0)
                memcpy(b, subject + start, len);
            else
                len = 0; /* empty span: do not touch `subject + start` */
            b[len] = 0;
            /* arg_newStr() copies, so `b` can be reused for the next span */
            Arg *str_arg1 = arg_newStr(b);
            PikaStdData_List_append(target, str_arg1);
            arg_deinit(str_arg1);
        }
        if (target != list)
        {
            Arg *sub_arg = arg_newRef(target);
            PikaStdData_List_append(list, sub_arg);
            arg_deinit(sub_arg);
        }
        free(b);
    }

    re_free_searchall(vcs, m_n);
    return list;
}
/*
 * Pattern.match(subject, *flags)
 *
 * Every element of `val` must be an int; the values are OR-ed together with
 * PCRE_UTF8 and handed to re_match2() with the compiled pattern stored in
 * "_re".
 *
 * Returns a new re.Match object that stores the offset vector returned by
 * re_match2(), or NULL when the object has no "_re" slot or re_match2()
 * returns NULL.  An error code is set on `self` when a flag is not an int,
 * or when re_match2() fails and leaves `ven` negative.
 */
PikaObj *re_Pattern_match(PikaObj *self, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    if (!obj_isArgExist(self, "_re"))
        return NULL;
    pcre *re = obj_getPtr(self, "_re");
    flags |= PCRE_UTF8;

    int ven = -1;
    int *vec = re_match2(re, subject, strlen(subject), &ven, flags);
    if (!vec)
    {
        if (ven < 0)
            obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    /* Allocate the Match object only after a successful match; creating it
     * up front leaked it on every NULL return above. */
    PikaObj *m = newNormalObj(New_re_Match);
    re_Match___init__args(m, subject, vec, ven);
    return m;
}
/*
 * Pattern.fullmatch(subject, *flags)
 *
 * Every element of `val` must be an int; the values are OR-ed together with
 * PCRE_UTF8 and handed to re_fullmatch2() with the compiled pattern stored
 * in "_re".
 *
 * Returns a new re.Match object that stores the offset vector returned by
 * re_fullmatch2(), or NULL when the object has no "_re" slot or
 * re_fullmatch2() returns NULL.  An error code is set on `self` when a flag
 * is not an int, or when re_fullmatch2() fails and leaves `ven` negative.
 */
PikaObj *re_Pattern_fullmatch(PikaObj *self, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    if (!obj_isArgExist(self, "_re"))
        return NULL;
    pcre *re = obj_getPtr(self, "_re");
    flags |= PCRE_UTF8;

    int ven = -1;
    int *vec = re_fullmatch2(re, subject, strlen(subject), &ven, flags);
    if (!vec)
    {
        if (ven < 0)
            obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    /* Allocate the Match object only after a successful match; creating it
     * up front leaked it on every NULL return above. */
    PikaObj *m = newNormalObj(New_re_Match);
    re_Match___init__args(m, subject, vec, ven);
    return m;
}
/*
 * Pattern.search(subject, *flags)
 *
 * Every element of `val` must be an int; the values are OR-ed together with
 * PCRE_UTF8 and handed to re_search2() with the compiled pattern stored in
 * "_re".
 *
 * Returns a new re.Match object that stores the offset vector returned by
 * re_search2(), or NULL when the object has no "_re" slot or re_search2()
 * returns NULL.  An error code is set on `self` when a flag is not an int,
 * or when re_search2() fails and leaves `ven` negative.
 */
PikaObj *re_Pattern_search(PikaObj *self, char *subject, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    if (!obj_isArgExist(self, "_re"))
        return NULL;
    pcre *re = obj_getPtr(self, "_re");
    flags |= PCRE_UTF8;

    int ven = -1;
    int *vec = re_search2(re, subject, strlen(subject), &ven, flags);
    if (!vec)
    {
        if (ven < 0)
            obj_setErrorCode(self, -__LINE__);
        return NULL;
    }

    /* Allocate the Match object only after a successful match; creating it
     * up front leaked it on every NULL return above. */
    PikaObj *m = newNormalObj(New_re_Match);
    re_Match___init__args(m, subject, vec, ven);
    return m;
}
/*
 * Pattern.sub(repl, subjet, *flags)
 *
 * Every element of `val` must be an int; the values are OR-ed together with
 * PCRE_UTF8 and handed to re_sub2() with the compiled pattern stored in
 * "_re", `repl` and `subjet`.
 *
 * The result is copied into the "_b" slot of `self` and the returned pointer
 * refers to that stored copy.  Returns NULL when the object has no "_re"
 * slot, and NULL with an error code set when a flag is not an int or when
 * re_sub2() returns NULL.
 */
char *re_Pattern_sub(PikaObj *self, char *repl, char *subjet, PikaTuple *val)
{
    int flags = 0;
    for (int i = 0; i < tuple_getSize(val); i++)
    {
        Arg *arg_i = tuple_getArg(val, i);
        if (arg_getType(arg_i) != ARG_TYPE_INT)
        {
            obj_setErrorCode(self, -__LINE__);
            return NULL;
        }
        flags |= arg_getInt(arg_i);
    }
    if (!obj_isArgExist(self, "_re"))
        return NULL;
    pcre *re = obj_getPtr(self, "_re");
    int length = strlen(subjet);
    flags |= PCRE_UTF8;
    char *s = re_sub2(re, repl, subjet, length, flags);
    if (!s)
    {
        obj_setErrorCode(self, -__LINE__);
        return NULL;
    }
    /* obj_setStr() keeps its own copy of the string, so the result can be
     * stored directly; no intermediate buffer is needed. */
    obj_setStr(self, "_b", s);
    /* re_sub2() may hand back `subjet` itself (presumably when nothing was
     * replaced - TODO confirm); only a distinct buffer is ours to free.
     * Previously that buffer was leaked on the success path. */
    if (s != subjet)
        free(s);
    return obj_getStr(self, "_b");
}

89
package/re/re-api.c Normal file
View File

@ -0,0 +1,89 @@
/* ******************************** */
/* Warning! Don't modify this file! */
/* ******************************** */
#include "re.h"
#include "TinyObj.h"
#include <stdio.h>
#include <stdlib.h>
#include "BaseObj.h"
/* Binding for the `Match()` constructor: returns the Arg built by re_Match(). */
void re_MatchMethod(PikaObj *self, Args *args)
{
    method_returnArg(args, re_Match(self));
}
/* Binding for the `Pattern()` constructor: returns the Arg built by re_Pattern(). */
void re_PatternMethod(PikaObj *self, Args *args)
{
    method_returnArg(args, re_Pattern(self));
}
/* Binding for re.compile(): reads "pattern" from `args`, calls re_compile()
 * and returns the resulting object. */
void re_compileMethod(PikaObj *self, Args *args)
{
    char *pattern_str = args_getStr(args, "pattern");
    method_returnObj(args, re_compile(self, pattern_str));
}
/* Binding for re.findall(): reads "pattern", "subject" and the variadic
 * "flags" tuple from `args`, calls re_findall() and returns its object. */
void re_findallMethod(PikaObj *self, Args *args)
{
    char *pattern_str = args_getStr(args, "pattern");
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_findall(self, pattern_str, subject_str, flag_tuple));
}
/* Binding for re.fullmatch(): reads "pattern", "subject" and the variadic
 * "flags" tuple from `args`, calls re_fullmatch() and returns its object. */
void re_fullmatchMethod(PikaObj *self, Args *args)
{
    char *pattern_str = args_getStr(args, "pattern");
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_fullmatch(self, pattern_str, subject_str, flag_tuple));
}
/* Binding for re.match(): reads "pattern", "subject" and the variadic
 * "flags" tuple from `args`, calls re_match() and returns its object. */
void re_matchMethod(PikaObj *self, Args *args)
{
    char *pattern_str = args_getStr(args, "pattern");
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_match(self, pattern_str, subject_str, flag_tuple));
}
/* Binding for re.search(): reads "pattern", "subject" and the variadic
 * "flags" tuple from `args`, calls re_search() and returns its object. */
void re_searchMethod(PikaObj *self, Args *args)
{
    char *pattern_str = args_getStr(args, "pattern");
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_search(self, pattern_str, subject_str, flag_tuple));
}
/* Binding for re.sub(): reads "pattern", "repl", "subjet" and the variadic
 * "flags" tuple from `args`, calls re_sub() and returns its string. */
void re_subMethod(PikaObj *self, Args *args)
{
    char *pattern_str = args_getStr(args, "pattern");
    char *repl_str = args_getStr(args, "repl");
    char *subject_str = args_getStr(args, "subjet");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnStr(args, re_sub(self, pattern_str, repl_str, subject_str, flag_tuple));
}
/*
 * Class constructor for the `re` module object.
 *
 * Builds the object on top of New_TinyObj(), runs pre_init_re() on it and
 * registers the two class constructors (Match, Pattern) and the module-level
 * functions together with their signature strings.
 */
PikaObj *New_re(Args *args)
{
    PikaObj *self = New_TinyObj(args);
    /* pre_init_re() is defined elsewhere; declared locally because no header
     * exposes it. */
    void pre_init_re(PikaObj * self);
    pre_init_re(self);
    class_defineConstructor(self, "Match()->any", re_MatchMethod);
    class_defineConstructor(self, "Pattern()->any", re_PatternMethod);
    class_defineMethod(self, "compile(pattern:str)->Pattern", re_compileMethod);
    class_defineMethod(self, "findall(pattern:str,subject:str,*flags)->list", re_findallMethod);
    class_defineMethod(self, "fullmatch(pattern:str,subject:str,*flags)->Match", re_fullmatchMethod);
    class_defineMethod(self, "match(pattern:str,subject:str,*flags)->Match", re_matchMethod);
    class_defineMethod(self, "search(pattern:str,subject:str,*flags)->Match", re_searchMethod);
    class_defineMethod(self, "sub(pattern:str,repl:str,subjet:str,*flags)->str", re_subMethod);
    return self;
}

21
package/re/re.h Normal file
View File

@ -0,0 +1,21 @@
/* ******************************** */
/* Warning! Don't modify this file! */
/* ******************************** */
#ifndef __re__H
#define __re__H
#include <stdio.h>
#include <stdlib.h>
#include "PikaObj.h"
/* Class constructor of the `re` module object. */
PikaObj *New_re(Args *args);
/* Constructors for the Match and Pattern classes exposed by the module. */
Arg* re_Match(PikaObj *self);
Arg* re_Pattern(PikaObj *self);
/* compile(pattern) -> Pattern */
PikaObj* re_compile(PikaObj *self, char* pattern);
/* findall(pattern, subject, *flags) -> list */
PikaObj* re_findall(PikaObj *self, char* pattern, char* subject, PikaTuple* flags);
/* fullmatch(pattern, subject, *flags) -> Match */
PikaObj* re_fullmatch(PikaObj *self, char* pattern, char* subject, PikaTuple* flags);
/* match(pattern, subject, *flags) -> Match */
PikaObj* re_match(PikaObj *self, char* pattern, char* subject, PikaTuple* flags);
/* search(pattern, subject, *flags) -> Match */
PikaObj* re_search(PikaObj *self, char* pattern, char* subject, PikaTuple* flags);
/* sub(pattern, repl, subjet, *flags) -> str */
char* re_sub(PikaObj *self, char* pattern, char* repl, char* subjet, PikaTuple* flags);
#endif

54
package/re/re.pyi Normal file
View File

@ -0,0 +1,54 @@
from PikaObj import *
# Integer flag constants intended for the variadic `*flags` argument of the
# functions and Pattern methods below.
# NOTE(review): the values are assigned on the C side and are not visible in
# this stub; presumably they mirror Python's re.I / re.M / re.DOTALL - confirm.
I: int
IGNORECASE:int
M: int
MULTILINE:int
DOTALL: int
# Compiled regular expression, as returned by compile().
class Pattern():
    def __init__(self):
        pass

    def __del__(self):
        pass

    # List of matches of the pattern in `subject`.
    def findall(self, subject: str, *flags) -> list:
        pass

    # Substitution on `subjet` using `repl`; returns the resulting string.
    def sub(self, repl: str, subjet: str, *flags) -> str:
        pass

    # The three matchers below return a Match object.
    def match(self, subject: str, *flags) -> Match:
        pass

    def fullmatch(self, subject: str, *flags) -> Match:
        pass

    def search(self, subject: str, *flags) -> Match:
        pass
# Result object returned by match(), fullmatch() and search().
class Match():
    def __init__(self):
        pass

    def __del__(self):
        pass

    # Text of group `n`.
    def group(self, n: int) -> str:
        pass

    # List with the text of every group.
    def groups(self) -> list:
        pass

    # Two-element list [start, end] for group `group_n`.
    def span(self, group_n: int) -> list:
        pass
# Module-level findall: takes the pattern as a string instead of a Pattern object.
def findall(pattern: str, subject: str, *flags) -> list:...
# Module-level sub: takes the pattern as a string instead of a Pattern object.
def sub(pattern: str, repl: str, subjet: str, *flags) -> str: ...
# Module-level match: takes the pattern as a string instead of a Pattern object.
def match(pattern: str, subject: str, *flags) -> Match: ...
# Module-level fullmatch: takes the pattern as a string instead of a Pattern object.
def fullmatch(pattern: str, subject: str, *flags) -> Match: ...
# Module-level search: takes the pattern as a string instead of a Pattern object.
def search(pattern: str, subject: str, *flags) -> Match: ...
# Compile `pattern` into a reusable Pattern object.
def compile(pattern: str) -> Pattern: ...

47
package/re/re_Match-api.c Normal file
View File

@ -0,0 +1,47 @@
/* ******************************** */
/* Warning! Don't modify this file! */
/* ******************************** */
#include "re_Match.h"
#include "TinyObj.h"
#include <stdio.h>
#include <stdlib.h>
#include "BaseObj.h"
/* Binding for Match.__del__(): forwards to re_Match___del__(); `args` is unused. */
void re_Match___del__Method(PikaObj *self, Args *args)
{
    re_Match___del__(self);
}
/* Binding for Match.__init__(): forwards to re_Match___init__(); `args` is unused. */
void re_Match___init__Method(PikaObj *self, Args *args)
{
    re_Match___init__(self);
}
/* Binding for Match.group(): reads the int "n" from `args`, calls
 * re_Match_group() and returns its string. */
void re_Match_groupMethod(PikaObj *self, Args *args)
{
    int group_index = args_getInt(args, "n");
    method_returnStr(args, re_Match_group(self, group_index));
}
/* Binding for Match.groups(): returns the object produced by re_Match_groups(). */
void re_Match_groupsMethod(PikaObj *self, Args *args)
{
    method_returnObj(args, re_Match_groups(self));
}
/* Binding for Match.span(): reads the int "group_n" from `args`, calls
 * re_Match_span() and returns its object. */
void re_Match_spanMethod(PikaObj *self, Args *args)
{
    int group_index = args_getInt(args, "group_n");
    method_returnObj(args, re_Match_span(self, group_index));
}
/*
 * Class constructor for re.Match: builds the object on top of New_TinyObj()
 * and registers its methods together with their signature strings.
 */
PikaObj *New_re_Match(Args *args){
    PikaObj *self = New_TinyObj(args);
    class_defineMethod(self, "__del__()", re_Match___del__Method);
    class_defineMethod(self, "__init__()", re_Match___init__Method);
    class_defineMethod(self, "group(n:int)->str", re_Match_groupMethod);
    class_defineMethod(self, "groups()->list", re_Match_groupsMethod);
    class_defineMethod(self, "span(group_n:int)->list", re_Match_spanMethod);
    return self;
}
/* Constructor entry for `Match()`: creates a Match object through
 * obj_newObjInPackage() using the New_re_Match class constructor. */
Arg *re_Match(PikaObj *self)
{
    Arg *match_arg = obj_newObjInPackage(New_re_Match);
    return match_arg;
}

18
package/re/re_Match.h Normal file
View File

@ -0,0 +1,18 @@
/* ******************************** */
/* Warning! Don't modify this file! */
/* ******************************** */
#ifndef __re_Match__H
#define __re_Match__H
#include <stdio.h>
#include <stdlib.h>
#include "PikaObj.h"
/* Class constructor of re.Match. */
PikaObj *New_re_Match(Args *args);
/* Match.__del__() / Match.__init__() */
void re_Match___del__(PikaObj *self);
void re_Match___init__(PikaObj *self);
/* Match.group(n) -> str */
char* re_Match_group(PikaObj *self, int n);
/* Match.groups() -> list */
PikaObj* re_Match_groups(PikaObj *self);
/* Match.span(group_n) -> list */
PikaObj* re_Match_span(PikaObj *self, int group_n);
#endif

View File

@ -0,0 +1,68 @@
/* ******************************** */
/* Warning! Don't modify this file! */
/* ******************************** */
#include "re_Pattern.h"
#include "TinyObj.h"
#include <stdio.h>
#include <stdlib.h>
#include "BaseObj.h"
/* Binding for Pattern.__del__(): forwards to re_Pattern___del__(); `args` is unused. */
void re_Pattern___del__Method(PikaObj *self, Args *args)
{
    re_Pattern___del__(self);
}
/* Binding for Pattern.__init__(): forwards to re_Pattern___init__(); `args` is unused. */
void re_Pattern___init__Method(PikaObj *self, Args *args)
{
    re_Pattern___init__(self);
}
/* Binding for Pattern.findall(): reads "subject" and the variadic "flags"
 * tuple from `args`, calls re_Pattern_findall() and returns its object. */
void re_Pattern_findallMethod(PikaObj *self, Args *args)
{
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_Pattern_findall(self, subject_str, flag_tuple));
}
/* Binding for Pattern.fullmatch(): reads "subject" and the variadic "flags"
 * tuple from `args`, calls re_Pattern_fullmatch() and returns its object. */
void re_Pattern_fullmatchMethod(PikaObj *self, Args *args)
{
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_Pattern_fullmatch(self, subject_str, flag_tuple));
}
/* Binding for Pattern.match(): reads "subject" and the variadic "flags"
 * tuple from `args`, calls re_Pattern_match() and returns its object. */
void re_Pattern_matchMethod(PikaObj *self, Args *args)
{
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_Pattern_match(self, subject_str, flag_tuple));
}
/* Binding for Pattern.search(): reads "subject" and the variadic "flags"
 * tuple from `args`, calls re_Pattern_search() and returns its object. */
void re_Pattern_searchMethod(PikaObj *self, Args *args)
{
    char *subject_str = args_getStr(args, "subject");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnObj(args, re_Pattern_search(self, subject_str, flag_tuple));
}
/* Binding for Pattern.sub(): reads "repl", "subjet" and the variadic "flags"
 * tuple from `args`, calls re_Pattern_sub() and returns its string. */
void re_Pattern_subMethod(PikaObj *self, Args *args)
{
    char *repl_str = args_getStr(args, "repl");
    char *subject_str = args_getStr(args, "subjet");
    PikaTuple *flag_tuple = args_getTuple(args, "flags");
    method_returnStr(args, re_Pattern_sub(self, repl_str, subject_str, flag_tuple));
}
/*
 * Class constructor for re.Pattern: builds the object on top of
 * New_TinyObj() and registers its methods together with their signature
 * strings.
 */
PikaObj *New_re_Pattern(Args *args){
    PikaObj *self = New_TinyObj(args);
    class_defineMethod(self, "__del__()", re_Pattern___del__Method);
    class_defineMethod(self, "__init__()", re_Pattern___init__Method);
    class_defineMethod(self, "findall(subject:str,*flags)->list", re_Pattern_findallMethod);
    class_defineMethod(self, "fullmatch(subject:str,*flags)->Match", re_Pattern_fullmatchMethod);
    class_defineMethod(self, "match(subject:str,*flags)->Match", re_Pattern_matchMethod);
    class_defineMethod(self, "search(subject:str,*flags)->Match", re_Pattern_searchMethod);
    class_defineMethod(self, "sub(repl:str,subjet:str,*flags)->str", re_Pattern_subMethod);
    return self;
}
/* Constructor entry for `Pattern()`: creates a Pattern object through
 * obj_newObjInPackage() using the New_re_Pattern class constructor. */
Arg *re_Pattern(PikaObj *self)
{
    Arg *pattern_arg = obj_newObjInPackage(New_re_Pattern);
    return pattern_arg;
}

20
package/re/re_Pattern.h Normal file
View File

@ -0,0 +1,20 @@
/* ******************************** */
/* Warning! Don't modify this file! */
/* ******************************** */
#ifndef __re_Pattern__H
#define __re_Pattern__H
#include <stdio.h>
#include <stdlib.h>
#include "PikaObj.h"
/* Class constructor of re.Pattern. */
PikaObj *New_re_Pattern(Args *args);
/* Pattern.__del__() / Pattern.__init__() */
void re_Pattern___del__(PikaObj *self);
void re_Pattern___init__(PikaObj *self);
/* Pattern.findall(subject, *flags) -> list */
PikaObj* re_Pattern_findall(PikaObj *self, char* subject, PikaTuple* flags);
/* Pattern.fullmatch(subject, *flags) -> Match */
PikaObj* re_Pattern_fullmatch(PikaObj *self, char* subject, PikaTuple* flags);
/* Pattern.match(subject, *flags) -> Match */
PikaObj* re_Pattern_match(PikaObj *self, char* subject, PikaTuple* flags);
/* Pattern.search(subject, *flags) -> Match */
PikaObj* re_Pattern_search(PikaObj *self, char* subject, PikaTuple* flags);
/* Pattern.sub(repl, subjet, *flags) -> str */
char* re_Pattern_sub(PikaObj *self, char* repl, char* subjet, PikaTuple* flags);
#endif

161
package/re/readme.md Normal file
View File

@ -0,0 +1,161 @@
# re for pikaScript
This module is made for pikaScript, aiming at providing the same usage as the re module in Python.
## import module
Register the *re* module before the pika interpreter is initialized; by default this is done in *New_PikaMain()* in *PikaMain-api.c*.
``` c
// [PikaMain-api.c]
#include "re.h"
PikaObj *New_PikaMain(Args *args){
PikaObj *self = New_PikaStdLib_SysObj(args);
// ...
obj_newObj(self, "re", "re", New_re);
// ...
return self;
}
```
## usage
The functions in this module have almost the same names and usage as those in Python's re module, but some features of Python's re module are not available here; for example, pikaScript does not support default arguments.
Here we provide some demonstration programs.
### match
``` python
import re
line = "Cats are smarter than dogs"
m = re.match( '(.*) are (.*?) .*', line, re.M|re.I)
if m:
print("matchObj.group(0) : ", m.group(0))
print("matchObj.group(1) : ", m.group(1))
print("matchObj.group(2) : ", m.group(2))
else:
print("No match!!")
'''>> running output
matchObj.group(0) : Cats are smarter than dogs
matchObj.group(1) : Cats
matchObj.group(2) : smarter
'''
```
### search
``` python
print(re.search('www', 'www.runoob.com').span(0))
print(re.search('com', 'www.runoob.com').span(0))
'''>> running output
[0, 3]
[11, 14]
'''
```
### sub
```python
phone = "2004-959-559 # this is a phone number"
num = re.sub('#.*$', "", phone)
print("the phone number is: ", num)
num = re.sub('\D', "", phone)
print("the phone number is: ", num)
'''>> running output
the phone number is: 2004-959-559
the phone number is: 2004959559
'''
```
### findall
``` python
# year-month-day
pattern = re.compile('(\d{4})-([1-9]|1[0-2])-([1-9]|[1-2][0-9]|3[01])\b')
s = 'date: 2020-1-1, 2022-12-22, 2018-3-31. Wrong format: 2031-13-31, 2032-12-33 ...'
result1 = pattern.findall(s)
print(result1)
result2 = pattern.sub('\1',s)
print(result2)
'''>> running output
[['2020-1-1', '2020', '1', '1'], ['2022-12-22', '2022', '12', '22'], ['2018-3-31', '2018', '3', '31']]
date: 2020, 2022, 2018. Wrong format: 2031-13-31, 2032-12-33 ...
'''
```
## the API
The module's prototype looks like this:
``` python
# flags
I: int
IGNORECASE:int
M: int
MULTILINE:int
DOTALL: int
class Pattern():
def __init__(self):
pass
def __del__(self):
pass
def findall(self, subject: str, *flags) -> list:
pass
def sub(self, repl: str, subjet: str, *flags) -> str:
pass
def match(self, subject: str, *flags) -> Match:
pass
def fullmatch(self, subject: str, *flags) -> Match:
pass
def search(self, subject: str, *flags) -> Match:
pass
class Match():
def __init__(self):
pass
def __del__(self):
pass
def group(self, n: int) -> str:
pass
def groups(self) -> list:
pass
def span(self, group_n: int) -> list:
pass
def findall(pattern: str, subject: str, *flags) -> list:...
def sub(pattern: str, repl: str, subjet: str, *flags) -> str: ...
def match(pattern: str, subject: str, *flags) -> Match: ...
def fullmatch(pattern: str, subject: str, *flags) -> Match: ...
def search(pattern: str, subject: str, *flags) -> Match: ...
def compile(pattern: str) -> Pattern: ...
```