#ifndef REGPARSE_H
|
#define REGPARSE_H
|
/**********************************************************************
|
regparse.h - Oniguruma (regular expression library)
|
**********************************************************************/
|
/*-
|
* Copyright (c) 2002-2020 K.Kosako
|
* All rights reserved.
|
*
|
* Redistribution and use in source and binary forms, with or without
|
* modification, are permitted provided that the following conditions
|
* are met:
|
* 1. Redistributions of source code must retain the above copyright
|
* notice, this list of conditions and the following disclaimer.
|
* 2. Redistributions in binary form must reproduce the above copyright
|
* notice, this list of conditions and the following disclaimer in the
|
* documentation and/or other materials provided with the distribution.
|
*
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
* SUCH DAMAGE.
|
*/
|
|
#include "regint.h"
|
|
#define NODE_STRING_MARGIN 16
|
#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
|
#define NODE_BACKREFS_SIZE 6
|
|
/* node type */
|
typedef enum {
|
NODE_STRING = 0,
|
NODE_CCLASS = 1,
|
NODE_CTYPE = 2,
|
NODE_BACKREF = 3,
|
NODE_QUANT = 4,
|
NODE_BAG = 5,
|
NODE_ANCHOR = 6,
|
NODE_LIST = 7,
|
NODE_ALT = 8,
|
NODE_CALL = 9,
|
NODE_GIMMICK = 10
|
} NodeType;
|
|
enum BagType {
|
BAG_MEMORY = 0,
|
BAG_OPTION = 1,
|
BAG_STOP_BACKTRACK = 2,
|
BAG_IF_ELSE = 3,
|
};
|
|
enum GimmickType {
|
GIMMICK_FAIL = 0,
|
GIMMICK_SAVE = 1,
|
GIMMICK_UPDATE_VAR = 2,
|
#ifdef USE_CALLOUT
|
GIMMICK_CALLOUT = 3,
|
#endif
|
};
|
|
enum BodyEmptyType {
|
BODY_IS_NOT_EMPTY = 0,
|
BODY_MAY_BE_EMPTY = 1,
|
BODY_MAY_BE_EMPTY_MEM = 2,
|
BODY_MAY_BE_EMPTY_REC = 3
|
};
|
|
struct _Node;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
|
UChar* s;
|
UChar* end;
|
unsigned int flag;
|
UChar buf[NODE_STRING_BUF_SIZE];
|
int capacity; /* (allocated size - 1) or 0: use buf[] */
|
} StrNode;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
|
unsigned int flags;
|
BitSet bs;
|
BBuf* mbuf; /* multi-byte info or NULL */
|
} CClassNode;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
struct _Node* body;
|
|
int lower;
|
int upper;
|
int greedy;
|
enum BodyEmptyType emptiness;
|
struct _Node* head_exact;
|
struct _Node* next_head_exact;
|
int include_referred; /* include called node. don't eliminate even if {0} */
|
} QuantNode;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
struct _Node* body;
|
|
enum BagType type;
|
union {
|
struct {
|
int regnum;
|
AbsAddrType called_addr;
|
int entry_count;
|
int called_state;
|
} m;
|
struct {
|
OnigOptionType options;
|
} o;
|
struct {
|
/* body is condition */
|
struct _Node* Then;
|
struct _Node* Else;
|
} te;
|
};
|
/* for multiple call reference */
|
OnigLen min_len; /* min length (byte) */
|
OnigLen max_len; /* max length (byte) */
|
OnigLen min_char_len;
|
OnigLen max_char_len;
|
int opt_count; /* referenced count in optimize_nodes() */
|
} BagNode;
|
|
#ifdef USE_CALL
|
|
typedef struct {
|
int offset;
|
struct _Node* target;
|
} UnsetAddr;
|
|
typedef struct {
|
int num;
|
int alloc;
|
UnsetAddr* us;
|
} UnsetAddrList;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
struct _Node* body; /* to BagNode : BAG_MEMORY */
|
|
int by_number;
|
int group_num;
|
UChar* name;
|
UChar* name_end;
|
int entry_count;
|
} CallNode;
|
|
#endif
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
|
int back_num;
|
int back_static[NODE_BACKREFS_SIZE];
|
int* back_dynamic;
|
int nest_level;
|
} BackRefNode;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
struct _Node* body;
|
|
int type;
|
OnigLen char_min_len;
|
OnigLen char_max_len;
|
int ascii_mode;
|
struct _Node* lead_node;
|
} AnchorNode;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
|
struct _Node* car;
|
struct _Node* cdr;
|
} ConsAltNode;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
|
int ctype;
|
int not;
|
int ascii_mode;
|
} CtypeNode;
|
|
typedef struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
|
enum GimmickType type;
|
int detail_type;
|
int num;
|
int id;
|
} GimmickNode;
|
|
typedef struct _Node {
|
union {
|
struct {
|
NodeType node_type;
|
int status;
|
struct _Node* parent;
|
struct _Node* body;
|
} base;
|
|
StrNode str;
|
CClassNode cclass;
|
QuantNode quant;
|
BagNode bag;
|
BackRefNode backref;
|
AnchorNode anchor;
|
ConsAltNode cons;
|
CtypeNode ctype;
|
#ifdef USE_CALL
|
CallNode call;
|
#endif
|
GimmickNode gimmick;
|
} u;
|
} Node;
|
|
typedef struct {
|
int new_val;
|
} GroupNumMap;
|
|
|
#define NULL_NODE ((Node* )0)
|
|
|
/* node type bit */
|
#define NODE_TYPE2BIT(type) (1<<(type))
|
|
#define NODE_BIT_STRING NODE_TYPE2BIT(NODE_STRING)
|
#define NODE_BIT_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
|
#define NODE_BIT_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
|
#define NODE_BIT_BACKREF NODE_TYPE2BIT(NODE_BACKREF)
|
#define NODE_BIT_QUANT NODE_TYPE2BIT(NODE_QUANT)
|
#define NODE_BIT_BAG NODE_TYPE2BIT(NODE_BAG)
|
#define NODE_BIT_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
|
#define NODE_BIT_LIST NODE_TYPE2BIT(NODE_LIST)
|
#define NODE_BIT_ALT NODE_TYPE2BIT(NODE_ALT)
|
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
|
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
|
|
#define NODE_TYPE(node) ((node)->u.base.node_type)
|
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
|
|
#define STR_(node) (&((node)->u.str))
|
#define CCLASS_(node) (&((node)->u.cclass))
|
#define CTYPE_(node) (&((node)->u.ctype))
|
#define BACKREF_(node) (&((node)->u.backref))
|
#define QUANT_(node) (&((node)->u.quant))
|
#define BAG_(node) (&((node)->u.bag))
|
#define ANCHOR_(node) (&((node)->u.anchor))
|
#define CONS_(node) (&((node)->u.cons))
|
#define CALL_(node) (&((node)->u.call))
|
#define GIMMICK_(node) (&((node)->u.gimmick))
|
|
#define NODE_CAR(node) (CONS_(node)->car)
|
#define NODE_CDR(node) (CONS_(node)->cdr)
|
|
#define CTYPE_ANYCHAR -1
|
#define NODE_IS_ANYCHAR(node) \
|
(NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
|
|
|
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
|
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
|
|
#define NODE_STRING_CRUDE (1<<0)
|
#define NODE_STRING_CASE_EXPANDED (1<<1)
|
|
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
|
#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE
|
#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE
|
#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED
|
#define NODE_STRING_IS_CRUDE(node) \
|
(((node)->u.str.flag & NODE_STRING_CRUDE) != 0)
|
#define NODE_STRING_IS_CASE_EXPANDED(node) \
|
(((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0)
|
|
#define BACKREFS_P(br) \
|
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
|
|
/* node status bits */
|
#define NODE_ST_FIXED_MIN (1<<0)
|
#define NODE_ST_FIXED_MAX (1<<1)
|
#define NODE_ST_FIXED_CLEN (1<<2)
|
#define NODE_ST_MARK1 (1<<3)
|
#define NODE_ST_MARK2 (1<<4)
|
#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
|
#define NODE_ST_RECURSION (1<<6)
|
#define NODE_ST_CALLED (1<<7)
|
#define NODE_ST_FIXED_ADDR (1<<8)
|
#define NODE_ST_NAMED_GROUP (1<<9)
|
#define NODE_ST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
|
#define NODE_ST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
|
#define NODE_ST_IN_MULTI_ENTRY (1<<12)
|
#define NODE_ST_NEST_LEVEL (1<<13)
|
#define NODE_ST_BY_NUMBER (1<<14) /* {n,m} */
|
#define NODE_ST_BY_NAME (1<<15) /* backref by name */
|
#define NODE_ST_BACKREF (1<<16)
|
#define NODE_ST_CHECKER (1<<17)
|
#define NODE_ST_PROHIBIT_RECURSION (1<<18)
|
#define NODE_ST_SUPER (1<<19)
|
#define NODE_ST_EMPTY_STATUS_CHECK (1<<20)
|
#define NODE_ST_IGNORECASE (1<<21)
|
#define NODE_ST_MULTILINE (1<<22)
|
#define NODE_ST_TEXT_SEGMENT_WORD (1<<23)
|
#define NODE_ST_ABSENT_WITH_SIDE_EFFECTS (1<<24) /* stopper or clear */
|
#define NODE_ST_FIXED_CLEN_MIN_SURE (1<<25)
|
|
|
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
|
#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (NODE_ST_ ## f))
|
#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(NODE_ST_ ## f))
|
|
#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NODE_ST_BY_NUMBER) != 0)
|
#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_REAL_REPEAT) != 0)
|
#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NODE_ST_CALLED) != 0)
|
#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NODE_ST_IN_MULTI_ENTRY) != 0)
|
#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NODE_ST_RECURSION) != 0)
|
#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NODE_ST_IN_ZERO_REPEAT) != 0)
|
#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NODE_ST_NAMED_GROUP) != 0)
|
#define NODE_IS_FIXED_ADDR(node) ((NODE_STATUS(node) & NODE_ST_FIXED_ADDR) != 0)
|
#define NODE_IS_FIXED_CLEN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN) != 0)
|
#define NODE_IS_FIXED_MIN(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MIN) != 0)
|
#define NODE_IS_FIXED_MAX(node) ((NODE_STATUS(node) & NODE_ST_FIXED_MAX) != 0)
|
#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NODE_ST_MARK1) != 0)
|
#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NODE_ST_MARK2) != 0)
|
#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NODE_ST_NEST_LEVEL) != 0)
|
#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NODE_ST_BY_NAME) != 0)
|
#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NODE_ST_BACKREF) != 0)
|
#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NODE_ST_CHECKER) != 0)
|
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
|
#define NODE_IS_PROHIBIT_RECURSION(node) \
|
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
|
#define NODE_IS_STRICT_REAL_REPEAT(node) \
|
((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
|
#define NODE_IS_EMPTY_STATUS_CHECK(node) \
|
((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0)
|
#define NODE_IS_IGNORECASE(node) ((NODE_STATUS(node) & NODE_ST_IGNORECASE) != 0)
|
#define NODE_IS_MULTILINE(node) ((NODE_STATUS(node) & NODE_ST_MULTILINE) != 0)
|
#define NODE_IS_TEXT_SEGMENT_WORD(node) ((NODE_STATUS(node) & NODE_ST_TEXT_SEGMENT_WORD) != 0)
|
#define NODE_IS_ABSENT_WITH_SIDE_EFFECTS(node) ((NODE_STATUS(node) & NODE_ST_ABSENT_WITH_SIDE_EFFECTS) != 0)
|
#define NODE_IS_FIXED_CLEN_MIN_SURE(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN_MIN_SURE) != 0)
|
|
#define NODE_PARENT(node) ((node)->u.base.parent)
|
#define NODE_BODY(node) ((node)->u.base.body)
|
#define NODE_QUANT_BODY(node) ((node)->body)
|
#define NODE_BAG_BODY(node) ((node)->body)
|
#define NODE_CALL_BODY(node) ((node)->body)
|
#define NODE_ANCHOR_BODY(node) ((node)->body)
|
|
#define SCANENV_MEMENV_SIZE 8
|
#define SCANENV_MEMENV(senv) \
|
(IS_NOT_NULL((senv)->mem_env_dynamic) ? \
|
(senv)->mem_env_dynamic : (senv)->mem_env_static)
|
|
#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
|
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
|
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
|
|
#define ID_ENTRY(env, id) do {\
|
id = (env)->id_num++;\
|
} while(0)
|
|
|
typedef struct {
|
Node* mem_node;
|
Node* empty_repeat_node;
|
} MemEnv;
|
|
typedef struct {
|
enum SaveType type;
|
} SaveItem;
|
|
typedef struct {
|
OnigOptionType options;
|
OnigCaseFoldType case_fold_flag;
|
OnigEncoding enc;
|
OnigSyntaxType* syntax;
|
MemStatusType cap_history;
|
MemStatusType backtrack_mem; /* backtrack/recursion */
|
MemStatusType backrefed_mem;
|
UChar* pattern;
|
UChar* pattern_end;
|
UChar* error;
|
UChar* error_end;
|
regex_t* reg; /* for reg->names only */
|
int num_call;
|
int num_mem;
|
int num_named;
|
int mem_alloc;
|
MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
|
MemEnv* mem_env_dynamic;
|
int backref_num;
|
int keep_num;
|
int id_num;
|
int save_alloc_num;
|
SaveItem* saves;
|
#ifdef USE_CALL
|
UnsetAddrList* unset_addr_list;
|
int has_call_zero;
|
#endif
|
unsigned int parse_depth;
|
#ifdef ONIG_DEBUG_PARSE
|
unsigned int max_parse_depth;
|
#endif
|
} ScanEnv;
|
|
|
extern int onig_renumber_name_table P_((regex_t* reg, GroupNumMap* map));
|
|
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
|
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
|
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
|
extern int onig_reduce_nested_quantifier P_((Node* pnode));
|
extern int onig_node_copy(Node** rcopy, Node* from);
|
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
|
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end, int need_free));
|
extern void onig_node_str_clear P_((Node* node, int need_free));
|
extern void onig_node_free P_((Node* node));
|
extern int onig_node_reset_empty P_((Node* node));
|
extern int onig_node_reset_fail P_((Node* node));
|
extern Node* onig_node_new_bag P_((enum BagType type));
|
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
|
extern Node* onig_node_new_list P_((Node* left, Node* right));
|
extern Node* onig_node_new_alt P_((Node* left, Node* right));
|
extern int onig_names_free P_((regex_t* reg));
|
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
|
extern int onig_free_shared_cclass_table P_((void));
|
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
|
extern int onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]);
|
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
|
|
#ifdef USE_CALLOUT
|
extern int onig_global_callout_names_free(void);
|
#endif
|
|
#ifdef ONIG_DEBUG
|
extern int onig_print_names(FILE*, regex_t*);
|
#endif
|
|
#endif /* REGPARSE_H */
|