33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(
void)
38 return OnigDefaultCaseFoldFlag;
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
44 OnigDefaultCaseFoldFlag = case_fold_flag;
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
55str_dup(UChar* s, UChar* end)
57 ptrdiff_t len = end - s;
60 UChar* r = (UChar* )
xmalloc(len + 1);
74 c = *a; *a = *b; *b = c;
76 if (NTYPE(a) == NT_STR) {
79 size_t len = sn->end - sn->s;
81 sn->end = sn->s + len;
85 if (NTYPE(b) == NT_STR) {
88 size_t len = sn->end - sn->s;
90 sn->end = sn->s + len;
96distance_add(OnigDistance d1, OnigDistance d2)
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2)
return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
107distance_multiply(OnigDistance d,
int m)
109 if (m == 0)
return 0;
111 if (d < ONIG_INFINITE_DISTANCE / m)
114 return ONIG_INFINITE_DISTANCE;
118bitset_is_empty(BitSetRef bs)
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0)
return 0;
129bitset_on_num(BitSetRef bs)
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr =
xrealloc(reg->p, reg->used);
155 reg->alloc = reg->used;
159 }
while ((reg = reg->chain) != 0);
163onig_bbuf_init(
BBuf* buf, OnigDistance size)
170 buf->p = (UChar* )
xmalloc(size);
171 if (IS_NULL(buf->p))
return(ONIGERR_MEMORY);
174 buf->alloc = (
unsigned int )size;
180#ifdef USE_SUBEXP_CALL
188 CHECK_NULL_RETURN_MEMERR(p);
190 uslist->alloc = size;
198 if (IS_NOT_NULL(uslist->us))
208 if (uslist->num >= uslist->alloc) {
209 size = uslist->alloc * 2;
211 CHECK_NULL_RETURN_MEMERR(p);
212 uslist->alloc = size;
216 uslist->us[uslist->num].offset = offset;
217 uslist->us[uslist->num].target = node;
225add_opcode(
regex_t* reg,
int opcode)
227 BBUF_ADD1(reg, opcode);
231#ifdef USE_COMBINATION_EXPLOSION_CHECK
233add_state_check_num(
regex_t* reg,
int num)
235 StateCheckNumType n = (StateCheckNumType )num;
237 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
243add_rel_addr(
regex_t* reg,
int addr)
245 RelAddrType ra = (RelAddrType )addr;
247 BBUF_ADD(reg, &ra, SIZE_RELADDR);
252add_abs_addr(
regex_t* reg,
int addr)
254 AbsAddrType ra = (AbsAddrType )addr;
256 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
261add_length(
regex_t* reg, OnigDistance len)
263 LengthType l = (LengthType )len;
265 BBUF_ADD(reg, &l, SIZE_LENGTH);
270add_mem_num(
regex_t* reg,
int num)
272 MemNumType n = (MemNumType )num;
274 BBUF_ADD(reg, &n, SIZE_MEMNUM);
280add_pointer(
regex_t* reg,
void* addr)
282 PointerType ptr = (PointerType )addr;
284 BBUF_ADD(reg, &ptr, SIZE_POINTER);
290add_option(
regex_t* reg, OnigOptionType option)
292 BBUF_ADD(reg, &option, SIZE_OPTION);
297add_opcode_rel_addr(
regex_t* reg,
int opcode,
int addr)
301 r = add_opcode(reg, opcode);
303 r = add_rel_addr(reg, addr);
308add_bytes(
regex_t* reg, UChar* bytes, OnigDistance len)
310 BBUF_ADD(reg, bytes, len);
315add_bitset(
regex_t* reg, BitSetRef bs)
317 BBUF_ADD(reg, bs, SIZE_BITSET);
322add_opcode_option(
regex_t* reg,
int opcode, OnigOptionType option)
326 r = add_opcode(reg, opcode);
328 r = add_option(reg, option);
332static int compile_length_tree(
Node* node,
regex_t* reg);
336#define IS_NEED_STR_LEN_OP_EXACT(op) \
337 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
338 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
341select_str_opcode(
int mb_len, OnigDistance byte_len,
int ignore_case)
344 OnigDistance str_len = roomof(byte_len, mb_len);
348 case 1: op = OP_EXACT1_IC;
break;
349 default: op = OP_EXACTN_IC;
break;
356 case 1: op = OP_EXACT1;
break;
357 case 2: op = OP_EXACT2;
break;
358 case 3: op = OP_EXACT3;
break;
359 case 4: op = OP_EXACT4;
break;
360 case 5: op = OP_EXACT5;
break;
361 default: op = OP_EXACTN;
break;
367 case 1: op = OP_EXACTMB2N1;
break;
368 case 2: op = OP_EXACTMB2N2;
break;
369 case 3: op = OP_EXACTMB2N3;
break;
370 default: op = OP_EXACTMB2N;
break;
387compile_tree_empty_check(
Node* node,
regex_t* reg,
int empty_info)
390 int saved_num_null_check = reg->num_null_check;
392 if (empty_info != 0) {
393 r = add_opcode(reg, OP_NULL_CHECK_START);
395 r = add_mem_num(reg, reg->num_null_check);
397 reg->num_null_check++;
400 r = compile_tree(node, reg);
403 if (empty_info != 0) {
404 if (empty_info == NQ_TARGET_IS_EMPTY)
405 r = add_opcode(reg, OP_NULL_CHECK_END);
406 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
407 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
408 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
409 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
412 r = add_mem_num(reg, saved_num_null_check);
417#ifdef USE_SUBEXP_CALL
423 r = add_opcode(reg, OP_CALL);
425 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
428 r = add_abs_addr(reg, 0 );
434compile_tree_n_times(
Node* node,
int n,
regex_t* reg)
438 for (i = 0; i < n; i++) {
439 r = compile_tree(node, reg);
446add_compile_string_length(UChar* s ARG_UNUSED,
int mb_len, OnigDistance byte_len,
447 regex_t* reg ARG_UNUSED,
int ignore_case)
450 int op = select_str_opcode(mb_len, byte_len, ignore_case);
454 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
455 if (IS_NEED_STR_LEN_OP_EXACT(op))
458 len += (int )byte_len;
463add_compile_string(UChar* s,
int mb_len, OnigDistance byte_len,
466 int op = select_str_opcode(mb_len, byte_len, ignore_case);
469 if (op == OP_EXACTMBN)
470 add_length(reg, mb_len);
472 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
473 if (op == OP_EXACTN_IC)
474 add_length(reg, byte_len);
476 add_length(reg, byte_len / mb_len);
479 add_bytes(reg, s, byte_len);
485compile_length_string_node(
Node* node,
regex_t* reg)
487 int rlen, r, len, prev_len, blen, ambig;
493 if (sn->end <= sn->s)
496 ambig = NSTRING_IS_AMBIG(node);
499 prev_len = enclen(enc, p, sn->end);
504 for (; p < sn->end; ) {
505 len = enclen(enc, p, sn->end);
506 if (len == prev_len || ambig) {
510 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
518 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
526 if (sn->end <= sn->s)
529 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
535 int r, len, prev_len, blen, ambig;
537 UChar *p, *prev, *end;
541 if (sn->end <= sn->s)
545 ambig = NSTRING_IS_AMBIG(node);
548 prev_len = enclen(enc, p, end);
553 len = enclen(enc, p, end);
554 if (len == prev_len || ambig) {
558 r = add_compile_string(prev, prev_len, blen, reg, ambig);
568 return add_compile_string(prev, prev_len, blen, reg, ambig);
574 if (sn->end <= sn->s)
577 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
583#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
584 add_length(reg, mbuf->used);
585 return add_bytes(reg, mbuf->p, mbuf->used);
588 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
590 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
591 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
592 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
594 r = add_bytes(reg, mbuf->p, mbuf->used);
597 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
598 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
608 if (IS_NULL(cc->mbuf)) {
609 len = SIZE_OPCODE + SIZE_BITSET;
612 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
616 len = SIZE_OPCODE + SIZE_BITSET;
618#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
619 len += SIZE_LENGTH + cc->mbuf->used;
621 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
633 if (IS_NULL(cc->mbuf)) {
634 if (IS_NCCLASS_NOT(cc))
635 add_opcode(reg, OP_CCLASS_NOT);
637 add_opcode(reg, OP_CCLASS);
639 r = add_bitset(reg, cc->bs);
642 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
643 if (IS_NCCLASS_NOT(cc))
644 add_opcode(reg, OP_CCLASS_MB_NOT);
646 add_opcode(reg, OP_CCLASS_MB);
648 r = add_multi_byte_cclass(cc->mbuf, reg);
651 if (IS_NCCLASS_NOT(cc))
652 add_opcode(reg, OP_CCLASS_MIX_NOT);
654 add_opcode(reg, OP_CCLASS_MIX);
656 r = add_bitset(reg, cc->bs);
658 r = add_multi_byte_cclass(cc->mbuf, reg);
666entry_repeat_range(
regex_t* reg,
int id,
int lower,
int upper)
668#define REPEAT_RANGE_ALLOC 4
672 if (reg->repeat_range_alloc == 0) {
674 CHECK_NULL_RETURN_MEMERR(p);
675 reg->repeat_range = p;
676 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
678 else if (reg->repeat_range_alloc <=
id) {
680 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
683 CHECK_NULL_RETURN_MEMERR(p);
684 reg->repeat_range = p;
685 reg->repeat_range_alloc = n;
688 p = reg->repeat_range;
692 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
697compile_range_repeat_node(
QtfrNode* qn,
int target_len,
int empty_info,
701 int num_repeat = reg->num_repeat;
703 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
705 r = add_mem_num(reg, num_repeat);
708 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
711 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
714 r = compile_tree_empty_check(qn->target, reg, empty_info);
718#ifdef USE_SUBEXP_CALL
721 IS_QUANTIFIER_IN_REPEAT(qn)) {
722 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
725 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
728 r = add_mem_num(reg, num_repeat);
733is_anychar_star_quantifier(
QtfrNode* qn)
735 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
736 NTYPE(qn->target) == NT_CANY)
742#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
743#define CKN_ON (ckn > 0)
745#ifdef USE_COMBINATION_EXPLOSION_CHECK
750 int len, mod_tlen, cklen;
752 int infinite = IS_REPEAT_INFINITE(qn->upper);
753 int empty_info = qn->target_empty_info;
754 int tlen = compile_length_tree(qn->target, reg);
756 if (tlen < 0)
return tlen;
758 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
760 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
763 if (NTYPE(qn->target) == NT_CANY) {
764 if (qn->greedy && infinite) {
765 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
766 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
768 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
773 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
777 if (infinite && qn->lower <= 1) {
784 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
792 len += mod_tlen + SIZE_OP_PUSH + cklen;
795 else if (qn->upper == 0) {
796 if (qn->is_referred != 0)
797 len = SIZE_OP_JUMP + tlen;
801 else if (qn->upper == 1 && qn->greedy) {
802 if (qn->lower == 0) {
804 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
807 len = SIZE_OP_PUSH + tlen;
814 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
815 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
818 len = SIZE_OP_REPEAT_INC
819 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
821 len += SIZE_OP_STATE_CHECK;
832 int infinite = IS_REPEAT_INFINITE(qn->upper);
833 int empty_info = qn->target_empty_info;
834 int tlen = compile_length_tree(qn->target, reg);
836 if (tlen < 0)
return tlen;
838 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
840 if (is_anychar_star_quantifier(qn)) {
841 r = compile_tree_n_times(qn->target, qn->lower, reg);
843 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
844 if (IS_MULTILINE(reg->options))
845 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
847 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
850 r = add_state_check_num(reg, ckn);
854 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
857 if (IS_MULTILINE(reg->options)) {
858 r = add_opcode(reg, (CKN_ON ?
859 OP_STATE_CHECK_ANYCHAR_ML_STAR
860 : OP_ANYCHAR_ML_STAR));
863 r = add_opcode(reg, (CKN_ON ?
864 OP_STATE_CHECK_ANYCHAR_STAR
869 r = add_state_check_num(reg, ckn);
876 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
880 if (infinite && qn->lower <= 1) {
882 if (qn->lower == 1) {
883 r = add_opcode_rel_addr(reg, OP_JUMP,
884 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
889 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
891 r = add_state_check_num(reg, ckn);
893 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
896 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
899 r = compile_tree_empty_check(qn->target, reg, empty_info);
901 r = add_opcode_rel_addr(reg, OP_JUMP,
902 -(mod_tlen + (
int )SIZE_OP_JUMP
903 + (
int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
906 if (qn->lower == 0) {
907 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
910 r = compile_tree_empty_check(qn->target, reg, empty_info);
913 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
915 r = add_state_check_num(reg, ckn);
917 r = add_rel_addr(reg,
918 -(mod_tlen + (
int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
921 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
924 else if (qn->upper == 0) {
925 if (qn->is_referred != 0) {
926 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
928 r = compile_tree(qn->target, reg);
933 else if (qn->upper == 1 && qn->greedy) {
934 if (qn->lower == 0) {
936 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
938 r = add_state_check_num(reg, ckn);
940 r = add_rel_addr(reg, tlen);
943 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
948 r = compile_tree(qn->target, reg);
950 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
952 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
954 r = add_state_check_num(reg, ckn);
956 r = add_rel_addr(reg, SIZE_OP_JUMP);
959 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
963 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
965 r = compile_tree(qn->target, reg);
968 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
971 r = add_opcode(reg, OP_STATE_CHECK);
973 r = add_state_check_num(reg, ckn);
985 int infinite = IS_REPEAT_INFINITE(qn->upper);
986 int empty_info = qn->target_empty_info;
987 int tlen = compile_length_tree(qn->target, reg);
989 if (tlen < 0)
return tlen;
992 if (NTYPE(qn->target) == NT_CANY) {
993 if (qn->greedy && infinite) {
994 if (IS_NOT_NULL(qn->next_head_exact))
995 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
997 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
1001 if (empty_info != 0)
1002 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1007 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1008 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1012 len = tlen * qn->lower;
1016#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1017 if (IS_NOT_NULL(qn->head_exact))
1018 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1021 if (IS_NOT_NULL(qn->next_head_exact))
1022 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1024 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1027 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1029 else if (qn->upper == 0 && qn->is_referred != 0) {
1030 len = SIZE_OP_JUMP + tlen;
1032 else if (!infinite && qn->greedy &&
1033 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1034 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1035 len = tlen * qn->lower;
1036 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1038 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1039 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1042 len = SIZE_OP_REPEAT_INC
1043 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1053 int infinite = IS_REPEAT_INFINITE(qn->upper);
1054 int empty_info = qn->target_empty_info;
1055 int tlen = compile_length_tree(qn->target, reg);
1057 if (tlen < 0)
return tlen;
1059 if (is_anychar_star_quantifier(qn)) {
1060 r = compile_tree_n_times(qn->target, qn->lower, reg);
1062 if (IS_NOT_NULL(qn->next_head_exact)) {
1063 if (IS_MULTILINE(reg->options))
1064 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1066 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1068 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1071 if (IS_MULTILINE(reg->options))
1072 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1074 return add_opcode(reg, OP_ANYCHAR_STAR);
1078 if (empty_info != 0)
1079 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1084 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1085 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1087#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1088 if (IS_NOT_NULL(qn->head_exact))
1089 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1092 if (IS_NOT_NULL(qn->next_head_exact))
1093 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1095 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1098 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1103 r = compile_tree_n_times(qn->target, qn->lower, reg);
1108#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1109 if (IS_NOT_NULL(qn->head_exact)) {
1110 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1111 mod_tlen + SIZE_OP_JUMP);
1113 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1114 r = compile_tree_empty_check(qn->target, reg, empty_info);
1116 r = add_opcode_rel_addr(reg, OP_JUMP,
1117 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1121 if (IS_NOT_NULL(qn->next_head_exact)) {
1122 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1123 mod_tlen + SIZE_OP_JUMP);
1125 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1126 r = compile_tree_empty_check(qn->target, reg, empty_info);
1128 r = add_opcode_rel_addr(reg, OP_JUMP,
1129 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1132 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1134 r = compile_tree_empty_check(qn->target, reg, empty_info);
1136 r = add_opcode_rel_addr(reg, OP_JUMP,
1137 -(mod_tlen + (
int )SIZE_OP_JUMP + (
int )SIZE_OP_PUSH));
1141 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1143 r = compile_tree_empty_check(qn->target, reg, empty_info);
1145 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (
int )SIZE_OP_PUSH));
1148 else if (qn->upper == 0 && qn->is_referred != 0) {
1149 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1151 r = compile_tree(qn->target, reg);
1153 else if (!infinite && qn->greedy &&
1154 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1155 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1156 int n = qn->upper - qn->lower;
1158 r = compile_tree_n_times(qn->target, qn->lower, reg);
1161 for (i = 0; i < n; i++) {
1162 r = add_opcode_rel_addr(reg, OP_PUSH,
1163 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1165 r = compile_tree(qn->target, reg);
1169 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
1170 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1172 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1174 r = compile_tree(qn->target, reg);
1177 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1187 OnigOptionType prev = reg->options;
1189 reg->options = node->option;
1190 tlen = compile_length_tree(node->target, reg);
1191 reg->options = prev;
1193 if (tlen < 0)
return tlen;
1195 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1196 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1197 + tlen + SIZE_OP_SET_OPTION;
1207 OnigOptionType prev = reg->options;
1209 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1210 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1212 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1214 r = add_opcode(reg, OP_FAIL);
1218 reg->options = node->option;
1219 r = compile_tree(node->target, reg);
1220 reg->options = prev;
1222 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1224 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1235 if (node->type == ENCLOSE_OPTION)
1236 return compile_length_option_node(node, reg);
1239 tlen = compile_length_tree(node->target, reg);
1240 if (tlen < 0)
return tlen;
1245 switch (node->type) {
1246 case ENCLOSE_MEMORY:
1247#ifdef USE_SUBEXP_CALL
1248 if (IS_ENCLOSE_CALLED(node)) {
1249 len = SIZE_OP_MEMORY_START_PUSH + tlen
1250 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1251 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1252 len += (IS_ENCLOSE_RECURSION(node)
1253 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1255 len += (IS_ENCLOSE_RECURSION(node)
1256 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1258 else if (IS_ENCLOSE_RECURSION(node)) {
1259 len = SIZE_OP_MEMORY_START_PUSH;
1260 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1261 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1266 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1267 len = SIZE_OP_MEMORY_START_PUSH;
1269 len = SIZE_OP_MEMORY_START;
1271 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1272 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1276 case ENCLOSE_STOP_BACKTRACK:
1277 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1278 QtfrNode* qn = NQTFR(node->target);
1279 tlen = compile_length_tree(qn->target, reg);
1280 if (tlen < 0)
return tlen;
1282 len = tlen * qn->lower
1283 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1286 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1290 case ENCLOSE_CONDITION:
1291 len = SIZE_OP_CONDITION;
1292 if (NTYPE(node->target) == NT_ALT) {
1293 Node* x = node->target;
1295 tlen = compile_length_tree(NCAR(x), reg);
1296 if (tlen < 0)
return tlen;
1297 len += tlen + SIZE_OP_JUMP;
1298 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1300 tlen = compile_length_tree(NCAR(x), reg);
1301 if (tlen < 0)
return tlen;
1303 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1306 return ONIGERR_PARSER_BUG;
1310 case ENCLOSE_ABSENT:
1311 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1315 return ONIGERR_TYPE_BUG;
1322static int get_char_length_tree(
Node* node,
regex_t* reg,
int* len);
1329 if (node->type == ENCLOSE_OPTION)
1330 return compile_option_node(node, reg);
1332 switch (node->type) {
1333 case ENCLOSE_MEMORY:
1334#ifdef USE_SUBEXP_CALL
1335 if (IS_ENCLOSE_CALLED(node)) {
1336 r = add_opcode(reg, OP_CALL);
1338 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1339 node->state |= NST_ADDR_FIXED;
1340 r = add_abs_addr(reg, (
int )node->call_addr);
1342 len = compile_length_tree(node->target, reg);
1343 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1344 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1345 len += (IS_ENCLOSE_RECURSION(node)
1346 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1348 len += (IS_ENCLOSE_RECURSION(node)
1349 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1351 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1355 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1356 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1358 r = add_opcode(reg, OP_MEMORY_START);
1360 r = add_mem_num(reg, node->regnum);
1362 r = compile_tree(node->target, reg);
1364#ifdef USE_SUBEXP_CALL
1365 if (IS_ENCLOSE_CALLED(node)) {
1366 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1367 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1368 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1370 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1371 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1374 r = add_mem_num(reg, node->regnum);
1376 r = add_opcode(reg, OP_RETURN);
1378 else if (IS_ENCLOSE_RECURSION(node)) {
1379 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1380 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1382 r = add_opcode(reg, OP_MEMORY_END_REC);
1384 r = add_mem_num(reg, node->regnum);
1389 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1390 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1392 r = add_opcode(reg, OP_MEMORY_END);
1394 r = add_mem_num(reg, node->regnum);
1398 case ENCLOSE_STOP_BACKTRACK:
1399 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1400 QtfrNode* qn = NQTFR(node->target);
1401 r = compile_tree_n_times(qn->target, qn->lower, reg);
1404 len = compile_length_tree(qn->target, reg);
1405 if (len < 0)
return len;
1407 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1409 r = compile_tree(qn->target, reg);
1411 r = add_opcode(reg, OP_POP);
1413 r = add_opcode_rel_addr(reg, OP_JUMP,
1414 -((
int )SIZE_OP_PUSH + len + (
int )SIZE_OP_POP + (
int )SIZE_OP_JUMP));
1417 r = add_opcode(reg, OP_PUSH_STOP_BT);
1419 r = compile_tree(node->target, reg);
1421 r = add_opcode(reg, OP_POP_STOP_BT);
1425 case ENCLOSE_CONDITION:
1426 r = add_opcode(reg, OP_CONDITION);
1428 r = add_mem_num(reg, node->regnum);
1431 if (NTYPE(node->target) == NT_ALT) {
1432 Node* x = node->target;
1435 len = compile_length_tree(NCAR(x), reg);
1436 if (len < 0)
return len;
1437 if (NCDR(x) == NULL)
return ONIGERR_PARSER_BUG;
1439 len2 = compile_length_tree(NCAR(x), reg);
1440 if (len2 < 0)
return len2;
1441 if (NCDR(x) != NULL)
return ONIGERR_INVALID_CONDITION_PATTERN;
1444 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1446 r = compile_tree(NCAR(x), reg);
1448 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1451 r = compile_tree(NCAR(x), reg);
1454 return ONIGERR_PARSER_BUG;
1458 case ENCLOSE_ABSENT:
1459 len = compile_length_tree(node->target, reg);
1460 if (len < 0)
return len;
1462 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1464 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1466 r = compile_tree(node->target, reg);
1468 r = add_opcode(reg, OP_ABSENT_END);
1472 return ONIGERR_TYPE_BUG;
1486 tlen = compile_length_tree(node->target, reg);
1487 if (tlen < 0)
return tlen;
1490 switch (node->type) {
1491 case ANCHOR_PREC_READ:
1492 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1494 case ANCHOR_PREC_READ_NOT:
1495 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1497 case ANCHOR_LOOK_BEHIND:
1498 len = SIZE_OP_LOOK_BEHIND + tlen;
1500 case ANCHOR_LOOK_BEHIND_NOT:
1501 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1517 switch (node->type) {
1518 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF);
break;
1519 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF);
break;
1520 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE);
break;
1521 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE);
break;
1522 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF);
break;
1523 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION);
break;
1525 case ANCHOR_WORD_BOUND:
1526 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1527 else r = add_opcode(reg, OP_WORD_BOUND);
1529 case ANCHOR_NOT_WORD_BOUND:
1530 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1531 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1533#ifdef USE_WORD_BEGIN_END
1534 case ANCHOR_WORD_BEGIN:
1535 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1536 else r = add_opcode(reg, OP_WORD_BEGIN);
1538 case ANCHOR_WORD_END:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1540 else r = add_opcode(reg, OP_WORD_END);
1543 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP);
break;
1545 case ANCHOR_PREC_READ:
1546 r = add_opcode(reg, OP_PUSH_POS);
1548 r = compile_tree(node->target, reg);
1550 r = add_opcode(reg, OP_POP_POS);
1553 case ANCHOR_PREC_READ_NOT:
1554 len = compile_length_tree(node->target, reg);
1555 if (len < 0)
return len;
1556 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1558 r = compile_tree(node->target, reg);
1560 r = add_opcode(reg, OP_FAIL_POS);
1563 case ANCHOR_LOOK_BEHIND:
1566 r = add_opcode(reg, OP_LOOK_BEHIND);
1568 if (node->char_len < 0) {
1569 r = get_char_length_tree(node->target, reg, &n);
1570 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1574 r = add_length(reg, n);
1576 r = compile_tree(node->target, reg);
1580 case ANCHOR_LOOK_BEHIND_NOT:
1583 len = compile_length_tree(node->target, reg);
1584 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1585 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1587 if (node->char_len < 0) {
1588 r = get_char_length_tree(node->target, reg, &n);
1589 if (r)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1593 r = add_length(reg, n);
1595 r = compile_tree(node->target, reg);
1597 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1602 return ONIGERR_TYPE_BUG;
1619 r = compile_length_tree(NCAR(node), reg);
1620 if (r < 0)
return r;
1622 }
while (IS_NOT_NULL(node = NCDR(node)));
1631 r = compile_length_tree(NCAR(node), reg);
1632 if (r < 0)
return r;
1635 }
while (IS_NOT_NULL(node = NCDR(node)));
1637 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1642 if (NSTRING_IS_RAW(node))
1643 r = compile_length_string_raw_node(NSTR(node), reg);
1645 r = compile_length_string_node(node, reg);
1649 r = compile_length_cclass_node(NCCLASS(node), reg);
1661#ifdef USE_BACKREF_WITH_LEVEL
1662 if (IS_BACKREF_NEST_LEVEL(br)) {
1663 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1664 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1668 if (br->back_num == 1) {
1669 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1670 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1673 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1678#ifdef USE_SUBEXP_CALL
1685 r = compile_length_quantifier_node(NQTFR(node), reg);
1689 r = compile_length_enclose_node(NENCLOSE(node), reg);
1693 r = compile_length_anchor_node(NANCHOR(node), reg);
1697 return ONIGERR_TYPE_BUG;
1707 int n,
type, len, pos, r = 0;
1713 r = compile_tree(NCAR(node), reg);
1714 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1722 len += compile_length_tree(NCAR(x), reg);
1723 if (NCDR(x) != NULL) {
1724 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1726 }
while (IS_NOT_NULL(x = NCDR(x)));
1727 pos = reg->used + len;
1730 len = compile_length_tree(NCAR(node), reg);
1731 if (IS_NOT_NULL(NCDR(node))) {
1732 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1735 r = compile_tree(NCAR(node), reg);
1737 if (IS_NOT_NULL(NCDR(node))) {
1738 len = pos - (reg->used + SIZE_OP_JUMP);
1739 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1742 }
while (IS_NOT_NULL(node = NCDR(node)));
1747 if (NSTRING_IS_RAW(node))
1748 r = compile_string_raw_node(NSTR(node), reg);
1750 r = compile_string_node(node, reg);
1754 r = compile_cclass_node(NCCLASS(node), reg);
1761 switch (NCTYPE(node)->ctype) {
1762 case ONIGENC_CTYPE_WORD:
1763 if (NCTYPE(node)->ascii_range != 0) {
1764 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1765 else op = OP_ASCII_WORD;
1768 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1773 return ONIGERR_TYPE_BUG;
1776 r = add_opcode(reg, op);
1781 if (IS_MULTILINE(reg->options))
1782 r = add_opcode(reg, OP_ANYCHAR_ML);
1784 r = add_opcode(reg, OP_ANYCHAR);
1791#ifdef USE_BACKREF_WITH_LEVEL
1792 if (IS_BACKREF_NEST_LEVEL(br)) {
1793 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1795 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1797 r = add_length(reg, br->nest_level);
1800 goto add_bacref_mems;
1804 if (br->back_num == 1) {
1805 n = br->back_static[0];
1806 if (IS_IGNORECASE(reg->options)) {
1807 r = add_opcode(reg, OP_BACKREFN_IC);
1809 r = add_mem_num(reg, n);
1813 case 1: r = add_opcode(reg, OP_BACKREF1);
break;
1814 case 2: r = add_opcode(reg, OP_BACKREF2);
break;
1816 r = add_opcode(reg, OP_BACKREFN);
1818 r = add_mem_num(reg, n);
1827 if (IS_IGNORECASE(reg->options)) {
1828 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1831 r = add_opcode(reg, OP_BACKREF_MULTI);
1835#ifdef USE_BACKREF_WITH_LEVEL
1838 r = add_length(reg, br->back_num);
1841 for (i = br->back_num - 1; i >= 0; i--) {
1842 r = add_mem_num(reg, p[i]);
1849#ifdef USE_SUBEXP_CALL
1851 r = compile_call(NCALL(node), reg);
1856 r = compile_quantifier_node(NQTFR(node), reg);
1860 r = compile_enclose_node(NENCLOSE(node), reg);
1864 r = compile_anchor_node(NANCHOR(node), reg);
1869 fprintf(stderr,
"compile_tree: undefined node type %d\n", NTYPE(node));
1877#ifdef USE_NAMED_GROUP
1883 Node* node = *plink;
1885 switch (NTYPE(node)) {
1889 r = noname_disable_map(&(NCAR(node)), map, counter);
1890 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1895 Node** ptarget = &(NQTFR(node)->target);
1896 Node* old = *ptarget;
1897 r = noname_disable_map(ptarget, map, counter);
1898 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1899 onig_reduce_nested_quantifier(node, *ptarget);
1907 if (en->type == ENCLOSE_MEMORY) {
1908 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1910 map[en->regnum].new_val = *counter;
1911 en->regnum = *counter;
1913 else if (en->regnum != 0) {
1914 *plink = en->target;
1915 en->target = NULL_NODE;
1916 onig_node_free(node);
1917 r = noname_disable_map(plink, map, counter);
1921 r = noname_disable_map(&(en->target), map, counter);
1926 if (NANCHOR(node)->target)
1927 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1940 int i, pos, n, old_num;
1944 if (! IS_BACKREF_NAME_REF(bn))
1945 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1947 old_num = bn->back_num;
1948 if (IS_NULL(bn->back_dynamic))
1949 backs = bn->back_static;
1951 backs = bn->back_dynamic;
1953 for (i = 0, pos = 0; i < old_num; i++) {
1954 if (backs[i] > num_mem)
return ONIGERR_INVALID_BACKREF;
1955 n = map[backs[i]].new_val;
1971 switch (NTYPE(node)) {
1975 r = renumber_by_map(NCAR(node), map, num_mem);
1976 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1979 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1984 if (en->type == ENCLOSE_CONDITION) {
1985 if (en->regnum > num_mem)
return ONIGERR_INVALID_BACKREF;
1986 en->regnum = map[en->regnum].new_val;
1988 r = renumber_by_map(en->target, map, num_mem);
1993 r = renumber_node_backref(node, map, num_mem);
1997 if (NANCHOR(node)->target)
1998 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
2009numbered_ref_check(
Node* node)
2013 switch (NTYPE(node)) {
2017 r = numbered_ref_check(NCAR(node));
2018 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2021 r = numbered_ref_check(NQTFR(node)->target);
2024 r = numbered_ref_check(NENCLOSE(node)->target);
2028 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2029 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2033 if (NANCHOR(node)->target)
2034 r = numbered_ref_check(NANCHOR(node)->target);
2047 int r, i, pos, counter;
2052 CHECK_NULL_RETURN_MEMERR(map);
2053 for (i = 1; i <= env->num_mem; i++) {
2057 r = noname_disable_map(root, map, &counter);
2058 if (r != 0)
return r;
2060 r = renumber_by_map(*root, map, env->num_mem);
2061 if (r != 0)
return r;
2063 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2064 if (map[i].new_val > 0) {
2065 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2070 loc = env->capture_history;
2071 BIT_STATUS_CLEAR(env->capture_history);
2072 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2073 if (BIT_STATUS_AT(loc, i)) {
2074 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2078 env->num_mem = env->num_named;
2079 reg->num_mem = env->num_named;
2081 return onig_renumber_name_table(reg, map);
2085#ifdef USE_SUBEXP_CALL
2093 for (i = 0; i < uslist->num; i++) {
2094 en = NENCLOSE(uslist->us[i].target);
2095 if (! IS_ENCLOSE_ADDR_FIXED(en))
return ONIGERR_PARSER_BUG;
2096 addr = en->call_addr;
2097 offset = uslist->us[i].offset;
2099 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2105#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2107quantifiers_memory_node_info(
Node* node)
2111 switch (NTYPE(node)) {
2117 v = quantifiers_memory_node_info(NCAR(node));
2119 }
while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2123# ifdef USE_SUBEXP_CALL
2125 if (IS_CALL_RECURSION(NCALL(node))) {
2126 return NQ_TARGET_IS_EMPTY_REC;
2129 r = quantifiers_memory_node_info(NCALL(node)->target);
2136 if (qn->upper != 0) {
2137 r = quantifiers_memory_node_info(qn->target);
2146 case ENCLOSE_MEMORY:
2147 return NQ_TARGET_IS_EMPTY_MEM;
2150 case ENCLOSE_OPTION:
2151 case ENCLOSE_STOP_BACKTRACK:
2152 case ENCLOSE_CONDITION:
2153 case ENCLOSE_ABSENT:
2154 r = quantifiers_memory_node_info(en->target);
2177get_min_match_length(
Node* node, OnigDistance *min,
ScanEnv* env)
2183 switch (NTYPE(node)) {
2188 Node** nodes = SCANENV_MEM_NODES(env);
2190 if (br->state & NST_RECURSION)
break;
2192 backs = BACKREFS_P(br);
2193 if (backs[0] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2194 r = get_min_match_length(nodes[backs[0]], min, env);
2196 for (i = 1; i < br->back_num; i++) {
2197 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2198 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2200 if (*min > tmin) *min = tmin;
2205#ifdef USE_SUBEXP_CALL
2207 if (IS_CALL_RECURSION(NCALL(node))) {
2209 if (IS_ENCLOSE_MIN_FIXED(en))
2213 r = get_min_match_length(NCALL(node)->target, min, env);
2219 r = get_min_match_length(NCAR(node), &tmin, env);
2220 if (r == 0) *min += tmin;
2221 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2230 r = get_min_match_length(x, &tmin, env);
2232 if (y == node) *min = tmin;
2233 else if (*min > tmin) *min = tmin;
2234 }
while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2241 *min = sn->end - sn->s;
2258 if (qn->lower > 0) {
2259 r = get_min_match_length(qn->target, min, env);
2261 *min = distance_multiply(*min, qn->lower);
2270 case ENCLOSE_MEMORY:
2271 if (IS_ENCLOSE_MIN_FIXED(en))
2274 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2277 SET_ENCLOSE_STATUS(node, NST_MARK1);
2278 r = get_min_match_length(en->target, min, env);
2279 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2282 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2288 case ENCLOSE_OPTION:
2289 case ENCLOSE_STOP_BACKTRACK:
2290 case ENCLOSE_CONDITION:
2291 r = get_min_match_length(en->target, min, env);
2294 case ENCLOSE_ABSENT:
2309get_max_match_length(
Node* node, OnigDistance *max,
ScanEnv* env)
2315 switch (NTYPE(node)) {
2318 r = get_max_match_length(NCAR(node), &tmax, env);
2320 *max = distance_add(*max, tmax);
2321 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2326 r = get_max_match_length(NCAR(node), &tmax, env);
2327 if (r == 0 && *max < tmax) *max = tmax;
2328 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2334 *max = sn->end - sn->s;
2339 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2344 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2351 Node** nodes = SCANENV_MEM_NODES(env);
2353 if (br->state & NST_RECURSION) {
2354 *max = ONIG_INFINITE_DISTANCE;
2357 backs = BACKREFS_P(br);
2358 for (i = 0; i < br->back_num; i++) {
2359 if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
2360 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2362 if (*max < tmax) *max = tmax;
2367#ifdef USE_SUBEXP_CALL
2369 if (! IS_CALL_RECURSION(NCALL(node)))
2370 r = get_max_match_length(NCALL(node)->target, max, env);
2372 *max = ONIG_INFINITE_DISTANCE;
2380 if (qn->upper != 0) {
2381 r = get_max_match_length(qn->target, max, env);
2382 if (r == 0 && *max != 0) {
2383 if (! IS_REPEAT_INFINITE(qn->upper))
2384 *max = distance_multiply(*max, qn->upper);
2386 *max = ONIG_INFINITE_DISTANCE;
2396 case ENCLOSE_MEMORY:
2397 if (IS_ENCLOSE_MAX_FIXED(en))
2400 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2401 *max = ONIG_INFINITE_DISTANCE;
2403 SET_ENCLOSE_STATUS(node, NST_MARK1);
2404 r = get_max_match_length(en->target, max, env);
2405 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2408 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2414 case ENCLOSE_OPTION:
2415 case ENCLOSE_STOP_BACKTRACK:
2416 case ENCLOSE_CONDITION:
2417 r = get_max_match_length(en->target, max, env);
2420 case ENCLOSE_ABSENT:
2434#define GET_CHAR_LEN_VARLEN -1
2435#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2439get_char_length_tree1(
Node* node,
regex_t* reg,
int* len,
int level)
2446 switch (NTYPE(node)) {
2449 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2451 *len = (int )distance_add(*len, tlen);
2452 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2460 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2461 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2471 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2473 r = GET_CHAR_LEN_VARLEN;
2485 while (s < sn->end) {
2486 s += enclen(reg->enc, s, sn->end);
2495 if (qn->lower == qn->upper) {
2496 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2498 *len = (int )distance_multiply(tlen, qn->lower);
2501 r = GET_CHAR_LEN_VARLEN;
2505#ifdef USE_SUBEXP_CALL
2507 if (! IS_CALL_RECURSION(NCALL(node)))
2508 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2510 r = GET_CHAR_LEN_VARLEN;
2527 case ENCLOSE_MEMORY:
2528#ifdef USE_SUBEXP_CALL
2529 if (IS_ENCLOSE_CLEN_FIXED(en))
2530 *len = en->char_len;
2532 r = get_char_length_tree1(en->target, reg, len, level);
2534 en->char_len = *len;
2535 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2540 case ENCLOSE_OPTION:
2541 case ENCLOSE_STOP_BACKTRACK:
2542 case ENCLOSE_CONDITION:
2543 r = get_char_length_tree1(en->target, reg, len, level);
2545 case ENCLOSE_ABSENT:
2556 r = GET_CHAR_LEN_VARLEN;
2564get_char_length_tree(
Node* node,
regex_t* reg,
int* len)
2566 return get_char_length_tree1(node, reg, len, 0);
2586 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2587 NCTYPE(y)->not != NCTYPE(x)->not &&
2588 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2598 tmp = x; x = y; y = tmp;
2618 switch (NCTYPE(y)->ctype) {
2619 case ONIGENC_CTYPE_WORD:
2620 if (NCTYPE(y)->not == 0) {
2621 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2622 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2623 if (BITSET_AT(xc->bs, i)) {
2624 if (NCTYPE(y)->ascii_range) {
2625 if (IS_CODE_SB_WORD(reg->enc, i))
return 0;
2628 if (ONIGENC_IS_CODE_WORD(reg->enc, i))
return 0;
2637 if (IS_NOT_NULL(xc->mbuf))
return 0;
2638 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2640 if (NCTYPE(y)->ascii_range)
2641 is_word = IS_CODE_SB_WORD(reg->enc, i);
2643 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2645 if (!IS_NCCLASS_NOT(xc)) {
2646 if (BITSET_AT(xc->bs, i))
2650 if (! BITSET_AT(xc->bs, i))
2669 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2670 v = BITSET_AT(xc->bs, i);
2671 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2672 (v == 0 && IS_NCCLASS_NOT(xc))) {
2673 v = BITSET_AT(yc->bs, i);
2674 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2675 (v == 0 && IS_NCCLASS_NOT(yc)))
2679 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2680 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2699 if (NSTRING_LEN(x) == 0)
2704 switch (NCTYPE(y)->ctype) {
2705 case ONIGENC_CTYPE_WORD:
2706 if (NCTYPE(y)->ascii_range) {
2707 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2708 return NCTYPE(y)->not;
2710 return !(NCTYPE(y)->not);
2713 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2714 return NCTYPE(y)->not;
2716 return !(NCTYPE(y)->not);
2728 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2729 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2730 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2738 len = NSTRING_LEN(x);
2739 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2740 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2745 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2746 if (*p != *q)
return 1;
2766get_head_value_node(
Node* node,
int exact,
regex_t* reg)
2768 Node* n = NULL_NODE;
2770 switch (NTYPE(node)) {
2774#ifdef USE_SUBEXP_CALL
2787 n = get_head_value_node(NCAR(node), exact, reg);
2794 if (sn->end <= sn->s)
2798 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2809 if (qn->lower > 0) {
2810#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2811 if (IS_NOT_NULL(qn->head_exact))
2815 n = get_head_value_node(qn->target, exact, reg);
2824 case ENCLOSE_OPTION:
2826 OnigOptionType options = reg->options;
2828 reg->options = NENCLOSE(node)->option;
2829 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2830 reg->options = options;
2834 case ENCLOSE_MEMORY:
2835 case ENCLOSE_STOP_BACKTRACK:
2836 case ENCLOSE_CONDITION:
2837 n = get_head_value_node(en->target, exact, reg);
2840 case ENCLOSE_ABSENT:
2847 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2848 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2859check_type_tree(
Node* node,
int type_mask,
int enclose_mask,
int anchor_mask)
2864 if ((NTYPE2BIT(type) & type_mask) == 0)
2871 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2873 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2877 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2884 if ((en->type & enclose_mask) == 0)
2887 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2892 type = NANCHOR(node)->type;
2893 if ((type & anchor_mask) == 0)
2896 if (NANCHOR(node)->target)
2897 r = check_type_tree(NANCHOR(node)->target,
2898 type_mask, enclose_mask, anchor_mask);
2907#ifdef USE_SUBEXP_CALL
2909# define RECURSION_EXIST 1
2910# define RECURSION_INFINITE 2
2913subexp_inf_recursive_check(
Node* node,
ScanEnv* env,
int head)
2928 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2929 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2932 ret = get_min_match_length(NCAR(x), &min, env);
2933 if (ret != 0)
return ret;
2934 if (min != 0) head = 0;
2936 }
while (IS_NOT_NULL(x = NCDR(x)));
2943 r = RECURSION_EXIST;
2945 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2946 if (ret < 0 || ret == RECURSION_INFINITE)
return ret;
2948 }
while (IS_NOT_NULL(node = NCDR(node)));
2953 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2954 if (r == RECURSION_EXIST) {
2955 if (NQTFR(node)->lower == 0) r = 0;
2963 case ANCHOR_PREC_READ:
2964 case ANCHOR_PREC_READ_NOT:
2965 case ANCHOR_LOOK_BEHIND:
2966 case ANCHOR_LOOK_BEHIND_NOT:
2967 r = subexp_inf_recursive_check(an->target, env, head);
2974 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2978 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2980 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2981 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2983 SET_ENCLOSE_STATUS(node, NST_MARK2);
2984 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2985 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2997subexp_inf_recursive_check_trav(
Node* node,
ScanEnv* env)
3007 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3008 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3012 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3019 case ANCHOR_PREC_READ:
3020 case ANCHOR_PREC_READ_NOT:
3021 case ANCHOR_LOOK_BEHIND:
3022 case ANCHOR_LOOK_BEHIND_NOT:
3023 r = subexp_inf_recursive_check_trav(an->target, env);
3033 if (IS_ENCLOSE_RECURSION(en)) {
3034 SET_ENCLOSE_STATUS(node, NST_MARK1);
3035 r = subexp_inf_recursive_check(en->target, env, 1);
3036 if (r > 0)
return ONIGERR_NEVER_ENDING_RECURSION;
3037 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3039 r = subexp_inf_recursive_check_trav(en->target, env);
3052subexp_recursive_check(
Node* node)
3056 switch (NTYPE(node)) {
3060 r |= subexp_recursive_check(NCAR(node));
3061 }
while (IS_NOT_NULL(node = NCDR(node)));
3065 r = subexp_recursive_check(NQTFR(node)->target);
3072 case ANCHOR_PREC_READ:
3073 case ANCHOR_PREC_READ_NOT:
3074 case ANCHOR_LOOK_BEHIND:
3075 case ANCHOR_LOOK_BEHIND_NOT:
3076 r = subexp_recursive_check(an->target);
3083 r = subexp_recursive_check(NCALL(node)->target);
3084 if (r != 0) SET_CALL_RECURSION(node);
3088 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3090 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3093 SET_ENCLOSE_STATUS(node, NST_MARK2);
3094 r = subexp_recursive_check(NENCLOSE(node)->target);
3095 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3108subexp_recursive_check_trav(
Node* node,
ScanEnv* env)
3110# define FOUND_CALLED_NODE 1
3122 ret = subexp_recursive_check_trav(NCAR(node), env);
3123 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3124 else if (ret < 0)
return ret;
3125 }
while (IS_NOT_NULL(node = NCDR(node)));
3130 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3131 if (NQTFR(node)->upper == 0) {
3132 if (r == FOUND_CALLED_NODE)
3133 NQTFR(node)->is_referred = 1;
3141 case ANCHOR_PREC_READ:
3142 case ANCHOR_PREC_READ_NOT:
3143 case ANCHOR_LOOK_BEHIND:
3144 case ANCHOR_LOOK_BEHIND_NOT:
3145 r = subexp_recursive_check_trav(an->target, env);
3155 if (! IS_ENCLOSE_RECURSION(en)) {
3156 if (IS_ENCLOSE_CALLED(en)) {
3157 SET_ENCLOSE_STATUS(node, NST_MARK1);
3158 r = subexp_recursive_check(en->target);
3159 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3160 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3163 r = subexp_recursive_check_trav(en->target, env);
3164 if (IS_ENCLOSE_CALLED(en))
3165 r |= FOUND_CALLED_NODE;
3186 r = setup_subexp_call(NCAR(node), env);
3187 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3192 r = setup_subexp_call(NCAR(node), env);
3193 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3197 r = setup_subexp_call(NQTFR(node)->target, env);
3200 r = setup_subexp_call(NENCLOSE(node)->target, env);
3206 Node** nodes = SCANENV_MEM_NODES(env);
3208 if (cn->group_num != 0) {
3209 int gnum = cn->group_num;
3211# ifdef USE_NAMED_GROUP
3212 if (env->num_named > 0 &&
3213 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3214 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3215 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3218 if (gnum > env->num_mem) {
3219 onig_scan_env_set_error_string(env,
3220 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3221 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3224# ifdef USE_NAMED_GROUP
3227 cn->target = nodes[cn->group_num];
3228 if (IS_NULL(cn->target)) {
3229 onig_scan_env_set_error_string(env,
3230 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3231 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3233 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3234 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3235 cn->unset_addr_list = env->unset_addr_list;
3237# ifdef USE_NAMED_GROUP
3238# ifdef USE_PERL_SUBEXP_CALL
3239 else if (cn->name == cn->name_end) {
3246 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3249 onig_scan_env_set_error_string(env,
3250 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3251 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3254 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3255 onig_scan_env_set_error_string(env,
3256 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3257 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3260 cn->group_num = refs[0];
3273 case ANCHOR_PREC_READ:
3274 case ANCHOR_PREC_READ_NOT:
3275 case ANCHOR_LOOK_BEHIND:
3276 case ANCHOR_LOOK_BEHIND_NOT:
3277 r = setup_subexp_call(an->target, env);
3296divide_look_behind_alternatives(
Node* node)
3298 Node *head, *np, *insert_node;
3300 int anc_type = an->type;
3304 swap_node(node, head);
3306 NANCHOR(head)->target = np;
3309 while ((np = NCDR(np)) != NULL_NODE) {
3310 insert_node = onig_node_new_anchor(anc_type);
3311 CHECK_NULL_RETURN_MEMERR(insert_node);
3312 NANCHOR(insert_node)->target = NCAR(np);
3313 NCAR(np) = insert_node;
3316 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3319 SET_NTYPE(np, NT_LIST);
3320 }
while ((np = NCDR(np)) != NULL_NODE);
3331 r = get_char_length_tree(an->target, reg, &len);
3334 else if (r == GET_CHAR_LEN_VARLEN)
3335 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3336 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3337 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3338 r = divide_look_behind_alternatives(node);
3340 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3353 if (type == NT_QTFR) {
3355 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3356#ifdef USE_QTFR_PEEK_NEXT
3357 Node* n = get_head_value_node(next_node, 1, reg);
3359 if (IS_NOT_NULL(n) && NSTR(n)->s[0] !=
'\0') {
3360 qn->next_head_exact = n;
3364 if (qn->lower <= 1) {
3365 int ttype = NTYPE(qn->target);
3366 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3368 x = get_head_value_node(qn->target, 0, reg);
3369 if (IS_NOT_NULL(x)) {
3370 y = get_head_value_node(next_node, 0, reg);
3371 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3372 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3373 CHECK_NULL_RETURN_MEMERR(en);
3374 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3375 swap_node(node, en);
3376 NENCLOSE(node)->target = en;
3383 else if (type == NT_ENCLOSE) {
3385 if (en->type == ENCLOSE_MEMORY) {
3395update_string_node_case_fold(
regex_t* reg,
Node *node)
3397 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3398 UChar *sbuf, *ebuf, *sp;
3400 OnigDistance sbuf_size;
3404 sbuf_size = (end - sn->s) * 2;
3405 sbuf = (UChar* )
xmalloc(sbuf_size);
3406 CHECK_NULL_RETURN_MEMERR(sbuf);
3407 ebuf = sbuf + sbuf_size;
3412 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3413 for (i = 0; i < len; i++) {
3415 UChar* p = (UChar* )
xrealloc(sbuf, sbuf_size * 2);
3418 return ONIGERR_MEMORY;
3421 sp = sbuf + sbuf_size;
3423 ebuf = sbuf + sbuf_size;
3430 r = onig_node_str_set(node, sbuf, sp);
3437expand_case_fold_make_rem_string(
Node** rnode, UChar *s, UChar *end,
3443 node = onig_node_new_str(s, end);
3444 if (IS_NULL(node))
return ONIGERR_MEMORY;
3446 r = update_string_node_case_fold(reg, node);
3448 onig_node_free(node);
3452 NSTRING_SET_AMBIG(node);
3453 NSTRING_SET_DONT_GET_OPT_INFO(node);
3464 for (i = 0; i < item_num; i++) {
3465 if (items[i].byte_len != slen) {
3468 if (items[i].code_len != 1) {
3477 UChar *p,
int slen, UChar *end,
3480 int r, i, j, len, varlen;
3481 Node *anode, *var_anode, *snode, *xnode, *an;
3482 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3484 *rnode = var_anode = NULL_NODE;
3487 for (i = 0; i < item_num; i++) {
3488 if (items[i].byte_len != slen) {
3495 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3496 if (IS_NULL(var_anode))
return ONIGERR_MEMORY;
3498 xnode = onig_node_new_list(NULL, NULL);
3499 if (IS_NULL(xnode))
goto mem_err;
3500 NCAR(var_anode) = xnode;
3502 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3503 if (IS_NULL(anode))
goto mem_err;
3504 NCAR(xnode) = anode;
3507 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3508 if (IS_NULL(anode))
return ONIGERR_MEMORY;
3511 snode = onig_node_new_str(p, p + slen);
3512 if (IS_NULL(snode))
goto mem_err;
3514 NCAR(anode) = snode;
3516 for (i = 0; i < item_num; i++) {
3517 snode = onig_node_new_str(NULL, NULL);
3518 if (IS_NULL(snode))
goto mem_err;
3520 for (j = 0; j < items[i].code_len; j++) {
3521 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3527 r = onig_node_str_cat(snode, buf, buf + len);
3528 if (r != 0)
goto mem_err2;
3531 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3536 if (items[i].byte_len != slen) {
3538 UChar *q = p + items[i].byte_len;
3541 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3547 xnode = onig_node_list_add(NULL_NODE, snode);
3548 if (IS_NULL(xnode)) {
3550 onig_node_free(rem);
3553 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3555 onig_node_free(xnode);
3556 onig_node_free(rem);
3566 NCDR(var_anode) = an;
3579 onig_node_free(snode);
3582 onig_node_free(*rnode);
3584 return ONIGERR_MEMORY;
3590#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3592 int r, n, len, alt_num;
3594 UChar *start, *end, *p;
3595 Node *top_root, *root, *snode, *prev_node;
3599 if (NSTRING_IS_AMBIG(node))
return 0;
3603 if (start >= end)
return 0;
3606 top_root = root = prev_node = snode = NULL_NODE;
3610 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3617 len = enclen(reg->enc, p, end);
3619 varlen = is_case_fold_variable_len(n, items, len);
3620 if (n == 0 || varlen == 0) {
3621 if (IS_NULL(snode)) {
3622 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3623 onig_node_free(top_root);
3624 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3625 if (IS_NULL(root)) {
3626 onig_node_free(prev_node);
3631 prev_node = snode = onig_node_new_str(NULL, NULL);
3632 if (IS_NULL(snode))
goto mem_err;
3633 if (IS_NOT_NULL(root)) {
3634 if (IS_NULL(onig_node_list_add(root, snode))) {
3635 onig_node_free(snode);
3641 r = onig_node_str_cat(snode, p, p + len);
3642 if (r != 0)
goto err;
3646 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION)
break;
3648 if (IS_NOT_NULL(snode)) {
3649 r = update_string_node_case_fold(reg, snode);
3651 NSTRING_SET_AMBIG(snode);
3654 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3655 onig_node_free(top_root);
3656 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3657 if (IS_NULL(root)) {
3658 onig_node_free(prev_node);
3663 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3664 if (r < 0)
goto mem_err;
3666 if (IS_NULL(root)) {
3667 top_root = prev_node;
3670 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671 onig_node_free(prev_node);
3676 root = NCAR(prev_node);
3679 if (IS_NOT_NULL(root)) {
3680 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3681 onig_node_free(prev_node);
3692 if (IS_NOT_NULL(snode)) {
3693 r = update_string_node_case_fold(reg, snode);
3695 NSTRING_SET_AMBIG(snode);
3702 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3703 if (r != 0)
goto mem_err;
3705 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3706 onig_node_free(top_root);
3707 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3708 if (IS_NULL(root)) {
3709 onig_node_free(srem);
3710 onig_node_free(prev_node);
3715 if (IS_NULL(root)) {
3719 if (IS_NULL(onig_node_list_add(root, srem))) {
3720 onig_node_free(srem);
3727 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3728 swap_node(node, top_root);
3729 onig_node_free(top_root);
3736 onig_node_free(top_root);
3741#ifdef USE_COMBINATION_EXPLOSION_CHECK
3743# define CEC_THRES_NUM_BIG_REPEAT 512
3744# define CEC_INFINITE_NUM 0x7fffffff
3746# define CEC_IN_INFINITE_REPEAT (1<<0)
3747# define CEC_IN_FINITE_REPEAT (1<<1)
3748# define CEC_CONT_BIG_REPEAT (1<<2)
3751setup_comb_exp_check(
Node* node,
int state,
ScanEnv* env)
3761 r = setup_comb_exp_check(NCAR(node), r, env);
3762 }
while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3770 ret = setup_comb_exp_check(NCAR(node), state, env);
3772 }
while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3778 int child_state = state;
3781 Node* target = qn->target;
3784 if (! IS_REPEAT_INFINITE(qn->upper)) {
3785 if (qn->upper > 1) {
3787 child_state |= CEC_IN_FINITE_REPEAT;
3790 if (env->backrefed_mem == 0) {
3791 if (NTYPE(qn->target) == NT_ENCLOSE) {
3793 if (en->type == ENCLOSE_MEMORY) {
3794 if (NTYPE(en->target) == NT_QTFR) {
3796 if (IS_REPEAT_INFINITE(q->upper)
3797 && q->greedy == qn->greedy) {
3798 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3800 child_state = state;
3809 if (state & CEC_IN_FINITE_REPEAT) {
3810 qn->comb_exp_check_num = -1;
3813 if (IS_REPEAT_INFINITE(qn->upper)) {
3814 var_num = CEC_INFINITE_NUM;
3815 child_state |= CEC_IN_INFINITE_REPEAT;
3818 var_num = qn->upper - qn->lower;
3821 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3822 add_state |= CEC_CONT_BIG_REPEAT;
3824 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3825 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3826 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3827 if (qn->comb_exp_check_num == 0) {
3828 env->num_comb_exp_check++;
3829 qn->comb_exp_check_num = env->num_comb_exp_check;
3830 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3831 env->comb_exp_max_regnum = env->curr_max_regnum;
3836 r = setup_comb_exp_check(target, child_state, env);
3846 case ENCLOSE_MEMORY:
3848 if (env->curr_max_regnum < en->regnum)
3849 env->curr_max_regnum = en->regnum;
3851 r = setup_comb_exp_check(en->target, state, env);
3856 r = setup_comb_exp_check(en->target, state, env);
3862# ifdef USE_SUBEXP_CALL
3864 if (IS_CALL_RECURSION(NCALL(node)))
3865 env->has_recursion = 1;
3867 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3879#define IN_ALT (1<<0)
3880#define IN_NOT (1<<1)
3881#define IN_REPEAT (1<<2)
3882#define IN_VAR_REPEAT (1<<3)
3883#define IN_CALL (1<<4)
3884#define IN_RECCALL (1<<5)
3905 Node* prev = NULL_NODE;
3907 r = setup_tree(NCAR(node), reg, state, env);
3908 if (IS_NOT_NULL(prev) && r == 0) {
3909 r = next_setup(prev, NCAR(node), reg);
3912 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3918 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3919 }
while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3926 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3927 r = expand_case_fold_string(node, reg);
3935#ifdef USE_SUBEXP_CALL
3944 Node** nodes = SCANENV_MEM_NODES(env);
3947 for (i = 0; i < br->back_num; i++) {
3948 if (p[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
3949 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3950 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3951#ifdef USE_BACKREF_WITH_LEVEL
3952 if (IS_BACKREF_NEST_LEVEL(br)) {
3953 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3956 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3965 Node* target = qn->target;
3967 if ((state & IN_REPEAT) != 0) {
3968 qn->state |= NST_IN_REPEAT;
3971 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3972 r = get_min_match_length(target, &d, env);
3975 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3976#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3977 r = quantifiers_memory_node_info(target);
3980 qn->target_empty_info = r;
3984 r = get_max_match_length(target, &d, env);
3985 if (r == 0 && d == 0) {
3988 if (qn->lower > 1) qn->lower = 1;
3989 if (NTYPE(target) == NT_STR) {
3990 qn->upper = qn->lower = 0;
3998 if (qn->lower != qn->upper)
3999 state |= IN_VAR_REPEAT;
4000 r = setup_tree(target, reg, state, env);
4004#define EXPAND_STRING_MAX_LENGTH 100
4005 if (NTYPE(target) == NT_STR) {
4006 if (qn->lower > 1) {
4007 int i, n = qn->lower;
4008 OnigDistance len = NSTRING_LEN(target);
4012 np = onig_node_new_str(sn->s, sn->end);
4013 if (IS_NULL(np))
return ONIGERR_MEMORY;
4014 NSTR(np)->flag = sn->flag;
4016 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4017 r = onig_node_str_cat(np, sn->s, sn->end);
4023 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4027 if (! IS_REPEAT_INFINITE(qn->upper))
4030 np1 = onig_node_new_list(np, NULL);
4033 return ONIGERR_MEMORY;
4035 swap_node(np1, node);
4036 np2 = onig_node_list_add(node, np1);
4038 onig_node_free(np1);
4039 return ONIGERR_MEMORY;
4043 swap_node(np, node);
4050#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4051 if (qn->greedy && (qn->target_empty_info != 0)) {
4052 if (NTYPE(target) == NT_QTFR) {
4054 if (IS_NOT_NULL(tqn->head_exact)) {
4055 qn->head_exact = tqn->head_exact;
4056 tqn->head_exact = NULL;
4060 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4072 case ENCLOSE_OPTION:
4074 OnigOptionType options = reg->options;
4075 reg->options = NENCLOSE(node)->option;
4076 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4077 reg->options = options;
4081 case ENCLOSE_MEMORY:
4082 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4083 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4086 if (IS_ENCLOSE_CALLED(en))
4088 if (IS_ENCLOSE_RECURSION(en))
4089 state |= IN_RECCALL;
4090 else if ((state & IN_RECCALL) != 0)
4091 SET_CALL_RECURSION(node);
4092 r = setup_tree(en->target, reg, state, env);
4095 case ENCLOSE_STOP_BACKTRACK:
4097 Node* target = en->target;
4098 r = setup_tree(target, reg, state, env);
4099 if (NTYPE(target) == NT_QTFR) {
4101 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4103 int qtype = NTYPE(tqn->target);
4104 if (IS_NODE_TYPE_SIMPLE(qtype))
4105 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4111 case ENCLOSE_CONDITION:
4112#ifdef USE_NAMED_GROUP
4113 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4114 env->num_named > 0 &&
4115 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4116 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4117 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4120 if (NENCLOSE(node)->regnum > env->num_mem)
4121 return ONIGERR_INVALID_BACKREF;
4122 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4125 case ENCLOSE_ABSENT:
4126 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4137 case ANCHOR_PREC_READ:
4138 r = setup_tree(an->target, reg, state, env);
4140 case ANCHOR_PREC_READ_NOT:
4141 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4145#define ALLOWED_TYPE_IN_LB \
4146 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4147 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4149#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4150#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4152#define ALLOWED_ANCHOR_IN_LB \
4153( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4154 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4155 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4156 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4157#define ALLOWED_ANCHOR_IN_LB_NOT \
4158( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4159 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4160 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4161 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4163 case ANCHOR_LOOK_BEHIND:
4165 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4166 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4167 if (r < 0)
return r;
4168 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4169 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4170 r = setup_tree(an->target, reg, state, env);
4171 if (r != 0)
return r;
4172 r = setup_look_behind(node, reg, env);
4176 case ANCHOR_LOOK_BEHIND_NOT:
4178 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4179 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4180 if (r < 0)
return r;
4181 if (r > 0)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4182 if (NTYPE(node) != NT_ANCHOR)
goto restart;
4183 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4184 if (r != 0)
return r;
4185 r = setup_look_behind(node, reg, env);
/* Skip-table setup compiled when USE_SUNDAY_QUICK_SEARCH is not defined.
 * NOTE(review): this listing omits several lines of the function, so the
 * comments below describe only the statements that are shown. */
4199#ifndef USE_SUNDAY_QUICK_SEARCH
/* set_bm_skip: fills skip[] (UChar entries) when len < ONIG_CHAR_TABLE_SIZE;
 * the later statements fill the int table *int_skip instead, allocating it on
 * demand and returning ONIGERR_MEMORY when that allocation fails.
 * len is assigned on a line not shown -- presumably end - s; TODO confirm. */
4202set_bm_skip(UChar* s, UChar* end,
regex_t* reg,
4203 UChar skip[],
int** int_skip,
int ignore_case)
4205 OnigDistance i, len;
4206 int clen, flen, n, j, k;
4207 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4212 if (len < ONIG_CHAR_TABLE_SIZE) {
/* Every entry starts out as len. */
4213 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
/* Visit offsets below len - 1, advancing by clen each round. */
4216 for (i = 0; i < len - 1; i += clen) {
4219 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4221 clen = enclen(enc, p, end);
4223 clen = (int )(end - p);
/* Items that are not a single code point of byte length clen are caught by
 * the test below; what is done with them is on a line not shown. */
4225 for (j = 0; j < n; j++) {
4226 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4228 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
/* Record len - 1 - i - j for byte j of the current character and for the
 * byte at the same position in each case-fold buffer. */
4232 for (j = 0; j < clen; j++) {
4233 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4234 for (k = 0; k < n; k++) {
4235 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4241# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4243 return ONIGERR_TYPE_BUG;
/* int-table form: allocate *int_skip on first use. */
4245 if (IS_NULL(*int_skip)) {
4246 *int_skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4247 if (IS_NULL(*int_skip))
return ONIGERR_MEMORY;
/* Same filling scheme as above, stored as int instead of UChar. */
4249 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4252 for (i = 0; i < len - 1; i += clen) {
4255 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4257 clen = enclen(enc, p, end);
4259 clen = (int )(end - p);
4261 for (j = 0; j < n; j++) {
4262 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4264 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4268 for (j = 0; j < clen; j++) {
4269 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4270 for (k = 0; k < n; k++) {
4271 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
/* Second set_bm_skip definition -- presumably the alternative branch of the
 * USE_SUNDAY_QUICK_SEARCH conditional (the directive is not in this listing).
 * Compared with the first definition, the visible statements use len + 1 as
 * the default entry, loop over all offsets below len, and store len - i - j.
 * NOTE(review): several lines of the function are missing here. */
4284set_bm_skip(UChar* s, UChar* end,
regex_t* reg,
4285 UChar skip[],
int** int_skip,
int ignore_case)
4287 OnigDistance i, len;
4288 int clen, flen, n, j, k;
4289 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4294 if (len < ONIG_CHAR_TABLE_SIZE) {
/* Every entry starts out as len + 1. */
4295 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4298 for (i = 0; i < len; i += clen) {
4301 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4303 clen = enclen(enc, p, end);
4305 clen = (int )(end - p);
/* Items that are not a single code point of byte length clen are caught by
 * the test below; what is done with them is on a line not shown. */
4307 for (j = 0; j < n; j++) {
4308 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4310 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
/* Record len - i - j for byte j of the current character and for the byte at
 * the same position in each case-fold buffer. */
4314 for (j = 0; j < clen; j++) {
4315 skip[s[i + j]] = (UChar )(len - i - j);
4316 for (k = 0; k < n; k++) {
4317 skip[buf[k][j]] = (UChar )(len - i - j);
4323# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4325 return ONIGERR_TYPE_BUG;
/* int-table form: allocate *int_skip on first use; ONIGERR_MEMORY on failure. */
4327 if (IS_NULL(*int_skip)) {
4328 *int_skip = (
int* )
xmalloc(
sizeof(
int) * ONIG_CHAR_TABLE_SIZE);
4329 if (IS_NULL(*int_skip))
return ONIGERR_MEMORY;
/* Same filling scheme as above, stored as int instead of UChar. */
4331 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4334 for (i = 0; i < len; i += clen) {
4337 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4339 clen = enclen(enc, p, end);
4341 clen = (int )(end - p);
4343 for (j = 0; j < n; j++) {
4344 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4346 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4350 for (j = 0; j < clen; j++) {
4351 (*int_skip)[s[i + j]] = (int )(len - i - j);
4352 for (k = 0; k < n; k++) {
4353 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4371 OnigOptionType options;
4372 OnigCaseFoldType case_fold_flag;
4388 UChar s[OPT_EXACT_MAXLEN];
4396 UChar map[ONIG_CHAR_TABLE_SIZE];
/* Per-byte weight lookup (presumably the body of map_position_value; the
 * function header is not in this listing).  The table below has 128 entries
 * (8 rows of 16), one per byte value 0..127. */
4414 static const short int ByteValTable[] = {
4415 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4416 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4417 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4418 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4419 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4420 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4421 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4422 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
/* In-range indices return the table entry; index 0 is special-cased when the
 * encoding's minimum character length exceeds one byte (the value used for
 * that case, and for out-of-range indices, is on lines not shown). */
4425 if (i < numberof(ByteValTable)) {
4426 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4429 return (
int )ByteValTable[i];
/* Weight for a min/max distance range (presumably the body of
 * distance_value; the function header is not in this listing).
 * dist_vals has 100 entries that fall from 1000 towards 10, roughly
 * 1000 / (d + 1) for index d. */
4439 static const short int dist_vals[] = {
4440 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4441 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4442 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4443 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4444 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4445 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4446 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4447 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4448 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4449 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
/* An unbounded maximum is worth 0. */
4454 if (mm->max == ONIG_INFINITE_DISTANCE)
return 0;
/* Otherwise the width of the range (max - min) indexes the table; the value
 * used for widths beyond the table is on a line not shown. */
4456 d = mm->max - mm->min;
4457 if (d < numberof(dist_vals))
4459 return (
int )dist_vals[d];
/* Comparison of two weighted candidates (presumably the body of
 * comp_distance_value; the header and the final return are not shown).
 * A non-positive v2 yields -1, otherwise a non-positive v1 yields 1. */
4467 if (v2 <= 0)
return -1;
4468 if (v1 <= 0)
return 1;
/* Scale each weight by the value of its distance range. */
4470 v1 *= distance_value(d1);
4471 v2 *= distance_value(d2);
/* Larger scaled v2 yields 1, larger scaled v1 yields -1. */
4473 if (v2 > v1)
return 1;
4474 if (v2 < v1)
return -1;
/* Tie-break on the minimum distance: a smaller d2->min yields 1, a smaller
 * d1->min yields -1. */
4476 if (d2->min < d1->min)
return 1;
4477 if (d2->min > d1->min)
return -1;
/* MinMaxLen helpers.  NOTE(review): only fragments of each helper appear in
 * this listing; helper names in the notes below other than set_mml and
 * add_len_mml are inferred from call sites elsewhere in the file. */
/* Equality test: 1 when both bounds match, else 0 (presumably is_equal_mml). */
4484 return (a->min == b->min && a->max == b->max) ? 1 : 0;
/* set_mml: receives the target and a min/max pair; its body is not shown. */
4489set_mml(
MinMaxLen* mml, OnigDistance min, OnigDistance max)
/* Reset both bounds to zero (presumably the body of clear_mml). */
4498 mml->min = mml->max = 0;
/* Copy both bounds from `from` to `to`. */
4504 to->min = from->min;
4505 to->max = from->max;
/* Add the bounds pairwise through distance_add. */
4511 to->min = distance_add(to->min, from->min);
4512 to->max = distance_add(to->max, from->max);
/* add_len_mml: add one length to both bounds through distance_add. */
4517add_len_mml(
MinMaxLen* to, OnigDistance len)
4519 to->min = distance_add(to->min, len);
4520 to->max = distance_add(to->max, len);
/* Merge for alternatives: keep the smaller min and the larger max. */
4527 if (to->min > from->min) to->min = from->min;
4528 if (to->max < from->max) to->max = from->max;
/* Anchor-info helpers.  NOTE(review): most function headers are missing from
 * this listing; helper names in the notes are inferred from call sites. */
/* Clear both anchor sets (presumably clear_opt_anc_info). */
4540 anc->left_anchor = 0;
4541 anc->right_anchor = 0;
/* Concatenation of two anchor infos: the left set comes from `left`, plus
 * `right`'s left set when left_len is 0; the right set comes from `right`,
 * plus `left`'s right set when right_len is 0. */
4552 OnigDistance left_len, OnigDistance right_len)
4554 clear_opt_anc_info(to);
4556 to->left_anchor = left->left_anchor;
4557 if (left_len == 0) {
4558 to->left_anchor |= right->left_anchor;
4561 to->right_anchor = right->right_anchor;
4562 if (right_len == 0) {
4563 to->right_anchor |= left->right_anchor;
/* Carries over only the ANCHOR_PREC_READ_NOT bit of left's right set
 * (presumably the branch for right_len != 0; the `else` is not shown). */
4566 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
/* is_left_anchor: tests `anc` against the end-of-buffer, end-of-line and
 * look-ahead anchor types; the values returned are on lines not shown. */
4571is_left_anchor(
int anc)
4573 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4574 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4575 anc == ANCHOR_PREC_READ_NOT)
/* Membership test: 1 when `anc` is present in either set. */
4584 if ((to->left_anchor & anc) != 0)
return 1;
4586 return ((to->right_anchor & anc) != 0 ? 1 : 0);
/* Add `anc` to the set selected by is_left_anchor. */
4592 if (is_left_anchor(anc))
4593 to->left_anchor |= anc;
4595 to->right_anchor |= anc;
/* Remove `anc` from the set selected by is_left_anchor. */
4601 if (is_left_anchor(anc))
4602 to->left_anchor &= ~anc;
4604 to->right_anchor &= ~anc;
/* Merge for alternatives: keep only anchors present in both infos. */
4610 to->left_anchor &= add->left_anchor;
4611 to->right_anchor &= add->right_anchor;
/* Exact-string info helpers.  NOTE(review): most function headers and several
 * body lines are missing from this listing; helper names in the notes are
 * inferred from call sites. */
/* "Full" test: 1 once the stored length reaches OPT_EXACT_MAXLEN. */
4617 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
/* Reset: clear the distance range and anchors; ignore_case = -1 acts as
 * "not set yet" (see the `< 0` tests below). */
4623 clear_mml(&ex->mmd);
4624 clear_opt_anc_info(&ex->anc);
4626 ex->ignore_case = -1;
/* Concatenate `add` onto `to`: adopt add's ignore_case when `to` has none;
 * the handling of a mismatch is on a line not shown. */
4644 if (to->ignore_case < 0)
4645 to->ignore_case = add->ignore_case;
4646 else if (to->ignore_case != add->ignore_case)
/* Append whole characters while they still fit within OPT_EXACT_MAXLEN. */
4651 for (i = to->len; p < end; ) {
4652 len = enclen(enc, p, end);
4653 if (i + len > OPT_EXACT_MAXLEN)
break;
4654 for (j = 0; j < len && p < end; j++)
/* reach_end survives only if all of add was consumed. */
4659 to->reach_end = (p == end ? add->reach_end : 0);
4661 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4662 if (! to->reach_end) tanc.right_anchor = 0;
4663 copy_opt_anc_info(&to->anc, &tanc);
/* concat_opt_exact_info_str: same append loop, reading from the raw byte
 * range [s, end) and also stopping once i reaches OPT_EXACT_MAXLEN. */
4667concat_opt_exact_info_str(
OptExactInfo* to, UChar* s, UChar* end,
4673 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4674 len = enclen(enc, p, end);
4675 if (i + len > OPT_EXACT_MAXLEN)
break;
4676 for (j = 0; j < len && p < end; j++)
/* Merge for alternatives: an empty side or differing distance ranges clear
 * `to` entirely. */
4688 if (add->len == 0 || to->len == 0) {
4689 clear_opt_exact_info(to);
4693 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4694 clear_opt_exact_info(to);
/* Compare the two strings character by character (all bytes of a multibyte
 * character are checked); i stops at the first difference. */
4698 for (i = 0; i < to->len && i < add->len; ) {
4699 if (to->s[i] != add->s[i])
break;
4700 len = enclen(env->enc, to->s + i, to->s + to->len);
4702 for (j = 1; j < len; j++) {
4703 if (to->s[i+j] != add->s[i+j])
break;
4709 if (! add->reach_end || i < add->len || i < to->len) {
4713 if (to->ignore_case < 0)
4714 to->ignore_case = add->ignore_case;
4715 else if (add->ignore_case >= 0)
4716 to->ignore_case |= add->ignore_case;
4718 alt_merge_opt_anc_info(&to->anc, &add->anc);
4719 if (! to->reach_end) to->anc.right_anchor = 0;
/* Selection between `now` and `alt`: `alt` replaces `now` when it wins. */
4734 copy_opt_exact_info(now, alt);
4737 else if (v1 <= 2 && v2 <= 2) {
/* NOTE: in this branch v2 is taken from now->s[0] and v1 from alt->s[0],
 * and the +5 bonuses are crossed the same way. */
4739 v2 = map_position_value(enc, now->s[0]);
4740 v1 = map_position_value(enc, alt->s[0]);
4742 if (now->len > 1) v1 += 5;
4743 if (alt->len > 1) v2 += 5;
/* A candidate that is not case-insensitive has its weight doubled. */
4746 if (now->ignore_case <= 0) v1 *= 2;
4747 if (alt->ignore_case <= 0) v2 *= 2;
4749 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4750 copy_opt_exact_info(now, alt);
/* Map-info reset and single-byte insertion.  NOTE(review): the declaration
 * that these 256 zeros (16 rows of 16) initialise is not in this listing; the
 * xmemcpy below copies an object named clean_info over the whole OptMapInfo. */
4759 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4760 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4761 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4762 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4763 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4764 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4765 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4766 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4767 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4768 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4769 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4770 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4771 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4778 xmemcpy(map, &clean_info,
sizeof(
OptMapInfo));
/* Adding a byte: only a byte whose map entry is still 0 contributes its
 * position value to map->value (the line that marks the entry is not shown). */
4790 if (map->map[c] == 0) {
4792 map->value += map_position_value(enc, c);
/* add_char_amb_opt_map_info: adds the first byte of the character at p to
 * the map, then the first byte of each case-fold alternative reported by the
 * encoding.  Multi-character folds are disabled before the lookup.  A
 * negative count from the lookup is returned unchanged.
 * NOTE(review): some lines of the function are missing from this listing. */
4797add_char_amb_opt_map_info(
OptMapInfo* map, UChar* p, UChar* end,
4801 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4804 add_char_opt_map_info(map, p[0], enc);
4806 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4807 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4808 if (n < 0)
return n;
/* Encode each alternative's first code point and record its leading byte. */
4810 for (i = 0; i < n; i++) {
4811 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4812 add_char_opt_map_info(map, buf[0], enc);
/* Choice between two map infos `now` and `alt` (presumably the body of
 * select_opt_map_info; the header is not in this listing).
 * An empty alt (value 0) changes nothing; an empty now is replaced by alt.
 * Otherwise each side is weighted as z / value, so a smaller map value gives
 * a larger weight, and alt replaces now when comp_distance_value returns > 0. */
4821 const int z = 1<<15;
4825 if (alt->value == 0) return ;
4826 if (now->value == 0) {
4827 copy_opt_map_info(now, alt);
4831 v1 = z / now->value;
4832 v2 = z / alt->value;
4833 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4834 copy_opt_map_info(now, alt);
/* Comparison of an exact-string candidate `e` with a map candidate `m`
 * (presumably the body of comp_opt_exact_or_map_info; header not shown).
 * COMP_EM_BASE is the common scale factor for both weights. */
4840#define COMP_EM_BASE 20
/* A map with no value yields -1. */
4843 if (m->value <= 0)
return -1;
/* Exact weight grows with the string length and is halved when the string
 * is case-insensitive; map weight shrinks as the map value grows. */
4845 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4846 vm = COMP_EM_BASE * 5 * 2 / m->value;
4847 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
/* Merge of two map infos for alternatives (presumably the body of
 * alt_merge_opt_map_info; header and parts of the loop are not shown).
 * An empty `to` is left alone; `to` is cleared when `add` is empty or when
 * to's maximum distance lies below add's minimum distance. */
4856 if (to->value == 0) return ;
4857 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4858 clear_opt_map_info(to);
4862 alt_merge_mml(&to->mmd, &add->mmd);
/* Walk all byte values, accumulating position values into val (the test
 * that selects which bytes count is on lines not shown). */
4865 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4870 val += map_position_value(enc, i);
4874 alt_merge_opt_anc_info(&to->anc, &add->anc);
/* Two node-info helpers whose headers are not in this listing. */
/* Seed the distance range of exb, expr and map from one MinMaxLen
 * (presumably set_bound_node_opt_info). */
4880 copy_mml(&(opt->exb.mmd), mmd);
4881 copy_mml(&(opt->expr.mmd), mmd);
4882 copy_mml(&(opt->map.mmd), mmd);
/* Reset every member of a node info: length, anchors, the three exact
 * strings and the map (presumably clear_node_opt_info). */
4888 clear_mml(&opt->len);
4889 clear_opt_anc_info(&opt->anc);
4890 clear_opt_exact_info(&opt->exb);
4891 clear_opt_exact_info(&opt->exm);
4892 clear_opt_exact_info(&opt->expr);
4893 clear_opt_map_info(&opt->map);
/* Appends node info `add` to the right of `to` (presumably the body of
 * concat_left_node_opt_info; the header and some branch lines are missing
 * from this listing). */
4905 int exb_reach, exm_reach;
/* Combine the anchor sets, using each side's maximum length. */
4908 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4909 copy_opt_anc_info(&to->anc, &tanc);
/* When `to` cannot consume anything (len.max == 0), its anchors are folded
 * into add's leading exact string and, for a map at distance 0, into the
 * map's left anchors. */
4911 if (add->exb.len > 0 && to->len.max == 0) {
4912 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4913 to->len.max, add->len.max);
4914 copy_opt_anc_info(&add->exb.anc, &tanc);
4917 if (add->map.value > 0 && to->len.max == 0) {
4918 if (add->map.mmd.max == 0)
4919 add->map.anc.left_anchor |= to->anc.left_anchor;
/* Remember whether to's strings reached its end before the flags are
 * cleared for a non-empty `add`. */
4922 exb_reach = to->exb.reach_end;
4923 exm_reach = to->exm.reach_end;
4925 if (add->len.max != 0)
4926 to->exb.reach_end = to->exm.reach_end = 0;
/* add's leading string is appended to to->exb or to->exm; the exm case is
 * guarded by exm_reach (the guard for the exb case is on a line not shown). */
4928 if (add->exb.len > 0) {
4930 concat_opt_exact_info(&to->exb, &add->exb, enc);
4931 clear_opt_exact_info(&add->exb);
4933 else if (exm_reach) {
4934 concat_opt_exact_info(&to->exm, &add->exb, enc);
4935 clear_opt_exact_info(&add->exb);
/* to->exm keeps the best of itself, add->exb and add->exm. */
4938 select_opt_exact_info(enc, &to->exm, &add->exb);
4939 select_opt_exact_info(enc, &to->exm, &add->exm);
/* to->expr is clamped to add's maximum length and then competes with exb
 * (when its distance max is 0) or with exm. */
4941 if (to->expr.len > 0) {
4942 if (add->len.max > 0) {
4943 if (to->expr.len > (
int )add->len.max)
4944 to->expr.len = (int )add->len.max;
4946 if (to->expr.mmd.max == 0)
4947 select_opt_exact_info(enc, &to->exb, &to->expr);
4949 select_opt_exact_info(enc, &to->exm, &to->expr);
4952 else if (add->expr.len > 0) {
4953 copy_opt_exact_info(&to->expr, &add->expr);
4956 select_opt_map_info(&to->map, &add->map);
/* Finally the lengths are summed. */
4958 add_mml(&to->len, &add->len);
/* Merge of two node infos for alternatives (presumably the body of
 * alt_merge_node_opt_info; the header is not in this listing): each member
 * of `to` is merged with the matching member of `add`. */
4964 alt_merge_opt_anc_info (&to->anc, &add->anc);
4965 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4966 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4967 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4968 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4970 alt_merge_mml(&to->len, &add->len);
/* Fragments of optimize_node_left, which fills *opt with optimisation data
 * for `node`.  NOTE(review): the function header, the outer switch and its
 * case labels are not in this listing; the node kinds named in the notes
 * below are inferred from the accessor macros used in each fragment. */
/* Bound compared against en->opt_count in the memory-group fragment below. */
4974#define MAX_NODE_OPT_INFO_REF_COUNT 5
/* Start from an empty result whose distance ranges are seeded from env. */
4982 clear_node_opt_info(opt);
4983 set_bound_node_opt_info(opt, &env->mmd);
/* Sequence traversal: each element is analysed against a private copy of env
 * whose mmd is advanced by the element's length, then appended to opt. */
4993 copy_opt_env(&nenv, env);
4995 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4997 add_mml(&nenv.mmd, &nopt.len);
4998 concat_left_node_opt_info(env->enc, opt, &nopt);
5000 }
while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
/* Alternative traversal: the first branch is copied into opt, each later
 * branch is merged into it. */
5010 r = optimize_node_left(NCAR(nd), &nopt, env);
5012 if (nd == node) copy_node_opt_info(opt, &nopt);
5013 else alt_merge_node_opt_info(opt, &nopt, env);
5015 }
while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
/* String node: slen is the byte length of the string. */
5022 OnigDistance slen = sn->end - sn->s;
5023 int is_raw = NSTRING_IS_RAW(node);
/* Non-ambiguous string: record it as a case-sensitive exact prefix, put its
 * first byte in the map, and fix the length at slen. */
5025 if (! NSTRING_IS_AMBIG(node)) {
5026 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5028 opt->exb.ignore_case = 0;
5030 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5032 set_mml(&opt->len, slen, slen);
/* Remaining string path: when the node is flagged "don't get opt info" only
 * a maximum length is estimated from the character count; the statements
 * after that record the string as a case-insensitive exact prefix and add
 * its case-fold first bytes to the map. */
5037 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5038 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5039 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5042 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5044 opt->exb.ignore_case = 1;
5047 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5048 env->enc, env->case_fold_flag);
5055 set_mml(&opt->len, slen, max);
/* The prefix reaches the end of the node when the whole string was stored. */
5058 if ((OnigDistance )opt->exb.len == slen)
5059 opt->exb.reach_end = 1;
/* Character class: with a multibyte buffer or a negated class only the
 * encoding's min/max character length is recorded. */
5070 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5071 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5072 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5074 set_mml(&opt->len, min, max);
/* Otherwise every single-byte member is added to the map and the length is
 * exactly one byte. */
5077 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5078 z = BITSET_AT(cc->bs, i);
5079 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5080 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5083 set_mml(&opt->len, 1, 1);
/* Character type node: for the word type, the map receives either the
 * non-word bytes (negated; bytes at or above maxcode always count) or the
 * word bytes below maxcode.  maxcode is 0x80 when ascii_range is set. */
5093 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5098 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5099 switch (NCTYPE(node)->ctype) {
5100 case ONIGENC_CTYPE_WORD:
5101 if (NCTYPE(node)->not != 0) {
5102 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5103 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5104 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5109 for (i = 0; i < maxcode; i++) {
5110 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5111 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5119 min = ONIGENC_MBC_MINLEN(env->enc);
5121 set_mml(&opt->len, min, max);
/* A fragment that records only the encoding's min/max character length
 * (presumably the any-character node; its case label is not shown). */
5127 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5128 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5129 set_mml(&opt->len, min, max);
/* Anchor node: the listed anchor types are recorded directly in opt->anc. */
5134 switch (NANCHOR(node)->type) {
5135 case ANCHOR_BEGIN_BUF:
5136 case ANCHOR_BEGIN_POSITION:
5137 case ANCHOR_BEGIN_LINE:
5138 case ANCHOR_END_BUF:
5139 case ANCHOR_SEMI_END_BUF:
5140 case ANCHOR_END_LINE:
5141 case ANCHOR_LOOK_BEHIND:
5142 case ANCHOR_PREC_READ_NOT:
5143 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
/* Look-ahead: analyse the target and keep its best exact string in
 * opt->expr (never marked as reaching the end) and its map, if any. */
5146 case ANCHOR_PREC_READ:
5150 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5152 if (nopt.exb.len > 0)
5153 copy_opt_exact_info(&opt->expr, &nopt.exb);
5154 else if (nopt.exm.len > 0)
5155 copy_opt_exact_info(&opt->expr, &nopt.exm);
5157 opt->expr.reach_end = 0;
5159 if (nopt.map.value > 0)
5160 copy_opt_map_info(&opt->map, &nopt.map);
5165 case ANCHOR_LOOK_BEHIND_NOT:
/* Back-reference: a recursive reference gets the range 0..infinite;
 * otherwise the range is the smallest minimum and largest maximum match
 * length over all referenced groups. */
5174 OnigDistance min, max, tmin, tmax;
5175 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5178 if (br->state & NST_RECURSION) {
5179 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5182 backs = BACKREFS_P(br);
5183 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5185 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5187 for (i = 1; i < br->back_num; i++) {
5188 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5190 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5192 if (min > tmin) min = tmin;
5193 if (max < tmax) max = tmax;
5195 if (r == 0) set_mml(&opt->len, min, max);
/* Subexpression call: a recursive call gets the range 0..infinite; otherwise
 * the target is analysed under the target group's options, which are
 * restored afterwards. */
5199#ifdef USE_SUBEXP_CALL
5201 if (IS_CALL_RECURSION(NCALL(node)))
5202 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5204 OnigOptionType save = env->options;
5205 env->options = NENCLOSE(NCALL(node)->target)->option;
5206 r = optimize_node_left(NCALL(node)->target, opt, env);
5207 env->options = save;
/* Quantifier: analyse the target first. */
5215 OnigDistance min, max;
5219 r = optimize_node_left(qn->target, &nopt, env);
/* A greedy unbounded repeat of an any-character node at distance 0 is
 * recorded as an "anychar star" anchor (multiline or not). */
5222 if ( IS_REPEAT_INFINITE(qn->upper)) {
5223 if (env->mmd.max == 0 &&
5224 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5225 if (IS_MULTILINE(env->options))
5227 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5229 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
/* With at least one mandatory repetition the target's info is reused, and a
 * prefix that reaches the target's end is repeated up to `lower` times or
 * until the exact buffer is full. */
5233 if (qn->lower > 0) {
5234 copy_node_opt_info(opt, &nopt);
5235 if (nopt.exb.len > 0) {
5236 if (nopt.exb.reach_end) {
5237 for (i = 2; i <= qn->lower &&
5238 ! is_full_opt_exact_info(&opt->exb); i++) {
5239 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5241 if (i < qn->lower) {
5242 opt->exb.reach_end = 0;
/* A variable repeat count means neither string can be known to reach the end. */
5247 if (qn->lower != qn->upper) {
5248 opt->exb.reach_end = 0;
5249 opt->exm.reach_end = 0;
5252 opt->exm.reach_end = 0;
/* Length range: min scales with `lower`; max is infinite for an unbounded
 * repeat of a non-empty target, otherwise it scales with `upper`. */
5256 min = distance_multiply(nopt.len.min, qn->lower);
5257 if (IS_REPEAT_INFINITE(qn->upper))
5258 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5260 max = distance_multiply(nopt.len.max, qn->upper);
5262 set_mml(&opt->len, min, max);
/* Option group: analyse the target under the group's options, then restore. */
5271 case ENCLOSE_OPTION:
5273 OnigOptionType save = env->options;
5275 env->options = en->option;
5276 r = optimize_node_left(en->target, opt, env);
5277 env->options = save;
/* Memory group: once opt_count exceeds the reference bound only a length
 * range is produced, taken from the group's fixed min/max when available. */
5281 case ENCLOSE_MEMORY:
5282#ifdef USE_SUBEXP_CALL
5284 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5285 OnigDistance min, max;
5288 max = ONIG_INFINITE_DISTANCE;
5289 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5290 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5291 set_mml(&opt->len, min, max);
/* Otherwise analyse the target; the anychar-star anchors are dropped when
 * this group is back-referenced. */
5296 r = optimize_node_left(en->target, opt, env);
5298 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5299 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5300 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
/* Stop-backtrack and condition groups simply analyse their target. */
5305 case ENCLOSE_STOP_BACKTRACK:
5306 case ENCLOSE_CONDITION:
5307 r = optimize_node_left(en->target, opt, env);
/* Absent group: any length is possible. */
5310 case ENCLOSE_ABSENT:
5311 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
/* Unknown node type: print a diagnostic (possibly debug-only; the
 * surrounding lines are not shown) and report ONIGERR_TYPE_BUG. */
5319 fprintf(stderr,
"optimize_node_left: undefined node type %d\n",
5322 r = ONIGERR_TYPE_BUG;
/* Installs an exact-string optimisation on `reg` (presumably the body of
 * set_optimize_exact_info; the header and some branch lines are missing from
 * this listing).  An empty string installs nothing and returns 0. */
5335 if (e->len == 0)
return 0;
/* reg->exact becomes a private heap copy of the string; a failed
 * allocation is reported through CHECK_NULL_RETURN_MEMERR. */
5337 reg->exact = (UChar* )
xmalloc(e->len);
5338 CHECK_NULL_RETURN_MEMERR(reg->exact);
5339 xmemcpy(reg->exact, e->s, e->len);
5340 reg->exact_end = reg->exact + e->len;
5343 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
/* Case-insensitive string: the skip table is built for length >= 3, or
 * length >= 2 when reverse matching is allowed; the optimise mode is then
 * one of the *_BM_*_IC variants, with plain EXACT_IC as the fallback. */
5345 if (e->ignore_case > 0) {
5346 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5347 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5348 reg->map, &(reg->int_map), 1);
5350 reg->optimize = (allow_reverse != 0
5351 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5354 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5358 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
/* Case-sensitive string: same thresholds, non-IC modes. */
5362 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5363 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5364 reg->map, &(reg->int_map), 0);
5366 reg->optimize = (allow_reverse != 0
5367 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5370 reg->optimize = ONIG_OPTIMIZE_EXACT;
5374 reg->optimize = ONIG_OPTIMIZE_EXACT;
/* Record where the string may start, and (for a finite dmin) the threshold
 * length dmin + string length. */
5378 reg->dmin = e->mmd.min;
5379 reg->dmax = e->mmd.max;
5381 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5382 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
/* Installs a byte-map optimisation on `reg` (presumably the body of
 * set_optimize_map_info; the header is not in this listing): the map is
 * copied entry by entry, the mode becomes ONIG_OPTIMIZE_MAP, and for a finite
 * dmin the threshold length is dmin + 1. */
5393 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5394 reg->map[i] = m->map[i];
5396 reg->optimize = ONIG_OPTIMIZE_MAP;
5397 reg->dmin = m->mmd.min;
5398 reg->dmax = m->mmd.max;
5400 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5401 reg->threshold_len = (int )(reg->dmin + 1);
/* Adds to reg->sub_anchor the begin-line bit of the left anchors and the
 * end-line bit of the right anchors (presumably the body of set_sub_anchor;
 * the header is not in this listing). */
5408 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5409 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
/* Forward declaration of the optimise-info dump routine, compiled only when
 * either compile or match debugging is enabled. */
5412#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5413static void print_optimize_info(
FILE* f,
regex_t* reg);
/* Derives reg's optimisation settings from the parse tree (presumably the
 * body of set_optimize_info_from_tree; the header and some lines are missing
 * from this listing). */
/* Analysis environment: options and case folding from reg, zero distance. */
5425 env.options = reg->options;
5426 env.case_fold_flag = reg->case_fold_flag;
5427 env.scan_env = scan_env;
5428 clear_mml(&env.mmd);
5430 r = optimize_node_left(node, &opt, &env);
/* Keep only the begin-type, anychar-star and look-behind left anchors. */
5433 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5434 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5435 ANCHOR_LOOK_BEHIND);
/* The multiline anychar-star anchor is dropped when a look-behind or a
 * negative look-ahead is among the left anchors. */
5437 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5438 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5440 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5441 ANCHOR_PREC_READ_NOT);
/* With an end-of-buffer anchor, the overall length range is recorded. */
5443 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5444 reg->anchor_dmin = opt.len.min;
5445 reg->anchor_dmax = opt.len.max;
/* Prefer an exact string (best of exb/exm) unless the map compares better;
 * the action taken when the map wins is on lines not shown. */
5448 if (opt.exb.len > 0 || opt.exm.len > 0) {
5449 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5450 if (opt.map.value > 0 &&
5451 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5455 r = set_optimize_exact_info(reg, &opt.exb);
5456 set_sub_anchor(reg, &opt.exb.anc);
5459 else if (opt.map.value > 0) {
5461 set_optimize_map_info(reg, &opt.map);
5462 set_sub_anchor(reg, &opt.map.anc);
/* Remaining path: line anchors only; the end-line anchor is kept only when
 * the pattern cannot consume anything. */
5465 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5466 if (opt.len.max == 0)
5467 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5470#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5471 print_optimize_info(stderr, reg);
/* clear_optimize_info: resets reg's optimisation state to "none" -- mode,
 * anchor distances, sub-anchor, threshold length and the exact-string
 * pointers.  NOTE(review): some lines are missing from this listing,
 * including whatever is done with a non-NULL reg->exact before the pointer
 * is cleared. */
5477clear_optimize_info(
regex_t* reg)
5479 reg->optimize = ONIG_OPTIMIZE_NONE;
5481 reg->anchor_dmin = 0;
5482 reg->anchor_dmax = 0;
5483 reg->sub_anchor = 0;
5484 reg->exact_end = (UChar* )NULL;
5485 reg->threshold_len = 0;
5486 if (IS_NOT_NULL(reg->exact)) {
5488 reg->exact = (UChar* )NULL;
/* Debug dump of a pattern in the form "PATTERN: /.../ (encoding-name)"
 * (presumably the body of print_enc_string; the header and loop lines are
 * not in this listing).  For encodings whose minimum character length
 * exceeds one byte the text is decoded code point by code point, with some
 * code points printed as " 0x%04x " and others written directly; otherwise
 * bytes are written directly. */
5495 const UChar *s,
const UChar *end)
5497 fprintf(fp,
"\nPATTERN: /");
5499 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5505 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5507 fprintf(fp,
" 0x%04x ", (
int )code);
5510 fputc((
int )code, fp);
5513 p += enclen(enc, p, end);
5518 fputc((
int )*s, fp);
5523 fprintf(fp,
"/ (%s)\n", enc->name);
/* Debug-only helper (compile or match debugging enabled).
 * print_distance_range: prints the two bounds a and b to f; each bound is
 * tested against ONIG_INFINITE_DISTANCE first, and a finite bound is printed
 * as "(value)".  What is printed for an infinite bound is on lines not
 * shown in this listing. */
5527#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5529print_distance_range(
FILE* f, OnigDistance a, OnigDistance b)
5531 if (a == ONIG_INFINITE_DISTANCE)
5534 fprintf(f,
"(%"PRIuPTR
")", a);
5538 if (b == ONIG_INFINITE_DISTANCE)
5541 fprintf(f,
"(%"PRIuPTR
")", b);
/* print_anchor: writes the name of every anchor bit set in `anchor` to f.
 * Names after the first are preceded by ", " when q is non-zero; q is
 * presumably a "something already printed" flag whose updates are on lines
 * not shown in this listing. */
5545print_anchor(
FILE* f,
int anchor)
5551 if (anchor & ANCHOR_BEGIN_BUF) {
5552 fprintf(f,
"begin-buf");
5555 if (anchor & ANCHOR_BEGIN_LINE) {
5556 if (q) fprintf(f,
", ");
5558 fprintf(f,
"begin-line");
5560 if (anchor & ANCHOR_BEGIN_POSITION) {
5561 if (q) fprintf(f,
", ");
5563 fprintf(f,
"begin-pos");
5565 if (anchor & ANCHOR_END_BUF) {
5566 if (q) fprintf(f,
", ");
5568 fprintf(f,
"end-buf");
5570 if (anchor & ANCHOR_SEMI_END_BUF) {
5571 if (q) fprintf(f,
", ");
5573 fprintf(f,
"semi-end-buf");
5575 if (anchor & ANCHOR_END_LINE) {
5576 if (q) fprintf(f,
", ");
5578 fprintf(f,
"end-line");
5580 if (anchor & ANCHOR_ANYCHAR_STAR) {
5581 if (q) fprintf(f,
", ");
5583 fprintf(f,
"anychar-star");
5585 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5586 if (q) fprintf(f,
", ");
5587 fprintf(f,
"anychar-star-ml");
/* Debug dump of reg's optimisation state (presumably the body of
 * print_optimize_info; the header and some lines are not in this listing).
 * on[] holds the mode names and is indexed by reg->optimize.
 * NOTE(review): part of the initialiser may be on a line that is not shown. */
5596 static const char* on[] = {
"NONE",
"EXACT",
"EXACT_BM",
"EXACT_BM_NOT_REV",
5598 "EXACT_BM_IC",
"EXACT_BM_NOT_REV_IC" };
5600 fprintf(f,
"optimize: %s\n", on[reg->optimize]);
5601 fprintf(f,
" anchor: "); print_anchor(f, reg->anchor);
/* The anchor distance range is printed only with an end-of-buffer anchor. */
5602 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5603 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5606 if (reg->optimize) {
5607 fprintf(f,
" sub anchor: "); print_anchor(f, reg->sub_anchor);
/* Exact-string modes: dump the string and its byte length. */
5614 fprintf(f,
"exact: [");
5615 for (p = reg->exact; p < reg->exact_end; p++) {
5618 fprintf(f,
"]: length: %"PRIdPTR
"\n", (reg->exact_end - reg->exact));
/* Map mode: count the non-zero entries, then list them; entries that are
 * not printable single-byte characters are shown as decimal numbers. */
5620 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5623 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5624 if (reg->map[i]) n++;
5626 fprintf(f,
"map: n=%d\n", n);
5630 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5631 if (reg->map[i] != 0) {
5632 if (c > 0) fputs(
", ", f);
5634 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5635 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5638 fprintf(f,
"%d", i);
/* Releases everything a regex object owns without freeing the object itself
 * (presumably the body of onig_free_body; the header is not in this
 * listing).  A NULL reg is ignored.  The chained regex, if any, is released
 * through onig_free, and the name table through onig_names_free when named
 * groups are compiled in. */
5651 if (IS_NOT_NULL(reg)) {
5652 if (IS_NOT_NULL(reg->p))
xfree(reg->p);
5653 if (IS_NOT_NULL(reg->exact))
xfree(reg->exact);
5654 if (IS_NOT_NULL(reg->int_map))
xfree(reg->int_map);
5655 if (IS_NOT_NULL(reg->int_map_backward))
xfree(reg->int_map_backward);
5656 if (IS_NOT_NULL(reg->repeat_range))
xfree(reg->repeat_range);
5657 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5659#ifdef USE_NAMED_GROUP
5660 onig_names_free(reg);
/* For a non-NULL reg, release its owned members via onig_free_body
 * (presumably the body of onig_free; the header and the line that frees the
 * object itself are not in this listing). */
5668 if (IS_NOT_NULL(reg)) {
5669 onig_free_body(reg);
/* onig_memsize: estimates the memory held by a regex object.
 * Starts from sizeof(regex_t) and adds the size of each owned buffer that is
 * present: compiled code (reg->alloc), the exact string, both int skip
 * tables, the repeat-range array, and -- recursively -- the chained regex.
 * Returns 0 for a NULL reg.  The final return is not shown in this listing. */
5676onig_memsize(
const regex_t *reg)
5678 size_t size =
sizeof(
regex_t);
5679 if (IS_NULL(reg))
return 0;
5680 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5681 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5682 if (IS_NOT_NULL(reg->int_map)) size +=
sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5683 if (IS_NOT_NULL(reg->int_map_backward)) size +=
sizeof(
int) * ONIG_CHAR_TABLE_SIZE;
5684 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc *
sizeof(
OnigRepeatRange);
5685 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
/* Memory estimate for a match-region object (the function header is not in
 * this listing): the size of the object itself plus one beg entry and one
 * end entry per allocated slot.  Returns 0 for a NULL regs. */
5693 size_t size =
sizeof(*regs);
5694 if (IS_NULL(regs))
return 0;
5695 size += regs->allocated * (
sizeof(*regs->beg) +
sizeof(*regs->end));
/* REGEX_TRANSFER(to, from): releases what `to` currently owns, then
 * overwrites it with a byte copy of `from`.  NOTE(review): the end of the
 * macro is not in this listing, so any further steps are unknown.  The last
 * line below is a use of the macro from a function whose header is also not
 * shown. */
5700#define REGEX_TRANSFER(to,from) do {\
5701 onig_free_body(to);\
5702 xmemcpy(to, from, sizeof(regex_t));\
5710 REGEX_TRANSFER(to, from);
/* Forward declarations of two debug dump routines, each compiled only when
 * its debug macro is defined: the byte-code lister (ONIG_DEBUG_COMPILE) and
 * the parse-tree printer (ONIG_DEBUG_PARSE_TREE). */
5714#ifdef ONIG_DEBUG_COMPILE
5715static void print_compiled_byte_code_list(
FILE* f,
regex_t* reg);
5717#ifdef ONIG_DEBUG_PARSE_TREE
5718static void print_tree(
FILE* f,
Node* node);
/* Pattern compilation entry points.  NOTE(review): this listing omits many
 * lines (conditional-compilation directives, braces, labels), so the notes
 * below describe only the visible statements.
 *
 * The first onig_compile shown is a thin wrapper that forwards to
 * onig_compile_ruby with no source file and line 0.  A second onig_compile
 * header follows the onig_compile_ruby header -- presumably an alternative
 * selected by a preprocessor conditional that is not shown. */
5723onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5726 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5732onig_compile_ruby(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
5733 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
5736onig_compile(
regex_t* reg,
const UChar* pattern,
const UChar* pattern_end,
/* Fallback size for the code buffer when the pattern-based estimate is 0. */
5740#define COMPILE_INIT_SIZE 20
5743 OnigDistance init_size;
5746#ifdef USE_SUBEXP_CALL
5750 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5753 scan_env.sourcefile = sourcefile;
5754 scan_env.sourceline = sourceline;
5758 print_enc_string(stderr, reg->enc, pattern, pattern_end);
/* First compilation into this regex: size the code buffer at twice the
 * pattern length.  NOTE(review): if OnigDistance is unsigned, the `<= 0`
 * test only ever matches 0 -- confirm. */
5761 if (reg->alloc == 0) {
5762 init_size = (pattern_end - pattern) * 2;
5763 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5764 r = BBUF_INIT(reg, init_size);
5765 if (r != 0)
goto end;
/* Reset per-compilation counters. */
5771 reg->num_repeat = 0;
5772 reg->num_null_check = 0;
5773 reg->repeat_range_alloc = 0;
5775#ifdef USE_COMBINATION_EXPLOSION_CHECK
5776 reg->num_comb_exp_check = 0;
/* Parse the pattern into a tree. */
5779 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5780 if (r != 0)
goto err;
5782#ifdef ONIG_DEBUG_PARSE_TREE
5784 fprintf(stderr,
"ORIGINAL PARSE TREE:\n");
5785 print_tree(stderr, root);
/* With named groups present, the "capture only named groups" syntax and no
 * explicit capture-group option: unnamed captures are disabled when there
 * are any, otherwise numbered references are checked. */
5789#ifdef USE_NAMED_GROUP
5791 if (scan_env.num_named > 0 &&
5792 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5793 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5794 if (scan_env.num_named != scan_env.num_mem)
5795 r = disable_noname_group_capture(&root, reg, &scan_env);
5797 r = numbered_ref_check(root);
5799 if (r != 0)
goto err;
/* Subexpression calls: set up the unset-address list, resolve the calls and
 * run both recursion checks. */
5803#ifdef USE_SUBEXP_CALL
5804 if (scan_env.num_call > 0) {
5805 r = unset_addr_list_init(&uslist, scan_env.num_call);
5806 if (r != 0)
goto err;
5807 scan_env.unset_addr_list = &uslist;
5808 r = setup_subexp_call(root, &scan_env);
5809 if (r != 0)
goto err_unset;
5810 r = subexp_recursive_check_trav(root, &scan_env);
5811 if (r < 0)
goto err_unset;
5812 r = subexp_inf_recursive_check_trav(root, &scan_env);
5813 if (r != 0)
goto err_unset;
5815 reg->num_call = scan_env.num_call;
/* Tree setup pass. */
5821 r = setup_tree(root, reg, 0, &scan_env);
5822 if (r != 0)
goto err_unset;
5824#ifdef ONIG_DEBUG_PARSE_TREE
5825 print_tree(stderr, root);
/* Copy the capture/backtrack bookkeeping from the scan environment; with
 * the find-condition option every bt_mem_end bit is switched on, and the
 * capture-history bits are always included. */
5828 reg->capture_history = scan_env.capture_history;
5829 reg->bt_mem_start = scan_env.bt_mem_start;
5830 reg->bt_mem_start |= reg->capture_history;
5831 if (IS_FIND_CONDITION(reg->options))
5832 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5834 reg->bt_mem_end = scan_env.bt_mem_end;
5835 reg->bt_mem_end |= reg->capture_history;
/* Combination-explosion check setup; the visible statements reset the check
 * count to 0 when recursion is present or when a group up to
 * comb_exp_max_regnum is back-referenced. */
5838#ifdef USE_COMBINATION_EXPLOSION_CHECK
5839 if (scan_env.backrefed_mem == 0
5840# ifdef USE_SUBEXP_CALL
5841 || scan_env.num_call == 0
5844 setup_comb_exp_check(root, 0, &scan_env);
5845# ifdef USE_SUBEXP_CALL
5846 if (scan_env.has_recursion != 0) {
5847 scan_env.num_comb_exp_check = 0;
5851 if (scan_env.comb_exp_max_regnum > 0) {
5853 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5854 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5855 scan_env.num_comb_exp_check = 0;
5862 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
/* Optimisation info is always cleared, and recomputed from the tree unless
 * ONIG_DONT_OPTIMIZE is defined. */
5865 clear_optimize_info(reg);
5866#ifndef ONIG_DONT_OPTIMIZE
5867 r = set_optimize_info_from_tree(root, reg, &scan_env);
5868 if (r != 0)
goto err_unset;
/* The dynamic memory-node table is released before code generation. */
5871 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5872 xfree(scan_env.mem_nodes_dynamic);
5873 scan_env.mem_nodes_dynamic = (
Node** )NULL;
/* Emit byte code for the tree, terminate it with OP_END, then patch the
 * call addresses and dispose of the unset-address list. */
5876 r = compile_tree(root, reg);
5878 r = add_opcode(reg, OP_END);
5879#ifdef USE_SUBEXP_CALL
5880 if (scan_env.num_call > 0) {
5881 r = unset_addr_list_fix(&uslist, reg);
5882 unset_addr_list_end(&uslist);
/* Choose the stack pop level from the repeat and backtrack-memory state. */
5887 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5888 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5890 if (reg->bt_mem_start != 0)
5891 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5893 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5896#ifdef USE_SUBEXP_CALL
5897 else if (scan_env.num_call > 0) {
5898 unset_addr_list_end(&uslist);
5901 onig_node_free(root);
5903#ifdef ONIG_DEBUG_COMPILE
5904# ifdef USE_NAMED_GROUP
5905 onig_print_names(stderr, reg);
5907 print_compiled_byte_code_list(stderr, reg);
5911 onig_reg_resize(reg);
/* Error exits: dispose of the unset-address list, report the offending
 * pattern range through einfo when one was recorded, and free the tree and
 * the dynamic memory-node table. */
5915#ifdef USE_SUBEXP_CALL
5916 if (scan_env.num_call > 0) {
5917 unset_addr_list_end(&uslist);
5921 if (IS_NOT_NULL(scan_env.error)) {
5922 if (IS_NOT_NULL(einfo)) {
5923 einfo->enc = scan_env.enc;
5924 einfo->par = scan_env.error;
5925 einfo->par_end = scan_env.error_end;
5929 onig_node_free(root);
5930 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5931 xfree(scan_env.mem_nodes_dynamic);
/* File-scope flag tested by onig_initialize before it does any work. */
5935static int onig_inited = 0;
/* onig_reg_init: validates the arguments and puts a regex object into its
 * initial, empty state.  NOTE(review): some lines (remaining parameters, the
 * test guarding the first return, a few field assignments) are missing from
 * this listing.
 * Visible error returns: ONIGERR_INVALID_ARGUMENT,
 * ONIGERR_DEFAULT_ENCODING_IS_NOT_SET for an undefined encoding, and
 * ONIGERR_INVALID_COMBINATION_OF_OPTIONS when both the capture-group and
 * don't-capture-group options are requested. */
5938onig_reg_init(
regex_t* reg, OnigOptionType option,
5939 OnigCaseFoldType case_fold_flag,
5946 return ONIGERR_INVALID_ARGUMENT;
5948 if (ONIGENC_IS_UNDEF(enc))
5949 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5951 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5952 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5953 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
/* The syntax's default options are merged in; with NEGATE_SINGLELINE the
 * SINGLELINE bit is removed after the merge. */
5956 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5957 option |= syntax->options;
5958 option &= ~ONIG_OPTION_SINGLELINE;
5961 option |= syntax->options;
/* Initial state: no optimisation, no owned buffers, no chain, no names. */
5964 (reg)->options = option;
5965 (reg)->syntax = syntax;
5966 (reg)->optimize = 0;
5967 (reg)->exact = (UChar* )NULL;
5968 (reg)->int_map = (
int* )NULL;
5969 (reg)->int_map_backward = (
int* )NULL;
5970 (reg)->chain = (
regex_t* )NULL;
5972 (reg)->p = (UChar* )NULL;
5975 (reg)->name_table = (
void* )NULL;
5977 (reg)->case_fold_flag = case_fold_flag;
5979 (reg)->timelimit = 0;
/* onig_new_without_alloc: initialises a caller-provided regex object with
 * the default case-fold flag and then compiles the pattern into it.
 * NOTE(review): the remaining parameters and the check on the result of
 * onig_reg_init are on lines not shown in this listing. */
5985onig_new_without_alloc(
regex_t* reg,
const UChar* pattern,
5986 const UChar* pattern_end, OnigOptionType option,
OnigEncoding enc,
5991 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5994 r = onig_compile(reg, pattern, pattern_end, einfo);
/*
 * onig_new: initialize *reg with onig_reg_init() (using
 * ONIGENC_CASE_FOLD_DEFAULT) and compile the pattern into it with
 * onig_compile(). Returns ONIGERR_MEMORY when *reg is NULL.
 * NOTE(review): presumably *reg is allocated on a line not shown here, just
 * before the NULL test.
 */
5999onig_new(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
6006 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
6008 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6011 r = onig_compile(*reg, pattern, pattern_end, einfo);
/*
 * onig_initialize: setup entry point. Both parameters are marked ARG_UNUSED.
 * Tests onig_inited on entry (the action taken is on a line not shown here;
 * presumably an early return so repeated calls do nothing).
 */
6021onig_initialize(
OnigEncoding encodings[] ARG_UNUSED,
int n ARG_UNUSED)
6029 if (onig_inited != 0)
/* MSVC debug builds only: turn on CRT allocation tracking and leak checking. */
6034#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6035 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
/* Statistics builds only: initialize the statistics counters. */
6041#ifdef ONIG_DEBUG_STATISTICS
6042 onig_statistics_init();
/*
 * onig_add_end_call: register `func` by linking a new item in front of the
 * EndCallTop list.
 * NOTE(review): when `item` is 0 the function returns without registering
 * anything; since the return type is void the caller cannot detect this.
 */
6051extern void onig_add_end_call(
void (*func)(
void))
6056 if (item == 0) return ;
/* The new item points at the current head of the list. */
6058 item->next = EndCallTop;
/*
 * exec_end_call_list: walk the EndCallTop list until it is empty, fetching
 * each item's func and unlinking the item by advancing EndCallTop.
 * NOTE(review): the invocation of func and the release of the item presumably
 * happen on lines not shown in this excerpt.
 */
6065exec_end_call_list(
void)
6070 while (EndCallTop != 0) {
6071 func = EndCallTop->func;
6075 EndCallTop = EndCallTop->next;
/*
 * NOTE(review): the header of this function is not visible in this excerpt
 * (presumably onig_end()). Visible behavior: process the registered end-call
 * list, then emit optional debug output.
 */
6083 exec_end_call_list();
/* Statistics builds only: dump the collected statistics to stderr. */
6085#ifdef ONIG_DEBUG_STATISTICS
6086 onig_print_statistics(stderr);
/* MSVC debug builds only: report leaked CRT allocations. */
6089#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6090 _CrtDumpMemoryLeaks();
/*
 * onig_is_in_code_range: test whether `code` lies inside one of the ranges
 * stored in the buffer at `p`. Returns 1 when it does, 0 otherwise.
 *
 * The buffer starts with a count n (read with GET_CODE_POINT) and is then
 * accessed as an array of OnigCodePoint indexed in pairs (2*i, 2*i + 1).
 */
6099onig_is_in_code_range(
const UChar* p, OnigCodePoint code)
6101 OnigCodePoint n, *data;
6102 OnigCodePoint low, high, x;
6104 GET_CODE_POINT(n, p);
6105 data = (OnigCodePoint* )p;
/* Binary search over [0, n): element 2*x + 1 of pair x is compared as an upper bound. */
6108 for (low = 0, high = n; low < high; ) {
6109 x = (low + high) >> 1;
6110 if (code > data[x * 2 + 1])
/* `low` now names the candidate pair; accept only if it exists and the code
 * is not below that pair's first element. */
6116 return ((low < n && code >= data[low * 2]) ? 1 : 0);
/*
 * onig_is_code_in_cc_len: decide whether `code` (whose encoded length is
 * `elen`) is matched by character class `cc`.
 */
6120onig_is_code_in_cc_len(
int elen, OnigCodePoint code,
CClassNode* cc)
/* Multi-byte codes and codes >= SINGLE_BYTE_SIZE are looked up in cc->mbuf
 * (when one exists) through onig_is_in_code_range(). */
6124 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6125 if (IS_NULL(cc->mbuf)) {
6129 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
/* Remaining codes are looked up in the bitset cc->bs. */
6133 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
/* NOTE(review): presumably the result is inverted for a negated class; the
 * statement guarded by this test is not shown here. */
6136 if (IS_NCCLASS_NOT(cc))
/*
 * NOTE(review): the header of this function is not visible in this excerpt
 * (presumably onig_is_code_in_cc()). It determines the encoded length of
 * `code` and delegates to onig_is_code_in_cc_len().
 */
/* Encodings whose minimum character length exceeds 1 are special-cased (body not shown). */
6147 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6151 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6153 return onig_is_code_in_cc_len(len, code, cc);
/*
 * Operand-kind tags used as the third member of each OnigOpInfo entry.
 * onig_print_compiled_byte_code() obtains the tag through op2arg_type() and
 * uses it to decide how to decode an instruction's operand; ARG_SPECIAL marks
 * opcodes that are decoded individually.
 */
6160# define ARG_SPECIAL -1
6162# define ARG_RELADDR 1
6163# define ARG_ABSADDR 2
6164# define ARG_LENGTH 3
6165# define ARG_MEMNUM 4
6166# define ARG_OPTION 5
6167# define ARG_STATE_CHECK 6
/*
 * Opcode description table used by the debug printers. Each entry is
 * { opcode, printable name, operand kind (ARG_*) }; the lookup loops read the
 * members as .opcode, .name and .arg_type.
 * NOTE(review): those loops stop at an entry whose opcode is negative; that
 * terminating entry is not visible in this excerpt.
 */
6169OnigOpInfoType OnigOpInfo[] = {
6170 { OP_FINISH,
"finish", ARG_NON },
6171 { OP_END,
"end", ARG_NON },
6172 { OP_EXACT1,
"exact1", ARG_SPECIAL },
6173 { OP_EXACT2,
"exact2", ARG_SPECIAL },
6174 { OP_EXACT3,
"exact3", ARG_SPECIAL },
6175 { OP_EXACT4,
"exact4", ARG_SPECIAL },
6176 { OP_EXACT5,
"exact5", ARG_SPECIAL },
6177 { OP_EXACTN,
"exactn", ARG_SPECIAL },
6178 { OP_EXACTMB2N1,
"exactmb2-n1", ARG_SPECIAL },
6179 { OP_EXACTMB2N2,
"exactmb2-n2", ARG_SPECIAL },
6180 { OP_EXACTMB2N3,
"exactmb2-n3", ARG_SPECIAL },
6181 { OP_EXACTMB2N,
"exactmb2-n", ARG_SPECIAL },
6182 { OP_EXACTMB3N,
"exactmb3n" , ARG_SPECIAL },
6183 { OP_EXACTMBN,
"exactmbn", ARG_SPECIAL },
6184 { OP_EXACT1_IC,
"exact1-ic", ARG_SPECIAL },
6185 { OP_EXACTN_IC,
"exactn-ic", ARG_SPECIAL },
6186 { OP_CCLASS,
"cclass", ARG_SPECIAL },
6187 { OP_CCLASS_MB,
"cclass-mb", ARG_SPECIAL },
6188 { OP_CCLASS_MIX,
"cclass-mix", ARG_SPECIAL },
6189 { OP_CCLASS_NOT,
"cclass-not", ARG_SPECIAL },
6190 { OP_CCLASS_MB_NOT,
"cclass-mb-not", ARG_SPECIAL },
6191 { OP_CCLASS_MIX_NOT,
"cclass-mix-not", ARG_SPECIAL },
6192 { OP_ANYCHAR,
"anychar", ARG_NON },
6193 { OP_ANYCHAR_ML,
"anychar-ml", ARG_NON },
6194 { OP_ANYCHAR_STAR,
"anychar*", ARG_NON },
6195 { OP_ANYCHAR_ML_STAR,
"anychar-ml*", ARG_NON },
6196 { OP_ANYCHAR_STAR_PEEK_NEXT,
"anychar*-peek-next", ARG_SPECIAL },
6197 { OP_ANYCHAR_ML_STAR_PEEK_NEXT,
"anychar-ml*-peek-next", ARG_SPECIAL },
6198 { OP_WORD,
"word", ARG_NON },
6199 { OP_NOT_WORD,
"not-word", ARG_NON },
6200 { OP_WORD_BOUND,
"word-bound", ARG_NON },
6201 { OP_NOT_WORD_BOUND,
"not-word-bound", ARG_NON },
6202 { OP_WORD_BEGIN,
"word-begin", ARG_NON },
6203 { OP_WORD_END,
"word-end", ARG_NON },
6204 { OP_ASCII_WORD,
"ascii-word", ARG_NON },
6205 { OP_NOT_ASCII_WORD,
"not-ascii-word", ARG_NON },
6206 { OP_ASCII_WORD_BOUND,
"ascii-word-bound", ARG_NON },
6207 { OP_NOT_ASCII_WORD_BOUND,
"not-ascii-word-bound", ARG_NON },
6208 { OP_ASCII_WORD_BEGIN,
"ascii-word-begin", ARG_NON },
6209 { OP_ASCII_WORD_END,
"ascii-word-end", ARG_NON },
6210 { OP_BEGIN_BUF,
"begin-buf", ARG_NON },
6211 { OP_END_BUF,
"end-buf", ARG_NON },
6212 { OP_BEGIN_LINE,
"begin-line", ARG_NON },
6213 { OP_END_LINE,
"end-line", ARG_NON },
6214 { OP_SEMI_END_BUF,
"semi-end-buf", ARG_NON },
6215 { OP_BEGIN_POSITION,
"begin-position", ARG_NON },
6216 { OP_BACKREF1,
"backref1", ARG_NON },
6217 { OP_BACKREF2,
"backref2", ARG_NON },
6218 { OP_BACKREFN,
"backrefn", ARG_MEMNUM },
6219 { OP_BACKREFN_IC,
"backrefn-ic", ARG_SPECIAL },
6220 { OP_BACKREF_MULTI,
"backref_multi", ARG_SPECIAL },
6221 { OP_BACKREF_MULTI_IC,
"backref_multi-ic", ARG_SPECIAL },
6222 { OP_BACKREF_WITH_LEVEL,
"backref_at_level", ARG_SPECIAL },
6223 { OP_MEMORY_START_PUSH,
"mem-start-push", ARG_MEMNUM },
6224 { OP_MEMORY_START,
"mem-start", ARG_MEMNUM },
6225 { OP_MEMORY_END_PUSH,
"mem-end-push", ARG_MEMNUM },
6226 { OP_MEMORY_END_PUSH_REC,
"mem-end-push-rec", ARG_MEMNUM },
6227 { OP_MEMORY_END,
"mem-end", ARG_MEMNUM },
6228 { OP_MEMORY_END_REC,
"mem-end-rec", ARG_MEMNUM },
6229 { OP_SET_OPTION_PUSH,
"set-option-push", ARG_OPTION },
6230 { OP_SET_OPTION,
"set-option", ARG_OPTION },
6231 { OP_KEEP,
"keep", ARG_NON },
6232 { OP_FAIL,
"fail", ARG_NON },
6233 { OP_JUMP,
"jump", ARG_RELADDR },
6234 { OP_PUSH,
"push", ARG_RELADDR },
6235 { OP_POP,
"pop", ARG_NON },
6236 { OP_PUSH_OR_JUMP_EXACT1,
"push-or-jump-e1", ARG_SPECIAL },
6237 { OP_PUSH_IF_PEEK_NEXT,
"push-if-peek-next", ARG_SPECIAL },
6238 { OP_REPEAT,
"repeat", ARG_SPECIAL },
6239 { OP_REPEAT_NG,
"repeat-ng", ARG_SPECIAL },
6240 { OP_REPEAT_INC,
"repeat-inc", ARG_MEMNUM },
6241 { OP_REPEAT_INC_NG,
"repeat-inc-ng", ARG_MEMNUM },
6242 { OP_REPEAT_INC_SG,
"repeat-inc-sg", ARG_MEMNUM },
6243 { OP_REPEAT_INC_NG_SG,
"repeat-inc-ng-sg", ARG_MEMNUM },
6244 { OP_NULL_CHECK_START,
"null-check-start", ARG_MEMNUM },
6245 { OP_NULL_CHECK_END,
"null-check-end", ARG_MEMNUM },
6246 { OP_NULL_CHECK_END_MEMST,
"null-check-end-memst", ARG_MEMNUM },
6247 { OP_NULL_CHECK_END_MEMST_PUSH,
"null-check-end-memst-push", ARG_MEMNUM },
6248 { OP_PUSH_POS,
"push-pos", ARG_NON },
6249 { OP_POP_POS,
"pop-pos", ARG_NON },
6250 { OP_PUSH_POS_NOT,
"push-pos-not", ARG_RELADDR },
6251 { OP_FAIL_POS,
"fail-pos", ARG_NON },
6252 { OP_PUSH_STOP_BT,
"push-stop-bt", ARG_NON },
6253 { OP_POP_STOP_BT,
"pop-stop-bt", ARG_NON },
6254 { OP_LOOK_BEHIND,
"look-behind", ARG_SPECIAL },
6255 { OP_PUSH_LOOK_BEHIND_NOT,
"push-look-behind-not", ARG_SPECIAL },
6256 { OP_FAIL_LOOK_BEHIND_NOT,
"fail-look-behind-not", ARG_NON },
6257 { OP_PUSH_ABSENT_POS,
"push-absent-pos", ARG_NON },
6258 { OP_ABSENT,
"absent", ARG_RELADDR },
6259 { OP_ABSENT_END,
"absent-end", ARG_NON },
6260 { OP_CALL,
"call", ARG_ABSADDR },
6261 { OP_RETURN,
"return", ARG_NON },
6262 { OP_CONDITION,
"condition", ARG_SPECIAL },
6263 { OP_STATE_CHECK_PUSH,
"state-check-push", ARG_SPECIAL },
6264 { OP_STATE_CHECK_PUSH_OR_JUMP,
"state-check-push-or-jump", ARG_SPECIAL },
6265 { OP_STATE_CHECK,
"state-check", ARG_STATE_CHECK },
6266 { OP_STATE_CHECK_ANYCHAR_STAR,
"state-check-anychar*", ARG_STATE_CHECK },
6267 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6268 "state-check-anychar-ml*", ARG_STATE_CHECK },
/*
 * NOTE(review): the header of this function is not visible in this excerpt
 * (presumably op2name()). Linear scan of OnigOpInfo, ending at the first
 * entry with a negative opcode; returns the name of the matching entry.
 */
6277 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6278 if (opcode == OnigOpInfo[i].opcode)
6279 return OnigOpInfo[i].name;
/*
 * op2arg_type: look up `opcode` in OnigOpInfo and return the arg_type of the
 * matching entry. The scan is linear and ends at the first entry with a
 * negative opcode (the result for an unknown opcode is on a line not shown).
 */
6285op2arg_type(
int opcode)
6289 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6290 if (opcode == OnigOpInfo[i].opcode)
6291 return OnigOpInfo[i].arg_type;
6296# ifdef ONIG_DEBUG_PARSE_TREE
/* Indent: write `indent` space characters to `f`. */
6298Indent(
FILE* f,
int indent)
6301 for (i = 0; i < indent; i++) putc(
' ', f);
/* p_string: write `len` bytes starting at `s` to `f`, one fputc() per byte. */
6306p_string(
FILE* f, ptrdiff_t len, UChar* s)
6309 while (len-- > 0) { fputc(*s++, f); }
/*
 * p_len_string: print ":<len>:" followed by len * mb_len raw bytes taken
 * from `s`.
 */
6313p_len_string(
FILE* f, LengthType len,
int mb_len, UChar* s)
/* Total number of bytes to emit. */
6315 int x = len * mb_len;
6317 fprintf(f,
":%d:", len);
6318 while (x-- > 0) { fputc(*s++, f); }
/*
 * onig_print_compiled_byte_code: print the single instruction located at `bp`
 * to `f`, starting with "[" and the opcode name, followed by its operands.
 * When `nextp` is non-NULL, the position reached after decoding is stored in
 * *nextp.
 *
 * Operands are decoded according to the opcode's arg_type; opcodes tagged
 * ARG_SPECIAL are decoded individually per opcode.
 */
6322onig_print_compiled_byte_code(
FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6329 StateCheckNumType scn;
6333 fprintf(f,
"[%s", op2name(*bp));
6334 arg_type = op2arg_type(*bp);
/* Generic path: the operand layout follows directly from arg_type. */
6335 if (arg_type != ARG_SPECIAL) {
6341 GET_RELADDR_INC(addr, bp);
6342 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6345 GET_ABSADDR_INC(addr, bp);
6346 fprintf(f,
":(%d)", addr);
6349 GET_LENGTH_INC(len, bp);
6350 fprintf(f,
":%d", len);
6353 mem = *((MemNumType* )bp);
6355 fprintf(f,
":%d", mem);
6359 OnigOptionType option = *((OnigOptionType* )bp);
6361 fprintf(f,
":%d", option);
6365 case ARG_STATE_CHECK:
6366 scn = *((StateCheckNumType* )bp);
6367 bp += SIZE_STATE_CHECK_NUM;
6368 fprintf(f,
":%d", scn);
/* ARG_SPECIAL path: operands are decoded per opcode. */
6375 case OP_ANYCHAR_STAR_PEEK_NEXT:
6376 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6377 p_string(f, 1, bp++);
break;
6379 p_string(f, 2, bp); bp += 2;
break;
6381 p_string(f, 3, bp); bp += 3;
break;
6383 p_string(f, 4, bp); bp += 4;
break;
6385 p_string(f, 5, bp); bp += 5;
break;
6387 GET_LENGTH_INC(len, bp);
6388 p_len_string(f, len, 1, bp);
6393 p_string(f, 2, bp); bp += 2;
break;
6395 p_string(f, 4, bp); bp += 4;
break;
6397 p_string(f, 6, bp); bp += 6;
break;
6399 GET_LENGTH_INC(len, bp);
6400 p_len_string(f, len, 2, bp);
6404 GET_LENGTH_INC(len, bp);
6405 p_len_string(f, len, 3, bp);
6412 GET_LENGTH_INC(mb_len, bp);
6413 GET_LENGTH_INC(len, bp);
6414 fprintf(f,
":%d:%d:", mb_len, len);
6416 while (n-- > 0) { fputc(*bp++, f); }
6421 len = enclen(enc, bp, bpend);
6422 p_string(f, len, bp);
6426 GET_LENGTH_INC(len, bp);
6427 p_len_string(f, len, 1, bp);
/* Character-class opcodes: the number of bits set in the bitset is printed. */
6432 n = bitset_on_num((BitSetRef )bp);
6434 fprintf(f,
":%d", n);
6438 n = bitset_on_num((BitSetRef )bp);
6440 fprintf(f,
":%d", n);
6444 case OP_CCLASS_MB_NOT:
6445 GET_LENGTH_INC(len, bp);
6447# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6450 GET_CODE_POINT(code, q);
6452 fprintf(f,
":%d:%d", (
int )code, len);
6456 case OP_CCLASS_MIX_NOT:
6457 n = bitset_on_num((BitSetRef )bp);
6459 GET_LENGTH_INC(len, bp);
6461# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6464 GET_CODE_POINT(code, q);
6466 fprintf(f,
":%d:%d:%d", n, (
int )code, len);
6469 case OP_BACKREFN_IC:
6470 mem = *((MemNumType* )bp);
6472 fprintf(f,
":%d", mem);
/* Multi back-references: a count followed by that many memory numbers. */
6475 case OP_BACKREF_MULTI_IC:
6476 case OP_BACKREF_MULTI:
6478 GET_LENGTH_INC(len, bp);
6479 for (i = 0; i < len; i++) {
6480 GET_MEMNUM_INC(mem, bp);
6481 if (i > 0) fputs(
", ", f);
6482 fprintf(f,
"%d", mem);
/* Back-reference with level: option, level, then a counted list of memory numbers. */
6486 case OP_BACKREF_WITH_LEVEL:
6488 OnigOptionType option;
6491 GET_OPTION_INC(option, bp);
6492 fprintf(f,
":%d", option);
6493 GET_LENGTH_INC(level, bp);
6494 fprintf(f,
":%d", level);
6497 GET_LENGTH_INC(len, bp);
6498 for (i = 0; i < len; i++) {
6499 GET_MEMNUM_INC(mem, bp);
6500 if (i > 0) fputs(
", ", f);
6501 fprintf(f,
"%d", mem);
6509 mem = *((MemNumType* )bp);
6511 addr = *((RelAddrType* )bp);
6513 fprintf(f,
":%d:%d", mem, addr);
6517 case OP_PUSH_OR_JUMP_EXACT1:
6518 case OP_PUSH_IF_PEEK_NEXT:
6519 addr = *((RelAddrType* )bp);
6521 fprintf(f,
":(%s%d)", (addr >= 0) ?
"+" :
"", addr);
6526 case OP_LOOK_BEHIND:
6527 GET_LENGTH_INC(len, bp);
6528 fprintf(f,
":%d", len);
/* The address is read before the length, but the length is printed first. */
6531 case OP_PUSH_LOOK_BEHIND_NOT:
6532 GET_RELADDR_INC(addr, bp);
6533 GET_LENGTH_INC(len, bp);
6534 fprintf(f,
":%d:(%s%d)", len, (addr >= 0) ?
"+" :
"", addr);
6537 case OP_STATE_CHECK_PUSH:
6538 case OP_STATE_CHECK_PUSH_OR_JUMP:
6539 scn = *((StateCheckNumType* )bp);
6540 bp += SIZE_STATE_CHECK_NUM;
6541 addr = *((RelAddrType* )bp);
6543 fprintf(f,
":%d:(%s%d)", scn, (addr >= 0) ?
"+" :
"", addr);
6547 GET_MEMNUM_INC(mem, bp);
6548 GET_RELADDR_INC(addr, bp);
6549 fprintf(f,
":%d:(%s%d)", mem, (addr >= 0) ?
"+" :
"", addr);
/* NOTE(review): this diagnostic is written to stderr, not to `f` like the
 * rest of the output. */
6553 fprintf(stderr,
"onig_print_compiled_byte_code: undefined code %d\n",
/* Report the position after this instruction to the caller, if requested. */
6558 if (nextp) *nextp = bp;
6561# ifdef ONIG_DEBUG_COMPILE
/*
 * print_compiled_byte_code_list: dump the compiled code of `reg` to `f`.
 * Prints the code length (reg->used), then each instruction prefixed by its
 * byte offset from reg->p; onig_print_compiled_byte_code() advances `bp`
 * through its nextp argument.
 * NOTE(review): "%ld" is used for the pointer difference bp - reg->p and "%d"
 * for reg->used; confirm these match the operand types on all targets.
 */
6563print_compiled_byte_code_list(
FILE* f,
regex_t* reg)
/* One past the last byte of compiled code. */
6567 UChar* end = reg->p + reg->used;
6569 fprintf(f,
"code length: %d", reg->used);
6575 fprintf(f,
"\n%ld:", bp - reg->p);
6577 fprintf(f,
" %ld:", bp - reg->p);
6578 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6585# ifdef ONIG_DEBUG_PARSE_TREE
/*
 * print_indent_tree: recursively print the parse tree rooted at `node` to
 * `f`. Each node is printed as "<kind:address>" plus kind-specific details;
 * child nodes are printed by recursive calls with indent + add.
 * A NULL node is reported with an error message.
 */
6587print_indent_tree(
FILE* f,
Node* node,
int indent)
6589 int i,
type, container_p = 0;
6594 if (IS_NULL(node)) {
6595 fprintf(f,
"ERROR: null node!!!\n");
/* List / alternation: print the first element, then follow the NCDR chain. */
6603 if (NTYPE(node) == NT_LIST)
6604 fprintf(f,
"<list:%"PRIxPTR
">\n", (intptr_t )node);
6606 fprintf(f,
"<alt:%"PRIxPTR
">\n", (intptr_t )node);
6608 print_indent_tree(f, NCAR(node), indent + add);
6609 while (IS_NOT_NULL(node = NCDR(node))) {
/* Every node on the chain must have the same type as the head. */
6610 if (NTYPE(node) != type) {
6611 fprintf(f,
"ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6614 print_indent_tree(f, NCAR(node), indent + add);
/* String node: bytes in [0x20, 0x7f) are tested separately from the rest,
 * which are printed as hex. */
6619 fprintf(f,
"<string%s:%"PRIxPTR
">",
6620 (NSTRING_IS_RAW(node) ?
"-raw" :
""), (intptr_t )node);
6621 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6622 if (*p >= 0x20 && *p < 0x7f)
6625 fprintf(f,
" 0x%02x", *p);
/* Character class: when a multi-byte buffer exists, its first value is
 * printed, followed by the remaining values two at a time as "xxxx-xxxx". */
6631 fprintf(f,
"<cclass:%"PRIxPTR
">", (intptr_t )node);
6632 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(
"not ", f);
6633 if (NCCLASS(node)->mbuf) {
6634 BBuf* bbuf = NCCLASS(node)->mbuf;
6635 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6636 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6637 fprintf(f,
"%d", *data++);
6638 for (; data < end; data+=2) {
6640 fprintf(f,
"%04x-%04x", data[0], data[1]);
6646 fprintf(f,
"<ctype:%"PRIxPTR
"> ", (intptr_t )node);
6647 switch (NCTYPE(node)->ctype) {
6648 case ONIGENC_CTYPE_WORD:
6649 if (NCTYPE(node)->not != 0)
6650 fputs(
"not word", f);
6656 fprintf(f,
"ERROR: undefined ctype.\n");
6662 fprintf(f,
"<anychar:%"PRIxPTR
">", (intptr_t )node);
/* Anchor: print the anchor kind; the look-ahead / look-behind kinds also set
 * container_p so that their target subtree is printed at the end. */
6666 fprintf(f,
"<anchor:%"PRIxPTR
"> ", (intptr_t )node);
6667 switch (NANCHOR(node)->type) {
6668 case ANCHOR_BEGIN_BUF: fputs(
"begin buf", f);
break;
6669 case ANCHOR_END_BUF: fputs(
"end buf", f);
break;
6670 case ANCHOR_BEGIN_LINE: fputs(
"begin line", f);
break;
6671 case ANCHOR_END_LINE: fputs(
"end line", f);
break;
6672 case ANCHOR_SEMI_END_BUF: fputs(
"semi end buf", f);
break;
6673 case ANCHOR_BEGIN_POSITION: fputs(
"begin position", f);
break;
6675 case ANCHOR_WORD_BOUND: fputs(
"word bound", f);
break;
6676 case ANCHOR_NOT_WORD_BOUND: fputs(
"not word bound", f);
break;
6677# ifdef USE_WORD_BEGIN_END
6678 case ANCHOR_WORD_BEGIN: fputs(
"word begin", f);
break;
6679 case ANCHOR_WORD_END: fputs(
"word end", f);
break;
6681 case ANCHOR_PREC_READ: fputs(
"prec read", f); container_p = TRUE;
break;
6682 case ANCHOR_PREC_READ_NOT: fputs(
"prec read not", f); container_p = TRUE;
break;
6683 case ANCHOR_LOOK_BEHIND: fputs(
"look_behind", f); container_p = TRUE;
break;
6684 case ANCHOR_LOOK_BEHIND_NOT: fputs(
"look_behind_not",f); container_p = TRUE;
break;
6685 case ANCHOR_KEEP: fputs(
"keep",f);
break;
6688 fprintf(f,
"ERROR: undefined anchor type.\n");
/* Back-reference: comma-separated list of the back_num referenced numbers. */
6698 fprintf(f,
"<backref:%"PRIxPTR
">", (intptr_t )node);
6699 for (i = 0; i < br->back_num; i++) {
6700 if (i > 0) fputs(
", ", f);
6701 fprintf(f,
"%d", p[i]);
6706# ifdef USE_SUBEXP_CALL
/* Call node: print the bytes of the callee name in [name, name_end). */
6710 fprintf(f,
"<call:%"PRIxPTR
">", (intptr_t )node);
6711 p_string(f, cn->name_end - cn->name, cn->name);
/* Quantifier: print {lower,upper}, with "?" appended when not greedy, then
 * the target subtree. */
6717 fprintf(f,
"<quantifier:%"PRIxPTR
">{%d,%d}%s\n", (intptr_t )node,
6718 NQTFR(node)->lower, NQTFR(node)->upper,
6719 (NQTFR(node)->greedy ?
"" :
"?"));
6720 print_indent_tree(f, NQTFR(node)->target, indent + add);
/* Enclosure: print the enclosure kind, then the target subtree. */
6724 fprintf(f,
"<enclose:%"PRIxPTR
"> ", (intptr_t )node);
6725 switch (NENCLOSE(node)->type) {
6726 case ENCLOSE_OPTION:
6727 fprintf(f,
"option:%d", NENCLOSE(node)->option);
6729 case ENCLOSE_MEMORY:
6730 fprintf(f,
"memory:%d", NENCLOSE(node)->regnum);
6732 case ENCLOSE_STOP_BACKTRACK:
6733 fprintf(f,
"stop-bt");
6735 case ENCLOSE_CONDITION:
6736 fprintf(f,
"condition:%d", NENCLOSE(node)->regnum);
6738 case ENCLOSE_ABSENT:
6739 fprintf(f,
"absent");
6746 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6750 fprintf(f,
"print_indent_tree: undefined node type %d\n", NTYPE(node));
/* NOTE(review): presumably terminates the line for node kinds that did not
 * already end with a newline; the rest of this condition is not shown here. */
6754 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
/* Anchors flagged as containers have their target subtree printed last. */
6758 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
/* NOTE(review): the enclosing function's header is not visible in this
 * excerpt (presumably print_tree()). Prints the whole tree starting at
 * indentation 0. */
6766 print_indent_tree(f, node, 0);
#define xfree
Old name of ruby_xfree.
#define xrealloc
Old name of ruby_xrealloc.
#define xmalloc
Old name of ruby_xmalloc.
VALUE type(ANYARGS)
ANYARGS-ed function type.