/*
 * deluxe.c
 * contributed by Mark Griffin
 */
|
#include <stdio.h>
|
#include "oniguruma.h"
|
|
#include <stdlib.h>
|
#include <string.h>
|
|
/* Value handed to onig_set_retry_limit_in_search() by exec_deluxe(). */
#define RETRY_LIMIT 10000
|
/* Value handed to onig_set_parse_depth_limit() by exec_deluxe(). */
#define DEPTH_LIMIT 10
|
|
/* Byte type used for the raw fuzzer input buffer (see LLVMFuzzerTestOneInput). */
typedef unsigned char uint8_t;
|
|
static int
|
search(regex_t* reg, unsigned char* str, unsigned char* end)
|
{
|
int r;
|
unsigned char *start, *range;
|
OnigRegion *region;
|
|
region = onig_region_new();
|
|
start = str;
|
range = end;
|
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
if (r >= 0) {
|
int i;
|
|
fprintf(stdout, "match at %d (%s)\n", r,
|
ONIGENC_NAME(onig_get_encoding(reg)));
|
for (i = 0; i < region->num_regs; i++) {
|
fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
|
}
|
}
|
else if (r == ONIG_MISMATCH) {
|
fprintf(stdout, "search fail (%s)\n",
|
ONIGENC_NAME(onig_get_encoding(reg)));
|
}
|
else { /* error */
|
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
onig_error_code_to_str((UChar* )s, r);
|
fprintf(stdout, "ERROR: %s\n", s);
|
fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
|
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
return -1;
|
}
|
|
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
return 0;
|
}
|
|
/* Case-fold flag that exec_deluxe() stores into OnigCompileInfo.case_fold_flag. */
static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
|
|
static int
|
exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
|
OnigOptionType options, char* apattern, char* apattern_end,
|
char* astr, char* astr_end)
|
{
|
int r;
|
regex_t* reg;
|
OnigCompileInfo ci;
|
OnigErrorInfo einfo;
|
UChar* pattern = (UChar* )apattern;
|
UChar* str = (UChar* )astr;
|
UChar* pattern_end = (UChar* )apattern_end;
|
unsigned char* end = (unsigned char* )astr_end;
|
|
onig_initialize(&str_enc, 1);
|
onig_set_retry_limit_in_search(RETRY_LIMIT);
|
onig_set_parse_depth_limit(DEPTH_LIMIT);
|
|
ci.num_of_elements = 5;
|
ci.pattern_enc = pattern_enc;
|
ci.target_enc = str_enc;
|
ci.syntax = ONIG_SYNTAX_DEFAULT;
|
ci.option = options;
|
ci.case_fold_flag = CF;
|
|
r = onig_new_deluxe(®, pattern, pattern_end, &ci, &einfo);
|
if (r != ONIG_NORMAL) {
|
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
onig_error_code_to_str((UChar* )s, r, &einfo);
|
fprintf(stdout, "ERROR: %s\n", s);
|
onig_end();
|
return -1;
|
}
|
|
if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) {
|
r = search(reg, str, end);
|
}
|
|
onig_free(reg);
|
onig_end();
|
return 0;
|
}
|
|
#define PATTERN_SIZE 48
|
#define NUM_CONTROL_BYTES 1
|
#define MIN_STR_SIZE 2
|
int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
|
{
|
int r;
|
size_t remaining_size;
|
unsigned char *data;
|
unsigned char pat_encoding_choice;
|
unsigned char str_encoding_choice;
|
unsigned char *pattern;
|
unsigned char *str;
|
unsigned char *pattern_end;
|
unsigned char *str_end;
|
unsigned int num_encodings;
|
OnigEncodingType *pattern_enc;
|
OnigEncodingType *str_enc;
|
|
OnigEncodingType *encodings[] = {
|
ONIG_ENCODING_ASCII,
|
ONIG_ENCODING_ISO_8859_1,
|
ONIG_ENCODING_ISO_8859_2,
|
ONIG_ENCODING_ISO_8859_3,
|
ONIG_ENCODING_ISO_8859_4,
|
ONIG_ENCODING_ISO_8859_5,
|
ONIG_ENCODING_ISO_8859_6,
|
ONIG_ENCODING_ISO_8859_7,
|
ONIG_ENCODING_ISO_8859_8,
|
ONIG_ENCODING_ISO_8859_9,
|
ONIG_ENCODING_ISO_8859_10,
|
ONIG_ENCODING_ISO_8859_11,
|
ONIG_ENCODING_ISO_8859_13,
|
ONIG_ENCODING_ISO_8859_14,
|
ONIG_ENCODING_ISO_8859_15,
|
ONIG_ENCODING_ISO_8859_16,
|
ONIG_ENCODING_UTF8,
|
ONIG_ENCODING_UTF16_BE,
|
ONIG_ENCODING_UTF16_LE,
|
ONIG_ENCODING_UTF32_BE,
|
ONIG_ENCODING_UTF32_LE,
|
ONIG_ENCODING_EUC_JP,
|
ONIG_ENCODING_EUC_TW,
|
ONIG_ENCODING_EUC_KR,
|
ONIG_ENCODING_EUC_CN,
|
ONIG_ENCODING_SJIS,
|
//ONIG_ENCODING_KOI8,
|
ONIG_ENCODING_KOI8_R,
|
ONIG_ENCODING_CP1251,
|
ONIG_ENCODING_BIG5,
|
ONIG_ENCODING_GB18030,
|
};
|
|
if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE))
|
return 0;
|
if (Size > 0x1000)
|
return 0;
|
|
remaining_size = Size;
|
data = (unsigned char *)(Data);
|
|
// pull off bytes to switch off
|
pat_encoding_choice = data[0];
|
data++;
|
remaining_size--;
|
str_encoding_choice = data[0];
|
data++;
|
remaining_size--;
|
|
// copy first PATTERN_SIZE bytes off to be the pattern
|
pattern = (unsigned char *)malloc(PATTERN_SIZE);
|
memcpy(pattern, data, PATTERN_SIZE);
|
pattern_end = pattern + PATTERN_SIZE;
|
data += PATTERN_SIZE;
|
remaining_size -= PATTERN_SIZE;
|
|
str = (unsigned char*)malloc(remaining_size);
|
memcpy(str, data, remaining_size);
|
str_end = str + remaining_size;
|
|
num_encodings = sizeof(encodings) / sizeof(encodings[0]);
|
pattern_enc = encodings[pat_encoding_choice % num_encodings];
|
str_enc = encodings[str_encoding_choice % num_encodings];
|
|
r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end);
|
|
free(pattern);
|
free(str);
|
|
return r;
|
}
|
|
|
#ifdef STANDALONE
|
|
#include <unistd.h>
|
|
extern int main(int argc, char* argv[])
|
{
|
size_t n;
|
uint8_t Data[10000];
|
|
n = read(0, Data, sizeof(Data));
|
fprintf(stdout, "n: %ld\n", n);
|
LLVMFuzzerTestOneInput(Data, n);
|
|
return 0;
|
}
|
#endif /* STANDALONE */
|