/*
 * encode.c
 *
 * Sample: compile and search patterns under a number of character encodings.
 */
|
#include <stdio.h>
|
#include "oniguruma.h"
|
|
static int
|
search(regex_t* reg, unsigned char* str, unsigned char* end)
|
{
|
int r;
|
unsigned char *start, *range;
|
OnigRegion *region;
|
|
region = onig_region_new();
|
|
start = str;
|
range = end;
|
r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
|
if (r >= 0) {
|
int i;
|
|
fprintf(stderr, "match at %d (%s)\n", r,
|
ONIGENC_NAME(onig_get_encoding(reg)));
|
for (i = 0; i < region->num_regs; i++) {
|
fprintf(stderr, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
|
}
|
}
|
else if (r == ONIG_MISMATCH) {
|
fprintf(stderr, "search fail (%s)\n",
|
ONIGENC_NAME(onig_get_encoding(reg)));
|
}
|
else { /* error */
|
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
onig_error_code_to_str((UChar* )s, r);
|
fprintf(stderr, "ERROR: %s\n", s);
|
fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
|
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
return -1;
|
}
|
|
onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
|
return 0;
|
}
|
|
static int
|
exec(OnigEncoding enc, OnigOptionType options,
|
char* apattern, char* astr)
|
{
|
int r;
|
unsigned char *end;
|
regex_t* reg;
|
OnigErrorInfo einfo;
|
UChar* pattern = (UChar* )apattern;
|
UChar* str = (UChar* )astr;
|
|
onig_initialize(&enc, 1);
|
|
r = onig_new(®, pattern,
|
pattern + onigenc_str_bytelen_null(enc, pattern),
|
options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
|
if (r != ONIG_NORMAL) {
|
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
|
onig_error_code_to_str((UChar* )s, r, &einfo);
|
fprintf(stderr, "ERROR: %s\n", s);
|
return -1;
|
}
|
|
end = str + onigenc_str_bytelen_null(enc, str);
|
r = search(reg, str, end);
|
|
onig_free(reg);
|
onig_end();
|
return 0;
|
}
|
|
extern int main(int argc, char* argv[])
|
{
|
int r;
|
/* ISO 8859-1 test */
|
static unsigned char str[] = { 0xc7, 0xd6, 0xfe, 0xea, 0xe0, 0xe2, 0x00 };
|
static unsigned char pattern[] = { 0xe7, 0xf6, 0xde, '\\', 'w', '+', 0x00 };
|
|
r = exec(ONIG_ENCODING_SJIS, ONIG_OPTION_NONE,
|
"^a\\p{Hiragana}c$", "a\202\274c");
|
|
r = exec(ONIG_ENCODING_EUC_JP, ONIG_OPTION_NONE,
|
"^a\\p{Hiragana}c$", "a\244\276c");
|
|
r = exec(ONIG_ENCODING_CP1251, ONIG_OPTION_IGNORECASE,
|
"aBc", " AbC");
|
|
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
" [a-c\337z] ", " SS ");
|
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
" [\330-\341] ", " SS ");
|
|
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
"\337 ", " Ss ");
|
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
"SS ", " \337 ");
|
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
"\\A\\S\\z", "ss");
|
|
r = exec(ONIG_ENCODING_ISO_8859_2, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
|
r = exec(ONIG_ENCODING_ISO_8859_3, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_4, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_5, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_6, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_7, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_8, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_9, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_10, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_11, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_13, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_14, ONIG_OPTION_IGNORECASE,
|
"[ac]+", "bbbaAaCCC");
|
r = exec(ONIG_ENCODING_ISO_8859_15, ONIG_OPTION_IGNORECASE,
|
(char* )pattern, (char* )str);
|
r = exec(ONIG_ENCODING_ISO_8859_16, ONIG_OPTION_IGNORECASE,
|
(char* )pattern, (char* )str);
|
|
r = exec(ONIG_ENCODING_KOI8_R, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
|
r = exec(ONIG_ENCODING_EUC_TW, ONIG_OPTION_NONE, "b*a+?c+", "bbbaaaccc");
|
r = exec(ONIG_ENCODING_EUC_KR, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
|
r = exec(ONIG_ENCODING_EUC_CN, ONIG_OPTION_NONE, "c+", "bbbaaaccc");
|
r = exec(ONIG_ENCODING_BIG5, ONIG_OPTION_NONE, "a+", "bbbaaaccc");
|
|
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
"\337", "SS");
|
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
"SS", "\337");
|
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
"SSb\337ssc", "a\337bSS\337cd");
|
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
"[a\337]{0,2}", "aSS");
|
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
|
"is", "iss");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
|
"\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000",
|
"\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000");
|
/* 0x0a3d == \012\075 : is not alnum */
|
/* 0x0a3e == \012\076 : is alnum */
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
|
"\000\\\000d\000+\000\000",
|
"\0003\0001\377\020\377\031\377\032\000\000");
|
|
r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE,
|
"(Aa\\d)+", "BaA5Aa0234");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
"\000[\000\337\000]\000\000", "\000S\000S\000\000");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
"\000[\000\337\000]\000\000", "\000s\000S\000\000");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
"\000^\000[\000\001\000-\377\375\000]\000$\000\000",
|
"\000s\000S\000\000");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
"\000S\000S\000\000",
|
"\000S\000T\000\337\000\000");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
"\000S\000T\000S\000S\000\000",
|
"\000S\000t\000s\000S\000\000");
|
|
{
|
UChar pat[] = { 0x1f, 0xfc, 0x00, 0x00 };
|
UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 };
|
UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 };
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
(char* )pat, (char* )str1);
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
(char* )pat, (char* )str2);
|
}
|
|
#if 0
|
/* You should define USE_UNICODE_CASE_FOLD_TURKISH_AZERI in regenc.h. */
|
|
set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI);
|
|
r = exec(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8, ONIG_OPTION_IGNORECASE,
|
"Ii", "\304\261\304\260");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
"\000I\000i\000\000", "\001\061\001\060\000\000");
|
|
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
|
"\001\061\001\060\000\000", "\000I\000i\000\000");
|
|
set_case_fold(ONIGENC_CASE_FOLD_MIN);
|
#endif
|
|
return r;
|
}
|