C语言正则表达式

1. POSIX 标准库中的正则表达式

#include <stdio.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>


/*
 * Demo: match a path against a POSIX extended regular expression and print
 * the number of match slots, then the whole match and every capture group,
 * one per line.
 *
 * Returns EXIT_SUCCESS when the pattern compiles and matches the subject,
 * EXIT_FAILURE otherwise (a diagnostic is written to stderr).
 */
int main() {
    /*
     * An enum constant is an integer constant expression (a `const int`
     * is not), so it can size an ordinary, non-VLA array.
     * Slot 0 holds the whole match; slots 1..GROUP_COUNT hold the groups.
     */
    enum { GROUP_COUNT = 2, MATCH_COUNT = GROUP_COUNT + 1 };

    /*
     * NOTE(review): `\w` is not defined by POSIX ERE (it is accepted by
     * glibc as an extension), and the '.' before "html" is unescaped, so it
     * matches any character. Pattern kept as in the original.
     */
    const char *pattern = ".*/([a-z]+)/(\\w+.html)$";
    const char *str = "/home/blog/index.html";

    regex_t rt;
    int c = regcomp(&rt, pattern, REG_EXTENDED);
    if (c != 0) {
        /* regcomp reports failure via its return code, not errno. */
        char errbuf[128];
        regerror(c, &rt, errbuf, sizeof errbuf);
        fprintf(stderr, "regex compile: %s\n", errbuf);
        return EXIT_FAILURE;
    }

    regmatch_t match[MATCH_COUNT];
    size_t match_len = sizeof match / sizeof match[0]; /* array length */
    printf("%zu\n", match_len);

    int e = regexec(&rt, str, match_len, match, 0);
    if (e != 0) {
        /* On failure match[] is not filled in, so stop here. */
        char errbuf[128];
        regerror(e, &rt, errbuf, sizeof errbuf);
        fprintf(stderr, "regex exec: %s\n", errbuf);
        regfree(&rt);
        return EXIT_FAILURE;
    }
    /* The offsets in match[] refer to str, so the regex can be released. */
    regfree(&rt);

    for (size_t i = 0; i < match_len; ++i) {
        /* Groups that did not participate in the match have offsets -1. */
        if (match[i].rm_so == -1 || match[i].rm_eo == -1) {
            continue;
        }

        size_t len = (size_t)(match[i].rm_eo - match[i].rm_so);
        char *group = malloc(len + 1);
        if (group == NULL) {
            perror("malloc");
            return EXIT_FAILURE;
        }
        memcpy(group, str + match[i].rm_so, len);
        group[len] = '\0';
        printf("%s\n", group);
        free(group);
    }
    return EXIT_SUCCESS;
}


2.PCRE正则表达式

使用 PCRE 可以实现更高级、更复杂的正则表达式,其语法兼容 Perl。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pcre.h>


/*
 * Demo: match a path against a Perl-compatible regular expression with PCRE
 * and print the whole match followed by each captured substring, one per
 * line.
 *
 * Returns EXIT_SUCCESS when the pattern compiles and matches the subject,
 * EXIT_FAILURE otherwise (a diagnostic is written to stderr).
 */
int main() {
    /*
     * pcre_exec needs three ints per reported substring; the pattern has one
     * capturing group plus the whole match: (1 + 1) * 3 = 6.
     */
    enum { OVECTOR_SIZE = 6 };

    /* `(?:...)` is a non-capturing group, so only `(\w+.html)` is group 1. */
    const char *pattern = ".*/(?:[a-z]+)/(\\w+.html)$";
    const char *str = "/home/blog/index.html";

    const char *errmsg = NULL;
    int errpos = 0;
    int status = EXIT_FAILURE;
    int v[OVECTOR_SIZE];
    int rc;
    pcre_extra *ra = NULL;

    /* pcre is an opaque type, so it can only be handled through a pointer. */
    pcre *re = pcre_compile(pattern, 0, &errmsg, &errpos, NULL);
    if (re == NULL) {
        /* PCRE reports errors via errmsg/errpos, not errno. */
        fprintf(stderr, "pcre compile\nerror pos: %d\nerror msg: %s\n",
                errpos, errmsg);
        return EXIT_FAILURE;
    }

    /*
     * pcre_study may legitimately return NULL when it finds nothing useful;
     * a real failure is signalled by a non-NULL error message.
     */
    ra = pcre_study(re, 0, &errmsg);
    if (errmsg != NULL) {
        fprintf(stderr, "pcre study: %s\n", errmsg);
        goto cleanup;
    }

    rc = pcre_exec(re, ra, str, (int) strlen(str), 0, 0, v, OVECTOR_SIZE);
    if (rc < 0) {
        fprintf(stderr, "pcre exec: error code %d\n", rc);
        goto cleanup;
    }
    if (rc == 0) {
        /* Matched, but the ovector was too small: use what fits. */
        rc = OVECTOR_SIZE / 3;
    }

    for (int i = 0; i < rc; ++i) {
        const char *rmsg = NULL;
        int len = pcre_get_substring(str, v, rc, i, &rmsg);
        if (len < 0) {
            fprintf(stderr, "pcre get substring %d: error code %d\n", i, len);
            goto cleanup;
        }
        printf("%s\n", rmsg);
        /* pcre_get_substring allocates; release each copy after use. */
        pcre_free_substring(rmsg);
    }
    status = EXIT_SUCCESS;

cleanup:
    if (ra != NULL) {
        pcre_free_study(ra);
    }
    pcre_free(re);
    return status;
}

 

 

 

展开阅读全文