core/ngx_regex.h头文件分析
本章我们主要讲述一下nginx中表达式匹配(ngx_regex)的实现。
1. ngx_regex_t数据结构
/*
 * Copyright (C) Igor Sysoev
 * Copyright (C) Nginx, Inc.
 */
#ifndef _NGX_REGEX_H_INCLUDED_
#define _NGX_REGEX_H_INCLUDED_
#include <ngx_config.h>
#include <ngx_core.h>
#include <pcre.h>
#define NGX_REGEX_NO_MATCHED  PCRE_ERROR_NOMATCH   /* -1 */
#define NGX_REGEX_CASELESS    PCRE_CASELESS
typedef struct {
    pcre        *code;
    pcre_extra  *extra;
} ngx_regex_t;这里ngx_regex_t只是对pcre的一个再次封装。其中:
- 
    code: 代表一个编译后的正则表达式 
- 
    extra: 用作pcre的辅助数据,一般是通过 pcre_study()学习到的,可以加快后面的后续的匹配过程
2. 数据结构
typedef struct {
    ngx_str_t     pattern;
    ngx_pool_t   *pool;
    ngx_int_t     options;
    ngx_regex_t  *regex;
    int           captures;
    int           named_captures;
    int           name_size;
    u_char       *names;
    ngx_str_t     err;
} ngx_regex_compile_t;ngx_regex_compile_t表示一个编译的正则表达式模式(pattern)。其各字段含义如下:
- 
    pattern: 对应的正则表达式模式 
- 
    pool: 所关联的内存池 
- 
    options: 执行pcre_compile()时所传递的参数 
- 
    regex: 编译完成后的pcre实例 
- 
    captures: 用于保存 pattern中所有子模式的个数,包括命名子模式(named subpattern)和非命名子模式。
- 
    named_captures: 用于保存 pattern中命名子模式的个数
- 
    name_size: name table中每一项的大小
- 
    names: name table的入口地址
- 
    err: 用于存放相应的错误 
3. ngx_regex_elt_t数据结构
typedef struct {
    ngx_regex_t  *regex;
    u_char       *name;
} ngx_regex_elt_t;存放到ngx_pcre_studies链表中的元素, 其中各字段含义如下:
- 
    regex: pattern编译完成后的pcre实例 
- 
    name: 保存对应的Pattern 
4. 相关函数声明
//初始化nginx pcre
void ngx_regex_init(void);
//pcre pattern,封装了pcre_compile()
ngx_int_t ngx_regex_compile(ngx_regex_compile_t *rc);
//pcre_exec()的包装
#define ngx_regex_exec(re, s, captures, size)                                \
    pcre_exec(re->code, re->extra, (const char *) (s)->data, (s)->len, 0, 0, \
              captures, size)
#define ngx_regex_exec_n      "pcre_exec()"
//将字符串s匹配模式数组中的所有模式
ngx_int_t ngx_regex_exec_array(ngx_array_t *a, ngx_str_t *s, ngx_log_t *log);5. 附录: PCRE库中pcre_fullinfo混合子模式调用结果
#include <stdio.h>
#include <stdlib.h>
#include <pcre.h>
 
#define NGX_REGEX_CASELESS    PCRE_CASELESS
 
typedef struct {
    pcre        *code;
    pcre_extra  *extra;
} ngx_regex_t;
 
 
int main(int argc, char *argv[])
{
	pcre  *re;
	const char *errstr;
	int  erroff;
	char  *data = "(eeeee)(?<abb> exception)(?<adfa>xydz)(ddddd)\\k<abb>\\1\\2";
	int captures = 0;
	int named_captures = 0;
	int name_entrysize = 0;
	char *name_entry;
	char *p;
	int i,j,n;
	printf("pattern: %s\n\n", data);
	re = pcre_compile(data, PCRE_CASELESS, &errstr, &erroff, NULL);
	if(NULL == re)
	{
		printf("compile pcre failed\n");
		return 0;
	}
	//1) get all subpatterns count
	n = pcre_fullinfo(re, NULL, PCRE_INFO_CAPTURECOUNT, &captures);
	if(n < 0)
	{
		printf("pcre_fullinfo PCRE_INFO_CAPTURECOUNT failed %d \n", n);
		return 0;
	}         
	printf("captures %d \n", captures);
	//2) get named subpatterns count
	n = pcre_fullinfo(re, NULL, PCRE_INFO_NAMECOUNT, &named_captures);
	if(n < 0)
	{
		printf("pcre_fullinfo PCRE_INFO_NAMECOUNT failed %d \n", n);
		return 0;
	}
	printf("named_captures %d \n", named_captures);
	//3) get name table entry
	n = pcre_fullinfo(re, NULL, PCRE_INFO_NAMETABLE, &name_entry);
	if(n < 0)
	{
		printf("pcre_fullinfo PCRE_INFO_NAMETABLE failed %d \n", n);
		return 0;
	}
	p = name_entry;
	
	//4) get every entry size
	n = pcre_fullinfo(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entrysize);
	if(n < 0)
	{
		printf("pcre_fullinfo PCRE_INFO_NAMEENTRYSIZE failed %d \n", n);
		return 0;
	}
	printf("name_entrysize %d \n", name_entrysize);
	//5) print all named entry
	for(i = 0; i < named_captures; i++)
	{
		for(j = 0; j < 2; j++)
		{
			printf("%x ", p[i]);
		}
		printf("entry: %s\n", &p[2]);
		p += name_entrysize;
	}
	return 1;
}编译运行:
# gcc -c -o test.o test.c
# gcc -o test test.o -lpcre
# ./test
pattern: (eeeee)(?<abb> exception)(?<adfa>xydz)(ddddd)\k<abb>\1\2
captures 4 
named_captures 2 
name_entrysize 7 
0 0 entry: abb
3 3 entry: adfa
[参看]

