refresh_pattern源码分析

refresh_pattern配置用法回顾:

refresh_pattern [-i] regexp min percent max [options]  
	options: override-expire
		 override-lastmod
		 reload-into-ims
		 ignore-reload
		 ignore-no-cache
		 ignore-private
		 ignore-auth
		 stale-while-revalidate=NN
		 ignore-stale-while-revalidate
		 max-stale=NN
		 negative-ttl=NN


先来看配置文件的解析,在parse_line中有如下一段代码,用parse_refreshpattern来解析refresh_pattern的配置信息,将信息解析后加入Config.Refresh,这是一个refresh_t的单链表。

static int parse_line(char *buff)
{
	/* Excerpt: the handling of the other configuration directives is elided. */
        ....
        ....

        if(...)
		...
	/* A "refresh_pattern" directive is handed to parse_refreshpattern(),
	 * which is given the address of the Config.Refresh list head. */
	else if (!strcmp(token, "refresh_pattern"))
		parse_refreshpattern(&Config.Refresh);
	else if (...)
		...
}


接下来分析refresh_pattern配置的存储结构refresh_t:

/*
 * One parsed refresh_pattern rule. Rules are chained through `next`
 * into a singly linked list.
 */
struct _refresh_t {
    const char *pattern;			/* regexp string taken from the directive */
    regex_t compiled_pattern;			/* the regexp compiled by regcomp() into regex_t form, used for matching */
    time_t min;					/* "min" field of the directive (the parser stores it in seconds) */
    double pct;					/* "percent" field of the directive (the parser stores percent / 100.0) */
    time_t max;					/* "max" field of the directive (the parser stores it in seconds) */
    refresh_t *next;				/* next rule in the list */
    struct {
	unsigned int icase:1;			/* set when -i was given: match case-insensitively (the default is case-sensitive) */
#if HTTP_VIOLATIONS
	unsigned int override_expire:1;		/* option override-expire was given */
	unsigned int override_lastmod:1;	/* option override-lastmod was given */
	unsigned int reload_into_ims:1;		/* option reload-into-ims was given */
	unsigned int ignore_reload:1;		/* option ignore-reload was given */
	unsigned int ignore_no_cache:1;		/* option ignore-no-cache was given */
	unsigned int ignore_private:1;		/* option ignore-private was given */
	unsigned int ignore_auth:1;		/* option ignore-auth was given */
#endif
	unsigned int ignore_stale_while_revalidate:1;	/* option ignore-stale-while-revalidate was given */
    } flags;
    int max_stale;				/* value of option max-stale=NN (the parser leaves -1 when absent) */
    int stale_while_revalidate;			/* value of option stale-while-revalidate=NN (the parser leaves -1 when absent) */
    int negative_ttl;				/* value of option negative-ttl=NN (the parser leaves -1 when absent) */
};


parse_refreshpattern函数分析如下:

/*
 * Parse one refresh_pattern directive and append the resulting rule to
 * the end of the list whose head pointer is *head.
 *
 * Expected token layout (read through the current strtok() state):
 *     [-i] regexp min percent max [options]
 *
 * min and max are given in minutes and stored in seconds; percent is
 * stored as a fraction (percent / 100.0).
 *
 * A missing regexp token ends in self_destruct(). An unknown option is
 * logged and otherwise ignored. If the regexp does not compile, the
 * error is logged and the function returns without adding a rule; the
 * temporary copy of the pattern is released on that path as well.
 */
static void parse_refreshpattern(refresh_t ** head)
{
    char *token;
    char *pattern;
    time_t min = 0;
    double pct = 0.0;
    time_t max = 0;
#if HTTP_VIOLATIONS
    int override_expire = 0;
    int override_lastmod = 0;
    int reload_into_ims = 0;
    int ignore_reload = 0;
    int ignore_no_cache = 0;
    int ignore_private = 0;
    int ignore_auth = 0;
#endif
    int stale_while_revalidate = -1;
    int ignore_stale_while_revalidate = 0;
    int max_stale = -1;
    int negative_ttl = -1;
    int i;
    refresh_t *t;
    regex_t comp;
    int errcode;
  
    /*
	REG_EXTENDED	- use the POSIX extended regular expression syntax
	REG_NOSUB	- only report whether the match succeeded or failed
    */
    int flags = REG_EXTENDED | REG_NOSUB;
    if ((token = strtok(NULL, w_space)) == NULL)
	self_destruct();
    if (strcmp(token, "-i") == 0) {
	flags |= REG_ICASE;	/* match case-insensitively */
	token = strtok(NULL, w_space);
    } else if (strcmp(token, "+i") == 0) {
	flags &= ~REG_ICASE;	/* match case-sensitively */
	token = strtok(NULL, w_space);
    }
    if (token == NULL)
	self_destruct();
    pattern = xstrdup(token);	/* private copy of the regexp string */
    i = GetInteger();		/* token: min */
    min = (time_t) (i * 60);	/* convert minutes to seconds */
    i = GetInteger();		/* token: pct */
    pct = (double) i / 100.0;
    i = GetInteger();		/* token: max */
    max = (time_t) (i * 60);	/* convert minutes to seconds */
    /* Options */
    while ((token = strtok(NULL, w_space)) != NULL) {
#if HTTP_VIOLATIONS
	if (!strcmp(token, "override-expire"))
	    override_expire = 1;
	else if (!strcmp(token, "override-lastmod"))
	    override_lastmod = 1;
	else if (!strcmp(token, "ignore-no-cache"))
	    ignore_no_cache = 1;
	else if (!strcmp(token, "ignore-private"))
	    ignore_private = 1;
	else if (!strcmp(token, "ignore-auth"))
	    ignore_auth = 1;
	else if (!strcmp(token, "reload-into-ims")) {
	    reload_into_ims = 1;
	    refresh_nocache_hack = 1;
	    /* tell client_side.c that this is used */
	} else if (!strcmp(token, "ignore-reload")) {
	    ignore_reload = 1;
	    refresh_nocache_hack = 1;
	    /* tell client_side.c that this is used */
	} else if (!strncmp(token, "stale-while-revalidate=", 23)) {
	    stale_while_revalidate = atoi(token + 23);
	} else
#endif
	if (!strncmp(token, "max-stale=", 10)) {
	    max_stale = atoi(token + 10);
	} else if (!strncmp(token, "negative-ttl=", 13)) {
	    negative_ttl = atoi(token + 13);
	} else if (!strcmp(token, "ignore-stale-while-revalidate")) {
	    ignore_stale_while_revalidate = 1;
	} else {
	    debug(22, 0) ("parse_refreshpattern: Unknown option '%s': %s\n",
		pattern, token);
	}
    }
    /* Compile the pattern; this also validates the regexp syntax. */
    if ((errcode = regcomp(&comp, pattern, flags)) != 0) {
	char errbuf[256];
	regerror(errcode, &comp, errbuf, sizeof errbuf);
	debug(22, 0) ("%s line %d: %s\n",
	    cfg_filename, config_lineno, config_input_line);
	debug(22, 0) ("parse_refreshpattern: Invalid regular expression '%s': %s\n",
	    pattern, errbuf);
	/* The rule is dropped, so the private copy must not be leaked. */
	safe_free(pattern);
	return;
    }
    pct = pct < 0.0 ? 0.0 : pct;
    max = max < 0 ? 0 : max;
    t = xcalloc(1, sizeof(refresh_t));
    t->pattern = (char *) xstrdup(pattern);
    t->compiled_pattern = comp;
    t->min = min;
    t->pct = pct;
    t->max = max;
    if (flags & REG_ICASE)
	t->flags.icase = 1;
#if HTTP_VIOLATIONS
    if (override_expire)
	t->flags.override_expire = 1;
    if (override_lastmod)
	t->flags.override_lastmod = 1;
    if (reload_into_ims)
	t->flags.reload_into_ims = 1;
    if (ignore_reload)
	t->flags.ignore_reload = 1;
    if (ignore_no_cache)
	t->flags.ignore_no_cache = 1;
    if (ignore_private)
	t->flags.ignore_private = 1;
    if (ignore_auth)
	t->flags.ignore_auth = 1;
#endif
    t->flags.ignore_stale_while_revalidate = ignore_stale_while_revalidate;
    t->stale_while_revalidate = stale_while_revalidate;
    t->max_stale = max_stale;
    t->negative_ttl = negative_ttl;
    t->next = NULL;
    /* Walk to the tail so that rules keep their configuration order. */
    while (*head)
	head = &(*head)->next;
    *head = t;	/* append the new rule at the end of the list */
    safe_free(pattern);	/* the rule holds its own copy of the string */
}


过期判断只针对已经缓存的response,也就是只有在cache hit时才会发生。squid在函数clientCacheHit中通过refreshCheckHTTPStale做过期判断,相关代码如下:

static void clientCacheHit(void *data, HttpReply * rep) 
{


	/* Excerpt: the earlier part of the function is elided. */
        ....
        ....
	
	/* Decide whether the cached entry e is stale for request r. */
	stale = refreshCheckHTTPStale(e, r);
	debug(33, 2) ("clientCacheHit: refreshCheckHTTPStale returned %d\n", stale);
	if (stale == 0) {	/* the cached object is fresh */
		debug(33, 2) ("clientCacheHit: HIT\n");
	} else if (stale == -1 && Config.refresh_stale_window > 0 && e->mem_obj->refresh_timestamp + Config.refresh_stale_window > squid_curtime) {
		debug(33, 2) ("clientCacheHit: refresh_stale HIT\n");
		http->log_type = LOG_TCP_STALE_HIT;
		stale = 0;
	} else if (stale == -2 && e->mem_obj->refresh_timestamp + e->mem_obj->stale_while_revalidate >= squid_curtime) {
		debug(33, 2) ("clientCacheHit: stale-while-revalidate HIT\n");
		http->log_type = LOG_TCP_STALE_HIT;
		stale = 0;
	} else if (stale && http->flags.internal) {
		debug(33, 2) ("clientCacheHit: internal HIT\n");
		stale = 0;
	} else if (stale && Config.onoff.offline) {
		debug(33, 2) ("clientCacheHit: offline HIT\n");
		http->log_type = LOG_TCP_OFFLINE_HIT;
		stale = 0;
	} else if (stale == -2 && !clientOnlyIfCached(http)) {
		debug(33, 2) ("clientCacheHit: stale-while-revalidate needs revalidation\n");
		/* The result was -2 (stale-while-revalidate case): start an
		 * asynchronous refresh and serve the cached copy as a hit. */
		clientAsyncRefresh(http);
		http->log_type = LOG_TCP_STALE_HIT;
		stale = 0;
	}
	http->is_modified = is_modified;
	if (stale) {
		debug(33, 5) ("clientCacheHit: in refreshCheck() block\n");
		/*
		 * We hold a stale copy; it needs to be validated
		 */
		/*
		 * The 'need_validation' flag is used to prevent forwarding
		 * loops between siblings.  If our copy of the object is stale,
		 * then we should probably only use parents for the validation
		 * request.  Otherwise two siblings could generate a loop if
		 * both have a stale version of the object.
		 */
		r->flags.need_validation = 1;
		if (r->flags.nocache) {
			/*
			 * This did not match a refresh pattern that overrides no-cache
			 * we should honour the client no-cache header.
			 */
			 /* Still stale and the user agent demands a reload:
			  * take the clientProcessMiss path. */
			http->log_type = LOG_TCP_CLIENT_REFRESH_MISS;
			clientProcessMiss(http);
			return;
		}
		/* NOTE(review): per the original author's note, this is where the
			external_refresh_check configuration comes in, so a stale
			object may still be treated as fresh and end up in
			clientProcessHit. clientRefreshCheck is not shown in this
			excerpt - confirm against its definition.
		*/
		clientRefreshCheck(http);
		return;
	}
	clientProcessHit(http);
}


我们来看一下refreshCheck返回的原因码(注意这不是HTTP/FTP的状态码):取值在100-199范围的表示fresh,取值在200-299范围的表示stale。

/*
 * Reason codes returned by refreshCheck(). Values below 200 mean the
 * entry is treated as fresh, values from 200 upwards mean it is stale.
 */
enum {
    FRESH_REQUEST_MAX_STALE_ALL = 100,	/* request Cache-Control has max-stale without a value (max_stale < 0) */
    FRESH_REQUEST_MAX_STALE_VALUE,	/* staleness is below the request's Cache-Control max-stale value */
    FRESH_EXPIRES,			/* the entry's explicit expiry time (entry->expires) has not been reached */
    FRESH_LMFACTOR_RULE,		/* age is below the limit computed from refresh_pattern percent (last-modified factor) */
    FRESH_MIN_RULE,			/* age is below refresh_pattern min */
    FRESH_OVERRIDE_EXPIRES,		/* refresh_pattern option override-expire applies (age < min) */
    FRESH_OVERRIDE_LASTMOD,		/* refresh_pattern option override-lastmod applies (age < min) */
    STALE_MUST_REVALIDATE = 200,	/* ENTRY_REVALIDATE is set in entry->flags and the entry is not fresh */
    STALE_RELOAD_INTO_IMS,		/* reload-into-ims applies: the client's no-cache is turned into an If-Modified-Since query */
    STALE_FORCED_RELOAD,		/* the client's no-cache (forced reload) is honoured */
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE,/* age exceeds the request's Cache-Control max-age */
    STALE_EXPIRES,			/* the entry's explicit expiry time (entry->expires) has passed */
    STALE_MAX_RULE,			/* age exceeds refresh_pattern max */
    STALE_LMFACTOR_RULE,		/* age has reached the limit computed from refresh_pattern percent (last-modified factor) */
    STALE_WITHIN_DELTA,			/* stale, but by less than the (negative) delta window passed to refreshCheck() */
    STALE_ASYNC_REFRESH,		/* stale, but staleness is still below the stale-while-revalidate value */
    STALE_MAX_STALE,			/* staleness has reached the effective max-stale limit */
    STALE_DEFAULT = 299			/* stale, and none of the more specific reasons applies */
};


refreshCheckHTTPStale函数分析如下:

int refreshCheckHTTPStale(const StoreEntry * entry, request_t * request)
{
    /*
     * Run the generic staleness check, passing the negated
     * Config.refresh_stale_window as the tolerance delta, and fold the
     * resulting reason code into the small integer the HTTP client side
     * expects:
     *    0  fresh
     *    1  stale
     *   -1  stale, but within the refresh_stale_window tolerance
     *   -2  stale, but eligible for stale-while-revalidate
     *    3  stale beyond the max-stale limit
     */
    const int verdict = refreshCheck(entry, request, -Config.refresh_stale_window);

    switch (verdict) {
    case STALE_WITHIN_DELTA:
	return -1;
    case STALE_ASYNC_REFRESH:
	return -2;
    case STALE_MAX_STALE:
	return 3;
    default:
	break;
    }
    /* Every remaining code from 200 upwards is an ordinary "stale". */
    if (verdict >= 200)
	return 1;
    return 0;
}


refreshCheck函数分析如下:

/*
 * Decide whether `entry` is fresh or stale and return the reason as one
 * of the FRESH_ / STALE_ codes.
 *
 * entry   - the cached object being checked
 * request - the client request, may be NULL; when present its
 *           Cache-Control directives and nocache_hack flag are honoured
 * delta   - a positive value moves the check time into the future; a
 *           negative value is used only for the STALE_WITHIN_DELTA test
 *           near the end
 */
static int refreshCheck(const StoreEntry * entry, request_t * request, time_t delta)
{
    const refresh_t *R;
    const char *uri = NULL;
    time_t age = 0;
    time_t check_time = squid_curtime;
    int staleness;
    stale_flags sf;
    if (entry->mem_obj)
	uri = entry->mem_obj->url;
    else if (request)
	uri = urlCanonical(request);

    debug(22, 3) ("refreshCheck: '%s'\n", uri ? uri : "<none>");

    if (delta > 0)
	check_time += delta;	/* only a positive delta is added; a negative delta leaves check_time unchanged */
    if (check_time > entry->timestamp)
	age = check_time - entry->timestamp;	/* age = check time minus the entry's timestamp; stays 0 otherwise */
    /* Look up the refresh_pattern rule for this URI; fall back to DefaultRefresh when there is none. */
    R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
    if (NULL == R)
	R = &DefaultRefresh;
    memset(&sf, '\0', sizeof(sf));

    /* sf records which rule decided the result. refreshStaleness works out
	from entry, check_time, age and R how many seconds the entry is stale
	(negative when fresh), testing entry->expires first, then R->max, then
	R->pct (last-modified factor) and finally R->min.
    */
    staleness = refreshStaleness(entry, check_time, age, R, &sf);
    debug(22, 3) ("Staleness = %d\n", staleness);

    debug(22, 3) ("refreshCheck: Matched '%s %d %d%% %d'\n",
	R->pattern, (int) R->min, (int) (100.0 * R->pct), (int) R->max);
    debug(22, 3) ("refreshCheck: age = %d\n", (int) age);
    debug(22, 3) ("\tcheck_time:\t%s\n", mkrfc1123(check_time));
    debug(22, 3) ("\tentry->timestamp:\t%s\n", mkrfc1123(entry->timestamp));

    /* ENTRY_REVALIDATE is set in entry->flags: a stale entry must be revalidated */
    if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1) {
	debug(22, 3) ("refreshCheck: YES: Must revalidate stale response\n");
	return STALE_MUST_REVALIDATE;
    }
    /* request-specific checks */
    /* Take the client's reload / Cache-Control behaviour into account. */
    if (request) {
	HttpHdrCc *cc = request->cache_control;
#if HTTP_VIOLATIONS
	if (!request->flags.nocache_hack) {
	    (void) 0;
	} else if (R->flags.ignore_reload) {
	    /* The clients no-cache header is ignored */
	    debug(22, 3) ("refreshCheck: MAYBE: ignore-reload\n");
	} else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
	    /* The clients no-cache header is changed into a IMS query */
	    debug(22, 3) ("refreshCheck: YES: reload-into-ims\n");
	    return STALE_RELOAD_INTO_IMS;
	} else {
	    /* The clients no-cache header is not overridden on this request */
	    debug(22, 3) ("refreshCheck: YES: client reload\n");
	    request->flags.nocache = 1;
	    return STALE_FORCED_RELOAD;
	}
#endif
	if (NULL != cc) {
	    if (cc->max_age > -1) {
#if HTTP_VIOLATIONS
		/* With ignore-reload, a client max-age of 0 is skipped. */
		if (R->flags.ignore_reload && cc->max_age == 0) {
		} else
#endif
		if (age > cc->max_age) {
		    debug(22, 3) ("refreshCheck: YES: age > client-max-age\n");
		    return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
		}
	    }
	    if (EBIT_TEST(cc->mask, CC_MAX_STALE) && staleness >= 0) {
		if (cc->max_stale < 0) {
		    /* max-stale directive without a value */
		    debug(22, 3) ("refreshCheck: NO: max-stale wildcard\n");
		    return FRESH_REQUEST_MAX_STALE_ALL;
		} else if (staleness < cc->max_stale) {
		    debug(22, 3) ("refreshCheck: NO: staleness < max-stale\n");
		    return FRESH_REQUEST_MAX_STALE_VALUE;
		}
	    }
	}
    }

    /* Negative staleness means the entry is fresh; only the reason is left to determine. */
    if (staleness < 0) {
	if (sf.expires)
	    return FRESH_EXPIRES;
	assert(!sf.max);
	if (sf.lmfactor)
	    return FRESH_LMFACTOR_RULE;
	assert(sf.min);
	return FRESH_MIN_RULE;
    }
    /*
     * At this point the response is stale, unless one of
     * the override options kicks in.
     */
     /* refresh_pattern options stale-while-revalidate=NN and ignore-stale-while-revalidate:
      * the reply's own Cache-Control value is used unless it is absent or the rule ignores it. */
    if (entry->mem_obj) {
	int stale_while_revalidate = -1;
	if (entry->mem_obj->reply && entry->mem_obj->reply->cache_control && EBIT_TEST(entry->mem_obj->reply->cache_control->mask, CC_STALE_WHILE_REVALIDATE))
	    stale_while_revalidate = entry->mem_obj->reply->cache_control->stale_while_revalidate;
	if (R->flags.ignore_stale_while_revalidate || stale_while_revalidate == -1)
	    stale_while_revalidate = R->stale_while_revalidate;
	if (staleness < stale_while_revalidate) {
	    debug(22, 3) ("stale-while-revalidate: age=%d, staleness=%d, stale_while_revalidate=%d\n", (int) age, staleness, stale_while_revalidate);
	    entry->mem_obj->stale_while_revalidate = stale_while_revalidate;
	    return STALE_ASYNC_REFRESH;
	}
    } {
	/* refresh_pattern option max-stale=NN: overrides Config.maxStale, and is
	 * itself overridden by a stale-if-error value in the reply's Cache-Control. */
	int max_stale = Config.maxStale;
	if (R->max_stale >= 0)
	    max_stale = R->max_stale;
	if (entry->mem_obj && entry->mem_obj->reply && entry->mem_obj->reply->cache_control && EBIT_TEST(entry->mem_obj->reply->cache_control->mask, CC_STALE_IF_ERROR))
	    max_stale = entry->mem_obj->reply->cache_control->stale_if_error;
	if (max_stale >= 0 && staleness >= max_stale)
	    return STALE_MAX_STALE;
    }
    /* Stale, but by less than the window given by a negative delta. */
    if (delta < 0 && staleness + delta < 0) {
	return STALE_WITHIN_DELTA;
    }
    if (sf.expires) {
#if HTTP_VIOLATIONS
	/* refresh_pattern option override-expire: min takes precedence over entry->expires */
	if (R->flags.override_expire && age < R->min) {
	    debug(22, 3) ("refreshCheck: NO: age < min && override-expire\n");
	    return FRESH_OVERRIDE_EXPIRES;
	}
#endif
	return STALE_EXPIRES;
    }
    if (sf.max)
	return STALE_MAX_RULE;
    if (sf.lmfactor) {
#if HTTP_VIOLATIONS
	/* refresh_pattern option override-lastmod: min takes precedence over percent */
	if (R->flags.override_lastmod && age < R->min) {
	    debug(22, 3) ("refreshCheck: NO: age < min && override-lastmod\n");
	    return FRESH_OVERRIDE_LASTMOD;
	}
#endif
	return STALE_LMFACTOR_RULE;
    }
    return STALE_DEFAULT;
}


最后来分析refreshStaleness,这个函数只是用来计算已经过期了多少秒(未过期时返回-1)。这里体现了entry->expires > R->max > R->percent > R->min的判断先后顺序。另外可以看出entry->timestamp时间戳还是比较重要的,记得在createEntry的时候一定要赋值,不然会一直miss的!

/*
 * Compute how stale `entry` is at `check_time`.
 *
 * entry      - cached object; expires, timestamp and lastmod are read
 * check_time - the point in time the check is made for
 * age        - age of the entry at check_time, must be >= 0
 * R          - refresh_pattern rule supplying max, pct and min
 * sf         - out: the flag of the rule that decided the result is set
 *              (expires, max, lmfactor or min); the function never
 *              clears any flag
 *
 * Returns -1 when the entry is fresh, otherwise the number of seconds
 * by which it is stale. The tests run in this order: explicit expiry
 * time, R->max, last-modified factor (R->pct), R->min.
 */
static int refreshStaleness(const StoreEntry * entry, time_t check_time, time_t age, const refresh_t * R, stale_flags * sf)
{
    /*
     * Check for an explicit expiration time.
     */
    if (entry->expires > -1) {
	sf->expires = 1;
	if (entry->expires > check_time) {
	    debug(22, 3) ("FRESH: expires %d >= check_time %d \n",
		(int) entry->expires, (int) check_time);
	    return -1;
	} else {
	    debug(22, 3) ("STALE: expires %d < check_time %d \n",
		(int) entry->expires, (int) check_time);
	    return (check_time - entry->expires);
	}
    }
    assert(age >= 0);
    /*
     * Use local heuristics to determine staleness.  Start with the
     * max age from the refresh_pattern rule.
     */
    if (age > R->max) {
	debug(22, 3) ("STALE: age %d > max %d \n", (int) age, (int) R->max);
	sf->max = 1;
	return (age - R->max);
    }
    if (check_time < entry->timestamp) {
	debug(22, 1) ("STALE: Entry's timestamp greater than check time. Clock going backwards?\n");
	debug(22, 1) ("\tcheck_time:\t%s\n", mkrfc1123(check_time));
	debug(22, 1) ("\tentry->timestamp:\t%s\n", mkrfc1123(entry->timestamp));
	/* Cast the whole difference so that the argument really is a
	 * long int, as %ld requires, whatever the width of time_t. */
	debug(22, 1) ("\tstaleness:\t%ld\n", (long int) (entry->timestamp - check_time));
	return (entry->timestamp - check_time);
    }
    /*
     * Try the last-modified factor algorithm.
     */
    if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
	/*
	 * stale_age is the Age of the response when it became/becomes
	 * stale according to the last-modified factor algorithm.
	 */
	time_t stale_age = (entry->timestamp - entry->lastmod) * R->pct;
	sf->lmfactor = 1;
	if (age >= stale_age) {
	    debug(22, 3) ("STALE: age %d > stale_age %d\n",
		(int) age, (int) stale_age);
	    return (age - stale_age);
	} else {
	    debug(22, 3) ("FRESH: age %d <= stale_age %d\n",
		(int) age, (int) stale_age);
	    return -1;
	}
    }
    /*
     * If we are here, staleness is determined by the refresh_pattern
     * configured minimum age.
     */
    if (age < R->min) {
	debug(22, 3) ("FRESH: age %d < min %d\n", (int) age, (int) R->min);
	sf->min = 1;
	return -1;
    }
    /* No flag is set on this path; the entry is stale by the min rule. */
    debug(22, 3) ("STALE: age %d >= min %d\n", (int) age, (int) R->min);
    return (age - R->min);
}


总的来说refresh_pattern支持的regex还比较弱,如果配置太多的话,可能每次都要去搜索匹配是不是效率有影响!

你可能感兴趣的:(正则表达式,cache,header,validation,null,token)