refresh_pattern配置用法回顾:
先来看配置文件的解析,在parse_line中有如下一段代码,用parse_refreshpattern来解析refresh_pattern的配置信息,将信息解析后加入Config.Refresh,这是一个refresh_t的单链表。refresh_pattern [-i] regexp min percent max [options] options: override-expire override-lastmod reload-into-ims ignore-reload ignore-no-cache ignore-private ignore-auth stale-while-revalidate=NN ignore-stale-while-revalidate max-stale=NN negative-ttl=NN
下来分析下refresh_pattern配置存储结构refresh_t:static int parse_line(char *buff) { /**/ .... .... if(...) ... else if (!strcmp(token, "refresh_pattern")) parse_refreshpattern(&Config.Refresh); else if (...) ... }
/*
 * One parsed "refresh_pattern" rule.  parse_refreshpattern() fills one of
 * these per configuration line and appends it to a singly linked list
 * through `next`.
 */
struct _refresh_t {
    const char *pattern;        /* regexp source string taken from the directive */
    regex_t compiled_pattern;   /* `pattern` compiled by regcomp(), kept so the regexp is compiled only once */
    time_t min;                 /* "min" argument; parse_refreshpattern() stores it in seconds (minutes * 60) */
    double pct;                 /* "percent" argument; stored as a fraction (value / 100.0) */
    time_t max;                 /* "max" argument; stored in seconds (minutes * 60) */
    refresh_t *next;            /* next rule in the list, NULL for the last one */
    struct {
        unsigned int icase:1;   /* set when the regexp was compiled with REG_ICASE (the "-i" switch) */
#if HTTP_VIOLATIONS
        unsigned int override_expire:1;     /* option "override-expire" was given */
        unsigned int override_lastmod:1;    /* option "override-lastmod" was given */
        unsigned int reload_into_ims:1;     /* option "reload-into-ims" was given */
        unsigned int ignore_reload:1;       /* option "ignore-reload" was given */
        unsigned int ignore_no_cache:1;     /* option "ignore-no-cache" was given */
        unsigned int ignore_private:1;      /* option "ignore-private" was given */
        unsigned int ignore_auth:1;         /* option "ignore-auth" was given */
#endif
        unsigned int ignore_stale_while_revalidate:1;   /* option "ignore-stale-while-revalidate" was given */
    } flags;
    int max_stale;              /* value of option "max-stale=NN"; the parser defaults it to -1 when absent */
    int stale_while_revalidate; /* value of option "stale-while-revalidate=NN"; -1 when absent */
    int negative_ttl;           /* value of option "negative-ttl=NN"; -1 when absent */
};
parse_refreshpattern函数分析如下:
/*
 * Parse the arguments of one "refresh_pattern" configuration line and append
 * the resulting rule to the end of the list at *head.
 *
 * Expected token stream (read with strtok(NULL, w_space), i.e. continuing the
 * tokenisation started by the caller):
 *
 *     [-i | +i] regexp min percent max [options...]
 *
 * min and max are given in minutes and stored in seconds; percent is stored
 * as a fraction.  A missing regexp calls self_destruct().  An unknown option
 * is logged and skipped.  A regexp that fails to compile is logged and the
 * whole line is dropped (nothing is appended to the list).
 *
 * NOTE(review): "stale-while-revalidate=" is only recognised when
 * HTTP_VIOLATIONS is enabled, although the variable and the struct field exist
 * unconditionally; this is kept as in the original.
 */
static void
parse_refreshpattern(refresh_t ** head)
{
    char *token;
    char *pattern;
    time_t min = 0;
    double pct = 0.0;
    time_t max = 0;
#if HTTP_VIOLATIONS
    int override_expire = 0;
    int override_lastmod = 0;
    int reload_into_ims = 0;
    int ignore_reload = 0;
    int ignore_no_cache = 0;
    int ignore_private = 0;
    int ignore_auth = 0;
#endif
    int stale_while_revalidate = -1;
    int ignore_stale_while_revalidate = 0;
    int max_stale = -1;
    int negative_ttl = -1;
    int i;
    refresh_t *t;
    regex_t comp;
    int errcode;
    /*
     * REG_EXTENDED: use POSIX extended regular expression syntax.
     * REG_NOSUB:    only report whether the expression matched.
     */
    int flags = REG_EXTENDED | REG_NOSUB;

    if ((token = strtok(NULL, w_space)) == NULL)
        self_destruct();
    if (strcmp(token, "-i") == 0) {
        flags |= REG_ICASE;     /* case-insensitive matching */
        token = strtok(NULL, w_space);
    } else if (strcmp(token, "+i") == 0) {
        flags &= ~REG_ICASE;    /* case-sensitive matching */
        token = strtok(NULL, w_space);
    }
    if (token == NULL)
        self_destruct();
    /* Private copy: later strtok() calls keep walking the same input line. */
    pattern = xstrdup(token);

    i = GetInteger();           /* token: min */
    min = (time_t) (i * 60);    /* convert minutes to seconds */
    i = GetInteger();           /* token: pct */
    pct = (double) i / 100.0;
    i = GetInteger();           /* token: max */
    max = (time_t) (i * 60);    /* convert minutes to seconds */

    /* Options */
    while ((token = strtok(NULL, w_space)) != NULL) {
#if HTTP_VIOLATIONS
        if (!strcmp(token, "override-expire"))
            override_expire = 1;
        else if (!strcmp(token, "override-lastmod"))
            override_lastmod = 1;
        else if (!strcmp(token, "ignore-no-cache"))
            ignore_no_cache = 1;
        else if (!strcmp(token, "ignore-private"))
            ignore_private = 1;
        else if (!strcmp(token, "ignore-auth"))
            ignore_auth = 1;
        else if (!strcmp(token, "reload-into-ims")) {
            reload_into_ims = 1;
            refresh_nocache_hack = 1;   /* tell client_side.c that this is used */
        } else if (!strcmp(token, "ignore-reload")) {
            ignore_reload = 1;
            refresh_nocache_hack = 1;   /* tell client_side.c that this is used */
        } else if (!strncmp(token, "stale-while-revalidate=", 23)) {
            stale_while_revalidate = atoi(token + 23);
        } else
#endif
        if (!strncmp(token, "max-stale=", 10)) {
            max_stale = atoi(token + 10);
        } else if (!strncmp(token, "negative-ttl=", 13)) {
            negative_ttl = atoi(token + 13);
        } else if (!strcmp(token, "ignore-stale-while-revalidate")) {
            ignore_stale_while_revalidate = 1;
        } else {
            debug(22, 0) ("parse_refreshpattern: Unknown option '%s': %s\n",
                pattern, token);
        }
    }

    /* Compiling the pattern also validates its syntax. */
    if ((errcode = regcomp(&comp, pattern, flags)) != 0) {
        char errbuf[256];
        regerror(errcode, &comp, errbuf, sizeof errbuf);
        debug(22, 0) ("%s line %d: %s\n",
            cfg_filename, config_lineno, config_input_line);
        debug(22, 0) ("parse_refreshpattern: Invalid regular expression '%s': %s\n",
            pattern, errbuf);
        /* The rule is dropped, so release our copy of the pattern too. */
        safe_free(pattern);
        return;
    }

    /* Negative percent / max values are clamped to zero. */
    pct = pct < 0.0 ? 0.0 : pct;
    max = max < 0 ? 0 : max;

    t = xcalloc(1, sizeof(refresh_t));
    t->pattern = (char *) xstrdup(pattern);
    t->compiled_pattern = comp;
    t->min = min;
    t->pct = pct;
    t->max = max;
    if (flags & REG_ICASE)
        t->flags.icase = 1;
#if HTTP_VIOLATIONS
    if (override_expire)
        t->flags.override_expire = 1;
    if (override_lastmod)
        t->flags.override_lastmod = 1;
    if (reload_into_ims)
        t->flags.reload_into_ims = 1;
    if (ignore_reload)
        t->flags.ignore_reload = 1;
    if (ignore_no_cache)
        t->flags.ignore_no_cache = 1;
    if (ignore_private)
        t->flags.ignore_private = 1;
    if (ignore_auth)
        t->flags.ignore_auth = 1;
#endif
    t->flags.ignore_stale_while_revalidate = ignore_stale_while_revalidate;
    t->stale_while_revalidate = stale_while_revalidate;
    t->max_stale = max_stale;
    t->negative_ttl = negative_ttl;
    t->next = NULL;

    /* Append at the tail so rules keep their configuration-file order. */
    while (*head)
        head = &(*head)->next;
    *head = t;

    safe_free(pattern);
}
过期验证只有对已经cache中了的response才发生,只有cache hit了的才发生,squid中函数clientCacheHit中通过refreshCheckHTTPStale做过期处理判断,其中代码如下:
/*
 * Excerpt of the cache-hit handler: decides whether the cached object may be
 * served as a hit, needs revalidation, or must be treated as a miss.
 * The "...." markers below stand for code elided from this listing; the
 * locals used here (http, e, r, stale, is_modified) are set up there.
 */
static void
clientCacheHit(void *data, HttpReply * rep)
{
    /* (code elided in this listing) */
    ....
    ....
    /*
     * Ask the refresh logic whether the cached entry e is stale for
     * request r.  As returned by refreshCheckHTTPStale():
     *   0 = fresh, -1 = stale but within the refresh_stale_window,
     *  -2 = stale but eligible for asynchronous refresh,
     *   3 = stale beyond max-stale, 1 = any other stale reason.
     */
    stale = refreshCheckHTTPStale(e, r);
    debug(33, 2) ("clientCacheHit: refreshCheckHTTPStale returned %d\n", stale);
    if (stale == 0) {
        /* the cached object is fresh */
        debug(33, 2) ("clientCacheHit: HIT\n");
    } else if (stale == -1 && Config.refresh_stale_window > 0 && e->mem_obj->refresh_timestamp + Config.refresh_stale_window > squid_curtime) {
        /* a refresh was recorded less than refresh_stale_window seconds ago: serve the stale copy */
        debug(33, 2) ("clientCacheHit: refresh_stale HIT\n");
        http->log_type = LOG_TCP_STALE_HIT;
        stale = 0;
    } else if (stale == -2 && e->mem_obj->refresh_timestamp + e->mem_obj->stale_while_revalidate >= squid_curtime) {
        /* refresh_timestamp is still within the stale-while-revalidate period: serve the stale copy */
        debug(33, 2) ("clientCacheHit: stale-while-revalidate HIT\n");
        http->log_type = LOG_TCP_STALE_HIT;
        stale = 0;
    } else if (stale && http->flags.internal) {
        debug(33, 2) ("clientCacheHit: internal HIT\n");
        stale = 0;
    } else if (stale && Config.onoff.offline) {
        debug(33, 2) ("clientCacheHit: offline HIT\n");
        http->log_type = LOG_TCP_OFFLINE_HIT;
        stale = 0;
    } else if (stale == -2 && !clientOnlyIfCached(http)) {
        debug(33, 2) ("clientCacheHit: stale-while-revalidate needs revalidation\n");
        /*
         * Staleness is inside the stale-while-revalidate allowance:
         * start an asynchronous refresh and answer this request from
         * the cached copy as if it were fresh.
         */
        clientAsyncRefresh(http);
        http->log_type = LOG_TCP_STALE_HIT;
        stale = 0;
    }
    http->is_modified = is_modified;
    if (stale) {
        debug(33, 5) ("clientCacheHit: in refreshCheck() block\n");
        /*
         * We hold a stale copy; it needs to be validated
         */
        /*
         * The 'need_validation' flag is used to prevent forwarding
         * loops between siblings.  If our copy of the object is stale,
         * then we should probably only use parents for the validation
         * request.  Otherwise two siblings could generate a loop if
         * both have a stale version of the object.
         */
        r->flags.need_validation = 1;
        if (r->flags.nocache) {
            /*
             * This did not match a refresh pattern that overrides no-cache
             * we should honour the client no-cache header.
             */
            /* still stale and the user agent forces a reload: take the miss path */
            http->log_type = LOG_TCP_CLIENT_REFRESH_MISS;
            clientProcessMiss(http);
            return;
        }
        /*
         * NOTE(review): per the article's author, this is where the
         * external_refresh_check configuration is applied, which can still
         * let a stale object be treated as fresh and end up on the
         * clientProcessHit path.  clientRefreshCheck() is not shown in this
         * listing, so this could not be confirmed here.
         */
        clientRefreshCheck(http);
        return;
    }
    clientProcessHit(http);
}
我们来看一下refreshCheck返回的原因码(reason code,注意这并不是HTTP/FTP协议的状态码):值在100-199范围内表示fresh,值在200-299范围内表示stale。
/*
 * Reason codes returned by refreshCheck().
 * 100..199 mean the object is considered fresh, 200..299 mean it is stale.
 */
enum {
    /* ---- fresh ---- */
    FRESH_REQUEST_MAX_STALE_ALL = 100,          /* request has Cache-Control: max-stale without a value */
    FRESH_REQUEST_MAX_STALE_VALUE = 101,        /* staleness is below the request's max-stale value */
    FRESH_EXPIRES = 102,                        /* explicit expiry time of the entry not yet reached */
    FRESH_LMFACTOR_RULE = 103,                  /* fresh according to the last-modified factor (percent) rule */
    FRESH_MIN_RULE = 104,                       /* age is below the refresh_pattern min */
    FRESH_OVERRIDE_EXPIRES = 105,               /* expired, but override-expire applies and age < min */
    FRESH_OVERRIDE_LASTMOD = 106,               /* stale by LM-factor, but override-lastmod applies and age < min */

    /* ---- stale ---- */
    STALE_MUST_REVALIDATE = 200,                /* entry carries ENTRY_REVALIDATE and is not fresh */
    STALE_RELOAD_INTO_IMS = 201,                /* client reload turned into an If-Modified-Since check (reload-into-ims) */
    STALE_FORCED_RELOAD = 202,                  /* client reload honoured */
    STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE = 203,  /* age exceeds the request's Cache-Control max-age */
    STALE_EXPIRES = 204,                        /* explicit expiry time of the entry has passed */
    STALE_MAX_RULE = 205,                       /* age exceeds the refresh_pattern max */
    STALE_LMFACTOR_RULE = 206,                  /* stale according to the last-modified factor (percent) rule */
    STALE_WITHIN_DELTA = 207,                   /* stale, but by less than the caller-supplied delta window */
    STALE_ASYNC_REFRESH = 208,                  /* stale, but staleness is below the stale-while-revalidate value */
    STALE_MAX_STALE = 209,                      /* staleness reached the applicable max-stale limit */
    STALE_DEFAULT = 299
};
refreshCheckHTTPStale函数分析如下:
/*
 * Staleness verdict for an HTTP cache hit.
 *
 * Runs refreshCheck() with a negative delta of Config.refresh_stale_window
 * and condenses its reason code into a small integer:
 *    0  fresh (reason code below 200)
 *   -1  stale, but inside the refresh_stale_window tolerance
 *   -2  stale, but eligible for asynchronous refresh
 *    3  stale beyond the max-stale limit
 *    1  stale for any other reason
 */
int
refreshCheckHTTPStale(const StoreEntry * entry, request_t * request)
{
    const time_t window = -Config.refresh_stale_window;
    const int why = refreshCheck(entry, request, window);

    switch (why) {
    case STALE_WITHIN_DELTA:
        return -1;
    case STALE_ASYNC_REFRESH:
        return -2;
    case STALE_MAX_STALE:
        return 3;
    default:
        break;
    }
    if (why < 200)
        return 0;
    return 1;
}
refreshCheck函数分析如下:
/*
 * Decide whether a cached entry is fresh or stale and say why.
 *
 * entry   - the cached object being checked
 * request - the client request, or NULL; when present its Cache-Control
 *           directives and reload flags take part in the decision
 * delta   - time offset: a positive value moves the check time into the
 *           future; a negative value is used near the end as a tolerance
 *           window (see STALE_WITHIN_DELTA)
 *
 * Returns one of the FRESH_* / STALE_* reason codes.
 * Side effects visible here: may set request->flags.nocache and
 * entry->mem_obj->stale_while_revalidate.
 */
static int
refreshCheck(const StoreEntry * entry, request_t * request, time_t delta)
{
    const refresh_t *R;
    const char *uri = NULL;
    time_t age = 0;
    time_t check_time = squid_curtime;
    int staleness;
    stale_flags sf;
    if (entry->mem_obj)
        uri = entry->mem_obj->url;
    else if (request)
        uri = urlCanonical(request);
    debug(22, 3) ("refreshCheck: '%s'\n", uri ? uri : "<none>");
    if (delta > 0)
        check_time += delta;
    /*
     * refreshCheckHTTPStale() passes -Config.refresh_stale_window here, so
     * for that caller delta is not positive and check_time stays at the
     * current time.
     */
    if (check_time > entry->timestamp)
        age = check_time - entry->timestamp;
    /* age = check time minus the entry's timestamp; it stays 0 when the timestamp is not in the past */
    /* look up the refresh_pattern rule for this URI; fall back to the default rule when none is found */
    R = uri ? refreshLimits(uri) : refreshUncompiledPattern(".");
    if (NULL == R)
        R = &DefaultRefresh;
    memset(&sf, '\0', sizeof(sf));
    /*
     * sf records which rule decided the outcome.  refreshStaleness()
     * returns the staleness in seconds (negative means fresh) and checks,
     * in this order: the entry's explicit expiry, the rule's max, the
     * last-modified factor (percent), and finally the rule's min.
     */
    staleness = refreshStaleness(entry, check_time, age, R, &sf);
    debug(22, 3) ("Staleness = %d\n", staleness);
    debug(22, 3) ("refreshCheck: Matched '%s %d %d%% %d'\n", R->pattern, (int) R->min, (int) (100.0 * R->pct), (int) R->max);
    debug(22, 3) ("refreshCheck: age = %d\n", (int) age);
    debug(22, 3) ("\tcheck_time:\t%s\n", mkrfc1123(check_time));
    debug(22, 3) ("\tentry->timestamp:\t%s\n", mkrfc1123(entry->timestamp));
    /* the entry is flagged ENTRY_REVALIDATE and is not fresh: revalidation is mandatory */
    if (EBIT_TEST(entry->flags, ENTRY_REVALIDATE) && staleness > -1) {
        debug(22, 3) ("refreshCheck: YES: Must revalidate stale response\n");
        return STALE_MUST_REVALIDATE;
    }
    /* request-specific checks */
    /* the client's reload flag and Cache-Control directives can decide the result */
    if (request) {
        HttpHdrCc *cc = request->cache_control;
#if HTTP_VIOLATIONS
        if (!request->flags.nocache_hack) {
            (void) 0;
        } else if (R->flags.ignore_reload) {
            /* The clients no-cache header is ignored */
            debug(22, 3) ("refreshCheck: MAYBE: ignore-reload\n");
        } else if (R->flags.reload_into_ims || Config.onoff.reload_into_ims) {
            /* The clients no-cache header is changed into a IMS query */
            debug(22, 3) ("refreshCheck: YES: reload-into-ims\n");
            return STALE_RELOAD_INTO_IMS;
        } else {
            /* The clients no-cache header is not overridden on this request */
            debug(22, 3) ("refreshCheck: YES: client reload\n");
            request->flags.nocache = 1;
            return STALE_FORCED_RELOAD;
        }
#endif
        if (NULL != cc) {
            if (cc->max_age > -1) {
#if HTTP_VIOLATIONS
                if (R->flags.ignore_reload && cc->max_age == 0) {
                    /* max-age=0 is deliberately not enforced when ignore-reload is set */
                } else
#endif
                if (age > cc->max_age) {
                    debug(22, 3) ("refreshCheck: YES: age > client-max-age\n");
                    return STALE_EXCEEDS_REQUEST_MAX_AGE_VALUE;
                }
            }
            if (EBIT_TEST(cc->mask, CC_MAX_STALE) && staleness >= 0) {
                if (cc->max_stale < 0) {
                    /* max-stale directive without a value */
                    debug(22, 3) ("refreshCheck: NO: max-stale wildcard\n");
                    return FRESH_REQUEST_MAX_STALE_ALL;
                } else if (staleness < cc->max_stale) {
                    debug(22, 3) ("refreshCheck: NO: staleness < max-stale\n");
                    return FRESH_REQUEST_MAX_STALE_VALUE;
                }
            }
        }
    }
    /* negative staleness means fresh; the sf flags only tell which rule made it so */
    if (staleness < 0) {
        if (sf.expires)
            return FRESH_EXPIRES;
        assert(!sf.max);
        if (sf.lmfactor)
            return FRESH_LMFACTOR_RULE;
        assert(sf.min);
        return FRESH_MIN_RULE;
    }
    /*
     * At this point the response is stale, unless one of
     * the override options kicks in.
     */
    /*
     * stale-while-revalidate: take the value from the reply's Cache-Control
     * when present; use the rule's value instead when the reply has none or
     * the rule sets ignore-stale-while-revalidate.
     */
    if (entry->mem_obj) {
        int stale_while_revalidate = -1;
        if (entry->mem_obj->reply && entry->mem_obj->reply->cache_control && EBIT_TEST(entry->mem_obj->reply->cache_control->mask, CC_STALE_WHILE_REVALIDATE))
            stale_while_revalidate = entry->mem_obj->reply->cache_control->stale_while_revalidate;
        if (R->flags.ignore_stale_while_revalidate || stale_while_revalidate == -1)
            stale_while_revalidate = R->stale_while_revalidate;
        if (staleness < stale_while_revalidate) {
            debug(22, 3) ("stale-while-revalidate: age=%d, staleness=%d, stale_while_revalidate=%d\n", (int) age, staleness, stale_while_revalidate);
            /* remember the effective value on the object for the caller */
            entry->mem_obj->stale_while_revalidate = stale_while_revalidate;
            return STALE_ASYNC_REFRESH;
        }
    }
    {
        /*
         * max-stale: start from Config.maxStale, let the rule's max-stale=NN
         * override it, and let the reply's Cache-Control stale-if-error
         * value override both.
         */
        int max_stale = Config.maxStale;
        if (R->max_stale >= 0)
            max_stale = R->max_stale;
        if (entry->mem_obj && entry->mem_obj->reply && entry->mem_obj->reply->cache_control && EBIT_TEST(entry->mem_obj->reply->cache_control->mask, CC_STALE_IF_ERROR))
            max_stale = entry->mem_obj->reply->cache_control->stale_if_error;
        if (max_stale >= 0 && staleness >= max_stale)
            return STALE_MAX_STALE;
    }
    /* stale by less than the (negative) delta window supplied by the caller */
    if (delta < 0 && staleness + delta < 0) {
        return STALE_WITHIN_DELTA;
    }
    if (sf.expires) {
#if HTTP_VIOLATIONS
        /* override-expire: the rule's min takes precedence over the explicit expiry */
        if (R->flags.override_expire && age < R->min) {
            debug(22, 3) ("refreshCheck: NO: age < min && override-expire\n");
            return FRESH_OVERRIDE_EXPIRES;
        }
#endif
        return STALE_EXPIRES;
    }
    if (sf.max)
        return STALE_MAX_RULE;
    if (sf.lmfactor) {
#if HTTP_VIOLATIONS
        /* override-lastmod: the rule's min takes precedence over the last-modified factor */
        if (R->flags.override_lastmod && age < R->min) {
            debug(22, 3) ("refreshCheck: NO: age < min && override-lastmod\n");
            return FRESH_OVERRIDE_LASTMOD;
        }
#endif
        return STALE_LMFACTOR_RULE;
    }
    return STALE_DEFAULT;
}
最后来分析refreshStaleness,这个函数只用来计算过期的时间值(单位:秒),返回-1表示fresh。判断的先后顺序(优先级)依次是entry->expires、R->max、R->pct(LM-factor算法)、R->min。由此看来entry->timestamp时间戳还是比较重要的,记得在createEntry的时候一定要赋值,不然会一直miss的!
/*
 * Compute how stale an entry is at check_time.
 *
 * entry      - cached object (expires, timestamp and lastmod are read)
 * check_time - the instant the freshness is evaluated for
 * age        - age of the entry at check_time, must be >= 0
 * R          - refresh_pattern rule supplying min, pct and max
 * sf         - out: records which rule decided (caller zeroes it first)
 *
 * Returns -1 when the entry is fresh, otherwise the number of seconds by
 * which it is stale (>= 0).
 *
 * Rules are tried in this order; the first one that applies decides:
 *   1. explicit expiry time of the entry   (sets sf->expires)
 *   2. age above the rule's max            (sets sf->max)
 *   3. last-modified factor algorithm      (sets sf->lmfactor)
 *   4. the rule's min                      (sets sf->min only when fresh)
 */
static int
refreshStaleness(const StoreEntry * entry, time_t check_time, time_t age, const refresh_t * R, stale_flags * sf)
{
    /*
     * Check for an explicit expiration time.
     */
    if (entry->expires > -1) {
        sf->expires = 1;
        if (entry->expires > check_time) {
            debug(22, 3) ("FRESH: expires %d >= check_time %d \n",
                (int) entry->expires, (int) check_time);
            return -1;
        } else {
            debug(22, 3) ("STALE: expires %d < check_time %d \n",
                (int) entry->expires, (int) check_time);
            return (check_time - entry->expires);
        }
    }
    assert(age >= 0);
    /*
     * Use local heuristics to determine staleness.  Start with the
     * max age from the refresh_pattern rule.
     */
    if (age > R->max) {
        debug(22, 3) ("STALE: age %d > max %d \n", (int) age, (int) R->max);
        sf->max = 1;
        return (age - R->max);
    }
    if (check_time < entry->timestamp) {
        debug(22, 1) ("STALE: Entry's timestamp greater than check time. Clock going backwards?\n");
        debug(22, 1) ("\tcheck_time:\t%s\n", mkrfc1123(check_time));
        debug(22, 1) ("\tentry->timestamp:\t%s\n", mkrfc1123(entry->timestamp));
        /*
         * Cast the whole difference: a cast on the first operand alone
         * leaves the argument with the type of the subtraction, which
         * need not be long and would then not match "%ld".
         */
        debug(22, 1) ("\tstaleness:\t%ld\n", (long int) (entry->timestamp - check_time));
        return (entry->timestamp - check_time);
    }
    /*
     * Try the last-modified factor algorithm.
     */
    if (entry->lastmod > -1 && entry->timestamp > entry->lastmod) {
        /*
         * stale_age is the Age of the response when it became/becomes
         * stale according to the last-modified factor algorithm.
         */
        time_t stale_age = (entry->timestamp - entry->lastmod) * R->pct;
        sf->lmfactor = 1;
        if (age >= stale_age) {
            debug(22, 3) ("STALE: age %d > stale_age %d\n", (int) age, (int) stale_age);
            return (age - stale_age);
        } else {
            debug(22, 3) ("FRESH: age %d <= stale_age %d\n", (int) age, (int) stale_age);
            return -1;
        }
    }
    /*
     * If we are here, staleness is determined by the refresh_pattern
     * configured minimum age.
     */
    if (age < R->min) {
        debug(22, 3) ("FRESH: age %d < min %d\n", (int) age, (int) R->min);
        sf->min = 1;
        return -1;
    }
    debug(22, 3) ("STALE: age %d >= min %d\n", (int) age, (int) R->min);
    return (age - R->min);
}
总的来说refresh_pattern支持的regex还比较弱,如果配置太多的话,可能每次都要去搜索匹配是不是效率有影响!