yara规则的详细信息请参考:
https://yara.readthedocs.io/en/stable/writingrules.html
根据官方文档,yara规则长这个样子:
[1]:yara rule
/*
This is a multi-line comment ...
*/
rule silent_banker : banker
{
meta:
description = "This is just an example"
threat_level = 3
in_the_wild = true
strings:
$a = {6A 40 68 00 30 00 00 6A 14 8D 91}
$b = {8D 4D B0 2B C1 83 C0 27 99 6A 4E 59 F7 F9}
$c = "UVODFRYSIHLNWPEJXQZAKCBGMT"
condition:
$a or $b or $c
}
1. /* 。。。。*/这部分是注释,可有可无。相关解释见链接。
2.rule 这个是yara的关键词,是一条规则必不可少的部分,其实 rule前边还可以有 global,private 等关键词。
3.silent_banker 这个是规则ID( rule identifier ),是用户自定义的部分,一般是用来表示规则的名称。
4.banker 这个是规则标签(Rule tags),主要用于过滤扫描结果。
5.meta 此部分是元数据( Metadata),主要是规则的描述信息,比如作者,日期或者其他信息
6.strings 此部分是规则字符串(strings), 就是描述样本特征的字符串,可以使用普通字符串,16进制字符串,和正则表达式。
7.condition 此部分是规则的条件部分(condition),主要用来表述怎么组合利用上边的样本特征(strings)及其他的一些条件。
其中,规则中最复杂,也最丰富的是6.7两部分。编写一条规则,最主要的就是编写以上两部分。
注:文章开始处的链接文档是最新版的yara的文档,而此文解析的源码是早期的源码,文档中的一部分特性在此源码中是没有的。
比如xor strings ,base64 strings.
规则编译,即是将[1]处的这样一条条规则转化成内存中的数据结构 YARA_CONTEXT* context;
此部分主要通过lex.l grammar.y ast.h,ast.c 这几个文件完成。
lex.l 是规则的词法分析部分。通过 flex 程序可编译生成 lex.h、lex.c 文件。
grammar.y 是规则的语法分析部分。通过 bison 程序编译产生 grammar.h 和 grammar.c 文件。
ast.h/ast.c 主要是生成新的rule,string 等内存结构及查找。
yara程序在规则扫描部分的入口是
parse_rules_string
parse_rules_file
这两个函数。
/*
 * Parses YARA rules held in a NUL-terminated string.
 *
 * rules_string  text of the rules to parse
 * context       parsing context; attached to the scanner as its "extra" data
 *
 * Returns context->errors as it stands once parsing has finished.
 */
int parse_rules_string(const char* rules_string, YARA_CONTEXT* context)
{
    yyscan_t yyscanner;

    /* yylex_init returns non-zero on failure; the handle must not be used then.
       Only the error counter is bumped here because no error code for this
       situation is visible at this point. */
    if (yylex_init(&yyscanner) != 0)
    {
        context->errors++;
        return context->errors;
    }

    yyset_extra(context, yyscanner);

    /* The buffer created by yy_scan_string is owned by the scanner, so the
       returned handle does not need to be kept. */
    yy_scan_string(rules_string, yyscanner);
    yyset_lineno(1, yyscanner);

    yyparse(yyscanner);
    yylex_destroy(yyscanner);

    return context->errors;
}
/*
 * Parses YARA rules read from an already opened file.
 *
 * rules_file  stream the scanner reads from
 * context     parsing context; attached to the scanner as its "extra" data
 *
 * Returns context->errors as it stands once parsing has finished.
 */
int parse_rules_file(FILE* rules_file, YARA_CONTEXT* context)
{
    yyscan_t yyscanner;

    /* yylex_init returns non-zero on failure; the handle must not be used then.
       Only the error counter is bumped here because no error code for this
       situation is visible at this point. */
    if (yylex_init(&yyscanner) != 0)
    {
        context->errors++;
        return context->errors;
    }

#ifdef DEBUG
    yyset_debug(1, yyscanner);
#endif

    yyset_in(rules_file, yyscanner);
    yyset_extra(context, yyscanner);

    /* yyparse is the entry point of the grammar parser; yylex is the entry
       point of the lexical analyzer. */
    yyparse(yyscanner);
    yylex_destroy(yyscanner);

    return context->errors;
}
BNF与ABNF 巴科斯范式
[Flex&Bison]协同工作简介
扫描流程:
yyparse函数(grammar.c)中调用 YYLEX 获取词法单元(token),当匹配到用户定义的BNF范式后,进入一个大大的 switch,调用用户定义的各种归约函数(reduce_*):
yyreduce:
/* yyn is the number of a rule to reduce with. */
yylen = yyr2[yyn];
/* If YYLEN is nonzero, implement the default value of the action:
`$$ = $1'.
Otherwise, the following line sets YYVAL to garbage.
This behavior is undocumented and Bison
users should not rely upon it. Assigning to YYVAL
unconditionally makes the parser a bit smaller, and it avoids a
GCC warning that YYVAL may be used uninitialized. */
yyval = yyvsp[1-yylen];
YY_REDUCE_PRINT (yyn);
switch (yyn)
{
case 6: //这里的case 与grammar.y 中的BNF范式的定义顺序基本对应
#line 279 "grammar.y"
{
if (reduce_rule_declaration(yyscanner, (yyvsp[(3) - (9)].c_string),(yyvsp[(1) - (9)].integer),(yyvsp[(4) - (9)].tag),(yyvsp[(6) - (9)].meta),(yyvsp[(7) - (9)].string),(yyvsp[(8) - (9)].term)) != ERROR_SUCCESS)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
break;
.........................
之后的代码没有什么太难理解的了。
比较有意思的就是ast.c中的new_hex_string函数。
此函数中完整实现了对 ? 通配符 ,[num-num] ,(BYTE|BYTE) 等模式的匹配和处理。
对于这三种模式的相关信息是保存在mask字段中。
此部分的解析到此结束,更多信息请参考附录中的源码。
附录:
lex.l 此部分加有注释,格式可能已破坏
/*
Copyright (c) 2007. Victor M. Alvarez [[email protected]].
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by Victor M. Alvarez and its
contributors.
4. Neither the name of Victor M. Alvarez nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
/* Lexical analyzer for YARA */
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "grammar.h"
#include "xtoi.h"
#include "mem.h"
#include "sizedstr.h"
#include "lex.h"
#include "yara.h"
#define LEX_CHECK_SPACE_OK(data, current_size, max_length) \
if (strlen(data) + current_size >= max_length - 1) \
{ \
yyerror(yyscanner, "out of space in lex_buf"); \
yyterminate(); \
}
#define YYTEXT_TO_BUFFER \
{ \
char *yptr = yytext; \
LEX_CHECK_SPACE_OK(yptr, yyextra->lex_buf_len, LEX_BUF_SIZE); \
while ( *yptr ) \
{ \
*yyextra->lex_buf_ptr++ = *yptr++; \
yyextra->lex_buf_len++; \
} \
}
#ifdef WIN32
#define snprintf _snprintf
#endif
%}
/*flex 的配置信息 reentrant 可重入 bison-bridge配合bison使用 */
%option reentrant bison-bridge
/**/
%option noyywrap
%option nounistd
%option yylineno
%option verbose
%option warn
/*http://postgresqlchina.com/tecdocdetail/1 */
/*%x 定义 开始状态,开始状态代表进入一个特定的状态,在规则段只有定义了特定状态的规则才会匹配,*/
/*这种规则通过 <状态名> 前缀来标识。例如 定义段定义了 %x xb 则在规则段只有以 <xb> 开头的规则才会匹配,其他的规则则不会被匹配。*/
%x str
%x regexp
%x include
%x comment
digit [0-9]
letter [a-zA-Z]
hexdigit [a-fA-F0-9]
%%
/*关键字识别 */
"<" { return _LT_; }
">" { return _GT_; }
"<=" { return _LE_; }
">=" { return _GE_; }
"==" { return _EQ_; }
"!=" { return _NEQ_; }
"<<" { return _SHIFT_LEFT_; }
">>" { return _SHIFT_RIGHT_; }
"private" { return _PRIVATE_; }
"global" { return _GLOBAL_; }
"rule" { return _RULE_; }
"meta" { return _META_; }
"strings" { return _STRINGS_; }
"ascii" { return _ASCII_; }
"wide" { return _WIDE_; }
"fullword" { return _FULLWORD_; }
"nocase" { return _NOCASE_; }
"condition" { return _CONDITION_; }
"true" { return _TRUE_; }
"false" { return _FALSE_; }
"not" { return _NOT_; }
"and" { return _AND_; }
"or" { return _OR_; }
"at" { return _AT_; }
"in" { return _IN_; }
"of" { return _OF_; }
"them" { return _THEM_; }
"for" { return _FOR_; }
"all" { return _ALL_; }
"any" { return _ANY_; }
"entrypoint" { return _ENTRYPOINT_; }
"filesize" { return _SIZE_; }
"rva" { return _RVA_; }
"offset" { return _OFFSET_; }
"file" { return _FILE_; }
"section" { return _SECTION_; }
"uint8" { return _UINT8_; }
"uint16" { return _UINT16_; }
"uint32" { return _UINT32_; }
"int8" { return _INT8_; }
"int16" { return _INT16_; }
"int32" { return _INT32_; }
"matches" { return _MATCHES_; }
"contains" { return _CONTAINS_; }
"index" { return _INDEX_; }
/*多行注释识别 */
"/*" { BEGIN(comment); }
<comment>"*/" { BEGIN(INITIAL); }
<comment>(.|\n) { /* skip comments */ }
/*单行注释识别 */
"//"[^\n]* { /* skip single-line comments */ }
include[ \t]+\" {
    /* Opening of an include directive: reset the scratch buffer that will
       receive the quoted path and switch to the include start condition. */
    yyextra->lex_buf_ptr = yyextra->lex_buf;
    yyextra->lex_buf_len = 0;
    BEGIN(include);
}
<include>[^\"]+ {
    /* Everything up to the closing quote is part of the path. */
    YYTEXT_TO_BUFFER;
}
<include>\" {
    /* Closing quote of the include path: open the file, first relative to the
       directory of the current source file and then as written, and push it
       onto the scanner's buffer stack. */
    char buffer[1024];
    char *current_file_name;
    char *s = NULL;
    char *b = NULL;
    char *f;
    FILE* fh;
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    if (context->allow_includes)
    {
        *yyextra->lex_buf_ptr = '\0'; /* null-terminate included file path */

        /* move path of current source file into buffer */
        current_file_name = yr_get_current_file_name(context);

        if (current_file_name != NULL)
        {
            strncpy(buffer, current_file_name, sizeof(buffer)-1);
            buffer[sizeof(buffer)-1] = '\0';
        }
        else
        {
            buffer[0] = '\0';
        }

        /* make included file path relative to current source file */
        s = strrchr(buffer, '/');

#ifdef WIN32
        b = strrchr(buffer, '\\'); /* in Windows both path delimiters are accepted */
#endif

        if (s != NULL || b != NULL)
        {
            /* Use the right-most delimiter. The NULL checks come first so two
               pointers are only compared when both point into buffer. */
            if (s == NULL || (b != NULL && b > s))
            {
                f = b + 1;
            }
            else
            {
                f = s + 1;
            }

            strncpy(f, yyextra->lex_buf, sizeof(buffer) - (f - buffer));
            buffer[sizeof(buffer)-1] = '\0';

            /* SECURITY: Potential for directory traversal here. */
            fh = fopen(buffer, "r");

            /* if include file was not found relative to current source file,
               try to open it with path as specified by user (maybe user wrote
               a full path) */
            if (fh == NULL)
            {
                /* SECURITY: Potential for directory traversal here. */
                fh = fopen(yyextra->lex_buf, "r");
            }
        }
        else
        {
            /* SECURITY: Potential for directory traversal here. */
            fh = fopen(yyextra->lex_buf, "r");
        }

        if (fh != NULL)
        {
            int error_code = yr_push_file_name(context, yyextra->lex_buf);

            if (error_code != ERROR_SUCCESS)
            {
                if (error_code == ERROR_INCLUDES_CIRCULAR_REFERENCE)
                {
                    yyerror(yyscanner, "includes circular reference");
                }
                else if (error_code == ERROR_INCLUDE_DEPTH_EXCEEDED)
                {
                    yyerror(yyscanner, "includes depth exceeded");
                }

                /* The file was never handed to the file stack, so nothing
                   else would close it. */
                fclose(fh);
                yyterminate();
            }

            yr_push_file(context, fh);
            yypush_buffer_state(yy_create_buffer(fh, YY_BUF_SIZE, yyscanner), yyscanner);
        }
        else
        {
            snprintf(buffer, sizeof(buffer), "can't open include file: %s", yyextra->lex_buf);
            yyerror(yyscanner, buffer);
        }
    }
    else /* not allowing includes */
    {
        yyerror(yyscanner, "includes are disabled");
        yyterminate();
    }

    BEGIN(INITIAL);
}
<<EOF>> {
YARA_CONTEXT* context = yyget_extra(yyscanner);
FILE* file = yr_pop_file(context);
if (file != NULL)
{
fclose(file);
}
yr_pop_file_name(context);
yypop_buffer_state(yyscanner);
if (!YY_CURRENT_BUFFER)
{
yyterminate();
}
}
/*string 变量名识别 现在好像没有了 */
$({letter}|{digit}|_)*"*" {
yylval->c_string = (char*) yr_strdup(yytext);
return _STRING_IDENTIFIER_WITH_WILDCARD_;
}
/*string 变量名识别 */
$({letter}|{digit}|_)* {
yylval->c_string = (char*) yr_strdup(yytext);
return _STRING_IDENTIFIER_;
}
/*条件部分的 变量名识别 */
#({letter}|{digit}|_)* {
yylval->c_string = (char*) yr_strdup(yytext);
yylval->c_string[0] = '$'; /* replace # by $*/
return _STRING_COUNT_;
}
/*条件部分的 变量名识别 */
@({letter}|{digit}|_)* {
yylval->c_string = (char*) yr_strdup(yytext);
yylval->c_string[0] = '$'; /* replace @ by $*/
return _STRING_OFFSET_;
}
/*ID 识别 */
({letter}|_)({letter}|{digit}|_)* {
if (strlen(yytext) > 128)
{
yyerror(yyscanner, "indentifier too long");
}
yylval->c_string = (char*) yr_strdup(yytext);
return _IDENTIFIER_;
}
{digit}+(MB|KB){0,1} {
yylval->integer = (size_t) atol(yytext);
if (strstr(yytext, "KB") != NULL)
{
yylval->integer *= 1024;
}
else if (strstr(yytext, "MB") != NULL)
{
yylval->integer *= 1048576;
}
return _NUMBER_;
}
0x{hexdigit}+ {
yylval->integer = xtoi(yytext + 2);
return _NUMBER_;
}
<str>\" { /* saw closing quote - all done */
SIZED_STRING* s;
if (yyextra->lex_buf_len == 0)
{
yyerror(yyscanner, "empty string");
}
*yyextra->lex_buf_ptr = '\0';
BEGIN(INITIAL);
s = (SIZED_STRING*) yr_malloc(yyextra->lex_buf_len + sizeof(SIZED_STRING));
s->length = yyextra->lex_buf_len;
memcpy(s->c_string, yyextra->lex_buf, yyextra->lex_buf_len + 1);
yylval->sized_string = s;
return _TEXTSTRING_;
}
<str>\\t { LEX_CHECK_SPACE_OK("\t", yyextra->lex_buf_len, LEX_BUF_SIZE); *yyextra->lex_buf_ptr++ = '\t'; yyextra->lex_buf_len++;}
<str>\\\" { LEX_CHECK_SPACE_OK("\"", yyextra->lex_buf_len, LEX_BUF_SIZE); *yyextra->lex_buf_ptr++ = '\"'; yyextra->lex_buf_len++;}
<str>\\\\ { LEX_CHECK_SPACE_OK("\\", yyextra->lex_buf_len, LEX_BUF_SIZE); *yyextra->lex_buf_ptr++ = '\\'; yyextra->lex_buf_len++;}
<str>\\x{hexdigit}{2} {
int result;
if (sscanf( yytext + 2, "%x", &result ) != 1) {
yyerror(yyscanner, "Invalid escaped hex digit");
yyterminate();
}
LEX_CHECK_SPACE_OK("X", yyextra->lex_buf_len, LEX_BUF_SIZE);
*yyextra->lex_buf_ptr++ = result;
yyextra->lex_buf_len++;
}
<str>[^\\\n\"]+ {
YYTEXT_TO_BUFFER;
}
<str>\n {
yyerror(yyscanner, "unterminated string");
yyterminate();
}
<str>\\(.|\n) {
yyerror(yyscanner, "illegal escape sequence");
}
<regexp>"/" {
SIZED_STRING* s;
if (yyextra->lex_buf_len == 0)
{
yyerror(yyscanner, "empty regular expression");
}
*yyextra->lex_buf_ptr = '\0';
BEGIN(INITIAL);
s = (SIZED_STRING*) yr_malloc(yyextra->lex_buf_len + sizeof(SIZED_STRING));
s->length = yyextra->lex_buf_len;
strcpy(s->c_string, yyextra->lex_buf);
yylval->sized_string = s;
return _REGEXP_;
}
<regexp>\\\/ {
LEX_CHECK_SPACE_OK("/", yyextra->lex_buf_len, LEX_BUF_SIZE);
*yyextra->lex_buf_ptr++ = '/';
yyextra->lex_buf_len++ ;
}
<regexp>\\. {
LEX_CHECK_SPACE_OK("\\.", yyextra->lex_buf_len, LEX_BUF_SIZE);
*yyextra->lex_buf_ptr++ = yytext[0];
*yyextra->lex_buf_ptr++ = yytext[1];
yyextra->lex_buf_len += 2;
}
<regexp>[^/\n\\]+ {
YYTEXT_TO_BUFFER;
}
<regexp>\n {
yyerror(yyscanner, "unterminated regular expression");
yyterminate();
}
\" {
yyextra->lex_buf_ptr = yyextra->lex_buf;
yyextra->lex_buf_len = 0;
BEGIN(str);
}
"/" {
yyextra->lex_buf_ptr = yyextra->lex_buf;
yyextra->lex_buf_len = 0;
BEGIN(regexp);
}
\{({hexdigit}|[ \-|\?\[\]\(\)\n\t])+\} {
int len = strlen(yytext);
SIZED_STRING* s = (SIZED_STRING*) yr_malloc(len + sizeof(SIZED_STRING));
s->length = len;
strcpy(s->c_string, yytext);
yylval->sized_string = s;
return _HEXSTRING_;
}
[ \t\r\n] /* skip whitespace */
. {
if (yytext[0] >= 32 && yytext[0] < 127)
{
return yytext[0];
}
else
{
yyerror(yyscanner, "non-ascii character");
yyterminate();
}
}
%%
/*
 * Records an error in the scanner's context and forwards it to the
 * registered report callback, if there is one.
 *
 * error_message  non-NULL: stored and reported verbatim as a syntax error.
 *                NULL: the error code is taken from context->last_result and
 *                its text is obtained through yr_get_error_message.
 *
 * context->last_result is reset to ERROR_SUCCESS before returning.
 */
void yyerror(yyscan_t yyscanner, const char *error_message)
{
    YARA_CONTEXT* ctx = yyget_extra(yyscanner);
    char text[512] = {'\0'};
    char* current_file = NULL;

    ctx->errors++;
    ctx->last_error_line = yyget_lineno(yyscanner);

    /* The top of the file name stack, when present, names the file in which
       the error is reported. */
    if (ctx->file_name_stack_ptr > 0)
    {
        current_file = ctx->file_name_stack[ctx->file_name_stack_ptr - 1];
    }

    if (error_message == NULL)
    {
        ctx->last_error = ctx->last_result;

        if (ctx->error_report_function != NULL)
        {
            yr_get_error_message(ctx, text, sizeof(text));
            ctx->error_report_function(current_file, ctx->last_error_line, text);
        }
    }
    else
    {
        ctx->last_error = ERROR_SYNTAX_ERROR;

        strncpy(ctx->last_error_extra_info,
                error_message,
                sizeof(ctx->last_error_extra_info) - 1);
        ctx->last_error_extra_info[sizeof(ctx->last_error_extra_info)-1] = '\0';

        if (ctx->error_report_function != NULL)
        {
            ctx->error_report_function(current_file, ctx->last_error_line, error_message);
        }
    }

    ctx->last_result = ERROR_SUCCESS;
}
/*
 * Parses YARA rules held in a NUL-terminated string.
 *
 * rules_string  text of the rules to parse
 * context       parsing context; attached to the scanner as its "extra" data
 *
 * Returns context->errors as it stands once parsing has finished.
 */
int parse_rules_string(const char* rules_string, YARA_CONTEXT* context)
{
    yyscan_t yyscanner;

    /* yylex_init returns non-zero on failure; the handle must not be used then.
       Only the error counter is bumped here because no error code for this
       situation is visible at this point. */
    if (yylex_init(&yyscanner) != 0)
    {
        context->errors++;
        return context->errors;
    }

    yyset_extra(context, yyscanner);

    /* The buffer created by yy_scan_string is owned by the scanner, so the
       returned handle does not need to be kept. */
    yy_scan_string(rules_string, yyscanner);
    yyset_lineno(1, yyscanner);

    yyparse(yyscanner);
    yylex_destroy(yyscanner);

    return context->errors;
}
/*
 * Parses YARA rules read from an already opened file.
 *
 * rules_file  stream the scanner reads from
 * context     parsing context; attached to the scanner as its "extra" data
 *
 * Returns context->errors as it stands once parsing has finished.
 */
int parse_rules_file(FILE* rules_file, YARA_CONTEXT* context)
{
    yyscan_t yyscanner;

    /* yylex_init returns non-zero on failure; the handle must not be used then.
       Only the error counter is bumped here because no error code for this
       situation is visible at this point. */
    if (yylex_init(&yyscanner) != 0)
    {
        context->errors++;
        return context->errors;
    }

#ifdef DEBUG
    yyset_debug(1, yyscanner);
#endif

    yyset_in(rules_file, yyscanner);
    yyset_extra(context, yyscanner);

    yyparse(yyscanner);
    yylex_destroy(yyscanner);

    return context->errors;
}
libyara\grammar.y
/*
Copyright (c) 2007. Victor M. Alvarez [[email protected]].
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by Victor M. Alvarez and its
contributors.
4. Neither the name of Victor M. Alvarez nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
%{
#include <stdio.h>
#include <string.h>
#include <stddef.h>
#include "ast.h"
#include "sizedstr.h"
#include "mem.h"
#include "lex.h"
#include "regex.h"
#ifdef _MSC_VER
#define snprintf _snprintf
#endif
#define YYERROR_VERBOSE
//#define YYDEBUG 1
%}
%pure-parser
%parse-param {void *yyscanner}
%lex-param {yyscan_t yyscanner}
//http://blog.chinaunix.net/uid-27004869-id-3233026.html
//%token:通过%token将%union中的声明与特定的符号联系起来
//通过放在< >中的联合类型的相应成员名字来确定每种语法符号使用的值类型。
%token _RULE_
%token _PRIVATE_
%token _GLOBAL_
%token _META_
/*%token< union 中的变量名 > 终结符, 此语法是定义终结符和union中变量的绑定。这样就可以在flex中直接通过yylval->(union中的变量名)返回匹配的值*/
%token _STRINGS_
%token _CONDITION_
%token _END_
%token <c_string> _IDENTIFIER_
%token <c_string> _STRING_IDENTIFIER_
%token <c_string> _STRING_COUNT_
%token <c_string> _STRING_OFFSET_
%token <c_string> _STRING_IDENTIFIER_WITH_WILDCARD_
%token _ANONYMOUS_STRING_
%token <integer> _NUMBER_
%token _UNKNOWN_
%token <sized_string> _TEXTSTRING_
%token <sized_string> _HEXSTRING_
%token <sized_string> _REGEXP_
%token _ASCII_
%token _WIDE_
%token _NOCASE_
%token _FULLWORD_
%token _AT_
%token _SIZE_
%token _ENTRYPOINT_
%token _ALL_
%token _ANY_
%token _RVA_
%token _OFFSET_
%token _FILE_
%token _IN_
%token _OF_
%token _FOR_
%token _THEM_
%token _SECTION_
%token _INT8_
%token _INT16_
%token _INT32_
%token _UINT8_
%token _UINT16_
%token _UINT32_
%token _MATCHES_
%token _CONTAINS_
%token _INDEX_
%token _MZ_
%token _PE_
%token _DLL_
%token _TRUE_
%token _FALSE_
//%left 操作符左结合 %right:操作符右结合 %nonassoc:没有结合性的操作符
//这些操作符 自上而下,优先级依次增高
%left _OR_
%left _AND_
%left '&' '|' '^'
%left _LT_ _LE_ _GT_ _GE_ _EQ_ _NEQ_ _IS_
%left _SHIFT_LEFT_ _SHIFT_RIGHT_
%left '+' '-'
%left '*' '\\' '%'
%right _NOT_
%right '~'
%type <string> strings
%type <string> string_declaration //字符串声明
%type <string> string_declarations
%type <meta> meta //meta 元数据
%type <meta> meta_declaration
%type <meta> meta_declarations
%type <integer> string_modifier //字符串修饰符,即 wide / ascii / nocase / fullword
%type <integer> string_modifiers
%type <integer> rule_modifier //规则修饰符,即 global / private
%type <integer> rule_modifiers
%type <tag> tags
%type <tag> tag_list
%type <term> boolean_expression //term 条件
%type <term> for_expression
%type <term> expression
%type <term> number
%type <term> string_set
%type <term> integer_set
%type <term> string_enumeration
%type <term> integer_enumeration
%type <term> string_enumeration_item
%type <term> condition
%type <term> range
//%union:声明标识出的符号值可能拥有的所有C类型,即声明语法分析器中符号值的类型。
%union {
void* sized_string;
char* c_string;
size_t integer;
void* string;
void* term;
void* tag;
void* meta;
}
//%destructor { free ($$); } _TEXTSTRING_ _HEXSTRING_ _REGEXP_ _IDENTIFIER_
%{
/* Function declarations */ //规约函数
int reduce_rule_declaration( yyscan_t yyscanner,
char* identifier,
int flags,
TAG* tag_list_head,
META* meta_list_head,
STRING* string_list_head,
TERM* condition);
TAG* reduce_tags( yyscan_t yyscanner,
TAG* tag_list_head,
char* identifier);
META* reduce_meta_declaration( yyscan_t yyscanner,
int type,
char* identifier,
unsigned int integer_value,
SIZED_STRING* string_value);
META* reduce_metas( yyscan_t yyscanner,
META* meta_list_head,
META* meta);
STRING* reduce_string_declaration( yyscan_t yyscanner,
char* identifier,
SIZED_STRING* str,
int flags);
STRING* reduce_strings( yyscan_t yyscanner,
STRING* string_list_head,
STRING* string);
TERM* reduce_string_enumeration( yyscan_t yyscanner,
TERM* string_list_head,
TERM* string_identifier);
TERM* reduce_string_with_wildcard( yyscan_t yyscanner,
char* identifier);
TERM* reduce_string( yyscan_t yyscanner,
char* identifier);
TERM* reduce_string_at( yyscan_t yyscanner,
char* identifier,
TERM* offset);
TERM* reduce_string_in_range( yyscan_t yyscanner,
char* identifier,
TERM* range);
TERM* reduce_string_in_section_by_name( yyscan_t yyscanner,
char* identifier,
SIZED_STRING* section_name);
TERM* reduce_string_count( yyscan_t yyscanner,
char* identifier);
TERM* reduce_string_offset( yyscan_t yyscanner,
char* identifier,
TERM* index);
TERM* reduce_filesize(yyscan_t yyscanner);
TERM* reduce_entrypoint(yyscan_t yyscanner);
TERM* reduce_term( yyscan_t yyscanner,
int type,
TERM* op1,
TERM* op2,
TERM* op3);
TERM* reduce_constant( yyscan_t yyscanner,
size_t constant);
TERM* reduce_identifier( yyscan_t yyscanner,
char* identifier);
TERM* reduce_string_operation( yyscan_t yyscanner,
int type,
char* identifier,
SIZED_STRING* string);
TERM* reduce_integer_enumeration( yyscan_t yyscanner,
TERM* vector,
TERM* expression);
TERM* reduce_integer_for( yyscan_t yyscanner,
TERM* count,
char* identifier,
TERM* items,
TERM* expression);
TERM* reduce_range( yyscan_t yyscanner,
TERM* min,
TERM* max);
%}
%%
rules : /* empty */
| rules rule
| rules error rule /* on error skip until next rule..*/
| rules error 'include' /* .. or include statement */
;
/* 规则定义 规则修饰符(Global/private $1) rule关键词 规则ID($3) 标签($4) {元数据($6) 字符串($7) 条件($8)}) */
rule : rule_modifiers _RULE_ _IDENTIFIER_ tags '{' meta strings condition '}'
{
if (reduce_rule_declaration(yyscanner, $3,$1,$4,$6,$7,$8) != ERROR_SUCCESS)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
;
meta : /* empty */ { $$ = NULL; }
| _META_ ':' meta_declarations { $$ = $3; }
;
strings : /* empty */ { $$ = NULL; }
| _STRINGS_ ':' string_declarations { $$ = $3; }
;
condition : _CONDITION_ ':' boolean_expression { $$ = $3; }
;
rule_modifiers : /* empty */ { $$ = 0; }
| rule_modifiers rule_modifier { $$ = $1 | $2; }
;
rule_modifier : _PRIVATE_ { $$ = RULE_FLAGS_PRIVATE; }
| _GLOBAL_ { $$ = RULE_FLAGS_GLOBAL; }
;
tags : /* empty */ { $$ = NULL; }
| ':' tag_list { $$ = $2; }
;
tag_list : _IDENTIFIER_ {
$$ = reduce_tags(yyscanner,NULL,$1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| tag_list _IDENTIFIER_ {
$$ = reduce_tags(yyscanner,$1,$2);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
meta_declarations : meta_declaration {
$$ = reduce_metas(yyscanner, NULL, $1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| meta_declarations meta_declaration {
$$ = reduce_metas(yyscanner, $1, $2);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
;
meta_declaration : _IDENTIFIER_ '=' _TEXTSTRING_ {
$$ = reduce_meta_declaration(yyscanner, META_TYPE_STRING, $1, 0, $3);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _IDENTIFIER_ '=' _NUMBER_ {
$$ = reduce_meta_declaration(yyscanner, META_TYPE_INTEGER, $1, $3, NULL);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _IDENTIFIER_ '=' _TRUE_ {
$$ = reduce_meta_declaration(yyscanner, META_TYPE_BOOLEAN, $1, TRUE, NULL);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _IDENTIFIER_ '=' _FALSE_ {
$$ = reduce_meta_declaration(yyscanner, META_TYPE_BOOLEAN, $1, FALSE, NULL);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
;
string_declarations : string_declaration
{
$$ = reduce_strings(yyscanner,NULL,$1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| string_declarations string_declaration
{
$$ = reduce_strings(yyscanner,$1,$2);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
;
string_declaration : _STRING_IDENTIFIER_ '=' _TEXTSTRING_ string_modifiers
{
$$ = reduce_string_declaration(yyscanner, $1, $3, $4);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_IDENTIFIER_ '=' _REGEXP_ string_modifiers
{
$$ = reduce_string_declaration(yyscanner, $1, $3, $4 | STRING_FLAGS_REGEXP);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_IDENTIFIER_ '=' _HEXSTRING_
{
$$ = reduce_string_declaration(yyscanner, $1, $3, STRING_FLAGS_HEXADECIMAL);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
;
string_modifiers : /* empty */ { $$ = 0; }
| string_modifiers string_modifier { $$ = $1 | $2; }
;
string_modifier : _WIDE_ { $$ = STRING_FLAGS_WIDE; }
| _ASCII_ { $$ = STRING_FLAGS_ASCII; }
| _NOCASE_ { $$ = STRING_FLAGS_NO_CASE; }
| _FULLWORD_ { $$ = STRING_FLAGS_FULL_WORD; }
;
boolean_expression : _TRUE_ { $$ = reduce_constant(yyscanner, 1); }
| _FALSE_ { $$ = reduce_constant(yyscanner, 0); }
| _IDENTIFIER_
{
$$ = reduce_identifier(yyscanner, $1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _IDENTIFIER_ _MATCHES_ _REGEXP_
{
$$ = reduce_string_operation(yyscanner, TERM_TYPE_STRING_MATCH, $1, $3);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _IDENTIFIER_ _CONTAINS_ _TEXTSTRING_
{
$$ = reduce_string_operation(yyscanner, TERM_TYPE_STRING_CONTAINS, $1, $3);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_IDENTIFIER_
{
$$ = reduce_string(yyscanner, $1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_IDENTIFIER_ _AT_ expression
{
$$ = reduce_string_at(yyscanner, $1, $3);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_IDENTIFIER_ _AT_ _RVA_ expression
{
$$ = NULL;
}
| _STRING_IDENTIFIER_ _IN_ range
{
$$ = reduce_string_in_range(yyscanner, $1, $3);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_IDENTIFIER_ _IN_ _SECTION_ '(' _TEXTSTRING_ ')'
{
$$ = reduce_string_in_section_by_name(yyscanner, $1, $5);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _FOR_ for_expression _IDENTIFIER_ _IN_ integer_set ':'
{
yr_define_integer_variable(yyget_extra(yyscanner), $3, 0);
}
'(' boolean_expression ')'
{
$$ = reduce_integer_for(yyscanner, $2, $3, $5, $9);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _FOR_ for_expression _OF_ string_set ':'
{
yyget_extra(yyscanner)->inside_for++;
}
'(' boolean_expression ')'
{
yyget_extra(yyscanner)->inside_for--;
$$ = reduce_term(yyscanner, TERM_TYPE_STRING_FOR, $2, $4, $8);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| for_expression _OF_ string_set
{
$$ = reduce_term(yyscanner, TERM_TYPE_OF, $1, $3, NULL);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _FILE_ _IS_ type { $$ = NULL; }
| '(' boolean_expression ')' { $$ = $2; }
| _NOT_ boolean_expression { $$ = reduce_term(yyscanner, TERM_TYPE_NOT, $2, NULL, NULL); }
| boolean_expression _AND_ boolean_expression { $$ = reduce_term(yyscanner, TERM_TYPE_AND, $1, $3, NULL); }
| boolean_expression _OR_ boolean_expression { $$ = reduce_term(yyscanner, TERM_TYPE_OR, $1, $3, NULL); }
/* | boolean_expression _IS_ boolean_expression { $$ = reduce_term(yyscanner, TERM_TYPE_EQ, $1, $3, NULL); }*/
| expression _LT_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_LT, $1, $3, NULL); }
| expression _GT_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_GT, $1, $3, NULL); }
| expression _LE_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_LE, $1, $3, NULL); }
| expression _GE_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_GE, $1, $3, NULL); }
| expression _EQ_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_EQ, $1, $3, NULL); }
| expression _IS_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_EQ, $1, $3, NULL); }
| expression _NEQ_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_NOT_EQ, $1, $3, NULL); }
;
integer_set : '(' integer_enumeration ')' { $$ = $2; }
| range { $$ = $1; }
;
integer_enumeration : expression
| integer_enumeration ',' expression { $$ = reduce_integer_enumeration(yyscanner, $1, $3); }
;
string_set : '(' string_enumeration ')' { $$ = $2; }
| _THEM_ { $$ = reduce_string_with_wildcard(yyscanner, yr_strdup("$*")); }
;
string_enumeration : string_enumeration_item
| string_enumeration ',' string_enumeration_item { $$ = reduce_string_enumeration(yyscanner, $1, $3); }
;
string_enumeration_item : _STRING_IDENTIFIER_
{
$$ = reduce_string(yyscanner, $1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_IDENTIFIER_WITH_WILDCARD_
{
$$ = reduce_string_with_wildcard(yyscanner, $1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
;
range : '(' expression '.' '.' expression ')' { $$ = reduce_range(yyscanner, $2, $5); }
;
for_expression : expression
| _ALL_ { $$ = reduce_constant(yyscanner, 0); }
| _ANY_ { $$ = reduce_constant(yyscanner, 1); }
;
expression : _SIZE_ { $$ = reduce_filesize(yyscanner); }
| _ENTRYPOINT_ { $$ = reduce_entrypoint(yyscanner); }
| _INT8_ '(' expression ')' { $$ = reduce_term(yyscanner, TERM_TYPE_INT8_AT_OFFSET, $3, NULL, NULL); }
| _INT16_ '(' expression ')' { $$ = reduce_term(yyscanner, TERM_TYPE_INT16_AT_OFFSET, $3, NULL, NULL); }
| _INT32_ '(' expression ')' { $$ = reduce_term(yyscanner, TERM_TYPE_INT32_AT_OFFSET, $3, NULL, NULL); }
| _UINT8_ '(' expression ')' { $$ = reduce_term(yyscanner, TERM_TYPE_UINT8_AT_OFFSET, $3, NULL, NULL); }
| _UINT16_ '(' expression ')' { $$ = reduce_term(yyscanner, TERM_TYPE_UINT16_AT_OFFSET, $3, NULL, NULL); }
| _UINT32_ '(' expression ')' { $$ = reduce_term(yyscanner, TERM_TYPE_UINT32_AT_OFFSET, $3, NULL, NULL); }
| _STRING_COUNT_
{
$$ = reduce_string_count(yyscanner, $1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_OFFSET_ '[' expression ']'
{
$$ = reduce_string_offset(yyscanner, $1, $3);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _STRING_OFFSET_
{
$$ = reduce_string_offset(yyscanner, $1, reduce_constant(yyscanner, 1));
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| _IDENTIFIER_
{
$$ = reduce_identifier(yyscanner, $1);
if ($$ == NULL)
{
yyerror(yyscanner, NULL);
YYERROR;
}
}
| '(' expression ')' { $$ = $2; }
| expression '+' expression { $$ = reduce_term(yyscanner, TERM_TYPE_ADD, $1, $3, NULL); }
| expression '-' expression { $$ = reduce_term(yyscanner, TERM_TYPE_SUB, $1, $3, NULL); }
| expression '*' expression { $$ = reduce_term(yyscanner, TERM_TYPE_MUL, $1, $3, NULL); }
| expression '\\' expression { $$ = reduce_term(yyscanner, TERM_TYPE_DIV, $1, $3, NULL); }
| expression '%' expression { $$ = reduce_term(yyscanner, TERM_TYPE_MOD, $1, $3, NULL); }
| expression '^' expression { $$ = reduce_term(yyscanner, TERM_TYPE_BITWISE_XOR, $1, $3, NULL); }
| expression '&' expression { $$ = reduce_term(yyscanner, TERM_TYPE_BITWISE_AND, $1, $3, NULL); }
| expression '|' expression { $$ = reduce_term(yyscanner, TERM_TYPE_BITWISE_OR, $1, $3, NULL); }
| '~' expression { $$ = reduce_term(yyscanner, TERM_TYPE_BITWISE_NOT, $2, NULL, NULL); }
| expression _SHIFT_LEFT_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_SHIFT_LEFT, $1, $3, NULL); }
| expression _SHIFT_RIGHT_ expression { $$ = reduce_term(yyscanner, TERM_TYPE_SHIFT_RIGHT, $1, $3, NULL); }
| number
;
number : _NUMBER_ { $$ = reduce_constant(yyscanner, $1); }
;
type : _MZ_
| _PE_
| _DLL_
;
%%
/*
 * Tells whether a string identifier may be used at the current position.
 *
 * The bare identifier "$" is accepted only while context->inside_for is
 * greater than zero; every other identifier is always accepted.
 *
 * Returns TRUE when accepted. Otherwise stores
 * ERROR_MISPLACED_ANONYMOUS_STRING in context->last_result and returns FALSE.
 */
int valid_string_identifier(char* identifier, YARA_CONTEXT* context)
{
    if (strcmp(identifier, "$") == 0 && !(context->inside_for > 0))
    {
        context->last_result = ERROR_MISPLACED_ANONYMOUS_STRING;
        return FALSE;
    }

    return TRUE;
}
/*
 * Reduction for a complete rule: registers the rule through new_rule and
 * then checks that every string of the rule carries
 * STRING_FLAGS_REFERENCED.
 *
 * On failure the offending rule or string identifier is copied into
 * context->last_error_extra_info. The per-rule state
 * (current_rule_flags, current_rule_strings) is cleared in every case.
 *
 * Returns the resulting value of context->last_result.
 */
int reduce_rule_declaration(    yyscan_t yyscanner,
                                char* identifier,
                                int flags,
                                TAG* tag_list_head,
                                META* meta_list_head,
                                STRING* string_list_head,
                                TERM* condition
                            )
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    STRING* cursor;

    context->last_result = new_rule(&context->rule_list,
                                    identifier,
                                    context->current_namespace,
                                    flags | context->current_rule_flags,
                                    tag_list_head,
                                    meta_list_head,
                                    string_list_head,
                                    condition);

    if (context->last_result == ERROR_SUCCESS)
    {
        /* Stop at the first string that is not marked as referenced. */
        for (cursor = string_list_head; cursor != NULL; cursor = cursor->next)
        {
            if (cursor->flags & STRING_FLAGS_REFERENCED)
            {
                continue;
            }

            context->last_result = ERROR_UNREFERENCED_STRING;
            strncpy(context->last_error_extra_info,
                    cursor->identifier,
                    sizeof(context->last_error_extra_info));
            context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
            break;
        }
    }
    else
    {
        strncpy(context->last_error_extra_info,
                identifier,
                sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
    }

    context->current_rule_flags = 0;
    context->current_rule_strings = NULL;

    return context->last_result;
}
/*
 * Reduction for a single string declaration: creates the STRING through
 * new_string and releases the SIZED_STRING produced by the lexer.
 *
 * identifier  name of the string; the bare "$" adds STRING_FLAGS_ANONYMOUS
 * str         lexer-allocated text of the string; always freed here
 * flags       modifier flags collected by the grammar
 *
 * Returns the pointer filled in by new_string. It is initialized to NULL, so
 * NULL is returned when new_string leaves it unset. On error,
 * context->last_error_extra_info receives the identifier, or a longer
 * message for an invalid regular expression.
 */
STRING* reduce_string_declaration(  yyscan_t yyscanner,
                                    char* identifier,
                                    SIZED_STRING* str,
                                    int flags)
{
    char tmp[200];
    STRING* string = NULL;
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    if (strcmp(identifier,"$") == 0)
    {
        flags |= STRING_FLAGS_ANONYMOUS;
    }

    context->last_result = new_string(context, identifier, str, flags, &string);

    if (context->last_result == ERROR_INVALID_REGULAR_EXPRESSION)
    {
        snprintf(tmp, sizeof(tmp), "invalid regular expression in string \"%s\": %s", identifier, context->last_error_extra_info);
        /* snprintf may be mapped to _snprintf, which does not terminate the
           buffer when the output is truncated. */
        tmp[sizeof(tmp)-1] = '\0';

        strncpy(context->last_error_extra_info, tmp, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
    }
    else if (context->last_result != ERROR_SUCCESS)
    {
        strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
    }

    yr_free(str);

    /* string is still NULL when new_string did not produce one; only touch
       the flags of a string that actually exists. */
    if (string != NULL && context->fast_match)
    {
        string->flags |= STRING_FLAGS_FAST_MATCH;
    }

    return string;
}
/*
    reduce_strings

    Prepends `string` to the list headed by `string_list_head`. Two strings
    may not share an identifier unless the new one is anonymous.

    On success the new string becomes context->current_rule_strings,
    context->last_result is ERROR_SUCCESS and the string is returned. On a
    duplicate, the identifier is copied into context->last_error_extra_info,
    context->last_result is ERROR_DUPLICATE_STRING_IDENTIFIER and NULL is
    returned.
*/
STRING* reduce_strings( yyscan_t yyscanner,
                        STRING* string_list_head,
                        STRING* string)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    /* reject a named string whose identifier is already in the list */
    if (!(IS_ANONYMOUS(string)) && lookup_string(string_list_head, string->identifier) != NULL)
    {
        strncpy(context->last_error_extra_info, string->identifier, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        context->last_result = ERROR_DUPLICATE_STRING_IDENTIFIER;
        return NULL;
    }

    string->next = string_list_head;
    context->current_rule_strings = string;
    context->last_result = ERROR_SUCCESS;

    return string;
}
/*
    reduce_meta_declaration

    Allocates a META entry for a metadata declaration.

    type           META_TYPE_INTEGER, META_TYPE_BOOLEAN, or anything else for
                   a string value.
    identifier     stored in the entry as-is (not copied).
    integer_value  used for integer and boolean entries.
    string_value   used for string entries; its c_string is duplicated and
                   the SIZED_STRING itself is released by this function.

    Returns the new entry, or NULL with context->last_result set to
    ERROR_INSUFICIENT_MEMORY when an allocation fails. For string entries
    string_value is released on the failure paths too, so it is not leaked.
*/
META* reduce_meta_declaration(  yyscan_t yyscanner,
                                int type,
                                char* identifier,
                                unsigned int integer_value,
                                SIZED_STRING* string_value)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    int is_string = (type != META_TYPE_INTEGER && type != META_TYPE_BOOLEAN);
    META* meta = yr_malloc(sizeof(META));

    if (meta == NULL)
    {
        context->last_result = ERROR_INSUFICIENT_MEMORY;
    }
    else
    {
        meta->identifier = identifier;
        meta->type = type;
        meta->next = NULL;

        if (type == META_TYPE_INTEGER)
        {
            meta->integer = integer_value;
        }
        else if (type == META_TYPE_BOOLEAN)
        {
            meta->boolean = integer_value;
        }
        else
        {
            meta->string = yr_strdup(string_value->c_string);

            if (meta->string == NULL)
            {
                /* do not hand out an entry whose string value is missing */
                yr_free(meta);
                meta = NULL;
                context->last_result = ERROR_INSUFICIENT_MEMORY;
            }
        }
    }

    /* the string value is consumed here whether or not the entry was built */
    if (is_string && string_value != NULL)
    {
        yr_free(string_value);
    }

    return meta;
}
/*
    reduce_metas

    Prepends `meta` to the list headed by `meta_list_head`, refusing entries
    whose identifier is already present.

    Returns `meta` with context->last_result set to ERROR_SUCCESS, or NULL
    with context->last_result set to ERROR_DUPLICATE_META_IDENTIFIER and the
    identifier copied into context->last_error_extra_info.
*/
META* reduce_metas( yyscan_t yyscanner,
                    META* meta_list_head,
                    META* meta)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    if (lookup_meta(meta_list_head, meta->identifier) != NULL)
    {
        strncpy(context->last_error_extra_info, meta->identifier, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        context->last_result = ERROR_DUPLICATE_META_IDENTIFIER;
        return NULL;
    }

    meta->next = meta_list_head;
    context->last_result = ERROR_SUCCESS;

    return meta;
}
/*
    reduce_tags

    Creates a TAG for `identifier` and prepends it to the list headed by
    `tag_list_head`. The identifier pointer is stored as-is.

    Returns the new tag with context->last_result set to ERROR_SUCCESS.
    Returns NULL when the identifier is already in the list
    (ERROR_DUPLICATE_TAG_IDENTIFIER, identifier copied into
    context->last_error_extra_info) or when the allocation fails
    (ERROR_INSUFICIENT_MEMORY).
*/
TAG* reduce_tags(   yyscan_t yyscanner,
                    TAG* tag_list_head,
                    char* identifier)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TAG* created;

    /* no tags with the same identifier */
    if (lookup_tag(tag_list_head, identifier) != NULL)
    {
        strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        context->last_result = ERROR_DUPLICATE_TAG_IDENTIFIER;
        return NULL;
    }

    created = yr_malloc(sizeof(TAG));

    if (created == NULL)
    {
        context->last_result = ERROR_INSUFICIENT_MEMORY;
        return created;
    }

    created->identifier = identifier;
    created->next = tag_list_head;
    context->last_result = ERROR_SUCCESS;

    return created;
}
/*
    reduce_filesize

    Creates a TERM_TYPE_FILESIZE term and adds RULE_FLAGS_REQUIRE_FILE to
    context->current_rule_flags. The result of new_simple_term() is stored in
    context->last_result.

    Returns the term produced by new_simple_term().
*/
TERM* reduce_filesize(yyscan_t yyscanner)
{
    TERM* filesize_term = NULL;
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    context->last_result = new_simple_term(TERM_TYPE_FILESIZE, &filesize_term);
    context->current_rule_flags |= RULE_FLAGS_REQUIRE_FILE;

    return filesize_term;
}
/*
    reduce_entrypoint

    Creates a TERM_TYPE_ENTRYPOINT term and adds
    RULE_FLAGS_REQUIRE_EXECUTABLE to context->current_rule_flags. The result
    of new_simple_term() is stored in context->last_result.

    Returns the term produced by new_simple_term().
*/
TERM* reduce_entrypoint(yyscan_t yyscanner)
{
    TERM* entrypoint_term = NULL;
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    context->last_result = new_simple_term(TERM_TYPE_ENTRYPOINT, &entrypoint_term);
    context->current_rule_flags |= RULE_FLAGS_REQUIRE_EXECUTABLE;

    return entrypoint_term;
}
/*
    reduce_term

    Builds an operation term of the given type. The number of operands is
    deduced from which of op2 and op3 are NULL:

        op3 != NULL               -> ternary operation (op1, op2, op3)
        op3 == NULL, op2 != NULL  -> binary operation (op1, op2)
        both NULL                 -> unary operation (op1)

    The constructor's result code is stored in context->last_result.

    Returns the new term as produced by the constructor.
*/
TERM* reduce_term(yyscan_t yyscanner, int type, TERM* op1, TERM* op2, TERM* op3)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM* created = NULL;

    if (op3 != NULL)
    {
        context->last_result = new_ternary_operation(type, op1, op2, op3, (TERM_TERNARY_OPERATION**) &created);
    }
    else if (op2 != NULL)
    {
        context->last_result = new_binary_operation(type, op1, op2, (TERM_BINARY_OPERATION**) &created);
    }
    else
    {
        context->last_result = new_unary_operation(type, op1, (TERM_UNARY_OPERATION**) &created);
    }

    return created;
}
/*
    reduce_constant

    Wraps an integer constant into a term through new_constant(), storing the
    result code in context->last_result.

    Returns the term produced by new_constant().
*/
TERM* reduce_constant(  yyscan_t yyscanner,
                        size_t constant)
{
    TERM_CONST* const_term = NULL;
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    context->last_result = new_constant(constant, &const_term);

    return (TERM*) const_term;
}
/*
    reduce_string

    Builds a TERM_TYPE_STRING term for a string identifier used in a
    condition. The identifier is first checked with
    valid_string_identifier(); if rejected, no term is built.

    When new_string_identifier() fails, the identifier is copied into
    context->last_error_extra_info. The identifier buffer is always released
    before returning.

    Returns the new term, or NULL when none was built.
*/
TERM* reduce_string(    yyscan_t yyscanner,
                        char* identifier)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* string_term = NULL;
    int status;

    if (valid_string_identifier(identifier, context))
    {
        status = new_string_identifier(TERM_TYPE_STRING, context->current_rule_strings, identifier, &string_term);
        context->last_result = status;

        if (status != ERROR_SUCCESS)
        {
            strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
            context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        }
    }

    yr_free(identifier);

    return (TERM*) string_term;
}
/*
    reduce_string_with_wildcard

    Expands an identifier containing a '*' wildcard (for example "$a*") into
    a chain of TERM_TYPE_STRING terms, one for every string of the current
    rule whose identifier starts with the text preceding the '*'.

    Each matching string is flagged STRING_FLAGS_REFERENCED. Terms are linked
    through their `next` field; the most recently created term is the head.

    If a term cannot be created, the terms created so far are released,
    context->last_result holds the error (ERROR_INSUFICIENT_MEMORY when the
    constructor returned no term without reporting an error) and NULL is
    returned. NULL is also returned when no string matches.

    The identifier buffer is always released before returning.
*/
TERM* reduce_string_with_wildcard(  yyscan_t yyscanner,
                                    char* identifier)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* term = NULL;
    TERM_STRING* head = NULL;
    TERM_STRING* tmp;
    STRING* string;
    size_t len = 0;

    /* length of the prefix that precedes the wildcard */
    while (identifier[len] != '\0' && identifier[len] != '*')
    {
        len++;
    }

    for (string = context->current_rule_strings; string != NULL; string = string->next)
    {
        if (strncmp(string->identifier, identifier, len) != 0)
        {
            continue;
        }

        term = NULL;
        context->last_result = new_string_identifier(TERM_TYPE_STRING, context->current_rule_strings, string->identifier, &term);

        /* a missing term without an error code means the allocation failed */
        if (context->last_result == ERROR_SUCCESS && term == NULL)
        {
            context->last_result = ERROR_INSUFICIENT_MEMORY;
        }

        if (context->last_result != ERROR_SUCCESS)
        {
            /* drop the partial chain instead of leaking it */
            while (head != NULL)
            {
                tmp = head->next;
                yr_free(head);
                head = tmp;
            }

            break;
        }

        string->flags |= STRING_FLAGS_REFERENCED;

        term->string = string;
        term->next = head;
        head = term;
    }

    yr_free(identifier);

    return (TERM*) head;
}
/*
    reduce_string_at

    Builds a TERM_TYPE_STRING_AT term for `identifier` and attaches `offset`
    to it. The identifier is first checked with valid_string_identifier();
    if rejected, no term is built.

    When new_string_identifier() fails, the identifier is copied into
    context->last_error_extra_info. The identifier buffer is always released
    before returning.

    Returns the new term, or NULL when none was built.
*/
TERM* reduce_string_at( yyscan_t yyscanner,
                        char* identifier,
                        TERM* offset)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* at_term = NULL;

    if (valid_string_identifier(identifier, context))
    {
        context->last_result = new_string_identifier(TERM_TYPE_STRING_AT, context->current_rule_strings, identifier, &at_term);

        if (context->last_result == ERROR_SUCCESS)
        {
            at_term->offset = offset;
        }
        else
        {
            strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
            context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        }
    }

    yr_free(identifier);

    return (TERM*) at_term;
}
/*
    reduce_string_in_range

    Builds a TERM_TYPE_STRING_IN_RANGE term for `identifier` and attaches
    `range` to it. The identifier is first checked with
    valid_string_identifier(); if rejected, no term is built.

    When new_string_identifier() fails, the identifier is copied into
    context->last_error_extra_info. The identifier buffer is always released
    before returning.

    Returns the new term, or NULL when none was built.
*/
TERM* reduce_string_in_range(   yyscan_t yyscanner,
                                char* identifier,
                                TERM* range)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* range_term = NULL;

    if (valid_string_identifier(identifier, context))
    {
        context->last_result = new_string_identifier(TERM_TYPE_STRING_IN_RANGE, context->current_rule_strings, identifier, &range_term);

        if (context->last_result == ERROR_SUCCESS)
        {
            range_term->range = range;
        }
        else
        {
            strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
            context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        }
    }

    yr_free(identifier);

    return (TERM*) range_term;
}
/*
    reduce_string_in_section_by_name

    Builds a TERM_TYPE_STRING_IN_SECTION_BY_NAME term for `identifier` and
    stores a duplicate of section_name->c_string in it. The identifier is
    first checked with valid_string_identifier(); if rejected, no term is
    built.

    When new_string_identifier() fails, the identifier is copied into
    context->last_error_extra_info. Both section_name and identifier are
    always released before returning.

    Returns the new term, or NULL when none was built.
*/
TERM* reduce_string_in_section_by_name( yyscan_t yyscanner,
                                        char* identifier, SIZED_STRING* section_name)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* section_term = NULL;

    if (valid_string_identifier(identifier, context))
    {
        context->last_result = new_string_identifier(TERM_TYPE_STRING_IN_SECTION_BY_NAME, context->current_rule_strings, identifier, &section_term);

        if (context->last_result == ERROR_SUCCESS)
        {
            section_term->section_name = yr_strdup(section_name->c_string);
        }
        else
        {
            strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
            context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        }
    }

    yr_free(section_name);
    yr_free(identifier);

    return (TERM*) section_term;
}
/*
    reduce_string_count

    Builds a TERM_TYPE_STRING_COUNT term for `identifier`. The identifier is
    first checked with valid_string_identifier(); if rejected, no term is
    built.

    When new_string_identifier() fails, the identifier is copied into
    context->last_error_extra_info. The identifier buffer is always released
    before returning.

    Returns the new term, or NULL when none was built.
*/
TERM* reduce_string_count(  yyscan_t yyscanner,
                            char* identifier)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* count_term = NULL;
    int status;

    if (valid_string_identifier(identifier, context))
    {
        status = new_string_identifier(TERM_TYPE_STRING_COUNT, context->current_rule_strings, identifier, &count_term);
        context->last_result = status;

        if (status != ERROR_SUCCESS)
        {
            strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
            context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        }
    }

    yr_free(identifier);

    return (TERM*) count_term;
}
/*
    reduce_string_offset

    Builds a TERM_TYPE_STRING_OFFSET term for `identifier` and attaches
    `index` to it. The identifier is first checked with
    valid_string_identifier(); if rejected, no term is built.

    When new_string_identifier() fails, the identifier is copied into
    context->last_error_extra_info. The identifier buffer is always released
    before returning.

    Returns the new term, or NULL when none was built.
*/
TERM* reduce_string_offset( yyscan_t yyscanner,
                            char* identifier,
                            TERM* index)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* offset_term = NULL;

    if (valid_string_identifier(identifier, context))
    {
        context->last_result = new_string_identifier(TERM_TYPE_STRING_OFFSET, context->current_rule_strings, identifier, &offset_term);

        if (context->last_result == ERROR_SUCCESS)
        {
            offset_term->index = index;
        }
        else
        {
            strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
            context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        }
    }

    yr_free(identifier);

    return (TERM*) offset_term;
}
/*
    reduce_identifier

    Resolves a bare identifier used in a condition. If a rule with that name
    exists in the current namespace, a TERM_TYPE_RULE term whose first
    operand is that rule's condition is created; otherwise the identifier is
    handed to new_variable(). The constructor's result is stored in
    context->last_result.

    The identifier buffer is always released before returning.

    Returns the term produced by the constructor.
*/
TERM* reduce_identifier(    yyscan_t yyscanner,
                            char* identifier)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM* resolved = NULL;
    RULE* referenced_rule = lookup_rule(&context->rule_list, identifier, context->current_namespace);

    if (referenced_rule == NULL)
    {
        context->last_result = new_variable(context, identifier, (TERM_VARIABLE**) &resolved);
    }
    else
    {
        context->last_result = new_binary_operation(TERM_TYPE_RULE, referenced_rule->condition, NULL, (TERM_BINARY_OPERATION**) &resolved);
    }

    yr_free(identifier);

    return resolved;
}
/*
    reduce_string_enumeration

    Prepends `string_identifier` (a TERM_STRING) to the enumeration headed by
    `string_list_head` and flags the string it refers to with
    STRING_FLAGS_REFERENCED.

    Returns `string_identifier`, which is the new head of the enumeration.
*/
TERM* reduce_string_enumeration(    yyscan_t yyscanner,
                                    TERM* string_list_head,
                                    TERM* string_identifier)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_STRING* entry = (TERM_STRING*) string_identifier;

    entry->next = (TERM_STRING*) string_list_head;
    entry->string->flags |= STRING_FLAGS_REFERENCED;

    return string_identifier;
}
/*
    reduce_string_operation

    Builds a term applying a string operation to an external variable.

    type        operation type; TERM_TYPE_STRING_MATCH compiles `string` as a
                regular expression, any other type stores a duplicate of the
                text.
    identifier  name of the variable; it must exist in context->variables and
                be of type VARIABLE_TYPE_STRING.
    string      operand text.

    Both `identifier` and `string` are released here on every path, including
    the error paths.

    Returns the new term, or NULL on failure with context->last_result set to
    ERROR_UNDEFINED_IDENTIFIER, ERROR_INCORRECT_VARIABLE_TYPE,
    ERROR_INVALID_REGULAR_EXPRESSION or ERROR_INSUFICIENT_MEMORY. For the two
    identifier errors the identifier is copied into
    context->last_error_extra_info.
*/
TERM* reduce_string_operation( yyscan_t yyscanner,
                                int type,
                                char* identifier,
                                SIZED_STRING* string)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    int erroffset;
    VARIABLE* variable;
    TERM_STRING_OPERATION* term = NULL;

    variable = lookup_variable(context->variables, identifier);

    if (variable == NULL)
    {
        strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        context->last_result = ERROR_UNDEFINED_IDENTIFIER;
    }
    else if (variable->type != VARIABLE_TYPE_STRING)
    {
        strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        context->last_result = ERROR_INCORRECT_VARIABLE_TYPE;
    }
    else
    {
        term = (TERM_STRING_OPERATION*) yr_malloc(sizeof(TERM_STRING_OPERATION));

        if (term == NULL)
        {
            context->last_result = ERROR_INSUFICIENT_MEMORY;
        }
        else
        {
            term->type = type;
            term->variable = variable;

            if (type == TERM_TYPE_STRING_MATCH)
            {
                if (regex_compile(&(term->re),
                                  string->c_string,
                                  FALSE,
                                  context->last_error_extra_info,
                                  sizeof(context->last_error_extra_info),
                                  &erroffset) <= 0)
                {
                    yr_free(term);
                    term = NULL;
                    context->last_result = ERROR_INVALID_REGULAR_EXPRESSION;
                }
            }
            else
            {
                term->string = yr_strdup(string->c_string);

                if (term->string == NULL)
                {
                    yr_free(term);
                    term = NULL;
                    context->last_result = ERROR_INSUFICIENT_MEMORY;
                }
            }
        }
    }

    /* the operand text is consumed on every path so it never leaks */
    yr_free(string);
    yr_free(identifier);

    return (TERM*) term;
}
/*
    reduce_integer_enumeration

    Accumulates the items of an integer enumeration into a TERM_VECTOR.

    If term1 is already a vector, term2 is appended to it and term1 is
    returned. Otherwise a new vector is created and term1 and term2 are
    added to it in that order, each step running only while the previous one
    succeeded; the vector pointer produced by new_vector() is returned.

    The result of the last step performed is stored in context->last_result.
*/
TERM* reduce_integer_enumeration(   yyscan_t yyscanner,
                                    TERM* term1,
                                    TERM* term2)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    TERM_VECTOR* fresh;

    if (term1->type != TERM_TYPE_VECTOR)
    {
        context->last_result = new_vector(&fresh);

        if (context->last_result == ERROR_SUCCESS)
        {
            context->last_result = add_term_to_vector(fresh, term1);
        }

        if (context->last_result == ERROR_SUCCESS)
        {
            context->last_result = add_term_to_vector(fresh, term2);
        }

        return (TERM*) fresh;
    }

    context->last_result = add_term_to_vector((TERM_VECTOR*) term1, term2);

    return term1;
}
/*
    reduce_integer_for

    Builds a TERM_TYPE_INTEGER_FOR term from its parts: the count term, the
    loop variable (looked up by name in context->variables), the iterable
    `items` and the loop body `expression`.

    The identifier buffer is always released before returning.

    Returns the new term, or NULL with context->last_result set to
    ERROR_INSUFICIENT_MEMORY when the allocation fails.
*/
TERM* reduce_integer_for(   yyscan_t yyscanner,
                            TERM* count,
                            char* identifier,
                            TERM* items,
                            TERM* expression)
{
    YARA_CONTEXT* context = yyget_extra(yyscanner);
    VARIABLE* loop_variable = lookup_variable(context->variables, identifier);
    TERM_INTEGER_FOR* for_term = (TERM_INTEGER_FOR*) yr_malloc(sizeof(TERM_INTEGER_FOR));

    if (for_term == NULL)
    {
        context->last_result = ERROR_INSUFICIENT_MEMORY;
    }
    else
    {
        for_term->type = TERM_TYPE_INTEGER_FOR;
        for_term->count = count;
        for_term->items = (TERM_ITERABLE*) items;
        for_term->expression = expression;
        for_term->variable = loop_variable;
    }

    yr_free(identifier);

    return (TERM*) for_term;
}
/*
    reduce_range

    Builds a range term from its `min` and `max` bound terms through
    new_range(), storing the result code in context->last_result.

    Returns the term produced by new_range().
*/
TERM* reduce_range( yyscan_t yyscanner,
                    TERM* min,
                    TERM* max)
{
    TERM_RANGE* range_term = NULL;
    YARA_CONTEXT* context = yyget_extra(yyscanner);

    context->last_result = new_range(min, max, &range_term);

    return (TERM*) range_term;
}
ast.c
/*
Copyright (c) 2007. Victor M. Alvarez [[email protected]].
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include "yara.h"
#include "ast.h"
#include "mem.h"
#include "regex.h"
/*
    todigit(x)

    Converts a hexadecimal digit character to its numeric value: 'A'..'F' map
    to 10..15, any other character is treated as a decimal digit and mapped
    to (x - '0'). Lower-case letters are not handled; callers upper-case the
    character first.

    The whole expansion and every use of the argument are parenthesised so
    the macro can be used inside larger expressions. The argument is
    evaluated more than once, so it must not have side effects.
*/
#define todigit(x)  (((x) >= 'A' && (x) <= 'F') ? ((unsigned char) ((x) - 'A' + 10)) : ((unsigned char) ((x) - '0')))
/*
    lookup_rule

    Searches the rule hash table for a rule with the given identifier in the
    given namespace. The bucket index is derived from hash() applied to the
    identifier and then to the namespace name, modulo
    RULE_LIST_HASH_TABLE_SIZE; the bucket's chain is then scanned comparing
    both names.

    Returns the matching rule, or NULL when there is none.
*/
RULE* lookup_rule(RULE_LIST* rules, const char* identifier, NAMESPACE* ns)
{
    RULE_LIST_ENTRY* slot;
    RULE* candidate;
    unsigned int key;

    key = hash(0, identifier, strlen(identifier));
    key = hash(key, ns->name, strlen(ns->name));
    key = key % RULE_LIST_HASH_TABLE_SIZE;

    for (slot = &rules->hash_table[key]; slot != NULL; slot = slot->next)
    {
        candidate = (RULE*) slot->rule;

        /* the first slot of a bucket may be empty */
        if (candidate == NULL)
            continue;

        if (strcmp(candidate->identifier, identifier) != 0)
            continue;

        if (strcmp(candidate->ns->name, ns->name) == 0)
            return candidate;
    }

    return NULL;
}
/*
    lookup_meta

    Linear search of a META list by identifier.

    Returns the first entry whose identifier equals `identifier`, or NULL
    when the list holds no such entry.
*/
META* lookup_meta(META* meta_list_head, const char* identifier)
{
    META* cursor;

    for (cursor = meta_list_head; cursor != NULL; cursor = cursor->next)
    {
        if (strcmp(cursor->identifier, identifier) == 0)
            return cursor;
    }

    return NULL;
}
/*
    lookup_string

    Linear search of a STRING list by identifier.

    Returns the first string whose identifier equals `identifier`, or NULL
    when the list holds no such string.
*/
STRING* lookup_string(STRING* string_list_head, const char* identifier)
{
    STRING* cursor;

    for (cursor = string_list_head; cursor != NULL; cursor = cursor->next)
    {
        if (strcmp(cursor->identifier, identifier) == 0)
            return cursor;
    }

    return NULL;
}
/*
    lookup_tag

    Linear search of a TAG list by identifier.

    Returns the first tag whose identifier equals `identifier`, or NULL when
    the list holds no such tag.
*/
TAG* lookup_tag(TAG* tag_list_head, const char* identifier)
{
    TAG* cursor;

    for (cursor = tag_list_head; cursor != NULL; cursor = cursor->next)
    {
        if (strcmp(cursor->identifier, identifier) == 0)
            return cursor;
    }

    return NULL;
}
/*
    lookup_variable

    Linear search of a VARIABLE linked list by identifier.

    Returns the first variable whose identifier equals `identifier`, or NULL
    when the list holds no such variable.
*/
VARIABLE* lookup_variable(VARIABLE* variable_list_head, const char* identifier)
{
    VARIABLE* cursor;

    for (cursor = variable_list_head; cursor != NULL; cursor = cursor->next)
    {
        if (strcmp(cursor->identifier, identifier) == 0)
            return cursor;
    }

    return NULL;
}
/*
    new_rule

    Creates a RULE from the given parts, appends it to the tail of the rule
    list and registers it in the list's hash table. The pointers passed in
    are stored in the rule as-is (nothing is copied).

    Returns:
        ERROR_SUCCESS                    the rule was created and registered
        ERROR_DUPLICATE_RULE_IDENTIFIER  a rule with this identifier already
                                         exists in the namespace
        ERROR_INSUFICIENT_MEMORY         an allocation failed

    Note that when the bucket is occupied and the allocation of the extra
    hash entry fails, the rule has already been appended to the list.
*/
int new_rule(RULE_LIST* rules, char* identifier, NAMESPACE* ns, int flags, TAG* tag_list_head, META* meta_list_head, STRING* string_list_head, TERM* condition)
{
    RULE* rule;
    RULE_LIST_ENTRY* bucket;
    RULE_LIST_ENTRY* chained;
    unsigned int key;

    /* do not allow rules with the same identifier */
    if (lookup_rule(rules, identifier, ns) != NULL)
        return ERROR_DUPLICATE_RULE_IDENTIFIER;

    rule = (RULE*) yr_malloc(sizeof(RULE));

    if (rule == NULL)
        return ERROR_INSUFICIENT_MEMORY;

    rule->identifier = identifier;
    rule->ns = ns;
    rule->flags = flags;
    rule->tag_list_head = tag_list_head;
    rule->meta_list_head = meta_list_head;
    rule->string_list_head = string_list_head;
    rule->condition = condition;
    rule->next = NULL;

    /* append to the ordered rule list */
    if (rules->head == NULL && rules->tail == NULL)
        rules->head = rule;
    else
        rules->tail->next = rule;

    rules->tail = rule;

    /* register in the hash table */
    key = hash(0, identifier, strlen(identifier));
    key = hash(key, ns->name, strlen(ns->name));
    key = key % RULE_LIST_HASH_TABLE_SIZE;

    bucket = &rules->hash_table[key];

    if (bucket->rule == NULL)
    {
        bucket->rule = rule;
        return ERROR_SUCCESS;
    }

    /* bucket already occupied: chain a new entry right after its first slot */
    chained = (RULE_LIST_ENTRY*) yr_malloc(sizeof(RULE_LIST_ENTRY));

    if (chained == NULL)
        return ERROR_INSUFICIENT_MEMORY;

    chained->rule = rule;
    chained->next = bucket->next;
    bucket->next = chained;

    return ERROR_SUCCESS;
}
/*
    new_hex_string

    Parses the text of a hexadecimal string declaration and produces two
    newly allocated buffers:

        *hexstr   one byte per byte pattern found in the text
        *maskstr  a stream terminated by MASK_END that holds, for each byte
                  pattern, a mask byte whose nibbles are 0xF for a fixed hex
                  digit and 0x0 for a '?' wildcard, interleaved with markers:
                  MASK_OR for '(' and '|', MASK_OR_END for ')',
                  MASK_EXACT_SKIP + n for "[n]" and
                  MASK_RANGE_SKIP + lo + hi for "[lo-hi]"

    *length receives the number of byte patterns stored in *hexstr.

    The text is expected to be enclosed in '{' and '}': parsing starts at
    index 1 and stops before the last character. charstr->c_string is
    presumably NUL-terminated, since it is searched with strchr().

    After parsing, the first mask byte must be 0xFF (the string may not start
    with a wildcard or skip) and every alternation must contain at least one
    '|' and no empty alternative.

    Returns ERROR_SUCCESS or an error code. On any error, including
    allocation failure, both buffers are released and *hexstr and *maskstr
    are set to NULL.
*/
int new_hex_string( YARA_CONTEXT* context,
                    SIZED_STRING* charstr,
                    unsigned char** hexstr,
                    unsigned char** maskstr,
                    unsigned int* length)
{
    int i;
    int skip_lo;
    int skip_hi = 0;
    int skip_exact;
    char c,d;
    char* s;
    char* closing_bracket;
    int inside_or;
    int or_count;
    int len;
    int result = ERROR_SUCCESS;

    unsigned char high_nibble = 0;
    unsigned char low_nibble = 0;
    unsigned char mask_high_nibble = 0;
    unsigned char mask_low_nibble = 0;
    unsigned char* hex;
    unsigned char* mask;

    len = (int) charstr->length;

    *hexstr = hex = (unsigned char*) yr_malloc(len / 2);
    *maskstr = mask = (unsigned char*) yr_malloc(len);

    if (hex == NULL || mask == NULL)
    {
        if (hex) yr_free(hex);
        if (mask) yr_free(mask);

        /* do not leave the caller with pointers to released memory */
        *hexstr = NULL;
        *maskstr = NULL;

        return ERROR_INSUFICIENT_MEMORY;
    }

    i = 1;
    *length = 0;
    inside_or = FALSE;

    while (i < len - 1)
    {
        /* <ctype.h> functions require an unsigned char value */
        c = (char) toupper((unsigned char) charstr->c_string[i]);

        if (isxdigit((unsigned char) c) || (c == '?'))
        {
            d = (char) toupper((unsigned char) charstr->c_string[i + 1]);

            if (!isxdigit((unsigned char) d) && (d != '?'))
            {
                result = ERROR_UNPAIRED_NIBBLE;
                break;
            }

            if (c != '?')
            {
                high_nibble = todigit(c);
                mask_high_nibble = 0x0F;
            }
            else
            {
                high_nibble = 0;
                mask_high_nibble = 0;
            }

            if (d != '?')
            {
                low_nibble = todigit(d);
                mask_low_nibble = 0x0F;
            }
            else
            {
                low_nibble = 0;
                mask_low_nibble = 0;
            }

            *hex++ = (high_nibble << 4) | (low_nibble);
            *mask++ = (mask_high_nibble << 4) | (mask_low_nibble);

            (*length)++;

            i+=2;
        }
        else if (c == '(')
        {
            if (inside_or)
            {
                result = ERROR_NESTED_OR_OPERATION;
                break;
            }

            inside_or = TRUE;
            *mask++ = MASK_OR;
            i++;
        }
        else if (c == ')')
        {
            inside_or = FALSE;
            *mask++ = MASK_OR_END;
            i++;
        }
        else if (c == '|')
        {
            if (!inside_or)
            {
                result = ERROR_MISPLACED_OR_OPERATOR;
                break;
            }

            *mask++ = MASK_OR;
            i++;
        }
        else if (c == '[')
        {
            if (inside_or)
            {
                result = ERROR_SKIP_INSIDE_OR_OPERATION;
                break;
            }

            closing_bracket = strchr(charstr->c_string + i + 1, ']');

            if (closing_bracket == NULL)
            {
                result = ERROR_MISMATCHED_BRACKET;
                break;
            }

            s = closing_bracket + 1;

            while (*s == ' ') s++;  /* skip spaces */

            if (*s == '}')          /* no skip instruction should exist at the end of the string */
            {
                result = ERROR_SKIP_AT_END;
                break;
            }
            else if (*s == '[')     /* consecutive skip instructions are not allowed */
            {
                result = ERROR_CONSECUTIVE_SKIPS;
                break;
            }

            /* only decimal digits and '-' are allowed between brackets */

            for (s = charstr->c_string + i + 1; s < closing_bracket; s++)
            {
                if ((*s != '-') && (*s < '0' || *s > '9'))
                {
                    result = ERROR_INVALID_SKIP_VALUE;
                    break;
                }
            }

            /* the break above only leaves the for loop; stop parsing here */
            if (result != ERROR_SUCCESS)
            {
                break;
            }

            skip_lo = atoi(charstr->c_string + i + 1);

            if (skip_lo < 0 || skip_lo > MASK_MAX_SKIP)
            {
                result = ERROR_INVALID_SKIP_VALUE;
                break;
            }

            skip_exact = 1;

            s = strchr(charstr->c_string + i + 1, '-');

            if (s != NULL && s < closing_bracket)
            {
                skip_hi = atoi(s + 1);

                if (skip_hi <= skip_lo || skip_hi > MASK_MAX_SKIP)
                {
                    result = ERROR_INVALID_SKIP_VALUE;
                    break;
                }

                skip_exact = 0;
            }

            if (skip_exact)
            {
                *mask++ = MASK_EXACT_SKIP;
                *mask++ = (unsigned char) skip_lo;
            }
            else
            {
                *mask++ = MASK_RANGE_SKIP;
                *mask++ = (unsigned char) skip_lo;
                *mask++ = (unsigned char) skip_hi;
            }

            i = (int) (closing_bracket - charstr->c_string + 1);
        }
        else if (c == ']')
        {
            result = ERROR_MISMATCHED_BRACKET;
            break;
        }
        else if (c == ' ' || c == '\n' || c == '\t')
        {
            i++;
        }
        else
        {
            result = ERROR_INVALID_CHAR_IN_HEX_STRING;
            break;
        }
    }

    *mask++ = MASK_END;

    /* wildcards or skip instructions are not allowed at the first position of the string */

    if ((*maskstr)[0] != 0xFF)
    {
        result = ERROR_MISPLACED_WILDCARD_OR_SKIP;
    }

    /* check that the byte-or syntax is correct */

    i = 0;
    or_count = 0;

    while ((*maskstr)[i] != MASK_END)
    {
        if ((*maskstr)[i] == MASK_OR)
        {
            or_count++;

            /* an alternative may not be empty */
            if ( (*maskstr)[i+1] == MASK_OR || (*maskstr)[i+1] == MASK_OR_END )
            {
                result = ERROR_INVALID_OR_OPERATION_SYNTAX;
                break;
            }
        }
        else if ((*maskstr)[i] == MASK_OR_END)
        {
            /* '(' and '|' both count, so fewer than two means no '|' at all */
            if (or_count < 2)
            {
                result = ERROR_INVALID_OR_OPERATION_SYNTAX;
                break;
            }

            or_count = 0;
        }

        i++;
    }

    if (result != ERROR_SUCCESS)
    {
        yr_free(*hexstr);
        yr_free(*maskstr);
        *hexstr = NULL;
        *maskstr = NULL;
    }

    return result;
}
/*
    new_text_string

    Prepares the data of a text or regular-expression string.

    A copy of charstr's bytes is stored in a newly allocated buffer returned
    through *hexstr, and *length receives charstr->length. When `flags`
    contains STRING_FLAGS_REGEXP the text is also compiled into `re`
    (case-insensitively if STRING_FLAGS_NO_CASE is set); otherwise `re` is
    zeroed.

    Returns:
        ERROR_SUCCESS
        ERROR_INSUFICIENT_MEMORY          the buffer could not be allocated
        ERROR_INVALID_REGULAR_EXPRESSION  compilation failed; the message is
                                          left in
                                          context->last_error_extra_info

    On failure no buffer is left allocated: *hexstr is NULL.
*/
int new_text_string(    YARA_CONTEXT* context,
                        SIZED_STRING* charstr,
                        int flags,
                        unsigned char** hexstr,
                        REGEXP* re,
                        unsigned int* length)
{
    int erroffset;
    int result = ERROR_SUCCESS;

    *length = charstr->length;
    *hexstr = yr_malloc(charstr->length);

    if (*hexstr == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    memcpy(*hexstr, charstr->c_string, charstr->length);

    if (flags & STRING_FLAGS_REGEXP)
    {
        if (regex_compile(re,                                       // REGEXP *
                          charstr->c_string,                        // Regex pattern
                          flags & STRING_FLAGS_NO_CASE,             // If TRUE then case insensitive search
                          context->last_error_extra_info,           // Error message
                          sizeof(context->last_error_extra_info),   // Size of error buffer
                          &erroffset) <= 0)                         // Offset into regex pattern if error detected
        {
            /* the caller only releases the STRING itself on failure, so the
               copy made above has to be released here */
            yr_free(*hexstr);
            *hexstr = NULL;

            result = ERROR_INVALID_REGULAR_EXPRESSION;
        }
    }
    else
    {
        // re contains multiple pointers now, if we're
        // not doing a regex, make sure all are NULL.
        memset(re, '\0', sizeof(REGEXP));
    }

    return result;
}
/*
    new_string

    Allocates a STRING and fills it from a string declaration. When `flags`
    lacks STRING_FLAGS_WIDE, STRING_FLAGS_ASCII is added. The pattern data is
    produced by new_hex_string() for STRING_FLAGS_HEXADECIMAL strings and by
    new_text_string() otherwise. The identifier pointer is stored as-is.

    On return *string points to the new STRING, or is NULL on failure.

    Returns ERROR_SUCCESS, ERROR_INSUFICIENT_MEMORY, or the error reported by
    the pattern constructor.
*/
int new_string( YARA_CONTEXT* context,
                char* identifier,
                SIZED_STRING* charstr,
                int flags,
                STRING** string)
{
    int result;
    STRING* created = (STRING*) yr_malloc(sizeof(STRING));

    if (created == NULL)
    {
        *string = NULL;
        return ERROR_INSUFICIENT_MEMORY;
    }

    if ((flags & STRING_FLAGS_WIDE) == 0)
    {
        flags |= STRING_FLAGS_ASCII;
    }

    created->identifier = identifier;
    created->flags = flags;
    created->next = NULL;
    created->matches_head = NULL;
    created->matches_tail = NULL;

    result = (flags & STRING_FLAGS_HEXADECIMAL)
        ? new_hex_string(context, charstr, &created->string, &created->mask, &created->length)
        : new_text_string(context, charstr, flags, &created->string, &created->re, &created->length);

    if (result != ERROR_SUCCESS)
    {
        yr_free(created);
        created = NULL;
    }

    *string = created;

    return result;
}
/*
    new_simple_term

    Allocates a TERM carrying only a type.

    *term receives the new term, or NULL when the allocation fails.
    Returns ERROR_SUCCESS or ERROR_INSUFICIENT_MEMORY.
*/
int new_simple_term(int type, TERM** term)
{
    TERM* created = (TERM*) yr_malloc(sizeof(TERM));

    *term = created;

    if (created == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    created->type = type;

    return ERROR_SUCCESS;
}
/*
    new_unary_operation

    Allocates a unary operation term of the given type with operand `op`.

    *term receives the new term, or NULL when the allocation fails.
    Returns ERROR_SUCCESS or ERROR_INSUFICIENT_MEMORY.
*/
int new_unary_operation(int type, TERM* op, TERM_UNARY_OPERATION** term)
{
    TERM_UNARY_OPERATION* created = (TERM_UNARY_OPERATION*) yr_malloc(sizeof(TERM_UNARY_OPERATION));

    *term = created;

    if (created == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    created->type = type;
    created->op = op;

    return ERROR_SUCCESS;
}
/*
    new_binary_operation

    Allocates a binary operation term of the given type with operands `op1`
    and `op2`.

    *term receives the new term, or NULL when the allocation fails.
    Returns ERROR_SUCCESS or ERROR_INSUFICIENT_MEMORY.
*/
int new_binary_operation(int type, TERM* op1, TERM* op2, TERM_BINARY_OPERATION** term)
{
    TERM_BINARY_OPERATION* created = (TERM_BINARY_OPERATION*) yr_malloc(sizeof(TERM_BINARY_OPERATION));

    *term = created;

    if (created == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    created->type = type;
    created->op1 = op1;
    created->op2 = op2;

    return ERROR_SUCCESS;
}
/*
    new_ternary_operation

    Allocates a ternary operation term of the given type with operands
    `op1`, `op2` and `op3`.

    *term receives the new term, or NULL when the allocation fails.
    Returns ERROR_SUCCESS or ERROR_INSUFICIENT_MEMORY.
*/
int new_ternary_operation(int type, TERM* op1, TERM* op2, TERM* op3, TERM_TERNARY_OPERATION** term)
{
    TERM_TERNARY_OPERATION* created = (TERM_TERNARY_OPERATION*) yr_malloc(sizeof(TERM_TERNARY_OPERATION));

    *term = created;

    if (created == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    created->type = type;
    created->op1 = op1;
    created->op2 = op2;
    created->op3 = op3;

    return ERROR_SUCCESS;
}
/*
    new_constant

    Allocates a TERM_TYPE_CONST term holding `constant`.

    *term receives the new term, or NULL when the allocation fails.
    Returns ERROR_SUCCESS or ERROR_INSUFICIENT_MEMORY.
*/
int new_constant(size_t constant, TERM_CONST** term)
{
    TERM_CONST* created = (TERM_CONST*) yr_malloc(sizeof(TERM_CONST));

    *term = created;

    if (created == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    created->type = TERM_TYPE_CONST;
    created->value = constant;

    return ERROR_SUCCESS;
}
/*
    new_string_identifier

    Creates a TERM_STRING of the given type for a string identifier used in a
    condition.

    For a named identifier the string is looked up in `defined_strings` and
    flagged STRING_FLAGS_REFERENCED. For the term types
    TERM_TYPE_STRING_COUNT, TERM_TYPE_STRING_AT, TERM_TYPE_STRING_IN_RANGE
    and TERM_TYPE_STRING_OFFSET, STRING_FLAGS_FAST_MATCH is cleared on the
    string as well. For the anonymous identifier "$" the term is created with
    no string attached.

    *term receives the new term, or NULL on failure.

    Returns:
        ERROR_SUCCESS
        ERROR_UNDEFINED_STRING    no string with that identifier is defined
        ERROR_INSUFICIENT_MEMORY  the term could not be allocated
*/
int new_string_identifier(int type, STRING* defined_strings, char* identifier, TERM_STRING** term)
{
    TERM_STRING* new_term;
    STRING* string = NULL;

    *term = NULL;

    if (strcmp(identifier, "$") != 0) /* non-anonymous strings */
    {
        string = lookup_string(defined_strings, identifier);

        if (string == NULL)
        {
            return ERROR_UNDEFINED_STRING;
        }

        /* the string has been used in an expression, mark it as referenced */
        string->flags |= STRING_FLAGS_REFERENCED;

        /* in these cases we can not use the fast-matching mode */
        if (type == TERM_TYPE_STRING_COUNT ||
            type == TERM_TYPE_STRING_AT ||
            type == TERM_TYPE_STRING_IN_RANGE ||
            type == TERM_TYPE_STRING_OFFSET)
        {
            string->flags &= ~STRING_FLAGS_FAST_MATCH;
        }
    }

    new_term = (TERM_STRING*) yr_malloc(sizeof(TERM_STRING));

    if (new_term == NULL)
    {
        /* report the failure; callers dereference the term on success */
        return ERROR_INSUFICIENT_MEMORY;
    }

    new_term->type = type;
    new_term->string = string;  /* NULL for anonymous strings */
    new_term->next = NULL;

    *term = new_term;

    return ERROR_SUCCESS;
}
/*
    new_variable

    Creates a TERM_TYPE_VARIABLE term referring to the external variable
    named `identifier`, which must already exist in context->variables.

    *term receives the new term, or NULL on failure.

    Returns:
        ERROR_SUCCESS
        ERROR_UNDEFINED_IDENTIFIER  no such variable; the identifier is
                                    copied into
                                    context->last_error_extra_info
        ERROR_INSUFICIENT_MEMORY    the term could not be allocated
*/
int new_variable(YARA_CONTEXT* context, char* identifier, TERM_VARIABLE** term)
{
    TERM_VARIABLE* created;
    VARIABLE* target = lookup_variable(context->variables, identifier);

    /* external variable should be defined */
    if (target == NULL)
    {
        strncpy(context->last_error_extra_info, identifier, sizeof(context->last_error_extra_info));
        context->last_error_extra_info[sizeof(context->last_error_extra_info)-1] = 0;
        *term = NULL;
        return ERROR_UNDEFINED_IDENTIFIER;
    }

    created = (TERM_VARIABLE*) yr_malloc(sizeof(TERM_VARIABLE));
    *term = created;

    if (created == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    created->type = TERM_TYPE_VARIABLE;
    created->variable = target;

    return ERROR_SUCCESS;
}
/*
    vector_first

    `first` callback installed on TERM_VECTOR by new_vector(): resets the
    vector's cursor to 0 and returns the item stored at index 0. The
    `evaluate` and `context` arguments are not used.
*/
TERM* vector_first(TERM_ITERABLE* self, EVALUATION_FUNCTION evaluate, EVALUATION_CONTEXT* context)
{
    TERM_VECTOR* vec = (TERM_VECTOR*) self;

    vec->current = 0;

    return vec->items[0];
}
/*
    vector_next

    `next` callback installed on TERM_VECTOR by new_vector(): advances the
    vector's cursor and returns the item at the new position, or NULL without
    moving the cursor when it is already on the last item. The `evaluate` and
    `context` arguments are not used.
*/
TERM* vector_next(TERM_ITERABLE* self, EVALUATION_FUNCTION evaluate, EVALUATION_CONTEXT* context)
{
    TERM_VECTOR* vec = (TERM_VECTOR*) self;

    if (!(vec->current < vec->count - 1))
    {
        return NULL;
    }

    vec->current++;

    return vec->items[vec->current];
}
/*
    new_vector

    Allocates an empty TERM_VECTOR: count and cursor are 0, the first item
    slot is NULL, and the iteration callbacks are vector_first and
    vector_next.

    *term receives the new vector, or NULL when the allocation fails.
    Returns ERROR_SUCCESS or ERROR_INSUFICIENT_MEMORY.
*/
int new_vector(TERM_VECTOR** term)
{
    TERM_VECTOR* vec = (TERM_VECTOR*) yr_malloc(sizeof(TERM_VECTOR));

    *term = vec;

    if (vec == NULL)
    {
        return ERROR_INSUFICIENT_MEMORY;
    }

    vec->type = TERM_TYPE_VECTOR;
    vec->first = vector_first;
    vec->next = vector_next;
    vec->count = 0;
    vec->current = 0;
    vec->items[0] = NULL;

    return ERROR_SUCCESS;
}
/*
    add_term_to_vector

    Appends `term` to the vector when it holds fewer than MAX_VECTOR_SIZE
    items.

    Returns ERROR_SUCCESS, or ERROR_VECTOR_TOO_LONG when the vector is full
    (the vector is left unchanged in that case).
*/
int add_term_to_vector(TERM_VECTOR* vector, TERM* term)
{
    if (vector->count >= MAX_VECTOR_SIZE)
    {
        return ERROR_VECTOR_TOO_LONG;
    }

    vector->items[vector->count] = term;
    vector->count++;

    return ERROR_SUCCESS;
}
/*
    range_first

    `first` callback installed on TERM_RANGE by new_range(): evaluates the
    range's `min` term, stores the result in the range's `current` constant
    and returns that constant.
*/
TERM* range_first(TERM_ITERABLE* self, EVALUATION_FUNCTION evaluate, EVALUATION_CONTEXT* context)
{
    TERM_RANGE* rng = (TERM_RANGE*) self;

    rng->current->value = evaluate(rng->min, context);

    return (TERM*) rng->current;
}
/*
    range_next

    `next` callback installed on TERM_RANGE by new_range(): while the
    range's `current` value is below the evaluated `max` term, increments it
    and returns the `current` constant; otherwise returns NULL.
*/
TERM* range_next(TERM_ITERABLE* self, EVALUATION_FUNCTION evaluate, EVALUATION_CONTEXT* context)
{
    TERM_RANGE* rng = (TERM_RANGE*) self;

    if (!(rng->current->value < evaluate(rng->max, context)))
    {
        return NULL;
    }

    rng->current->value++;

    return (TERM*) rng->current;
}
/*
    new_range

    Allocates a TERM_RANGE iterating from `min` to `max`, with range_first
    and range_next as iteration callbacks and a constant term (initially 0)
    as the `current` cursor.

    *term receives the new range, or NULL on failure; a range whose cursor
    could not be allocated is released instead of being handed out
    half-built. `min` and `max` are not released by this function.

    Returns ERROR_SUCCESS, ERROR_INSUFICIENT_MEMORY, or the error reported by
    new_constant().
*/
int new_range(TERM* min, TERM* max, TERM_RANGE** term)
{
    TERM_RANGE* new_term;
    int result;

    new_term = (TERM_RANGE*) yr_malloc(sizeof(TERM_RANGE));

    if (new_term == NULL)
    {
        *term = NULL;
        return ERROR_INSUFICIENT_MEMORY;
    }

    new_term->type = TERM_TYPE_RANGE;
    new_term->first = range_first;
    new_term->next = range_next;
    new_term->min = min;
    new_term->max = max;

    result = new_constant(0, &new_term->current);

    if (result != ERROR_SUCCESS)
    {
        /* without its cursor the range is unusable and unsafe to free later */
        yr_free(new_term);
        new_term = NULL;
    }

    *term = new_term;

    return result;
}
/*
    free_term(TERM* term)

    Frees a term. If the term depends on other terms they are also freed. Notice that
    some terms hold references to STRING or VARIABLE structures, but these
    structures are freed by yr_destroy_context not by this function.

    A NULL term is accepted and ignored, so child pointers that were never
    set do not have to be checked by the caller or by the recursion below.
*/
void free_term(TERM* term)
{
    TERM_STRING* next;
    TERM_STRING* tmp;
    int i, count;

    if (term == NULL)
    {
        return;
    }

    switch(term->type)
    {
    case TERM_TYPE_STRING:

        /* release the rest of the chain; the head is released at the end */
        next = ((TERM_STRING*) term)->next;

        while (next != NULL)
        {
            tmp = next->next;
            yr_free(next);
            next = tmp;
        }

        break;

    case TERM_TYPE_STRING_AT:

        free_term(((TERM_STRING*)term)->offset);
        break;

    case TERM_TYPE_STRING_OFFSET:

        free_term(((TERM_STRING*)term)->index);
        break;

    case TERM_TYPE_STRING_IN_RANGE:

        free_term(((TERM_STRING*)term)->range);
        break;

    case TERM_TYPE_STRING_IN_SECTION_BY_NAME:

        yr_free(((TERM_STRING*)term)->section_name);
        break;

    case TERM_TYPE_STRING_MATCH:

        regex_free(&(((TERM_STRING_OPERATION*)term)->re));
        break;

    case TERM_TYPE_STRING_CONTAINS:

        yr_free(((TERM_STRING_OPERATION*)term)->string);
        break;

    case TERM_TYPE_AND:
    case TERM_TYPE_OR:
    case TERM_TYPE_ADD:
    case TERM_TYPE_SUB:
    case TERM_TYPE_MUL:
    case TERM_TYPE_DIV:
    case TERM_TYPE_MOD:
    case TERM_TYPE_GT:
    case TERM_TYPE_LT:
    case TERM_TYPE_GE:
    case TERM_TYPE_LE:
    case TERM_TYPE_EQ:
    case TERM_TYPE_OF:
    case TERM_TYPE_NOT_EQ:
    case TERM_TYPE_SHIFT_LEFT:
    case TERM_TYPE_SHIFT_RIGHT:
    case TERM_TYPE_BITWISE_OR:
    case TERM_TYPE_BITWISE_XOR:
    case TERM_TYPE_BITWISE_AND:

        free_term(((TERM_BINARY_OPERATION*)term)->op1);
        free_term(((TERM_BINARY_OPERATION*)term)->op2);
        break;

    case TERM_TYPE_NOT:
    case TERM_TYPE_BITWISE_NOT:
    case TERM_TYPE_INT8_AT_OFFSET:
    case TERM_TYPE_INT16_AT_OFFSET:
    case TERM_TYPE_INT32_AT_OFFSET:
    case TERM_TYPE_UINT8_AT_OFFSET:
    case TERM_TYPE_UINT16_AT_OFFSET:
    case TERM_TYPE_UINT32_AT_OFFSET:

        free_term(((TERM_UNARY_OPERATION*)term)->op);
        break;

    case TERM_TYPE_RANGE:

        free_term(((TERM_RANGE*)term)->min);
        free_term(((TERM_RANGE*)term)->max);
        free_term((TERM*) ((TERM_RANGE*)term)->current);
        break;

    case TERM_TYPE_VECTOR:

        count = ((TERM_VECTOR*)term)->count;

        for (i = 0; i < count; i++)
        {
            free_term(((TERM_VECTOR*)term)->items[i]);
        }

        break;

    case TERM_TYPE_INTEGER_FOR:

        free_term(((TERM_INTEGER_FOR*)term)->count);
        free_term(((TERM_INTEGER_FOR*)term)->expression);
        free_term((TERM*) ((TERM_INTEGER_FOR*)term)->items);
        break;

    case TERM_TYPE_STRING_FOR:

        free_term(((TERM_TERNARY_OPERATION*)term)->op1);
        free_term(((TERM_TERNARY_OPERATION*)term)->op2);
        free_term(((TERM_TERNARY_OPERATION*)term)->op3);
        break;

    default:

        /* every other term type owns nothing besides the term itself */
        break;
    }

    yr_free(term);
}