为什么80%的码农都做不了架构师?>>>
背景介绍:
为什么需要这个表达式解析微引擎?
首先,这个引擎产生于调用链路跟踪系统。我们知道,在调用链路跟踪系统里经常需要根据关键字搜索日志,但有时候搜一个关键字可能会搜出 N 条记录,这时就需要借助表达式进行更精准的搜索,比如搜索既包含 'a' 又包含 'b'、同时排除 'c' 的日志。
一般的做法可能是前端做一个动态表单,让用户去选择关系类型(and、or、not),但这样就需要修改前端。重点来了:有没有一种方式,前端不做任何修改,由后端服务去解析用户输入的搜索词,从而得出我们想要的表达式呢?
请往下看
解析引擎源码:
/**
 * One parsed search term: the text to look for plus the logical relation
 * that applies to it.
 */
public class Keyword {

    /** Text of the search term. */
    private String key;

    /** Relation under which {@link #key} takes part in the search. */
    private RelationEnum relation;

    /**
     * Creates a keyword.
     *
     * @param key      text of the search term
     * @param relation relation that applies to the term
     */
    public Keyword(String key, RelationEnum relation) {
        this.relation = relation;
        this.key = key;
    }

    /** @return the relation that applies to this term */
    public RelationEnum getRelation() {
        return this.relation;
    }

    /** @param relation the relation that applies to this term */
    public void setRelation(RelationEnum relation) {
        this.relation = relation;
    }

    /** @return the text of the search term */
    public String getKey() {
        return this.key;
    }

    /** @param key the text of the search term */
    public void setKey(String key) {
        this.key = key;
    }
}
/**
 * Operator characters recognised in a search expression, each mapped to the
 * {@link RelationEnum} it stands for:
 * {@code '&'} is AND, {@code '|'} and a blank are OR, {@code '!'} is NO.
 */
public enum LabelEnum {
    AND('&', RelationEnum.AND),
    OR('|', RelationEnum.OR),
    BLANK(' ', RelationEnum.OR),
    EXCLAMATION('!', RelationEnum.NO);

    /** The operator character as typed by the user. */
    private Character label;

    /** The relation this operator applies to the term that follows it. */
    private RelationEnum relation;

    // The char literals above are autoboxed; the Character(char) constructor
    // is deprecated for removal and must not be used.
    LabelEnum(Character label, RelationEnum relation) {
        this.label = label;
        this.relation = relation;
    }

    /** @return the operator character */
    public Character getLabel() {
        return label;
    }

    /**
     * Replaces the operator character of this constant.
     * NOTE(review): enum constants are shared, so this changes the value for
     * every user of the constant; kept only for compatibility.
     *
     * @param label the new operator character
     */
    public void setLabel(Character label) {
        this.label = label;
    }

    /** @return the relation this operator stands for */
    public RelationEnum getRelation() {
        return relation;
    }

    /**
     * Replaces the relation of this constant.
     * NOTE(review): enum constants are shared, so this changes the value for
     * every user of the constant; kept only for compatibility.
     *
     * @param relation the new relation
     */
    public void setRelation(RelationEnum relation) {
        this.relation = relation;
    }
}
/**
 * Logical relation carried by a parsed keyword.
 */
public enum RelationEnum {
    /** Produced by the {@code '&'} operator and used for the leading term. */
    AND,
    /** Produced by the {@code '|'} operator and by a blank. */
    OR,
    /** Produced by the {@code '!'} operator. */
    NO
}
/**
 * Records the character positions of the opening and closing square brackets
 * that belong to one bracket group of a search expression.
 */
public class SquareBracket {

    /** Positions of {@code '['} characters, in the order they were added. */
    private final LinkedList<Integer> left = new LinkedList<>();

    /** Positions of {@code ']'} characters, in the order they were added. */
    private final LinkedList<Integer> right = new LinkedList<>();

    /**
     * Records the position of an opening bracket.
     *
     * @param index position of the {@code '['} in the expression
     */
    public void addLeft(int index) {
        this.left.add(index);
    }

    /**
     * Records the position of a closing bracket.
     *
     * @param index position of the {@code ']'} in the expression
     */
    public void addRight(int index) {
        this.right.add(index);
    }

    /** @return the live list of recorded opening-bracket positions */
    public LinkedList<Integer> getLeft() {
        return left;
    }

    /** @return the live list of recorded closing-bracket positions */
    public LinkedList<Integer> getRight() {
        return right;
    }
}
/**
 * State of an operator tag found while scanning a search expression.
 */
public enum TagStatusEnum {
    /** The operator was found while no square bracket was pending. */
    VALID,
    /** The operator was found after an opening square bracket was seen. */
    UNCONFIRMED,
    /** The operator is treated as literal text and is skipped when splitting. */
    INVALID
}
/**
 * An operator occurrence inside a search expression: which operator it is,
 * where it sits in the expression, and its current status.
 */
public class Tag {

    /** The operator that was recognised. */
    private LabelEnum label;

    /** Position of the operator character in the expression. */
    private int index;

    /** Current status of this occurrence. */
    private TagStatusEnum status;

    /**
     * Creates a tag.
     *
     * @param label           the recognised operator
     * @param index           position of the operator character
     * @param labelStatusEnum initial status of the occurrence
     */
    public Tag(LabelEnum label, int index, TagStatusEnum labelStatusEnum) {
        this.status = labelStatusEnum;
        this.index = index;
        this.label = label;
    }

    /** @return the current status */
    public TagStatusEnum getStatus() {
        return this.status;
    }

    /** @param status the new status */
    public void setStatus(TagStatusEnum status) {
        this.status = status;
    }

    /** @return position of the operator character */
    public int getIndex() {
        return this.index;
    }

    /** @param index position of the operator character */
    public void setIndex(int index) {
        this.index = index;
    }

    /** @return the recognised operator */
    public LabelEnum getLabel() {
        return this.label;
    }

    /** @param label the recognised operator */
    public void setLabel(LabelEnum label) {
        this.label = label;
    }
}
public class ExpressionProcessor {
private static final char PREFIX_BRACKET = '[';
private static final char SUFFIX_BRACKET = ']';
/**
* 分词keyword,并解析相应关系
* 具体参考:{@linkplain com.huize.framework.apm.data.es.expression.LabelEnum}
* 如果表达式的特殊字段需要查询则需要用 ‘[]’括起则有效,且在同一组中,只有最外层的有效;
*
*
* ExpressionProcessor.split("12&34") = (12,&)、(34,&)
* ExpressionProcessor.split("12!34") = (12,&)、(34,!)
* ExpressionProcessor.split("12[!]34") = (12!34,&)
* ExpressionProcessor.split("12[[!34]]") = (12[!34],&)
*
*/
public static List split(String keyword) {
return convert(keyword,mark(keyword));
}
private static Pair,List> mark(String keyword) {
List tags = Lists.newArrayList();
TagStatusEnum status = TagStatusEnum.VALID;
int unconfirmedCount = 0;
Tag tag = null;
List squareBrackets = Lists.newArrayList();
SquareBracket squareBracket = new SquareBracket();
squareBrackets.add(squareBracket);
for (int i = 0; i < keyword.length(); i++) {
final char ch = keyword.charAt(i);
if(ch == PREFIX_BRACKET) {
if(unconfirmedCount == 0) {
squareBracket = new SquareBracket();
squareBrackets.add(squareBracket);
}
squareBracket.addLeft(i);
unconfirmedCount += 1;
status = TagStatusEnum.UNCONFIRMED;
continue;
}
if(ch == SUFFIX_BRACKET) {
unconfirmedCount -=1;
if(status == TagStatusEnum.UNCONFIRMED) {
resetTagStatus(tags);
}
if(unconfirmedCount == 0) {
status = TagStatusEnum.VALID;
}
squareBracket.addRight(i);
continue;
}
tag = getTag(ch,i,status);
if(i == 0 && tag== null) {
tag = new Tag(LabelEnum.AND, -1, TagStatusEnum.VALID);
}
if(tag != null) {
tags.add(tag);
}
}
return Pair.of(tags, squareBrackets);
}
private static List convert(String keyword,Pair,List> pair) {
List tags = pair.getLeft();
if(CollectionUtils.isEmpty(tags)) {
Lists.newArrayList(new Keyword(keyword,RelationEnum.AND));
}
List keywords = Lists.newArrayListWithCapacity(tags.size());
List bracketIndexs = getValidBracketIndexs(pair.getRight());
final int size = tags.size();
int step = 0;
int next = 0;
for(int i = 0; i < size; i++) {
Tag tag = tags.get(i);
if(tag == null) {
continue;
}
if(TagStatusEnum.INVALID == tag.getStatus()) {
continue;
}
next = i + 1;
int index = tag.getIndex();
step = index;
if (next < size) {
while(TagStatusEnum.INVALID == tags.get(next).getStatus() && next < (size -1)) {
next += 1;
}
if(TagStatusEnum.INVALID == tags.get(next).getStatus()) {
step = keyword.length();
}else {
step = tags.get(next).getIndex();
}
}else {
step = keyword.length();
}
if(step - index > 1) {
keywords.add(new Keyword(cut(keyword,index+1,step,bracketIndexs),tag.getLabel().getRelation()));
}
}
return keywords;
}
private static String cut(String keyword,int start,int end,List bracketIndexs) {
List list = Lists.newArrayList();
for(Integer index : bracketIndexs) {
if(index >=start && index <= end) {
list.add(index);
}
}
StringBuffer buffer = new StringBuffer(StringUtils.substring(keyword,start,end));
if(!CollectionUtils.isEmpty(list)) {
int delCount = 0;
for(int index : list) {
int delIndex = index - delCount- start;
char ch = buffer.charAt(delIndex);
if(ch == PREFIX_BRACKET || ch == SUFFIX_BRACKET) {
buffer.deleteCharAt(delIndex);
delCount += 1;
}
}
}
return buffer.toString();
}
private static void resetTagStatus(List tags) {
if(!CollectionUtils.isEmpty(tags)) {
for(Tag tag : tags) {
if(TagStatusEnum.UNCONFIRMED == tag.getStatus()) {
tag.setStatus(TagStatusEnum.INVALID);
}
}
}
}
private static Tag getTag(char charset,int index,TagStatusEnum status) {
LabelEnum label = null;
if(LabelEnum.AND.getLabel() == charset) {
label = LabelEnum.AND;
}else if(LabelEnum.OR.getLabel() == charset) {
label = LabelEnum.OR;
}else if(LabelEnum.BLANK.getLabel() == charset) {
label = LabelEnum.BLANK;
}else if(LabelEnum.EXCLAMATION.getLabel() == charset) {
label = LabelEnum.EXCLAMATION;
}
if(label != null) {
return new Tag(label,index,status);
}
return null;
}
private static List getValidBracketIndexs(List squareBrackets) {
List bracketIndexs = Lists.newArrayList();
if(CollectionUtils.isEmpty(squareBrackets)) {
return bracketIndexs;
}
for(SquareBracket squareBracket : squareBrackets) {
if(CollectionUtils.isEmpty(squareBracket.getLeft()) || CollectionUtils.isEmpty(squareBracket.getRight())) {
continue;
}
int first = squareBracket.getLeft().getFirst();
int last = squareBracket.getRight().getLast();
if(last > first) {
bracketIndexs.add(first);
bracketIndexs.add(last);
}
}
return bracketIndexs;
}
}
代码就上面这么多,非常简洁。
下面就来测试下:
/**
 * Manual demo: parses one sample expression and prints the result as JSON.
 */
public class ExpressionProcessorTest {

    /*
     * Further sample expressions; swap one in for the active sample below:
     *
     *   "20180209002149[!]27343565"
     *   "20180209002149[!&!]27343565"
     *   "201802||09002149[!&!]27343565&&sdfas"
     *   "测试]][[!]测试[[[[psdf]23541345!@#$%^|)(*"
     *   "201802|||09002149[!&!]27343565&&sdfas"
     *   "201802|||!!09002149[!&!]27343565&&sdfas"
     *   "201802||!!09002149[!&!]27343565&&sdfas"
     */
    public static void main(String[] args) {
        final String expression = "a&d!c";
        System.out.println(
                JsonUtils.beanToJson(ExpressionProcessor.split(expression)));
    }
}
运行:
[{"key":"a","relation":"AND"},{"key":"d","relation":"AND"},{"key":"c","relation":"NO"}]