获取meta里的keywords及description的方法

通过jericho包获取meta里的keywords及description的方法一:
		// Extract the keywords and description from the page's <meta> elements.
		List segments = source.findAllElements(Tag.META);

		// Run the regex-based extraction (getKeywordsDesc) over the same list.
		getKeywordsDesc(segments);

		if (null != segments) {
			int sumSegments = segments.size();
			for (int i = 0; i < sumSegments; i++) {
				String metaStr = segments.get(i).toString();
				// Skip empty tags and tags that carry no content attribute.
				if (null == metaStr || "".equals(metaStr.trim())
						|| metaStr.indexOf("content") == -1) {
					continue;
				}

				boolean hasDescription = metaStr.indexOf("description") != -1;
				boolean hasKeywords = metaStr.indexOf("keywords") != -1;
				if (!hasDescription && !hasKeywords) {
					continue;
				}

				// Work on a copy with quotes and slashes removed so the original
				// tag text is never overwritten between the two checks.
				String stripped = metaStr.replace("\"", "").replace("/", "");
				String contentPart = stripped.substring(stripped.indexOf("content"));

				// The value is everything between the first '=' after "content"
				// and the last character of the tag (the closing '>').
				// NOTE(review): this assumes content is the last attribute of the
				// tag; any attribute written after it ends up in the value.
				int valueStart = contentPart.indexOf("=") + 1;
				int valueEnd = contentPart.length() - 1;
				if (valueStart > valueEnd) {
					// Malformed tag: nothing follows "content=".
					continue;
				}
				String contentStr = TextHtml.html2text(contentPart.substring(
						valueStart, valueEnd));

				// Only update the bean when the matching meta tag was found, so a
				// value is never pushed for unrelated meta tags.
				if (hasDescription) {
					parserBean.setDescription(removeTag(contentStr));
				}
				if (hasKeywords) {
					parserBean.setKeywords(removeTag(contentStr));
				}
			}// for over
		}


方法二(使用正则表达式提取,即上面调用的getKeywordsDesc方法):
/**
	 * Extracts the keywords and description from a list of meta tags and stores
	 * them in this object's {@code keyowrds} and {@code description} fields.
	 *
	 * Each element is converted with {@code toString()} and lower-cased before
	 * matching, so the stored values are lower-case. A field is left untouched
	 * when no tag yields a value for it.
	 *
	 * @param segments the meta tag elements to inspect; may be {@code null},
	 *                 in which case nothing happens
	 */
	private void getKeywordsDesc(List segments) {
		if (null == segments) {
			return;
		}
		int sumSegments = segments.size();
		for (int i = 0; i < sumSegments; i++) {
			Object element = segments.get(i);
			if (null == element) {
				continue;
			}
			// Locale.ROOT keeps the lower-casing independent of the default
			// locale (e.g. the Turkish dotless i would break the matching).
			String segment = element.toString().toLowerCase(java.util.Locale.ROOT);
			if ("".equals(segment.trim()) || segment.indexOf("content") <= 0) {
				continue;
			}

			// Keywords from the meta tag.
			if (segment.indexOf("keywords") > 0) {
				String keywords = extractMetaContent(segment, "keywords");
				if (null != keywords) {
					// NOTE(review): "keyowrds" looks like a typo of "keywords";
					// kept because the field is declared outside this block.
					this.keyowrds = keywords;
				}
			}

			// Description from the meta tag.
			if (segment.indexOf("description") > 0) {
				String description = extractMetaContent(segment, "description");
				if (null != description) {
					this.description = description;
				}
			}
		}
	}

	/**
	 * Pulls the content value out of a single meta tag for the given meta name,
	 * trying the name-then-content attribute order first and the reverse order
	 * second, then strips tags, slashes and double quotes from the result.
	 *
	 * @param segment  the lower-cased text of one meta tag
	 * @param metaName the meta name to look for, e.g. {@code keywords}
	 * @return the cleaned content value, or {@code null} when neither pattern
	 *         matches
	 */
	private String extractMetaContent(String segment, String metaName) {
		String value = Regex("< *meta *name *= *\"? *" + metaName
				+ " *\"? *content *= *\"?(.*) *\"? */? *>", segment);
		if (null == value) {
			value = Regex("< *meta *content *= *\"?(.*) *\"? *name *= *\"? *"
					+ metaName + " *\"? */? *>", segment);
		}
		if (null == value) {
			return null;
		}
		String cleaned = removeTag(value);
		if (null == cleaned) {
			return null;
		}
		// The greedy capture can include the closing quote and slash of the tag.
		return cleaned.replace("/", "").replace("\"", "");
	}

	/**
	 * Applies a case-insensitive regular expression to a piece of text and
	 * returns the first capturing group of the last match found.
	 *
	 * @param patternStr the regular expression; it must define capturing group 1
	 * @param segment    the text to search
	 * @return group 1 of the last match, or {@code null} when nothing matches
	 */
	private String Regex(String patternStr, String segment) {
		Matcher matcher = Pattern.compile(patternStr, Pattern.CASE_INSENSITIVE)
				.matcher(segment);
		String lastCapture = null;
		// Walk through every match; later matches overwrite earlier ones.
		for (boolean found = matcher.find(); found; found = matcher.find()) {
			lastCapture = matcher.group(1);
		}
		return lastCapture;
	}



用htmlparser获取meta里面的keywords及description:
 // Parse the page at url and keep only the nodes that are MetaTag instances.
 Parser metaParser = new Parser(url);
 NodeList metaNodes = metaParser.extractAllNodesThatMatch(new NodeClassFilter(MetaTag.class));
 Node[] nodes = metaNodes.toNodeArray();
 // Print each meta tag as "name:content".
 for (int i = 0; i < nodes.length; i++) {
     MetaTag metaTag = (MetaTag) nodes[i];
     String line = metaTag.getAttribute("name") + ":" + metaTag.getAttribute("content");
     System.out.println(line);
 }

你可能感兴趣的:(script)