系统对接,抽取数据接口设计

-- Bookkeeping table for the data-extraction job: one row per extraction task,
-- holding the current time window and paging position of that task.
-- The sequence extract_task_temp_731_id_seq must already exist.
CREATE TABLE extract_task_temp
(
  id integer NOT NULL DEFAULT nextval('extract_task_temp_731_id_seq'::regclass),
  task_init_time timestamp with time zone, -- time the extraction task was initialised
  task_current_time timestamp with time zone, -- start of the window currently being extracted
  task_next_time timestamp with time zone, -- start of the next extraction window
  create_time timestamp with time zone DEFAULT now(),
  update_time timestamp with time zone, -- last modification time
  task_type integer, -- task type: 1 = article, 2 = reply
  website_id integer, -- site type id
  start_size integer, -- paging start offset
  limit_size integer, -- number of rows fetched per request
  cid integer, -- customer id
  authors text, -- author nicknames
  interval_time integer, -- interval between extractions, in minutes
  CONSTRAINT extract_task_temp_pkey PRIMARY KEY (id)
);

   

package com.cyyun.mobile.tools;

import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.annotation.Resource;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.log4j.Logger;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import com.cyyun.mobile.dao.ICommentAccountTempDao;
import com.cyyun.mobile.dao.IExtractTaskTempDao;
import com.cyyun.mobile.pojo.CommentAccountTemp;
import com.cyyun.mobile.pojo.ExtractTaskTemp;
import com.cyyun.mobile.service.ExtractTaskTempService;
import com.cyyun.mobile.tools.httpconnection.HttpUrlConnection;
import com.cyyun.mobile.tools.json.JsonEntity;
import com.cyyun.mobile.tools.json.JsonEntityArray;
import com.twmacinta.util.MD5;

/**
 * Scheduled data-extraction job.
 * <p>
 * Once a minute every configured {@link ExtractTaskTemp} is loaded and, based
 * on its task type, either article ids or reply ids are requested from the
 * remote interface for the task's current time window. The ids and the
 * updated paging state are then handed to {@link ExtractTaskTempService}.
 * 
 * @author zhangzm
 * 
 */
@Component
public class ExtractTask {

	static Logger log = Logger.getLogger(ExtractTask.class);

	@Resource
	IExtractTaskTempDao iExtractTaskTempDao;

	@Resource
	ICommentAccountTempDao iCommentAccountTempDao;

	@Resource
	ExtractTaskTempService extractTaskTempService;

	/**
	 * Loads all extraction tasks.
	 * 
	 * @return the tasks returned by the service, or {@code null} when the
	 *         query failed (the failure is logged)
	 */
	public List<ExtractTaskTemp> getExtractTaskTemp() {
		List<ExtractTaskTemp> extractTaskTemps = null;
		try {
			extractTaskTemps = extractTaskTempService
					.queryExtractTaskTemp(null);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}
		return extractTaskTemps;
	}

	/**
	 * Scheduler entry point, fired at the start of every minute. Tasks of
	 * type 1 are article tasks, every other type is handled as a reply task.
	 */
	@Scheduled(cron = "0 0/1 * * * ?")
	public void execute() {
		List<ExtractTaskTemp> extractTaskTemps = getExtractTaskTemp();
		if (CollectionUtils.isEmpty(extractTaskTemps)) {
			log.warn("extractTaskTemps isEmpty");
			return;
		}
		for (ExtractTaskTemp e : extractTaskTemps) {
			if (null == e) {
				continue;
			}
			try {
				if ("1".equals(String.valueOf(e.getTaskType()))) {
					createTask(e);
				} else {
					createReplyTask(e);
				}
			} catch (Exception ex) {
				// One broken task must not prevent the remaining tasks of
				// this run from being processed.
				log.error(ex.getMessage(), ex);
			}
		}
	}

	/**
	 * Fetches one page of reply ids for the task's current time window and
	 * stores them together with the advanced paging offset.
	 * <p>
	 * An empty JSON array ("[]") is treated as "window exhausted": authors
	 * and start offset are reset so the next run opens a new window.
	 * 
	 * @param bean
	 *            the reply task; ignored (with a warning) when {@code null}
	 *            or when it has no task type
	 */
	public void createReplyTask(ExtractTaskTemp bean) {
		if (!isRunnable(bean)) {
			return;
		}

		initTask(bean);
		if (!hasTimeWindow(bean)) {
			return;
		}

		String url = Constant.GET_ARTICLE_REPLY_URL;
		Map<String, String> dataMap = new HashMap<String, String>();
		dataMap.put("order", "rid");
		dataMap.put("desc", "asc");
		// The offset is advanced after every stored page; without sending it
		// the same first page would be fetched on every run.
		// NOTE(review): testArticleReply() already calls this interface with
		// "start" - confirm the endpoint honours it for windowed queries.
		dataMap.put("start", String.valueOf(bean.getStartSize()));
		dataMap.put("cid", String.valueOf(bean.getCid()));
		dataMap.put("limit", String.valueOf(bean.getLimitSize()));
		dataMap.put("fid", String.valueOf(bean.getWebsiteId()));
		dataMap.put("authors", bean.getAuthors());
		dataMap.put("from", String.valueOf(bean.getTaskCurrentTime().getTime()));
		dataMap.put("to", String.valueOf(bean.getTaskNextTime().getTime()));
		logRequest(bean, url, dataMap);

		String response = null;
		try {
			response = new HttpUrlConnection().readData(dataMap, url);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
			return;
		}
		if (StringUtils.isBlank(response)) {
			// Nothing usable came back: keep the window untouched so the
			// next run retries the same request.
			log.warn("get response is blank " + url + " " + dataMap);
			return;
		}
		if ("[]".equals(response)) {
			bean.setAuthors(null);
			bean.setStartSize(0);
			extractTaskTempService.updateExtractTaskTemp(bean);
			log.warn("get response is null " + url + " " + dataMap);
			return;
		}

		JsonEntityArray array = new JsonEntityArray(response);
		List<Map<String, Object>> replyIds = new ArrayList<Map<String, Object>>();
		for (int i = 0; i < array.size(); i++) {
			JsonEntity jsonE = array.getJsonEntity(i);
			Map<String, Object> map = new HashMap<String, Object>();
			map.put("rid", Integer.valueOf(jsonE.getString("rid")));
			replyIds.add(map);
		}
		if (replyIds.isEmpty()) {
			return;
		}
		try {
			bean.setStartSize(bean.getStartSize() + replyIds.size());
			extractTaskTempService.updateExtractTaskTempAndArticleReplyTemp(
					bean, replyIds);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}
	}

	/**
	 * Builds the comma separated nickname list of the given accounts.
	 * {@code null} entries and blank nicknames are skipped.
	 * 
	 * @param accountTemps
	 *            accounts to read the nicknames from, may be {@code null}
	 * @return the joined nicknames, or an empty string when there are none
	 */
	public String getCommentAccountTempName(
			List<CommentAccountTemp> accountTemps) {
		List<String> names = new ArrayList<String>();
		if (CollectionUtils.isNotEmpty(accountTemps)) {
			for (CommentAccountTemp c : accountTemps) {
				if (c == null) {
					continue;
				}
				String name = c.getCommentNickname();
				if (StringUtils.isNotBlank(name)) {
					names.add(name);
				}
			}
		}
		return StringUtils.join(names, ",");
	}

	/**
	 * Opens a new time window for the task when it currently has no authors.
	 * <p>
	 * Accounts are first looked up for the task's init time; if there are
	 * any, the window starts at the init time. Otherwise accounts are looked
	 * up for the task's next time and the window starts there. In both cases
	 * the window ends one interval (minutes) after its start, the start
	 * offset is reset to 0, the authors are set from the accounts found and
	 * the task is persisted together with those accounts.
	 * <p>
	 * Does nothing when the task already carries authors.
	 * 
	 * @param bean
	 *            the task to initialise, must not be {@code null}
	 */
	public void initTask(ExtractTaskTemp bean) {

		if (StringUtils.isNotBlank(bean.getAuthors())) {
			return;
		}

		List<CommentAccountTemp> accountTemps = null;
		try {
			accountTemps = extractTaskTempService
					.queryCommentAccountTemp(accountQuery(bean,
							bean.getTaskInitTime()));
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}
		if (CollectionUtils.isEmpty(accountTemps)) {
			try {
				accountTemps = extractTaskTempService
						.queryCommentAccountTemp(accountQuery(bean,
								bean.getTaskNextTime()));
				openWindow(bean, bean.getTaskNextTime());
			} catch (Exception e) {
				log.error(e.getMessage(), e);
			}
		} else {
			openWindow(bean, bean.getTaskInitTime());
		}
		try {
			bean.setAuthors(getCommentAccountTempName(accountTemps));
			extractTaskTempService.updateExtractTaskTempAndCommentAccountTemp(
					bean, accountTemps);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}

	}

	/**
	 * Fetches all pages of article ids for the task's current time window.
	 * Each page is stored together with the advanced start offset before the
	 * next page is requested.
	 * <p>
	 * The loop ends when the interface reports a count of 0 or returns no
	 * items (window exhausted: authors and start offset are reset), when the
	 * request or the response is unusable, or when storing a page fails.
	 * 
	 * @param bean
	 *            the article task; ignored (with a warning) when {@code null}
	 *            or when it has no task type
	 */
	public void createTask(ExtractTaskTemp bean) {

		if (!isRunnable(bean)) {
			return;
		}
		initTask(bean);
		if (!hasTimeWindow(bean)) {
			return;
		}

		String url = Constant.GET_ARTICLE_URL;
		while (true) {
			Map<String, String> dataMap = new HashMap<String, String>();
			dataMap.put("action", "full");
			dataMap.put("sort", "6");
			dataMap.put("order", "1");
			dataMap.put("start", String.valueOf(bean.getStartSize()));
			dataMap.put("cid", String.valueOf(bean.getCid()));
			dataMap.put("limit", String.valueOf(bean.getLimitSize()));
			dataMap.put("fid", String.valueOf(bean.getWebsiteId()));
			dataMap.put("authors", bean.getAuthors());
			dataMap.put("from",
					String.valueOf(bean.getTaskCurrentTime().getTime()));
			dataMap.put("to", String.valueOf(bean.getTaskNextTime().getTime()));
			logRequest(bean, url, dataMap);

			String response = null;
			try {
				response = new HttpUrlConnection().readData(dataMap, url);
			} catch (Exception e) {
				log.error(e.getMessage(), e);
				return;
			}
			if (StringUtils.isBlank(response)) {
				log.error("get response is blank " + url + " " + dataMap);
				return;
			}

			JsonEntity jsonEntity = new JsonEntity(response);
			int result = readCount(jsonEntity);
			if (result < 0) {
				log.error("response has no valid count " + url + " " + dataMap);
				return;
			}

			List<Map<String, Object>> addArticleBeans = new ArrayList<Map<String, Object>>();
			if (result > 0) {
				JsonEntityArray array = jsonEntity.getJsonEntityArray("items");
				if (array != null) {
					for (int i = 0; i < array.size(); i++) {
						JsonEntity jsonE = array.getJsonEntity(i);
						Map<String, Object> map = new HashMap<String, Object>();
						map.put("aid", Integer.valueOf(jsonE.getString("aid")));
						addArticleBeans.add(map);
					}
				}
			}

			if (addArticleBeans.isEmpty()) {
				// No rows means the offset cannot advance any more; asking
				// again would repeat the identical request forever, so the
				// window is closed here.
				if (result == 0) {
					log.error("抽取文件数据为0条  ");
				} else {
					log.warn("count is " + result
							+ " but no items were returned " + url + " "
							+ dataMap);
				}
				bean.setAuthors(null);
				bean.setStartSize(0);
				extractTaskTempService.updateExtractTaskTemp(bean);
				return;
			}

			try {
				bean.setStartSize(bean.getStartSize()
						+ addArticleBeans.size());
				extractTaskTempService.updateExtractTaskTempAndArticleTemp(
						bean, addArticleBeans);
			} catch (Exception e) {
				// The page could not be stored: stop instead of requesting
				// further pages against a failing store.
				log.error(e.getMessage(), e);
				return;
			}
		}
	}

	/**
	 * Looks up every given guid for the default customer
	 * ({@code Constant.CID}).
	 * 
	 * @param guids
	 *            guids to look up, may be {@code null} or empty
	 */
	public void getArticleByGuid(Set<String> guids) {
		if (CollectionUtils.isNotEmpty(guids)) {
			for (String guid : guids) {
				getArticleByGuid(Constant.CID, guid);
			}
		}
	}

	/**
	 * Requests a single article by guid and logs when nothing was found.
	 * 
	 * @param cid
	 *            customer id, must not be blank
	 * @param guid
	 *            article guid, must not be blank
	 */
	public void getArticleByGuid(String cid, String guid) {
		fetchArticleByGuid(cid, guid);
	}

	/**
	 * Static variant of {@link #getArticleByGuid(String, String)} for manual
	 * testing.
	 * 
	 * @param cid
	 *            customer id, must not be blank
	 * @param guid
	 *            article guid, must not be blank
	 */
	public static void testGetArticleByGuid(String cid, String guid) {
		fetchArticleByGuid(cid, guid);
	}

	/**
	 * Fetches the content of one article.
	 * 
	 * @param aid
	 *            article id
	 * @return the "content" value of the response, or {@code null} when the
	 *         request failed or the response was blank
	 */
	public String getArticleContent(Long aid) {
		HttpUrlConnection connection = new HttpUrlConnection();
		Map<String, String> dataMap = new HashMap<String, String>();
		String url = Constant.GET_ARTICLE_CONTENT_URL;
		dataMap.put("aid", String.valueOf(aid));
		String response = null;
		try {
			response = connection.readData(dataMap, url);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
			return null;
		}
		if (StringUtils.isNotBlank(response)) {
			// Parse the JSON response.
			JsonEntity jsonEntity = new JsonEntity(response);
			return jsonEntity.getString("content");
		}
		return null;
	}

	/**
	 * Manual smoke test: requests a small page of articles with fixed sample
	 * parameters and logs how many items came back.
	 */
	public static void testSpidArticle() {
		String url = Constant.GET_ARTICLE_URL;
		Map<String, String> dataMap = new HashMap<String, String>();
		dataMap.put("start", "0");
		dataMap.put("cid", "731");
		dataMap.put("limit", "5");
		dataMap.put("action", "full");
		dataMap.put("sort", "6");
		dataMap.put("order", "1");
		dataMap.put("author", "品味咖啡");

		try {
			String response = new HttpUrlConnection().readData(dataMap, url);
			JsonEntity jsonEntity = new JsonEntity(response);
			JsonEntityArray array = jsonEntity.getJsonEntityArray("items");
			log.info("testSpidArticle items : "
					+ (array == null ? 0 : array.size()));
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}
	}

	/**
	 * Manual smoke test: requests a small page of replies with fixed sample
	 * parameters and logs how many entries came back.
	 */
	public static void testArticleReply() {
		String url = Constant.GET_ARTICLE_REPLY_URL;
		Map<String, String> dataMap = new HashMap<String, String>();
		dataMap.put("start", "0");
		dataMap.put("cid", Constant.CID);
		dataMap.put("limit", "10");
		dataMap.put("author", "最爱看九爷");

		try {
			String response = new HttpUrlConnection().readData(dataMap, url);
			// The reply interface answers with a bare JSON array.
			JsonEntityArray array = new JsonEntityArray(response);
			log.info("testArticleReply replies : " + array.size());
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}
	}

	/**
	 * Returns the hex MD5 digest of the given url.
	 * 
	 * @param url
	 *            text to hash
	 * @return the digest as hex string; the input itself when it is blank;
	 *         {@code null} when hashing failed
	 */
	public static String getMD5Url(String url) {
		if (StringUtils.isBlank(url)) {
			return url;
		}
		try {
			MD5 md5 = new MD5();
			md5.Update(url);
			return md5.asHex();
		} catch (Exception e) {
			log.error("md5 加密异常", e);
		}
		return null;
	}

	public static void main(String[] args) {

	}

	/** Checks that the task exists and has a task type; logs a warning otherwise. */
	private static boolean isRunnable(ExtractTaskTemp bean) {
		if (bean == null) {
			log.warn("ExtractTaskTemp is null ");
			return false;
		}
		if (null == bean.getTaskType()) {
			log.warn("ExtractTaskTemp getTaskType is null ");
			return false;
		}
		return true;
	}

	/** Checks that both ends of the task's time window are set; logs a warning otherwise. */
	private static boolean hasTimeWindow(ExtractTaskTemp bean) {
		if (bean.getTaskCurrentTime() == null || bean.getTaskNextTime() == null) {
			log.warn("ExtractTaskTemp time window is not initialised ");
			return false;
		}
		return true;
	}

	/** Builds the account query parameters for the task's website and the given task time. */
	private static Map<String, Object> accountQuery(ExtractTaskTemp bean,
			Date taskTime) {
		Map<String, Object> map = new HashMap<String, Object>();
		map.put("websiteId", bean.getWebsiteId());
		map.put("deleteFlag", 1);
		map.put("taskTime", taskTime);
		return map;
	}

	/** Resets the start offset and sets the window to [from, from + interval minutes]. */
	private static void openWindow(ExtractTaskTemp bean, Date from) {
		bean.setStartSize(0);
		bean.setTaskCurrentTime(from);
		bean.setTaskNextTime(DateUtils.addMinutes(from, bean.getIntervalTime()));
	}

	/** Logs the window and the parameters of an outgoing request. */
	private static void logRequest(ExtractTaskTemp bean, String url,
			Map<String, String> dataMap) {
		StringBuilder logBuilder = new StringBuilder();
		logBuilder.append("开始时间为:").append(bean.getTaskCurrentTime())
				.append("结束时间为:").append(bean.getTaskNextTime())
				.append(" url :").append(url).append(" dataMap :")
				.append(dataMap);
		log.info(logBuilder.toString());
	}

	/** Reads the "count" value of a response; returns -1 when it is missing or not a number. */
	private static int readCount(JsonEntity entity) {
		String raw = entity.getString("count");
		if (StringUtils.isBlank(raw)) {
			return -1;
		}
		try {
			return Integer.parseInt(raw.trim());
		} catch (NumberFormatException e) {
			return -1;
		}
	}

	/** Shared guid lookup used by the instance method and its static test variant. */
	private static void fetchArticleByGuid(String cid, String guid) {
		if (StringUtils.isBlank(cid)) {
			log.warn("cid is null ");
			return;
		}
		if (StringUtils.isBlank(guid)) {
			log.warn("guid is null ");
			return;
		}

		String url = Constant.GET_ARTICLE_URL;
		Map<String, String> dataMap = new HashMap<String, String>();
		dataMap.put("cid", cid);
		dataMap.put("guid", guid);

		String response = null;
		try {
			response = new HttpUrlConnection().readData(dataMap, url);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
			return;
		}
		if (StringUtils.isBlank(response)) {
			log.warn("get response is blank " + url + " " + dataMap);
			return;
		}

		// Parse the JSON response.
		int result = readCount(new JsonEntity(response));
		if (result < 0) {
			log.error("response has no valid count " + url + " " + dataMap);
		} else if (result == 0) {
			log.error("获取0条数据");
		}
	}

}

 

 

 

 

  最好把传输数据接口的参数都配置到数据库中。

  每次任务都要记录好当前任务的参数,这样在宕机或者重启的时候,已保存的查询参数有利于下一次的查询。

你可能感兴趣的:(数据接口)