利用 HtmlUnit 从网页提取数据

最近炒股,发现几个不错的网站(散户大家庭 http://www.shdjt.com/ 、股票大单网 http://www.gupiaodadan.cn/ ),它们每天提供大量分析数据——如主力持仓排名、大单买入排名、板块资金流入排名、DDX 排名等等。于是我利用 HtmlUnit 工具包写了一个程序,把这些数据从网页上提取出来,保存到自己的 Oracle 数据库里,以便进行统计分析,从中找出规律,呵呵……希望大发一笔。下面是具体的程序。

 

 利用 HtmlUnit 从网页提取数据

 

 

 【开发环境】MyEclipse6.5.1、Oracle9i

 

【所需要的库文件】

 htmlunit-2.5.zip                                       下载地址 http://htmlunit.sourceforge.net/

 Oracle9i JDBC驱动程序classes12.jar          在Oracle9i的安装目录下面的JDBC文件夹里有

 

  【数据库设计】

--Stock master list.
--Oracle 9i has no "drop table if exists", so the drop is wrapped in a PL/SQL
--block that ignores only ORA-00942 (table or view does not exist). This keeps
--the script runnable on a fresh schema as well as on a re-run.
begin
   execute immediate 'drop table gpml';
exception
   when others then
      if sqlcode <> -942 then
         raise;
      end if;
end;
/

create table gpml(
   gpdm   varchar2(6) primary key,--stock code
   gpmc   varchar2(20),--stock name
   ltgs   number(10),--tradable (floating) shares
   zgs    number(10),--total shares
   ltbl   number(10),--floating ratio; NOTE(review): number(10) has scale 0, so fractional values are rounded to integers - confirm the intended unit
   mgyl   number,--earnings per share
   jzc    number,--net assets
   jlr    number,--net profit
   zxsj   varchar2(2500),--latest events
   bz     varchar2(2500)--remarks
);

--Sector (board) list
create table bkml(
   bkid  number primary key,--sector id
   bkmc  varchar2(100),--presumably the sector name (by analogy with gpml.gpmc) - confirm
   bc    varchar2(100)--NOTE(review): meaning not documented in the original script
);

--Daily real-time data.
--Note: a row is stored even for a suspended stock (it carries the most recent DDX data).
--The stored procedure produce_pm (parameter: the date) fills in the daily ranks;
--mind the suspended stocks when ranking.
create table daydata(
   gpdm  varchar2(6),--stock code
   rq     date,--trading date
   price  number,--price
   zf     number,--price change
   ddx    number,
   ddy    number,
   ddz    number,
   ddx_sixty number,
   ddy_sixty number,
   ddx_tenl  number,--10-day count
   ddx_tenc  number,--10-day consecutive count
   tddc      number,
   ddc       number,
   zdc       number,
   xdc       number,
   hyd       number,--activity level
   dsb       number,
   tdmr      number,
   tdmc      number,
   ddmr      number,
   ddmc      number,
   zdmr      number,
   zdmc      number,
   xdmr      number,
   xdmc      number,
   hsl       number,
   lb        number,
   ddxpm     number,--rank by ddx, written by produce_pm
   ddypm     number,--rank by ddy, written by produce_pm
   ddzpm     number,--rank by ddz, written by produce_pm
   constraint pk_daydata primary key(gpdm,rq)
);


--Both markets: full ranking by net large-order buying
create table ddmrpm(
   gpdm  varchar2(6),
   rq    date,
   pm    number,--rank
   ddmr  number,
   ddmc  number,
   mrmcc number,--ddmr minus ddmc (computed by the importer)
   cjl   number,
   jg    number,
   constraint pk_ddmrpm primary key(gpdm,rq)
);


--5-day net large-order buying, top 50
create table fiveddmrpm(
   gpdm  varchar2(6),
   rq    date,
   pm    number,
   ddmr  number,
   ddmc  number,
   mrmcc number,
   cjl   number,
   jg    number,
   constraint pk_fiveddmrpm primary key(gpdm,rq)
);

--10-day net large-order buying, top 50
create table tenddmrpm(
   gpdm  varchar2(6),
   rq    date,
   pm    number,
   ddmr  number,
   ddmc  number,
   mrmcc number,
   cjl   number,
   jg    number,
   constraint pk_tenddmrpm primary key(gpdm,rq)
);


--5-day net large-order selling, top 50
create table fiveddmcpm(
   gpdm  varchar2(6),
   rq    date,
   pm    number,
   ddmr  number,
   ddmc  number,
   mrmcc number,
   cjl   number,
   jg    number,
   constraint pk_fiveddmcpm primary key(gpdm,rq)
);

--10-day net large-order selling, top 50
create table tenddmcpm(
   gpdm  varchar2(6),
   rq    date,
   pm    number,
   ddmr  number,
   ddmc  number,
   mrmcc number,
   cjl   number,
   jg    number,
   constraint pk_tenddmcpm primary key(gpdm,rq)
);


--Shanghai market Level-2 data: DDX top 50.
--Note: unlike daydata, a suspended stock gets no row here (its most recent DDX data is not carried over).
create table ddxpm(
   gpdm  varchar2(6),
   rq    date,
   jg    number,
   zf    number,
   ddx   number,
   ddxpm number,
   ten_phts  number,
   ten_lxphts   number,
   ddy    number,
   ddypm  number,
   ddz    number,
   ddzpm  number,
   constraint pk_ddxpm primary key(gpdm,rq)
);


--Both markets: sector net capital inflow ranking
create table bkzjlrpm(
   bkid  number,
   pm    number,
   rq    date,
   lr    number,
   lc    number,
   jlr   number,--net inflow
   gpdm  varchar2(20),--representative stock
   constraint pk_bkzjlrpm primary key(bkid,rq)
);


--5-day sector net capital inflow ranking (both markets)
create table fivebkzjlrpm(
   bkid  number,
   rq    date,
   pm    number,
   lr    number,
   lc    number,
   jlr   number,--net inflow
   constraint pk_fivebkzjlrpm primary key(bkid,rq)
);

--10-day sector net capital inflow ranking (both markets)
create table tenbkzjlrpm(
   bkid  number,
   pm    number,
   rq    date,
   lr    number,
   lc    number,
   jlr   number,--net inflow
   constraint pk_tenbkzjlrpm primary key(bkid,rq)
);


--Main-force holdings ranking (last updated 2009-05-26); used to follow the movement of main-force capital
create table zlccpm(
   gpdm   varchar2(6),
   rq     date,
   adddec number,--direction: 1 = increase, 0 = decrease
   dayNum number, --window: 1-day, 5-day or 10-day
   userType number,--holder class: 0 main force, 1 extra-large holders, 2 large holders, 3 retail
   pm     number,--rank
   per    number,--increase/decrease percentage
   constraint pk_zlccpm primary key(gpdm,rq,adddec,dayNum,userType)
);

 

 

       [用来进行排名的存储过程]

create or replace procedure produce_pm(in_Date IN daydata.rq%TYPE) is
  -- Fills daydata.ddxpm / ddypm / ddzpm for one trading day.
  --
  -- Parameter:
  --   in_Date  day to rank; any time-of-day part is discarded with trunc().
  --
  -- Only rows with price > 0 are ranked; other rows of that day keep whatever
  -- rank values they already had. Rank 1 is the largest value. Each ranking
  -- breaks ties on the other two indicators and finally on gpdm, so that
  -- repeated runs over the same data always assign the same ranks.
  --
  -- All three rankings are computed in a single pass with ROW_NUMBER() and
  -- written back by rowid, instead of scanning and updating the day's rows
  -- once per indicator.
  --
  -- The procedure commits its own work.

  -- Truncated once so the query compares against a single midnight value.
  v_day daydata.rq%TYPE := trunc(in_Date);
BEGIN
  FOR rec IN (
    SELECT rowid AS rid,
           ROW_NUMBER() OVER (ORDER BY ddx DESC, ddy DESC, ddz DESC, gpdm) AS x_rank,
           ROW_NUMBER() OVER (ORDER BY ddy DESC, ddx DESC, ddz DESC, gpdm) AS y_rank,
           ROW_NUMBER() OVER (ORDER BY ddz DESC, ddx DESC, ddy DESC, gpdm) AS z_rank
      FROM daydata
     WHERE rq = v_day
       AND price > 0
  ) LOOP
    UPDATE daydata
       SET ddxpm = rec.x_rank,
           ddypm = rec.y_rank,
           ddzpm = rec.z_rank
     WHERE rowid = rec.rid;
  END LOOP;

  commit;
end produce_pm;
/

 

 

    

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;

public class ConnectionManager {
    /** URL used when the JDBC_URL system property is not set. */
    private static final String DEFAULT_URL = "jdbc:oracle:oci8:@";

    /**
     * Opens a connection to the Oracle schema used by the importer.
     *
     * The JDBC URL is taken from the system property JDBC_URL when it is set,
     * otherwise the default OCI URL is used.
     *
     * @return an open connection, or null when the driver could not be
     *         registered or the connection attempt failed (the failure is
     *         printed to standard error)
     */
    public static Connection getConnection() {
        Connection connection = null;
        try {
            DriverManager.registerDriver(new oracle.jdbc.OracleDriver());
            connection = DriverManager.getConnection(resolveUrl(), "gp", "gp");
        } catch (Exception e1) {
            e1.printStackTrace();
        }
        return connection;
    }

    /**
     * Picks the JDBC URL: the JDBC_URL system property if present, else the default.
     * A failure to read the property is printed and the default is used.
     */
    private static String resolveUrl() {
        String jdbcUrl = DEFAULT_URL;
        try {
            String override = System.getProperty("JDBC_URL");
            if (override != null) {
                jdbcUrl = override;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return jdbcUrl;
    }
}

 

 

 

import java.awt.Dimension;
import java.awt.Toolkit;
import java.io.IOException;
import java.net.MalformedURLException;
import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.List;

import javax.swing.SwingUtilities;
import javax.swing.SwingWorker;

import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;


public class DataImport extends javax.swing.JFrame {
    String zlccDate = null;//主力持仓,赢富数据要慢一天

    /**
     * Builds the window, pre-fills the two date fields and centres the frame
     * on the screen.
     *
     * The holdings-ranking date field defaults to yesterday and the import
     * date field defaults to today, both formatted as yyyy-MM-dd.
     */
    public DataImport() {
        initComponents();

        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        Calendar yesterday = Calendar.getInstance();
        yesterday.add(Calendar.DAY_OF_MONTH, -1);
        txtZlcc.setText(dayFormat.format(yesterday.getTime()));
        txtDate.setText(dayFormat.format(new Date()));

        // Centre the frame, treating it as no larger than the screen.
        Dimension screen = Toolkit.getDefaultToolkit().getScreenSize();
        Dimension preferred = getPreferredSize();
        int width = Math.min(preferred.width, screen.width);
        int height = Math.min(preferred.height, screen.height);
        setLocation((screen.width - width) / 2, (screen.height - height) / 2);
    }

    /**
     * Creates the Swing components and lays them out; called once from the
     * constructor.
     *
     * The form consists of one panel holding the import-date text field, a
     * label plus a second date text field, the import button, and a scrollable
     * status text area.
     *
     * WARNING: Do NOT modify this code. The content of this method is
     * always regenerated by the Form Editor.
     */
    private void initComponents() {
	jPanel1 = new javax.swing.JPanel();
	btnImport = new javax.swing.JButton();
	txtDate = new javax.swing.JTextField();
	jScrollPane1 = new javax.swing.JScrollPane();
	txtStatus = new javax.swing.JTextArea();
	txtZlcc = new javax.swing.JTextField();
	jLabel1 = new javax.swing.JLabel();

	// Closing the window terminates the JVM.
	setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);

	// Button caption is the escaped Chinese text meaning "Start importing data".
	btnImport.setText("\u5f00\u59cb\u5bfc\u5165\u6570\u636e");
	btnImport.addActionListener(new java.awt.event.ActionListener() {
	    public void actionPerformed(java.awt.event.ActionEvent evt) {
		btnImportActionPerformed(evt);
	    }
	});

	// Status area shown inside the scroll pane.
	txtStatus.setColumns(20);
	txtStatus.setRows(5);
	jScrollPane1.setViewportView(txtStatus);

	// Label caption is the escaped Chinese text meaning "Main-force holdings ranking".
	jLabel1.setText("\u4e3b\u529b\u6301\u4ed3\u6392\u540d");

	// Generated GroupLayout: horizontal group first, then vertical group.
	org.jdesktop.layout.GroupLayout jPanel1Layout = new org.jdesktop.layout.GroupLayout(jPanel1);
	jPanel1.setLayout(jPanel1Layout);
	jPanel1Layout.setHorizontalGroup(jPanel1Layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(
		jPanel1Layout.createSequentialGroup().add(
			jPanel1Layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(jPanel1Layout.createSequentialGroup().addContainerGap().add(jScrollPane1, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, 425, Short.MAX_VALUE)).add(
				jPanel1Layout.createSequentialGroup().add(49, 49, 49).add(
					jPanel1Layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(btnImport).add(
						jPanel1Layout.createSequentialGroup().add(txtDate, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE, 136, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE).add(16, 16, 16).add(jLabel1).addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED).add(txtZlcc,
							org.jdesktop.layout.GroupLayout.PREFERRED_SIZE, 121, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE))).add(37, 37, 37))).addContainerGap()));
	jPanel1Layout.setVerticalGroup(jPanel1Layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(
		jPanel1Layout.createSequentialGroup().addContainerGap().add(
			jPanel1Layout.createParallelGroup(org.jdesktop.layout.GroupLayout.BASELINE).add(txtDate, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE).add(jLabel1).add(txtZlcc,
				org.jdesktop.layout.GroupLayout.PREFERRED_SIZE, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE)).addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE).add(
			btnImport).addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED).add(jScrollPane1, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE, 352, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE).addContainerGap()));

	getContentPane().add(jPanel1, java.awt.BorderLayout.CENTER);

	// Size the frame to fit its contents.
	pack();
    }// </editor-fold>

    /**
     * Handles a click on the import button by running {@link #importData()}
     * on a background worker.
     *
     * The button is disabled for the duration of the import and re-enabled
     * when the worker finishes (successfully or not). Without this a second
     * click would start an overlapping import that inserts the same
     * primary-key rows again.
     *
     * @param evt the click event (unused)
     */
    private void btnImportActionPerformed(java.awt.event.ActionEvent evt) {
        btnImport.setEnabled(false);

        SwingWorker<Object, Object> sw = new SwingWorker<Object, Object>() {
            protected Object doInBackground() throws Exception {
                importData();
                return null;
            }

            // Invoked on the event dispatch thread once doInBackground has finished.
            protected void done() {
                btnImport.setEnabled(true);
            }
        };
        sw.execute();
    }

    /**
     * Runs the complete import, one step after another, reporting progress in
     * the status text area. The first step that fails stops the run: its stack
     * trace is printed, a failure line is appended, and the remaining steps are
     * skipped.
     *
     * Two dates are read from the form: the import date, and the holdings date
     * ({@link #zlccDate}). The holdings-ranking step is stamped with the
     * holdings date, because that data set lags one day behind.
     */
    private void importData() {
        String importDate = this.txtDate.getText();
        zlccDate = this.txtZlcc.getText();

        final WebClient webClient = new WebClient();

        // 1. Daily data (from www.shdjt.com)
        this.txtStatus.append("开始导入http://www.shdjt.com/中的每日数据........");
        try {
            importDayData(webClient, importDate);
            this.txtStatus.append("\n成功!");
        } catch (Exception e) {
            e.printStackTrace();
            this.txtStatus.append("\n失败!");
            return;
        }
        this.txtStatus.append("\n");

        // 2. Sector capital inflow: same day, 5-day, 10-day
        // NOTE(review): this step is stamped with the holdings date rather than
        // the import date; left unchanged - confirm which date the sector pages
        // actually represent.
        this.txtStatus.append("\n开始导入当日板块、五日板块、十日板块资金流入........");
        try {
            importBkData(webClient, zlccDate);
            this.txtStatus.append("\n成功!");
        } catch (Exception e) {
            e.printStackTrace();
            this.txtStatus.append("\n失败!");
            return;
        }
        this.txtStatus.append("\n");

        // 3. Net large-order buying rankings: same day, 5-day, 10-day
        this.txtStatus.append("\n开始导入两市大单净买入全部排名 当日、五日、十日........");
        try {
            importDDmrpm(webClient, importDate);
            this.txtStatus.append("\n成功!");
        } catch (Exception e) {
            e.printStackTrace();
            this.txtStatus.append("\n失败!");
            return;
        }
        this.txtStatus.append("\n");

        // 4. Shanghai Level-2 data: DDX top 50
        this.txtStatus.append("\n开始导入沪市Level-2数据 DDX 排名前50........");
        try {
            importDDXpm(webClient, importDate);
            this.txtStatus.append("\n成功!");
        } catch (Exception e) {
            e.printStackTrace();
            this.txtStatus.append("\n失败!");
            return;
        }
        this.txtStatus.append("\n");

        // 5. Main-force holdings ranking. Uses the holdings date: the field was
        // introduced for exactly this data set, which is published a day late.
        this.txtStatus.append("\n开始导入主力持仓排名........");
        try {
            importZlCCData(webClient, zlccDate);
            this.txtStatus.append("\n成功!");
        } catch (Exception e) {
            e.printStackTrace();
            this.txtStatus.append("\n失败!");
            return;
        }
        this.txtStatus.append("\n");

        // 6. Stored procedure that ranks the daily data (DDX, DDY, DDZ)
        this.txtStatus.append("\n开始执行存储过程........");
        try {
            callProcForPM(importDate);
            this.txtStatus.append("\n成功!");
        } catch (Exception e) {
            e.printStackTrace();
            this.txtStatus.append("\n失败!");
            return;
        }

        this.txtStatus.append("\n-----------------全部成功-----------------");

    }

    /**
     * Imports the per-stock daily detail table from http://www.shdjt.com/ into
     * the daydata table.
     *
     * The first two table rows are skipped. For every remaining row, cell 3 is
     * the stock code and cells 5..29 map, in order, to the columns price..lb.
     * A blank cell is stored as 0.
     *
     * @param webClient  client used to fetch the page
     * @param importDate date to stamp on every row, formatted yyyy-MM-dd; it is
     *                   bound as a statement parameter, never concatenated
     *                   into the SQL text
     * @throws Exception if the page cannot be fetched, a cell cannot be parsed,
     *                   or the database rejects the batch
     */
    public void importDayData(WebClient webClient, String importDate) throws Exception {
        final HtmlPage page = webClient.getPage("http://www.shdjt.com/");
        final HtmlTable tableElement = (HtmlTable) page.getByXPath("//table[@bgcolor='#FFffff']").get(0);

        List<HtmlTableRow> allRows = tableElement.getRows();
        List<HtmlTableRow> dataRows = allRows.subList(2, allRows.size());

        final int firstCell = 5;   // price
        final int lastCell = 29;   // lb

        // One placeholder for gpdm, one for the date, one per numeric cell.
        StringBuilder sql = new StringBuilder("insert into daydata(gpdm,rq,price,zf,ddx,ddy,ddz,ddx_sixty,ddy_sixty,ddx_tenl,ddx_tenc,tddc,ddc,zdc,xdc,hyd,dsb,tdmr,tdmc,ddmr,ddmc,zdmr,zdmc,xdmr,xdmc,hsl,lb) ");
        sql.append("values (?,to_date(?,'yyyy-MM-dd')");
        for (int cell = firstCell; cell <= lastCell; cell++) {
            sql.append(",?");
        }
        sql.append(")");

        PreparedStatement pstmt = null;
        Connection conn = null;
        NumberFormat nf = NumberFormat.getInstance();
        try {
            conn = ConnectionManager.getConnection();
            pstmt = conn.prepareStatement(sql.toString());

            for (final HtmlTableRow row : dataRows) {
                pstmt.setString(1, isNull(row.getCell(3).asText()));// stock code
                pstmt.setString(2, importDate);

                for (int cell = firstCell; cell <= lastCell; cell++) {
                    int param = cell - 2; // cell 5 -> parameter 3 ... cell 29 -> parameter 27
                    String text = row.getCell(cell).asText();
                    boolean blank = text == null || text.length() == 0;

                    if (cell == 12 || cell == 13 || cell == 18) {
                        // Integer columns: 10-day count, 10-day consecutive count, activity level.
                        pstmt.setInt(param, blank ? 0 : Integer.parseInt(text));
                    } else if (cell == 9) {
                        // DDZ goes through the locale-aware NumberFormat, as before.
                        pstmt.setFloat(param, blank ? 0f : nf.parse(text).floatValue());
                    } else {
                        pstmt.setFloat(param, blank ? 0f : Float.parseFloat(text));
                    }
                }

                pstmt.addBatch();
            }
            pstmt.executeBatch();
            conn.commit();
        } finally {
            // Close each resource independently; either may be null if setup failed.
            try {
                if (pstmt != null) {
                    pstmt.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
            try {
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Substitutes the text "0.0" for a missing value.
     *
     * @param s text to check; may be null
     * @return "0.0" when s is null or empty, otherwise s unchanged
     */
    public static String isNull(String s) {
        // The null test must come first: calling length() on null would throw.
        if (s == null || s.length() == 0) {
            return "0.0";
        }
        return s;
    }

    /**
     * Imports the three sector capital-inflow rankings (same day, 5-day and
     * 10-day), each from its own page into its own table.
     *
     * @param webClient   client used to fetch the pages
     * @param currentDate date to stamp on the imported rows, formatted yyyy-MM-dd
     * @throws Exception if any of the three imports fails; later ones are then skipped
     */
    public void importBkData(WebClient webClient, String currentDate) throws Exception {
        // { page URL, target table }, processed in this order.
        final String[][] sources = {
            { "http://www.gupiaodadan.cn/bankuai.jsp", "bkzjlrpm" },
            { "http://www.gupiaodadan.cn/bankuaimore-2", "fivebkzjlrpm" },
            { "http://www.gupiaodadan.cn/bankuaimore-1", "tenbkzjlrpm" },
        };

        for (String[] source : sources) {
            importBkzjData(webClient, source[0], source[1], currentDate);
        }
    }

    /**
     * Imports one sector capital-inflow ranking page into the given table.
     *
     * The first table row is skipped. Per row: cell 0 is the rank, the link in
     * the second td carries the sector id after its last '-', and cells 2..4
     * are inflow, outflow and net inflow. For table bkzjlrpm only, the link in
     * the sixth td supplies the representative stock code (last 6 characters
     * of the href).
     *
     * @param webClient client used to fetch the page
     * @param url       page to read
     * @param tableName target table; must be a plain (optionally
     *                  schema-qualified) identifier because it cannot be bound
     *                  as a parameter
     * @param date      date to stamp on every row, formatted yyyy-MM-dd; bound
     *                  as a statement parameter
     * @throws IllegalArgumentException if tableName is not a plain identifier
     * @throws Exception if the page cannot be fetched or parsed, or the
     *                   database rejects the batch
     */
    public void importBkzjData(WebClient webClient, String url, String tableName, String date) throws Exception {
        // Identifiers cannot be bound, so refuse anything that is not one.
        if (!tableName.matches("[A-Za-z][A-Za-z0-9_$#]*(\\.[A-Za-z][A-Za-z0-9_$#]*)?")) {
            throw new IllegalArgumentException("Invalid table name: " + tableName);
        }
        final boolean withStock = tableName.equalsIgnoreCase("bkzjlrpm");

        PreparedStatement pstmt = null;
        Connection conn = null;

        try {
            final HtmlPage page = webClient.getPage(url);
            final HtmlDivision div = (HtmlDivision) page.getByXPath("//div[@class='tablecontent']").get(0);

            HtmlTable tableElement = (HtmlTable) div.getHtmlElementsByTagName("table").get(0);

            List<HtmlTableRow> allRows = tableElement.getRows();
            List<HtmlTableRow> dataRows = allRows.subList(1, allRows.size());

            conn = ConnectionManager.getConnection();
            if (withStock)
                pstmt = conn.prepareStatement("insert into " + tableName + "(bkid,rq,pm,lr,lc,jlr,gpdm) values (?,to_date(?,'yyyy-MM-dd'),?,?,?,?,?)");
            else
                pstmt = conn.prepareStatement("insert into " + tableName + "(bkid,rq,pm,lr,lc,jlr) values (?,to_date(?,'yyyy-MM-dd'),?,?,?,?)");

            for (final HtmlTableRow row : dataRows) {
                int pm = Integer.valueOf(row.getCell(0).asText());// rank

                // Sector id is whatever follows the last '-' in the sector link.
                HtmlElement tdElement = row.getHtmlElementsByTagName("td").get(1);
                HtmlElement aElement = tdElement.getHtmlElementsByTagName("a").get(0);
                String strUrl = aElement.getAttribute("href");
                String bkId = strUrl.substring(strUrl.lastIndexOf("-") + 1);

                float lr = Float.valueOf(row.getCell(2).asText()).floatValue();
                float lc = Float.valueOf(row.getCell(3).asText()).floatValue();
                float jlr = Float.valueOf(row.getCell(4).asText()).floatValue();

                pstmt.setInt(1, Integer.valueOf(bkId).intValue());
                pstmt.setString(2, date);
                pstmt.setInt(3, pm);
                pstmt.setFloat(4, lr);
                pstmt.setFloat(5, lc);
                pstmt.setFloat(6, jlr);

                if (withStock) {
                    // Representative stock: last 6 characters of the stock link.
                    tdElement = row.getHtmlElementsByTagName("td").get(5);
                    aElement = tdElement.getHtmlElementsByTagName("a").get(0);
                    strUrl = aElement.getAttribute("href");
                    pstmt.setString(7, strUrl.substring(strUrl.length() - 6));
                }

                pstmt.addBatch();
            }
            pstmt.executeBatch();
            conn.commit();
        } finally {
            // Close each resource independently; either may be null if setup failed.
            try {
                if (pstmt != null) {
                    pstmt.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
            try {
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Imports the five large-order rankings: same-day buy/sell, 5-day and
     * 10-day net buying, 5-day and 10-day net selling.
     *
     * @param webClient   client used to fetch the pages
     * @param currentDate date to stamp on the imported rows, formatted yyyy-MM-dd
     * @throws Exception if any import fails; the remaining ones are then skipped
     */
    public void importDDmrpm(WebClient webClient, String currentDate) throws Exception {
        // { page URL, target table }, processed in this order.
        final String[][] rankings = {
            { "http://www.gupiaodadan.cn/all.jsp", "ddmrpm" },      // same-day ranking
            { "http://www.gupiaodadan.cn/more-3", "fiveddmrpm" },   // 5-day buying
            { "http://www.gupiaodadan.cn/more-4", "tenddmrpm" },    // 10-day buying
            { "http://www.gupiaodadan.cn/more-5", "fiveddmcpm" },   // 5-day selling
            { "http://www.gupiaodadan.cn/more-6", "tenddmcpm" },    // 10-day selling
        };

        for (String[] ranking : rankings) {
            importDdMrData(webClient, ranking[0], ranking[1], currentDate);
        }
    }

    /**
     * Imports one large-order ranking page into the given table.
     *
     * The first table row is skipped. Per row: cell 0 is the rank, cell 1 the
     * stock code, cells 3 and 4 the large-order buy and sell figures, cell 5
     * the volume and cell 6 the price. The buy/sell difference is computed
     * here and stored in mrmcc.
     *
     * @param webClient client used to fetch the page
     * @param url       page to read
     * @param tableName target table; must be a plain (optionally
     *                  schema-qualified) identifier because it cannot be bound
     *                  as a parameter
     * @param date      date to stamp on every row, formatted yyyy-MM-dd; bound
     *                  as a statement parameter
     * @throws IllegalArgumentException if tableName is not a plain identifier
     * @throws Exception if the page cannot be fetched or parsed, or the
     *                   database rejects the batch
     */
    public static void importDdMrData(WebClient webClient, String url, String tableName, String date) throws Exception {
        // Identifiers cannot be bound, so refuse anything that is not one.
        if (!tableName.matches("[A-Za-z][A-Za-z0-9_$#]*(\\.[A-Za-z][A-Za-z0-9_$#]*)?")) {
            throw new IllegalArgumentException("Invalid table name: " + tableName);
        }

        PreparedStatement pstmt = null;
        Connection conn = null;
        NumberFormat nf = NumberFormat.getInstance();

        try {
            final HtmlPage page = webClient.getPage(url);
            final HtmlDivision div = (HtmlDivision) page.getByXPath("//div[@class='tablecontent']").get(0);

            HtmlTable tableElement = (HtmlTable) div.getHtmlElementsByTagName("table").get(0);

            List<HtmlTableRow> allRows = tableElement.getRows();
            List<HtmlTableRow> dataRows = allRows.subList(1, allRows.size());

            conn = ConnectionManager.getConnection();
            pstmt = conn.prepareStatement("insert into " + tableName + "(gpdm,rq,pm,ddmr,ddmc,mrmcc,cjl,jg) values (?,to_date(?,'yyyy-MM-dd'),?,?,?,?,?,?)");
            for (final HtmlTableRow row : dataRows) {
                int pm = Integer.valueOf(row.getCell(0).asText());
                String gpdm = row.getCell(1).asText();

                // NumberFormat is used for the integer figures, as in the original import.
                int ddmr = nf.parse(row.getCell(3).asText()).intValue();
                int ddmc = nf.parse(row.getCell(4).asText()).intValue();
                int mrmcc = ddmr - ddmc;
                int cjl = nf.parse(row.getCell(5).asText()).intValue();
                float jg = Float.valueOf(row.getCell(6).asText()).floatValue();

                pstmt.setString(1, gpdm);
                pstmt.setString(2, date);
                pstmt.setInt(3, pm);
                pstmt.setInt(4, ddmr);
                pstmt.setInt(5, ddmc);
                pstmt.setInt(6, mrmcc);
                pstmt.setInt(7, cjl);
                pstmt.setFloat(8, jg);

                pstmt.addBatch();
            }
            pstmt.executeBatch();
            conn.commit();
        } finally {
            // Close each resource independently; either may be null if setup failed.
            try {
                if (pstmt != null) {
                    pstmt.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
            try {
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Imports the Shanghai-market Level-2 DDX top-50 page into table ddxpm.
     *
     * The first two table rows are skipped. Per row: cell 0 is the stock code
     * and cells 2..11 map, in order, to jg, zf, ddx, ddxpm, ten_phts,
     * ten_lxphts, ddy, ddypm, ddz, ddzpm.
     *
     * @param webClient client used to fetch the page
     * @param curDate   date to stamp on every row, formatted yyyy-MM-dd; bound
     *                  as a statement parameter
     * @throws Exception if the page cannot be fetched or parsed, or the
     *                   database rejects the batch
     */
    public void importDDXpm(WebClient webClient, String curDate) throws Exception {

        final HtmlPage page = webClient.getPage("http://www.gupiaodadan.cn/level2-home-1");
        final HtmlDivision div = (HtmlDivision) page.getByXPath("//div[@class='tablecontent']").get(0);

        HtmlTable tableElement = (HtmlTable) div.getHtmlElementsByTagName("table").get(0);

        List<HtmlTableRow> allRows = tableElement.getRows();
        List<HtmlTableRow> dataRows = allRows.subList(2, allRows.size());

        PreparedStatement pstmt = null;
        Connection conn = null;

        try {
            conn = ConnectionManager.getConnection();
            pstmt = conn.prepareStatement("insert into ddxpm( gpdm,rq,jg,zf,ddx,ddxpm,ten_phts,ten_lxphts,ddy,ddypm,ddz,ddzpm) " + "values (?,to_date(?,'yyyy-MM-dd'),?,?,?,?,?,?,?,?,?,?)");
            for (final HtmlTableRow row : dataRows) {
                pstmt.setString(1, row.getCell(0).asText());
                pstmt.setString(2, curDate);
                // Parameter n + 1 is filled from cell n for the remaining columns.
                pstmt.setFloat(3, Float.valueOf(row.getCell(2).asText()).floatValue());
                pstmt.setFloat(4, Float.valueOf(row.getCell(3).asText()).floatValue());
                pstmt.setFloat(5, Float.valueOf(row.getCell(4).asText()).floatValue());
                pstmt.setInt(6, Integer.valueOf(row.getCell(5).asText()).intValue());
                pstmt.setInt(7, Integer.valueOf(row.getCell(6).asText()).intValue());
                pstmt.setInt(8, Integer.valueOf(row.getCell(7).asText()).intValue());
                pstmt.setFloat(9, Float.valueOf(row.getCell(8).asText()).floatValue());
                pstmt.setInt(10, Integer.valueOf(row.getCell(9).asText()).intValue());
                pstmt.setFloat(11, Float.valueOf(row.getCell(10).asText()).floatValue());
                pstmt.setInt(12, Integer.valueOf(row.getCell(11).asText()).intValue());
                pstmt.addBatch();
            }
            pstmt.executeBatch();
            conn.commit();
        } finally {
            // Close each resource independently; either may be null if setup failed.
            // Close failures are reported instead of being silently dropped.
            try {
                if (pstmt != null) {
                    pstmt.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
            try {
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Imports the main-force holdings rankings into table zlccpm.
     *
     * The page holds twelve div.fr02 blocks, read in page order: for each of
     * the 1-day, 5-day and 10-day windows there are four blocks - main force,
     * extra-large holders, large holders (all "increase" rankings) and retail
     * (a "decrease" ranking).
     *
     * @param webClient client used to fetch the page
     * @param sDate     date to stamp on every row, formatted yyyy-MM-dd
     * @throws java.text.ParseException if sDate does not start with a valid
     *                   yyyy-MM-dd date
     * @throws Exception if the page cannot be fetched or parsed, or the
     *                   database rejects the batch
     */
    public static void importZlCCData(WebClient webClient, String sDate) throws Exception {
        // The date stays inline in the SQL text because processData binds
        // placeholders 1..6 by position. To keep that safe, the text is parsed
        // and re-formatted first, so only digits and hyphens can reach the
        // statement whatever the caller passed in.
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        dayFormat.setLenient(false);
        String safeDate = dayFormat.format(dayFormat.parse(sDate));

        // Page order of the blocks: window-major, holder class within window.
        final int[] dayWindows = { 1, 5, 10 };
        final int holderClasses = 4; // 0 main force, 1 extra-large, 2 large, 3 retail

        PreparedStatement pstmt = null;
        Connection conn = null;

        try {
            conn = ConnectionManager.getConnection();
            pstmt = conn.prepareStatement("insert into zlccpm(gpdm,rq,adddec,dayNum,userType,pm,per) " + "values (?,to_date('" + safeDate + "','yyyy-MM-dd'),?,?,?,?,?)");

            final HtmlPage page = webClient.getPage("http://www.gupiaodadan.cn/topview-search.jsp");

            List<?> listDivs = page.getByXPath("//div[@class=\"fr02\"]");

            for (int i = 0; i < dayWindows.length * holderClasses; i++) {
                int dayNum = dayWindows[i / holderClasses];
                int userType = i % holderClasses;
                // Only the retail block is a "decrease" ranking (0); the rest are "increase" (1).
                int adddec = (userType == 3) ? 0 : 1;

                HtmlDivision div = (HtmlDivision) listDivs.get(i);
                processData(div, pstmt, adddec, dayNum, userType);
            }

            pstmt.executeBatch();
            conn.commit();
        } finally {
            // Close each resource independently; either may be null if setup failed.
            try {
                if (pstmt != null) {
                    pstmt.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
            try {
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Adds one batch entry per data row of the first table inside the given
     * div. The header row is skipped.
     *
     * Statement parameters are bound by position: 1 stock code, 2 adddec,
     * 3 dayNum, 4 userType, 5 rank, 6 percentage. The statement is not
     * executed here.
     *
     * @param div      block containing the ranking table
     * @param pstmt    prepared insert statement to fill and batch
     * @param adddec   increase/decrease flag stored with every row
     * @param dayNum   day window stored with every row
     * @param userType holder class stored with every row
     * @throws Exception if a cell cannot be parsed or the statement rejects a value
     */
    public static void processData(HtmlDivision div, PreparedStatement pstmt, int adddec, int dayNum, int userType) throws Exception {
        HtmlTable rankingTable = (HtmlTable) div.getHtmlElementsByTagName("table").get(0);
        List<HtmlTableRow> allRows = rankingTable.getRows();

        for (final HtmlTableRow row : allRows.subList(1, allRows.size())) {
            int rank = Integer.parseInt(row.getCell(0).asText());

            // The stock code is the part of the link target after its last '-'.
            HtmlElement secondCell = row.getHtmlElementsByTagName("td").get(1);
            HtmlElement link = secondCell.getHtmlElementsByTagName("a").get(0);
            String href = link.getAttribute("href");
            String stockCode = href.substring(href.lastIndexOf("-") + 1);

            pstmt.setString(1, stockCode);
            pstmt.setInt(2, adddec);
            pstmt.setInt(3, dayNum);
            pstmt.setInt(4, userType);
            pstmt.setInt(5, rank);
            pstmt.setFloat(6, Float.parseFloat(row.getCell(2).asText()));

            pstmt.addBatch();
        }
    }

    /**
     * Calls the stored procedure produce_pm for the given day.
     *
     * @param strDate day to rank, formatted yyyy-MM-dd
     * @throws Exception if the date cannot be parsed or the call fails
     */
    private void callProcForPM(String strDate) throws Exception {
        Connection conn = ConnectionManager.getConnection();
        CallableStatement cs = null;
        try {
            cs = conn.prepareCall("{call produce_pm(?)}");
            cs.setDate(1, new java.sql.Date(new SimpleDateFormat("yyyy-MM-dd").parse(strDate).getTime()));
            cs.executeUpdate();
        } finally {
            // cs is still null when prepareCall failed; closing it blindly would
            // replace the real error with a NullPointerException and skip
            // closing the connection.
            try {
                if (cs != null) {
                    cs.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
            try {
                if (conn != null) {
                    conn.close();
                }
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Program entry point: creates the import window on the event dispatch
     * thread and shows it.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String args[]) {
        Runnable showWindow = new Runnable() {
            public void run() {
                DataImport window = new DataImport();
                window.setVisible(true);
            }
        };
        SwingUtilities.invokeLater(showWindow);
    }

    //GEN-BEGIN:variables
    // Variables declaration - do not modify
    private javax.swing.JButton btnImport;
    private javax.swing.JLabel jLabel1;
    private javax.swing.JPanel jPanel1;
    private javax.swing.JScrollPane jScrollPane1;
    private javax.swing.JTextField txtDate;
    private javax.swing.JTextArea txtStatus;
    private javax.swing.JTextField txtZlcc;
    // End of variables declaration//GEN-END:variables
}

 

   

    

你可能感兴趣的:(oracle,sql,jsp,swing,jdbc)