只是一个例子,方便以后查阅。
import ey.db.oracle.OracleHelper; import ey.db.type.*; import java.io.BufferedReader; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; import java.io.FileOutputStream; //import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.URLDecoder; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.PreparedStatement; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.StringTokenizer; import java.util.regex.Pattern; import java.util.regex.Matcher; import oracle.jdbc.OracleTypes; public class ImpCNetData { static Configuration conf=new Configuration("/ProjectConfig.properties"); static String db_host=conf.getValue("cdma_host"); static String db_user=conf.getValue("cdma_user"); static String db_passwd=conf.getValue("cdma_passwd"); static String impFilePath=conf.getValue("ImpFilePath"); static String impFileLog=conf.getValue("ImpFileLog"); static String lastKeepString=conf.getValue("LastKeepString"); static String lastFilterString=conf.getValue("LastFilterString"); static String connStr=""; static Pattern lastFilterRex=null; static Pattern lastKeepRex=null; private final String oracleDriverName = "oracle.jdbc.driver.OracleDriver"; //批量插入数据 public void InsertDataBatch(List<CNetData> cnds) throws Exception { Connection conn=null; try { Class.forName(oracleDriverName); conn = DriverManager.getConnection(db_host, db_user, db_passwd); conn.setAutoCommit(false); String sql = "insert into cn_visit(visitid, mobile, url, visittime, onlinetime, desip, desport, mobileip, mobileport,urltypeid, comefrom, bsid, username,housecityid) VALUES(seq_cn_visitid.Nextval,?,?,?,?,?,?,?,?,?,?,?,?,?)"; PreparedStatement prest = conn.prepareStatement(sql,ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY); for(int x = 0; x < cnds.size(); x++){ CNetData cnd=cnds.get(x); prest.setString(1, cnd.getMobile()); prest.setString(2, cnd.getUrl()); prest.setString(3, cnd.getVisitTime()); prest.setString(4, cnd.getOnlineTime()); prest.setString(5,cnd.getDesIp()); prest.setInt(6,cnd.getDesPort()); prest.setString(7,cnd.getMobileIp()); prest.setInt(8,cnd.getMobilePort()); prest.setInt(9,cnd.getFileUrlId()); prest.setInt(10,cnd.getComeFrom()); prest.setString(11,cnd.getBsid()); prest.setString(12,cnd.getUsername()); prest.setInt(13,cnd.getHouseCityId()); prest.addBatch(); } prest.executeBatch(); conn.commit(); conn.close(); } catch (SQLException ex) { ex.printStackTrace(); } catch (ClassNotFoundException ex) { ex.printStackTrace(); } catch(Exception ex) { ex.printStackTrace(); } finally { if (conn!=null && !conn.isClosed()) conn.close(); } } //读取网址类型 public DataTable GetUrlType() { try { OracleConnection conn= new OracleConnection(db_host+";"+db_user+";"+db_passwd); String cmd="FX114V01_CN_VISIT.GetUrlList"; Parameter pds=new Parameter("out_data",OracleTypes.CURSOR,null, ParameterDirection.OUT); Parameter pcode=new Parameter("out_code",OracleTypes.INTEGER,null, ParameterDirection.OUT); DataSet ds=OracleHelper.ExecuteDataSet(conn,CommandType.StoreProcedure, cmd, pds,pcode); DataTable dt=ds.Tables[0]; return dt; } catch(Exception ex) { ex.printStackTrace(); } return null; } //导入数据 public void ImpData() throws Exception { //StringBuffer sb=new StringBuffer(); String tempstr=null; String url=""; DataTable dt=GetUrlType();//URL分类表 if(lastFilterString.length()>0 && lastFilterRex==null) lastFilterRex=Pattern.compile(lastFilterString); if(lastKeepString.length()>0 && lastKeepRex==null) lastKeepRex=Pattern.compile(lastKeepString); try { //网址类型不能为空 if(dt!=null) { //String path="/usr/hadoop/bigdata/filterurl/part/"; File dir=new File(impFilePath); File[] files=null; if(dir.isDirectory()) files=dir.listFiles(new MyFileFilter()); for(int i=0;i<files.length;i++) { //System.out.println(files[i].getName()); List<CNetData> cnds=new ArrayList<CNetData>(); FileInputStream fis=new FileInputStream(files[i]); BufferedReader br=new BufferedReader(new InputStreamReader(fis,"UTF-8")); //文件逐行读取 while((tempstr=br.readLine())!=null) { try { StringTokenizer itr=new StringTokenizer(tempstr,"|"); if(itr.hasMoreTokens() && itr.countTokens()==11) { CNetData cnd=new CNetData(); String mobile=itr.nextToken().trim(); if(mobile.length()>11) cnd.setMobile(mobile.substring(0,11)); else cnd.setMobile(mobile); url=itr.nextToken().trim(); //替换URL特殊编码 if(url.indexOf("%2F")>=0 || url.indexOf("%3F")>=0) { url = url.replaceAll("%2F","/"); url = url.replaceAll("%3A",":"); url = url.replaceAll("%20"," "); url = url.replaceAll("%3F","?"); url = url.replaceAll("%3D","="); } //保留需要的 if(lastKeepString.length()>0 && lastKeepRex!=null) { Matcher lastKeepMatcher=null; //防止引用或者搜索过来的 if(url.length()<=30) lastKeepMatcher=lastKeepRex.matcher(url); else lastKeepMatcher=lastKeepRex.matcher(url.substring(0,30)); //没找到的话跳出本次循环 if(!lastKeepMatcher.find()) continue; //跳出本次循环 } //过滤不需要的 if(lastFilterString.length()>0 && lastFilterRex!=null) { Matcher lastMatcher=lastFilterRex.matcher(url); //找到需要过滤掉的字符串 if(lastMatcher.find()) continue; //跳出本次循环 } cnd.setUrl(url); cnd.setVisitTime(itr.nextToken()); cnd.setMobileIp(itr.nextToken()); cnd.setDesIp(itr.nextToken()); cnd.setMobilePort(Integer.parseInt(itr.nextToken().trim())); cnd.setDesPort(Integer.parseInt(itr.nextToken().trim())); cnd.setOnlineTime(itr.nextToken()); cnd.setBsid(itr.nextToken()); cnd.setUsername(itr.nextToken()); cnd.setComeFrom(Integer.parseInt(itr.nextToken().trim())); cnd.setFileUrlId(0);//默认值 //查找所属URL类型 for(int k=0;k<dt.Rows.length;k++) { if(url.indexOf(dt.Rows[k].Columns[1].colValue.toString())>=0) { cnd.setFileUrlId(Integer.parseInt(dt.Rows[k].Columns[0].colValue.toString().trim())); break; } } //判断房源或者链接的城市 /* 21 北京 51 东莞 52 珠海 55 佛山 56 广州 59 惠州 68 深圳 162 武汉 174 长沙 341 上海 * */ if(url.indexOf("sz.")>=0||url.indexOf("sz/")>=0) cnd.setHouseCityId(68); else if(url.indexOf("gz.")>=0||url.indexOf("gz/")>=0||url.indexOf("gz_")>=0) cnd.setHouseCityId(56); else if(url.indexOf("dg.")>=0||url.indexOf("dg/")>=0||url.indexOf("dg_")>=0) cnd.setHouseCityId(51); else if(url.indexOf("fs.")>=0||url.indexOf("fs/")>=0||url.indexOf("fs_")>=0) cnd.setHouseCityId(55); else if(url.indexOf("zh.")>=0||url.indexOf("zh/")>=0||url.indexOf("zh_")>=0) cnd.setHouseCityId(52); else if(url.indexOf("hz.")>=0||url.indexOf("hz/")>=0||url.indexOf("hz_")>=0) cnd.setHouseCityId(59); else if(url.indexOf("zs.")>=0||url.indexOf("zs/")>=0||url.indexOf("zs_")>=0) cnd.setHouseCityId(73); else if(url.indexOf("bj.")>=0||url.indexOf("bj/")>=0||url.indexOf("bj_")>=0) cnd.setHouseCityId(21); else if(url.indexOf("sh.")>=0||url.indexOf("sh/")>=0||url.indexOf("sh_")>=0) cnd.setHouseCityId(341); else if(url.indexOf("cs.")>=0||url.indexOf("cs/")>=0||url.indexOf("cs_")>=0) cnd.setHouseCityId(174); else if(url.indexOf("wh.")>=0||url.indexOf("wh/")>=0||url.indexOf("wh_")>=0) cnd.setHouseCityId(162); else cnd.setHouseCityId(0); //上面的方式没找到的情况下才用下面的模式查找 if(cnd.getHouseCityId()<=0) { if(url.contains("sz")||url.contains("shenzhen")) cnd.setHouseCityId(68); else if(url.contains("guangzhou")) cnd.setHouseCityId(56); else if(url.contains("dg")||url.contains("dongguan")) cnd.setHouseCityId(51); else if(url.contains("fs")||url.contains("foshan")) cnd.setHouseCityId(55); else if(url.contains("zhuhai")) cnd.setHouseCityId(52); else if(url.contains("hz")||url.contains("huizhou")) cnd.setHouseCityId(59); else if(url.indexOf("zs")>=0||url.indexOf("zhongshan")>=0) cnd.setHouseCityId(73); else if(url.indexOf("bj")>=0||url.indexOf("beijing")>=0) cnd.setHouseCityId(21); else if(url.indexOf("shanghai")>=0) cnd.setHouseCityId(341); else if(url.indexOf("cs")>=0||url.indexOf("changsha")>=0) cnd.setHouseCityId(174); else if(url.indexOf("wh")>=0||url.indexOf("wuhan")>=0) cnd.setHouseCityId(162); else cnd.setHouseCityId(0); } cnds.add(cnd); } } catch(Exception ex) { ex.printStackTrace(); } } //System.out.println(cnds.size()); //插入数据 if(cnds.size()>0) { InsertDataBatch(cnds); writeLog(files[i].getName()+" " +cnds.size()); files[i].delete(); } br.close(); fis.close(); //break; } } } catch(IOException ex) { System.out.println(ex.getStackTrace()); } } public static void writeLog(String str) { try { //String path="/usr/hadoop/bigdata/filterurl/importfile.log"; File file=new File(impFileLog); if(!file.exists()) file.createNewFile(); SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); FileOutputStream out=new FileOutputStream(file,true); //如果追加方式用true StringBuffer sb=new StringBuffer(); //sb.append("-----------"+sdf.format(new Date())+"------------\n"); sb.append(sdf.format(new Date())+" "+str+"\n"); out.write(sb.toString().getBytes("utf-8"));//注意需要转换对应的字符集 out.close(); } catch(IOException ex) { System.out.println(ex.getStackTrace()); } } public static void main(String[] args) throws Exception { try { ImpCNetData icd=new ImpCNetData(); icd.ImpData(); } catch(Exception ex) { ex.printStackTrace(); } } }
记录类:
public class CNetData { private String mobile; public String getMobile() { return mobile; } public void setMobile(String mobile) { this.mobile = mobile; } public String getUrl() { return url; } public void setUrl(String url) { this.url = url; } public String getVisitTime() { return visitTime; } public void setVisitTime(String visitTime) { this.visitTime = visitTime; } public String getOnlineTime() { return onlineTime; } public void setOnlineTime(String onlineTime) { this.onlineTime = onlineTime; } public String getDesIp() { return desIp; } public void setDesIp(String desIp) { this.desIp = desIp; } public int getDesPort() { return desPort; } public void setDesPort(int desPort) { this.desPort = desPort; } public String getMobileIp() { return mobileIp; } public void setMobileIp(String mobileIp) { this.mobileIp = mobileIp; } public int getMobilePort() { return mobilePort; } public void setMobilePort(int mobilePort) { this.mobilePort = mobilePort; } public int getComeFrom() { return comeFrom; } public void setComeFrom(int comeFrom) { this.comeFrom = comeFrom; } public String getBsid() { return bsid; } public void setBsid(String bsid) { this.bsid = bsid; } public String getUsername() { return username; } public void setUsername(String username) { this.username = username; } private String url; private String visitTime; private String onlineTime; private String desIp; private int desPort; private String mobileIp; private int mobilePort; private int comeFrom; private String bsid; private String username; private int fileUrlId; private int houseCityId; public int getHouseCityId() { return houseCityId; } public void setHouseCityId(int houseCityId) { this.houseCityId = houseCityId; } public int getFileUrlId() { return fileUrlId; } public void setFileUrlId(int fileUrlId) { this.fileUrlId = fileUrlId; } }
获取配置文件类:
import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.Properties; /** * 读取properties文件 * */ public class Configuration { private Properties propertie; private InputStream in; /** *//** * 初始化Configuration类 */ public Configuration() { propertie = new Properties(); } /** *//** * 初始化Configuration类 * @param filePath 要读取的配置文件的路径+名称 */ public Configuration(String filePath) { propertie = new Properties(); try{ in =Object.class.getResourceAsStream(filePath); propertie.load(in); in.close(); } catch (FileNotFoundException ex){ System.out.println("读取属性文件--->失败!- 原因:文件路径错误或者文件不存在"); ex.printStackTrace(); } catch (IOException ex){ System.out.println("装载文件--->失败!"); ex.printStackTrace(); } }//end ReadConfigInfo(...) /** *//** * 重载函数,得到key的值 * @param key 取得其值的键 * @return key的值 */ public String getValue(String key) { if(propertie.containsKey(key)){ String value = propertie.getProperty(key);//得到某一属性的值 return value; } else return ""; }//end getValue(...) /** *//** * 重载函数,得到key的值 * @param fileName properties文件的路径+文件名 * @param key 取得其值的键 * @return key的值 */ public String getValue(String fileName, String key) { try{ String value = ""; in = Object.class.getResourceAsStream(fileName); propertie.load(in); in.close(); if(propertie.containsKey(key)){ value = propertie.getProperty(key); return value; }else return value; } catch (FileNotFoundException e){ e.printStackTrace(); return ""; } catch (IOException e){ e.printStackTrace(); return ""; } catch (Exception ex){ ex.printStackTrace(); return ""; } }//end getValue(...) }