【毕设进行时-工业大数据,数据挖掘】脚本改进,JDBC数据库读写

正文之前

因为不断的扩充了辅助类,所以改进了下脚本,没有原先那么巨细,但是感觉好了不少。我在想要不要下次直接开一个文件来记录上次更新的文件以及时间。然后再写一个Java或者Python的程序来自动输出要上传的文件名字?感觉是个大工程。现在还可以,以后再说吧!

【毕设进行时-工业大数据,数据挖掘】脚本改进,JDBC数据库读写_第1张图片

正文

#!/bin/bash
# Sync graduation-design sources from the IDEA project into the git working
# tree, recompile them, and push a timestamped commit.
# Usage: upload.sh [extra commit message appended after the timestamp]
set -e  # abort on the first failing command instead of pushing a half-synced tree

# Single source of truth for the two directory prefixes repeated below.
SRC="/Users/zhangzhaobo/IdeaProjects/Graduation_Design"
DST="/Users/zhangzhaobo/Documents/Graduation-Design"

echo "OK!NOW I WILL UPLOAD YOUR CHANGE TO GITHUB!"
time=$(date "+%Y-%m-%d %H:%M")
echo "${time}"

# With set -e a failed cd now stops the script; previously the copies and the
# git commands would have run from the wrong directory.
cd "$DST"

sudo cp -a "$SRC"/src/ReadData.* "$DST"/
sudo cp -a "$SRC"/src/ZZB_JCS.*  "$DST"/
sudo cp -a "$SRC"/src/data.txt   "$DST"/data.txt
sudo cp -a "$SRC"/src/Mysql*     "$DST"/
sudo cp -a "$SRC"/mysql*         "$DST"/

# Compile check before committing, so a broken source never reaches the repo.
sudo javac "$DST"/ReadData.java
sudo javac "$DST"/ZZB_JCS.java

git add ReadData.* ZZB_JCS.* data.txt Mysql*
git commit -m "$time $1"
git push origin master

别的就不说了,先把数据读写的程序丢出来。这个数据读写其实我是想要到时候如果有人给我一个文本,那就GG,所以还要看看读文本存进数据库咋搞!另外!有没有人能告诉我!去哪儿找机械设备的运行数据啊!!!我很苦恼啊啊!

数据库连接程序

//***************************  数据库连接程序  ***************************
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;

public class Mysql_Connect {

    // MySQL 5.7+ requires SSL by default, hence useSSL=true in the URL.
    // (background: https://zhidao.baidu.com/question/2056521203295428667.html)
    private static String url = "jdbc:mysql://127.0.0.1:3306/Graduation_Design?useUnicode=true&characterEncoding=GBK&useSSL=true";

    private static String user = "root";

    // NOTE(review): hard-coded credentials; move to a config file or environment
    // variable before sharing/publishing this code.
    private static String password = "zzb1184827350";

    // host:port/schema part of the JDBC URL; rebuilt by setDatabase(String).
    private static String Database = "192.168.2.127:3306/Graduation_Design";

    private Statement statement;

    private Connection conn;

    public void setUser(String user) {
        Mysql_Connect.user = user;
    }

    public void setPassword(String p) {
        Mysql_Connect.password = p;
    }

    /** Points the helper at another host:port/schema and rebuilds the JDBC URL. */
    public void setDatabase(String database) {
        Database = database;
        // BUGFIX: the JDBC scheme needs a double slash ("jdbc:mysql://host...").
        // The original concatenated "jdbc:mysql:/" + host, producing a URL the
        // MySQL driver cannot parse.
        url = "jdbc:mysql://" + Database + "?useUnicode=true&characterEncoding=GBK&useSSL=true";
    }

    /** Convenience overload: switch to the fixed Shop_User schema. */
    public void setDatabase() {
        // BUGFIX: same single-slash scheme error as above.
        url = "jdbc:mysql://192.168.2.127:3306/Shop_User?useUnicode=true&characterEncoding=GBK&useSSL=true";
    }

    /** Statement created by the last successful Connect(); null before that. */
    public Statement getStatement() {
        return this.statement;
    }

    /**
     * Loads the MySQL driver, opens the connection and creates a Statement.
     * Errors are printed, not rethrown (callers check getStatement() for null).
     */
    public void Connect() {
        try {
            String driver = "com.mysql.jdbc.Driver";
            Class.forName(driver);
            conn = DriverManager.getConnection(url, user, password);

            // The original had an empty success branch; only report failure.
            if (conn.isClosed()) {
                System.out.println("\n\nFailed to connect to the Database!");
            }
            this.statement = conn.createStatement();

        } catch (ClassNotFoundException e) {
            System.out.println("Sorry,can`t find the Driver!");
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Closes the connection if one was opened; errors are printed, not rethrown. */
    public void Dis_Connect() throws SQLException {
        try {
            // Null guard: Connect() may have failed before conn was assigned.
            if (conn != null) {
                conn.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

数据库读写程序!

//***************************  数据库读写程序  ***************************

/* *********************
 * Author   :   HustWolf --- 张照博

 * Time     :   2018.3-2018.5

 * Address  :   HUST

 * Version  :   3.0
 ********************* */


/* *******************
* 这是从数据库或者是文本文件读取数据的时候用的
* 其实我觉得如果可以每一次读一条数据,然后处理一条会比较好
* 但是算了,数据量不大的话,这个样子也不会增加太多时间的!
******************* */
import java.io.*;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class ReadData {
    protected Mysql_Connect mysql=new Mysql_Connect();
    public void writeToDatabase(int id,Object[] data_array) {
        try {
            mysql.Connect();
            Statement statement=mysql.getStatement();
            String INSERT = "INSERT INTO watermelon(id,色泽,根蒂,敲声,纹理,脐部,触感,category) VALUES( " + id + "  , ' " + data_array[0] + "' , ' " + data_array[1] + "' ,  ' " + data_array[2] + "' ,  ' " + data_array[3] + "' ,  ' " + data_array[4] + "' , ' " + data_array[5] + " ', ' " + data_array[6] + "' )";
            boolean insert_ok = statement.execute(INSERT);
            if (insert_ok) {
                System.out.println("Insert Success!");
            }
            statement.close();
            mysql.Dis_Connect();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    public Object[] readFromDatabase(int id) {
        Object[] DataToOut = new Object[7];
        try {
            mysql.Connect();
            Statement statement=mysql.getStatement();
            String GETDATA = "SELECT  色泽,根蒂,敲声,纹理,脐部,触感,category FROM watermelon WHERE id="+id;
            ResultSet select_ok = statement.executeQuery(GETDATA);
            if(select_ok.next()) {
                DataToOut[0]=select_ok.getObject("色泽");
                DataToOut[1]=select_ok.getObject("根蒂");
                DataToOut[2]=select_ok.getObject("敲声");
                DataToOut[3]=select_ok.getObject("纹理");
                DataToOut[4]=select_ok.getObject("脐部");
                DataToOut[5]=select_ok.getObject("触感");
                DataToOut[6]=select_ok.getObject("category");
            }
            statement.close();
            mysql.Dis_Connect();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return DataToOut;
    }
    public Object[][] ReadData() throws IOException {
        Object[][] rawData = new Object [][]{
                {"青绿","蜷缩","浊响","清晰","凹陷","硬滑","是"},
                {"乌黑","蜷缩","沉闷","清晰","凹陷","硬滑","是"},
                {"乌黑","蜷缩","浊响","清晰","凹陷","硬滑","是"},
                {"青绿","蜷缩","沉闷","清晰","凹陷","硬滑","是"},
                {"浅白","蜷缩","浊响","清晰","凹陷","硬滑","是"},
                {"青绿","稍蜷","浊响","清晰","稍凹","软粘","是"},
                {"乌黑","稍蜷","浊响","稍糊","稍凹","软粘","是"},
                {"乌黑","稍蜷","浊响","清晰","稍凹","硬滑","是"},
                {"乌黑","稍蜷","沉闷","稍糊","稍凹","硬滑","否"},
                {"青绿","硬挺","清脆","清晰","平坦","软粘","否"},
                {"浅白","硬挺","清脆","模糊","平坦","硬滑","否"},
                {"浅白","蜷缩","浊响","模糊","平坦","软粘","否"},
                {"青绿","稍蜷","浊响","稍糊","凹陷","硬滑","否"},
                {"浅白","稍蜷","沉闷","稍糊","凹陷","硬滑","否"},
                {"乌黑","稍蜷","浊响","清晰","稍凹","软粘","否"},
                {"浅白","蜷缩","浊响","模糊","平坦","硬滑","否"},
                {"青绿","蜷缩","沉闷","稍糊","稍凹","硬滑","否"},


//                { "<30  ", "High  ", "No ", "Fair     ", "0" },
//                { "<30  ", "High  ", "No ", "Excellent", "0" },
//                { "30-40", "High  ", "No ", "Fair     ", "1" },
//                { ">40  ", "Medium", "No ", "Fair     ", "1" },
//                { ">40  ", "Low   ", "Yes", "Fair     ", "1" },
//                { ">40  ", "Low   ", "Yes", "Excellent", "0" },
//                { "30-40", "Low   ", "Yes", "Excellent", "1" },
//                { "<30  ", "Medium", "No ", "Fair     ", "0" },
//                { "<30  ", "Low   ", "Yes", "Fair     ", "1" },
//                { ">40  ", "Medium", "Yes", "Fair     ", "1" },
//                { "<30  ", "Medium", "Yes", "Excellent", "1" },
//                { "30-40", "Medium", "No ", "Excellent", "1" },
//                { "30-40", "High  ", "Yes", "Fair     ", "1" },
//                { "<30  "  , "Medium", "No ", "Excellent", "1" },
//                { ">40  ", "Medium", "No ", "Excellent", "0" }
        };

// ***************** 写入文件式 **************
//        File file = new File("data.txt");  //存放数组数据的文件
//
//        FileWriter DataToTXT = new FileWriter(file);  //文件写入流
//        int row=0;
//        //将数组中的数据写入到文件中。每行各数据之间TAB间隔
//        for(int i=0;i

最大头!决策树生成程序!

主要是照搬的网上的啦!后面再自己慢慢适合的吧!大神写的很棒啊啊!


/* *********************
 * Author   :   HustWolf --- 张照博

 * Time     :   2018.3-2018.5

 * Address  :   HUST

 * Version  :   1.0
 ********************* */


import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

//最外层类名
public class ZZB_JCS{

    /* *********************
     * Define the Class of Sample

     * it is about its nature and function
     ********************* */

    static class Sample{
        //attributes means 属性
        private Map attributes = new HashMap();
        //category means 类别
        private Object category;

        public Object getAttribute(String name){
            return attributes.get(name);
        }

        public void setAttribute(String name,Object value){
            attributes.put(name,value);
        }

        public void setCategory(Object category){
            this.category=category;
        }

        public String toString(){
            return attributes.toString();
        }
    }
    /* *********************
     * this is the function to read the sample

     * just like decoding the data
     ********************* */

// 此处需要改造为读取外部数据!并且能够进行分解,改造为可读取的形式
    static Map> readSample(String[] attribute_Names) throws IOException {
        //样本属性及其分类,暂时先在代码里面写了。后面需要数据库或者是文件读取
        ReadData data = new ReadData();
        Object[][] rawData =  data.ReadData();
        //最终组合出一个包含所有的样本的Map
        Map> sample_set = new HashMap>();

        //读取每一排的数据
        //分解后读取样本属性及其分类,然后利用这些数据构造一个Sample对象
        //然后按照样本最后的0,1进行二分类划分样本集,
        for (Object[] row:rawData) {
            //新建一个Sample对象,没处理一次加入Map中,最后一起返回
            Sample sample = new Sample();
            int i=0;
            //每次处理一排数据,构成一个样本中各项属性的值
            for (int n=row.length-1; i samples = sample_set.get(row[i]);
            //现在整体样本集中查询,有的话就返回value,而如果这个类别还没有样本,那么就添加一下
            if(samples == null){
                samples = new LinkedList();
                sample_set.put(row[i],samples);
            }
            //不管是当前分类的样本集中是否为空,都要加上把现在分离出来的样本丢进去。
            //此处基本只有前几次分类没有完毕的时候才会进入if,后面各个分类都有了样本就不会为空了。
            samples.add(sample);
        }
        //最后返回的是一个每一个类别一个链表的Map,串着该类别的所有样本 (类别 --> 此类样本)
        return sample_set;
    }

    /* *********************
     * this is the class of the decision-tree

     * 决策树(非叶结点),决策树中的每个非叶结点都引导了一棵决策树

     * 每个非叶结点包含一个分支属性和多个分支,分支属性的每个值对应一个分支,该分支引导了一棵子决策树
     ********************* */

    static class Tree{

        private String attribute;

        private Map children = new HashMap();

        public Tree(String attribute){
            this.attribute=attribute;
        }

        public String getAttribute(){
            return attribute;
        }

        public Object getChild(Object attrValue){
            return children.get(attrValue);
        }

        public void setChild(Object attrValue,Object child){
            children.put(attrValue,child);
        }

        public Set getAttributeValues(){
            return children.keySet();
        }
    }


    /* *********************
     * this is the function to choose the Best Test Attribute

     * it will be used in the generateDecisionTree()

     * 选取最优测试属性。最优是指如果根据选取的测试属性分支,则从各分支确定新样本

     * 的分类需要的信息熵之和最小,这等价于确定新样本的测试属性获得的信息增益最大

     * 返回数组:选取的属性下标、信息熵之和、Map(属性值->(分类->样本列表))
     ********************* */

    static Object[] chooseBestTestAttribute(Map> categoryToSamples,String[] attribute_Names){
        //最优的属性的下标!
        int minIndex = -1;
        //最小的信息熵
        double minValue = Double.MAX_VALUE;
        //最优的分支方案!
        Map>> minSplit = null;

        //对每一个属性,都要计算信息熵,选区最小的为最优,Ent(D)
        for (int attrIndex = 0;attrIndex(分类[Key]->样本列表[Value]) [Value]
            // curSplits就是一个某一个在当前属性下某一种选择值 所对应的所有样本集! 所有的Dv的集合是也??待定!
            Map>> curSplits = new HashMap>>();

    /* 这儿的整个流程画个图哈~下面是对某一个属性进行信息增益的计算了!

                   拿到一个数据对,【所属类别-->样本集】
                             |
                             V
                    解析数据对,分解出key和value
                其中key为类别,value为此类别所有的样本
                             |
                             V
               对于Value里边读出来的每个样本,分别:
      读取当前属性下的值,然后建立起来当前属性值相同的所有样本的样本集;
                             |
                             V
               此处还要将每个样本集拆分为分类样本集!
                             |
                             V
        这一轮下来,就得到关于这个属性的不同属性值对应的样本集合
                    而在这些集合集合中又有分类样本集!
          就好比,这一轮对年龄下手,最终得到了40岁以上的好人、坏人
                     30-40岁之间的好人、坏人集合
                     30岁以下的好人、坏人的集合
                     最后一共得到了6个样本集?
             只不过是已Map中键值对的形式存在,二层包装而已!
                                                                --正分类-->  一个Map
                                 ---属性值1,比如是学生  ->分类两类 |
                         某一属性(这个就是curSplits这个Map的本体)  --负分类-->  一个Map(此处画图方便重用了!)
                                 ---属性值2,比如不是学生->分类两类 |
                                                                 --正分类-->  一个Map
      */

            /*
             * Set> entrySet​()
             * Returns:  A set view of the mappings contained in this map
             * Entry 这个数据类型大致等于C++中的pair,也就是数据打包的意思
             */
            for (Entry> entry : categoryToSamples.entrySet()) {
                //先拿到数据的分类的名称,我们这儿就0,1
                Object category = entry.getKey();
                //再拿到这个类别!注意是类别,不是属性值!类别所对应的所有样本!
                List samples = entry.getValue();
                //然后再慢慢的对每个样本进行操作,将其分为按照属性值划分的各种Dv,然后返回到curSplits
                for (Sample sample : samples ) {
                    // 根据当前要计算的属性,得到当前样本的关于这个属性的值
                    Object attrValue = sample.getAttribute(attribute_Names[attrIndex]);
                    // 根据前面当前样本getAttribute()所获得的属性值,来获取这个属性的值相同的所有的样本的样本集
                    Map> split = curSplits.get(attrValue);
                    // 考虑到一开始肯定没法得到一个完整的Map,所以需要从无到有建立起来!
                    if (split == null) {
                        //建立一个关于这个属性值的Map,层次关系为:属性值->(All Sample) 见Line156
                        split = new HashMap>();
                        curSplits.put(attrValue,split);
                    }
                    //建立起来之后,就可以读取这个属性值等于某个值时对应的分类样本集合了。
                    List splitSamples = split.get(category);
                    // 如果读不到当前属性对应这个值的分类的话,那就要建立一个属性值等于当前样本的属性值,且分类相同的样本集。
                    if (splitSamples == null) {
                        splitSamples = new LinkedList();
                        // 结合当前这个属性值,组成一个集合,放到Map--split里面去。
                        split.put(category,splitSamples);
                    }
                    // 最后再把当前的这个样本放到这个样本集中???!!可以直接这么搞的?%%%%%% 难道是引用传递?
                    // 是的!没有用new自然就是一个引用传递!卧槽!这都给忘了!?
                    splitSamples.add(sample);
                }
                //统计样本总数的计数器需要对当前属性下的样本的数量进行统计。
                allCount += samples.size();
            }

            // 当前属性值的信息增益寄存器
            double curValue = 0.0;
            //读取当前属性下的每一种属性值对应的样本集
            for (Map> splits : curSplits.values()) {
                double perSplitCount = 0;
                //读取每个属性值的样本集Dv的size,得到所有该属性为此值的样本总数,不论类别如何
                for (List list : splits.values()) {
                    //累计当前样本的分支总数
                    perSplitCount += list.size();
                }
                //计数器,当前分支的信息熵和信息增益,这儿是按出现频率在算呢!
                double perSplitValue = 0.0;
                //计算每个属性值对应的信息熵
                for (List list : splits.values() ) {
                    //此处完全就是ID3算法的信息熵的计算公式!也就是ENT(D) = -Sum(Pk*log2(Pk))见《机器学习》 P75
                    double p = list.size() / perSplitCount;
                    //貌似是因为p无论如何都是小于1的,所以采用p -= 实际上是加了?
                    perSplitValue -= p*(Math.log(p)/Math.log(2));
                }
                //这应该还算不上不是信息增益吧!只能算是信息熵之和了。
                curValue += (perSplitCount / allCount) * perSplitValue;
            }
            //选择最小的信息熵为最优!?
            if (minValue > curValue){
                minIndex = attrIndex;
                minValue = curValue;
                minSplit = curSplits;
            }
        }
        //所以最终返回的就是一个信息熵之和  最小的属性的列表索引 + 最小的信息熵之和  + 最小的信息熵之和所对应的子树!
        return  new Object[] {minIndex,minValue,minSplit};
    }


    /* *********************
     * this is the function to output the Decision Tree to the Dashboard
     ********************* */

    static void outputDecisionTree(Object obj,int level, Object from){
        //这个到后面决定输出多少个|----- 也就是说是决定层级的
        for (int i=0; i < level ;++i){
            System.out.print("|-----");
        }
        // 所有子节点专用?除了根节点都要吧!
        if (from != null){
            System.out.printf("(%s):",from);
        }
        //大概是说,如果这个东西还有子节点,那就继续递归
        if (obj instanceof Tree){
            Tree tree = (Tree) obj;
            String attribute_Name = tree.getAttribute();
            System.out.printf("[%s = ?]\n",attribute_Name);
            for (Object attrValue : tree.getAttributeValues()){
                Object child =tree.getChild(attrValue);
                outputDecisionTree(child,level+1,attribute_Name + " = " + attrValue);
            }
        }else {
            System.out.printf("【* CATEGORY = %s *】\n", obj);
        }
    }


    /* *********************
     * this is the function to generate the DecisionTree

     * use the data which read from the files to get the Decisiontree

     * the most important part I think!
     ********************* */
    static Object generateDecisionTree(Map> categoryToSamples,String[] attribute_Names){
        //如果只有一个样本,那么该样本所属分类作为新样本的分类
        if(categoryToSamples.size() == 1)
            return categoryToSamples.keySet().iterator().next();

        //如果没有提供决策的属性(也就是没有给你属性名字清单),那么样本集中具有最多样本的分类作为新样本的分类,也就是投票选举出新的分类
        if (attribute_Names.length == 0) {
            int max = 0;
            Object maxCategory = null;
            // 如果没有属性列表的话,那就直接按照分类作为K个样本集,取数量较大的那个样本集的类别作为本分类。
            for (Entry> entry : categoryToSamples.entrySet() ) {
                int cur = entry.getValue().size();
                if (cur > max) {
                    max = cur;
                    maxCategory = entry.getKey();
                }
            }
            return maxCategory;
        }
        //如果有属性清单的话,那么就选择测试所用的属性了。
        Object[] rst = chooseBestTestAttribute(categoryToSamples,attribute_Names);
        //决策树的根节点选取,分支的属性为选取的测试属性
        Tree tree = new Tree(attribute_Names[(Integer)rst[0]]);

        //已用过的测试属性不能再次被选择为测试属性
        String[] Attr_Find_Already = new String[attribute_Names.length-1];
        for (int i=0,j=0;i>> splits = (Map>>) rst[2];
        for (Entry>> entry : splits.entrySet()) {
            Object attrValue = entry.getKey();
            Map> split = entry.getValue();
            //又是递归调用?那我岂不是玩完?层数不能超过二十层!这是底线!
            Object child = generateDecisionTree(split,Attr_Find_Already);
            tree.setChild(attrValue,child);
        }
        return tree;
    }

    public static  void main(String[] args) throws Exception{
//        String[] attribute_Names = new String[] {"AGE","INCOME","STUDENT","CREDIT_RATING"};
        String[] attribute_Names = new String[] {"色泽","根蒂","敲声","纹理","脐部","触感"};
        //读取样本集
        Map> samples = readSample(attribute_Names);

        //生成决策树
        Object decisionTree = generateDecisionTree(samples,attribute_Names);

        //输出决策树
        outputDecisionTree(decisionTree,0,null);
    }
}
 
 

另外要注意,这个因为用到了数据库,所以需要加载依赖文件包!具体看这儿:

解决方案! https://blog.csdn.net/sakura_yuan/article/details/51730493

最后的结果简直是美到爆炸!好么??爆炸!!

/Library/Java/JavaVirtualMachines/jdk-9.0.1.jdk/Contents/Home/bin/java "-javaagent:/Applications/IntelliJ IDEA.app/Contents/lib/idea_rt.jar=57631:/Applications/IntelliJ IDEA.app/Contents/bin" -Dfile.encoding=UTF-8 -classpath /Users/zhangzhaobo/IdeaProjects/Graduation_Design/out/production/Graduation_Design:/Users/zhangzhaobo/IdeaProjects/Graduation_Design/mysql-connector-java-5.1.44-bin.jar ZZB_JCS
[纹理 = ?]
|-----(纹理 =  稍糊):[触感 = ?]
|-----|-----(触感 =  硬滑 ):【* CATEGORY =  否 *】
|-----|-----(触感 =  软粘 ):【* CATEGORY =  是 *】
|-----(纹理 =  模糊):【* CATEGORY =  否 *】
|-----(纹理 =  清晰):[根蒂 = ?]
|-----|-----(根蒂 =  硬挺):【* CATEGORY =  否 *】
|-----|-----(根蒂 =  蜷缩):【* CATEGORY =  是 *】
|-----|-----(根蒂 =  稍蜷):[色泽 = ?]
|-----|-----|-----(色泽 =  青绿):【* CATEGORY =  是 *】
|-----|-----|-----(色泽 =  乌黑):[触感 = ?]
|-----|-----|-----|-----(触感 =  硬滑 ):【* CATEGORY =  是 *】
|-----|-----|-----|-----(触感 =  软粘 ):【* CATEGORY =  否 *】

Process finished with exit code 0

正文之后

今天早起(八点半?)吃到了西边百景园的早饭,油条和汤包,虽然贵了点,但是好吃啊!希望以后天天有早饭吃!阿西吧!整完了,吃午饭去!

【毕设进行时-工业大数据,数据挖掘】脚本改进,JDBC数据库读写_第2张图片

你可能感兴趣的:(【毕设进行时-工业大数据,数据挖掘】脚本改进,JDBC数据库读写)