Download the ODPS client odpscmd_public.zip from the Alibaba Cloud website; it runs on both Linux and Windows.
Open the client's configuration file odps_client/conf/odps_config.ini and fill it in; the first three items are the essential ones:
project_name=XXXXXX
access_id=XXXXXX
access_key=XXXXXX
end_point=http://service.odps.aliyun.com/api
tunnel_endpoint=http://dt.odps.aliyun.com
log_view_host=http://logview.odps.aliyun.com
https_check=true
# confirm threshold for query input size(unit: GB)
data_size_confirm=100.0
After that, run odps_client/bin/odpscmd to enter the command line.
Some commands I have used are summarized below.
Show all tables:
show tables;
List all partitions of a table:
ls partitions tablename;
Query the first 5 rows of a given partition of a table:
select * from tablename where partitionname='xxxxxx' limit 5;
The odps dship client originally handled table upload/download as a separate tool and was later merged into the odps client; for the standalone tool, see the odps dship client documentation.
What follows is the merged tunnel functionality as I used it in my project.
The shell snippet below downloads partition xxxxxx of table tablename into the local file localtablename, using "##" as the field delimiter (if -fd is not given, "," is the default delimiter):
cd /home/username/odpsclient/bin
./odpscmd -e "tunnel download -fd '##' tablename/partitionname='xxxxxx' /home/username/localtablename"
Note: ODPS does not yet support downloading to a standard .csv file, so if a field itself contains ",", specify a delimiter other than ","; otherwise a CSV parser will treat the "," inside a field as a delimiter and the parse will fail.
Java API notes:
ODPS provides a Java API; see the Alibaba Cloud website for the full reference. Below is the Spark-integration API I used in my project:
OdpsOps odpsOps = new OdpsOps(sc, ODPS_ACCESS_ID, ODPS_ACCESS_KEY, ODPS_END_POINT, ODPS_TUNNEL_URL);
(The first parameter sc is Spark's entry point, the SparkContext.)
JavaRDD<T> rdd = odpsOps.readTableWithJava(projectName, tableName, partition,
        new Function2<Record, TableSchema, T>() {
            private static final long serialVersionUID = 1L;
            @Override
            public T call(Record record, TableSchema schema) throws Exception {
                return cpFromRecord(record, schema);
            }
        }, numPartitions);
(The second parameter is the table name; the third is the partition name and value, e.g. data_date='20160701'; the fifth is the number of Spark threads to start, i.e. the number of partitions of the resulting SparkRDD.)
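The converter cpFromRecord is referenced above but not shown; a minimal hypothetical version that flattens each Record into a String array (so T = String[]) could look like this:
// Hypothetical converter, not from the original post: copy every column of a
// tunnel Record into a String array, using the TableSchema only for the column count.
private static String[] cpFromRecord(Record record, TableSchema schema) {
    int columnCount = schema.getColumns().size();
    String[] fields = new String[columnCount];
    for (int i = 0; i < columnCount; i++) {
        Object value = record.get(i); // typed value of column i, may be null
        fields[i] = (value == null) ? null : value.toString();
    }
    return fields;
}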
The basic (non-Spark) ODPS Java API that I used is summarized below.
The ODPS connection parameters (access key id/secret, project name, endpoints) are carried by a small config bean, the Odps class referenced in the code. The API code:
package com.jianfeitech.bd.res.db.odps.access;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.lang.StringUtils;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import com.aliyun.odps.Column;
import com.aliyun.odps.OdpsException;
import com.aliyun.odps.Partition;
import com.aliyun.odps.PartitionSpec;
import com.aliyun.odps.Resources;
import com.aliyun.odps.Table;
import com.aliyun.odps.TableSchema;
import com.aliyun.odps.Tables;
import com.aliyun.odps.account.Account;
import com.aliyun.odps.account.AliyunAccount;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.tunnel.TableTunnel;
import com.aliyun.odps.tunnel.TableTunnel.DownloadSession;
import com.aliyun.odps.tunnel.TableTunnel.UploadSession;
import com.aliyun.odps.tunnel.TunnelException;
import com.jianfeitech.bd.common.conf.access.ResConfAccess;
import com.jianfeitech.bd.common.conf.model.db.Odps;
public abstract class BaseOdpsAccess implements OdpsAccess, Closeable {

    protected Logger logger = LoggerFactory.getLogger(BaseOdpsAccess.class);

    protected Odps odps;
    protected Account account;
    protected com.aliyun.odps.Odps conn;
    protected TableTunnel tunnel;
    protected Tables tables;
    protected UploadSession usession;
    protected DownloadSession dsession;

    public BaseOdpsAccess(Odps odps) {
        this.odps = odps; // keep the config bean: getUploadSession() reads the project name from it
        this.account = new AliyunAccount(odps.getAccessKeyId(), odps.getAccessKeySecret());
        this.conn = new com.aliyun.odps.Odps(account);
        this.conn.setEndpoint(odps.getOdpsEndpoint());
        this.conn.setDefaultProject(odps.getProjectName());
        this.tables = this.conn.tables();
        this.tunnel = new TableTunnel(conn);
        this.tunnel.setEndpoint(odps.getTunnelEndpoint());
    }

    public BaseOdpsAccess(ResConfAccess rca) {
        this(rca, "default");
    }

    public BaseOdpsAccess(ResConfAccess rca, String odpsName) {
        this(rca.getDbOdps(odpsName));
    }

    public com.aliyun.odps.Odps getConn() {
        return this.conn;
    }

    public TableTunnel getTunnel() {
        return this.tunnel;
    }
// partitionPair example "data_date=20161227"
public UploadSession getUploadSession(String tableName, String partitionPair) {
String projectName = this.odps.getProjectName();
try {
if (StringUtils.isEmpty(partitionPair)) {
this.usession = this.tunnel.createUploadSession(projectName, tableName);
} else {
PartitionSpec partitionSpec = new PartitionSpec(partitionPair);
this.usession = this.tunnel.createUploadSession(projectName, tableName, partitionSpec);
}
logger.info("upload session state: "+this.usession.getStatus().toString());
} catch (TunnelException e) {
logger.error("upload session creation failed.",e);
} catch (IOException e) {
logger.error("please check network for upload session.",e);
}
return this.usession;
}
// partitionPair example "data_date=20161227"
public DownloadSession getDownloadSession(String tableName, String partitionPair) {
String projectName = this.getConn().getDefaultProject();
try {
if (StringUtils.isEmpty(partitionPair)) {
this.dsession = this.tunnel.createDownloadSession(projectName, tableName);
} else {
PartitionSpec partitionSpec = new PartitionSpec(partitionPair);
this.dsession = this.tunnel.createDownloadSession(projectName, tableName, partitionSpec);
}
logger.info("download session state: "+this.dsession.getStatus().toString());
} catch (TunnelException e) {
logger.error("download session creation failed.",e);
} catch (IOException e) {
logger.error("please check network for download session.",e);
}
return this.dsession;
}
    // The listing is truncated here in the original ("public List ..."); a
    // hypothetical completion that lists a table's partition specs:
    public List<String> listPartitions(String tableName) {
        List<String> specs = new ArrayList<String>();
        for (Partition p : this.tables.get(tableName).getPartitions()) {
            specs.add(p.getPartitionSpec().toString());
        }
        return specs;
    }

    @Override
    public void close() {
        // nothing to release here; tunnel sessions are managed server-side
    }
}
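A quick usage sketch of the two session getters (assumptions: OdpsAccessImpl is some concrete subclass, odpsConf is the Odps config bean, and column 0 of the table is a string column; the RecordWriter/RecordReader flow is the standard ODPS tunnel pattern):
// RecordWriter and RecordReader below are the com.aliyun.odps.data interfaces
// returned by the tunnel sessions.
static void roundTrip(Odps odpsConf) throws Exception {
    try (BaseOdpsAccess access = new OdpsAccessImpl(odpsConf)) { // hypothetical concrete subclass
        // Upload: write records into block 0, then commit the written block ids.
        UploadSession up = access.getUploadSession("tablename", "data_date=20161227");
        RecordWriter writer = up.openRecordWriter(0);
        Record r = up.newRecord();
        r.set(0, "some value"); // column 0, assumed to be a string column
        writer.write(r);
        writer.close();
        up.commit(new Long[] { 0L });

        // Download: read the whole partition back record by record.
        DownloadSession down = access.getDownloadSession("tablename", "data_date=20161227");
        RecordReader reader = down.openRecordReader(0, down.getRecordCount());
        Record rec;
        while ((rec = reader.read()) != null) {
            System.out.println(rec.get(0));
        }
        reader.close();
    }
}
Each block id may be committed only once; for large uploads, open writers on several block ids and pass all of them to commit.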
Addendum:
ODPS SQL is similar to standard SQL, for example:
select mac,count(1) as c from http_standard where data_date='20160701' group by mac order by c desc limit 5;