访问OSS的方式分三种:
第一种是单机模式访问oss的ossclient客户端;
第二种是MR程序访问oss的API,由emr封装了的API,对于MR程序来说此种方法更可靠。
(亲身经历过:MR程序用ossclient访问时,有时会因为网络问题导致从oss下载文件失败,后来换成第二种API后不再出现此类问题)
(https://help.aliyun.com/document_detail/28117.html?spm=5176.product28066.6.618.5wAJKR)
第三种是可将oss文件直接读成RDD
详见https://help.aliyun.com/document_detail/28116.html?spm=5176.product28066.6.616.NezW5R。
第一种:
package com.jianfeitech.bd.res.db.oss.access;
import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.aliyun.oss.ClientException;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.model.GetObjectRequest;
import com.aliyun.oss.model.ListObjectsRequest;
import com.aliyun.oss.model.OSSObject;
import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;
import com.aliyun.oss.model.ObjectMetadata;
import com.aliyun.oss.model.PutObjectResult;
import com.jianfeitech.bd.common.conf.access.BdResConfAccess;
import com.jianfeitech.bd.common.conf.access.ResConfAccess;
import com.jianfeitech.bd.common.conf.model.db.Oss;
import com.jianfeitech.bd.util.crypto.MD5Util;
public class OSSAccess implements Closeable {
/** Class-wide SLF4J logger; declared final because it is never reassigned. */
private static final Logger logger = LoggerFactory.getLogger(OSSAccess.class);
/**
 * Value passed to {@code ListObjectsRequest.setMaxKeys} by the listing methods.
 * Populated from the OSS configuration in the constructor.
 * NOTE(review): the name looks like a misspelling of "maxKey"; it is kept because the field is public.
 */
public int maxKye;
/** Underlying OSS client that every operation of this class delegates to. */
private OssCli ossCli;
/**
* ossConfName 配置框架中配置的OSS的名字
* @param ossConfName
*/
public OSSAccess(String ossConfName,ResConfAccess resConfAccess) {
Oss dbOss = resConfAccess.getDbOss(ossConfName);
ossCli = new OssCli(dbOss.getEndPoint(), dbOss.getAccessKeyId(), dbOss.getAccessKeySecret());
maxKye = dbOss.getMaxKey();
}
/**
 * Collects the size of each object under a "folder" (key prefix), keyed by object key.
 *
 * <p>A single {@code listObjects} request is issued with the prefix and with
 * {@code maxKye} as the max-keys value; no follow-up request for further pages
 * is made in this method.
 *
 * <p>NOTE(review): in this copy of the file the method signature and the
 * declaration of {@code resultMap} are truncated after the word {@code Map}
 * (the generic arguments, method name and parameter list are missing). The
 * parameter descriptions below are inferred from the names used in the body;
 * confirm against the original source.
 *
 * @param bucketName bucket to list
 * @param prefix     key prefix set on the list request
 * @param suffix     when non-null and non-empty, only keys ending with this suffix are included
 * @return map from object key to the size reported by the object summary
 * @throws OSSException    declared by this method
 * @throws ClientException declared by this method
 */
public Map
throws OSSException, ClientException {
Map
ListObjectsRequest listObjectsRequest = new ListObjectsRequest(bucketName);
listObjectsRequest.setPrefix(prefix);
listObjectsRequest.setMaxKeys(maxKye);
ObjectListing listing = ossCli.listObjects(listObjectsRequest);
for (OSSObjectSummary objectSummary : listing.getObjectSummaries()) {
// A null or empty suffix means "no filtering": every listed key is recorded.
if (null != suffix && !"".equals(suffix)) {
if (objectSummary.getKey().endsWith(suffix)) {
resultMap.put(objectSummary.getKey(), objectSummary.getSize());
}
} else {
resultMap.put(objectSummary.getKey(), objectSummary.getSize());
}
}
return resultMap;
}
/**
 * Deletes one object from the given bucket by delegating to the underlying
 * client's {@code deleteObject}.
 *
 * @param bucketName bucket that holds the object
 * @param key        key of the object to delete
 * @throws OSSException    declared by this method
 * @throws ClientException declared by this method
 */
public void deleteObj(String bucketName, String key) throws OSSException, ClientException {
ossCli.deleteObject(bucketName, key);
}
/**
 * Copies an object by delegating to the underlying client's {@code copyObject}.
 *
 * @param srcBucket
 *            source bucket (the "root directory" of the source file)
 * @param srcKey
 *            full path (key) of the source file
 * @param destBucket
 *            destination bucket (the "root directory" of the target file)
 * @param destKey
 *            full path (key) of the target file
 */
public void copyFile(String srcBucket, String srcKey, String destBucket, String destKey) {
ossCli.copyObject(srcBucket, srcKey, destBucket, destKey);
}
/**
 * Uploads a small local file together with MD5 metadata.
 *
 * <p>Originates from the former ObjectUtil class. An overload taking the MD5 as
 * {@code byte[]} used to exist solely for {@code modifiyObject}; that call site
 * was adapted and the overload removed.
 *
 * <p>The hex MD5 is taken from {@code fileMD5} when it is not blank, otherwise
 * it is computed with {@code MD5Util.fileMD5}. Its Base64 form is sent as the
 * {@code Content-MD5} header, and the user metadata entries {@code md5},
 * {@code md5base64}, {@code filename} and {@code filesize} are attached.
 *
 * @param sourceFile local file to upload
 * @param bucketName destination bucket
 * @param key        destination object key
 * @param fileMD5    hex-encoded MD5 of the file, or blank to have it computed here
 * @return the result returned by the client's {@code putObject}
 * @throws IOException      declared by this method (the file is read through a {@link FileInputStream})
 * @throws OSSException     declared by this method
 * @throws ClientException  declared by this method
 * @throws DecoderException declared by this method (the MD5 string is hex-decoded)
 */
public PutObjectResult putObject(File sourceFile, String bucketName, String key, String fileMD5)
        throws IOException, OSSException, ClientException, DecoderException {
    // Use the caller's digest when supplied; otherwise hash the file ourselves.
    final String hexDigest = StringUtils.isNotBlank(fileMD5)
            ? fileMD5
            : MD5Util.fileMD5(sourceFile.getAbsolutePath());
    final String base64Digest = Base64.encodeBase64String(Hex.decodeHex(hexDigest.toCharArray()));

    final ObjectMetadata metadata = new ObjectMetadata();
    metadata.setHeader("Content-MD5", base64Digest);

    final String localName = sourceFile.getName();
    final Long byteCount = sourceFile.length();
    metadata.setContentLength(byteCount);

    // User metadata; modifiyObject reads the "md5" entry back later.
    metadata.addUserMetadata("md5", hexDigest);
    metadata.addUserMetadata("md5base64", base64Digest);
    metadata.addUserMetadata("filename", localName);
    metadata.addUserMetadata("filesize", byteCount.toString());

    // The stream is closed by try-with-resources once the upload call returns.
    try (InputStream body = new FileInputStream(sourceFile)) {
        return ossCli.putObject(bucketName, key, body, metadata);
    }
}
/**
 * Updates an object via PUT, skipping the upload when the stored copy already
 * carries the same MD5. Functionally close to {@code putObject}; originates
 * from the former ObjectUtil class.
 *
 * <p>The local file's MD5 is compared with the {@code md5} user-metadata entry
 * of the existing object. On a match, a fresh {@code PutObjectResult} carrying
 * the existing object's ETag is returned; otherwise the file is uploaded
 * through {@code putObject} with the already computed MD5.
 *
 * @param sourceFile local file holding the new content
 * @param bucketName bucket of the object
 * @param key        key of the object
 * @return a result carrying the existing ETag when nothing changed, else the upload result
 * @throws IOException      declared by this method
 * @throws OSSException     declared by this method
 * @throws ClientException  declared by this method
 * @throws DecoderException declared by this method
 */
public PutObjectResult modifiyObject(File sourceFile, String bucketName, String key)
        throws IOException, OSSException, ClientException, DecoderException {
    final String localDigest = MD5Util.fileMD5(sourceFile.getAbsolutePath());
    final ObjectMetadata remoteMeta = getMetadata(bucketName, key);

    // Only compare when the remote object exists and actually carries an "md5" entry.
    final boolean remoteHasDigest = remoteMeta != null
            && remoteMeta.getUserMetadata() != null
            && remoteMeta.getUserMetadata().containsKey("md5");

    if (remoteHasDigest && remoteMeta.getUserMetadata().get("md5").equals(localDigest)) {
        // Same content: report the existing ETag instead of uploading again.
        final PutObjectResult unchanged = new PutObjectResult();
        unchanged.setETag(remoteMeta.getETag());
        return unchanged;
    }
    return putObject(sourceFile, bucketName, key, localDigest);
}
/**
 * Opens an object and returns its content wrapped in a {@link BufferedInputStream}.
 *
 * @param bucketName bucket of the object
 * @param key        key of the object
 * @return buffered stream over the object's content
 * @throws IOException declared by this method
 */
public BufferedInputStream getOssStream(String bucketName, String key) throws IOException {
    return new BufferedInputStream(ossCli.getObject(bucketName, key).getObjectContent());
}
/**
 * Fetches the metadata of an object, returning {@code null} instead of
 * throwing when the service reports an error.
 *
 * <p>Any {@code OSSException} is logged and converted to a {@code null}
 * result. For errors other than {@code NoSuchKey} the exception itself is
 * passed to the logger so the stack trace is not lost.
 *
 * @param bucketName bucket of the object
 * @param key        key of the object
 * @return the object's metadata, or {@code null} if the request failed with an {@code OSSException}
 * @throws OSSException    declared by this method; caught and logged inside it
 * @throws ClientException declared by this method; not caught here
 * @throws IOException     declared by this method
 */
public ObjectMetadata getMetadata(String bucketName, String key) throws OSSException, ClientException, IOException {
    try {
        return ossCli.getObjectMetadata(bucketName, key);
    } catch (OSSException e) {
        if ("NoSuchKey".equals(e.getErrorCode())) {
            logger.error("NoSuchKey in the bucket");
        } else {
            // Pass the exception along so the stack trace reaches the log, not just the message.
            logger.error(e.getErrorMessage(), e);
        }
        // Best-effort contract: callers such as modifiyObject treat null as "no metadata".
        return null;
    }
}
/**
 * Downloads an object into a local file using the client's
 * {@code getObject(GetObjectRequest, File)}.
 *
 * @param bucketName  bucket of the object
 * @param ossFilePath key of the object to download
 * @param targetPath  path of the local file passed to the client as the download target
 * @throws OSSException    declared by this method
 * @throws ClientException declared by this method
 */
public void download(String bucketName, String ossFilePath, String targetPath)
throws OSSException, ClientException {
GetObjectRequest getObjectRequest = new GetObjectRequest(bucketName, ossFilePath);
ossCli.getObject(getObjectRequest, new File(targetPath));
}
/**
 * Lists object keys under a prefix, optionally filtered by suffix and capped
 * at a caller-supplied count.
 *
 * <p>A single {@code listObjects} request is issued with {@code maxKye} as the
 * max-keys value; no follow-up request for further pages is made in this method.
 *
 * <p>NOTE(review): in this copy of the file the method signature and the
 * declaration of {@code list} are truncated after the word {@code List}
 * (generic arguments, method name and parameter list are missing). The body
 * uses {@code bucketName}, {@code prefix}, {@code suffix} and a nullable
 * {@code MaxKeys} with an {@code intValue()} method; confirm the exact
 * signature against the original source.
 *
 * @return the collected object keys
 * @throws OSSException    declared by this method
 * @throws ClientException declared by this method
 */
public List
throws OSSException, ClientException {
List
ListObjectsRequest listObjectsRequest = new ListObjectsRequest(bucketName);
listObjectsRequest.setPrefix(prefix);
listObjectsRequest.setMaxKeys(maxKye);
ObjectListing listing = ossCli.listObjects(listObjectsRequest);
for (OSSObjectSummary objectSummary : listing.getObjectSummaries()) {
// A null or empty suffix means "no filtering": every listed key is collected.
if (null != suffix && !"".equals(suffix)) {
if (objectSummary.getKey().endsWith(suffix)) {
list.add(objectSummary.getKey());
}
} else {
list.add(objectSummary.getKey());
}
// If the caller limited the number of results (non-null, non-zero), stop once that many keys are collected.
if (null != MaxKeys && MaxKeys.intValue() != 0) {
if (list.size() == MaxKeys.intValue()) {
break;
}
}
}
return list;
}
/**
 * Uploads a local file by delegating to the client's
 * {@code putObject(bucketName, key, file)}; no metadata is set here.
 *
 * @param bucketName destination bucket
 * @param key        destination object key
 * @param file       local file to upload
 */
public void putObjectWithClient(String bucketName, String key, File file) {
ossCli.putObject(bucketName, key, file);
}
/**
 * Uploads the content of a stream by delegating to the client's
 * {@code putObject(bucketName, key, input)}. The stream is not closed in this method.
 *
 * @param bucketName destination bucket
 * @param key        destination object key
 * @param input      stream supplying the object content
 */
public void putObjectWithClient(String bucketName, String key, InputStream input) {
ossCli.putObject(bucketName, key, input);
}
/**
 * Closes the underlying {@code OssCli}.
 *
 * @throws IOException declared by {@link Closeable#close()}
 */
@Override
public void close() throws IOException {
ossCli.close();
}
/**
 * Tells whether an object exists by requesting its metadata.
 *
 * <p>Note the parameter order: the key comes first, the bucket second.
 *
 * @param key        key of the object to probe
 * @param bucketName bucket to look in
 * @return {@code true} if metadata was returned; {@code false} if the service answered
 *         {@code NoSuchKey} or the client returned {@code null}
 */
public Boolean keyOSSExists(String key, String bucketName) {
    try {
        return ossCli.getObjectMetadata(bucketName, key) != null;
    } catch (OSSException e) {
        // Only "NoSuchKey" means absent; every other service error is propagated.
        if ("NoSuchKey".equals(e.getErrorCode())) {
            return false;
        }
        throw e;
    }
}
}
package com.jianfeitech.bd.res.db.oss.access;
import java.io.Closeable;
import java.io.IOException;
import com.aliyun.oss.OSSClient;
/**
 * {@link OSSClient} subclass that implements {@link Closeable}, mapping
 * {@code close()} onto the client's {@code shutdown()}.
 */
public class OssCli extends OSSClient implements Closeable{
/**
 * Creates the client by forwarding all three arguments to the
 * {@code OSSClient} constructor.
 *
 * @param endpoint  OSS endpoint
 * @param ossKey    access key id
 * @param ossSecret access key secret
 */
public OssCli(String endpoint, String ossKey, String ossSecret) {
super(endpoint,ossKey,ossSecret);
}
/**
 * Shuts the client down via {@code OSSClient.shutdown()}.
 *
 * @throws IOException declared by {@link Closeable#close()}
 */
@Override
public void close() throws IOException {
super.shutdown();
}
}
第二种:
package com.jianfeitech.bd.res.db.oss.access;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.jianfeitech.bd.common.conf.access.ResConfAccess;
import com.jianfeitech.bd.common.conf.model.db.Oss;
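/**
 * Accesses OSS through the Hadoop {@code FileSystem} API, configured to use
 * {@code com.aliyun.fs.oss.nat.NativeOssFileSystem} for the {@code oss} scheme.
 *
 * @author Liqiang
 * @date created 2017-02-17 16:27:09
 * @version 1.0
 */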
public class EmrOssAccess {
private static final Logger logger = LoggerFactory.getLogger(EmrOssAccess.class);
// Data file suffixes recognised when listing the OSS directory.
public static final String SUFFIX_DATA = ".tar.gz";
public static final String SUFFIX_ZIP = ".zip";
// Connection state belongs to the instance: it is assigned by the constructor, and keeping it
// static would let a second EmrOssAccess silently overwrite the first one's file system and path.
private final String accessKeyId;
private final String accessKeySecret;
private final String endPoint;
// Hadoop file system bound to the OSS URI built in the constructor.
private final FileSystem fs;
// OSS directory (including credentials in the URI) this instance operates on.
private final Path path;
/**
 * Connects to one OSS directory through the Hadoop {@code FileSystem} API.
 *
 * <p>The OSS settings are looked up by name, an
 * {@code oss://accessKeyId:accessKeySecret@bucket.endpoint/folder} URI is
 * built from them, and a {@code FileSystem} is obtained for that URI with
 * {@code fs.oss.impl} set to {@code com.aliyun.fs.oss.nat.NativeOssFileSystem}.
 *
 * <p>The access key secret is deliberately kept out of the log output.
 *
 * @param ossConfName   name of the OSS entry in the configuration framework
 * @param resConfAccess configuration accessor used to look up the OSS settings
 * @param ossBucketName bucket to access
 * @param ossFolderPath folder path inside the bucket, appended after a {@code /}
 * @throws IOException declared by this method (obtaining the {@code FileSystem})
 */
public EmrOssAccess(String ossConfName, ResConfAccess resConfAccess, String ossBucketName, String ossFolderPath) throws IOException {
    // Load the configured credentials and endpoint.
    Oss dbOss = resConfAccess.getDbOss(ossConfName);
    accessKeyId = dbOss.getAccessKeyId();
    accessKeySecret = dbOss.getAccessKeySecret();
    endPoint = dbOss.getEndPoint();
    // Never write the secret to the log.
    logger.info("accessKeyId is:{},endPoint is:{}", accessKeyId, endPoint);

    // Strip an http:// or https:// scheme so the host can be appended as "bucket.host".
    String endpointHost = endPoint.replaceFirst("^https?://", "");
    String bucketLocation = ossBucketName + "." + endpointHost + "/" + ossFolderPath;
    // Log the location without the credential part of the URI.
    logger.info("OssDirComplete Is :oss://{}", bucketLocation);

    // Initialise the emr-oss configuration.
    path = new Path("oss://" + accessKeyId + ":" + accessKeySecret + "@" + bucketLocation);
    Configuration conf = new Configuration();
    conf.set("fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem");
    fs = FileSystem.get(path.toUri(), conf);
}
// Returns the list of OSS file paths found directly under the configured directory,
// keeping only entries whose path string contains ".tar.gz" or ".zip".
// NOTE(review): in this copy of the file the method signature and the declaration of
// ossFilePathList are truncated after the word "List" (generic arguments, method name,
// parameter list and throws clause are missing); confirm against the original source.
// NOTE(review): the filter uses indexOf, so the suffix may appear anywhere in the path,
// not only at its end; verify that this is intended.
public List
List
FileStatus[] fileList = fs.listStatus(path);
logger.info("All File Count Is: " + fileList.length);
for (int i = 0; i < fileList.length; i++) {
if (fileList[i].getPath().toString().indexOf(SUFFIX_DATA) == -1 && fileList[i].getPath().toString().indexOf(SUFFIX_ZIP) == -1) {
// Entries that are neither .tar.gz nor .zip are not processed.
logger.info("Do Not Make Processing :" + fileList[i].getPath().toString());
continue;
}
ossFilePathList.add(fileList[i].getPath().toString());
}
return ossFilePathList;
}
/**
 * Downloads one OSS file into a local directory and returns the full local
 * path of the downloaded file.
 *
 * <p>The local directory is created when it does not exist. The returned path
 * is {@code localStorePath} joined with the last path segment of
 * {@code ossFilePath}; a separator is inserted when {@code localStorePath}
 * does not already end with one, so both {@code "/data/in"} and
 * {@code "/data/in/"} yield a valid file path.
 *
 * @param ossFilePath    full OSS path of the file to download
 * @param localStorePath local directory to download into
 * @return local path of the downloaded file
 * @throws IllegalArgumentException declared by this method
 * @throws IOException              declared by this method (the copy to the local file system)
 */
public String download(String ossFilePath, String localStorePath) throws IllegalArgumentException, IOException {
    File targetDir = new File(localStorePath);
    if (!targetDir.exists() && !targetDir.mkdirs()) {
        // Do not fail here: let the copy below surface the real error, but leave a trace.
        logger.warn("Could not create local store path: " + localStorePath);
    }
    fs.copyToLocalFile(new Path(ossFilePath), new Path(localStorePath));
    logger.info("Local Store Path Is: " + localStorePath);

    String fileName = ossFilePath.substring(ossFilePath.lastIndexOf("/") + 1);
    // Without this check a directory given without a trailing separator would produce
    // "<dir><name>" instead of "<dir>/<name>".
    boolean endsWithSeparator = localStorePath.endsWith("/") || localStorePath.endsWith(File.separator);
    String filePath = endsWithSeparator
            ? localStorePath + fileName
            : localStorePath + File.separator + fileName;
    logger.info("Download File Complete Path Is: " + filePath);
    return filePath;
}
}