Kerberos 是一个在服务请求时对客户端和服务端进行身份认证的服务,支持在各主流操作系统上进行构建,包括 Windows、macOS、FreeBSD 和 Linux。
官方网址:https://www.kerberos.org/
Kerberos 环境下访问远程主机的服务,需要在主机上先进行认证,然后才可以携带 Kerberos 主机和 keytab 文件访问。
# keytab_file:keytab文件路径
# kerberos_principal:Kerberos的principal
kinit -kt <keytab_file> <kerberos_principal>
# method:请求方法,例如GET、POST、PUT...
# negotiate:指定kerberos环境,会携带当前主机认证的kerberos principal进行访问
# username:用户名
# password:密码
# url:要请求的服务地址
curl -X <method> --negotiate -u <username>:<password> -i <url>
pip install requests
pip install requests_kerberos
pip install krbcontext
这里以 Python 调用 Hadoop 的 Rest 接口为例,测试 Python 在 Kerberos 环境下的服务调用。
Hadoop Rest 接口文档地址:https://hadoop.apache.org/docs/r1.0.4/webhdfs.html
# -*- coding=utf-8 -*-
import json
import logging
import requests
import posixpath
from requests.auth import HTTPBasicAuth
from requests_kerberos import HTTPKerberosAuth
from krbcontext import krbContext
# Path segment joined onto the NameNode base URL for the status query.
_NAMENODE_STATUS_URL = 'jmx'
# Path prefix joined onto the NameNode base URL for every WebHDFS call.
_HDFS_REST_URL = 'webhdfs/v1'
# Value sent in the Accept header of every request.
_ACCEPT_CONTENT_TYPE = 'application/json'
# Module-level logger used to record unexpected errors.
LOG = logging.getLogger(__name__)
class HdfsRestApi(object):
    """
    Client for the REST endpoints of a single HDFS NameNode (JMX status and WebHDFS).

    Every public method returns a dict with the keys ``code`` and ``message``.
    ``code`` is 200 on success, the HTTP status code when the server answers
    with a non-200 status, and 500 when the request or its processing fails on
    the client side. Query methods additionally set ``data`` on success.
    """

    def __init__(self, url, username, password, security_enabled=False, principal=None, keytab_file=None):
        """
        @param url: base URL of the NameNode HTTP endpoint
        @param username: user name used for HTTP basic authentication
        @param password: password used for HTTP basic authentication
        @param security_enabled: when true, authenticate with Kerberos instead of basic auth
        @param principal: Kerberos principal handed to krbContext
        @param keytab_file: keytab file path handed to krbContext
        """
        self._url = posixpath.join(url)
        self._username = username
        self._password = password
        self._security_enabled = security_enabled
        if self._security_enabled:
            # NOTE(review): the Kerberos context is only held while the auth
            # object is created; later requests presumably rely on the ticket
            # obtained here still being present in the credential cache -
            # confirm against the krbcontext documentation.
            with krbContext(using_keytab=True, principal=principal, keytab_file=keytab_file):
                self._auth = HTTPKerberosAuth()
        else:
            self._auth = HTTPBasicAuth(self._username, self._password)

    def __str__(self):
        return "HDFS Rest Url Address Is {HDFS_REST_URL}".format(HDFS_REST_URL=self._url)

    def _get_params(self, params=None):
        """Return ``params`` unchanged, or a new empty dict when it is None."""
        return params if params is not None else {}

    # ------------------------------------------------------------------
    # Private helpers shared by the public methods
    # ------------------------------------------------------------------
    @staticmethod
    def _new_result():
        """Return a fresh result dict in the success state."""
        return {'code': 200, 'message': 'success'}

    @staticmethod
    def _fail(result, code, message):
        """Mark ``result`` as failed with the given code and message."""
        result['code'] = code
        result['message'] = message
        return result

    @staticmethod
    def _request_failed(result, error):
        """Mark ``result`` as failed because the HTTP request itself raised."""
        result['code'] = 500
        result['message'] = "请求出现异常:{error_message}".format(error_message=error)
        return result

    @staticmethod
    def _unexpected_failure(result, error):
        """Log an unexpected error and mark ``result`` as failed with code 500."""
        LOG.exception(error)
        # The code must leave the success state, otherwise callers that only
        # check ``code`` would treat this failure as a success.
        result['code'] = 500
        result['message'] = '代码运行时发生错误,错误原因:{error_message}'.format(error_message=error)
        return result

    @staticmethod
    def _strip_leading_slashes(path):
        """
        Remove every leading '/' from ``path``.

        posixpath.join discards all previous components when a later one is
        absolute, so the path must be relative before it is joined to the URL.
        """
        return path.lstrip('/')

    def _webhdfs_url(self, path):
        """Build the WebHDFS URL for ``path`` below the base URL."""
        return posixpath.join(self._url, _HDFS_REST_URL, self._strip_leading_slashes(path))

    def _request(self, method, url, params):
        """Send one HTTP request with the shared Accept header and authentication."""
        return requests.request(method, url, params=self._get_params(params),
                                headers={'Accept': _ACCEPT_CONTENT_TYPE}, auth=self._auth)

    def _fetch_json(self, url, params):
        """GET ``url`` and return a result whose ``data`` is the decoded JSON body."""
        result = self._new_result()
        try:
            response = self._request('GET', url, params)
            if response.status_code == 200:
                result['data'] = json.loads(response.text)
            else:
                self._fail(result, response.status_code, response.text)
        except requests.exceptions.RequestException as e:
            self._request_failed(result, e)
        except ValueError as e:
            # A 200 response whose body is not valid JSON.
            self._fail(result, 500, "解析响应数据失败:{error_message}".format(error_message=e))
        return result

    def _fetch_status(self, path, operation, key_path):
        """
        Run a WebHDFS GET operation and extract the nested value at ``key_path``.

        A 404 response is reported with a dedicated "path does not exist"
        message; a 200 response lacking the expected keys is reported as 500.
        """
        result = self._new_result()
        try:
            url = self._webhdfs_url(path)
            LOG.debug("WebHDFS %s request url: %s", operation, url)
            response = self._request('GET', url, {"op": operation})
            LOG.debug("WebHDFS %s response: %s", operation, response)
            if response.status_code == 200:
                node = json.loads(response.text)
                for key in key_path:
                    if key not in node:
                        self._fail(result, 500, 'HDFS Rest 接口数据发生变化,请联系开发人员进行处理')
                        break
                    node = node[key]
                else:
                    result['data'] = node
            elif response.status_code == 404:
                self._fail(result, 404, '要查询的路径不存在')
            else:
                self._fail(result, response.status_code, response.text)
        except requests.exceptions.RequestException as e:
            self._request_failed(result, e)
        except Exception as e:
            self._unexpected_failure(result, e)
        return result

    def _send_change(self, method, path, params):
        """Run a modifying WebHDFS operation; any non-200 status is a failure."""
        result = self._new_result()
        try:
            response = self._request(method, self._webhdfs_url(path), params)
            if response.status_code != 200:
                self._fail(result, response.status_code, response.text)
        except requests.exceptions.RequestException as e:
            self._request_failed(result, e)
        except Exception as e:
            self._unexpected_failure(result, e)
        return result

    # ------------------------------------------------------------------
    # NameNode / quota
    # ------------------------------------------------------------------
    def get_namenode_status(self):
        """
        Query the NameNodeStatus bean from the JMX endpoint.
        @return: result dict; ``data`` holds the decoded JSON response
        """
        url = posixpath.join(self._url, _NAMENODE_STATUS_URL)
        return self._fetch_json(url, {"qry": "Hadoop:service=NameNode,name=NameNodeStatus"})

    def get_path_quota(self, path):
        """
        Fetch the quota usage of a path (GETQUOTAUSAGE).
        @param path: HDFS path
        @return: result dict; ``data`` holds the decoded JSON response
        """
        return self._fetch_json(self._webhdfs_url(path), {"op": "GETQUOTAUSAGE"})

    def set_path_quota(self, path, quota):
        """
        Set the storage space quota of a path (SETQUOTA).
        @param path: HDFS path
        @param quota: value sent as the ``storagespacequota`` parameter
        @return: result dict
        """
        # curl -i -X PUT "http://<host>:<port>/webhdfs/v1/<path>?op=SETQUOTA
        #                 &namespacequota=<n>[&storagespacequota=<n>]"
        # namespacequota: maximum number of files and directories in the directory
        # storagespacequota: maximum space usage of the directory
        result = self._new_result()
        url = self._webhdfs_url(path)
        try:
            response = self._request('PUT', url, {"op": "SETQUOTA", "storagespacequota": quota})
            if response.status_code != 200:
                # The server's response text is replaced by a generic message.
                self._fail(result, response.status_code, "设置配额出现异常,请联系管理员处理")
        except requests.exceptions.RequestException as e:
            self._request_failed(result, e)
        return result

    # ------------------------------------------------------------------
    # Directory
    # ------------------------------------------------------------------
    def get_directory_list(self, path):
        """
        List the files and directories below a path (LISTSTATUS).
        @param path: HDFS path
        @return: result dict; ``data`` holds the ``FileStatuses.FileStatus`` value
        """
        return self._fetch_status(path, "LISTSTATUS", ('FileStatuses', 'FileStatus'))

    def create_directory(self, path):
        """
        Create an HDFS directory (MKDIRS).
        @param path: HDFS path
        @return: result dict
        """
        # curl -i -X PUT "http://<host>:<port>/webhdfs/v1/<path>?op=MKDIRS[&permission=<octal>]"
        return self._send_change('PUT', path, {"op": "MKDIRS"})

    def set_path_owner(self, path, user):
        """
        Set both the owner and the group of an HDFS path to ``user`` (SETOWNER).
        @param path: HDFS path
        @param user: name sent as both ``owner`` and ``group``
        @return: result dict
        """
        # curl -i -X PUT "http://<host>:<port>/webhdfs/v1/<path>?op=SETOWNER
        #                 [&owner=<user>][&group=<group>]"
        return self._send_change('PUT', path, {"op": "SETOWNER", "owner": user, "group": user})

    def delete_directory(self, path):
        """
        Delete an HDFS path (DELETE, without the ``recursive`` parameter).
        @param path: HDFS path
        @return: result dict
        """
        # curl -i -X DELETE "http://<host>:<port>/webhdfs/v1/<path>?op=DELETE
        #                    [&recursive=<true|false>]"
        return self._send_change('DELETE', path, {"op": "DELETE"})

    def get_path_status(self, path):
        """
        Fetch the status of an HDFS path (GETFILESTATUS).
        @param path: HDFS path
        @return: result dict; ``data`` holds the ``FileStatus`` value
        """
        # curl -i "http://<host>:<port>/webhdfs/v1/<path>?op=GETFILESTATUS"
        return self._fetch_status(path, "GETFILESTATUS", ('FileStatus',))
def get_active_namenode(url_list, username, password, security_enabled=False, principal=None, keytab_file=None):
    """
    Find the NameNode that reports the ``active`` state.

    Each URL is probed in order through HdfsRestApi.get_namenode_status().
    The first node whose first status bean has ``State`` equal to "active"
    (case-insensitive) is returned.

    @param url_list: NameNode base URLs to probe
    @param username: user name passed to HdfsRestApi
    @param password: password passed to HdfsRestApi
    @param security_enabled: passed to HdfsRestApi
    @param principal: passed to HdfsRestApi
    @param keytab_file: passed to HdfsRestApi
    @return: dict with ``code`` and ``message``; ``code`` is 200 and ``data``
             holds the HdfsRestApi instance only when an active node was found
    """
    # Most recent per-node failure, reported if no node turns out to be active.
    last_failure = None
    try:
        for item in url_list:
            hdfs_rest_api = HdfsRestApi(item, username, password, security_enabled=security_enabled,
                                        principal=principal, keytab_file=keytab_file)
            status = hdfs_rest_api.get_namenode_status()
            if status['code'] != 200:
                last_failure = {'code': status['code'], 'message': status['message']}
                continue
            payload = status['data']
            if 'beans' not in payload or len(payload['beans']) == 0:
                return {'code': 500, 'message': 'HDFS Rest 接口数据发生变化,请联系开发人员进行处理'}
            if payload['beans'][0]['State'].lower() == 'active':
                # Build a fresh success result so that a failure recorded for
                # an earlier node cannot leak into it.
                return {'code': 200, 'message': 'success', 'data': hdfs_rest_api}
    except Exception as e:
        return {'code': 500, 'message': '代码运行发生了错误:{error_message}'.format(error_message=e)}
    if last_failure is not None:
        return last_failure
    # No node failed, but none was active either (or url_list was empty);
    # never report success without ``data``.
    return {'code': 500, 'message': '未找到处于active状态的NameNode节点'}
def create_hdfs_directory(url_list, username, password, tenant, path, quota, security_enabled=False, principal=None, keytab_file=None):
    """
    Create an HDFS directory, set its space quota and hand it over to a tenant.

    The steps run in order on the active NameNode: check that the path does
    not exist, create it, set the quota, set owner and group to ``tenant``.
    The first failing step determines the returned code and message.

    @param url_list: NameNode base URLs passed to get_active_namenode
    @param username: user name passed to get_active_namenode
    @param password: password passed to get_active_namenode
    @param tenant: name used as owner and group of the new directory
    @param path: HDFS path to create
    @param quota: storage space quota for the new directory
    @param security_enabled: passed to get_active_namenode
    @param principal: passed to get_active_namenode
    @param keytab_file: passed to get_active_namenode
    @return: dict with ``code`` (200 on success) and ``message``
    """
    result = {
        'code': 200,
        'message': 'success'
    }

    def failed(code, message):
        result['code'] = code
        result['message'] = message
        return result

    get_active_result = get_active_namenode(url_list, username, password, security_enabled=security_enabled,
                                            principal=principal, keytab_file=keytab_file)
    if get_active_result['code'] != 200:
        return failed(get_active_result['code'], get_active_result['message'])
    hdfs_rest_api = get_active_result.get('data')
    if hdfs_rest_api is None:
        # Success code without a client object: no active NameNode was found.
        return failed(500, '未找到处于active状态的NameNode节点')

    # Make sure the path does not exist yet.
    path_status = hdfs_rest_api.get_path_status(path)
    if path_status['code'] == 200:
        return failed(500, 'HDFS路径{hdfs_path}已存在'.format(hdfs_path=path))
    if path_status['code'] != 404:
        # Report the message of the failed status lookup itself.
        return failed(path_status['code'], path_status['message'])

    created = hdfs_rest_api.create_directory(path)
    if created['code'] != 200:
        return failed(created['code'], created['message'])

    quota_result = hdfs_rest_api.set_path_quota(path, quota)
    if quota_result['code'] != 200:
        # NOTE(review): the created directory is left in place when setting
        # the quota fails - confirm whether it should be removed again.
        return failed(505, quota_result['message'])

    owner_result = hdfs_rest_api.set_path_owner(path, tenant)
    if owner_result['code'] != 200:
        # A failed ownership change must not be reported as success.
        return failed(owner_result['code'], owner_result['message'])
    return result
def setting_path_quota(url_list, username, password, path, quota, security_enabled=False, principal=None, keytab_file=None):
    """
    Set the space quota of an existing HDFS path on the active NameNode.

    @param url_list: NameNode base URLs passed to get_active_namenode
    @param username: user name passed to get_active_namenode
    @param password: password passed to get_active_namenode
    @param path: HDFS path whose quota is changed
    @param quota: new storage space quota
    @param security_enabled: passed to get_active_namenode
    @param principal: passed to get_active_namenode
    @param keytab_file: passed to get_active_namenode
    @return: dict with ``code`` (200 on success) and ``message``
    """
    result = {
        'code': 200,
        'message': 'success'
    }

    def failed(code, message):
        result['code'] = code
        result['message'] = message
        return result

    get_active_result = get_active_namenode(url_list, username, password, security_enabled=security_enabled,
                                            principal=principal, keytab_file=keytab_file)
    if get_active_result['code'] != 200:
        return failed(get_active_result['code'], get_active_result['message'])
    hdfs_rest_api = get_active_result.get('data')
    if hdfs_rest_api is None:
        # Success code without a client object: no active NameNode was found.
        return failed(500, '未找到处于active状态的NameNode节点')

    # Make sure the path exists before changing its quota.
    path_status = hdfs_rest_api.get_path_status(path)
    if path_status['code'] == 404:
        return failed(404, '修改配额操作失败:HDFS路径{hdfs_path}不存在'.format(hdfs_path=path))
    if path_status['code'] != 200:
        # Report the message of the failed status lookup itself.
        return failed(path_status['code'], path_status['message'])

    quota_result = hdfs_rest_api.set_path_quota(path, quota)
    if quota_result['code'] != 200:
        return failed(quota_result['code'], quota_result['message'])
    return result
def delete_hdfs_path(url_list, username, password, path, security_enabled=False, principal=None, keytab_file=None):
    """
    Delete an existing HDFS directory or file on the active NameNode.

    @param url_list: NameNode base URLs passed to get_active_namenode
    @param username: user name passed to get_active_namenode
    @param password: password passed to get_active_namenode
    @param path: HDFS path to delete
    @param security_enabled: passed to get_active_namenode
    @param principal: passed to get_active_namenode
    @param keytab_file: passed to get_active_namenode
    @return: dict with ``code`` (200 on success) and ``message``
    """
    result = {
        'code': 200,
        'message': 'success'
    }

    def failed(code, message):
        result['code'] = code
        result['message'] = message
        return result

    get_active_result = get_active_namenode(url_list, username, password, security_enabled=security_enabled,
                                            principal=principal, keytab_file=keytab_file)
    if get_active_result['code'] != 200:
        return failed(get_active_result['code'], get_active_result['message'])
    hdfs_rest_api = get_active_result.get('data')
    if hdfs_rest_api is None:
        # Success code without a client object: no active NameNode was found.
        return failed(500, '未找到处于active状态的NameNode节点')

    # Make sure the path exists before deleting it.
    path_status = hdfs_rest_api.get_path_status(path)
    if path_status['code'] == 404:
        return failed(404, '删除操作失败:HDFS路径{hdfs_path}不存在'.format(hdfs_path=path))
    if path_status['code'] != 200:
        # Report the message of the failed status lookup itself.
        return failed(path_status['code'], path_status['message'])

    delete_result = hdfs_rest_api.delete_directory(path)
    if delete_result['code'] != 200:
        return failed(delete_result['code'], delete_result['message'])
    return result
main 方法中调用工具类,获取目录文件以及目录的状态。
返回内容如下,Kerberos 环境下 Python 的接口调用操作成功。