【Python】Kerberos环境下Python的接口调用

Kerberos

Kerberos 是一种网络身份认证协议,用于在服务请求时对客户端和服务端进行身份认证。Kerberos 支持在所有主流操作系统上构建,包括 Windows、macOS、FreeBSD 和 Linux。

官方网址:https://www.kerberos.org/

1.Kerberos 环境

Kerberos 环境下访问远程主机的服务,需要先在本机上完成 Kerberos 认证,然后才可以携带 Kerberos 主体(principal)和 keytab 文件进行访问。

1.1 Kerberos 主机认证
# keytab_file:keytab文件路径
# kerberos_principal:kerberos的principal
kinit -kt <keytab_file> <kerberos_principal>

1.2 Kerberos环境下的curl
# method:请求方法,例如GET、POST、PUT...
# negotiate:指定kerberos环境,会携带当前主机认证的kerberos principal进行访问
# username:用户名
# password:密码
# url:要请求的服务地址
curl -X <method> --negotiate -u <username>:<password> -i <url>

【Python】Kerberos环境下Python的接口调用_第1张图片

2.Python

2.1 第三方包安装
pip install requests
pip install requests_kerberos
pip install krbcontext
2.2 工具类

这里以 Python 调用 Hadoop 的 Rest 接口为例,测试 Python 在 Kerberos 环境下的服务调用。

Hadoop Rest 接口文档地址:https://hadoop.apache.org/docs/r1.0.4/webhdfs.html

# -*- coding=utf-8 -*-
import json
import logging
import requests
import posixpath
from requests.auth import HTTPBasicAuth
from requests_kerberos import HTTPKerberosAuth
from krbcontext import krbContext

# Path of the NameNode JMX endpoint; joined onto the NameNode base URL.
_NAMENODE_STATUS_URL = 'jmx'
# Path prefix of the WebHDFS REST API; joined onto the NameNode base URL.
_HDFS_REST_URL = 'webhdfs/v1'
# Value sent in the ``Accept`` header of every request issued by this module.
_ACCEPT_CONTENT_TYPE = 'application/json'

# Module-level logger used to record unexpected runtime errors.
LOG = logging.getLogger(__name__)


class HdfsRestApi(object):
    """
    Helper for calling the HDFS REST endpoints (NameNode JMX and WebHDFS).

    Every public method returns a dict holding ``code`` and ``message``
    (``200`` / ``'success'`` when nothing went wrong); query methods add the
    decoded payload under ``data``. Request and runtime failures are reported
    through that dict (``code`` 500) instead of being raised.
    """

    def __init__(self, url, username, password, security_enabled=False, principal=None, keytab_file=None,
                 timeout=None):
        """
        @param url: base URL of the NameNode web endpoint
        @param username: user name for HTTP basic authentication
        @param password: password for HTTP basic authentication
        @param security_enabled: when true, Kerberos (SPNEGO) authentication replaces basic authentication
        @param principal: Kerberos principal handed to krbContext
        @param keytab_file: keytab file path handed to krbContext
        @param timeout: value forwarded to requests as ``timeout``; None (the default) sets no timeout
        """
        self._url = url
        self._username = username
        self._password = password
        self._security_enabled = security_enabled
        self._timeout = timeout
        self._auth = HTTPBasicAuth(self._username, self._password)
        if self._security_enabled:
            # NOTE(review): the ticket is only (re)initialised here, at construction time.
            # Presumably a long-lived instance can outlive the ticket - confirm against the
            # krbcontext documentation whether each request should run inside krbContext.
            with krbContext(using_keytab=True, principal=principal, keytab_file=keytab_file):
                self._auth = HTTPKerberosAuth()

    def __str__(self):
        return "HDFS Rest Url Address Is {HDFS_REST_URL}".format(HDFS_REST_URL=self._url)

    def _get_params(self, params=None):
        """Return the query parameters to send, substituting an empty dict for None."""
        params = params if params is not None else {}
        return params

    @staticmethod
    def _normalize_path(path):
        """
        Strip every leading slash from an HDFS path.

        posixpath.join discards all previous components when a component is absolute,
        so a path that still starts with '/' would drop the base URL.
        """
        return path.lstrip('/')

    def _build_url(self, path):
        """Return the WebHDFS URL addressing ``path`` on this NameNode."""
        return posixpath.join(self._url, _HDFS_REST_URL, self._normalize_path(path))

    @staticmethod
    def _apply_status(response, result):
        """Record any non-200 response as a failure carrying the response body."""
        if response.status_code != 200:
            result['code'] = response.status_code
            result['message'] = response.text

    @staticmethod
    def _apply_quota_status(response, result):
        """Record any non-200 response to a quota update with a fixed user-facing message."""
        if response.status_code != 200:
            result['code'] = response.status_code
            result['message'] = "设置配额出现异常,请联系管理员处理"

    @staticmethod
    def _apply_json_body(response, result):
        """Store the decoded JSON body under ``data`` on 200, otherwise record the failure."""
        if response.status_code == 200:
            result['data'] = json.loads(response.text)
        else:
            result['code'] = response.status_code
            result['message'] = response.text

    @staticmethod
    def _apply_nested_payload(response, result, keys):
        """
        Store the JSON value found by following ``keys`` under ``data``.

        A 200 response lacking the expected keys is reported as code 500; a 404
        response is reported as "path does not exist"; any other status is
        passed through together with the response body.
        """
        status = response.status_code
        if status == 200:
            node = json.loads(response.text)
            for key in keys:
                if not isinstance(node, dict) or key not in node:
                    result['code'] = 500
                    result['message'] = 'HDFS Rest 接口数据发生变化,请联系开发人员进行处理'
                    return
                node = node[key]
            result['data'] = node
        elif status == 404:
            result['code'] = 404
            result['message'] = '要查询的路径不存在'
        else:
            result['code'] = status
            result['message'] = response.text

    def _perform(self, sender, build_url, params, apply_response):
        """
        Issue one request and translate its outcome into a result dict.

        @param sender: requests function used to send the request (requests.get / put / delete)
        @param build_url: zero-argument callable returning the target URL; it is called
                          inside the error handling so that a bad path is reported, not raised
        @param params: query parameters of the request
        @param apply_response: callable(response, result) that fills ``result`` from the response
        @return: dict with ``code``, ``message`` and optionally ``data``
        """
        result = {'code': 200, 'message': 'success'}

        try:
            url = build_url()
            # Logged instead of printed: keeps stdout clean and works on Python 2 and 3.
            LOG.debug("HDFS REST request url: %s", url)

            try:
                response = sender(url, params=self._get_params(params),
                                  headers={'Accept': _ACCEPT_CONTENT_TYPE},
                                  auth=self._auth, timeout=self._timeout)
                LOG.debug("HDFS REST response: %s", response)
                apply_response(response, result)

            except requests.exceptions.RequestException as e:
                result['code'] = 500
                result['message'] = "请求出现异常:{error_message}".format(error_message=e)

        except Exception as e:
            LOG.exception(e)
            # Without an explicit code the caller would read the default 200 as success.
            result['code'] = 500
            result['message'] = '代码运行时发生错误,错误原因:{error_message}'.format(error_message=e)

        return result

    def get_namenode_status(self):
        """
        Query the NameNodeStatus bean of the NameNode JMX endpoint.
        @return: result dict; ``data`` holds the decoded JMX response on success
        """
        return self._perform(
            requests.get,
            lambda: posixpath.join(self._url, _NAMENODE_STATUS_URL),
            {"qry": "Hadoop:service=NameNode,name=NameNodeStatus"},
            self._apply_json_body
        )

    def get_path_quota(self, path):
        """
        Query the quota usage of a path (op=GETQUOTAUSAGE).
        @param path: HDFS path
        @return: result dict; ``data`` holds the decoded response on success
        """
        return self._perform(
            requests.get,
            lambda: self._build_url(path),
            {"op": "GETQUOTAUSAGE"},
            self._apply_json_body
        )

    def set_path_quota(self, path, quota):
        """
        Set the storage space quota of a path (op=SETQUOTA).
        @param path: HDFS path
        @param quota: value sent as ``storagespacequota``
        @return: result dict
        """
        # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETQUOTA
        #                               &namespacequota=<QUOTA>[&storagespacequota=<QUOTA>]"
        # namespacequota: maximum number of files and directories in the directory
        # storagespacequota: maximum space usage of the directory
        return self._perform(
            requests.put,
            lambda: self._build_url(path),
            {"op": "SETQUOTA", "storagespacequota": quota},
            self._apply_quota_status
        )

    # Directory
    def get_directory_list(self, path):
        """
        List the files and directories below a path (op=LISTSTATUS).
        @param path: HDFS path
        @return: result dict; ``data`` holds the ``FileStatuses.FileStatus`` value on success
        """
        return self._perform(
            requests.get,
            lambda: self._build_url(path),
            {"op": "LISTSTATUS"},
            lambda response, result: self._apply_nested_payload(response, result, ('FileStatuses', 'FileStatus'))
        )

    def create_directory(self, path):
        """
        Create an HDFS directory (op=MKDIRS).
        @param path: HDFS path
        @return: result dict
        """
        # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=MKDIRS[&permission=<OCTAL>]"
        return self._perform(
            requests.put,
            lambda: self._build_url(path),
            {"op": "MKDIRS"},
            self._apply_status
        )

    def set_path_owner(self, path, user):
        """
        Set the owner of an HDFS path (op=SETOWNER).
        @param path: HDFS path
        @param user: name sent as both ``owner`` and ``group``
        @return: result dict
        """
        # curl -i -X PUT "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=SETOWNER
        #                               [&owner=<USER>][&group=<GROUP>]"
        return self._perform(
            requests.put,
            lambda: self._build_url(path),
            {"op": "SETOWNER", "owner": user, "group": user},
            self._apply_status
        )

    def delete_directory(self, path):
        """
        Delete an HDFS path (op=DELETE, sent without the ``recursive`` parameter).
        @param path: HDFS path
        @return: result dict
        """
        # curl -i -X DELETE "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=DELETE
        #                               [&recursive=<true|false>]"
        return self._perform(
            requests.delete,
            lambda: self._build_url(path),
            {"op": "DELETE"},
            self._apply_status
        )

    def get_path_status(self, path):
        """
        Query the status of an HDFS path (op=GETFILESTATUS).
        @param path: HDFS path
        @return: result dict; ``data`` holds the ``FileStatus`` value on success,
                 ``code`` is 404 when the path does not exist
        """
        # curl -i "http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS"
        return self._perform(
            requests.get,
            lambda: self._build_url(path),
            {"op": "GETFILESTATUS"},
            lambda response, result: self._apply_nested_payload(response, result, ('FileStatus',))
        )


def get_active_namenode(url_list, username, password, security_enabled=False, principal=None, keytab_file=None):
    """
    Find the NameNode that is in the active state.

    Each URL is probed in order through HdfsRestApi.get_namenode_status(); the
    first NameNode whose reported state is 'active' wins.
    @param url_list: base URLs of the candidate NameNodes
    @param username: user name for HTTP basic authentication
    @param password: password for HTTP basic authentication
    @param security_enabled: whether Kerberos authentication is used
    @param principal: Kerberos principal
    @param keytab_file: keytab file path
    @return: result dict; on success ``code`` is 200 and ``data`` holds the
             HdfsRestApi instance of the active NameNode. When no active
             NameNode is found, ``code`` is never 200 and ``data`` is absent.
    """
    result = {
        'code': 200,
        'message': 'success'
    }
    # Code/message of the most recent unreachable NameNode. It is kept apart from
    # ``result`` so that it cannot leak into a later successful return.
    last_failure = None

    try:
        for item in url_list:
            hdfs_rest_api = HdfsRestApi(item, username, password, security_enabled=security_enabled,
                                        principal=principal, keytab_file=keytab_file)
            get_status_result = hdfs_rest_api.get_namenode_status()
            if get_status_result['code'] != 200:
                last_failure = (get_status_result['code'], get_status_result['message'])
                continue

            status_data = get_status_result['data']
            if 'beans' not in status_data or len(status_data['beans']) == 0:
                result['code'] = 500
                result['message'] = 'HDFS Rest 接口数据发生变化,请联系开发人员进行处理'
                return result

            namenode_state = status_data['beans'][0]['State']
            if namenode_state.lower() == 'active':
                result['data'] = hdfs_rest_api
                return result

        # Nothing was active: never report 200 here, because callers read result['data'] on 200.
        if last_failure is not None:
            result['code'], result['message'] = last_failure
        else:
            result['code'] = 500
            result['message'] = '未找到处于active状态的NameNode节点'

    except Exception as e:
        result['code'] = 500
        result['message'] = '代码运行发生了错误:{error_message}'.format(error_message=e)

    return result

def create_hdfs_directory(url_list, username, password, tenant, path, quota, security_enabled=False, principal=None, keytab_file=None):
    """
    Create an HDFS directory, then set its space quota and its owner.

    The directory is only created when it does not exist yet. When setting the
    quota or the owner fails, the failure is reported and the newly created
    directory is left in place.
    @param url_list: base URLs of the candidate NameNodes
    @param username: user name for HTTP basic authentication
    @param password: password for HTTP basic authentication
    @param tenant: name set as owner and group of the new directory
    @param path: HDFS path to create
    @param quota: storage space quota to set on the new directory
    @param security_enabled: whether Kerberos authentication is used
    @param principal: Kerberos principal
    @param keytab_file: keytab file path
    @return: result dict with ``code`` and ``message``
    """
    result = {
        'code': 200,
        'message': 'success'
    }

    get_active_result = get_active_namenode(url_list, username, password, security_enabled=security_enabled,
                                            principal=principal, keytab_file=keytab_file)
    if get_active_result['code'] != 200:
        result['code'] = get_active_result['code']
        result['message'] = get_active_result['message']
        return result

    hdfs_rest_api = get_active_result['data']
    # Check whether the path already exists.
    get_path_status_result = hdfs_rest_api.get_path_status(path)
    if get_path_status_result['code'] == 200:
        result['code'] = 500
        result['message'] = 'HDFS路径{hdfs_path}已存在'.format(hdfs_path=path)
        return result

    if get_path_status_result['code'] != 404:
        # Report why the status lookup failed (not the unrelated NameNode-lookup message).
        result['code'] = get_path_status_result['code']
        result['message'] = get_path_status_result['message']
        return result

    # The path does not exist yet: create it.
    create_hdfs_directory_result = hdfs_rest_api.create_directory(path)
    if create_hdfs_directory_result['code'] != 200:
        result['code'] = create_hdfs_directory_result['code']
        result['message'] = create_hdfs_directory_result['message']
        return result

    set_path_quota_result = hdfs_rest_api.set_path_quota(path, quota)
    if set_path_quota_result['code'] != 200:
        result['code'] = 505
        result['message'] = set_path_quota_result['message']
        return result

    # A failed ownership change must not be reported as success.
    set_path_owner_result = hdfs_rest_api.set_path_owner(path, tenant)
    if set_path_owner_result['code'] != 200:
        result['code'] = set_path_owner_result['code']
        result['message'] = set_path_owner_result['message']

    return result


def setting_path_quota(url_list, username, password, path, quota, security_enabled=False, principal=None, keytab_file=None):
    """
    Set the space quota of an existing HDFS path.
    @param url_list: base URLs of the candidate NameNodes
    @param username: user name for HTTP basic authentication
    @param password: password for HTTP basic authentication
    @param path: HDFS path whose quota is changed; it must already exist
    @param quota: storage space quota to set
    @param security_enabled: whether Kerberos authentication is used
    @param principal: Kerberos principal
    @param keytab_file: keytab file path
    @return: result dict with ``code`` and ``message``
    """
    result = {
        'code': 200,
        'message': 'success'
    }

    get_active_result = get_active_namenode(url_list, username, password, security_enabled=security_enabled,
                                            principal=principal, keytab_file=keytab_file)
    if get_active_result['code'] != 200:
        result['code'] = get_active_result['code']
        result['message'] = get_active_result['message']
        return result

    hdfs_rest_api = get_active_result['data']
    # Check whether the path exists.
    get_path_status_result = hdfs_rest_api.get_path_status(path)
    path_status_code = get_path_status_result['code']
    if path_status_code == 200:
        # Set the quota.
        set_path_quota_result = hdfs_rest_api.set_path_quota(path, quota)
        if set_path_quota_result['code'] != 200:
            result['code'] = set_path_quota_result['code']
            result['message'] = set_path_quota_result['message']
    elif path_status_code == 404:
        result['code'] = path_status_code
        result['message'] = '修改配额操作失败:HDFS路径{hdfs_path}不存在'.format(hdfs_path=path)
    else:
        # Report why the status lookup failed (not the unrelated NameNode-lookup message).
        result['code'] = path_status_code
        result['message'] = get_path_status_result['message']

    return result


def delete_hdfs_path(url_list, username, password, path, security_enabled=False, principal=None, keytab_file=None):
    """
    Delete an existing HDFS directory or file.
    @param url_list: base URLs of the candidate NameNodes
    @param username: user name for HTTP basic authentication
    @param password: password for HTTP basic authentication
    @param path: HDFS path to delete; it must already exist
    @param security_enabled: whether Kerberos authentication is used
    @param principal: Kerberos principal
    @param keytab_file: keytab file path
    @return: result dict with ``code`` and ``message``
    """
    result = {
        'code': 200,
        'message': 'success'
    }

    get_active_result = get_active_namenode(url_list, username, password, security_enabled=security_enabled,
                                            principal=principal, keytab_file=keytab_file)
    if get_active_result['code'] != 200:
        result['code'] = get_active_result['code']
        result['message'] = get_active_result['message']
        return result

    hdfs_rest_api = get_active_result['data']
    # Check whether the path exists.
    get_path_status_result = hdfs_rest_api.get_path_status(path)
    path_status_code = get_path_status_result['code']
    if path_status_code == 200:
        # Delete it.
        delete_directory_result = hdfs_rest_api.delete_directory(path)
        if delete_directory_result['code'] != 200:
            result['code'] = delete_directory_result['code']
            result['message'] = delete_directory_result['message']
    elif path_status_code == 404:
        result['code'] = path_status_code
        result['message'] = '删除操作失败:HDFS路径{hdfs_path}不存在'.format(hdfs_path=path)
    else:
        # Report why the status lookup failed (not the unrelated NameNode-lookup message).
        result['code'] = path_status_code
        result['message'] = get_path_status_result['message']

    return result
2.3 代码测试

main 方法中调用工具类,获取目录文件以及目录的状态。

【Python】Kerberos环境下Python的接口调用_第2张图片

返回内容如下,Kerberos 环境下 Python 的接口调用操作成功。

【Python】Kerberos环境下Python的接口调用_第3张图片

你可能感兴趣的:(Python,大数据,python,hadoop,大数据)