Python requests 登录 Hue 并提交 Hive query

"""python登录Hue并查询. 首次使用请修改 BASE_URL, USERNAME 和 PASSWORD.
"""
# Module metadata.
# NOTE: the module docstring above is passed to argparse as the command-line
# description (see the ``__main__`` block), so it is user-facing text.
__author__ = 'Yujian Yang and 0ut0fcontrol'
__version__ = '1.0.0'

import time
import requests


class Hue(requests.Session):
    """HTTP session that logs in to a Hue server and runs Beeswax queries.

    The class attributes ``BASE_URL``, ``USERNAME`` and ``PASSWORD`` are the
    fallbacks used when the matching arguments are omitted; edit them (or
    pass explicit values) before first use.
    """

    BASE_URL = 'http://localhost:8000'
    USERNAME = 'test'
    PASSWORD = '123'

    def __init__(self, base_url=None):
        """Create the session.

        Args:
            base_url: Root URL of the Hue server. ``BASE_URL`` is used when
                this is ``None``.
        """
        # Initialise the underlying Session before adding our own attribute.
        super(Hue, self).__init__()
        self.base_url = self.BASE_URL if base_url is None else base_url

    def login(self, username=None, password=None):
        """Submit the Hue login form for the given account.

        Args:
            username: Account name. ``USERNAME`` is used when ``None``.
            password: Account password. ``PASSWORD`` is used when ``None``.

        Returns:
            The response to the login POST.

        Raises:
            KeyError: If the session holds no ``csrftoken`` cookie after the
                login page was fetched.
            Exception: If the login POST does not end with HTTP status 200.
        """
        if username is None:
            username = self.USERNAME
        if password is None:
            password = self.PASSWORD
        login_url = self.base_url + '/accounts/login/'

        # Fetch the login page first: the form below echoes back the
        # csrftoken cookie that the session holds after this request.
        self.get(login_url)
        form_data = dict(username=username,
                         password=password,
                         csrfmiddlewaretoken=self.cookies['csrftoken'],
                         next='/')

        # Session cookies are sent automatically; no explicit cookies needed.
        r = self.post(login_url,
                      data=form_data,
                      headers=dict(Referer=login_url))

        # NOTE(review): status 200 only shows that the final response loaded;
        # whether Hue also answers 200 for rejected credentials is not visible
        # here -- confirm, and consider a stronger success check.
        if r.status_code != 200:
            raise Exception('login failed for user [%s] at %s\n' %
                            (username, self.base_url))
        print('login succeeded for user [%s] at %s\n' %
              (username, self.base_url))
        return r

    def beeswax(self, database, query, filename=None, poll_interval=2,
                max_polls=100):
        """Run a query through Beeswax and download its result as CSV.

        Args:
            database: Name of the database the query runs against.
            query: Query text to execute.
            filename: If given, the CSV bytes are also written to this path.
            poll_interval: Seconds to sleep between two status polls.
            max_polls: Maximum number of status polls before giving up.

        Returns:
            The response of the CSV download request.

        Raises:
            Exception: If submitting the query or downloading the result does
                not return HTTP status 200, or if the job has not reported
                success after ``max_polls`` polls.
        """
        beeswax_url = self.base_url + '/beeswax'

        query_data = {
            'query-query': query,
            'query-database': database,
            'settings-next_form_id': 0,
            'file_resources-next_form_id': 0,
            'functions-next_form_id': 0,
            'query-email_notify': False,
            'query-is_parameterized': True,
        }

        # Stored on the session, so these headers (and the session cookies)
        # accompany every request below without being passed explicitly.
        self.headers.update({
            'X-CSRFToken': self.cookies['csrftoken'],
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Referer': beeswax_url,
        })

        execute_url = self.base_url + '/beeswax/api/query/execute/'
        r = self.post(execute_url, data=query_data)
        # Explicit check instead of ``assert``, which is stripped under -O.
        if r.status_code != 200:
            raise Exception('query submission failed with HTTP status %d at %s'
                            % (r.status_code, execute_url))

        r_json = r.json()
        job_id = r_json['id']
        watch_url = self.base_url + r_json['watch_url']

        total_wait = poll_interval * max_polls
        for attempt in range(max_polls):
            print('waiting %3d/%d secs for job %d: %s ...' %
                  (poll_interval * attempt, total_wait, job_id, query[:50]))
            r = self.post(watch_url, data=query_data)
            if r.json()['isSuccess']:
                break
            time.sleep(poll_interval)
        else:
            # Every poll came back without success: do not go on to download
            # (and save) a result for a job that never finished.
            raise Exception('job %d did not succeed within %d secs' %
                            (job_id, total_wait))

        csv_url = self.base_url + '/beeswax/download/%d/csv' % (job_id)
        r = self.get(csv_url)
        # Avoid silently saving an error page as if it were the CSV result.
        if r.status_code != 200:
            raise Exception('downloading result of job %d failed with HTTP '
                            'status %d' % (job_id, r.status_code))

        if filename is not None:
            with open(filename, 'wb') as f:
                f.write(r.content)
            print('save result in %s\n' % (filename))
        return r


if __name__ == "__main__":
    # Command-line entry point: log in with the class-level defaults of Hue,
    # then run every given SQL file and save each result as CSV.
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # %(prog)s is expanded by argparse to the script name, which the
        # usage examples previously left out.
        epilog='例子:\n'
        'python %(prog)s -d database -s query.sql -o result.csv\n'
        'python %(prog)s -d database -s q1.sql q2.sql '
        '# 输出 q1.sql.csv q2.sql.csv\n')
    parser.add_argument('-d', '--database', help='数据库名称', required=True)
    parser.add_argument('-s',
                        '--sql',
                        nargs='+',
                        help='一个或多个查询语句文件.',
                        required=True)
    parser.add_argument('-o', '--output', help='查询结果csv文件名, 默认为: sql文件名.csv')
    args = parser.parse_args()

    # A single -o name shared by several queries would make each result
    # overwrite the previous one, so reject that combination up front.
    if args.output is not None and len(args.sql) > 1:
        parser.error('-o/--output names a single file and cannot be combined '
                     'with several -s/--sql files')

    hue = Hue()
    hue.login()

    for sql_path in args.sql:
        with open(sql_path) as f:
            query = f.read()
        # Default output name: the SQL file name with ".csv" appended.
        output = args.output if args.output is not None else sql_path + '.csv'
        hue.beeswax(args.database, query, output)

你可能感兴趣的:(hue,hive,python,浏览器,requests)