从未停下脚步

hadoop大数据平台手动搭建(七)-hue

1.解压

tar -zxvf /opt/hue-3.9.0-cdh5.8.0.tar.gz

2. 编译安装

make apps

make install

3.修改hue.ini

# Hue configuration file

# ===================================

# For complete documentation about the contents of this file, run

# $ <hue_root>/build/env/bin/hue config_help

# All .ini files under the current directory are treated equally. Their

# contents are merged to form the Hue configuration, which

# can be viewed on the Hue at

# http://<hue_host>:<hue_port>/dump_config

###########################################################################

# General configuration for core Desktop features (authentication, etc)

###########################################################################

[desktop]

# Set this to a random string, the longer the better.

# This is used for secure hashing in the session store.

secret_key=13edcqwertyuiopuioplkj

# Execute this script to produce the Django secret key. This will be used when

# `secret_key` is not set.

## secret_key_script=

# Webserver listens on this address and port

http_host=0.0.0.0

http_port=8888

# Time zone name

time_zone=America/Los_Angeles

# Enable or disable Django debug mode.

django_debug_mode=false

# Enable or disable database debug mode.

## database_logging=false

# Whether to send debug messages from JavaScript to the server logs.

## send_dbug_messages=false

# Enable or disable backtrace for server error

http_500_debug_mode=false

# Enable or disable memory profiling.

## memory_profiler=false

# Server email for internal error messages

## django_server_email='hue@localhost.localdomain'

# Email backend

## django_email_backend=django.core.mail.backends.smtp.EmailBackend

# Webserver runs as this user

server_user=hadoop

server_group=hadoop

# This should be the Hue admin and proxy user

default_user=hadoop

# This should be the hadoop cluster admin

default_hdfs_superuser=hadoop

# If set to false, runcpserver will not actually start the web server.

# Used if Apache is being used as a WSGI container.

## enable_server=yes

# Number of threads used by the CherryPy web server

## cherrypy_server_threads=40

# Filename of SSL Certificate

## ssl_certificate=

# Filename of SSL RSA Private Key

## ssl_private_key=

# Filename of SSL Certificate Chain

## ssl_certificate_chain=

# SSL certificate password

## ssl_password=

# Execute this script to produce the SSL password. This will be used when `ssl_password` is not set.

## ssl_password_script=

# List of allowed and disallowed ciphers in cipher list format.

# See http://www.openssl.org/docs/apps/ciphers.html for more information on

# cipher list format. This list is from

# https://wiki.mozilla.org/Security/Server_Side_TLS v3.7 intermediate

# recommendation, which should be compatible with Firefox 1, Chrome 1, IE 7,

# Opera 5 and Safari 1.

## ssl_cipher_list=ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-DSS-AES128-GCM-SHA256:kEDH+AESGCM:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-DSS-AES128-SHA256:DHE-RSA-AES256-SHA256:DHE-DSS-AES256-SHA:DHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:AES:CAMELLIA:DES-CBC3-SHA:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!aECDH:!EDH-DSS-DES-CBC3-SHA:!EDH-RSA-DES-CBC3-SHA:!KRB5-DES-CBC3-SHA

# Path to default Certificate Authority certificates.

## ssl_cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.

## validate=true

# Default LDAP/PAM/.. username and password of the hue user used for authentications with other services.

# Inactive if password is empty.

# e.g. LDAP pass-through authentication for HiveServer2 or Impala. Apps can override them individually.

## auth_username=hue

## auth_password=

# Default encoding for site data

## default_site_encoding=utf-8

# Help improve Hue with anonymous usage analytics.

# Use Google Analytics to see how many times an application or specific section of an application is used, nothing more.

## collect_usage=true

# Tile layer server URL for the Leaflet map charts

# Read more on http://leafletjs.com/reference.html#tilelayer

## leaflet_tile_layer=http://{s}.tile.osm.org/{z}/{x}/{y}.png

# The copyright message for the specified Leaflet maps Tile Layer

## leaflet_tile_layer_attribution='© OpenStreetMap contributors'

# X-Frame-Options HTTP header value. Use 'DENY' to deny framing completely

## http_x_frame_options=SAMEORIGIN

# Enable X-Forwarded-Host header if the load balancer requires it.

## use_x_forwarded_host=false

# Support for HTTPS termination at the load-balancer level with SECURE_PROXY_SSL_HEADER.

## secure_proxy_ssl_header=false

# Comma-separated list of Django middleware classes to use.

# See https://docs.djangoproject.com/en/1.4/ref/middleware/ for more details on middlewares in Django.

## middleware=desktop.auth.backend.LdapSynchronizationBackend

# Comma-separated list of regular expressions, which match the redirect URL.

# For example, to restrict to your local domain and FQDN, the following value can be used:

# ^\/.*$,^http:\/\/www.mydomain.com\/.*$

## redirect_whitelist=^(\/[a-zA-Z0-9]+.*|\/)$

# Comma separated list of apps to not load at server startup.

# e.g.: pig,zookeeper

## app_blacklist=

# Choose whether to show the new SQL editor.

## use_new_editor=true

# Enable saved default configurations for Hive, Impala, Spark, and Oozie.

## use_default_configuration=false

# The directory where to store the auditing logs. Auditing is disabled if the value is empty.

# e.g. /var/log/hue/audit.log

## audit_event_log_dir=

# Size in KB/MB/GB for audit log to rollover.

## audit_log_max_file_size=100MB

# A json file containing a list of log redaction rules for cleaning sensitive data

# from log files. It is defined as:

# {

# "version": 1,

# "rules": [

# {

# "description": "This is the first rule",

# "trigger": "triggerstring 1",

# "search": "regex 1",

# "replace": "replace 1"

# },

# {

# "description": "This is the second rule",

# "trigger": "triggerstring 2",

# "search": "regex 2",

# "replace": "replace 2"

# }

# ]

# }

# Redaction works by searching a string for the [TRIGGER] string. If found,

# the [REGEX] is used to replace sensitive information with the

# [REDACTION_MASK]. If specified with `log_redaction_string`, the

# `log_redaction_string` rules will be executed after the

# `log_redaction_file` rules.

# For example, here is a file that would redact passwords and social security numbers:

# {

# "version": 1,

# "rules": [

# {

# "description": "Redact passwords",

# "trigger": "password",

# "search": "password=\".*\"",

# "replace": "password=\"???\""

# },

# {

# "description": "Redact social security numbers",

# "trigger": "",

# "search": "\d{3}-\d{2}-\d{4}",

# "replace": "XXX-XX-XXXX"

# }

# ]

# }

## log_redaction_file=

# Comma separated list of strings representing the host/domain names that the Hue server can serve.

# e.g.: localhost,domain1,*

## allowed_hosts=*

# Administrators

# ----------------

[[django_admins]]

## [[[admin1]]]

## name=john

## email=john@doe.com

# UI customizations

# -------------------

[[custom]]

# Top banner HTML code

# e.g. <H4>Test Lab A2 Hue Services</H4>

## banner_top_html=

# Login splash HTML code

# e.g. WARNING: You are required to have authorization before you proceed

## login_splash_html=<h4>GetHue.com</h4><br/><br/>WARNING: You have accessed a computer managed by GetHue. You are required to have authorization from GetHue before you proceed.

# Cache timeout in milliseconds for the assist, autocomplete, etc.

# defaults to 86400000 (1 day), set to 0 to disable caching

## cacheable_ttl=86400000

# Configuration options for user authentication into the web application

# ------------------------------------------------------------------------

[[auth]]

# Authentication backend. Common settings are:

# - django.contrib.auth.backends.ModelBackend (entirely Django backend)

# - desktop.auth.backend.AllowAllBackend (allows everyone)

# - desktop.auth.backend.AllowFirstUserDjangoBackend

# (Default. Relies on Django and user manager, after the first login)

# - desktop.auth.backend.LdapBackend

# - desktop.auth.backend.PamBackend

# - desktop.auth.backend.SpnegoDjangoBackend

# - desktop.auth.backend.RemoteUserDjangoBackend

# - libsaml.backend.SAML2Backend

# - libopenid.backend.OpenIDBackend

# - liboauth.backend.OAuthBackend

# (New oauth, support Twitter, Facebook, Google+ and Linkedin)

# Multiple Authentication backends are supported by specifying a comma-separated list in order of priority.

# However, in order to enable OAuthBackend, it must be the ONLY backend configured.

## backend=desktop.auth.backend.AllowFirstUserDjangoBackend

# Class which defines extra accessor methods for User objects.

## user_aug=desktop.auth.backend.DefaultUserAugmentor

# The service to use when querying PAM.

## pam_service=login

# When using the desktop.auth.backend.RemoteUserDjangoBackend, this sets

# the normalized name of the header that contains the remote user.

# The HTTP header in the request is converted to a key by converting

# all characters to uppercase, replacing any hyphens with underscores

# and adding an HTTP_ prefix to the name. So, for example, if the header

# is called Remote-User that would be configured as HTTP_REMOTE_USER

# Defaults to HTTP_REMOTE_USER

## remote_user_header=HTTP_REMOTE_USER

# Ignore the case of usernames when searching for existing users.

# Supported in remoteUserDjangoBackend and SpnegoDjangoBackend

## ignore_username_case=true

# Forcibly cast usernames to lowercase, takes precedence over force_username_uppercase

# Supported in remoteUserDjangoBackend and SpnegoDjangoBackend

## force_username_lowercase=true

# Forcibly cast usernames to uppercase, cannot be combined with force_username_lowercase

## force_username_uppercase=false

# Users will expire after they have not logged in for 'n' amount of seconds.

# A negative number means that users will never expire.

## expires_after=-1

# Apply 'expires_after' to superusers.

## expire_superusers=true

# Users will automatically be logged out after 'n' seconds of inactivity.

# A negative number means that idle sessions will not be timed out.

idle_session_timeout=-1

# Force users to change password on first login with desktop.auth.backend.AllowFirstUserDjangoBackend

## change_default_password=false

# Number of login attempts allowed before a record is created for failed logins

## login_failure_limit=3

# After number of allowed login attempts are exceeded, do we lock out this IP and optionally user agent?

## login_lock_out_at_failure=false

# If set, defines period of inactivity in seconds after which failed logins will be forgotten

## login_cooloff_time=60

# If True, lock out based on an IP address AND a user agent.

# This means requests from different user agents but from the same IP are treated differently.

## login_lock_out_use_user_agent=false

# If True, lock out based on IP and user

## login_lock_out_by_combination_user_and_ip=false

# Configuration options for connecting to LDAP and Active Directory

# -------------------------------------------------------------------

[[ldap]]

# The search base for finding users and groups

## base_dn="DC=mycompany,DC=com"

# URL of the LDAP server

## ldap_url=ldap://auth.mycompany.com

# The NT domain used for LDAP authentication

## nt_domain=mycompany.com

# A PEM-format file containing certificates for the CA's that

# Hue will trust for authentication over TLS.

# The certificate for the CA that signed the

# LDAP server certificate must be included among these certificates.

# See more here http://www.openldap.org/doc/admin24/tls.html.

## ldap_cert=

## use_start_tls=true

# Distinguished name of the user to bind as -- not necessary if the LDAP server

# supports anonymous searches

## bind_dn="CN=ServiceAccount,DC=mycompany,DC=com"

# Password of the bind user -- not necessary if the LDAP server supports

# anonymous searches

## bind_password=

# Execute this script to produce the bind user password. This will be used

# when `bind_password` is not set.

## bind_password_script=

# Pattern for searching for usernames -- Use <username> for the parameter

# For use when using LdapBackend for Hue authentication

## ldap_username_pattern="uid=<username>,ou=People,dc=mycompany,dc=com"

# Create users in Hue when they try to login with their LDAP credentials

# For use when using LdapBackend for Hue authentication

## create_users_on_login = true

# Synchronize a users groups when they login

## sync_groups_on_login=false

# Ignore the case of usernames when searching for existing users in Hue.

## ignore_username_case=true

# Force usernames to lowercase when creating new users from LDAP.

# Takes precedence over force_username_uppercase

## force_username_lowercase=true

# Force usernames to uppercase, cannot be combined with force_username_lowercase

## force_username_uppercase=false

# Use search bind authentication.

## search_bind_authentication=true

# Choose which kind of subgrouping to use: nested or suboordinate (deprecated).

## subgroups=suboordinate

# Define the number of levels to search for nested members.

## nested_members_search_depth=10

# Whether or not to follow referrals

## follow_referrals=false

# Enable python-ldap debugging.

## debug=false

# Sets the debug level within the underlying LDAP C lib.

## debug_level=255

# Possible values for trace_level are 0 for no logging, 1 for only logging the method calls with arguments,

# 2 for logging the method calls with arguments and the complete results and 9 for also logging the traceback of method calls.

## trace_level=0

[[[users]]]

# Base filter for searching for users

## user_filter="objectclass=*"

# The username attribute in the LDAP schema

## user_name_attr=sAMAccountName

[[[groups]]]

# Base filter for searching for groups

## group_filter="objectclass=*"

# The group name attribute in the LDAP schema

## group_name_attr=cn

# The attribute of the group object which identifies the members of the group

## group_member_attr=members

[[[ldap_servers]]]

## [[[[mycompany]]]]

# The search base for finding users and groups

## base_dn="DC=mycompany,DC=com"

# URL of the LDAP server

## ldap_url=ldap://auth.mycompany.com

# The NT domain used for LDAP authentication

## nt_domain=mycompany.com

# A PEM-format file containing certificates for the CA's that

# Hue will trust for authentication over TLS.

# The certificate for the CA that signed the

# LDAP server certificate must be included among these certificates.

# See more here http://www.openldap.org/doc/admin24/tls.html.

## ldap_cert=

## use_start_tls=true

# Distinguished name of the user to bind as -- not necessary if the LDAP server

# supports anonymous searches

## bind_dn="CN=ServiceAccount,DC=mycompany,DC=com"

# Password of the bind user -- not necessary if the LDAP server supports

# anonymous searches

## bind_password=

# Execute this script to produce the bind user password. This will be used

# when `bind_password` is not set.

## bind_password_script=

# Pattern for searching for usernames -- Use <username> for the parameter

# For use when using LdapBackend for Hue authentication

## ldap_username_pattern="uid=<username>,ou=People,dc=mycompany,dc=com"

## Use search bind authentication.

## search_bind_authentication=true

# Whether or not to follow referrals

## follow_referrals=false

# Enable python-ldap debugging.

## debug=false

# Sets the debug level within the underlying LDAP C lib.

## debug_level=255

# Possible values for trace_level are 0 for no logging, 1 for only logging the method calls with arguments,

# 2 for logging the method calls with arguments and the complete results and 9 for also logging the traceback of method calls.

## trace_level=0

## [[[[[users]]]]]

# Base filter for searching for users

## user_filter="objectclass=Person"

# The username attribute in the LDAP schema

## user_name_attr=sAMAccountName

## [[[[[groups]]]]]

# Base filter for searching for groups

## group_filter="objectclass=groupOfNames"

# The group name attribute in the LDAP schema

## group_name_attr=cn

# Configuration options for specifying the Desktop Database. For more info,

# see http://docs.djangoproject.com/en/1.4/ref/settings/#database-engine

# ------------------------------------------------------------------------

[[database]]

# Database engine is typically one of:

# postgresql_psycopg2, mysql, sqlite3 or oracle.

# Note that for sqlite3, 'name', below is a path to the filename. For other backends, it is the database name

# Note for Oracle, options={"threaded":true} must be set in order to avoid crashes.

# Note for Oracle, you can use the Oracle Service Name by setting "host=" and "port=" and then "name=<host>:<port>/<service_name>".

# Note for MariaDB use the 'mysql' engine.

engine=mysql

host=master

port=3306

user=hadoop

password=123456

# Execute this script to produce the database password. This will be used when `password` is not set.

## password_script=/path/script

name=hue

## options={}

# Configuration options for specifying the Desktop session.

# For more info, see https://docs.djangoproject.com/en/1.4/topics/http/sessions/

# ------------------------------------------------------------------------

[[session]]

# The cookie containing the users' session ID will expire after this amount of time in seconds.

# Default is 2 weeks.

## ttl=1209600

# The cookie containing the users' session ID will be secure.

# Should only be enabled with HTTPS.

## secure=false

# The cookie containing the users' session ID will use the HTTP only flag.

## http_only=true

# Use session-length cookies. Logs out the user when she closes the browser window.

## expire_at_browser_close=false

# Configuration options for connecting to an external SMTP server

# ------------------------------------------------------------------------

[[smtp]]

# The SMTP server information for email notification delivery

host=localhost

port=25

user=

password=

# Whether to use a TLS (secure) connection when talking to the SMTP server

tls=no

# Default email address to use for various automated notification from Hue

## default_from_email=hue@localhost

# Configuration options for Kerberos integration for secured Hadoop clusters

# ------------------------------------------------------------------------

[[kerberos]]

# Path to Hue's Kerberos keytab file

## hue_keytab=

# Kerberos principal name for Hue

## hue_principal=hue/hostname.foo.com

# Path to kinit

## kinit_path=/path/to/kinit

# Configuration options for using OAuthBackend (Core) login

# ------------------------------------------------------------------------

[[oauth]]

# The Consumer key of the application

## consumer_key=XXXXXXXXXXXXXXXXXXXXX

# The Consumer secret of the application

## consumer_secret=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

# The Request token URL

## request_token_url=https://api.twitter.com/oauth/request_token

# The Access token URL

## access_token_url=https://api.twitter.com/oauth/access_token

# The Authorize URL

## authenticate_url=https://api.twitter.com/oauth/authorize

# Configuration options for Metrics

# ------------------------------------------------------------------------

[[metrics]]

# Enable the metrics URL "/desktop/metrics"

## enable_web_metrics=True

# If specified, Hue will write metrics to this file.

## location=/var/log/hue/metrics.json

# Time in milliseconds on how frequently to collect metrics

## collection_interval=30000

###########################################################################

# Settings to configure the snippets available in the Notebook

###########################################################################

[notebook]

## Show the notebook menu or not

# show_notebooks=true

## Base URL to Remote GitHub Server

# github_remote_url=https://github.com

## Base URL to GitHub API

# github_api_url=https://api.github.com

## Client ID for Authorized GitHub Application

# github_client_id=

## Client Secret for Authorized GitHub Application

# github_client_secret=

# One entry for each type of snippet. The first 5 will appear in the wheel.

[[interpreters]]

# Define the name and how to connect and execute the language.

[[[hive]]]

# The name of the snippet.

name=Hive

# The backend connection to use to communicate with the server.

interface=hiveserver2

[[[impala]]]

name=Impala

interface=hiveserver2

# [[[sparksql]]]

# name=SparkSql

# interface=hiveserver2

[[[spark]]]

name=Scala

interface=livy

[[[pyspark]]]

name=PySpark

interface=livy

[[[r]]]

name=R

interface=livy

[[[jar]]]

name=Spark Submit Jar

interface=livy-batch

[[[py]]]

name=Spark Submit Python

interface=livy-batch

[[[pig]]]

name=Pig

interface=pig

[[[text]]]

name=Text

interface=text

[[[markdown]]]

name=Markdown

interface=text

[[[mysql]]]

name = MySQL

interface=rdbms

[[[sqlite]]]

name = SQLite

interface=rdbms

[[[postgresql]]]

name = PostgreSQL

interface=rdbms

[[[oracle]]]

name = Oracle

interface=rdbms

[[[solr]]]

name = Solr SQL

interface=solr

## Name of the collection handler

# options='{"collection": "default"}'

# [[[mysql]]]

# name=MySql JDBC

# interface=jdbc

# ## Specific options for connecting to the server.

# ## The JDBC connectors, e.g. mysql.jar, need to be in the CLASSPATH environment variable.

# ## If 'user' and 'password' are omitted, they will be prompted in the UI.

# options='{"url": "jdbc:mysql://localhost:3306/hue", "driver": "com.mysql.jdbc.Driver", "user": "root", "password": "root"}'

## Main flag to override the automatic starting of the DBProxy server.

# enable_dbproxy_server=true

## Flag to enable the SQL query builder of the table assist.

# enable_query_builder=true

## Flag to enable the creation of a coordinator for the current SQL query.

# enable_query_scheduling=true

###########################################################################

# Settings to configure your Hadoop cluster.

###########################################################################

[hadoop]

# Configuration for HDFS NameNode

# ------------------------------------------------------------------------

[[hdfs_clusters]]

# HA support by using HttpFs

[[[default]]]

# Enter the filesystem uri

fs_defaultfs=hdfs://master:9000

# NameNode logical name.

## logical_name=

# Use WebHdfs/HttpFs as the communication mechanism.

# Domain should be the NameNode or HttpFs host.

# Default port is 14000 for HttpFs.

webhdfs_url=http://master:50070/webhdfs/v1

# Change this if your HDFS cluster is Kerberos-secured

## security_enabled=false

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs

# have to be verified against certificate authority

## ssl_cert_ca_verify=True

# Directory of the Hadoop configuration

hadoop_conf_dir=/opt/hadoop-2.6.0-cdh5.8.0/etc/hadoop

# Configuration for YARN (MR2)

# ------------------------------------------------------------------------

[[yarn_clusters]]

[[[default]]]

# Enter the host on which you are running the ResourceManager

resourcemanager_host=master

# The port where the ResourceManager IPC listens on

resourcemanager_port=8080

# Whether to submit jobs to this cluster

submit_to=True

# Resource Manager logical name (required for HA)

## logical_name=

# Change this if your YARN cluster is Kerberos-secured

## security_enabled=false

# URL of the ResourceManager API

resourcemanager_api_url=http://master:8088

# URL of the ProxyServer API

proxy_api_url=http://master:8088

# URL of the HistoryServer API

history_server_api_url=http://slave2:19888

# URL of the Spark History Server

## spark_history_server_url=http://localhost:18088

# In secure mode (HTTPS), if SSL certificates from YARN Rest APIs

# have to be verified against certificate authority

## ssl_cert_ca_verify=True

# HA support by specifying multiple clusters.

# Redefine different properties there.

# e.g.

# [[[ha]]]

# Resource Manager logical name (required for HA)

## logical_name=my-rm-name

# Un-comment to enable

## submit_to=True

# URL of the ResourceManager API

## resourcemanager_api_url=http://localhost:8088

# ...

# Configuration for MapReduce (MR1)

# ------------------------------------------------------------------------

[[mapred_clusters]]

[[[default]]]

# Enter the host on which you are running the Hadoop JobTracker

## jobtracker_host=localhost

# The port where the JobTracker IPC listens on

## jobtracker_port=8021

# JobTracker logical name for HA

## logical_name=

# Thrift plug-in port for the JobTracker

## thrift_port=9290

# Whether to submit jobs to this cluster

submit_to=False

# Change this if your MapReduce cluster is Kerberos-secured

## security_enabled=false

# HA support by specifying multiple clusters

# e.g.

# [[[ha]]]

# Enter the logical name of the JobTrackers

## logical_name=my-jt-name

###########################################################################

# Settings to configure Beeswax with Hive

###########################################################################

[beeswax]

# Host where HiveServer2 is running.

# If Kerberos security is enabled, use fully-qualified domain name (FQDN).

hive_server_host=master

# Port where HiveServer2 Thrift server runs on.

hive_server_port=10000

# Hive configuration directory, where hive-site.xml is located

hive_conf_dir=/opt/hive-1.1.0-cdh5.8.0/conf

# Timeout in seconds for thrift calls to Hive service

## server_conn_timeout=120

# Choose whether to use the old GetLog() thrift call from before Hive 0.14 to retrieve the logs.

# If false, use the FetchResults() thrift call from Hive 1.0 or more instead.

## use_get_log_api=false

# Limit the number of partitions that can be listed.

## list_partitions_limit=10000

# The maximum number of partitions that will be included in the SELECT * LIMIT sample query for partitioned tables.

## query_partitions_limit=10

# A limit to the number of cells (rows * columns) that can be downloaded from a query

# (e.g. - 10K rows * 1K columns = 10M cells.)

# A value of -1 means there will be no limit.

## download_cell_limit=10000000

# Hue will try to close the Hive query when the user leaves the editor page.

# This will free all the query resources in HiveServer2, but also make its results inaccessible.

## close_queries=false

# Thrift version to use when communicating with HiveServer2.

# New column format is from version 7.

## thrift_version=7

# A comma-separated list of white-listed Hive configuration properties that users are authorized to set.

## config_whitelist=hive.map.aggr,hive.exec.compress.output,hive.exec.parallel,hive.execution.engine,mapreduce.job.queuename

# Override the default desktop username and password of the hue user used for authentications with other services.

# e.g. Used for LDAP/PAM pass-through authentication.

## auth_username=hue

## auth_password=

[[ssl]]

# Path to Certificate Authority certificates.

## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.

## validate=true

###########################################################################

# Settings to configure Impala

###########################################################################

[impala]

# Host of the Impala Server (one of the Impalad)

## server_host=localhost

# Port of the Impala Server

## server_port=21050

# Kerberos principal

## impala_principal=impala/hostname.foo.com

# Turn on/off impersonation mechanism when talking to Impala

## impersonation_enabled=False

# Number of initial rows of a result set to ask Impala to cache in order

# to support re-fetching them for downloading them.

# Set to 0 for disabling the option and backward compatibility.

## querycache_rows=50000

# Timeout in seconds for thrift calls

## server_conn_timeout=120

# Hue will try to close the Impala query when the user leaves the editor page.

# This will free all the query resources in Impala, but also make its results inaccessible.

## close_queries=true

# If > 0, the query will be timed out (i.e. cancelled) if Impala does not do any work

# (compute or send back results) for that query within QUERY_TIMEOUT_S seconds.

## query_timeout_s=600

# If > 0, the session will be timed out (i.e. cancelled) if Impala does not do any work

# (compute or send back results) for that session within SESSION_TIMEOUT_S seconds (default 12 hours).

## session_timeout_s=43200

# Override the desktop default username and password of the hue user used for authentications with other services.

# e.g. Used for LDAP/PAM pass-through authentication.

## auth_username=hue

## auth_password=

# A comma-separated list of white-listed Impala configuration properties that users are authorized to set.

# config_whitelist=debug_action,explain_level,mem_limit,optimize_partition_key_scans,query_timeout_s,request_pool

[[ssl]]

# SSL communication enabled for this server.

## enabled=false

# Path to Certificate Authority certificates.

## cacerts=/etc/hue/cacerts.pem

# Choose whether Hue should validate certificates received from the server.

## validate=true

###########################################################################

# Settings to configure the Spark application.

###########################################################################

[spark]

# Host address of the Livy Server.

## livy_server_host=localhost

# Port of the Livy Server.

## livy_server_port=8998

# Configure livy to start in local 'process' mode, or 'yarn' workers.

## livy_server_session_kind=process

# If livy should use proxy users when submitting a job.

## livy_impersonation_enabled=true

# Host of the Sql Server

## sql_server_host=localhost

# Port of the Sql Server

## sql_server_port=10000

###########################################################################

# Settings to configure the Oozie app

###########################################################################

[oozie]

# Location on local FS where the examples are stored.

## local_data_dir=..../examples

# Location on local FS where the data for the examples is stored.

## sample_data_dir=...thirdparty/sample_data

# Location on HDFS where the oozie examples and workflows are stored.

## remote_data_dir=/user/hue/oozie/workspaces

# Maximum number of Oozie workflows or coordinators to retrieve in one API call.

## oozie_jobs_count=50

# Use Cron format for defining the frequency of a Coordinator instead of the old frequency number/unit.

## enable_cron_scheduling=true

###########################################################################

# Settings to configure the Filebrowser app

###########################################################################

[filebrowser]

# Location on local filesystem where the uploaded archives are temporarily stored.

## archive_upload_tempdir=/tmp

# Show Download Button for HDFS file browser.

## show_download_button=false

# Show Upload Button for HDFS file browser.

## show_upload_button=false

###########################################################################

# Settings to configure Pig

###########################################################################

[pig]

# Location of piggybank.jar on local filesystem.

## local_sample_dir=/usr/share/hue/apps/pig/examples

# Location piggybank.jar will be copied to in HDFS.

## remote_data_dir=/user/hue/pig/examples

###########################################################################

# Settings to configure Sqoop2

###########################################################################

[sqoop]

# For autocompletion, fill out the librdbms section.

# Sqoop server URL

server_url=http://master:12000/sqoop

# Path to configuration directory

sqoop_conf_dir=/opt/sqoop-1.4.6-cdh5.8.0/conf

###########################################################################

# Settings to configure Proxy

###########################################################################

[proxy]

# Comma-separated list of regular expressions,

# which match 'host:port' of requested proxy target.

## whitelist=(localhost|127\.0\.0\.1):(50030|50070|50060|50075)

# Comma-separated list of regular expressions,

# which match any prefix of 'host:port/path' of requested proxy target.

# This does not support matching GET parameters.

## blacklist=

###########################################################################

# Settings to configure HBase Browser

###########################################################################

[hbase]

# Comma-separated list of HBase Thrift servers for clusters in the format of '(name|host:port)'.

# Use full hostname with security.

# If using Kerberos we assume GSSAPI SASL, not PLAIN.

hbase_clusters=(Cluster|master:9090)

# HBase configuration directory, where hbase-site.xml is located.

hbase_conf_dir=/opt/hbase-1.2.0-cdh5.8.0/conf

# Hard limit of rows or columns per row fetched before truncating.

## truncate_limit = 500

# 'buffered' is the default of the HBase Thrift Server and supports security.

# 'framed' can be used to chunk up responses,

# which is useful when used in conjunction with the nonblocking server in Thrift.

## thrift_transport=buffered

###########################################################################

# Settings to configure Solr Search

###########################################################################

[search]

# URL of the Solr Server

## solr_url=http://localhost:8983/solr/

# Requires FQDN in solr_url if enabled

## security_enabled=false

# Query sent when no term is entered

## empty_query=*:*

# Use latest Solr 5.2+ features.

## latest=false

###########################################################################

# Settings to configure Solr API lib

###########################################################################

[libsolr]

# Choose whether Hue should validate certificates received from the server.

## ssl_cert_ca_verify=true

###########################################################################

# Settings to configure Solr Indexer

###########################################################################

[indexer]

# Location of the solrctl binary.

## solrctl_path=/usr/bin/solrctl

###########################################################################

# Settings to configure Job Designer

###########################################################################

[jobsub]

# Location on local FS where examples and template are stored.

## local_data_dir=..../data

# Location on local FS where sample data is stored

## sample_data_dir=...thirdparty/sample_data

###########################################################################

# Settings to configure Job Browser.

###########################################################################

[jobbrowser]

# Share submitted jobs information with all users. If set to false,

# submitted jobs are visible only to the owner and administrators.

## share_jobs=true

# Whether to disable the job kill button for all users in the jobbrowser

## disable_killing_jobs=false

###########################################################################

# Settings to configure Sentry / Security App.

###########################################################################

[security]

# Use Sentry API V1 for Hive.

## hive_v1=true

# Use Sentry API V2 for Hive.

## hive_v2=false

# Use Sentry API V2 for Solr.

## solr_v2=true

###########################################################################

# Settings to configure the Zookeeper application.

###########################################################################

[zookeeper]

[[clusters]]

[[[default]]]

# Zookeeper ensemble. Comma separated list of Host/Port.

# e.g. localhost:2181,localhost:2182,localhost:2183

## host_ports=localhost:2181

# The URL of the REST contrib service (required for znode browsing).

## rest_url=http://localhost:9998

# Name of Kerberos principal when using security.

## principal_name=zookeeper

###########################################################################

# Settings for the User Admin application

###########################################################################

[useradmin]

# Default home directory permissions

## home_dir_permissions=0755

# The name of the default user group that users will be a member of

## default_user_group=default

[[password_policy]]

# Set password policy to all users. The default policy requires password to be at least 8 characters long,

# and contain both uppercase and lowercase letters, numbers, and special characters.

## is_enabled=false

## pwd_regex="^(?=.*?[A-Z])(?=(.*[a-z]){1,})(?=(.*[\d]){1,})(?=(.*[\W_]){1,}).{8,}$"

## pwd_hint="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."

## pwd_error_message="The password must be at least 8 characters long, and must contain both uppercase and lowercase letters, at least one number, and at least one special character."

###########################################################################

# Settings to configure liboozie

###########################################################################

[liboozie]

# The URL where the Oozie service runs on. This is required in order for

# users to submit jobs. Empty value disables the config check.

## oozie_url=http://localhost:11000/oozie

# Requires FQDN in oozie_url if enabled

## security_enabled=false

# Location on HDFS where the workflows/coordinator are deployed when submitted.

## remote_deployement_dir=/user/hue/oozie/deployments

###########################################################################

# Settings for the AWS lib

###########################################################################

[aws]

[[aws_accounts]]

# Default AWS account

[[[default]]]

# AWS credentials

## access_key_id=

## secret_access_key=

# Allow to use either environment variables or

# EC2 InstanceProfile to retrieve AWS credentials.

## allow_environment_credentials=yes

# AWS region to use

## region=us-east-1

###########################################################################

# Settings for the Sentry lib

###########################################################################

[libsentry]

# Hostname or IP of server.

## hostname=localhost

# Port the sentry service is running on.

## port=8038

# Sentry configuration directory, where sentry-site.xml is located.

## sentry_conf_dir=/etc/sentry/conf

###########################################################################

# Settings to configure the ZooKeeper Lib

###########################################################################

[libzookeeper]

# ZooKeeper ensemble. Comma separated list of Host/Port.

# e.g. localhost:2181,localhost:2182,localhost:2183

## ensemble=localhost:2181

# Name of Kerberos principal when using security.

## principal_name=zookeeper

###########################################################################

# Settings for the RDBMS application

###########################################################################

[librdbms]

# The RDBMS app can have any number of databases configured in the databases

# section. A database is known by its section name

# (IE sqlite, mysql, psql, and oracle in the list below).

[[databases]]

# sqlite configuration.

## [[[sqlite]]]

# Name to show in the UI.

## nice_name=SQLite

# For SQLite, name defines the path to the database.

## name=/tmp/sqlite.db

# Database backend to use.

## engine=sqlite

# Database options to send to the server when connecting.

# https://docs.djangoproject.com/en/1.4/ref/databases/

## options={}

# mysql, oracle, or postgresql configuration.

## [[[mysql]]]

# Name to show in the UI.

## nice_name="My SQL DB"

# For MySQL and PostgreSQL, name is the name of the database.

# For Oracle, Name is instance of the Oracle server. For express edition

# this is 'xe' by default.

## name=mysqldb

# Database backend to use. This can be:

# 1. mysql

# 2. postgresql

# 3. oracle

## engine=mysql

# IP or hostname of the database to connect to.

## host=localhost

# Port the database server is listening to. Defaults are:

# 1. MySQL: 3306

# 2. PostgreSQL: 5432

# 3. Oracle Express Edition: 1521

## port=3306

# Username to authenticate with when connecting to the database.

## user=example

# Password matching the username to authenticate with when

# connecting to the database.

## password=example

# Database options to send to the server when connecting.

# https://docs.djangoproject.com/en/1.4/ref/databases/

## options={}

###########################################################################

# Settings to configure SAML

###########################################################################

[libsaml]

# Xmlsec1 binary path. This program should be executable by the user running Hue.

## xmlsec_binary=/usr/local/bin/xmlsec1

# Entity ID for Hue acting as service provider.

# Can also accept a pattern where '<base_url>' will be replaced with server URL base.

## entity_id="<base_url>/saml2/metadata/"

# Create users from SSO on login.

## create_users_on_login=true

# Required attributes to ask for from IdP.

# This requires a comma separated list.

## required_attributes=uid

# Optional attributes to ask for from IdP.

# This requires a comma separated list.

## optional_attributes=

# IdP metadata in the form of a file. This is generally an XML file containing metadata that the Identity Provider generates.

## metadata_file=

# Private key to encrypt metadata with.

## key_file=

# Signed certificate to send along with encrypted metadata.

## cert_file=

# Path to a file containing the private key password.

## key_file_password=/path/key

# Execute this script to produce the private key password. This will be used when `key_file_password` is not set.

## key_file_password_script=/path/pwd.sh

# A mapping from attributes in the response from the IdP to django user attributes.

## user_attribute_mapping={'uid': ('username', )}

# Have Hue initiated authn requests be signed and provide a certificate.

## authn_requests_signed=false

# Have Hue initiated logout requests be signed and provide a certificate.

## logout_requests_signed=false

# Username can be sourced from 'attributes' or 'nameid'.

## username_source=attributes

# Performs the logout or not.

## logout_enabled=true

###########################################################################

# Settings to configure OpenID

###########################################################################

[libopenid]

# (Required) OpenId SSO endpoint url.

## server_endpoint_url=https://www.google.com/accounts/o8/id

# OpenId 1.1 identity url prefix to be used instead of SSO endpoint url

# This is only supported if you are using an OpenId 1.1 endpoint

## identity_url_prefix=https://app.onelogin.com/openid/your_company.com/

# Create users from OPENID on login.

## create_users_on_login=true

# Use email for username

## use_email_for_username=true

###########################################################################

# Settings to configure OAuth

###########################################################################

[liboauth]

# NOTE:

# To work, each of the active (i.e. uncommented) services must have

# applications created on the social network.

# Then the "consumer key" and "consumer secret" must be provided here.

# The addresses where to do so are:

# Twitter: https://dev.twitter.com/apps

# Google+ : https://cloud.google.com/

# Facebook: https://developers.facebook.com/apps

# Linkedin: https://www.linkedin.com/secure/developer

# Additionally, the following must be set in the application settings:

# Twitter: Callback URL (aka Redirect URL) must be set to http://YOUR_HUE_IP_OR_DOMAIN_NAME/oauth/social_login/oauth_authenticated

# Google+ : CONSENT SCREEN must have email address

# Facebook: Sandbox Mode must be DISABLED

# Linkedin: "In OAuth User Agreement", r_emailaddress is REQUIRED

# The Consumer key of the application

## consumer_key_twitter=

## consumer_key_google=

## consumer_key_facebook=

## consumer_key_linkedin=

# The Consumer secret of the application

## consumer_secret_twitter=

## consumer_secret_google=

## consumer_secret_facebook=

## consumer_secret_linkedin=

# The Request token URL

## request_token_url_twitter=https://api.twitter.com/oauth/request_token

## request_token_url_google=https://accounts.google.com/o/oauth2/auth

## request_token_url_linkedin=https://www.linkedin.com/uas/oauth2/authorization

## request_token_url_facebook=https://graph.facebook.com/oauth/authorize

# The Access token URL

## access_token_url_twitter=https://api.twitter.com/oauth/access_token

## access_token_url_google=https://accounts.google.com/o/oauth2/token

## access_token_url_facebook=https://graph.facebook.com/oauth/access_token

## access_token_url_linkedin=https://api.linkedin.com/uas/oauth2/accessToken

# The Authenticate URL

## authenticate_url_twitter=https://api.twitter.com/oauth/authorize

## authenticate_url_google=https://www.googleapis.com/oauth2/v1/userinfo?access_token=

## authenticate_url_facebook=https://graph.facebook.com/me?access_token=

## authenticate_url_linkedin=https://api.linkedin.com/v1/people/~:(email-address)?format=json&oauth2_access_token=

# Username Map. Json Hash format.

# Replaces username parts in order to simplify usernames obtained

# Example: {"@sub1.domain.com":"_S1", "@sub2.domain.com":"_S2"}

# converts 'email@sub1.domain.com' to 'email_S1'

## username_map={}

# Whitelisted domains (only applies to Google OAuth). CSV format.

## whitelisted_domains_google=

###########################################################################

# Settings to configure Metadata

###########################################################################

[metadata]

# For metadata tagging and enhancement features

[[optimizer]]

# For SQL query and table analysis

# Base URL to Optimizer API.

## api_url=https://alpha.optimizer.cloudera.com

# The name of the product or group which will have API access to the emails associated with it.

## product_name=hue

# A secret passphrase associated with the productName

## product_secret=hue

# Execute this script to produce the product secret. This will be used when `product_secret` is not set.

## product_secret_script=

# The email of the Optimizer account you want to associate with the Product.

## email=hue@gethue.com

# The password associated with the Optimizer account you want to associate with the Product.

## email_password=hue

# Execute this script to produce the email password. This will be used when `email_password` is not set.

## password_script=

# In secure mode (HTTPS), if Optimizer SSL certificates have to be verified against certificate authority.

## ssl_cert_ca_verify=True

[[navigator]]

# For tagging tables, files and getting lineage of data.

# Navigator API URL (without version suffix)

## api_url=http://localhost:7187/api

# Navigator API HTTP authentication username and password

# Override the desktop default username and password of the hue user used for authentications with other services.

# e.g. Used for LDAP/PAM pass-through authentication.

## auth_username=hue

## auth_password=

-------------------------------------------------------------------------

4.通过URL访问：http://slave1:8888/

大小: 125.9 KB

查看图片附件

你可能感兴趣的:(hadoop,hue)

flink-cdc实时增量同步mysql数据到elasticsearch 大数据技术派 #Flink elasticsearch flink mysql
什么是CDC？CDC是（ChangeDataCapture变更数据获取）的简称。核心思想是，监测并捕获数据库的变动（包括数据或数据表的插入INSERT、更新UPDATE、删除DELETE等），将这些变更按发生的顺序完整记录下来，写入到消息中间件中以供其他服务进行订阅及消费。1.环境准备mysqlelasticsearchflinkonyarn说明：如果没有安装hadoop，那么可以不用yarn，直
搭建分布式Hive集群逸曦玥泱大数据运维分布式 hive hadoop
title:搭建分布式Hive集群date:2024-11-2923:39:00categories:-服务器tags:-Hive-大数据搭建分布式Hive集群本次实验环境：Centos7-2009、Hadoop-3.1.4、JDK8、Zookeeper-3.6.3、Mysql-5.7.38、Hive-3.1.2功能规划方案一（本地运行模式）Master主节点（Mysql+Hive）192.168
Hadoop、Spark和 Hive 的详细关系夜行容忍 hadoop spark hive
Hadoop、Spark和Hive的详细关系1.ApacheHadoopHadoop是一个开源框架，用于分布式存储和处理大规模数据集。核心组件：HDFS(HadoopDistributedFileSystem)：分布式文件系统，提供高吞吐量的数据访问。YARN(YetAnotherResourceNegotiator)：集群资源管理和作业调度系统。MapReduce：基于YARN的并行处理框架，用
Windows系统下解压".tar"文件出错，提示：无法创建符号链接，可能需要以管理器身份运行winrar ruangaoyan
1、解压文件出错，如下信息：D:\tools\hadoop-3.1.2.tar.gz:无法创建符号链接D:\tools\hadoop-3.1.2\hadoop-3.1.2\lib\native\libhadoop.so您可能需要以管理器身份运行WinRAR!客户端没有所需的特权。2、解决方式如下：WIN+R快捷的打开命令窗口，输入CMD输入：cd/dD:\tools\hadoop-3.1.2这是我
大数据技术生态圈：Hadoop、Hive、Spark的区别和关系雨中徜徉的思绪漫溢大数据 hadoop hive
大数据技术生态圈：Hadoop、Hive、Spark的区别和关系在大数据领域中，Hadoop、Hive和Spark是三个常用的开源技术，它们在大数据处理和分析方面发挥着重要作用。虽然它们都是为了处理大规模数据集而设计的，但它们在功能和使用方式上存在一些区别。本文将详细介绍Hadoop、Hive和Spark的区别和关系，并提供相应的源代码示例。Hadoop：Hadoop是一个用于分布式存储和处理大规
ZooKeeper学习总结（1）——ZooKeeper入门介绍一杯甜酒 ZooKeeper学习总结 Zookeeper
1.概述Zookeeper是Hadoop的一个子项目，它是分布式系统中的协调系统，可提供的服务主要有：配置服务、名字服务、分布式同步、组服务等。它有如下的一些特点：简单Zookeeper的核心是一个精简的文件系统，它支持一些简单的操作和一些抽象操作，例如，排序和通知。丰富Zookeeper的原语操作是很丰富的，可实现一些协调数据结构和协议。例如，分布式队列、分布式锁和一组同级别节点中的“领导者选举
Zookeeper+kafka学习笔记 CHR_YTU Zookeeper
Zookeeper是Apache的一个java项目，属于Hadoop系统，扮演管理员的角色。配置管理分布式系统都有好多机器，比如我在搭建hadoop的HDFS的时候，需要在一个主机器上（Master节点）配置好HDFS需要的各种配置文件，然后通过scp命令把这些配置文件拷贝到其他节点上，这样各个机器拿到的配置信息是一致的，才能成功运行起来HDFS服务。Zookeeper提供了这样的一种服务：一种集
麒麟arm架构系统_安装nginx-1.27.0_访问500 internal server error nginx解决_13: Permission denied---Linux工作笔记072 添柴程序猿 java nginx-1.27.0 nginx最新版安装麒麟v10 arm架构麒麟v10 安装nginx
[[email protected]]#wget-chttp://nginx.org/download/nginx-1.27.0.tar.gz--2024-07-0509:47:00--http://nginx.org/download/nginx-1.27.0.tar.gzResolvingnginx.org(nginx.org)...3.125.197.172,52.58.19
Zookeeper与Kafka学习笔记上海研博数据 zookeeper kafka 学习
一、Zookeeper核心要点1.核心特性分布式协调服务，用于维护配置/命名/同步等元数据采用层次化数据模型（Znode树结构），每个节点可存储<1MB数据典型应用场景：HadoopNameNode高可用HBase元数据管理Kafka集群选举与状态管理2.设计限制内存型存储，不适合大数据量场景数据变更通过版本号（Version）控制，实现乐观锁机制采用ZAB协议保证数据一致性二、Kafka核心架构
phoenix无法连接hbase shell创建表失败_报错_PleaseHoldException: Master is initializing---记录020_大数据工作笔记0180 添柴程序猿 hbase连接报错 phoenix连接hbase phoenix PleaseHoldExcep
今天发现,我的phoenix,去连接hbase集群,怎么也连不上了,奇怪了...弄了一晚上org.apache.hadoop.hbase.PleaseHoldException:Masterisinitializing[root@hadoop120bin]#ll总用量184-rwxr-xr-x.1rootroot36371月222020chaos-daemon.sh-rwxr-xr-x.1root
Hadoop的运行模式对许 #Hadoop hadoop 大数据分布式
Hadoop的运行模式1、本地运行模式2、伪分布式运行模式3、完全分布式运行模式4、区别与总结Hadoop有三种可以运行的模式：本地运行模式、伪分布式运行模式和完全分布式运行模式1、本地运行模式本地运行模式无需任何守护进程，单机运行，所有的程序都运行在同一个JVM上执行Hadoop安装后默认为本地模式，数据存储在Linux本地。在本地模式下调试MapReduce程序非常高效方便，一般该模式主要是在
Hadoop的mapreduce的执行过程画纸仁大数据 hadoop mapreduce 大数据
一、map阶段的执行过程第一阶段：把输入目录下文件按照一定的标准逐个进行逻辑切片，形成切片规划。默认Splitsize=Blocksize（128M），每一个切片由一个MapTask处理。（getSplits）第二阶段：对切片中的数据按照一定的规则读取解析返回对。默认是按行读取数据。key是每一行的起始位置偏移量，value是本行的文本内容。（TextInputFormat）第三阶段：调用Mapp
Hadoop：分布式计算平台初探 dccrtbn6261333 大数据运维 java
Hadoop是一个开发和运行处理大规模数据的软件平台，是Apache的一个用java语言实现开源软件框架，实现在大量计算机组成的集群中对海量数据进行分布式计算。Hadoop框架中最核心设计就是：MapReduce和HDFS。MapReduce提供了对数据的计算，HDFS提供了海量数据的存储。MapReduceMapReduce的思想是由Google的一篇论文所提及而被广为流传的，简单的一句话解释M
【Hadoop】如何理解MapReduce？ 2302_79952574 hadoop mapreduce 数据库
MapReduce是一种用于处理大规模数据集的编程模型和计算框架。它的核心思想是将复杂的计算任务分解为两个简单的阶段：Map（映射）和Reduce（归约）。通过这种方式，MapReduce可以高效地并行处理海量数据。一.MapReduce的核心概念1.Map（映射）：将输入数据分割成小块，并对每个小块进行初步处理。输出键值对（key-valuepairs），例如。2.Shuffle和Sort（洗牌
Hadoop：全面深入解析 CloudJourney hadoop 大数据分布式
Hadoop是一个用于大规模数据处理的开源框架，其设计旨在通过集群的方式进行分布式存储和计算。本篇博文将从Hadoop的定义、架构、原理、应用场景以及常见命令等多个方面进行详细探讨，帮助读者全面深入地了解Hadoop。1.Hadoop的定义1.1什么是HadoopHadoop是由Apache软件基金会开发的开源软件框架，用于存储和处理大规模数据。其核心组件包括Hadoop分布式文件系统（HDFS）
Hadoop介绍：什么是Hadoop？了解Hadoop的应用 Zzzxt007 hadoop 大数据分布式
一、认识Hadoop框架Hadoop是一个提供分布式存储和计算的开源软件框架，使用Java语言编写，具有高扩展性、高容错性、无共享和高可用（HA）等特点，非常适合处理海量数据。它基于Google发布的MapReduce论文实现，并且应用了函数式编程的思想。Hadoop框架主要包括HDFS（HadoopDistributedFileSystem，Hadoop分布式文件系统）、MapReduce、YA
Hbase在hdfs上的archive目录占用空间过大宝罗Paul 大数据 hbase
hbase版本：1.1.2hadoop版本：2.7.3Hbase在hdfs上的目录/apps/hbase/data/archive占用空间过大，导致不停地发出hdfs空间使用率告警。【问题】告警信息alert:datanode_storageistriggered告警信息表明某个或某些datanode的HDFS存储空间使用率已超过阈值(我们设置的是80%)，需要清理。[hdfs@master-2r
Hadoop、Spark、Flink Shuffle对比逆袭的小学生 hadoop spark flink
一、Hadoop的shuffle前置知识：Map任务的数量由Hadoop框架自动计算，等于分片数量，等于输入文件总大小/分片大小，分片大小为HDFS默认值128M，可调Reduce任务数由用户在作业提交时通过Job.setNumReduceTasks(int)设置数据分配到Reduce任务的时间点，在Map任务执行期间，通过Partitioner（分区器）确定每个键值对的目标Reduce分区。默认
【Hadoop】什么是Zookeeper？如何理解Zookeeper？ 2302_79952574 hadoop zookeeper 大数据
ZooKeeper是一个开源的分布式应用程序协调服务，可以为分布式应用提供一致性的服务，功能包括：配置维护、名字服务、分布式同步、组服务等等。ZooKeeper的目标是封装好复杂易出错的关键服务，将简单易用的接口和性能高效、功能稳定的系统提供给用户。1.Zookeeper的特点最终一致性：Client不论连接到哪个Server,展示给它的都是同一个视图。可靠性：如果某个消息被一台服务器接受，那么它
【Hadoop】详解HDFS 2302_79952574 hadoop hdfs 大数据
Hadoop分布式文件系统(HDFS)被设计成适合运行在通用硬件上的分布式文件系统，它是一个高度容错性的系统，适合部署在廉价的机器上，能够提供高吞吐量的数据访问，非常适合大规模数据集上的应用。为了做到可靠性，HDFS创建了多份数据块的副本，并将它们放置在服务器群的计算节点中，MapReduce可以在它们所在的节点上处理这些数据。1.HDFS的设计目标存储大规模数据：HDFS可以存储并管理PB级甚至
HDFS的设计架构 F_0125 Hadoop hdfs hbase hadoop
HDFS是Hadoop生态系统中的分布式文件系统，设计用于存储和处理超大规模数据集。它具有高可靠性、高扩展性和高吞吐量的特点，适合运行在廉价硬件上。1.HDFS的设计思想HDFS的设计目标是解决大规模数据存储和处理的问题，其核心设计思想包括：（1）分布式存储-数据被分割成多个块（Block），并分布存储在集群中的多个节点上。-每个数据块默认大小为128MB或256MB，可以根据需求配置。（2）高容
大数据Flink（六十四）：Flink运行时架构介绍_flink中涉及到的大数据组件 2401_84181942 程序员大数据 flink 架构
于是人们提出了“不共享任何东西”（share-nothing）的分布式架构。从以Greenplum为代表的MPP（MassivelyParallelProcessing，大规模并行处理）架构，到Hadoop、Spark为代表的批处理架构，再到Storm、Flink为代表的流处理架构，都是以分布式作为系统架构的基本形态的。我们已经知道，Flink就是一个分布式的并行流处理系统。简单来说，它会由多个进
大数据运维实战指南：零基础入门与核心技术解析（第一篇） emmm形成中大数据运维
大数据运维实战指南：零基础入门与核心技术解析（第一篇）系列文章目录第一篇：大数据运维概述与核心技能体系第二篇：Hadoop生态体系与集群部署实战第三篇：分布式存储系统运维与优化第四篇：资源调度框架YARN/K8s深度解析第五篇：实时计算框架Flink/Spark运维指南第六篇：大数据监控体系与自动化运维第七篇：云原生时代的大数据运维实践第八篇：数据安全与合规性管理第九篇：性能调优与故障排查案例集第
hadoop框架与核心组件刨析（四）MapReduce 小刘爱喇石( ˝ᗢ̈˝ ) hadoop mapreduce 大数据
MapReduce是一种用于大规模数据处理的编程模型和计算框架，最初由Google提出，后来由ApacheHadoop实现并广泛应用。它的核心思想是将数据处理任务分解为两个阶段：Map和Reduce，并通过分布式计算并行处理海量数据。MapReduce的核心思想分而治之：将大规模数据集分割成多个小块，分布到集群中的多个节点上并行处理。Map阶段：将输入数据转换为键值对（Key-ValuePair）
hadoop 运行java程序_原生态在Hadoop上运行Java程序淇水煮汤 hadoop 运行java程序
第一种：原生态运行jar包1，利用eclipse编写Map-Reduce方法，一般引入Hadoop-core-1.1.2.jar。注意这里eclipse里没有安装hadoop的插件，只是引入其匝包，该eclipse可以安装在windows或者linux中，如果是在windows中安装的，且在其虚拟机安装的linux，可以通过共享文件夹来实现传递。2，编写要测试的数据，如命名为tempdata3，利
hadoop运行java程序命令_使用命令行编译打包运行自己的MapReduce程序 Hadoop2.6.0 emi0wb
网上的MapReduceWordCount教程对于如何编译WordCount.java几乎是一笔带过…而有写到的，大多又是0.20等旧版本版本的做法，即javac-classpath/usr/local/hadoop/hadoop-1.0.1/hadoop-core-1.0.1.jarWordCount.java，但较新的2.X版本中，已经没有hadoop-core*.jar这个文件，因此编辑和打
大数据Hadoop集群运行程序赵广陆 hadoop hadoop big data mapreduce
目录1运行自带的MapReduce程序2常见错误1运行自带的MapReduce程序下面我们在Hadoop集群上运行一个MapReduce程序，以帮助读者对分布式计算有个基本印象。在安装Hadoop时，系统给用户提供了一些MapReduce示例程序，其中有一个典型的用于计算圆周率的Java程序包，现在运行该程序。该jar包文件的位置和文件名是“~/hadoop-3.1.0/share/Hadoop/
hadoop框架与核心组件刨析（三）YARN 小刘爱喇石( ˝ᗢ̈˝ ) hadoop 大数据分布式
一、负载均衡的概念负载均衡（LoadBalancing）是一种将工作负载（如网络流量、计算任务或数据请求）分配到多个资源（如服务器、计算节点或存储设备）的技术，目的是优化资源使用、最大化吞吐量、最小化响应时间，并避免单个资源过载。负载均衡广泛应用于计算机网络、分布式系统、云计算等领域。负载均衡的核心目标提高性能：通过将负载分配到多个资源，避免单个资源成为瓶颈，从而提高系统的整体性能。提高可用性：如
Doris 数据集成 Kafka 不二人生 Doris 实战 doris 数据仓库
Doris数据集成Kafka这是我们Doris数据集成篇的第二篇，前面我们介绍过通过Catalog进行集成的例子Doris基础篇—数据集成Catalog目前公司的很多数据服务都开始使用Doris了，目前使用下来感觉还是很方便的，比起Hadoop那一套少了很多运维的成本，而且整体的效率也不错，现在也要把ELK那一套日志分析的替换掉，后面日志分析也走Doris。关于如何使用Doris做日志分析，可以参
大数据面试系列之——Hadoop 潜心_守道大数据面经面试大数据 Hadoop
Hadoop的三个核心：HDFS（分布式存储系统）MapReduce（分布式计算系统）YARN(分布式资源调度)1.Hadoop集群的几种搭建模式1.单机模式：直接解压安装，不存在分布式存储系统2.伪分布式：NameNode和DataNode安装于同一个节点，无法体现分布式处理的优势。3.完全分布式：一个主节点，多个从节点，存在如果主节点宕机，集群就无法使用的缺点。4.高可用模式：多个主节点，多个
web前段跨域nginx代理配置刘正强 nginx cms Web
nginx代理配置可参考server部分 server { listen 80; server_name localhost;
spring学习笔记 caoyong spring
一、概述 a>、核心技术 : IOC与AOP b>、开发为什么需要面向接口而不是实现接口降低一个组件与整个系统的藕合程度，当该组件不满足系统需求时，可以很容易的将该组件从系统中替换掉，而不会对整个系统产生大的影响 c>、面向接口编口编程的难点在于如何对接口进行初始化,(使用工厂设计模式)
Eclipse打开workspace提示工作空间不可用 0624chenhong eclipse
做项目的时候，难免会用到整个团队的代码，或者上一任同事创建的workspace， 1.电脑切换账号后，Eclipse打开时，会提示Eclipse对应的目录锁定，无法访问，根据提示，找到对应目录，G:\eclipse\configuration\org.eclipse.osgi\.manager，其中文件.fileTableLock提示被锁定。解决办法，删掉.fileTableLock文件，重
Javascript 面向对面写法的必要性？一炮送你回车库 JavaScript
现在Javascript面向对象的方式来写页面很流行，什么纯javascript的mvc框架都出来了：ember 这是javascript层的mvc框架哦,不是j2ee的mvc框架我想说的是，javascript本来就不是一门面向对象的语言，用它写出来的面向对象的程序，本身就有些别扭，很多人提到js的面向对象首先提的是：复用性。那么我请问你写的js里有多少是可以复用的，用fu
js array对象的迭代方法换个号韩国红果果 array
1.forEach 该方法接受一个函数作为参数，对数组中的每个元素使用该函数 return 语句失效 function square(num) { print(num, num * num); } var nums = [1,2,3,4,5,6,7,8,9,10]; nums.forEach(square); 2.every 该方法接受一个返回值为布尔类型
对Hibernate缓存机制的理解归来朝歌 session 一级缓存对象持久化
在hibernate中session一级缓存机制中，有这么一种情况：问题描述：我需要new一个对象，对它的几个字段赋值，但是有一些属性并没有进行赋值，然后调用 session.save()方法，在提交事务后，会出现这样的情况： 1：在数据库中有默认属性的字段的值为空 2：既然是持久化对象，为什么在最后对象拿不到默认属性的值？通过调试后解决方案如下：对于问题一，如你在数据库里设置了
WebService调用错误合集 darkranger webservice
Java.Lang.NoClassDefFoundError: Org/Apache/Commons/Discovery/Tools/DiscoverSingleton 调用接口出错，一个简单的WebService import org.apache.axis.client.Call;import org.apache.axis.client.Service; 首先必不可
JSP和Servlet的中文乱码处理 aijuans Java Web
JSP和Servlet的中文乱码处理前几天学习了JSP和Servlet中有关中文乱码的一些问题，写成了博客，今天进行更新一下。应该是可以解决日常的乱码问题了。现在作以下总结希望对需要的人有所帮助。我也是刚学，所以有不足之处希望谅解。一、表单提交时出现乱码：在进行表单提交的时候，经常提交一些中文，自然就避免不了出现中文乱码的情况，对于表单来说有两种提交方式：get和post提交方式。所以
面试经典六问 atongyeye 工作面试
题记：因为我不善沟通，所以在面试中经常碰壁，看了网上太多面试宝典，基本上不太靠谱。只好自己总结，并试着根据最近工作情况完成个人答案。以备不时之需。以下是人事了解应聘者情况的最典型的六个问题： 1 简单自我介绍关于这个问题，主要为了弄清两件事，一是了解应聘者的背景，二是应聘者将这些背景信息组织成合适语言的能力。我的回答：(针对技术面试回答，如果是人事面试，可以就掌
contentResolver.query()参数详解百合不是茶 android query()详解
收藏csdn的博客,介绍的比较详细,新手值得一看 1.获取联系人姓名一个简单的例子，这个函数获取设备上所有的联系人ID和联系人NAME。 [java] view plain copy public void fetchAllContacts() {
ora-00054:resource busy and acquire with nowait specified解决方法 bijian1013 oracle 数据库 kill nowait
当某个数据库用户在数据库中插入、更新、删除一个表的数据，或者增加一个表的主键时或者表的索引时，常常会出现ora-00054:resource busy and acquire with nowait specified这样的错误。主要是因为有事务正在执行（或者事务已经被锁），所有导致执行不成功。 1.下面的语句
web 开发乱码征客丶 spring Web
以下前端都是 utf-8 字符集编码一、后台接收 1.1、 get 请求乱码 get 请求中，请求参数在请求头中；乱码解决方法： a、通过在web 服务器中配置编码格式：tomcat 中，在 Connector 中添加URIEncoding="UTF-8"； 1.2、post 请求乱码 post 请求中，请求参数分两部份， 1.2.1、url？参数，
【Spark十六】： Spark SQL第二部分数据源和注册表的几种方式 bit1129 spark
Spark SQL数据源和表的Schema case class apply schema parquet json JSON数据源准备源数据 {"name":"Jack", "age": 12, "addr":{"city":"beijing&
JVM学习之:调优总结 -Xms -Xmx -Xmn -Xss BlueSkator -Xss -Xmn -Xms -Xmx
堆大小设置JVM 中最大堆大小有三方面限制：相关操作系统的数据模型（32-bt还是64-bit）限制；系统的可用虚拟内存限制；系统的可用物理内存限制。32位系统下，一般限制在1.5G~2G；64为操作系统对内存无限制。我在Windows Server 2003 系统，3.5G物理内存，JDK5.0下测试，最大可设置为1478m。典型设置： java -Xmx355
jqGrid 各种参数详解(转帖) BreakingBad jqGrid
jqGrid 各种参数详解分类：源代码分享个人随笔请勿参考解决开发问题 2012-05-09 20:29 84282人阅读评论(22) 收藏举报 jquery 服务器 parameters function ajax string
读《研磨设计模式》-代码笔记-代理模式-Proxy bylijinnan java 设计模式
声明：本文只为方便我个人查阅和理解，详细的分析以及源代码请移步原作者的博客http://chjavach.iteye.com/ import java.lang.reflect.InvocationHandler; import java.lang.reflect.Method; import java.lang.reflect.Proxy; /* * 下面
应用升级iOS8中遇到的一些问题 chenhbc ios8 升级iOS8
1、很奇怪的问题，登录界面，有一个判断，如果不存在某个值，则跳转到设置界面，ios8之前的系统都可以正常跳转，iOS8中代码已经执行到下一个界面了，但界面并没有跳转过去，而且这个值如果设置过的话，也是可以正常跳转过去的，这个问题纠结了两天多，之前的判断我是在 -(void)viewWillAppear:(BOOL)animated 中写的，最终的解决办法是把判断写在 -(void
工作流与自组织的关系？ comsci 设计模式工作
目前的工作流系统中的节点及其相互之间的连接是事先根据管理的实际需要而绘制好的，这种固定的模式在实际的运用中会受到很多限制，特别是节点之间的依存关系是固定的，节点的处理不考虑到流程整体的运行情况，细节和整体间的关系是脱节的，那么我们提出一个新的观点，一个流程是否可以通过节点的自组织运动来自动生成呢？这种流程有什么实际意义呢？这里有篇论文，摘要是：“针对网格中的服务
Oracle11.2新特性之INSERT提示IGNORE_ROW_ON_DUPKEY_INDEX daizj oracle
insert提示IGNORE_ROW_ON_DUPKEY_INDEX 转自：http://space.itpub.net/18922393/viewspace-752123 在 insert into tablea ...select * from tableb中，如果存在唯一约束，会导致整个insert操作失败。使用IGNORE_ROW_ON_DUPKEY_INDEX提示，会忽略唯一
二叉树:堆 dieslrae 二叉树
这里说的堆其实是一个完全二叉树,每个节点都不小于自己的子节点,不要跟jvm的堆搞混了.由于是完全二叉树,可以用数组来构建.用数组构建树的规则很简单: 一个节点的父节点下标为: (当前下标 - 1)/2 一个节点的左节点下标为: 当前下标 * 2 + 1 &
C语言学习八结构体 dcj3sjt126com c
为什么需要结构体，看代码 # include <stdio.h> struct Student //定义一个学生类型，里面有age, score, sex, 然后可以定义这个类型的变量 { int age; float score; char sex; } int main(void) { struct Student st = {80, 66.6,
centos安装golang dcj3sjt126com centos
#在国内镜像下载二进制包 wget -c http://www.golangtc.com/static/go/go1.4.1.linux-amd64.tar.gz tar -C /usr/local -xzf go1.4.1.linux-amd64.tar.gz #把golang的bin目录加入全局环境变量 cat >>/etc/profile<
10.性能优化-监控-MySQL慢查询 frank1234 性能优化 MySQL慢查询
1.记录慢查询配置 show variables where variable_name like 'slow%' ; --查看默认日志路径查询结果：--不用的机器可能不同 slow_query_log_file=/var/lib/mysql/centos-slow.log 修改mysqld配置文件：/usr /my.cnf[一般在/etc/my.cnf，本机在/user/my.cn
Java父类取得子类类名 happyqing java this 父类子类类名
在继承关系中，不管父类还是子类，这些类里面的this都代表了最终new出来的那个类的实例对象，所以在父类中你可以用this获取到子类的信息！ package com.urthinker.module.test; import org.junit.Test; abstract class BaseDao<T> { public void
Spring3.2新注解@ControllerAdvice jinnianshilongnian @Controller
@ControllerAdvice，是spring3.2提供的新注解，从名字上可以看出大体意思是控制器增强。让我们先看看@ControllerAdvice的实现： @Target(ElementType.TYPE) @Retention(RetentionPolicy.RUNTIME) @Documented @Component public @interface Co
Java spring mvc多数据源配置 liuxihope spring
转自：http://www.itpub.net/thread-1906608-1-1.html 1、首先配置两个数据库 <bean id="dataSourceA" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close&quo
第12章 Ajax（下） onestopweb Ajax
index.html <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/
BW / Universe Mappings blueoxygen BO
BW Element OLAP Universe Element Cube Dimension Class Charateristic A class with dimension and detail objects (Detail objects for key and desription) Hi
Java开发熟手该当心的11个错误 tomcat_oracle java 多线程工作单元测试
#1、不在属性文件或XML文件中外化配置属性。比如，没有把批处理使用的线程数设置成可在属性文件中配置。你的批处理程序无论在DEV环境中，还是UAT（用户验收测试）环境中，都可以顺畅无阻地运行，但是一旦部署在PROD 上，把它作为多线程程序处理更大的数据集时，就会抛出IOException，原因可能是JDBC驱动版本不同，也可能是#2中讨论的问题。如果线程数目可以在属性文件中配置，那么使它成为
推行国产操作系统的优劣 yananay windows linux 国产操作系统
最近刮起了一股风，就是去“国外货”。从应用程序开始，到基础的系统，数据库，现在已经刮到操作系统了。原因就是“棱镜计划”，使我们终于认识到了国外货的危害，开始重视起了信息安全。操作系统是计算机的灵魂。既然是灵魂，为了信息安全，那我们就自然要使用和推行国货。可是，一味地推行，是否就一定正确呢？先说说信息安全。其实从很早以来大家就在讨论信息安全。很多年以前，就据传某世界级的网络设备制造商生产的交