Common Big Data Commands

[grep | history ...]

【Search for specific content in a given file type on Linux】
grep -rin "9820" --include "*.xml" /export/server/


【Start the Hive metastore and hiveserver2 services】
nohup hive --service metastore > /tmp/hive-metastore.log 2>&1 &
nohup hive --service metastore &
nohup hive --service hiveserver2 > /tmp/hive-hiveserver2.log 2>&1 &
nohup hive --service hiveserver2 &

【Start spark-thriftserver】
/export/server/spark/sbin/start-thriftserver.sh \
  --hiveconf hive.server2.thrift.port=10001 \
  --hiveconf hive.server2.thrift.bind.host=node1 \
  --master local[*]

/export/server/spark/sbin/start-thriftserver.sh \
--hiveconf hive.server2.thrift.port=10001 \
--hiveconf hive.server2.thrift.bind.host=p1 \
--master local[*]


/export/server/spark/sbin/start-thriftserver.sh \
--hiveconf hive.server2.thrift.port=10001 \
--hiveconf hive.server2.thrift.bind.host=p1 \
--master yarn \
--deploy-mode client \
--driver-memory 1g \
--executor-memory 2g \
--executor-cores 4 \
--num-executors 25

!connect jdbc:hive2://192.168.88.166:10001


【Initialize and upgrade the Hive metastore schema】
schematool -dbType mysql -initSchema
schematool -dbType mysql -upgradeSchema
GRANT ALL PRIVILEGES ON hive.* TO 'root'@'%' IDENTIFIED BY '123456';
flush privileges;

【Hive JDBC connection URL】
jdbc:hive2://node1:10000
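To connect with Beeline using this URL (a sketch; the login user root is an assumption, use whatever user your cluster expects):
beeline -u jdbc:hive2://node1:10000 -n root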


【Enable Hive local mode】
set hive.exec.mode.local.auto=true;   -- defaults to false


【Leave Hadoop safe mode】
hdfs dfsadmin -safemode leave 
hdfs dfsadmin -safemode forceExit
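To check the current safe mode status first:
hdfs dfsadmin -safemode get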

【Restart ZooKeeper and Kafka】
zkServer.sh stop 
kafka-server-stop.sh 
zkServer.sh start 
nohup /export/server/kafka/bin/kafka-server-start.sh /export/server/kafka/config/server.properties &

【Completely delete a Kafka topic】
kafka-topics.sh --zookeeper node3:2181 --list 
kafka-topics.sh --zookeeper node3:2181 --delete --topic spark_kafka 

【zkCli.sh】
ls /config/topics
rmr /config/topics/spark_kafka
rmr /brokers/topics/spark_kafka
rmr /admin/delete_topics/spark_kafka

【Create a topic】
kafka-topics.sh --zookeeper node3:2181 --create --topic spark_kafka --partitions 3 --replication-factor 1
kafka-topics.sh --zookeeper node3:2181 --list

【Start a console producer and consumer】
kafka-console-producer.sh --broker-list node3:9092 --topic spark_kafka
kafka-console-consumer.sh --from-beginning --bootstrap-server node3:9092 --topic spark_kafka
kafka-console-consumer.sh --from-beginning --bootstrap-server node3:9092 --topic __consumer_offsets


【Complete steps to delete and recreate a topic: start
Stop the producer and consumer
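If they were started in the foreground, Ctrl+C is enough; otherwise, a sketch for finding leftover console clients (the console tools show up as ConsoleProducer / ConsoleConsumer processes):
ps -ef | grep -E 'ConsoleProducer|ConsoleConsumer' | grep -v grep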

kafka-topics.sh --zookeeper node3:2181 --list 
kafka-topics.sh --zookeeper node3:2181 --delete --topic stationTopic 

kafka-topics.sh --zookeeper node3:2181 --list

zkCli.sh
ls /config/topics
rmr /config/topics/stationTopic
rmr /brokers/topics/stationTopic
rmr /admin/delete_topics/stationTopic

Delete the topic's log file directories
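A sketch of this step, assuming the broker's data directory is /export/server/kafka/logs (the real path is whatever log.dirs in server.properties points to):
rm -rf /export/server/kafka/logs/stationTopic-*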

kafka-topics.sh --zookeeper node3:2181 --create --topic stationTopic --partitions 3 --replication-factor 1
kafka-topics.sh --zookeeper node3:2181 --list
Restart Kafka again
kafka-server-stop.sh 
nohup /export/server/kafka/bin/kafka-server-start.sh /export/server/kafka/config/server.properties &

Start the producer
kafka-console-producer.sh --broker-list node3:9092 --topic stationTopic
Start the consumer
kafka-console-consumer.sh --bootstrap-server node3:9092 --topic stationTopic --from-beginning
Complete steps to delete and recreate a topic: end】
【Install Redis】
tar -zxvf redis-3.2.8.tar.gz
yum install gcc
cd /redis3.2.8
make
make PREFIX=/usr/local/src/redis install
cp /redis3.2.8/redis.conf /redis/bin/redis.conf 
Start Redis
redis-server redis.conf
redis-cli -h 192.168.88.163
 

【kafka】
Start the ZooKeeper service
zookeeper-daemon.sh start

Start the Kafka service
kafka-daemon.sh start


【Passwordless SSH login】
ssh-keygen -t rsa
ssh-copy-id node1
scp /root/.ssh/authorized_keys node2:/root/.ssh
scp /root/.ssh/authorized_keys node3:/root/.ssh
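A quick check that passwordless login works (hosts node2/node3 as above):
ssh node2 hostname
ssh node3 hostname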

【sqoop】
sqoop import \
-D mapred.job.name=sqoop_import_dd_table  \
--connect "jdbc:mysql://192.168.88.163:3306/insurance"  \
--username root  \
--password "123456"  \
--table dd_table  \
--hive-import  \
--hive-database insurance  \
--hive-table dd_table  \
--hive-overwrite  \
-m 1  \
--fields-terminated-by ',' \
--delete-target-dir

【mysql】
mysqldump insurance --add-drop-trigger --result-file=/opt/insurance/insurance.sql --user=root --host=192.168.88.163 --port=3306
mysql -uusername -ppassword -h ip_address -P 3306 --default-character-set=utf8mb4 ${i}
mysql -uroot -p --default-character-set=utf8mb4 insurance


【dolphinscheduler】
CREATE DATABASE dolphinscheduler DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;
set global validate_password.policy=0;
set global validate_password.length=1;

GRANT ALL PRIVILEGES ON dolphinscheduler.* TO '{user}'@'%' IDENTIFIED BY '{password}';
flush privileges;

ZooKeeper must be running before installing DolphinScheduler.
Be sure to configure the Sqoop environment variable in DolphinScheduler's env file (see the sketch below).
root needs to grant the tenant the broadest permissions.
Whoever uploads a script to Linux must remember to grant it execute permission.
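A sketch of the env-variable and execute-permission items; the DolphinScheduler and Sqoop install paths (and the env file location, which can vary by version) are assumptions, adjust to your environment:
echo 'export SQOOP_HOME=/export/server/sqoop' >> /export/server/dolphinscheduler/conf/env/dolphinscheduler_env.sh
echo 'export PATH=$SQOOP_HOME/bin:$PATH' >> /export/server/dolphinscheduler/conf/env/dolphinscheduler_env.sh
chmod +x /opt/insurance/sqoop/sqoop_import_mort_10_13.sh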

Install the dos2unix command
 dos2unix /opt/insurance/sqoop/sqoop_import_mort_10_13.sh 
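If dos2unix is not installed, it can usually be pulled from the OS repositories first (assuming a yum-based system, as with gcc above):
yum install -y dos2unix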

To modify a workflow, take it offline first.


【Structured Streaming】
--memory sink
CREATE TABLE db_spark.tb_word_count (
  id int NOT NULL AUTO_INCREMENT,
  word varchar(255) NOT NULL,
  count int NOT NULL,
  PRIMARY KEY (id),
  UNIQUE KEY word (word)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;

REPLACE INTO  tb_word_count (id, word, count) VALUES (NULL, ?, ?);


【spark yarn Pi】
/export/server/spark/bin/spark-submit \
--master yarn \
--class org.apache.spark.examples.SparkPi \
${SPARK_HOME}/examples/jars/spark-examples_2.11-2.4.5.jar \
10

【WordCount yarn】
/export/server/spark/bin/spark-submit \
--master yarn \
--driver-memory 512m \
--executor-memory 512m \
--executor-cores 1 \
--num-executors 2 \
--queue default \
--class cn.itcast.spark._2SparkWordCount \
/opt/spark-chapter01-1.0-SNAPSHOT.jar 

【Run application locally on 8 cores】
/export/server/spark/bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master local[8] \
${SPARK_HOME}/examples/jars/spark-examples_2.11-2.4.5.jar \
  100

# Run on a Spark standalone cluster in client deploy mode
./bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://207.184.161.138:7077 \
  --executor-memory 20G \
  --total-executor-cores 100 \
${SPARK_HOME}/examples/jars/spark-examples_2.11-2.4.5.jar \
  1000

# Run on a Spark standalone cluster in cluster deploy mode with supervise
./bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master spark://207.184.161.138:7077 \
  --deploy-mode cluster \
  --supervise \
  --executor-memory 20G \
  --total-executor-cores 100 \
  /path/to/examples.jar \
  1000

# Run on a YARN cluster (deploy-mode can be client for client mode)
export HADOOP_CONF_DIR=XXX
./bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master yarn \
  --deploy-mode cluster \
  --executor-memory 20G \
  --num-executors 50 \
  /path/to/examples.jar \
  1000

# Run a Python application on a Spark standalone cluster
./bin/spark-submit \
  --master spark://207.184.161.138:7077 \
  examples/src/main/python/pi.py \
  1000

# Run on a Mesos cluster in cluster deploy mode with supervise
./bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master mesos://207.184.161.138:7077 \
  --deploy-mode cluster \
  --supervise \
  --executor-memory 20G \
  --total-executor-cores 100 \
  http://path/to/examples.jar \
  1000

# Run on a Kubernetes cluster in cluster deploy mode
./bin/spark-submit \
  --class org.apache.spark.examples.SparkPi \
  --master k8s://xx.yy.zz.ww:443 \
  --deploy-mode cluster \
  --executor-memory 20G \
  --num-executors 50 \
  http://path/to/examples.jar \
  1000
 
