1:解压
[jifeng@jifeng01 ~]$ tar zxf sqoop-1.99.3-bin-hadoop100.tar.gz [jifeng@jifeng01 ~]$ ls apache-ant-1.9.4 hbase-0.94.21.tar.gz sqoop-1.99.3-bin-hadoop100 apache-ant-1.9.4-bin.tar.gz input sqoop-1.99.3-bin-hadoop100.tar.gz Desktop jdk1.7.0_45 VirtualBox-4.3-4.3.8_92456_el5-1.x86_64.rpm hadoop jdk-7u45-linux-x64.gz zookeeper-3.4.6 hadoop-core-1.0.4.jar mysql-connector-java-5.1.10-bin.jar zookeeper-3.4.6.tar.gz hbase-0.94.21
[jifeng@jifeng01 ~]$ cat .bash_profile # .bash_profile # Get the aliases and functions if [ -f ~/.bashrc ]; then . ~/.bashrc fi # User specific environment and startup programs PATH=$PATH:$HOME/bin export PATH export JAVA_HOME=$HOME/jdk1.7.0_45 export PATH=$JAVA_HOME/bin:$PATH export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar export HADOOP_HOME=$HOME/hadoop/hadoop-1.2.1 export ANT_HOME=$HOME/apache-ant-1.9.4 export HBASE_HOME=$HOME/hbase-0.94.21 export SQOOP_HOME=$HOME/sqoop-1.99.3-bin-hadoop100 export CATALINA_HOME=$SQOOP_HOME/server export LOGDIR=$SQOOP_HOME/logs export PATH=$PATH:$ANT_HOME/bin:$HBASE_HOME/bin:$SQOOP_HOME/bin [jifeng@jifeng01 ~]$ source .bash_profile
[jifeng@jifeng01 sqoop-1.99.3-bin-hadoop100]$ cat ./server/conf/sqoop.properties # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # Sqoop configuration file used by the built in configuration # provider: org.apache.sqoop.core.PropertiesConfigurationProvider. # This file must reside in the system configuration directory # which is specified by the system property "sqoop.config.dir" # and must be called sqoop.properties. # # NOTE: Tokens specified in this file that are marked by a # leading and trailing '@' characters should be replaced by # their appropriate values. For example, the token @LOGDIR@ # should be replaced appropriately. # # The following tokens are used in this configuration file: # # LOGDIR # The absolute path to the directory where system genearated # log files will be kept. # # BASEDIR # The absolute path to the directory where Sqoop 2 is installed # # # Logging Configuration # Any property that starts with the prefix # org.apache.sqoop.log4j is parsed out by the configuration # system and passed to the log4j subsystem. This allows you # to specify log4j configuration properties from within the # Sqoop configuration. 
# org.apache.sqoop.log4j.appender.file=org.apache.log4j.RollingFileAppender org.apache.sqoop.log4j.appender.file.File=@LOGDIR@/sqoop.log org.apache.sqoop.log4j.appender.file.MaxFileSize=25MB org.apache.sqoop.log4j.appender.file.MaxBackupIndex=5 org.apache.sqoop.log4j.appender.file.layout=org.apache.log4j.PatternLayout org.apache.sqoop.log4j.appender.file.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} [%l] %m%n org.apache.sqoop.log4j.debug=true org.apache.sqoop.log4j.rootCategory=WARN, file org.apache.sqoop.log4j.category.org.apache.sqoop=DEBUG org.apache.sqoop.log4j.category.org.apache.derby=INFO # # Audit Loggers Configuration # Multiple audit loggers could be given here. To specify an # audit logger, you should at least add org.apache.sqoop. # auditlogger.[LoggerName].class. You could also provide # more configuration options by using org.apache.sqoop. # auditlogger.[LoggerName] prefix, then all these options # are parsed to the logger class. # org.apache.sqoop.auditlogger.default.class=org.apache.sqoop.audit.FileAuditLogger org.apache.sqoop.auditlogger.default.file=@LOGDIR@/default.audit # # Repository configuration # The Repository subsystem provides the special prefix which # is "org.apache.sqoop.repository.sysprop". Any property that # is specified with this prefix is parsed out and set as a # system property. For example, if the built in Derby repository # is being used, the sysprop prefixed properties can be used # to affect Derby configuration at startup time by setting # the appropriate system properties. 
# # Repository provider org.apache.sqoop.repository.provider=org.apache.sqoop.repository.JdbcRepositoryProvider org.apache.sqoop.repository.schema.immutable=false # JDBC repository provider configuration org.apache.sqoop.repository.jdbc.handler=org.apache.sqoop.repository.derby.DerbyRepositoryHandler org.apache.sqoop.repository.jdbc.transaction.isolation=READ_COMMITTED org.apache.sqoop.repository.jdbc.maximum.connections=10 org.apache.sqoop.repository.jdbc.url=jdbc:derby:@BASEDIR@/repository/db;create=true org.apache.sqoop.repository.jdbc.driver=org.apache.derby.jdbc.EmbeddedDriver org.apache.sqoop.repository.jdbc.user=sa org.apache.sqoop.repository.jdbc.password= # System properties for embedded Derby configuration org.apache.sqoop.repository.sysprop.derby.stream.error.file=@LOGDIR@/derbyrepo.log # # Connector configuration # org.apache.sqoop.connector.autoupgrade=false # # Framework configuration # org.apache.sqoop.framework.autoupgrade=false # Sleeping period for reloading configuration file (once a minute) org.apache.sqoop.core.configuration.provider.properties.sleep=60000 # # Submission engine configuration # # Submission engine class org.apache.sqoop.submission.engine=org.apache.sqoop.submission.mapreduce.MapreduceSubmissionEngine # Number of milliseconds, submissions created before this limit will be removed, default is one day #org.apache.sqoop.submission.purge.threshold= # Number of milliseconds for purge thread to sleep, by default one day #org.apache.sqoop.submission.purge.sleep= # Number of milliseconds for update thread to sleep, by default 5 minutes #org.apache.sqoop.submission.update.sleep= # # Configuration for Mapreduce submission engine (applicable if it's configured) # # Hadoop configuration directory org.apache.sqoop.submission.engine.mapreduce.configuration.directory=/home/jifeng/hadoop/hadoop-1.2.1/conf/ # # Execution engine configuration # org.apache.sqoop.execution.engine=org.apache.sqoop.execution.mapreduce.MapreduceExecutionEngine 
[jifeng@jifeng01 sqoop-1.99.3-bin-hadoop100]$
修改上面配置中倒数第7行的 org.apache.sqoop.submission.engine.mapreduce.configuration.directory，将其设置为 hadoop 的安装配置目录
3:修改catalina.properties
[jifeng@jifeng01 sqoop-1.99.3-bin-hadoop100]$ vi ./server/conf/catalina.properties
common.loader=${catalina.base}/lib,${catalina.base}/lib/*.jar,${catalina.home}/lib,${catalina.home}/lib/*.jar,${catalina.home}/../lib/*.jar,/home/jifeng/hadoop/hadoop-1.2.1/lib/*.jar,/home/jifeng/hadoop/hadoop-1.2.1/*.jar
（在 common.loader 中追加 hadoop 的 jar 目录；注意 ${catalina.home} 中不能有空格）
4:copy mysql-connector-java-5.1.10-bin.jar 到 server/lib 目录
[jifeng@jifeng01 ~]$ cp mysql-connector-java-5.1.10-bin.jar sqoop-1.99.3-bin-hadoop100/server/lib/
5:启动停止sqoop
sqoop.sh server start/stop
[jifeng@jifeng01 bin]$ ./sqoop.sh server start Sqoop home directory: /home/jifeng/sqoop-1.99.3-bin-hadoop100 Setting SQOOP_HTTP_PORT: 12000 Setting SQOOP_ADMIN_PORT: 12001 Using CATALINA_OPTS: Adding to CATALINA_OPTS: -Dsqoop.http.port=12000 -Dsqoop.admin.port=12001 Using CATALINA_BASE: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server Using CATALINA_HOME: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server Using CATALINA_TMPDIR: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server/temp Using JRE_HOME: /home/jifeng/jdk1.7.0_45 Using CLASSPATH: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server/bin/bootstrap.jar [jifeng@jifeng01 bin]$ ./sqoop.sh server stop Sqoop home directory: /home/jifeng/sqoop-1.99.3-bin-hadoop100 Setting SQOOP_HTTP_PORT: 12000 Setting SQOOP_ADMIN_PORT: 12001 Using CATALINA_OPTS: Adding to CATALINA_OPTS: -Dsqoop.http.port=12000 -Dsqoop.admin.port=12001 Using CATALINA_BASE: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server Using CATALINA_HOME: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server Using CATALINA_TMPDIR: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server/temp Using JRE_HOME: /home/jifeng/jdk1.7.0_45 Using CLASSPATH: /home/jifeng/sqoop-1.99.3-bin-hadoop100/server/bin/bootstrap.jar 八月 17, 2015 9:47:20 下午 org.apache.catalina.startup.ClassLoaderFactory validateFile 警告: Problem with directory [/home/jifeng/sqoop-1.99.3-bin-hadoop100/lib], exists: [true], isDirectory: [false], canRead: [true]
6:进入客户端
[jifeng@jifeng01 sqoop-1.99.3-bin-hadoop100]$ ./bin/sqoop.sh client Sqoop home directory: /home/jifeng/sqoop-1.99.3-bin-hadoop100 Sqoop Shell: Type 'help' or '\h' for help. sqoop:000>
为客户端配置服务器
sqoop:000> set server --host jifeng01 --port 12000 --webapp sqoop Server is set successfully
sqoop:000> show version --all client version: Sqoop 1.99.3 revision 2404393160301df16a94716a3034e31b03e27b0b Compiled by mengweid on Fri Oct 18 14:51:11 EDT 2013 server version: Sqoop 1.99.3 revision 2404393160301df16a94716a3034e31b03e27b0b Compiled by mengweid on Fri Oct 18 14:51:11 EDT 2013 Protocol version: [1] sqoop:000>
查看连接器
sqoop:000> show connector --all 1 connector(s) to show: Connector with id 1: Name: generic-jdbc-connector Class: org.apache.sqoop.connector.jdbc.GenericJdbcConnector Version: 1.99.3 Supported job types: [IMPORT, EXPORT] Connection form 1: Name: connection Label: Connection configuration Help: You must supply the information requested in order to create a connection object. Input 1: Name: connection.jdbcDriver Label: JDBC Driver Class Help: Enter the fully qualified class name of the JDBC driver that will be used for establishing this connection. Type: STRING Sensitive: false Size: 128 Input 2: Name: connection.connectionString Label: JDBC Connection String Help: Enter the value of JDBC connection string to be used by this connector for creating connections. Type: STRING Sensitive: false Size: 128 Input 3: Name: connection.username Label: Username Help: Enter the username to be used for connecting to the database. Type: STRING Sensitive: false Size: 40 Input 4: Name: connection.password Label: Password Help: Enter the password to be used for connecting to the database. Type: STRING Sensitive: true Size: 40 Input 5: Name: connection.jdbcProperties Label: JDBC Connection Properties Help: Enter any JDBC properties that should be supplied during the creation of connection. Type: MAP Sensitive: false Forms for job type IMPORT: Job form 1: Name: table Label: Database configuration Help: You must supply the information requested in order to create a job object. 
Input 1: Name: table.schemaName Label: Schema name Help: Schema name to process data in the remote database Type: STRING Sensitive: false Size: 50 Input 2: Name: table.tableName Label: Table name Help: Table name to process data in the remote database Type: STRING Sensitive: false Size: 50 Input 3: Name: table.sql Label: Table SQL statement Help: SQL statement to process data in the remote database Type: STRING Sensitive: false Size: 2000 Input 4: Name: table.columns Label: Table column names Help: Specific columns of a table name or a table SQL Type: STRING Sensitive: false Size: 50 Input 5: Name: table.partitionColumn Label: Partition column name Help: A specific column for data partition Type: STRING Sensitive: false Size: 50 Input 6: Name: table.partitionColumnNull Label: Nulls in partition column Help: Whether there are null values in partition column Type: BOOLEAN Sensitive: false Input 7: Name: table.boundaryQuery Label: Boundary query Help: The boundary query for data partition Type: STRING Sensitive: false Size: 50 Forms for job type EXPORT: Job form 1: Name: table Label: Database configuration Help: You must supply the information requested in order to create a job object. 
Input 1: Name: table.schemaName Label: Schema name Help: Schema name to process data in the remote database Type: STRING Sensitive: false Size: 50 Input 2: Name: table.tableName Label: Table name Help: Table name to process data in the remote database Type: STRING Sensitive: false Size: 2000 Input 3: Name: table.sql Label: Table SQL statement Help: SQL statement to process data in the remote database Type: STRING Sensitive: false Size: 50 Input 4: Name: table.columns Label: Table column names Help: Specific columns of a table name or a table SQL Type: STRING Sensitive: false Size: 50 Input 5: Name: table.stageTableName Label: Stage table name Help: Name of the stage table to use Type: STRING Sensitive: false Size: 2000 Input 6: Name: table.clearStageTable Label: Clear stage table Help: Indicate if the stage table should be cleared Type: BOOLEAN Sensitive: false sqoop:000>
7:导入MYSQL数据:
a:创建数据库连接:
sqoop:000> create connection --cid 1 Creating connection for connector with id 1 Please fill following values to create new connection object Name: mysql --输入名称 Connection configuration JDBC Driver Class: com.mysql.jdbc.Driver --输入 JDBC Connection String: jdbc:mysql://10.6.3.241:3306/lir?createDatabaseIfNotExist=true --输入 Username: dss --输入 Password: ****** --输入 JDBC Connection Properties: There are currently 0 values in the map: entry# Security related configuration options Max connections: 500 --输入 New connection was successfully created with validation status FINE and persistent id 1 sqoop:000>
b:创建job
sqoop:000> show job --all 0 job(s) to show: sqoop:000> create job --xid 1 --type import Creating job for connection with id 1 Please fill following values to create new job object Name: mysql_job Database configuration Schema name: Table name: bb_month Table SQL statement: Table column names: Partition column name: Nulls in partition column: Boundary query: Output configuration Storage type: 0 : HDFS Choose: 0 Output format: 0 : TEXT_FILE 1 : SEQUENCE_FILE Choose: 1 Compression format: 0 : NONE 1 : DEFAULT 2 : DEFLATE 3 : GZIP 4 : BZIP2 5 : LZO 6 : LZ4 7 : SNAPPY Choose: 0 Output directory: /home/jifeng/out Throttling resources Extractors: Loaders: New job was successfully created with validation status FINE and persistent id 1 sqoop:000>
sqoop:000> start job --jid 1 Submission details Job ID: 1 Server URL: http://localhost:12000/sqoop/ Created by: jifeng Creation date: 2014-08-20 00:01:35 CST Lastly updated by: jifeng External ID: job_201408190908_0004 http://jifeng01:50030/jobdetails.jsp?jobid=job_201408190908_0004 2014-08-20 00:01:35 CST: BOOTING - Progress is not available
c:查看状态
sqoop:000> status job --jid 1 Submission details Job ID: 1 Server URL: http://localhost:12000/sqoop/ Created by: jifeng Creation date: 2014-08-20 00:01:35 CST Lastly updated by: jifeng External ID: job_201408190908_0004 http://jifeng01:50030/jobdetails.jsp?jobid=job_201408190908_0004 2014-08-20 00:01:41 CST: RUNNING - 5.00 %
sqoop:000> status job --jid 1 Submission details Job ID: 1 Server URL: http://localhost:12000/sqoop/ Created by: jifeng Creation date: 2014-08-20 00:01:35 CST Lastly updated by: jifeng External ID: job_201408190908_0004 http://jifeng01:50030/jobdetails.jsp?jobid=job_201408190908_0004 2014-08-20 00:01:55 CST: SUCCEEDED Counters: org.apache.hadoop.mapred.JobInProgress$Counter SLOTS_MILLIS_MAPS: 49955 FALLOW_SLOTS_MILLIS_REDUCES: 0 FALLOW_SLOTS_MILLIS_MAPS: 0 TOTAL_LAUNCHED_MAPS: 16 SLOTS_MILLIS_REDUCES: 0 org.apache.hadoop.mapreduce.lib.output.FileOutputFormat$Counter BYTES_WRITTEN: 57881 org.apache.hadoop.mapreduce.lib.input.FileInputFormat$Counter BYTES_READ: 0 FileSystemCounters HDFS_BYTES_READ: 1187 FILE_BYTES_WRITTEN: 671920 HDFS_BYTES_WRITTEN: 57881 org.apache.sqoop.submission.counter.SqoopCounters ROWS_READ: 532 org.apache.hadoop.mapred.Task$Counter MAP_INPUT_RECORDS: 0 PHYSICAL_MEMORY_BYTES: 567025664 SPILLED_RECORDS: 0 COMMITTED_HEAP_BYTES: 216793088 CPU_MILLISECONDS: 4450 VIRTUAL_MEMORY_BYTES: 5002416128 SPLIT_RAW_BYTES: 1187 MAP_OUTPUT_RECORDS: 532 Job executed successfully
sqoop:000> show job +----+-----------+--------+-----------+---------+ | Id | Name | Type | Connector | Enabled | +----+-----------+--------+-----------+---------+ | 1 | mysql_job | IMPORT | 1 | true | +----+-----------+--------+-----------+---------+
[jifeng@jifeng01 ~]$ hadoop fs -ls /home/jifeng/out Warning: $HADOOP_HOME is deprecated. Found 12 items -rw-r--r-- 1 jifeng supergroup 0 2014-08-20 00:01 /home/jifeng/out/_SUCCESS drwxr-xr-x - jifeng supergroup 0 2014-08-20 00:01 /home/jifeng/out/_logs -rw-r--r-- 1 jifeng supergroup 6849 2014-08-20 00:01 /home/jifeng/out/part-m-00000 -rw-r--r-- 1 jifeng supergroup 8626 2014-08-20 00:01 /home/jifeng/out/part-m-00001 -rw-r--r-- 1 jifeng supergroup 8529 2014-08-20 00:01 /home/jifeng/out/part-m-00002 -rw-r--r-- 1 jifeng supergroup 8348 2014-08-20 00:01 /home/jifeng/out/part-m-00003 -rw-r--r-- 1 jifeng supergroup 8440 2014-08-20 00:01 /home/jifeng/out/part-m-00004 -rw-r--r-- 1 jifeng supergroup 8461 2014-08-20 00:01 /home/jifeng/out/part-m-00005 -rw-r--r-- 1 jifeng supergroup 6341 2014-08-20 00:01 /home/jifeng/out/part-m-00006 -rw-r--r-- 1 jifeng supergroup 0 2014-08-20 00:01 /home/jifeng/out/part-m-00007 -rw-r--r-- 1 jifeng supergroup 0 2014-08-20 00:01 /home/jifeng/out/part-m-00008 -rw-r--r-- 1 jifeng supergroup 2287 2014-08-20 00:01 /home/jifeng/out/part-m-00009
[jifeng@jifeng01 ~]$ hadoop fs -cat /home/jifeng/out/part-m-00000 Warning: $HADOOP_HOME is deprecated. 2,'上海','null','null','2014-01',2014,1,1,13343.0000,2177100.0000,919980.9542,1780826.8400,915861.2630 3,'南京','null','null','2014-01',2014,1,1,83205.0000,10170825.5000,5691685.7539,10176737.0000,5783684.3099 4,'南宁','null','null','2014-01',2014,1,1,37398.0000,4657064.5582,3858233.2324,4285315.8635,3866359.4134 5,'合肥','null','null','2014-01',2014,1,1,54265.0000,5927293.3000,3790212.4730,5697485.8000,3795554.7360 6,'天津','null','null','2014-01',2014,1,1,21513.0000,3340041.0146,1889377.3429,3037085.1446,1935199.7005 7,'太原','null','null','2014-01',2014,1,1,5328.0000,964847.4000,592040.0100,866491.0750,598172.1280 8,'广州','null','null','2014-01',2014,1,1,450543.0000,70471984.4267,35032429.3451,60040659.7924,35938779.1930 9,'成都','null','null','2014-01',2014,1,1,78312.0000,9642889.2608,6539638.1473,8307986.0082,6689729.8533 10,'昆明','null','null','2014-01',2014,1,1,48588.0000,6628503.6282,4988034.6513,6588396.6282,5056185.2296 11,'杭州','null','null','2014-01',2014,1,1,74618.0000,6956351.5554,3692710.1188,7157249.8000,3787703.1280 12,'沈阳','null','null','2014-01',2014,1,1,26707.0000,4015047.8333,2422683.5838,3955828.1400,2532257.5097 13,'济南','null','null','2014-01',2014,1,1,94682.0000,10864425.1621,6275161.1735,10061228.8866,6519761.5557 14,'石家庄','null','null','2014-01',2014,1,1,12632.0000,1927744.4868,1328895.2188,1697052.8140,1345241.9188 15,'福州','null','null','2014-01',2014,1,1,53421.0000,6902491.3272,4467384.3248,6883220.4372,4551101.9392 16,'西安','null','null','2014-01',2014,1,1,38837.0000,5608857.0000,3791745.0944,5097407.0600,3832304.1279 17,'贵阳','null','null','2014-01',2014,1,1,32912.0000,4725606.0957,3609643.0050,4045945.5000,3642090.1400 18,'郑州','null','null','2014-01',2014,1,1,92264.0000,16073972.5496,8838877.6275,13622151.6764,9953878.0672