Ubuntu环境下Hadoop1.2.1, HBase0.94.25, nutch2.2.1各个配置文件一览

/×××××××××××××××××××××××××××××××××××××××××/

Author:xxx0624

HomePage:http://www.cnblogs.com/xxx0624/

/×××××××××××××××××××××××××××××××××××××××××/

 

Hadoop伪分布式配置过程:

Hadoop:1.2.1

Hbase:0.94.25

nutch:2.2.1

Java:1.8.0

SSH:1.0.1j

tomcat:7.0.57

zookeeper:3.4.6

(1)配置Java环境:http://www.cnblogs.com/xxx0624/p/4164744.html

(2)配置OpenSSH:http://www.cnblogs.com/xxx0624/p/4165252.html

(3)配置Hadoop:http://www.cnblogs.com/xxx0624/p/4166095.html

(4)配置tomcat:http://www.cnblogs.com/xxx0624/p/4166840.html

(5)配置zookeeper:http://www.cnblogs.com/xxx0624/p/4168440.html

(6)配置HBase:http://www.cnblogs.com/xxx0624/p/4170468.html

(7)配置ant:http://www.cnblogs.com/xxx0624/p/4172277.html

(8)配置nutch:http://www.cnblogs.com/xxx0624/p/4172601.html

(9)集成:http://www.cnblogs.com/xxx0624/p/4176199.html

 

Hadoop:

(1)hadoop/conf/core-site.xml







	
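<configuration>
	<property>
		<name>fs.default.name</name>
		<value>hdfs://localhost:9000</value>
	</property>
	<property>
		<name>hadoop.tmp.dir</name>
		<value>/home/xxx0624/hadoop/tmp</value>
	</property>
</configuration>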
	

 (2)hadoop/conf/hdfs-site.xml







	
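<configuration>
	<property>
		<name>dfs.replication</name>
		<value>1</value>
	</property>
	<property>
		<name>dfs.name.dir</name>
		<value>/home/xxx0624/hadoop/hdfs/name</value>
	</property>
	<property>
		<name>dfs.data.dir</name>
		<value>/home/xxx0624/hadoop/hdfs/data</value>
	</property>
</configuration>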
	

 (3)hadoop/conf/mapred-site.xml







    
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>
    

 (4)hadoop/conf/hadoop-env.sh

 1 # Set Hadoop-specific environment variables here.
 2 
 3 # The only required environment variable is JAVA_HOME.  All others are
 4 # optional.  When running a distributed configuration it is best to
 5 # set JAVA_HOME in this file, so that it is correctly defined on
 6 # remote nodes.
 7 
 8 # The java implementation to use.  Required.
 9 # export JAVA_HOME=/usr/lib/j2sdk1.5-sun
10 
11 # Extra Java CLASSPATH elements.  Optional.
12 # export HADOOP_CLASSPATH=
13 
14 # The maximum amount of heap to use, in MB. Default is 1000.
15 # export HADOOP_HEAPSIZE=2000
16 
17 # Extra Java runtime options.  Empty by default.
18 # export HADOOP_OPTS=-server
19 
20 # Command specific options appended to HADOOP_OPTS when specified
21 export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
22 export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
23 export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
24 export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
25 export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
26 # export HADOOP_TASKTRACKER_OPTS=
27 # The following applies to multiple commands (fs, dfs, fsck, distcp etc)
28 # export HADOOP_CLIENT_OPTS
29 
30 # Extra ssh options.  Empty by default.
31 # export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
32 
33 # Where log files are stored.  $HADOOP_HOME/logs by default.
34 # export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
35 
36 # File naming remote slave hosts.  $HADOOP_HOME/conf/slaves by default.
37 # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
38 
39 # host:path where hadoop code should be rsync'd from.  Unset by default.
40 # export HADOOP_MASTER=master:/home/$USER/src/hadoop
41 
42 # Seconds to sleep between slave commands.  Unset by default.  This
43 # can be useful in large clusters, where, e.g., slave rsyncs can
44 # otherwise arrive faster than the master can service them.
45 # export HADOOP_SLAVE_SLEEP=0.1
46 
47 # The directory where pid files are stored. /tmp by default.
48 # NOTE: this should be set to a directory that can only be written to by 
49 #       the users that are going to run the hadoop daemons.  Otherwise there is
50 #       the potential for a symlink attack.
51 # export HADOOP_PID_DIR=/var/hadoop/pids
52 
53 # A string representing this instance of hadoop. $USER by default.
54 # export HADOOP_IDENT_STRING=$USER
55 
56 # The scheduling priority for daemon processes.  See 'man nice'.
57 # export HADOOP_NICENESS=10
58 
59 export JAVA_HOME=/usr/lib/jvm 
60 
61 export HADOOP_HOME=/home/xxx0624/hadoop
62 
63 export PATH=$PATH:/home/xxx0624/hadoop/bin
64 
65 export HBASE_CLASSPATH=/home/xxx0624/hadoop/conf
View Code

 (5)hadoop/conf/masters

localhost

 (6)hadoop/conf/slaves

localhost

 

 

HBase:

(1)hbase/conf/hbase-site.xml




        
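<configuration>
        <property>
                <name>hbase.rootdir</name>
                <value>hdfs://127.0.0.1:9000/hbase</value>
                <description>The directory shared by region servers.</description>
        </property>
        <property>
                <name>hbase.master</name>
                <value>127.0.0.1:60000</value>
        </property>
        <property>
                <name>hbase.cluster.distributed</name>
                <value>true</value>
        </property>
        <property>
                <name>hbase.zookeeper.quorum</name>
                <value>127.0.0.1</value>
        </property>
</configuration>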
        
	

 (2)hbase/conf/hbase-env.sh

  1 #
  2 #/**
  3 # * Copyright 2007 The Apache Software Foundation
  4 # *
  5 # * Licensed to the Apache Software Foundation (ASF) under one
  6 # * or more contributor license agreements.  See the NOTICE file
  7 # * distributed with this work for additional information
  8 # * regarding copyright ownership.  The ASF licenses this file
  9 # * to you under the Apache License, Version 2.0 (the
 10 # * "License"); you may not use this file except in compliance
 11 # * with the License.  You may obtain a copy of the License at
 12 # *
 13 # *     http://www.apache.org/licenses/LICENSE-2.0
 14 # *
 15 # * Unless required by applicable law or agreed to in writing, software
 16 # * distributed under the License is distributed on an "AS IS" BASIS,
 17 # * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18 # * See the License for the specific language governing permissions and
 19 # * limitations under the License.
 20 # */
 21 
 22 # Set environment variables here.
 23 
 24 # This script sets variables multiple times over the course of starting an hbase process,
 25 # so try to keep things idempotent unless you want to take an even deeper look
 26 # into the startup scripts (bin/hbase, etc.)
 27 
 28 # The java implementation to use.  Java 1.6 required.
 29 # export JAVA_HOME=/usr/java/jdk1.6.0/
 30 
 31 # Extra Java CLASSPATH elements.  Optional.
 32 # export HBASE_CLASSPATH=
 33 
 34 # The maximum amount of heap to use, in MB. Default is 1000.
 35 # export HBASE_HEAPSIZE=1000
 36 
 37 # Extra Java runtime options.
 38 # Below are what we set by default.  May only work with SUN JVM.
 39 # For more on why as well as other possible settings,
 40 # see http://wiki.apache.org/hadoop/PerformanceTuning
 41 export HBASE_OPTS="-XX:+UseConcMarkSweepGC"
 42 
 43 # Uncomment one of the below three options to enable java garbage collection logging for the server-side processes.
 44 
 45 # This enables basic gc logging to the .out file.
 46 # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
 47 
 48 # This enables basic gc logging to its own file.
 49 # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
 50 # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
 51 
 52 # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
 53 # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
 54 # export SERVER_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
 55 
 56 # Uncomment one of the below three options to enable java garbage collection logging for the client processes.
 57 
 58 # This enables basic gc logging to the .out file.
 59 # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps"
 60 
 61 # This enables basic gc logging to its own file.
 62 # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
 63 # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH>"
 64 
 65 # This enables basic GC logging to its own file with automatic log rolling. Only applies to jdk 1.6.0_34+ and 1.7.0_2+.
 66 # If FILE-PATH is not replaced, the log file(.gc) would still be generated in the HBASE_LOG_DIR .
 67 # export CLIENT_GC_OPTS="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:<FILE-PATH> -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=1 -XX:GCLogFileSize=512M"
 68 
 69 # Uncomment below if you intend to use the EXPERIMENTAL off heap cache.
 70 # export HBASE_OPTS="$HBASE_OPTS -XX:MaxDirectMemorySize="
 71 # Set hbase.offheapcache.percentage in hbase-site.xml to a nonzero value.
 72 
 73 
 74 # Uncomment and adjust to enable JMX exporting
 75 # See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access.
 76 # More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html
 77 #
 78 # export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false"
 79 # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101"
 80 # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102"
 81 # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103"
 82 # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS $HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104"
 83 
 84 # File naming hosts on which HRegionServers will run.  $HBASE_HOME/conf/regionservers by default.
 85 # export HBASE_REGIONSERVERS=${HBASE_HOME}/conf/regionservers
 86 
 87 # File naming hosts on which backup HMaster will run.  $HBASE_HOME/conf/backup-masters by default.
 88 # export HBASE_BACKUP_MASTERS=${HBASE_HOME}/conf/backup-masters
 89 
 90 # Extra ssh options.  Empty by default.
 91 # export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR"
 92 
 93 # Where log files are stored.  $HBASE_HOME/logs by default.
 94 # export HBASE_LOG_DIR=${HBASE_HOME}/logs
 95 
 96 # Enable remote JDWP debugging of major HBase processes. Meant for Core Developers 
 97 # export HBASE_MASTER_OPTS="$HBASE_MASTER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8070"
 98 # export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8071"
 99 # export HBASE_THRIFT_OPTS="$HBASE_THRIFT_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8072"
100 # export HBASE_ZOOKEEPER_OPTS="$HBASE_ZOOKEEPER_OPTS -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8073"
101 
102 # A string representing this instance of hbase. $USER by default.
103 # export HBASE_IDENT_STRING=$USER
104 
105 # The scheduling priority for daemon processes.  See 'man nice'.
106 # export HBASE_NICENESS=10
107 
108 # The directory where pid files are stored. /tmp by default.
109 # export HBASE_PID_DIR=/var/hadoop/pids
110 
111 # Seconds to sleep between slave commands.  Unset by default.  This
112 # can be useful in large clusters, where, e.g., slave rsyncs can
113 # otherwise arrive faster than the master can service them.
114 # export HBASE_SLAVE_SLEEP=0.1
115 
116 # Tell HBase whether it should manage its own instance of Zookeeper or not.
117 # export HBASE_MANAGES_ZK=true
118 
119 export JAVA_HOME=/usr/lib/jvm
120 export HBASE_MANAGES_ZK=true
121 export HBASE_CLASSPATH=/home/xxx0624/hadoop/conf
View Code

(3)hbase/conf/regionservers

localhost

 

 

nutch:

(1)nutch/conf/nutch-site.xml








	  
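<configuration>
	<property>
		<name>storage.data.store.class</name>
		<value>org.apache.gora.hbase.store.HBaseStore</value>
		<description>Default class for storing data</description>
	</property>
	<property>
		<name>http.agent.name</name>
		<value>xxx0624-ThinkPad-Edge</value>
	</property>
</configuration>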
	 

 (2)nutch/conf/gora.properties

 1 # Licensed to the Apache Software Foundation (ASF) under one or more
 2 # contributor license agreements.  See the NOTICE file distributed with
 3 # this work for additional information regarding copyright ownership.
 4 # The ASF licenses this file to You under the Apache License, Version 2.0
 5 # (the "License"); you may not use this file except in compliance with
 6 # the License.  You may obtain a copy of the License at
 7 #
 8 #     http://www.apache.org/licenses/LICENSE-2.0
 9 #
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
15 
16 #gora.datastore.default=org.apache.gora.mock.store.MockDataStore
17 #gora.datastore.autocreateschema=true
18 
19 ###############################
20 # Default SqlStore properties #
21 ###############################
22 
23 #gora.sqlstore.jdbc.driver=org.hsqldb.jdbc.JDBCDriver
24 #gora.sqlstore.jdbc.url=jdbc:hsqldb:hsql://localhost/nutchtest
25 #gora.sqlstore.jdbc.user=sa
26 #gora.sqlstore.jdbc.password=
27 
28 ################################
29 # Default AvroStore properties #
30 ################################
31 
32 # gora.avrostore.codec.type=BINARY||JSON
33 # gora.avrostore.output.path=file:///tmp/gora.avrostore.test.output
34 
35 ################################
36 # DatafileAvroStore properties #
37 ################################
38 # DataFileAvroStore is file based store which uses Avro's 
39 # DataFile{Writer,Reader}'s as a backend. This datastore supports 
40 # mapreduce.
41 
42 # gora.datafileavrostore.###=
43 
44 #########################
45 # HBaseStore properties #
46 #########################
47 # HBase requires that the Configuration has a valid "hbase.zookeeper.quorum"
48 # property. It should be included within hbase-site.xml on the classpath. When
49 # this property is omitted, it expects Zookeeper to run on localhost:2181.
50 
51 # To greatly improve scan performance, increase the hbase-site Configuration
52 # property "hbase.client.scanner.caching". This sets the number of rows to grab
53 # per request.
54 
55 # HBase autoflushing. Enabling autoflush decreases write performance. 
56 # Available since Gora 0.2. Defaults to disabled.
57 # hbase.client.autoflush.default=false
58 
59 #############################
60 # CassandraStore properties #
61 #############################
62 
63 # gora.cassandrastore.servers=localhost:9160
64 
65 #######################
66 # MemStore properties #
67 #######################
68 # This is a memory based {@link DataStore} implementation for tests.
69 
70 # gora.memstore.###=
71 
72 ############################
73 # AccumuloStore properties #
74 ############################
75 gora.datastore.default=org.apache.gora.hbase.store.HBaseStore
76 #gora.datastore.default=org.apache.gora.accumulo.store.AccumuloStore
77 #gora.datastore.accumulo.mock=true
78 #gora.datastore.accumulo.instance=a14
79 #gora.datastore.accumulo.zookeepers=localhost
80 #gora.datastore.accumulo.user=root
81 #gora.datastore.accumulo.password=secret
View Code

(3)nutch/build.xml

  1 <?xml version="1.0"?>
  2 
 18 
 19 <project name="${name}" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
 20 
 21  
 22  
 23  <property file="${user.home}/build.properties" />
 24  <property file="${basedir}/build.properties" />
 25  <property file="${basedir}/default.properties" />
 26  <property name="test.junit.output.format" value="plain" />
 27  <property name="release.dir" value="${build.dir}/release" />
 28 
 29  
 30  <property name="groupId" value="org.apache.nutch" />
 31  <property name="artifactId" value="nutch" />
 32  <property name="maven-repository-url" value="https://repository.apache.org/service/local/staging/deploy/maven2" />
 33  <property name="maven-repository-id" value="apache.releases.https" />
 34  <property name="maven-jar" value="${release.dir}/${artifactId}-${version}.jar" />
 35  <property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" />
 36  <property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" />
 37 
 38 
 39  
 40  <path id="classpath">
 41   <pathelement location="${build.classes}" />
 42   <fileset dir="${build.lib.dir}">
 43    <include name="*.jar" />
 44   </fileset>
 45  </path>
 46 
 47  <presetdef name="javac">
 48    <javac includeantruntime="false" />
 49  </presetdef>
 50 
 51  
 52  <dirname property="plugins.classpath.dir" file="${build.plugins}" />
 53  <path id="test.classpath">
 54   <pathelement location="${test.build.classes}" />
 55   <pathelement location="${conf.dir}" />
 56   <pathelement location="${test.src.dir}" />
 57   <pathelement location="${plugins.classpath.dir}" />
 58   <path refid="classpath" />
 59   <pathelement location="${build.dir}/${final.name}.job" />
 60   <fileset dir="${build.lib.dir}">
 61    <include name="*.jar" />
 62   </fileset>
 63  </path>
 64 
 65  
 66  
 67  
 68  <target name="init" depends="ivy-init" description="--> stuff required by all targets">
 69   <mkdir dir="${build.dir}" />
 70   <mkdir dir="${build.classes}" />
 71   <mkdir dir="${release.dir}" />
 72 
 73   <mkdir dir="${test.build.dir}" />
 74   <mkdir dir="${test.build.classes}" />
 75 
 76   <touch datetime="01/25/1971 2:00 pm">
 77    <fileset dir="${conf.dir}" includes="**/*.template" />
 78   </touch>
 79 
 80   <copy todir="${conf.dir}" verbose="true">
 81    <fileset dir="${conf.dir}" includes="**/*.template" />
 82    <mapper type="glob" from="*.template" to="*" />
 83   </copy>
 84  </target>
 85 
 86  
 87  
 88  
 89  <target name="compile" depends="compile-core, compile-plugins" description="--> compile all Java files"/>
 90 
 91  <target name="compile-core" depends="init, resolve-default" description="--> compile core Java files only">
 92   <javac 
 93       encoding="${build.encoding}" 
 94       srcdir="${src.dir}"
 95     includes="org/apache/nutch/**/*.java" 
 96       destdir="${build.classes}"
 97     debug="${javac.debug}" 
 98       optimize="${javac.optimize}" 
 99       target="${javac.version}"
100     source="${javac.version}" 
101       deprecation="${javac.deprecation}">
102    <compilerarg value="-Xlint:-path"/>
103    <classpath refid="classpath" />
104   </javac>
105  </target>
106 
107  <target name="compile-plugins" depends="init, resolve-default" description="--> compile plugins only">
108   <ant dir="src/plugin" target="deploy" inheritAll="false" />
109  </target>
110 
111  
112  
113  
114  
115  
116  <target name="jar" depends="compile-core" description="--> make nutch.jar">
117   <copy file="${conf.dir}/nutch-default.xml" todir="${build.classes}" />
118   <copy file="${conf.dir}/nutch-site.xml" todir="${build.classes}" />
119   <jar jarfile="${build.dir}/${final.name}.jar" basedir="${build.classes}">
120    <manifest>
121    </manifest>
122   </jar>
123  </target>
124 
125  
126  
127  
128  
129  
130  <target name="release" depends="compile-core"
131   description="--> generate the release distribution">
132   <copy file="${conf.dir}/nutch-default.xml" todir="${build.classes}" />
133   <copy file="${conf.dir}/nutch-site.xml" todir="${build.classes}" />
134 
135   
136   <jar jarfile="${maven-jar}" basedir="${build.classes}" />
137 
138   
139   <javadoc 
140       destdir="${release.dir}/javadoc" 
141       overview="${src.dir}/overview.html"
142     author="true" 
143       version="true" 
144       use="true" 
145       windowtitle="${name} ${version} API"
146     doctitle="${name} ${version} API"
147     bottom="Copyright &copy; ${year} The Apache Software Foundation">
148    <arg value="${javadoc.proxy.host}" />
149    <arg value="${javadoc.proxy.port}" />
150 
151    <packageset dir="${src.dir}"/>
152       <packageset dir="${plugins.dir}/creativecommons/src/java"/>
153       <packageset dir="${plugins.dir}/feed/src/java"/>
154       <packageset dir="${plugins.dir}/index-anchor/src/java"/>
155       <packageset dir="${plugins.dir}/index-basic/src/java"/>
156       <packageset dir="${plugins.dir}/index-more/src/java"/>
157       <packageset dir="${plugins.dir}/language-identifier/src/java"/>
158       <packageset dir="${plugins.dir}/lib-http/src/java"/>
159       <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
160       <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
161       <packageset dir="${plugins.dir}/parse-ext/src/java"/>
162       <packageset dir="${plugins.dir}/parse-html/src/java"/>
163       <packageset dir="${plugins.dir}/parse-js/src/java"/>
164       <packageset dir="${plugins.dir}/parse-swf/src/java"/>
165       <packageset dir="${plugins.dir}/parse-tika/src/java"/>
166       <packageset dir="${plugins.dir}/parse-zip/src/java"/>
167       <packageset dir="${plugins.dir}/protocol-file/src/java"/>
168       <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
169       <packageset dir="${plugins.dir}/protocol-http/src/java"/>
170       <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
171       <packageset dir="${plugins.dir}/protocol-sftp/src/java"/>
172       <packageset dir="${plugins.dir}/scoring-link/src/java"/>
173       <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
174       <packageset dir="${plugins.dir}/subcollection/src/java"/>
175       <packageset dir="${plugins.dir}/tld/src/java"/>
176       <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
177       <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
178       <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
179       <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
180       <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
181       <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
182       <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
183       <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
184       <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
185 
186    <link href="${javadoc.link.java}" />
187    <link href="${javadoc.link.lucene}" />
188    <link href="${javadoc.link.hadoop}" />
189 
190    <classpath refid="classpath" />
191    <classpath>
192     <fileset dir="${plugins.dir}">
193      <include name="**/*.jar" />
194     </fileset>
195    </classpath>
196 
197    <group title="Core" packages="org.apache.nutch.*" />
198    <group title="Plugins API" packages="${plugins.api}" />
199    <group title="Protocol Plugins" packages="${plugins.protocol}" />
200    <group title="URL Filter Plugins" packages="${plugins.urlfilter}" />
201    <group title="Scoring Plugins" packages="${plugins.scoring}" />
202    <group title="Parse Plugins" packages="${plugins.parse}" />
203    <group title="Indexing Filter Plugins" packages="${plugins.index}" />
204    <group title="Misc. Plugins" packages="${plugins.misc}" />
205   </javadoc>
206   <jar jarfile="${maven-javadoc-jar}">
207    <fileset dir="${release.dir}/javadoc" />
208   </jar>
209 
210   
211   <jar jarfile="${maven-sources-jar}">
212    <fileset dir="${src.dir}" />
213   </jar>
214  </target>
215 
216  
217  
218  
219  
220  
221  <target name="deploy" depends="release" description="--> deploy to Apache Nexus">
222 
223   
224   <ivy:makepom ivyfile="${ivy.file}" pomfile="${basedir}/pom.xml"
225    templatefile="ivy/mvn.template">
226    <mapping conf="default" scope="compile" />
227    <mapping conf="runtime" scope="runtime" />
228   </ivy:makepom>
229 
230   
231   <artifact:mvn>
232    <arg
233     value="org.apache.maven.plugins:maven-gpg-plugin:1.4:sign-and-deploy-file" />
234    <arg value="-Durl=${maven-repository-url}" />
235    <arg value="-DrepositoryId=${maven-repository-id}" />
236    <arg value="-DpomFile=pom.xml" />
237    <arg value="-Dfile=${maven-jar}" />
238    <arg value="-Papache-release" />
239   </artifact:mvn>
240 
241   
242   <artifact:mvn>
243    <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.4:sign-and-deploy-file" />
244    <arg value="-Durl=${maven-repository-url}" />
245    <arg value="-DrepositoryId=${maven-repository-id}" />
246    <arg value="-DpomFile=pom.xml" />
247    <arg value="-Dfile=${maven-sources-jar}" />
248    <arg value="-Dclassifier=sources" />
249    <arg value="-Papache-release" />
250   </artifact:mvn>
251 
252   
253   <artifact:mvn>
254    <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.4:sign-and-deploy-file" />
255    <arg value="-Durl=${maven-repository-url}" />
256    <arg value="-DrepositoryId=${maven-repository-id}" />
257    <arg value="-DpomFile=pom.xml" />
258    <arg value="-Dfile=${maven-javadoc-jar}" />
259    <arg value="-Dclassifier=javadoc" />
260    <arg value="-Papache-release" />
261   </artifact:mvn>
262  </target>
263 
264  
265  
266  
267  
268  
269  <target name="job" depends="compile" description="--> make nutch.job jar">
270   <jar jarfile="${build.dir}/${final.name}.job">
271    
275    <zipfileset dir="${build.classes}" excludes="nutch-default.xml,nutch-site.xml" />
276    <zipfileset dir="${conf.dir}" excludes="*.template,hadoop*.*" />
277    
281    <zipfileset dir="${build.lib.dir}" prefix="lib" includes="**/*.jar"
282     excludes="jasper*.jar,jsp-*.jar,hadoop-*.jar,hbase*test*.jar,ant*jar,hsqldb*.jar" />
283    <zipfileset dir="${build.plugins}" prefix="classes/plugins" />
284   </jar>
285  </target>
286 
287  <target name="runtime" depends="jar, job" description="--> default target for running Nutch">
288   <mkdir dir="${runtime.dir}" />
289   <mkdir dir="${runtime.local}" />
290   <mkdir dir="${runtime.deploy}" />
291   
292   <copy file="${build.dir}/${final.name}.job" todir="${runtime.deploy}" />
293   <copy todir="${runtime.deploy}/bin">
294    <fileset dir="src/bin" />
295   </copy>
296   <chmod perm="ugo+x" type="file">
297    <fileset dir="${runtime.deploy}/bin" />
298   </chmod>
299   
300   <copy file="${build.dir}/${final.name}.jar" todir="${runtime.local}/lib" />
301   <copy todir="${runtime.local}/lib/native">
302    <fileset dir="lib/native" />
303   </copy>
304   <copy todir="${runtime.local}/conf">
305    <fileset dir="${conf.dir}" excludes="*.template" />
306   </copy>
307   <copy todir="${runtime.local}/bin">
308    <fileset dir="src/bin" />
309   </copy>
310   <chmod perm="ugo+x" type="file">
311    <fileset dir="${runtime.local}/bin" />
312   </chmod>
313   <copy todir="${runtime.local}/lib">
314    <fileset dir="${build.dir}/lib"
315     excludes="ant*.jar,jasper*.jar,jsp-*.jar,hadoop*test*.jar,hbase*test*.jar" />
316   </copy>
317   <copy todir="${runtime.local}/plugins">
318    <fileset dir="${build.dir}/plugins" />
319   </copy>
320   <copy todir="${runtime.local}/test">
321    <fileset dir="${build.dir}/test" />
322   </copy>
323  </target>
324 
325  
326  
327  
328  <target name="compile-core-test" depends="compile-core, resolve-test" description="--> compile test code">
329   <javac 
330       encoding="${build.encoding}" 
331       srcdir="${test.src.dir}"
332     includes="org/apache/nutch*/**/*.java" 
333       destdir="${test.build.classes}"
334     debug="${javac.debug}" 
335       optimize="${javac.optimize}" 
336       target="${javac.version}"
337     source="${javac.version}" 
338       deprecation="${javac.deprecation}">
339    <compilerarg value="-Xlint:-path"/>
340    <classpath refid="test.classpath" />
341   </javac>
342  </target>
343 
344  
345  
346  
347 
348  <target name="proxy" depends="job, compile-core-test" description="--> run nutch proxy">
349   <java classname="org.apache.nutch.tools.proxy.TestbedProxy" fork="true">
350    <classpath refid="test.classpath" />
351    <arg value="-fake" />
352    
356    <jvmarg line="-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl" />
357   </java>
358  </target>
359 
360  
361  
362  
363 
364  <target name="benchmark" description="--> run nutch benchmarking analysis">
365   <java classname="org.apache.nutch.tools.Benchmark" fork="true">
366    <classpath refid="test.classpath" />
367    <jvmarg line="-Xmx512m -Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl" />
368    <arg value="-maxPerHost" />
369    <arg value="10" />
370    <arg value="-seeds" />
371    <arg value="1" />
372    <arg value="-depth" />
373    <arg value="5" />
374   </java>
375  </target>
376 
377 
378  
379  
380  
381  <target name="test" depends="test-core, test-plugins" description="--> run JUnit tests"/>
382 
383  <target name="test-core" depends="job, compile-core-test" description="--> run core JUnit tests only">
384 
385   <delete dir="${test.build.data}" />
386   <mkdir dir="${test.build.data}" />
387   
390   <copy todir="${test.build.data}">
391    <fileset dir="src/testresources" includes="**/*" />
392   </copy>
393   <copy file="${test.src.dir}/nutch-site.xml" 
394       todir="${test.build.classes}" />
395 
396   <copy file="${test.src.dir}/log4j.properties" 
397       todir="${test.build.classes}" />
398       
399   <copy file="${test.src.dir}/gora.properties" 
400       todir="${test.build.classes}" />
401 
402   <copy file="${test.src.dir}/crawl-tests.xml"
403         todir="${test.build.classes}"/>
404         
405   <copy file="${test.src.dir}/domain-urlfilter.txt"
406         todir="${test.build.classes}"/>
407 
408   <copy file="${test.src.dir}/filter-all.txt"
409         todir="${test.build.classes}"/>
410 
411   <junit printsummary="yes" haltonfailure="no" fork="yes" dir="${basedir}"
412    errorProperty="tests.failed" failureProperty="tests.failed" maxmemory="1000m">
413    <sysproperty key="test.build.data" value="${test.build.data}" />
414    <sysproperty key="test.src.dir" value="${test.src.dir}" />
415    <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl" />
416    <classpath refid="test.classpath" />
417    <formatter type="${test.junit.output.format}" />
418    <batchtest todir="${test.build.dir}" unless="testcase">
419     <fileset dir="${test.src.dir}" 
420         includes="**/Test*.java" excludes="**/${test.exclude}.java" />
421    </batchtest>
422    <batchtest todir="${test.build.dir}" if="testcase">
423     <fileset dir="${test.src.dir}" includes="**/${testcase}.java" />
424    </batchtest>
425   </junit>
426 
427   <fail if="tests.failed">Tests failed!</fail>
428 
429  </target>
430 
431  <target name="test-plugins" depends="compile" description="--> run plugin JUnit tests only">
432   <ant dir="src/plugin" target="test" inheritAll="false" />
433  </target>
434 
435  <target name="nightly" depends="test, tar-src, zip-src, javadoc" description="--> run the nightly target build">
436  </target>
437 
438  
439  
440  
441 
442  
443  <target name="resolve-default" depends="clean-lib, init" description="--> resolve and retrieve dependencies with ivy">
444   <ivy:resolve file="${ivy.file}" conf="default" log="download-only" />
445   <ivy:retrieve pattern="${build.lib.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet" />
446   <antcall target="copy-libs" />
447  </target>
448 
449  <target name="resolve-test" depends="clean-lib, init" description="--> resolve and retrieve dependencies with ivy">
450   <ivy:resolve file="${ivy.file}" conf="test" log="download-only" />
451   <ivy:retrieve pattern="${build.lib.dir}/[artifact]-[revision].[ext]" symlink="false" log="quiet" />
452   <antcall target="copy-libs" />
453  </target>
454 
455  <target name="copy-libs" description="--> copy the libs in lib, which are not ivy enabled">
456   
457   <copy todir="${build.lib.dir}/" failonerror="false">
458    <fileset dir="${lib.dir}" includes="**/*.jar" />
459   </copy>
460  </target>
461 
462  
463  <target name="publish-local" depends="jar" description="--> publish this project in the local ivy repository">
464   <ivy:publish artifactspattern="${build.dir}/[artifact]-${version}.[ext]"
465     resolver="local" 
466       pubrevision="${version}" 
467       pubdate="${now}" 
468       status="integration"
469     forcedeliver="true" overwrite="true" />
470   <echo message="project ${ant.project.name} published locally with version ${version}" />
471  </target>
472 
473  
474  <target name="report" depends="resolve-test" description="--> generates a report of dependencies">
475   <ivy:report todir="${build.dir}" />
476  </target>
477 
478  
479  <target name="ivy-init" depends="ivy-probe-antlib, ivy-init-antlib" description="--> initialise Ivy settings">
480   <ivy:settings file="${ivy.dir}/ivysettings.xml" />
481  </target>
482 
483  
484  <target name="ivy-probe-antlib" description="--> probe the antlib library">
485   <condition property="ivy.found">
486    <typefound uri="antlib:org.apache.ivy.ant" name="cleancache" />
487   </condition>
488  </target>
489 
490  
491  <target name="ivy-download" description="--> download ivy">
492   <available file="${ivy.jar}" property="ivy.jar.found" />
493   <antcall target="ivy-download-unchecked" />
494  </target>
495 
496  
497  <target name="ivy-download-unchecked" unless="ivy.jar.found" description="--> fetch any ivy file">
498   <get src="${ivy.repo.url}" dest="${ivy.jar}" usetimestamp="true" />
499  </target>
500 
501  
502  <target name="ivy-init-antlib" depends="ivy-download" unless="ivy.found" description="--> attempt to use Ivy with Antlib">
503   <typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
504    loaderRef="ivyLoader">
505    <classpath>
506     <pathelement location="${ivy.jar}" />
507    </classpath>
508   </typedef>
509   <fail>
510    <condition>
511     <not>
512      <typefound uri="antlib:org.apache.ivy.ant" name="cleancache" />
513     </not>
514    </condition>
515    You need Apache Ivy 2.0 or later from http://ant.apache.org/
516    It could not be loaded from ${ivy.repo.url}
517   </fail>
518  </target>
519     
520   <target name="compile-avro-schema" depends="resolve-default" description="--> compile the avro schema(s) in src/gora/*.avsc">
521     <typedef name="schema" 
522                classname="org.apache.avro.specific.SchemaTask"
523                classpathref="classpath" />
524     
525       <mkdir dir="${build.gora}" />
526       <schema destdir="${build.gora}">
527         <fileset dir="./src/gora">
528           <include name="**/*.avsc"/>
529         </fileset>
530       </schema>
531       
532     </target>
533 
534    
535   
536   
537   
538   <target name="generate-gora-src" depends="init" description="--> compile the avro schema(s) in src/gora/*.avsc">
539     <java classname="org.apache.gora.compiler.GoraCompiler">
540      <classpath refid="classpath"/>
541      <arg value="src/gora/"/>
542      <arg value="${src.dir}"/>
543     </java>
544  </target>
545 
546  
547  
548  
549  <target name="javadoc" depends="compile" description="--> generate Javadoc">
550   <mkdir dir="${build.javadoc}" />
551   <javadoc 
552       overview="${src.dir}/overview.html" 
553       destdir="${build.javadoc}"
554     author="true" 
555       version="true" 
556       use="true" 
557       windowtitle="${name} ${version} API"
558     doctitle="${name} ${version} API"
559     bottom="Copyright &amp;copy; ${year} The Apache Software Foundation">
560    <arg value="${javadoc.proxy.host}" />
561    <arg value="${javadoc.proxy.port}" />
562 
563    <packageset dir="${src.dir}"/>
564       <packageset dir="${plugins.dir}/creativecommons/src/java"/>
565       <packageset dir="${plugins.dir}/feed/src/java"/>
566       <packageset dir="${plugins.dir}/index-anchor/src/java"/>
567       <packageset dir="${plugins.dir}/index-basic/src/java"/>
568       <packageset dir="${plugins.dir}/index-more/src/java"/>
569       <packageset dir="${plugins.dir}/language-identifier/src/java"/>
570       <packageset dir="${plugins.dir}/lib-http/src/java"/>
571       <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
572       <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
573       <packageset dir="${plugins.dir}/parse-ext/src/java"/>
574       <packageset dir="${plugins.dir}/parse-html/src/java"/>
575       <packageset dir="${plugins.dir}/parse-js/src/java"/>
576       <packageset dir="${plugins.dir}/parse-swf/src/java"/>
577       <packageset dir="${plugins.dir}/parse-tika/src/java"/>
578       <packageset dir="${plugins.dir}/parse-zip/src/java"/>
579       <packageset dir="${plugins.dir}/protocol-file/src/java"/>
580       <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
581       <packageset dir="${plugins.dir}/protocol-http/src/java"/>
582       <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
583       <packageset dir="${plugins.dir}/protocol-sftp/src/java"/>
584       <packageset dir="${plugins.dir}/scoring-link/src/java"/>
585       <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
586       <packageset dir="${plugins.dir}/subcollection/src/java"/>
587       <packageset dir="${plugins.dir}/tld/src/java"/>
588       <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
589       <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
590       <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
591       <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
592       <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/>
593       <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/>
594       <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/>
595       <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/>
596       <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/>
597 
598    <link href="${javadoc.link.java}" />
599    <link href="${javadoc.link.lucene}" />
600    <link href="${javadoc.link.hadoop}" />
601 
602    <classpath refid="classpath" />
603    <classpath>
604     <fileset dir="${plugins.dir}">
605      <include name="**/*.jar" />
606     </fileset>
607    </classpath>
608 
609    <group title="Core" packages="org.apache.nutch.*" />
610    <group title="Plugins API" packages="${plugins.api}" />
611    <group title="Protocol Plugins" packages="${plugins.protocol}" />
612    <group title="URL Filter Plugins" packages="${plugins.urlfilter}" />
613    <group title="Scoring Plugins" packages="${plugins.scoring}" />
614    <group title="Parse Plugins" packages="${plugins.parse}" />
615    <group title="Indexing Filter Plugins" packages="${plugins.index}" />
616    <group title="Misc. Plugins" packages="${plugins.misc}" />
617   </javadoc>
618   
619   <copy file="${plugins.dir}/plugin.dtd" todir="${build.javadoc}/org/apache/nutch/plugin/doc-files" />
620  </target>
621 
622  <target name="default-doc" description="--> generate default Nutch documentation">
623   <style basedir="${conf.dir}" destdir="${docs.dir}" 
624       includes="nutch-default.xml" style="conf/nutch-conf.xsl" />
625  </target>
626 
627  
628   
629   
630   
631   
632   
633     {dist.dir}"/>
634     {src.dist.version.dir}"/>
635     {src.dist.version.dir}/lib"/>
636     {src.dist.version.dir}/docs"/>
637     {src.dist.version.dir}/docs/api"/>
638     {src.dist.version.dir}/ivy"/>
639 
640     {src.dist.version.dir}/lib" includeEmptyDirs="false">
641       
642     
643 
644     {src.dist.version.dir}/conf">
645       {conf.dir}" excludes="**/*.template"/>
646     
647 
648     
649       
650     
651 
652     
653       
654         
655         
656       
657     
658 
659     
660       
661     
662 
663     
664       
665     
666 
667     
668     
669 
670   
671 
672  
673     
674     
675     
676     
677     
678     
679     
680     
681 
682     
683       
684     
685    
686     
687       
688     
689 
690     
691         
692     
693 
694     
695       */*.template"/>
696     
697 
698     {bin.dist.version.dir}/docs/api">
699       {build.javadoc}"/>
700     
701 
702     {bin.dist.version.dir}">
703       
704         
705       
706     
707 
708     {bin.dist.version.dir}/plugins" includeEmptyDirs="true">
709       
710     
711 
712   
713 
714   
715   
716   
717   
718     719       destfile="${src.dist.version.dir}.tar.gz">
720       {src.dist.version.dir}" mode="664" prefix="${final.name}">
721         /*" />
722         
723       
724       
725         
726       
727     
728   
729   
730   
731   
732   
733   
734     735       destfile="${bin.dist.version.dir}.tar.gz">
736       
737         
738         
739       
740       
741         
742       
743     
744   
745 
746   
747   
748   
749   
750    751      destfile="${src.dist.version.dir}.zip">
752    
753        
754        
755    
756    
757        
758    
759    
760   
761 
762   
763   
764   
765   
766    767      destfile="${bin.dist.version.dir}.zip">
768    
769        
770        
771    
772    
773        
774    
775    
776   
777 
778  
779  
780  
781 
782  
783  
784 
785  
786  
787   
788  
789 
790  
791  
792   
793  
794 
795  
796  
797   
798  
799 
800  
801  
802   
803  
804 
805  
806  
807   
808  
809 
810  
811   
812  
813 
814  
815  
816  
817  
818   
819    
820     
821    
822   
823  
824 
825  826   description="--> runs the tasks over src/java">
827   
828    
829     */*" />
830     /**/src/**/*" />
831    
832   
833  
834 
835  
836  
837  
838 
839  
840  
841   {ant.library.dir}" />
842   {mysql.library.dir}" />
843   
844  
845 
846  
847  
848 
849   
850   {src.dir}" />
851 
852   
853   
854   {build.dir}/classes" />
855   {build.dir}/plugins" />
856   {test.src.dir}" />
857 
858   {base.dir}" key="org.apache.nutch:branch"
859    version="2.0-SNAPSHOT" xmlns:sonar="antlib:org.sonar.ant" />
860  
861  
862   
863   
864   
865 
866   
867   
868     {build.lib.dir}">
869       
870       
871     
872   
873   
874   
875   
876     877          dest="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" usetimestamp="false" />
878 
879     {build.dir}/ant-eclipse-1.0.bin.tar.bz2"
880            dest="${build.dir}" compression="bzip2">
881       
882         
883       
884     
885 
886     {build.dir}/ant-eclipse-1.0.bin.tar.bz2" />
887   
888   
889   
890   891           depends="clean,init,job,ant-eclipse-download"
892           description="--> Create eclipse project files">
893 
894          
895            {basedir}"/>
896            
897          
898   
899     900              classname="prantl.ant.eclipse.EclipseTask"
901              classpath="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar" />
902     
903       {eclipse.project}" />
904       
905         
906         {conf.dir}" exported="false" />
907         {basedir}/src/bin" exported="false" />
908         
909         {basedir}/build/plugins/urlfilter-automaton/automaton-1.11-8.jar" 
910                  exported="false" />
911         {basedir}/src/plugin/parse-swf/lib/javaswf.jar" 
912                  exported="false" />
913         {basedir}/build/plugins/lib-nekohtml/nekohtml-0.9.5.jar" 
914                  exported="false" />
915         {basedir}/build/plugins/lib-nekohtml/nekohtml-0.9.5.jar" 
916                  exported="false" />
917         {basedir}/build/plugins/parse-html/tagsoup-1.2.jar" 
918                  exported="false" />
919         {basedir}/build/plugins/protocol-sftp/jsch-0.1.41.jar" 
920                  exported="false" />
921                  
922         {basedir}/build/plugins/parse-html/tagsoup-1.2.jar" 
923                  exported="false" />
924 
925         {basedir}/src/java/" />
926         {basedir}/src/test/" />
927         {basedir}/src/plugin/creativecommons/src/java/" />
928         {basedir}/src/plugin/creativecommons/src/test/" />
929         
932         {basedir}/src/plugin/index-anchor/src/java/" />
933         {basedir}/src/plugin/index-anchor/src/test/" />
934         {basedir}/src/plugin/index-basic/src/java/" />
935         {basedir}/src/plugin/index-basic/src/test/" />
936         {basedir}/src/plugin/index-more/src/java/" />
937         {basedir}/src/plugin/index-more/src/test/" />
938         {basedir}/src/plugin/language-identifier/src/java/" />
939         {basedir}/src/plugin/language-identifier/src/test/" />
940         {basedir}/src/plugin/lib-http/src/java/" />
941         {basedir}/src/plugin/lib-http/src/test/" />
942         {basedir}/src/plugin/lib-regex-filter/src/java/" />
943         {basedir}/src/plugin/lib-regex-filter/src/test/" />
944         {basedir}/src/plugin/microformats-reltag/src/java/" />
945         {basedir}/src/plugin/microformats-reltag/src/test/" />
946         
949         {basedir}/src/plugin/parse-html/src/java/" />
950         {basedir}/src/plugin/parse-html/src/test/" />
951         {basedir}/src/plugin/parse-js/src/java/" />
952         {basedir}/src/plugin/parse-js/src/test/" />
953         
958         {basedir}/src/plugin/parse-tika/src/java/" />
959         {basedir}/src/plugin/parse-tika/src/test/" />
960         {basedir}/src/plugin/protocol-file/src/java/" />
961         {basedir}/src/plugin/protocol-file/src/test/" />
962         {basedir}/src/plugin/protocol-ftp/src/java/" />
963         {basedir}/src/plugin/protocol-httpclient/src/java/" />
964         {basedir}/src/plugin/protocol-httpclient/src/test/" />
965         {basedir}/src/plugin/protocol-http/src/java/" />
966         {basedir}/src/plugin/protocol-sftp/src/java/" />
967         {basedir}/src/plugin/scoring-link/src/java/" />
968         {basedir}/src/plugin/scoring-opic/src/java/" />
969         {basedir}/src/plugin/subcollection/src/java/" />
970         {basedir}/src/plugin/subcollection/src/test/" />
971         {basedir}/src/plugin/tld/src/java/" />
972         {basedir}/src/plugin/urlfilter-automaton/src/java/" />
973         {basedir}/src/plugin/urlfilter-automaton/src/test/" />
974         {basedir}/src/plugin/urlfilter-domain/src/java/" />
975         {basedir}/src/plugin/urlfilter-domain/src/test/" />
976         {basedir}/src/plugin/urlfilter-prefix/src/java/" />
977         {basedir}/src/plugin/urlfilter-regex/src/java/" />
978         {basedir}/src/plugin/urlfilter-regex/src/test/" />
979         {basedir}/src/plugin/urlfilter-suffix/src/java/" />
980         {basedir}/src/plugin/urlfilter-suffix/src/test/" />
981         {basedir}/src/plugin/urlfilter-validator/src/java/" />
982         {basedir}/src/plugin/urlnormalizer-basic/src/java/" />
983         {basedir}/src/plugin/urlnormalizer-basic/src/test/" />
984         {basedir}/src/plugin/urlnormalizer-pass/src/java/" />
985         {basedir}/src/plugin/urlnormalizer-pass/src/test/" />
986         {basedir}/src/plugin/urlnormalizer-regex/src/java/" />
987         {basedir}/src/plugin/urlnormalizer-regex/src/test/" />
988 
989         {basedir}/build/classes" />
990       
991     
992   
993 
View Code

(4)nutch/ivy/ivy.xml

  1 <?xml version="1.0" ?>
  2 
  3 
 13 
 14 <ivy-module version="1.0">
 15   <info organisation="org.apache.nutch" module="nutch">
 16     <license name="Apache 2.0"
 17       url="http://www.apache.org/licenses/LICENSE-2.0.txt/" />
 18     <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org" />
 19     <description homepage="http://nutch.apache.org">Nutch is an open source web-search
 20       software. It builds on Hadoop, Tika and Solr, adding web-specifics, such as a crawler, 
 21       a link-graph database etc.
 22     </description>
 23   </info>
 24 
 25   <configurations>
 26     <include file="${basedir}/ivy/ivy-configurations.xml" />
 27   </configurations>
 28 
 29   <publications>
 30     
 31     <artifact conf="master" />
 32   </publications>
 33 
 34   <dependencies>
 35     <dependency org="org.elasticsearch" name="elasticsearch" rev="0.19.4" 
 36                 conf="*->default"/>
 37   
 38     <dependency org="org.apache.solr" name="solr-solrj" rev="3.4.0"
 39       conf="*->default" />
 40     <dependency org="org.slf4j" name="slf4j-log4j12" rev="1.6.1"
 41       conf="*->master" />
 42 
 43     <dependency org="commons-lang" name="commons-lang" rev="2.4"
 44       conf="*->default" />
 45     <dependency org="commons-collections" name="commons-collections"
 46       rev="3.1" conf="*->default" />
 47     <dependency org="commons-httpclient" name="commons-httpclient"
 48       rev="3.1" conf="*->master" />
 49     <dependency org="commons-codec" name="commons-codec" rev="1.3"
 50       conf="*->default" />
 51 
 52     <dependency org="org.apache.hadoop" name="hadoop-core"
 53       rev="1.2.1" conf="*->default">
 54       <exclude org="net.sf.kosmosfs" name="kfs" />
 55       <exclude org="net.java.dev.jets3t" name="jets3t" />
 56       <exclude org="org.eclipse.jdt" name="core" />
 57       <exclude org="org.mortbay.jetty" name="jsp-*" />
 58     </dependency>
 59 
 60     <dependency org="com.ibm.icu" name="icu4j" rev="4.0.1" />
 61     <dependency org="org.apache.tika" name="tika-core" rev="1.3" />
 62     <dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3"/>
 63 
 64     <dependency org="log4j" name="log4j" rev="1.2.15" conf="*->master" />
 65 
 66     <dependency org="xerces" name="xercesImpl" rev="2.9.1" />
 67     <dependency org="xerces" name="xmlParserAPIs" rev="2.6.2" />
 68     <dependency org="xalan" name="serializer" rev="2.7.1" />
 69     <dependency org="oro" name="oro" rev="2.0.8" />
 70 
 71     <dependency org="org.jdom" name="jdom" rev="1.1" conf="*->default" />
 72 
 73     <dependency org="com.google.guava" name="guava" rev="11.0.2" />
 74     <dependency org="com.google.code.crawler-commons" name="crawler-commons" rev="0.2" />
 75 
 76     
 77 
 78     
 79     <dependency org="junit" name="junit" rev="4.11" conf="*->default" />
 80 
 81     <dependency org="org.apache.hadoop" name="hadoop-test" rev="1.2.1" conf="test->default">
 82       <exclude org="net.sf.kosmosfs" name="kfs" />
 83       <exclude org="net.java.dev.jets3t" name="jets3t" />
 84       <exclude org="org.eclipse.jdt" name="core" />
 85       <exclude org="org.mortbay.jetty" name="jsp-*" />
 86     </dependency>
 87 
 88     <dependency org="org.mortbay.jetty" name="jetty" rev="6.1.26" conf="test->default" />
 89     <dependency org="org.mortbay.jetty" name="jetty-util" rev="6.1.26" conf="test->default" />
 90     <dependency org="org.mortbay.jetty" name="jetty-client" rev="6.1.26" />
 91 
 92     <dependency org="org.hsqldb" name="hsqldb" rev="2.2.8" conf="*->default" />
 93     <dependency org="org.jdom" name="jdom" rev="1.1" conf="test->default"/>
 94 
 95     <dependency org="org.restlet.jse" name="org.restlet" rev="2.0.5" conf="*->default" />
 96     <dependency org="org.restlet.jse" name="org.restlet.ext.jackson" rev="2.0.5" 
 97       conf="*->default" />
 98 
 99     
100     
101     
102     <dependency org="org.apache.gora" name="gora-core" rev="0.3" conf="*->default"/>
103     
106     
109     
110     
113     
114     
115     <dependency org="org.apache.gora" name="gora-hbase" rev="0.3" conf="*->default" />
116     
117     
118     
121     
122     
125 
126     
127     <exclude module="ant" />
128     <exclude module="slf4j-jdk14" />
129     <exclude module="slf4j-simple" />
130     <exclude org="hsqldb"/>
131     <exclude org="maven-plugins"/>
132     <exclude module="jmxtools" />
133     <exclude module="jms" />
134     <exclude module="jmxri" />
135     <exclude module="thrift" />
136   </dependencies>
137 
138 </ivy-module>
View Code

(5)nutch/ivy/ivysettings.xml

  1 <ivysettings>
  2 
  3  
 19 
 20  
 23   
 32   <property name="oss.sonatype.org" 
 33     value="http://oss.sonatype.org/content/repositories/releases/" 
 34     override="false"/>
 35   <property name="repo.maven.org"
 36     value="http://repo1.maven.org/maven2/"
 37     override="false"/>
 38   <property name="snapshot.apache.org"
 39     value="http://people.apache.org/repo/m2-snapshot-repository/"
 40     override="false"/>
 41   <property name="maven2.pattern"
 42     value="[organisation]/[module]/[revision]/[module]-[revision]"/>
 43   <property name="maven2.pattern.ext"
 44     value="${maven2.pattern}.[ext]"/>
 45   
 46   <include url="${ivy.default.conf.dir}/ivyconf-local.xml"/>
 47   <settings defaultResolver="default"/>
 48   <resolvers>
 49     <ibiblio name="maven2"
 50       root="${repo.maven.org}"
 51       pattern="${maven2.pattern.ext}"
 52       m2compatible="true"
 53       />
 54     <ibiblio name="apache-snapshot"
 55       root="${snapshot.apache.org}"
 56       pattern="${maven2.pattern.ext}"
 57       m2compatible="true"
 58       />
 59     <ibiblio name="restlet"
 60       root="http://maven.restlet.org"
 61       pattern="${maven2.pattern.ext}"
 62       m2compatible="true"
 63       />
 64      <ibiblio name="sonatype"
 65       root="${oss.sonatype.org}"
 66       pattern="${maven2.pattern.ext}"
 67       m2compatible="true"
 68       />
 69      
 70     <chain name="default" dual="true">
 71       <resolver ref="local"/>
 72       <resolver ref="maven2"/>
 73       <resolver ref="sonatype"/>
 74     </chain>
 75     <chain name="internal">
 76       <resolver ref="local"/>
 77     </chain>
 78     <chain name="external">
 79       <resolver ref="maven2"/>
 80       <resolver ref="sonatype"/>
 81     </chain>
 82     <chain name="external-and-snapshots">
 83       <resolver ref="maven2"/>
 84       <resolver ref="apache-snapshot"/>
 85       <resolver ref="sonatype"/>
 86     </chain>
 87     <chain name="restletchain">
 88       <resolver ref="restlet"/>
 89     </chain>
 90   </resolvers>
 91   <modules>
 92 
 93     
 97     <module organisation="org.apache.nutch" name=".*" resolver="internal"/>
 98     <module organisation="org.restlet" name=".*" resolver="restletchain"/>
 99     <module organisation="org.restlet.jse" name=".*" resolver="restletchain"/>
100   </modules>
101 </ivysettings>
View Code

 

转载于:https://www.cnblogs.com/xxx0624/p/4176358.html

你可能感兴趣的:(大数据,java,runtime)