01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
import java.io.InputStream;
import java.net.URL;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Downloads the BBC World News RSS feed and prints one record per
 * {@code <item>} element to standard output.
 *
 * <p>The class is a singleton; obtain the shared instance through
 * {@link #getInstance()}.
 */
public class RSSReader {

    /** Feed downloaded by {@link #writeNews()}. */
    private static final String FEED_URL =
            "http://feeds.bbci.co.uk/news/world/rss.xml?edition=uk#";

    /** Eagerly created singleton; class initialisation guarantees a single instance. */
    private static final RSSReader INSTANCE = new RSSReader();

    private RSSReader() {
    }

    /**
     * Returns the shared reader.
     *
     * @return the single {@code RSSReader} instance, never {@code null}
     */
    public static RSSReader getInstance() {
        return INSTANCE;
    }

    /**
     * Fetches the feed, parses it and prints title, link, publish date, author,
     * comments and description of every {@code <item>} to {@code System.out},
     * followed by a blank line per item.
     *
     * <p>Any failure (network, parsing) is reported with a stack trace on
     * {@code System.err} and the method returns normally.
     */
    public void writeNews() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // The feed arrives over plain HTTP, so treat it as untrusted input:
            // enable secure processing and refuse to resolve external entities.
            factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            DocumentBuilder builder = factory.newDocumentBuilder();

            Document doc;
            // Close the HTTP stream as soon as the DOM has been built.
            try (InputStream in = new URL(FEED_URL).openStream()) {
                doc = builder.parse(in);
            }

            NodeList nodes = doc.getElementsByTagName("item");
            for (int i = 0; i < nodes.getLength(); i++) {
                printItem((Element) nodes.item(i));
            }
        } catch (Exception ex) {
            // Boundary of the program: report on stderr and keep stdout clean.
            ex.printStackTrace();
        }
    }

    /** Prints the fields of a single feed item followed by an empty line. */
    private void printItem(Element element) {
        System.out.println("Title: " + getElementValue(element, "title"));
        System.out.println("Link: " + getElementValue(element, "link"));
        System.out.println("Publish Date: " + getElementValue(element, "pubDate"));
        System.out.println("author: " + getElementValue(element, "dc:creator"));
        System.out.println("comments: " + getElementValue(element, "wfw:comment"));
        System.out.println("description: " + getElementValue(element, "description"));
        System.out.println();
    }

    /**
     * Returns the character data held by the first child of {@code e}.
     *
     * @param e element to read; may be {@code null}
     * @return the data of the first child when it is a {@link CharacterData}
     *         node (text, CDATA, comment); otherwise the empty string
     */
    private String getCharacterDataFromElement(Element e) {
        if (e == null) {
            // Tag not present in the item: explicit check instead of a swallowed NPE.
            return "";
        }
        Node child = e.getFirstChild();
        if (child instanceof CharacterData) {
            return ((CharacterData) child).getData();
        }
        return "";
    }

    /**
     * Parses a float, treating missing input as zero.
     *
     * @param value text to parse; may be {@code null}
     * @return {@code 0} when {@code value} is {@code null}, empty or only
     *         whitespace; otherwise the parsed value
     * @throws NumberFormatException if {@code value} contains text that is not a float
     */
    protected float getFloat(String value) {
        if (value == null || value.trim().isEmpty()) {
            return 0;
        }
        return Float.parseFloat(value);
    }

    /**
     * Returns the text of the first descendant of {@code parent} whose tag
     * name is {@code label}.
     *
     * @param parent element to search below; must not be {@code null}
     * @param label  tag name to look for, e.g. {@code "title"} or {@code "dc:creator"}
     * @return the character data of the first match, or the empty string when
     *         no such descendant exists or it has no leading character data
     */
    protected String getElementValue(Element parent, String label) {
        NodeList matches = parent.getElementsByTagName(label);
        return getCharacterDataFromElement((Element) matches.item(0));
    }

    /**
     * Command-line entry point: prints the current feed contents once.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        RSSReader reader = RSSReader.getInstance();
        reader.writeNews();
    }
}
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
# Flume agent configuration: defines the sources, the channels and the sinks.
# Sources, channels and sinks are defined per agent; this agent is named
# 'agent', which is the name flume-ng has to be started with (-n agent).
agent.sources = reader
agent.channels = memoryChannel
agent.sinks = avro-forward-sink

# For each one of the sources, the type is defined.
agent.sources.reader.type = exec
# tail -F (rather than -f) follows the file by name, so the source keeps
# reading if source.txt is rotated or recreated.
agent.sources.reader.command = tail -F /var/log/flume-ng/source.txt
# stderr of the command is simply discarded, unless logStdErr = true.
agent.sources.reader.logStdErr = true
# If the command exits, the source would normally produce no further data;
# restart = true makes the source run the command again instead.
agent.sources.reader.restart = true
# The channel the source writes to.
agent.sources.reader.channels = memoryChannel

# Each sink's type must be defined.
agent.sinks.avro-forward-sink.type = avro
agent.sinks.avro-forward-sink.hostname = 10.0.0.3
agent.sinks.avro-forward-sink.port = 60000
# Specify the channel the sink should use.
agent.sinks.avro-forward-sink.channel = memoryChannel

# Each channel's type is defined.
agent.channels.memoryChannel.type = memory
# Other config values specific to each type of channel (sink or source)
# can be defined as well.
# In this case, they specify the capacity of the memory channel.
agent.channels.memoryChannel.capacity = 10000
agent.channels.memoryChannel.transactionCapacity = 100
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
# Flume agent configuration; the agent is named 'agent', which is the name
# flume-ng has to be started with (-n agent).
agent.sources = reader
agent.channels = memoryChannel
agent.sinks = avro-forward-sink

# For each one of the sources, the type is defined.
agent.sources.reader.type = exec
# tail -F (rather than -f) follows the file by name, so the source keeps
# reading if source.txt is rotated or recreated.
agent.sources.reader.command = tail -F /var/log/flume-ng/source.txt
# stderr of the command is simply discarded, unless logStdErr = true.
agent.sources.reader.logStdErr = true
# If the command exits, the source would normally produce no further data;
# restart = true makes the source run the command again instead.
agent.sources.reader.restart = true
# The channel the source writes to.
agent.sources.reader.channels = memoryChannel

# Each sink's type must be defined.
agent.sinks.avro-forward-sink.type = avro
agent.sinks.avro-forward-sink.hostname = 10.0.0.3
agent.sinks.avro-forward-sink.port = 60000
# Specify the channel the sink should use.
agent.sinks.avro-forward-sink.channel = memoryChannel

# Each channel's type is defined.
agent.channels.memoryChannel.type = memory
# Other config values specific to each type of channel (sink or source)
# can be defined as well.
# In this case, they specify the capacity of the memory channel.
agent.channels.memoryChannel.capacity = 10000
agent.channels.memoryChannel.transactionCapacity = 100
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
# Collector configuration (flume-conf.properties on 10.0.0.3):
# Components of the agent named 'agent': one source, one channel, one sink.
agent.sources = avro-collection-source
agent.channels = memoryChannel
agent.sinks = hdfs-sink

# Channel: in-memory buffer between the source and the sink.
agent.channels.memoryChannel.type = memory
# Capacity of the memory channel.
agent.channels.memoryChannel.capacity = 10000

# Source: Avro endpoint bound to 10.0.0.3:60000.
agent.sources.avro-collection-source.type = avro
agent.sources.avro-collection-source.bind = 10.0.0.3
agent.sources.avro-collection-source.port = 60000
# Channel the source writes to.
agent.sources.avro-collection-source.channels = memoryChannel

# Sink: writes to HDFS under the given path.
agent.sinks.hdfs-sink.type = hdfs
agent.sinks.hdfs-sink.hdfs.path = hdfs://10.0.10.1:8020/flume
# Channel the sink reads from.
agent.sinks.hdfs-sink.channel = memoryChannel
|
1
2
|
# Compile the reader.
$ javac RSSReader.java
# Run it in the background, redirecting stdout to the file the Flume exec source tails.
$ java -cp /root/RSSReader RSSReader > /var/log/flume-ng/source.txt &
|
1
2
3
|
Agent node 1 (on 10.0.0.1):
# The name given with -n must match the property prefix in flume-conf.properties ('agent').
$ $FLUME_HOME/bin/flume-ng agent -n agent -c conf -f $FLUME_HOME/conf/flume-conf.properties
|
1
2
3
|
Agent node 2 (on 10.0.0.2):
# The name given with -n must match the property prefix in flume-conf.properties ('agent').
$ $FLUME_HOME/bin/flume-ng agent -n agent -c conf -f $FLUME_HOME/conf/flume-conf.properties
|
1
2
|
# The name given with -n must match the property prefix in flume-conf.properties ('agent').
$ $FLUME_HOME/bin/flume-ng agent -n agent -c conf -f $FLUME_HOME/conf/flume-conf.properties
|