1
|
// Create the streaming context; incoming data is processed with a 5-second batch interval.
JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(5));
|
1
|
// Text lines read from the TCP socket at hostname:port. The stream is declared
// with its element type (String) instead of as a raw type, so later map() calls
// are type-checked by the compiler.
JavaReceiverInputDStream<String> lines =
    ssc.socketTextStream(hostname, port, StorageLevels.MEMORY_AND_DISK_SER);
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
// Stream of text lines taken from files that show up in /tmp/Streamtest.
JavaDStream<String> filelines = ssc.textFileStream("/tmp/Streamtest");

// Break every line into space-separated tokens; each token is a candidate keyword.
JavaDStream<String> updatedKeyWords = filelines.flatMap(
    new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
        // x.split(" ") produces the same tokens as Pattern.compile(" ").split(x)
        // without constructing a new Pattern object for every line.
        return Arrays.asList(x.split(" "));
      }
    });

// For every batch, append the tokens that were read to the keywords list.
updatedKeyWords.foreachRDD(
    new Function<JavaRDD<String>, Void>() {
      @Override
      public Void call(JavaRDD<String> rdd) {
        rdd.foreach(
            new VoidFunction<String>() {
              @Override
              public void call(String x) {
                if (x != null) {
                  keywords.add(x);
                }
              }
            });
        // Function<..., Void> must return a value; null is the only option.
        return null;
      }
    });
|
1
2
3
4
5
6
7
|
// Predicate applied to each socket line: true when the whole line is in keywords.
Function<String, Boolean> isKnownKeyword = new Function<String, Boolean>() {
  @Override
  public Boolean call(String x) {
    return keywords.contains(x);
  }
};

// One Boolean per received line, printed for every batch.
JavaDStream<Boolean> wordPresent = lines.map(isKnownKeyword);
wordPresent.print();
|
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
/**
 * Spark Streaming example that reports whether each line received on a TCP socket
 * is a known keyword.
 *
 * <p>Keywords are the space-separated tokens of the text read from
 * {@code /tmp/Streamtest}; for every line read from the socket the job prints
 * {@code true} or {@code false}.
 *
 * <p>NOTE(review): the keyword list is a plain static field that is written and read
 * inside RDD closures. That is only shared state when all tasks run inside the
 * driver JVM (e.g. {@code --master local[N]}); on a real cluster each executor JVM
 * would presumably see its own copy. Confirm before running this outside local mode.
 */
public final class KeywordDetect {
  /** Token separator, compiled once and reused for every line. */
  private static final Pattern SPACE = Pattern.compile(" ");

  /**
   * Keywords collected so far. Wrapped in a synchronized list because tasks of the
   * file stream add to it while tasks of the socket stream query it, and with a
   * multi-threaded master those tasks can run on different threads.
   */
  public static List<String> keywords =
      java.util.Collections.synchronizedList(new ArrayList<String>());

  /**
   * Starts the streaming job and blocks until it terminates.
   *
   * @param args {@code args[0]} is the host name and {@code args[1]} the port of the
   *     socket to read candidate words from
   */
  public static void main(String[] args) {
    if (args.length < 2) {
      // Only host and port are read; no further arguments are used.
      System.err.println("Usage: KeywordDetect <hostname> <port>");
      System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("KeywordDetect");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(5));

    // Source 1: lines of text files appearing in /tmp/Streamtest (keyword updates).
    JavaDStream<String> filelines = ssc.textFileStream("/tmp/Streamtest");
    // Source 2: lines received from the socket (words to look up).
    JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
        args[0], Integer.parseInt(args[1]), StorageLevels.MEMORY_AND_DISK_SER);

    // Initialize the keyword list.
    keywords.add("initial");

    // Split every file line into space-separated tokens.
    JavaDStream<String> updatedKeyWords = filelines.flatMap(
        new FlatMapFunction<String, String>() {
          @Override
          public Iterable<String> call(String x) {
            return Arrays.asList(SPACE.split(x));
          }
        });

    // Add the newly read tokens to the keyword list, batch by batch.
    updatedKeyWords.foreachRDD(
        new Function<JavaRDD<String>, Void>() {
          @Override
          public Void call(JavaRDD<String> rdd) {
            rdd.foreach(
                new VoidFunction<String>() {
                  @Override
                  public void call(String x) {
                    if (x != null) {
                      keywords.add(x);
                    }
                  }
                });
            // Function<..., Void> must return a value; null is the only option.
            return null;
          }
        });

    // Compare each line received from the socket against the keyword list.
    JavaDStream<Boolean> wordPresent = lines.map(
        new Function<String, Boolean>() {
          @Override
          public Boolean call(String x) {
            return keywords.contains(x);
          }
        });

    wordPresent.print();
    ssc.start();
    ssc.awaitTermination();
  }
}
|
01
02
03
04
05
06
07
08
09
10
|
<!-- Keep all Spark artifacts on the same release: mixing 1.2.0 and 1.2.1 jars
     on one classpath risks binary incompatibilities at runtime. -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-streaming_2.10</artifactId>
  <version>1.2.1</version>
</dependency>
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_2.10</artifactId>
  <version>1.2.1</version>
</dependency>
|
1
|
# Submit the job; the host and port arguments must match the socket server
# the job connects to (the nc listener on port 9121).
spark-submit --class "org.apache.spark.examples.streaming.KeywordDetect" --master "local[4]" target/KeyWord-1.0.jar rvm.svl.ibm.com 9121
|
1
2
|
15/09/06 01:43:31 INFO dstream.SocketReceiver:Connecting to rvm.svl.ibm.com:9121
15/09/06 01:43:31 INFO dstream.SocketReceiver:Connected to rvm.svl.ibm.com:9121
|
1
|
# Create the directory the job reads from, then upload the keywords file into it.
hadoop fs -mkdir /tmp/Streamtest
hadoop fs -put keywords /tmp/Streamtest
|
1
2
|
15/09/06 01:54:25 INFO dstream.FileInputDStream:New files at time 1441529665000 ms: hdfs://rvm.svl.ibm.com:8020/tmp/Streamtest/keyword
15/09/06 01:54:25 INFO storage.MemoryStore: ensureFreeSpace(272214) called with curMem=109298, maxMem=278302556
|
1
2
|
[root@rvm Desktop]# nc -l 9121
risk
|
1
2
3
4
|
-------------------------------------------
Time:1441529995000 ms
-------------------------------------------
true
|