突然想整理一下遍历文件树的各种方法, 代码如下. 常见的实现大多以深度优先搜索为主, 所以我自己另外实现了一个广度优先的版本.
package com.test;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.stream.Stream;
/** * 所有walk方法未检查参数合法性, 请调用者自行处理. * @author John Kenrinus Lee * @version 2016-03-01 */
public final class FileTreeWalker {
public static void main(String[] args) {
String dirPath = "你的目录";
walkByJdk8Stream(new File(dirPath), file -> {
System.out.println(file);
if (file.isFile()) {
sJdk8StreamFiles.add(file);
}
});
walkByRecursion(new File(dirPath), file -> {
System.out.println(file);
if (file.isFile()) {
sRecursionFiles.add(file);
}
});
walkByDepthFirst(new File(dirPath), file -> {
System.out.println(file);
if (file.isFile()) {
sDepthFirstFiles.add(file);
}
});
walkByBreadthFirst(new File(dirPath), file -> {
System.out.println(file);
if (file.isFile()) {
sBreadthFirstFiles.add(file);
}
});
Collections.sort(sJdk8StreamFiles);
Collections.sort(sRecursionFiles);
Collections.sort(sDepthFirstFiles);
Collections.sort(sBreadthFirstFiles);
System.out.println(sJdk8StreamFiles.equals(sRecursionFiles));
System.out.println(sRecursionFiles.equals(sDepthFirstFiles));
System.out.println(sDepthFirstFiles.equals(sBreadthFirstFiles));
System.out.println(sBreadthFirstFiles.size());
}
private static final ArrayList<File> sJdk8StreamFiles = new ArrayList<>();
private static final ArrayList<File> sRecursionFiles = new ArrayList<>();
private static final ArrayList<File> sDepthFirstFiles = new ArrayList<>();
private static final ArrayList<File> sBreadthFirstFiles = new ArrayList<>();
public interface FileVisitor {
void visitFile(File file);
}
private FileTreeWalker() {}
public static void walkByJdk8Stream(File file, FileVisitor visitor) {
try {
final Stream<Path> pathStream = Files.walk(file.toPath(), FileVisitOption.FOLLOW_LINKS);
pathStream.forEach(path -> visitor.visitFile(path.toFile()));
} catch (IOException e) {
e.printStackTrace();
}
}
public static void walkByRecursion(File file, FileVisitor visitor) {
visitor.visitFile(file);
if (file.isDirectory()) {
final File[] fs = file.listFiles();
if (fs != null) {
for (File f : fs) {
walkByRecursion(f, visitor);
}
}
}
}
public static void walkByDepthFirst(File file, FileVisitor visitor) {
final LinkedList<File> stack = new LinkedList<>();
stack.addFirst(file); //push
while (!stack.isEmpty()) {
file = stack.removeFirst(); //pop
visitor.visitFile(file);
if (file.isDirectory()) {
final File[] fs = file.listFiles();
if (fs != null) {
for (File f : fs) {
stack.addFirst(f); //push
}
}
}
}
}
//亮点
public static void walkByBreadthFirst(File file, FileVisitor visitor) {
final LinkedList<File> queue = new LinkedList<>();
queue.addLast(file); //enqueue
while (!queue.isEmpty()) {
file = queue.removeFirst(); //dequeue
visitor.visitFile(file);
if (file.isDirectory()) {
final File[] fs = file.listFiles();
if (fs != null) {
for (File f : fs) {
queue.addLast(f); //enqueue
}
}
}
}
}
}
下面是用 JMH 测试各种遍历方法效率的代码:
package com.test;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedList;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
/**
 * JMH benchmark comparing four file-tree traversal strategies over the same directory:
 * the {@link Files#walk} stream, plain recursion, an explicit-stack depth-first walk and a
 * queue-based breadth-first walk. No per-entry work is done; only the traversal is executed.
 */
@Fork(1)
@Warmup(iterations = 4, time = 4)
@Measurement(iterations = 4, time = 4)
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
// Alternative mode: swap in to measure throughput instead of average time.
//@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
// Alternative unit, intended for use with the throughput mode above.
//@OutputTimeUnit(TimeUnit.SECONDS)
public class FileTreeWalkerTest {
    /** Root directory walked by every benchmark. */
    private static final File dir = new File("你的目录");

    /** Walks the tree with {@link Files#walk}, following symbolic links. */
    @Benchmark
    @Threads(1)
    public void walkByJdk8Stream() {
        // Close the stream on every invocation: the benchmark calls this method many times,
        // and an unclosed walk keeps directory handles open.
        try (Stream<Path> pathStream = Files.walk(dir.toPath(), FileVisitOption.FOLLOW_LINKS)) {
            pathStream.forEach(path -> {
            });
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Walks the tree depth-first using plain recursion. */
    @Benchmark
    @Threads(1)
    public void walkByRecursion() {
        _walkByRecursion(dir);
    }

    /**
     * Recursive helper for {@link #walkByRecursion()}: descends into {@code file} if it is a
     * directory that can be listed.
     *
     * @param file entry to descend into
     */
    public void _walkByRecursion(File file) {
        if (file.isDirectory()) {
            final File[] fs = file.listFiles();
            // listFiles() returns null when the directory cannot be listed.
            if (fs != null) {
                for (File f : fs) {
                    _walkByRecursion(f);
                }
            }
        }
    }

    /** Walks the tree depth-first using a {@link LinkedList} as an explicit stack. */
    @Benchmark
    @Threads(1)
    public void walkByDepthFirst() {
        final LinkedList<File> stack = new LinkedList<>();
        stack.addFirst(dir); // push
        while (!stack.isEmpty()) {
            final File file = stack.removeFirst(); // pop
            if (file.isDirectory()) {
                final File[] fs = file.listFiles();
                if (fs != null) {
                    for (File f : fs) {
                        stack.addFirst(f); // push
                    }
                }
            }
        }
    }

    /** Walks the tree breadth-first using a {@link LinkedList} as a queue. */
    @Benchmark
    @Threads(1)
    public void walkByBreadthFirst() {
        final LinkedList<File> queue = new LinkedList<>();
        queue.addLast(dir); // enqueue
        while (!queue.isEmpty()) {
            final File file = queue.removeFirst(); // dequeue
            if (file.isDirectory()) {
                final File[] fs = file.listFiles();
                if (fs != null) {
                    for (File f : fs) {
                        queue.addLast(f); // enqueue
                    }
                }
            }
        }
    }
}
测试结果如下 (thrpt 为吞吐量模式, avgt 为平均耗时模式):
c.t.FileTreeWalkerTest.walkByBreadthFirst thrpt 4 2584.308 ± 1383.143 ops/s
c.t.FileTreeWalkerTest.walkByDepthFirst thrpt 4 2417.517 ± 661.665 ops/s
c.t.FileTreeWalkerTest.walkByJdk8Stream thrpt 4 3546.248 ± 1492.373 ops/s
c.t.FileTreeWalkerTest.walkByRecursion thrpt 4 2976.203 ± 1347.744 ops/s
c.t.FileTreeWalkerTest.walkByBreadthFirst avgt 4 385389.731 ± 272768.069 ns/op
c.t.FileTreeWalkerTest.walkByDepthFirst avgt 4 373336.187 ± 319232.024 ns/op
c.t.FileTreeWalkerTest.walkByJdk8Stream avgt 4 269522.774 ± 132399.305 ns/op
c.t.FileTreeWalkerTest.walkByRecursion avgt 4 343995.010 ± 119822.375 ns/op
看起来, 与 C 语言不同, Java 中的递归还是很高效的: 它比我自己实现的深度优先遍历(基于 LinkedList 实现的栈)和广度优先遍历(基于 LinkedList 实现的队列)都要快. 不过需要注意, 各项结果的误差范围(±)都很大且彼此重叠, 所以这个结论仅供参考.