Please credit the source when reposting.
Many of the solutions reposted around the web boil down to: modify hdfs-site.xml and add a per-disk space reservation.
The disks in my DataNodes differ in size, but Hadoop spreads writes evenly across them; it does not automatically prefer the disk with more free space, so it keeps filling the smaller disk. Once the small disk is full, MapReduce has nowhere left to write its temporary files and jobs fail, so the small disk needs some headroom. According to the Hadoop documentation, the dfs.datanode.du.reserved property reserves the given amount of space (in bytes) on each disk. I set it, and it did take effect in the sense that the reported total capacity dropped, yet data still kept landing on the small disk. Argh. The Hadoop version I am using is Cloudera CDH 4.6.
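For reference, the reservation is normally set like this in hdfs-site.xml; the 10 GB value below is only an example of mine, not taken from the reposted solutions:

<!-- hdfs-site.xml: reserve space on every DataNode volume for non-HDFS use.
     The value is in bytes; 10737418240 = 10 GB, chosen purely as an example. -->
<property>
  <name>dfs.datanode.du.reserved</name>
  <value>10737418240</value>
</property>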
I then went on to try further settings, but testing showed the same behavior.
Looking at the source code:
./hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
long getCapacity() {
  // capacity reported for this volume: raw partition capacity minus
  // the dfs.datanode.du.reserved amount
  long remaining = usage.getCapacity() - reserved;
  return remaining > 0 ? remaining : 0;
}

@Override
public long getAvailable() throws IOException {
  // available = min(capacity - dfsUsed, actual free space on the partition)
  long remaining = getCapacity() - getDfsUsed();
  long available = usage.getAvailable();
  if (remaining > available) {
    remaining = available;
  }
  return (remaining > 0) ? remaining : 0;
}

long getReserved() {
  return reserved;
}
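To make the effect of reserved concrete, here is a minimal standalone sketch of my own (not Hadoop code) that mirrors the arithmetic above with made-up numbers for a small volume:

// My own illustration of the FsVolumeImpl arithmetic above; all numbers are invented.
public class ReservedSpaceDemo {
  static final long GB = 1024L * 1024 * 1024;

  public static void main(String[] args) {
    long diskCapacity = 100 * GB; // what usage.getCapacity() would report
    long reserved     = 10 * GB;  // dfs.datanode.du.reserved
    long dfsUsed      = 60 * GB;  // space already taken by HDFS blocks
    long fsAvailable  = 35 * GB;  // what usage.getAvailable() would report

    // getCapacity(): the reservation shrinks the capacity reported for this volume
    long capacity = Math.max(diskCapacity - reserved, 0);

    // getAvailable(): min(capacity - dfsUsed, real free space), floored at zero
    long available = Math.max(Math.min(capacity - dfsUsed, fsAvailable), 0);

    System.out.println("reported capacity : " + capacity / GB + " GB");  // 90 GB
    System.out.println("reported available: " + available / GB + " GB"); // 30 GB
  }
}

This matches what I observed: the reservation lowers the capacity and available space the volume reports, but nothing in this class steers new blocks toward the volume with the most free space.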
./hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
private static class AllocatorPerContext {

  private final Log LOG =
    LogFactory.getLog(AllocatorPerContext.class);

  private int dirNumLastAccessed;
  private Random dirIndexRandomizer = new Random();
  private FileSystem localFS;
  private DF[] dirDF;
  private String contextCfgItemName;
  private String[] localDirs;
  private String savedLocalDirs = "";

  public AllocatorPerContext(String contextCfgItemName) {
    this.contextCfgItemName = contextCfgItemName;
  }

  // Re-reads the configured directory list whenever the configuration changes,
  // keeping one DF (disk-free probe) per usable local directory.
  private synchronized void confChanged(Configuration conf)
      throws IOException {
    String newLocalDirs = conf.get(contextCfgItemName);
    if (!newLocalDirs.equals(savedLocalDirs)) {
      localDirs = StringUtils.getTrimmedStrings(newLocalDirs);
      localFS = FileSystem.getLocal(conf);
      int numDirs = localDirs.length;
      ArrayList<String> dirs = new ArrayList<String>(numDirs);
      ArrayList<DF> dfList = new ArrayList<DF>(numDirs);
      for (int i = 0; i < numDirs; i++) {
        try {
          // filter out directories that cannot be created or written to
          Path tmpDir = new Path(localDirs[i]);
          if (localFS.mkdirs(tmpDir) || localFS.exists(tmpDir)) {
            try {
              File tmpFile = tmpDir.isAbsolute()
                ? new File(localFS.makeQualified(tmpDir).toUri())
                : new File(localDirs[i]);
              DiskChecker.checkDir(tmpFile);
              dirs.add(tmpFile.getPath());
              dfList.add(new DF(tmpFile, 30000));
            } catch (DiskErrorException de) {
              LOG.warn(localDirs[i] + " is not writable\n", de);
            }
          } else {
            LOG.warn("Failed to create " + localDirs[i]);
          }
        } catch (IOException ie) {
          LOG.warn("Failed to create " + localDirs[i] + ": " +
              ie.getMessage() + "\n", ie);
        } //ignore
      }
      localDirs = dirs.toArray(new String[dirs.size()]);
      dirDF = dfList.toArray(new DF[dirs.size()]);
      savedLocalDirs = newLocalDirs;

      // randomize the first disk picked in the round-robin selection
      dirNumLastAccessed = dirIndexRandomizer.nextInt(dirs.size());
    }
  }

  public synchronized Path getLocalPathForWrite(String pathStr, long size,
      Configuration conf, boolean checkWrite) throws IOException {
    confChanged(conf);
    int numDirs = localDirs.length;
    int numDirsSearched = 0;
    //remove the leading slash from the path (to make sure that the uri
    //resolution results in a valid path on the dir being checked)
    if (pathStr.startsWith("/")) {
      pathStr = pathStr.substring(1);
    }
    Path returnPath = null;

    if (size == SIZE_UNKNOWN) { //do roulette selection: pick dir with probability
                                //proportional to available size
      long[] availableOnDisk = new long[dirDF.length];
      long totalAvailable = 0;

      //build the "roulette wheel"
      for (int i = 0; i < dirDF.length; ++i) {
        availableOnDisk[i] = dirDF[i].getAvailable();
        totalAvailable += availableOnDisk[i];
      }

      // Keep rolling the wheel till we get a valid path
      Random r = new java.util.Random();
      while (numDirsSearched < numDirs && returnPath == null) {
        long randomPosition = Math.abs(r.nextLong()) % totalAvailable;
        int dir = 0;
        while (randomPosition > availableOnDisk[dir]) {
          randomPosition -= availableOnDisk[dir];
          dir++;
        }
        dirNumLastAccessed = dir;
        returnPath = createPath(pathStr, checkWrite);
        if (returnPath == null) {
          totalAvailable -= availableOnDisk[dir];
          availableOnDisk[dir] = 0; // skip this disk
          numDirsSearched++;
        }
      }
    } else {
      // size is known: plain round-robin, skipping dirs that are too full
      while (numDirsSearched < numDirs && returnPath == null) {
        long capacity = dirDF[dirNumLastAccessed].getAvailable();
        if (capacity > size) {
          returnPath = createPath(pathStr, checkWrite);
        }
        dirNumLastAccessed++;
        dirNumLastAccessed = dirNumLastAccessed % numDirs;
        numDirsSearched++;
      }
    }
    if (returnPath != null) {
      return returnPath;
    }

    //no path found
    throw new DiskErrorException("Could not find any valid local " +
        "directory for " + pathStr);
  }
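The size == SIZE_UNKNOWN branch is the interesting part: it does a "roulette wheel" pick, choosing a directory with probability proportional to its free space. A minimal standalone sketch of my own (made-up free-space numbers, not Hadoop code) of just that selection step:

import java.util.Random;

// My own illustration of the roulette-wheel selection in getLocalPathForWrite.
// Free-space numbers are invented: one small disk and two large ones.
public class RouletteSelectionDemo {
  public static void main(String[] args) {
    long GB = 1024L * 1024 * 1024;
    long[] availableOnDisk = { 20 * GB, 500 * GB, 480 * GB };
    long totalAvailable = 0;
    for (long a : availableOnDisk) {
      totalAvailable += a;
    }

    Random r = new Random();
    int[] hits = new int[availableOnDisk.length];
    for (int trial = 0; trial < 100000; trial++) {
      long randomPosition = Math.abs(r.nextLong()) % totalAvailable;
      int dir = 0;
      while (randomPosition > availableOnDisk[dir]) {
        randomPosition -= availableOnDisk[dir];
        dir++;
      }
      hits[dir]++;
    }

    // Expect roughly 2% / 50% / 48% of the picks: the small disk is
    // chosen far less often than the large ones.
    for (int i = 0; i < hits.length; i++) {
      System.out.println("dir " + i + ": " + hits[i] + " picks");
    }
  }
}

So whenever this branch is actually taken, directories with more free space are favored; with the numbers above the small disk is picked only about 2% of the time.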
The code clearly does check these settings, so why is there no effect? Speechless; I will dig into it properly when I have time.
None of the other configuration options guarantees that less data is written to the small disk either, sigh.
So let's just go with what the FAQ says:
http://wiki.apache.org/hadoop/FAQ#On_an_individual_data_node.2C_how_do_you_balance_the_blocks_on_the_disk.3F
3.12. On an individual data node, how do you balance the blocks on the disk?
Hadoop currently does not have a method by which to do this automatically. To do this manually:
1. Take down the HDFS
2. Use the UNIX mv command to move the individual blocks and meta pairs from one directory to another on each host
3. Restart the HDFS
For 1), taking down HDFS really only requires stopping the DataNode: $HADOOP_HOME/bin/hadoop-daemon.sh stop datanode
For 2), the blocks have to stay under the current directory of a dfs.data.dir; move them from the data directory on the full disk into the same relative subdirectory under a data directory on an emptier disk, e.g. mv /path-to-full-data-dir/current/finalized/subdir11/* /path-to-other-data-dir/current/finalized/subdir11/
For 3), $HADOOP_HOME/bin/hadoop-daemon.sh start datanode
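Put together, the whole manual move might look roughly like this; the two data directories /data/small/dfs/dn and /data/big/dfs/dn are hypothetical entries in dfs.data.dir, and the exact layout under current/ depends on the HDFS version:

# 1) stop only the DataNode on this host
$HADOOP_HOME/bin/hadoop-daemon.sh stop datanode

# 2) move block/meta pairs into the same relative subdirectory on the bigger disk
mkdir -p /data/big/dfs/dn/current/finalized/subdir11
mv /data/small/dfs/dn/current/finalized/subdir11/* \
   /data/big/dfs/dn/current/finalized/subdir11/

# 3) start the DataNode again
$HADOOP_HOME/bin/hadoop-daemon.sh start datanode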
See this thread for how someone else did the same thing, help yourself: http://search-hadoop.com/m/fSof91EYbe9
Please credit the source when reposting.