hadoop 2.6 Trash清除机制源代码分析

Namenode的垃圾回收机制,只有在命令行对文件进行删除时,会判断系统是否使用Trash,如果使用,则变成rename操作。

在Namenode类的构造函数中,有this.haContext = createHAContext();createHAContext()初始化一个NamenodeHAContext对象,代码如下:

 protected HAContext createHAContext() {
    return new NameNodeHAContext();
  }

NameNodeHAContext的startActiveService里,除了调用namesystem的startActiveServices()方法外,还调用startTrashEmpiter(conf)方法,代码如下:

 protected class NameNodeHAContext implements HAContext {
  
 。。。。。。
    @Override
    public void startActiveServices() throws IOException {
      try {
        namesystem.startActiveServices();
        startTrashEmptier(conf);
      } catch (Throwable t) {
        doImmediateShutdown(t);
      }
    }

   。。。。。。
  }

startTrashEmptier首先判断trashInterval是否大于0,然后得到fs对象,然后初始化Trash线程,代码如下:
 private void startTrashEmptier(final Configuration conf) throws IOException {
    long trashInterval =
        conf.getLong(FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT);
    if (trashInterval == 0) {
      return;
    } else if (trashInterval < 0) {
      throw new IOException("Cannot start trash emptier with negative interval."
          + " Set " + FS_TRASH_INTERVAL_KEY + " to a positive value.");
    }
    
    // This may be called from the transitionToActive code path, in which
    // case the current user is the administrator, not the NN. The trash
    // emptier needs to run as the NN. See HDFS-3972.
    FileSystem fs = SecurityUtil.doAsLoginUser(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws IOException {
            return FileSystem.get(conf);
          }
        });
    this.emptier = new Thread(new Trash(fs, conf).getEmptier(), "Trash Emptier");
    this.emptier.setDaemon(true);
    this.emptier.start();
  }
Trash的构造方法如下:
  /**
   * Construct a trash can accessor for the FileSystem provided.
   * @param fs the FileSystem
   * @param conf a Configuration
   */
  public Trash(FileSystem fs, Configuration conf) throws IOException {
    super(conf);
    trashPolicy = TrashPolicy.getInstance(conf, fs, fs.getHomeDirectory());
  }

TrashPolicy的getInstance方法,初始化TrashPolicyDefault的一个实例,并且调用initialize来初始化。

  /**
   * Get an instance of the configured TrashPolicy based on the value 
   * of the configuration parameter fs.trash.classname.
   *
   * @param conf the configuration to be used
   * @param fs the file system to be used
   * @param home the home directory
   * @return an instance of TrashPolicy
   */
  public static TrashPolicy getInstance(Configuration conf, FileSystem fs, Path home) {
    Class<? extends TrashPolicy> trashClass = conf.getClass(
        "fs.trash.classname", TrashPolicyDefault.class, TrashPolicy.class);
    TrashPolicy trash = ReflectionUtils.newInstance(trashClass, conf);
    trash.initialize(conf, fs, home); // initialize TrashPolicy
    return trash;
  }

初始化方法如下:

 @Override
  public void initialize(Configuration conf, FileSystem fs, Path home) {
    this.fs = fs;
    this.trash = new Path(home, TRASH);
    this.homesParent = home.getParent();
    this.current = new Path(trash, CURRENT);
    this.deletionInterval = (long)(conf.getFloat(
        FS_TRASH_INTERVAL_KEY, FS_TRASH_INTERVAL_DEFAULT)
        * MSECS_PER_MINUTE);
    this.emptierInterval = (long)(conf.getFloat(
        FS_TRASH_CHECKPOINT_INTERVAL_KEY, FS_TRASH_CHECKPOINT_INTERVAL_DEFAULT)
        * MSECS_PER_MINUTE);
    LOG.info("Namenode trash configuration: Deletion interval = " +
             (this.deletionInterval / MSECS_PER_MINUTE) + " minutes, Emptier interval = " +
             (this.emptierInterval / MSECS_PER_MINUTE) + " minutes.");
   }
getEmptier方法如下:
 @Override
  public Runnable getEmptier() throws IOException {
    return new Emptier(getConf(), emptierInterval);
  }
Emptier构造方法里,判断如果emptierInterval等于0,则emptierInterval=deletionInterval
    Emptier(Configuration conf, long emptierInterval) throws IOException {
      this.conf = conf;
      this.emptierInterval = emptierInterval;
      if (emptierInterval > deletionInterval || emptierInterval == 0) {
        LOG.info("The configured checkpoint interval is " +
                 (emptierInterval / MSECS_PER_MINUTE) + " minutes." +
                 " Using an interval of " +
                 (deletionInterval / MSECS_PER_MINUTE) +
                 " minutes that is used for deletion instead");
        this.emptierInterval = deletionInterval;
      }
    }

Emptier的run方法先判断是否应该工作,如果不应该工作,则sleep一段时间,否则先遍历/home目录,然后对于每一个目录,都构建一个TrashPolicyDefault对象,然后调用  trash.deleteCheckpoint();                trash.createCheckpoint();两个方法,代码如下:

 @Override
    public void run() {
      if (emptierInterval == 0)
        return;                                   // trash disabled
      long now = Time.now();
      long end;
      while (true) {
        end = ceiling(now, emptierInterval);
        try {                                     // sleep for interval
          Thread.sleep(end - now);
        } catch (InterruptedException e) {
          break;                                  // exit on interrupt
        }

        try {
          now = Time.now();
          if (now >= end) {

            FileStatus[] homes = null;
            try {
              homes = fs.listStatus(homesParent);         // list all home dirs
            } catch (IOException e) {
              LOG.warn("Trash can't list homes: "+e+" Sleeping.");
              continue;
            }

            for (FileStatus home : homes) {         // dump each trash
              if (!home.isDirectory())
                continue;
              try {
                TrashPolicyDefault trash = new TrashPolicyDefault(
                    fs, home.getPath(), conf);
                trash.deleteCheckpoint();
                trash.createCheckpoint();
              } catch (IOException e) {
                LOG.warn("Trash caught: "+e+". Skipping "+home.getPath()+".");
              } 
            }
          }
        } catch (Exception e) {
          LOG.warn("RuntimeException during Trash.Emptier.run(): ", e); 
        }
      }
      try {
        fs.close();
      } catch(IOException e) {
        LOG.warn("Trash cannot close FileSystem: ", e);
      }
    }
deleteCheckpoint里,循环遍历当前用户的trash目录,如果是Current,那么再判断下一个,否则从目录名到时checkpoint的时间,并判断到现在是否大于deletionInterval,如果大于,则调用fs.delete(path,true),true代表递归删除,的代码如下:
 @Override
  public void deleteCheckpoint() throws IOException {
    FileStatus[] dirs = null;
    
    try {
      dirs = fs.listStatus(trash);            // scan trash sub-directories
    } catch (FileNotFoundException fnfe) {
      return;
    }

    long now = Time.now();
    for (int i = 0; i < dirs.length; i++) {
      Path path = dirs[i].getPath();
      String dir = path.toUri().getPath();
      String name = path.getName();
      if (name.equals(CURRENT.getName()))         // skip current
        continue;

      long time;
      try {
        time = getTimeFromCheckpoint(name);
      } catch (ParseException e) {
        LOG.warn("Unexpected item in trash: "+dir+". Ignoring.");
        continue;
      }

      if ((now - deletionInterval) > time) {
        if (fs.delete(path, true)) {
          LOG.info("Deleted trash checkpoint: "+dir);
        } else {
          LOG.warn("Couldn't delete checkpoint: "+dir+" Ignoring.");
        }
      }
    }
  }
createCheckpoint把当前Trash,改名为,代码如下:
@SuppressWarnings("deprecation")
  @Override
  public void createCheckpoint() throws IOException {
    if (!fs.exists(current))                     // no trash, no checkpoint
      return;

    Path checkpointBase;
    synchronized (CHECKPOINT) {
      checkpointBase = new Path(trash, CHECKPOINT.format(new Date()));
    }
    Path checkpoint = checkpointBase;

    int attempt = 0;
    while (true) {
      try {
        fs.rename(current, checkpoint, Rename.NONE);
        break;
      } catch (FileAlreadyExistsException e) {
        if (++attempt > 1000) {
          throw new IOException("Failed to checkpoint trash: "+checkpoint);
        }
        checkpoint = checkpointBase.suffix("-" + attempt);
      }
    }

    LOG.info("Created trash checkpoint: "+checkpoint.toUri().getPath());
  }





你可能感兴趣的:(hadoop,hdfs)