Node接口定义了网络拓扑中的一个结点。结点可以是代表数据结点的叶子结点,也可以是代表数据中心或机架的中间结点。
/**
 * Contract for a node in a network topology.
 * A node is either a leaf representing a data node or an inner node
 * representing a datacenter or a rack.
 * Every node has a name, and its position in the network is described by a
 * string whose syntax resembles a file path.
 * For example, a data node is named hostname:port#, and if it sits in
 * rack "orange" of datacenter "dog", the string representation of its
 * network location is /dog/orange
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Unstable
public interface Node {
  /** @return the string representation of this node's network location */
  String getNetworkLocation();

  /**
   * Set this node's network location.
   * @param location the location
   */
  void setNetworkLocation(String location);

  /** @return this node's name */
  String getName();

  /** @return this node's parent */
  Node getParent();

  /**
   * Set this node's parent.
   * @param parent the parent
   */
  void setParent(Node parent);

  /**
   * @return this node's level in the tree.
   * E.g. the root of a tree returns 0 and its children return 1
   */
  int getLevel();

  /**
   * Set this node's level in the tree.
   * @param i the level
   */
  void setLevel(int i);
}
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Unstable public class NodeBase implements Node { /** Path separator {@value} */ public final static char PATH_SEPARATOR = '/'; /** Path separator as a string {@value} */ public final static String PATH_SEPARATOR_STR = "/"; /** string representation of root {@value} */ public final static String ROOT = ""; protected String name; //host:port# protected String location; //string representation of this node's location protected int level; //which level of the tree the node resides protected Node parent; //its parent /** Default constructor */ public NodeBase() { } /** Construct a node from its path * @param path * a concatenation of this node's location, the path seperator, and its name */ public NodeBase(String path) { path = normalize(path); int index = path.lastIndexOf(PATH_SEPARATOR); if (index== -1) { set(ROOT, path); } else { set(path.substring(index+1), path.substring(0, index)); } } /** Construct a node from its name and its location * @param name this node's name (can be null, must not contain {@link #PATH_SEPARATOR}) * @param location this node's location */ public NodeBase(String name, String location) { set(name, normalize(location)); } /** Construct a node from its name and its location * @param name this node's name (can be null, must not contain {@link #PATH_SEPARATOR}) * @param location this node's location * @param parent this node's parent node * @param level this node's level in the tree */ public NodeBase(String name, String location, Node parent, int level) { set(name, normalize(location)); this.parent = parent; this.level = level; } /** * set this node's name and location * @param name the (nullable) name -which cannot contain the {@link #PATH_SEPARATOR} * @param location the location */ private void set(String name, String location) { if (name != null && name.contains(PATH_SEPARATOR_STR)) throw new IllegalArgumentException( "Network location name contains /: "+name); 
this.name = (name==null)?"":name; this.location = location; } /** @return this node's name */ @Override public String getName() { return name; } /** @return this node's network location */ @Override public String getNetworkLocation() { return location; } /** Set this node's network location * @param location the location */ @Override public void setNetworkLocation(String location) { this.location = location; } /** * Get the path of a node * @param node a non-null node * @return the path of a node */ public static String getPath(Node node) { return node.getNetworkLocation() + PATH_SEPARATOR_STR + node.getName(); } /** @return this node's path as its string representation */ @Override public String toString() { return getPath(this); } /** Normalize a path by stripping off any trailing {@link #PATH_SEPARATOR} * @param path path to normalize. * @return the normalised path * If <i>path</i>is null or empty {@link #ROOT} is returned * @throws IllegalArgumentException if the first character of a non empty path * is not {@link #PATH_SEPARATOR} */ public static String normalize(String path) { if (path == null || path.length() == 0) return ROOT; if (path.charAt(0) != PATH_SEPARATOR) { throw new IllegalArgumentException( "Network Location path does not start with " +PATH_SEPARATOR_STR+ ": "+path); } int len = path.length(); if (path.charAt(len-1) == PATH_SEPARATOR) { return path.substring(0, len-1); } return path; } /** @return this node's parent */ @Override public Node getParent() { return parent; } /** Set this node's parent * @param parent the parent */ @Override public void setParent(Node parent) { this.parent = parent; } /** @return this node's level in the tree. * E.g. 
the root of a tree returns 0 and its children return 1 */ @Override public int getLevel() { return level; } /** Set this node's level in the tree * @param level the level */ @Override public void setLevel(int level) { this.level = level; } public static int locationToDepth(String location) { String normalizedLocation = normalize(location); int length = normalizedLocation.length(); int depth = 0; for (int i = 0; i < length; i++) { if (normalizedLocation.charAt(i) == PATH_SEPARATOR) { depth++; } } return depth; } }
InnerNode中有两个成员变量:children保存子结点,numOfLeaves记录叶子结点的个数。
// Child nodes held by this node; initialised to an empty list.
protected List<Node> children=new ArrayList<Node>();
// Leaf counter — presumably the number of leaves in this node's subtree (confirm against the add/remove code).
private int numOfLeaves;
/**
 * Judge if this node represents a rack.
 * Only the first child is inspected to decide what kind of children this
 * node holds.
 * @return true if it has no child or its first child is not an InnerNode
 */
boolean isRack() {
  return children.isEmpty() || !(children.get(0) instanceof InnerNode);
}
isAncestor方法判断当前结点是否为结点n的祖先。
/**
 * Judge if this node is an ancestor of node <i>n</i>.
 * A node whose own path is just the path separator is treated as an
 * ancestor of every node. Otherwise the comparison is done on
 * separator-terminated strings so that a name which is merely a textual
 * prefix of another name does not match.
 *
 * @param n a node
 * @return true if this node is an ancestor of <i>n</i>
 */
boolean isAncestor(Node n) {
  final String ownPath = getPath(this);
  if (ownPath.equals(NodeBase.PATH_SEPARATOR_STR)) {
    return true;
  }
  final String candidateLocation =
      n.getNetworkLocation() + NodeBase.PATH_SEPARATOR_STR;
  return candidateLocation.startsWith(ownPath + NodeBase.PATH_SEPARATOR_STR);
}
/**
 * Judge if this node is the parent of node <i>n</i>, i.e. whether the
 * network location of <i>n</i> is exactly this node's path.
 *
 * @param n a node
 * @return true if this node is the parent of <i>n</i>
 */
boolean isParent(Node n) {
  final String childLocation = n.getNetworkLocation();
  final String ownPath = getPath(this);
  return childLocation.equals(ownPath);
}
/**
 * Return the name of the child of this node that is an ancestor of node
 * <i>n</i>, i.e. the first path component of <i>n</i>'s network location
 * below this node's own path.
 *
 * <p>The network location of <i>n</i> must be strictly longer than this
 * node's path: if nothing remains after this node's path is stripped, the
 * {@code charAt(0)} lookup throws {@link StringIndexOutOfBoundsException}.
 *
 * @param n a node located somewhere below this node
 * @return the name of the next node on the path from this node to <i>n</i>
 * @throws IllegalArgumentException if this node is not an ancestor of <i>n</i>
 */
private String getNextAncestorName(Node n) {
  if (!isAncestor(n)) {
    // The original message lacked the space before "is", gluing the node
    // path to the rest of the sentence.
    throw new IllegalArgumentException(
        this + " is not an ancestor of " + n);
  }
  // Drop this node's own path from the front of n's location.
  String name = n.getNetworkLocation().substring(getPath(this).length());
  // The remainder normally begins with a separator; when this node's path is
  // itself just the separator, the substring above has already consumed it.
  if (name.charAt(0) == PATH_SEPARATOR) {
    name = name.substring(1);
  }
  // Keep only the first path component.
  int index = name.indexOf(PATH_SEPARATOR);
  if (index != -1) {
    name = name.substring(0, index);
  }
  return name;
}
/** Add node <i>n</i> to the subtree of this node * @param n node to be added * @return true if the node is added; false otherwise */ boolean add(Node n) { if (!isAncestor(n)) throw new IllegalArgumentException(n.getName()+", which is located at " +n.getNetworkLocation()+", is not a decendent of " +getPath(this)); if (isParent(n)) { // this node is the parent of n; add n directly n.setParent(this); n.setLevel(this.level+1); for(int i=0; i<children.size(); i++) { if (children.get(i).getName().equals(n.getName())) { children.set(i, n); return false; } } children.add(n); numOfLeaves++; return true; } else { // find the next ancestor node String parentName = getNextAncestorName(n); InnerNode parentNode = null; for(int i=0; i<children.size(); i++) { if (children.get(i).getName().equals(parentName)) { parentNode = (InnerNode)children.get(i); break; } } if (parentNode == null) { // create a new InnerNode parentNode = createParentNode(parentName); children.add(parentNode); } // add n to the subtree of the next ancestor node if (parentNode.add(n)) { numOfLeaves++; return true; } else { return false; } } }
其中createParentNode的代码如下:
/**
 * Creates a parent node to be added to the list of children.
 * The node is built with the InnerNode four argument constructor: the given
 * name, this node's path as its location, this node as its parent, and a
 * level one greater than this node's level.
 *
 * <p>To be overridden in subclasses for specific InnerNode implementations,
 * as alternative to overriding the full {@link #add(Node)} method.
 *
 * @param parentName The name of the parent node
 * @return A new inner node
 * @see InnerNode#InnerNode(String, String, InnerNode, int)
 */
protected InnerNode createParentNode(String parentName) {
  final String childLocation = getPath(this);
  final int childLevel = this.getLevel() + 1;
  return new InnerNode(parentName, childLocation, this, childLevel);
}
remove一个结点的代码如下:
/** Remove node <i>n</i> from the subtree of this node * @param n node to be deleted * @return true if the node is deleted; false otherwise */ boolean remove(Node n) { String parent = n.getNetworkLocation(); String currentPath = getPath(this); if (!isAncestor(n)) throw new IllegalArgumentException(n.getName() +", which is located at " +parent+", is not a descendent of "+currentPath); if (isParent(n)) { // this node is the parent of n; remove n directly for(int i=0; i<children.size(); i++) { if (children.get(i).getName().equals(n.getName())) { children.remove(i); numOfLeaves--; n.setParent(null); return true; } } return false; } else { // find the next ancestor node: the parent node String parentName = getNextAncestorName(n); InnerNode parentNode = null; int i; for(i=0; i<children.size(); i++) { if (children.get(i).getName().equals(parentName)) { parentNode = (InnerNode)children.get(i); break; } } if (parentNode==null) { return false; } // remove n from the parent node boolean isRemoved = parentNode.remove(n); // if the parent node has no children, remove the parent node too if (isRemoved) { if (parentNode.getNumOfChildren() == 0) { children.remove(i); } numOfLeaves--; } return isRemoved; } } // end of removegetLoc 根据一个字符串,返回代表这个字符串的Node类型的引用。
/** Given a node's string representation, return a reference to the node * @param loc string location of the form /rack/node * @return null if the node is not found or the childnode is there but * not an instance of {@link InnerNode} */ private Node getLoc(String loc) { if (loc == null || loc.length() == 0) return this; String[] path = loc.split(PATH_SEPARATOR_STR, 2); Node childnode = null; for(int i=0; i<children.size(); i++) { if (children.get(i).getName().equals(path[0])) { childnode = children.get(i); } } if (childnode == null) return null; // non-existing node if (path.length == 1) return childnode; if (childnode instanceof InnerNode) { return ((InnerNode)childnode).getLoc(path[1]); } else { return null; } }
/** get <i>leafIndex</i> leaf of this subtree * if it is not in the <i>excludedNode</i> * * @param leafIndex an indexed leaf of the node * @param excludedNode an excluded node (can be null) * @return */ Node getLeaf(int leafIndex, Node excludedNode) { int count=0; // check if the excluded node a leaf boolean isLeaf = excludedNode == null || !(excludedNode instanceof InnerNode); // calculate the total number of excluded leaf nodes int numOfExcludedLeaves = isLeaf ? 1 : ((InnerNode)excludedNode).getNumOfLeaves(); if (isLeafParent()) { // children are leaves if (isLeaf) { // excluded node is a leaf node int excludedIndex = children.indexOf(excludedNode); if (excludedIndex != -1 && leafIndex >= 0) { // excluded node is one of the children so adjust the leaf index leafIndex = leafIndex>=excludedIndex ? leafIndex+1 : leafIndex; } } // range check if (leafIndex<0 || leafIndex>=this.getNumOfChildren()) { return null; } return children.get(leafIndex); } else { for(int i=0; i<children.size(); i++) { InnerNode child = (InnerNode)children.get(i); if (excludedNode == null || excludedNode != child) { // not the excludedNode int numOfLeaves = child.getNumOfLeaves(); if (excludedNode != null && child.isAncestor(excludedNode)) { numOfLeaves -= numOfExcludedLeaves; } if (count+numOfLeaves > leafIndex) { // the leaf is in the child subtree return child.getLeaf(leafIndex-count, excludedNode); } else { // go to the next child count = count+numOfLeaves; } } else { // it is the excluededNode // skip it and set the excludedNode to be null excludedNode = null; } } return null; } }
/**
 * Add a leaf node.
 * Update node counter and rack counter if necessary.
 *
 * <p>The topology dump used in the error log is built only when the error
 * actually occurs, rather than on every call. It is taken while holding the
 * write lock, exactly as the debug logging at the end of this method
 * already does, and before anything has been modified.
 *
 * @param node node to be added; can be null, in which case nothing happens
 * @exception IllegalArgumentException if the node to be added is an inner
 *            node, or if its network location resolves to an existing node
 *            that is not an inner node
 * @exception InvalidTopologyException if the node would sit at a different
 *            depth than the leaves already in the topology
 */
public void add(Node node) {
  if (node == null) {
    return;
  }
  if (node instanceof InnerNode) {
    throw new IllegalArgumentException(
        "Not allow to add an inner node: "+NodeBase.getPath(node));
  }
  int newDepth = NodeBase.locationToDepth(node.getNetworkLocation()) + 1;
  netlock.writeLock().lock();
  try {
    if ((depthOfAllLeaves != -1) && (depthOfAllLeaves != newDepth)) {
      // Nothing has been changed yet, so this is still the old topology.
      LOG.error("Error: can't add leaf node " + NodeBase.getPath(node) +
          " at depth " + newDepth + " to topology:\n" + this.toString());
      throw new InvalidTopologyException("Failed to add " + NodeBase.getPath(node) +
          ": You cannot have a rack and a non-rack node at the same " +
          "level of the network topology.");
    }
    Node rack = getNodeForNetworkLocation(node);
    if (rack != null && !(rack instanceof InnerNode)) {
      throw new IllegalArgumentException("Unexpected data node "
          + node.toString()
          + " at an illegal network location");
    }
    if (clusterMap.add(node)) {
      LOG.info("Adding a new node: "+NodeBase.getPath(node));
      if (rack == null) {
        // the node's location did not exist before, so a rack was created
        numOfRacks++;
      }
      // node is known not to be an InnerNode here (rejected above), so the
      // first successfully added node fixes the depth of all leaves
      if (depthOfAllLeaves == -1) {
        depthOfAllLeaves = node.getLevel();
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("NetworkTopology became:\n" + this.toString());
    }
  } finally {
    netlock.writeLock().unlock();
  }
}
/**
 * Given a string representation of a rack, return its children.
 * The lookup runs under the read lock.
 *
 * @param loc a path-like string representation of a rack
 * @return a newly allocated list with all the node's children, or null if
 *         no node is found at <i>loc</i>
 */
public List<Node> getDatanodesInRack(String loc) {
  netlock.readLock().lock();
  try {
    final String normalized = NodeBase.normalize(loc);
    // every location except the root loses its leading separator before lookup
    final String relative = NodeBase.ROOT.equals(normalized)
        ? normalized
        : normalized.substring(1);
    final InnerNode rack = (InnerNode) clusterMap.getLoc(relative);
    return rack == null ? null : new ArrayList<Node>(rack.getChildren());
  } finally {
    netlock.readLock().unlock();
  }
}
remove方法删除一个结点。注意,clusterMap.remove(node)在删除结点之后,会把不再有子结点的上级结点一并删除。
/**
 * Remove a node.
 * Update node counter and rack counter if necessary: when the node's
 * network location no longer resolves to a node after the removal, the rack
 * counter is decremented.
 *
 * @param node node to be removed; can be null, in which case nothing happens
 * @throws IllegalArgumentException if the node is an inner node
 */
public void remove(Node node) {
  if (node == null) {
    return;
  }
  if (node instanceof InnerNode) {
    throw new IllegalArgumentException(
        "Not allow to remove an inner node: "+NodeBase.getPath(node));
  }
  LOG.info("Removing a node: "+NodeBase.getPath(node));

  netlock.writeLock().lock();
  try {
    final boolean removed = clusterMap.remove(node);
    if (removed) {
      final InnerNode rack = (InnerNode) getNode(node.getNetworkLocation());
      if (rack == null) {
        numOfRacks--;
      }
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("NetworkTopology became:\n" + this.toString());
    }
  } finally {
    netlock.writeLock().unlock();
  }
}
/**
 * Return the distance between two nodes.
 * It is assumed that the distance from one node to its parent is 1.
 * The distance between two nodes is calculated by summing up their distances
 * to their closest common ancestor.
 *
 * <p>If exactly one of the arguments is null, the method logs a warning and
 * returns {@link Integer#MAX_VALUE} instead of failing with a
 * {@link NullPointerException}.
 *
 * @param node1 one node
 * @param node2 another node
 * @return the distance between node1 and node2 which is zero if they are the same
 *  or {@link Integer#MAX_VALUE} if node1 or node2 do not belong to the cluster
 */
public int getDistance(Node node1, Node node2) {
  if (node1 == node2) {
    return 0;
  }
  if (node1 == null || node2 == null) {
    // A null node cannot belong to the cluster; report it the same way as
    // any other node that is not part of the topology.
    LOG.warn("One of the nodes is a null pointer");
    return Integer.MAX_VALUE;
  }
  Node n1=node1, n2=node2;
  int dis = 0;
  netlock.readLock().lock();
  try {
    int level1=node1.getLevel(), level2=node2.getLevel();
    // lift the deeper node until both are at the same level
    while(n1!=null && level1>level2) {
      n1 = n1.getParent();
      level1--;
      dis++;
    }
    while(n2!=null && level2>level1) {
      n2 = n2.getParent();
      level2--;
      dis++;
    }
    // climb in lock step until both nodes share a parent
    while(n1!=null && n2!=null && n1.getParent()!=n2.getParent()) {
      n1=n1.getParent();
      n2=n2.getParent();
      dis+=2;
    }
  } finally {
    netlock.readLock().unlock();
  }
  if (n1==null) {
    LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node1));
    return Integer.MAX_VALUE;
  }
  if (n2==null) {
    LOG.warn("The cluster does not contain node: "+NodeBase.getPath(node2));
    return Integer.MAX_VALUE;
  }
  // the final +2 covers the hop from each node up to the shared parent
  return dis+2;
}
chooseRandom方法从scope范围内随机选取一个结点,如果scope 以 ~开头,那么代表除了scope之外,随机选取一个结点。
/**
 * Randomly choose one node from <i>scope</i>.
 * If scope starts with ~, choose one from all the nodes except for the
 * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>.
 * The selection runs under the read lock.
 *
 * @param scope range of nodes from which a node will be chosen
 * @return the chosen node
 */
public Node chooseRandom(String scope) {
  netlock.readLock().lock();
  try {
    if (scope.startsWith("~")) {
      return chooseRandom(NodeBase.ROOT, scope.substring(1));
    } else {
      return chooseRandom(scope, null);
    }
  } finally {
    netlock.readLock().unlock();
  }
}

/**
 * Randomly choose one leaf below <i>scope</i> that is not below
 * <i>excludedScope</i>.
 *
 * @param scope the scope to choose from
 * @param excludedScope a scope whose nodes must not be chosen; can be null
 * @return the chosen node; the node found at <i>scope</i> (possibly null)
 *         when it is not an inner node; or null when <i>scope</i> lies
 *         entirely inside <i>excludedScope</i>
 * @throws InvalidTopologyException if no candidate is left after the
 *         exclusion
 */
private Node chooseRandom(String scope, String excludedScope){
  if (excludedScope != null) {
    if (scopeContains(excludedScope, scope)) {
      // everything in scope is excluded
      return null;
    }
    if (!scopeContains(scope, excludedScope)) {
      // the exclusion lies outside scope and cannot affect the choice
      excludedScope = null;
    }
  }
  Node node = getNode(scope);
  if (!(node instanceof InnerNode)) {
    return node;
  }
  InnerNode innerNode = (InnerNode)node;
  int numOfDatanodes = innerNode.getNumOfLeaves();
  Node excludedNode = null;
  if (excludedScope != null) {
    excludedNode = getNode(excludedScope);
    if (excludedNode instanceof InnerNode) {
      numOfDatanodes -= ((InnerNode)excludedNode).getNumOfLeaves();
    } else if (excludedNode != null) {
      numOfDatanodes -= 1;
    }
    // An excluded scope that resolves to no node removes no candidates.
  }
  if (numOfDatanodes == 0) {
    throw new InvalidTopologyException(
        "Failed to find datanode (scope=\"" + String.valueOf(scope) +
        "\" excludedScope=\"" + String.valueOf(excludedScope) + "\").");
  }
  int leaveIndex = r.nextInt(numOfDatanodes);
  return innerNode.getLeaf(leaveIndex, excludedNode);
}

/**
 * Tell whether <i>inner</i> equals <i>outer</i> or lies below it, comparing
 * on path-component boundaries so that e.g. /rack1 does not contain /rack10.
 * An empty <i>outer</i> contains every scope.
 */
private static boolean scopeContains(String outer, String inner) {
  if (!inner.startsWith(outer)) {
    return false;
  }
  return outer.isEmpty()
      || outer.endsWith(NodeBase.PATH_SEPARATOR_STR)
      || inner.length() == outer.length()
      || inner.charAt(outer.length()) == NodeBase.PATH_SEPARATOR;
}
/** return the number of leaves in <i>scope</i> but not in <i>excludedNodes</i> * if scope starts with ~, return the number of nodes that are not * in <i>scope</i> and <i>excludedNodes</i>; * @param scope a path string that may start with ~ * @param excludedNodes a list of nodes * @return number of available nodes */ public int countNumOfAvailableNodes(String scope, Collection<Node> excludedNodes) { boolean isExcluded=false; if (scope.startsWith("~")) { isExcluded=true; scope=scope.substring(1); } scope = NodeBase.normalize(scope); int excludedCountInScope = 0; // the number of nodes in both scope & excludedNodes int excludedCountOffScope = 0; // the number of nodes outside scope & excludedNodes netlock.readLock().lock(); try { for (Node node : excludedNodes) { node = getNode(NodeBase.getPath(node)); if (node == null) { continue; } if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR) .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) { excludedCountInScope++; } else { excludedCountOffScope++; } } Node n = getNode(scope); int scopeNodeCount = 0; if (n != null) { scopeNodeCount++; } if (n instanceof InnerNode) { scopeNodeCount=((InnerNode)n).getNumOfLeaves(); } if (isExcluded) { return clusterMap.getNumOfLeaves() - scopeNodeCount - excludedCountOffScope; } else { return scopeNodeCount - excludedCountInScope; } } finally { netlock.readLock().unlock(); } }
getWeight方法返回结点node和结点reader之间的距离,返回的值越低,代表两个结点的距离越近。
/** * Returns an integer weight which specifies how far away {node} is away from * {reader}. A lower value signifies that a node is closer. * * @param reader Node where data will be read * @param node Replica of data * @return weight */ protected int getWeight(Node reader, Node node) { // 0 is local, 1 is same rack, 2 is off rack // Start off by initializing to off rack int weight = 2; if (reader != null) { if (reader == node) { weight = 0; } else if (isOnSameRack(reader, node)) { weight = 1; } } return weight; }
sortByDistance方法,按和reader的距离,对nodes进行排序。在三层架构中,对于reader,结点node要么是本机,要么在同一个机架,要么在不同机架。
/** * Sort nodes array by network distance to <i>reader</i>. * <p/> * In a three-level topology, a node can be either local, on the same rack, * or on a different rack from the reader. Sorting the nodes based on network * distance from the reader reduces network traffic and improves * performance. * <p/> * As an additional twist, we also randomize the nodes at each network * distance. This helps with load balancing when there is data skew. * * @param reader Node where data will be read * @param nodes Available replicas with the requested data * @param activeLen Number of active nodes at the front of the array */ public void sortByDistance(Node reader, Node[] nodes, int activeLen) { /** Sort weights for the nodes array */ int[] weights = new int[activeLen]; for (int i=0; i<activeLen; i++) { weights[i] = getWeight(reader, nodes[i]); } // Add weight/node pairs to a TreeMap to sort TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>(); for (int i=0; i<activeLen; i++) { int weight = weights[i]; Node node = nodes[i]; List<Node> list = tree.get(weight); if (list == null) { list = Lists.newArrayListWithExpectedSize(1); tree.put(weight, list); } list.add(node); } int idx = 0; for (List<Node> list: tree.values()) { if (list != null) { Collections.shuffle(list, r); for (Node n: list) { nodes[idx] = n; idx++; } } } Preconditions.checkState(idx == activeLen, "Sorted the wrong number of nodes!"); }