在Mesos和YARN中,都用到了dominant resource fairness算法(DRF),它不同于hadoop基于slot-based实现的fair scheduler和capacity scheduler,论文阅读:Dominant Resource Fairness: Fair Allocation of Multiple Resource Types。
考虑在一个包括多种资源类型(主要考虑CPU和MEM)的系统的公平资源分配问题,其中不同用户对资源有不同的需求。为了解决这个问题,伯克利的几位大牛提出了Dominant Resource Fairness(DRF),一种针对不同资源类型的max-min fairness。并且在Mesos的设计和实现中评估了DRF,显示了它可以比slot-based 公平调度算法得到更好的吞吐量。
/**
* Makes scheduling decisions by trying to equalize dominant resource usage.
* A schedulable's dominant resource usage is the largest ratio of resource
* usage to capacity among the resource types it is using.
*/
@Private
@Unstable
public class DominantResourceFairnessPolicy extends SchedulingPolicy {
public static final String NAME = "DRF";
private DominantResourceFairnessComparator comparator =
new DominantResourceFairnessComparator();
@Override
public String getName() {
return NAME;
}
@Override
public byte getApplicableDepth() {
return SchedulingPolicy.DEPTH_ANY;
}
@Override
public Comparator getComparator() {
return comparator;
}
@Override
public void computeShares(Collection extends Schedulable> schedulables,
Resource totalResources) {
for (ResourceType type : ResourceType.values()) {
ComputeFairShares.computeShares(schedulables, totalResources, type);
}
}
@Override
public void initialize(Resource clusterCapacity) {
comparator.setClusterCapacity(clusterCapacity);
}
public static class DominantResourceFairnessComparator implements Comparator {
private static final int NUM_RESOURCES = ResourceType.values().length;
private Resource clusterCapacity;
public void setClusterCapacity(Resource clusterCapacity) {
this.clusterCapacity = clusterCapacity;
}
@Override
public int compare(Schedulable s1, Schedulable s2) {
ResourceWeights sharesOfCluster1 = new ResourceWeights();
ResourceWeights sharesOfCluster2 = new ResourceWeights();
ResourceWeights sharesOfMinShare1 = new ResourceWeights();
ResourceWeights sharesOfMinShare2 = new ResourceWeights();
ResourceType[] resourceOrder1 = new ResourceType[NUM_RESOURCES];
ResourceType[] resourceOrder2 = new ResourceType[NUM_RESOURCES];
// Calculate shares of the cluster for each resource both schedulables.
calculateShares(s1.getResourceUsage(),
clusterCapacity, sharesOfCluster1, resourceOrder1, s1.getWeights());
calculateShares(s1.getResourceUsage(),
s1.getMinShare(), sharesOfMinShare1, null, ResourceWeights.NEUTRAL);
calculateShares(s2.getResourceUsage(),
clusterCapacity, sharesOfCluster2, resourceOrder2, s2.getWeights());
calculateShares(s2.getResourceUsage(),
s2.getMinShare(), sharesOfMinShare2, null, ResourceWeights.NEUTRAL);
// A queue is needy for its min share if its dominant resource
// (with respect to the cluster capacity) is below its configured min share
// for that resource
boolean s1Needy = sharesOfMinShare1.getWeight(resourceOrder1[0]) < 1.0f;
boolean s2Needy = sharesOfMinShare2.getWeight(resourceOrder2[0]) < 1.0f;
int res = 0;
if (!s2Needy && !s1Needy) {
res = compareShares(sharesOfCluster1, sharesOfCluster2,
resourceOrder1, resourceOrder2);
} else if (s1Needy && !s2Needy) {
res = -1;
} else if (s2Needy && !s1Needy) {
res = 1;
} else { // both are needy below min share
res = compareShares(sharesOfMinShare1, sharesOfMinShare2,
resourceOrder1, resourceOrder2);
}
if (res == 0) {
// Apps are tied in fairness ratio. Break the tie by submit time.
res = (int)(s1.getStartTime() - s2.getStartTime());
}
return res;
}
/**
* Calculates and orders a resource's share of a pool in terms of two vectors.
* The shares vector contains, for each resource, the fraction of the pool that
* it takes up. The resourceOrder vector contains an ordering of resources
* by largest share. So if resource=<10 MB, 5 CPU>, and pool=<100 MB, 10 CPU>,
* shares will be [.1, .5] and resourceOrder will be [CPU, MEMORY].
*/
void calculateShares(Resource resource, Resource pool,
ResourceWeights shares, ResourceType[] resourceOrder, ResourceWeights weights) {
shares.setWeight(MEMORY, (float)resource.getMemory() /
(pool.getMemory() * weights.getWeight(MEMORY)));
shares.setWeight(CPU, (float)resource.getVirtualCores() /
(pool.getVirtualCores() * weights.getWeight(CPU)));
// sort order vector by resource share
if (resourceOrder != null) {
if (shares.getWeight(MEMORY) > shares.getWeight(CPU)) {
resourceOrder[0] = MEMORY;
resourceOrder[1] = CPU;
} else {
resourceOrder[0] = CPU;
resourceOrder[1] = MEMORY;
}
}
}
private int compareShares(ResourceWeights shares1, ResourceWeights shares2,
ResourceType[] resourceOrder1, ResourceType[] resourceOrder2) {
for (int i = 0; i < resourceOrder1.length; i++) {
int ret = (int)Math.signum(shares1.getWeight(resourceOrder1[i])
- shares2.getWeight(resourceOrder2[i]));
if (ret != 0) {
return ret;
}
}
return 0;
}
}
}