HTable table = new HTable(conf, "tableName"); Scan scan = new Scan(); scan.addColumn(...); scan.setStartRow(...); scan.setStopRow(...); scan.setBatch(...); ResultScanner ss = table.getScanner(scan);
调用 HTable.getScanner(Scan scan) 后,RegionServer 端的调用栈如下:
org.apache.hadoop.hbase.regionserver.HRegion.instantiateRegionScanner(HRegion.java) org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1426) org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1402) org.apache.hadoop.hbase.regionserver.HRegionServer.openScanner(HRegionServer.java:2068) sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) java.lang.reflect.Method.invoke(Method.java:597) org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(WritableRpcEngine.java:364) org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1326)定位到HRegionServer.openScanner
for (Map.Entry<byte[], NavigableSet<byte[]>> entry : scan.getFamilyMap().entrySet()) { Store store = stores.get(entry.getKey()); StoreScanner scanner = store.getScanner(scan, entry.getValue()); scanners.add(scanner); }
(HBase体系可参考: http://www.tbdata.org/archives/1509 和 http://www.searchtb.com/2011/01/understanding-hbase.html 。假设有个表test,下面有一个名为A的Column Family,则一个Store表示一个Column Family的存储,也就是对应A。)
然后根据Store初始化StoreScanner:
一、StoreScanner(Store, Scan, List<? extends KeyValueScanner> , ScanType , long , long )
// Seek all scanners to the start of the Row (or if the exact matching row // key does not exist, then to the start of the next matching Row). // Always check bloom filter to optimize the top row seek for delete // family marker. if (explicitColumnQuery && lazySeekEnabledGlobally) { for (KeyValueScanner scanner : scanners) { scanner.requestSeek(matcher.getStartKey(), false, true); } } else { for (KeyValueScanner scanner : scanners) { scanner.seek(matcher.getStartKey()); } }假设指定了Column,执行KeyValueScanner的requestSeek方法。
if(!seekAtOrAfter(hfs, key)) { close(); return false; }
seekAtOrAfter方法会调用HFileReaderV2.AbstractScannerV2.seekTo方法,通过HFile的block索引,把ScannerV2的blockBuffer指向startRow。
给StoreFileScanner.cur赋值:
cur = hfs.getKeyValue() = new KeyValue(blockBuffer.array(),blockBuffer.arrayOffset() + blockBuffer.position());
(可以执行 hbase org.apache.hadoop.hbase.io.hfile.HFile -b -m -f hdfs:/hbase/tableName/path 看到HFile的索引信息)
// Combine all seeked scanners with a heap heap = new KeyValueHeap(scanners, store.comparator);
把这些scanner包装成KeyValueHeap,其中 this.current = pollRealKV(); 会把current指向构造好的StoreFileScanner。
基本结构:
RegionScannerImpl
ResultScanner ss = table.getScanner(scan); for (Result r : ss) { for (KeyValue kv : r.raw()) { ...... } }
org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:350) org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:127) org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.nextInternal(HRegion.java:3459) org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.next(HRegion.java:3406) org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.next(HRegion.java:3423) org.apache.hadoop.hbase.regionserver.HRegionServer.next(HRegionServer.java:2393) sun.reflect.GeneratedMethodAccessor21.invoke(Unknown Source) sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) java.lang.reflect.Method.invoke(Method.java:597) org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(WritableRpcEngine.java:364) org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1389)核心方法:StoreScanner.next(List<KeyValue>, int)
LOOP: while((kv = this.heap.peek()) != null) { ...... ScanQueryMatcher.MatchCode qcode = matcher.match(kv); switch(qcode) { ...... } }
如果scan添加了Filter,会在 ScanQueryMatcher.match(KeyValue) 中进行判断:
if (filter != null) { ReturnCode filterResponse = filter.filterKeyValue(kv); if (filterResponse == ReturnCode.SKIP) { return MatchCode.SKIP; } else if (filterResponse == ReturnCode.NEXT_COL) { return columns.getNextRowOrNextColumn(bytes, offset, qualLength); } else if (filterResponse == ReturnCode.NEXT_ROW) { stickyNextRow = true; return MatchCode.SEEK_NEXT_ROW; } else if (filterResponse == ReturnCode.SEEK_NEXT_USING_HINT) { return MatchCode.SEEK_NEXT_USING_HINT; } }
/** * Implementing classes of this interface will be used for the tracking * and enforcement of columns and numbers of versions and timeToLive during * the course of a Get or Scan operation. */ MatchCode colChecker = columns.checkColumn(bytes, offset, qualLength, timestamp, type, kv.getMemstoreTS() > maxReadPointToTrackVersions);
客户端迭代结果集,返回的MatchCode:
SEEK_NEXT_COL......
最后StoreScanner返回结果:
case INCLUDE_AND_SEEK_NEXT_COL: Filter f = matcher.getFilter(); results.add(f == null ? kv : f.transform(kv)); if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) { if (!matcher.moreRowsMayExistAfter(kv)) { outResult.addAll(results); return false; } reseek(matcher.getKeyForNextRow(kv)); } else if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL) { reseek(matcher.getKeyForNextColumn(kv)); } else { this.heap.next(); } RegionMetricsStorage.incrNumericMetric(metricNameGetSize, kv.getLength()); if (limit > 0 && (results.size() == limit)) { break LOOP; } continue; ...... if (!results.isEmpty()) { // copy jazz outResult.addAll(results); return true; }