/*
 * dbn is the starting sector, io_size is the number of sectors.
 */
static int
flashcache_lookup(struct cache_c *dmc, struct bio *bio, int *index)
{
	sector_t dbn = bio->bi_sector;
#if DMC_DEBUG
	int io_size = to_sector(bio->bi_size);
#endif
	unsigned long set_number = hash_block(dmc, dbn);
	int invalid, oldest_clean = -1;
	int start_index;

	start_index = dmc->assoc * set_number;
	DPRINTK("Cache lookup : dbn %llu(%lu), set = %d",
		dbn, io_size, set_number);
	find_valid_dbn(dmc, dbn, start_index, index);
	if (*index >= 0) {
		DPRINTK("Cache lookup HIT: Block %llu(%lu): VALID index %d",
			dbn, io_size, *index);
		/* We found the exact range of blocks we are looking for */
		return VALID;
	}
	invalid = find_invalid_dbn(dmc, start_index);
	if (invalid == -1) {
		/* We didn't find an invalid entry, search for the oldest valid entry */
		find_reclaim_dbn(dmc, start_index, &oldest_clean);
	}
	/*
	 * Cache miss :
	 * We can't choose an entry marked INPROG, but choose the oldest
	 * INVALID or the oldest VALID entry.
	 */
	*index = start_index + dmc->assoc;
	if (invalid != -1) {
		DPRINTK("Cache lookup MISS (INVALID): dbn %llu(%lu), set = %d, index = %d, start_index = %d",
			dbn, io_size, set_number, invalid, start_index);
		*index = invalid;
	} else if (oldest_clean != -1) {
		DPRINTK("Cache lookup MISS (VALID): dbn %llu(%lu), set = %d, index = %d, start_index = %d",
			dbn, io_size, set_number, oldest_clean, start_index);
		*index = oldest_clean;
	} else {
		DPRINTK_LITE("Cache read lookup MISS (NOROOM): dbn %llu(%lu), set = %d",
			dbn, io_size, set_number);
	}
	if (*index < (start_index + dmc->assoc))
		return INVALID;
	else {
		dmc->noroom++;
		return -1;
	}
}
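To make the set-associative lookup above concrete, here is a minimal user-space sketch of the same mapping: a disk block number is hashed to a set, and only that set's assoc slots are scanned for a matching dbn. The modulo hash, the toy_* names and the fixed geometry are assumptions made for illustration; flashcache's real hash_block() and struct cacheblock differ in detail.

/*
 * Minimal user-space sketch of set-associative lookup.
 * Assumption: hash_block() is modelled as a simple modulo hash; the
 * toy_cacheblock layout is invented for this example.
 */
#include <stdio.h>
#include <stdint.h>

#define TOY_ASSOC      4        /* slots per set */
#define TOY_NUM_SETS   8
#define TOY_BLOCK_SIZE 8        /* sectors per cache block */

enum { TOY_INVALID = 0, TOY_VALID = 1 };

struct toy_cacheblock {
	uint64_t dbn;           /* starting disk sector cached in this slot */
	int      state;
};

static struct toy_cacheblock toy_cache[TOY_NUM_SETS * TOY_ASSOC];

/* Map a disk block number to a set, then scan only that set's slots. */
static int toy_lookup(uint64_t dbn)
{
	unsigned long set_number = (dbn / TOY_BLOCK_SIZE) % TOY_NUM_SETS;
	int start_index = set_number * TOY_ASSOC;
	int i;

	for (i = start_index; i < start_index + TOY_ASSOC; i++) {
		if ((toy_cache[i].state & TOY_VALID) && toy_cache[i].dbn == dbn)
			return i;       /* hit: exact dbn found in this set */
	}
	return -1;                      /* miss: caller picks a victim in this set */
}

int main(void)
{
	toy_cache[2 * TOY_ASSOC + 1].dbn = 16;      /* dbn 16 hashes to set 2 */
	toy_cache[2 * TOY_ASSOC + 1].state = TOY_VALID;
	printf("lookup(16) -> index %d\n", toy_lookup(16));
	printf("lookup(24) -> index %d\n", toy_lookup(24));
	return 0;
}

Compiled with any C compiler, the sketch reports a hit at index 9 for dbn 16 and a miss (-1) for dbn 24.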
static void
flashcache_uncached_io_callback(unsigned long error, void *context)
{
	struct kcached_job *job = (struct kcached_job *) context;

	VERIFY(job->index == -1);
	push_uncached_io_complete(job);
	schedule_work(&_kcached_wq);
}
/*
 * We handle uncached IOs ourselves to deal with the problem of out-of-order
 * IOs corrupting the cache. Consider the case where we get 2 concurrent IOs
 * for the same block, Write-Read (or Write-Write), where the first Write is
 * uncacheable and the second IO is cacheable. If the 2 IOs are reordered
 * below flashcache, we will cache inconsistent data in flashcache
 * (persistently).
 *
 * We do invalidations before launching uncacheable IOs to disk. But if the
 * IOs get reordered, invalidating before launching them does not help; we
 * also need to invalidate after the IO completes.
 *
 * Doing invalidations after the completion of an uncacheable IO will cause
 * any overlapping dirty blocks in the cache to be written out and the IO
 * relaunched. If the overlapping blocks are busy, the IO is also relaunched
 * to disk (post invalidation). In these 2 cases, we end up sending
 * 2 disk IOs for the block. But this is a rare case.
 *
 * When 2 IOs for the same block are sent down (by non-cooperating processes),
 * the storage stack is allowed to reorder the IOs at will, so applications
 * cannot expect any ordering at all.
 *
 * What we try to avoid here is an inconsistency between the disk and the
 * SSD cache.
 */
void
flashcache_uncached_io_complete(struct kcached_job *job)
{
	struct cache_c *dmc = job->dmc;
	unsigned long flags;
	int queued;
	int error = job->error;

	if (unlikely(error)) {
		if (bio_data_dir(job->bio) == WRITE)
			dmc->disk_write_errors++;
		else
			dmc->disk_read_errors++;
	}
	spin_lock_irqsave(&dmc->cache_spin_lock, flags);
	queued = flashcache_inval_blocks(dmc, job->bio);
	spin_unlock_irqrestore(&dmc->cache_spin_lock, flags);
	if (queued) {
		if (unlikely(queued < 0))
			flashcache_bio_endio(job->bio, -EIO);
		/*
		 * The IO will be re-executed.
		 * The do_pending logic will re-launch the
		 * disk IO post-invalidation, calling start_uncached_io.
		 * This should be a rare occurrence though.
		 * XXX - We should track this.
		 */
	} else {
		flashcache_bio_endio(job->bio, error);
	}
	flashcache_free_cache_job(job);
	if (atomic_dec_and_test(&dmc->nr_jobs))
		wake_up(&dmc->destroyq);
}
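The tail of this routine (and of the pending handlers further down) shows the teardown handshake: every completion path decrements nr_jobs and wakes destroyq when the count reaches zero, so cache removal can wait for all in-flight jobs to drain. Below is a minimal user-space model of that handshake, with invented names and a mutex/condvar standing in for the kernel's atomic counter and wait queue.

/* User-space model of the nr_jobs / destroyq teardown handshake. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  destroyq = PTHREAD_COND_INITIALIZER;
static int nr_jobs = 3;

static void *job_done(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	if (--nr_jobs == 0)              /* last job out wakes the waiter */
		pthread_cond_signal(&destroyq);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t[3];
	int i;

	for (i = 0; i < 3; i++)
		pthread_create(&t[i], NULL, job_done, NULL);
	pthread_mutex_lock(&lock);
	while (nr_jobs != 0)             /* teardown waits for in-flight jobs */
		pthread_cond_wait(&destroyq, &lock);
	pthread_mutex_unlock(&lock);
	for (i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	printf("all jobs drained, safe to tear down\n");
	return 0;
}

Because the waiter re-checks the count in a loop under the mutex, a wakeup issued before it starts waiting is never lost.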
/*
 * Invalidate any colliding blocks if they are !BUSY and !DIRTY. If the colliding
 * block is DIRTY, we need to kick off a write. In both cases, we need to wait
 * until the underlying IO is finished, and then proceed with the invalidation.
 */
static int
flashcache_inval_block_set(struct cache_c *dmc, int set, struct bio *bio, int rw,
			   struct pending_job *pjob)
{
	sector_t io_start = bio->bi_sector;
	sector_t io_end = bio->bi_sector + (to_sector(bio->bi_size) - 1);
	int start_index, end_index, i;
	struct cacheblock *cacheblk;

	start_index = dmc->assoc * set;
	end_index = start_index + dmc->assoc;
	for (i = start_index ; i < end_index ; i++) {
		sector_t start_dbn = dmc->cache[i].dbn;
		sector_t end_dbn = start_dbn + dmc->block_size;

		cacheblk = &dmc->cache[i];
		if (cacheblk->cache_state & INVALID)
			continue;
		if ((io_start >= start_dbn && io_start < end_dbn) ||
		    (io_end >= start_dbn && io_end < end_dbn)) {
			/* We have a match */
			if (rw == WRITE)
				dmc->wr_invalidates++;
			else
				dmc->rd_invalidates++;
			if (!(cacheblk->cache_state & (BLOCK_IO_INPROG | DIRTY)) &&
			    (cacheblk->head == NULL)) {
				dmc->cached_blocks--;
				DPRINTK("Cache invalidate (!BUSY): Block %llu %lx",
					start_dbn, cacheblk->cache_state);
				cacheblk->cache_state = INVALID;
				continue;
			}
			/*
			 * The conflicting block has either IO in progress or is
			 * Dirty. In all cases, we need to add ourselves to the
			 * pending queue. Then if the block is dirty, we kick off
			 * an IO to clean the block.
			 * Note that if the block is dirty and IO is in progress
			 * on it, the do_pending handler will clean the block
			 * and then process the pending queue.
			 */
			flashcache_enq_pending(dmc, bio, i, INVALIDATE, pjob);
			if ((cacheblk->cache_state & (DIRTY | BLOCK_IO_INPROG)) == DIRTY) {
				/*
				 * Kick off block write.
				 * We can't kick off the write under the spinlock.
				 * Instead, we mark the slot DISKWRITEINPROG, drop
				 * the spinlock and kick off the write. A block marked
				 * DISKWRITEINPROG cannot change underneath us; any
				 * new IO to this block will have to enqueue itself
				 * onto its pending queue.
				 *
				 * XXX - The dropping of the lock here can be avoided if
				 * we punt the cleaning of the block to the worker thread,
				 * at the cost of a context switch.
				 */
				cacheblk->cache_state |= DISKWRITEINPROG;
				spin_unlock_irq(&dmc->cache_spin_lock);
				flashcache_dirty_writeback(dmc, i); /* Must inc nr_jobs */
				spin_lock_irq(&dmc->cache_spin_lock);
			}
			return 1;
		}
	}
	return 0;
}
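The collision test in the loop above compares only the IO's first and last sectors against each cached block's range [start_dbn, start_dbn + block_size). The standalone sketch below (the 8-sector block size and the sample ranges are invented) shows which blocks the test flags; checking only the endpoints is enough as long as an IO is never large enough to completely enclose a cache block, which appears to be the assumption here.

/*
 * Standalone illustration of the range-collision test: an IO
 * [io_start, io_end] collides with a cache block covering
 * [start_dbn, start_dbn + BLOCK_SECTORS) if either endpoint of the IO
 * falls inside the block.
 */
#include <stdio.h>
#include <stdint.h>

#define BLOCK_SECTORS 8

static int collides(uint64_t io_start, uint64_t io_end, uint64_t start_dbn)
{
	uint64_t end_dbn = start_dbn + BLOCK_SECTORS;

	return (io_start >= start_dbn && io_start < end_dbn) ||
	       (io_end >= start_dbn && io_end < end_dbn);
}

int main(void)
{
	/* 4KB IO starting at sector 10 spans sectors 10..17 */
	uint64_t io_start = 10, io_end = 17;

	printf("block at 8  : %d\n", collides(io_start, io_end, 8));   /* 1: head overlaps */
	printf("block at 16 : %d\n", collides(io_start, io_end, 16));  /* 1: tail overlaps */
	printf("block at 24 : %d\n", collides(io_start, io_end, 24));  /* 0: no overlap */
	return 0;
}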
void
flashcache_do_pending(struct kcached_job *job)
{
	if (job->error)
		flashcache_do_pending_error(job);
	else
		flashcache_do_pending_noerror(job);
}
/*
 * Common error handling for everything.
 * 1) If the block isn't dirty, invalidate it.
 * 2) Error all pending IOs that totally or partly overlap this block.
 * 3) Free the job.
 */
static void
flashcache_do_pending_error(struct kcached_job *job)
{
	struct cache_c *dmc = job->dmc;
	unsigned long flags;
	struct cacheblock *cacheblk = &dmc->cache[job->index];

	DMERR("flashcache_do_pending_error: error %d block %lu action %d",
	      -job->error, job->disk.sector, job->action);
	spin_lock_irqsave(&dmc->cache_spin_lock, flags);
	VERIFY(cacheblk->cache_state & VALID);
	/* Invalidate block if possible */
	if ((cacheblk->cache_state & DIRTY) == 0) {
		dmc->cached_blocks--;
		dmc->pending_inval++;
		cacheblk->cache_state &= ~VALID;
		cacheblk->cache_state |= INVALID;
	}
	flashcache_free_pending_jobs(dmc, cacheblk, job->error);
	cacheblk->cache_state &= ~(BLOCK_IO_INPROG);
	spin_unlock_irqrestore(&dmc->cache_spin_lock, flags);
	flashcache_free_cache_job(job);
	if (atomic_dec_and_test(&dmc->nr_jobs))
		wake_up(&dmc->destroyq);
}
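Both pending handlers manipulate cache_state as a bitmask: VALID is cleared and INVALID set to invalidate a block, the in-progress bits are cleared on completion, and the writeback paths test for "dirty with no IO in progress" as (cache_state & (DIRTY | BLOCK_IO_INPROG)) == DIRTY. A toy model of those flag operations follows; the flag values and the T_BUSY_BITS composite are made up here, standing in for flashcache's own state bits.

/* Toy model of the cache_state bitmask transitions. */
#include <stdio.h>

#define T_INVALID          0x0001
#define T_VALID            0x0002
#define T_DIRTY            0x0004
#define T_DISKWRITEINPROG  0x0008
#define T_CACHEREADINPROG  0x0010
#define T_BUSY_BITS        (T_DISKWRITEINPROG | T_CACHEREADINPROG) /* stand-in for BLOCK_IO_INPROG */

int main(void)
{
	unsigned long state = T_VALID | T_DIRTY | T_DISKWRITEINPROG;

	/* "dirty and no IO in progress" test used before kicking off a writeback */
	printf("clean-able now? %s\n",
	       (state & (T_DIRTY | T_BUSY_BITS)) == T_DIRTY ? "yes" : "no");

	/* completion path: drop the in-progress bit, then re-test */
	state &= ~T_BUSY_BITS;
	printf("clean-able now? %s\n",
	       (state & (T_DIRTY | T_BUSY_BITS)) == T_DIRTY ? "yes" : "no");

	/* invalidation: flip VALID -> INVALID exactly as the handlers do */
	state &= ~T_VALID;
	state |= T_INVALID;
	printf("state = 0x%lx\n", state);
	return 0;
}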
static void
flashcache_do_pending_noerror(struct kcached_job *job)
{
	struct cache_c *dmc = job->dmc;
	int index = job->index;
	unsigned long flags;
	struct pending_job *pending_job;
	int queued;
	struct cacheblock *cacheblk = &dmc->cache[index];

	spin_lock_irqsave(&dmc->cache_spin_lock, flags);
	if (cacheblk->cache_state & DIRTY) {
		cacheblk->cache_state &= ~(BLOCK_IO_INPROG);
		cacheblk->cache_state |= DISKWRITEINPROG;
		spin_unlock_irqrestore(&dmc->cache_spin_lock, flags);
		flashcache_dirty_writeback(dmc, index);
		goto out;
	}
	DPRINTK("flashcache_do_pending: Index %d %lx",
		index, cacheblk->cache_state);
	VERIFY(cacheblk->cache_state & VALID);
	dmc->cached_blocks--;
	dmc->pending_inval++;
	cacheblk->cache_state &= ~VALID;
	cacheblk->cache_state |= INVALID;
	while (cacheblk->head) {
		VERIFY(!(cacheblk->cache_state & DIRTY));
		pending_job = cacheblk->head;
		cacheblk->head = pending_job->next;
		VERIFY(cacheblk->nr_queued > 0);
		cacheblk->nr_queued--;
		if (pending_job->action == INVALIDATE) {
			DPRINTK("flashcache_do_pending: INVALIDATE %llu",
				pending_job->bio->bi_sector);
			VERIFY(pending_job->bio != NULL);
			queued = flashcache_inval_blocks(dmc, pending_job->bio);
			if (queued) {
				if (unlikely(queued < 0)) {
					/*
					 * Memory allocation failure inside inval_blocks.
					 * Fail this io.
					 */
					flashcache_bio_endio(pending_job->bio, -EIO);
				}
				flashcache_free_pending_job(pending_job);
				continue;
			}
		}
		spin_unlock_irqrestore(&dmc->cache_spin_lock, flags);
		DPRINTK("flashcache_do_pending: Sending down IO %llu",
			pending_job->bio->bi_sector);
		/* Start uncached IO */
		flashcache_start_uncached_io(dmc, pending_job->bio);
		flashcache_free_pending_job(pending_job);
		spin_lock_irqsave(&dmc->cache_spin_lock, flags);
	}
	VERIFY(cacheblk->nr_queued == 0);
	cacheblk->cache_state &= ~(BLOCK_IO_INPROG);
	spin_unlock_irqrestore(&dmc->cache_spin_lock, flags);
out:
	flashcache_free_cache_job(job);
	if (atomic_dec_and_test(&dmc->nr_jobs))
		wake_up(&dmc->destroyq);
}
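The while loop above uses a common drain pattern: pop one pending job while holding the spinlock, drop the lock for the slow part (launching the disk IO), then re-acquire the lock and re-check the list head. A minimal user-space model of that loop is sketched below; the pending structure, the pthread mutex standing in for the cache spinlock, and slow_io() are all illustrative stand-ins.

/* User-space model of the "pop under lock, process unlocked" drain loop. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct pending {
	struct pending *next;
	int id;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct pending *head;

/* Stand-in for flashcache_start_uncached_io(): must not run under the lock. */
static void slow_io(struct pending *p)
{
	printf("launching IO for pending job %d\n", p->id);
}

static void drain_pending(void)
{
	struct pending *p;

	pthread_mutex_lock(&lock);
	while (head) {
		p = head;                    /* pop one job while holding the lock */
		head = p->next;
		pthread_mutex_unlock(&lock);
		slow_io(p);                  /* slow work with the lock dropped */
		free(p);
		pthread_mutex_lock(&lock);   /* re-acquire and re-check the head */
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	int i;

	for (i = 0; i < 3; i++) {
		struct pending *p = malloc(sizeof(*p));
		p->id = i;
		p->next = head;
		head = p;
	}
	drain_pending();
	return 0;
}

Because each job is unlinked before the lock is dropped, concurrent enqueuers never observe a half-removed entry; they only ever see the current head of the list.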