今天同事在bug解析时,发现两个进程在调用shmat后返回的地址不一致,于是就说是错的。我反驳说这是正常的。他不信,叫我找证据。
于是回家就调查了一下shmat的系统调用。
在用户空间调用的是shmat,到了内核就是sys_shmat函数。
sys_shmat =》do_shmat =》do_mmap =》do_mmap_pgoff =》
unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, unsigned long flags,
unsigned int vm_flags, unsigned long pgoff)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
int correct_wcount = 0;
int error;
struct rb_node **rb_link, *rb_parent;
unsigned long charged = 0;
struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
/* Clear old maps */
error = -ENOMEM;
munmap_back:
vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
if (vma && vma->vm_start < addr + len) {
if (do_munmap(mm, addr, len))
return -ENOMEM;
goto munmap_back;
}
/* Check against address space limit. */
if (!may_expand_vm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
/*
* Set 'VM_NORESERVE' if we should not account for the
* memory use of this mapping.
*/
if ((flags & MAP_NORESERVE)) {
/* We honor MAP_NORESERVE if allowed to overcommit */
if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
vm_flags |= VM_NORESERVE;
/* hugetlb applies strict overcommit unless MAP_NORESERVE */
if (file && is_file_hugepages(file))
vm_flags |= VM_NORESERVE;
}
/*
* Private writable mapping: check memory availability
*/
if (accountable_mapping(file, vm_flags)) {
charged = len >> PAGE_SHIFT;
if (security_vm_enough_memory(charged))
return -ENOMEM;
vm_flags |= VM_ACCOUNT;
}
/*
* Can we just expand an old mapping?
*/
vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
if (vma)
goto out;
/*
* Determine the object being mapped and call the appropriate
* specific mapper. the address has already been validated, but
* not unmapped, but the maps are removed from the list.
*/
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma) {
error = -ENOMEM;
goto unacct_error;
}
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_flags = vm_flags;
vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma->vm_pgoff = pgoff;
INIT_LIST_HEAD(&vma->anon_vma_chain);
if (file) {
error = -EINVAL;
if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
goto free_vma;
if (vm_flags & VM_DENYWRITE) {
error = deny_write_access(file);
if (error)
goto free_vma;
correct_wcount = 1;
}
vma->vm_file = file;
get_file(file);
error = file->f_op->mmap(file, vma);
if (error)
goto unmap_and_free_vma;
if (vm_flags & VM_EXECUTABLE)
added_exe_file_vma(mm);
/* Can addr have changed??
*
* Answer: Yes, several device drivers can do it in their
* f_op->mmap method. -DaveM
*/
addr = vma->vm_start;
pgoff = vma->vm_pgoff;
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma);
if (error)
goto free_vma;
}
if (vma_wants_writenotify(vma)) {
pgprot_t pprot = vma->vm_page_prot;
/* Can vma->vm_page_prot have changed??
*
* Answer: Yes, drivers may have changed it in their
* f_op->mmap method.
*
* Ensures that vmas marked as uncached stay that way.
*/
vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
}
vma_link(mm, vma, prev, rb_link, rb_parent);
file = vma->vm_file;
/* Once vma denies write, undo our temporary denial count */
if (correct_wcount)
atomic_inc(&inode->i_writecount);
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
if (!mlock_vma_pages_range(vma, addr, addr + len))
mm->locked_vm += (len >> PAGE_SHIFT);
} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
make_pages_present(addr, addr + len);
return addr;
unmap_and_free_vma:
if (correct_wcount)
atomic_inc(&inode->i_writecount);
vma->vm_file = NULL;
fput(file);
/* Undo any partial mapping done by a device driver. */
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
charged = 0;
free_vma:
kmem_cache_free(vm_area_cachep, vma);
unacct_error:
if (charged)
vm_unacct_memory(charged);
return error;
}
在ulk中,P794,linux2.6中IPC共享内存是与VFS紧密结合的。具体来说,每个IPC共享内存区与属于shm特殊文件系统的一个普通文件相关联。
至于如果一个进程把内存中数据更改后另一个进程是如何看到变更后的数据这一点。我想是因为pdflush线程来完成的工作,等有时间再跟踪一下pdflush线程的代码。