From 286790a7dd8c070e725e542e4cb0aa7d1f6bb5d0 Mon Sep 17 00:00:00 2001
From: Konstantin Belousov
Date: Tue, 15 Nov 2011 14:40:00 +0000
Subject: [PATCH] Update the device pager interface, while keeping a
 compatibility layer for the old KPI and KBI.

The new interface is intended to be used together with the d_mmap_single
cdevsw method.  A device pager is allocated with the cdev_pager_allocate(9)
function, which takes a struct cdev_pager_ops containing the constructor,
destructor, and page fault handler methods supplied by the driver.

The constructor and destructor, called at pager allocation and
deallocation time, let the driver manage per-object private data.

The fault handler is called to handle a page fault on a vm map entry
backed by the driver pager.  The driver shall return either the vm_page_t
that should be mapped, or an error code (which no longer causes a kernel
panic).  The fault handler interface has a placeholder for the access
mode causing the fault, but currently PROT_READ is always passed there.

Sponsored by:	The FreeBSD Foundation
Reviewed by:	alc
MFC after:	1 month
---
 sys/vm/device_pager.c | 245 ++++++++++++++++++++++++++++--------------
 sys/vm/vm_object.h    |   1 +
 sys/vm/vm_pager.h     |  14 +++
 3 files changed, 180 insertions(+), 80 deletions(-)

diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
index d46d17055b6e..899369c44429 100644
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@@ -76,6 +76,18 @@ struct pagerops devicepagerops = {
 	.pgo_haspage =	dev_pager_haspage,
 };
 
+static int old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color);
+static void old_dev_pager_dtor(void *handle);
+static int old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
+    int prot, vm_page_t *mres);
+
+static struct cdev_pager_ops old_dev_pager_ops = {
+	.cdev_pg_ctor = old_dev_pager_ctor,
+	.cdev_pg_dtor = old_dev_pager_dtor,
+	.cdev_pg_fault = old_dev_pager_fault
+};
+
 static void
 dev_pager_init()
 {
@@ -83,22 +95,28 @@ dev_pager_init()
 	mtx_init(&dev_pager_mtx, "dev_pager list", NULL, MTX_DEF);
 }
 
-/*
- * MPSAFE
- */
-static vm_object_t
-dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
-    vm_ooffset_t foff, struct ucred *cred)
+vm_object_t
+cdev_pager_lookup(void *handle)
+{
+	vm_object_t object;
+
+	mtx_lock(&dev_pager_mtx);
+	object = vm_pager_object_lookup(&dev_pager_object_list, handle);
+	vm_object_reference(object);
+	mtx_unlock(&dev_pager_mtx);
+	return (object);
+}
+
+vm_object_t
+cdev_pager_allocate(void *handle, enum obj_type tp, struct cdev_pager_ops *ops,
+    vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred)
 {
-	struct cdev *dev;
 	vm_object_t object, object1;
 	vm_pindex_t pindex;
-	unsigned int npages;
-	vm_paddr_t paddr;
-	vm_ooffset_t off;
-	vm_memattr_t dummy;
-	struct cdevsw *csw;
-	int ref;
+	u_short color;
+
+	if (tp != OBJT_DEVICE)
+		return (NULL);
 
 	/*
 	 * Offset should be page aligned.
@@ -109,27 +127,8 @@ dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	size = round_page(size);
 	pindex = OFF_TO_IDX(foff + size);
 
-	/*
-	 * Make sure this device can be mapped.
-	 */
-	dev = handle;
-	csw = dev_refthread(dev, &ref);
-	if (csw == NULL)
+	if (ops->cdev_pg_ctor(handle, size, prot, foff, cred, &color) != 0)
 		return (NULL);
-
-	/*
-	 * Check that the specified range of the device allows the desired
-	 * protection.
-	 *
-	 * XXX assumes VM_PROT_* == PROT_*
-	 */
-	npages = OFF_TO_IDX(size);
-	for (off = foff; npages--; off += PAGE_SIZE)
-		if (csw->d_mmap(dev, off, &paddr, (int)prot, &dummy) != 0) {
-			dev_relthread(dev, ref);
-			return (NULL);
-		}
-
 	mtx_lock(&dev_pager_mtx);
 
 	/*
@@ -144,9 +143,11 @@ dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	 * device's memory.
 	 */
 	mtx_unlock(&dev_pager_mtx);
-	object1 = vm_object_allocate(OBJT_DEVICE, pindex);
+	object1 = vm_object_allocate(tp, pindex);
 	object1->flags |= OBJ_COLORED;
-	object1->pg_color = atop(paddr) - OFF_TO_IDX(off - PAGE_SIZE);
+	object1->pg_color = color;
+	object1->handle = handle;
+	object1->un_pager.devp.ops = ops;
 	TAILQ_INIT(&object1->un_pager.devp.devp_pglist);
 	mtx_lock(&dev_pager_mtx);
 	object = vm_pager_object_lookup(&dev_pager_object_list, handle);
@@ -162,13 +163,14 @@ dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 			object->handle = handle;
 			TAILQ_INSERT_TAIL(&dev_pager_object_list, object,
 			    pager_object_list);
+			KASSERT(object->type == tp,
+			    ("Inconsistent device pager type %p %d", object, tp));
 		}
 	} else {
 		if (pindex > object->size)
 			object->size = pindex;
 	}
 	mtx_unlock(&dev_pager_mtx);
-	dev_relthread(dev, ref);
 	if (object1 != NULL) {
 		object1->handle = object1;
 		mtx_lock(&dev_pager_mtx);
@@ -180,6 +182,24 @@ dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
 	return (object);
 }
 
+static vm_object_t
+dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred)
+{
+
+	return (cdev_pager_allocate(handle, OBJT_DEVICE, &old_dev_pager_ops,
+	    size, prot, foff, cred));
+}
+
+void
+cdev_pager_free_page(vm_object_t object, vm_page_t m)
+{
+
+	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+	TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, pageq);
+	vm_page_putfake(m);
+}
+
 static void
 dev_pager_dealloc(object)
 	vm_object_t object;
@@ -187,6 +207,8 @@ dev_pager_dealloc(object)
 	vm_page_t m;
 
 	VM_OBJECT_UNLOCK(object);
+	object->un_pager.devp.ops->cdev_pg_dtor(object->handle);
+
 	mtx_lock(&dev_pager_mtx);
 	TAILQ_REMOVE(&dev_pager_object_list, object, pager_object_list);
 	mtx_unlock(&dev_pager_mtx);
@@ -194,35 +216,57 @@ dev_pager_dealloc(object)
 
 	/*
 	 * Free up our fake pages.
 	 */
-	while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist)) != NULL) {
-		TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, pageq);
-		vm_page_putfake(m);
-	}
+	while ((m = TAILQ_FIRST(&object->un_pager.devp.devp_pglist)) != NULL)
+		cdev_pager_free_page(object, m);
 }
 
 static int
-dev_pager_getpages(object, m, count, reqpage)
-	vm_object_t object;
-	vm_page_t *m;
-	int count;
-	int reqpage;
+dev_pager_getpages(vm_object_t object, vm_page_t *ma, int count, int reqpage)
 {
-	vm_pindex_t offset;
-	vm_paddr_t paddr;
-	vm_page_t m_paddr, page;
-	vm_memattr_t memattr;
-	struct cdev *dev;
-	int i, ref, ret;
-	struct cdevsw *csw;
-	struct thread *td;
-	struct file *fpop;
+	int error, i;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
-	dev = object->handle;
-	page = m[reqpage];
-	offset = page->pindex;
+	error = object->un_pager.devp.ops->cdev_pg_fault(object,
+	    IDX_TO_OFF(ma[reqpage]->pindex), PROT_READ, &ma[reqpage]);
+
+	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+
+	for (i = 0; i < count; i++) {
+		if (i != reqpage) {
+			vm_page_lock(ma[i]);
+			vm_page_free(ma[i]);
+			vm_page_unlock(ma[i]);
+		}
+	}
+
+	if (error == VM_PAGER_OK) {
+		TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist,
+		    ma[reqpage], pageq);
+	}
+
+	return (error);
+}
+
+static int
+old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
+    vm_page_t *mres)
+{
+	vm_pindex_t pidx;
+	vm_paddr_t paddr;
+	vm_page_t m_paddr, page;
+	struct cdev *dev;
+	struct cdevsw *csw;
+	struct file *fpop;
+	struct thread *td;
+	vm_memattr_t memattr;
+	int ref, ret;
+
+	pidx = OFF_TO_IDX(offset);
 	memattr = object->memattr;
+
+	VM_OBJECT_UNLOCK(object);
+
+	dev = object->handle;
 	csw = dev_refthread(dev, &ref);
 	if (csw == NULL) {
 		VM_OBJECT_LOCK(object);
@@ -231,11 +275,16 @@ dev_pager_getpages(object, m, count, reqpage)
 	td = curthread;
 	fpop = td->td_fpop;
 	td->td_fpop = NULL;
-	ret = csw->d_mmap(dev, (vm_ooffset_t)offset << PAGE_SHIFT, &paddr,
-	    PROT_READ, &memattr);
-	KASSERT(ret == 0, ("dev_pager_getpage: map function returns error"));
+	ret = csw->d_mmap(dev, offset, &paddr, prot, &memattr);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
+	if (ret != 0) {
+		printf(
+	    "WARNING: dev_pager_getpage: map function returns error %d", ret);
+		VM_OBJECT_LOCK(object);
+		return (VM_PAGER_FAIL);
+	}
+
 	/* If "paddr" is a real page, perform a sanity check on "memattr". */
 	if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL &&
 	    pmap_page_get_memattr(m_paddr) != memattr) {
@@ -243,23 +292,14 @@ dev_pager_getpages(object, m, count, reqpage)
 		printf(
 	"WARNING: A device driver has set \"memattr\" inconsistently.\n");
 	}
-	if ((page->flags & PG_FICTITIOUS) != 0) {
+	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
 		/*
-		 * If the passed in reqpage page is a fake page, update it with
+		 * If the passed in result page is a fake page, update it with
 		 * the new physical address.
 		 */
+		page = *mres;
 		VM_OBJECT_LOCK(object);
 		vm_page_updatefake(page, paddr, memattr);
-		if (count > 1) {
-
-			for (i = 0; i < count; i++) {
-				if (i != reqpage) {
-					vm_page_lock(m[i]);
-					vm_page_free(m[i]);
-					vm_page_unlock(m[i]);
-				}
-			}
-		}
 	} else {
 		/*
 		 * Replace the passed in reqpage page with our own fake page and
@@ -267,14 +307,11 @@ dev_pager_getpages(object, m, count, reqpage)
 		 */
 		page = vm_page_getfake(paddr, memattr);
 		VM_OBJECT_LOCK(object);
-		TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq);
-		for (i = 0; i < count; i++) {
-			vm_page_lock(m[i]);
-			vm_page_free(m[i]);
-			vm_page_unlock(m[i]);
-		}
-		vm_page_insert(page, object, offset);
-		m[reqpage] = page;
+		vm_page_lock(*mres);
+		vm_page_free(*mres);
+		vm_page_unlock(*mres);
+		*mres = page;
+		vm_page_insert(page, object, pidx);
 	}
 	page->valid = VM_PAGE_BITS_ALL;
 	return (VM_PAGER_OK);
@@ -288,6 +325,7 @@ dev_pager_putpages(object, m, count, sync, rtvals)
 	boolean_t sync;
 	int *rtvals;
 {
+
 	panic("dev_pager_putpage called");
 }
 
@@ -304,3 +342,50 @@ dev_pager_haspage(object, pindex, before, after)
 		*after = 0;
 	return (TRUE);
 }
+
+static int
+old_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+	struct cdev *dev;
+	struct cdevsw *csw;
+	vm_memattr_t dummy;
+	vm_ooffset_t off;
+	vm_paddr_t paddr;
+	unsigned int npages;
+	int ref;
+
+	/*
+	 * Make sure this device can be mapped.
+	 */
+	dev = handle;
+	csw = dev_refthread(dev, &ref);
+	if (csw == NULL)
+		return (ENXIO);
+
+	/*
+	 * Check that the specified range of the device allows the desired
+	 * protection.
+	 *
+	 * XXX assumes VM_PROT_* == PROT_*
+	 */
+	npages = OFF_TO_IDX(size);
+	for (off = foff; npages--; off += PAGE_SIZE) {
+		if (csw->d_mmap(dev, off, &paddr, (int)prot, &dummy) != 0) {
+			dev_relthread(dev, ref);
+			return (EINVAL);
+		}
+	}
+
+	dev_ref(dev);
+	dev_relthread(dev, ref);
+	*color = atop(paddr) - OFF_TO_IDX(off - PAGE_SIZE);
+	return (0);
+}
+
+static void
+old_dev_pager_dtor(void *handle)
+{
+
+	dev_rel(handle);
+}
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 0c13786856a7..0c6c875c9e03 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -121,6 +121,7 @@ struct vm_object {
 		 */
 		struct {
 			TAILQ_HEAD(, vm_page) devp_pglist;
+			struct cdev_pager_ops *ops;
 		} devp;
 
 		/*
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index c18719a573a1..c361be86dd77 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -192,5 +192,19 @@ vm_pager_page_unswapped(vm_page_t m)
 		(*pagertab[m->object->type]->pgo_pageunswapped)(m);
 }
 
+struct cdev_pager_ops {
+	int (*cdev_pg_fault)(vm_object_t vm_obj, vm_ooffset_t offset,
+	    int prot, vm_page_t *mres);
+	int (*cdev_pg_ctor)(void *handle, vm_ooffset_t size, vm_prot_t prot,
+	    vm_ooffset_t foff, struct ucred *cred, u_short *color);
+	void (*cdev_pg_dtor)(void *handle);
+};
+
+vm_object_t cdev_pager_allocate(void *handle, enum obj_type tp,
+    struct cdev_pager_ops *ops, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred);
+vm_object_t cdev_pager_lookup(void *handle);
+void cdev_pager_free_page(vm_object_t object, vm_page_t m);
+
 #endif				/* _KERNEL */
 #endif				/* _VM_PAGER_ */
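
For illustration, a hypothetical consumer of the new KPI might look like the
sketch below.  This is not part of the change: the "examplefb" driver name,
its softc layout, and the write-combining frame buffer aperture are invented
for the example.  Only struct cdev_pager_ops, the cdev_pager_* functions, and
the locking contract shown above (the fault handler is entered and must
return with the VM object locked; dev_pager_getpages() links *mres into
devp_pglist after a VM_PAGER_OK return) come from the patch.

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

/* Hypothetical per-device state; fb_base and fb_size are invented names. */
struct examplefb_softc {
	vm_paddr_t	fb_base;	/* physical base of the aperture */
	vm_ooffset_t	fb_size;	/* size of the mappable range */
};

static int
examplefb_pg_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
    vm_ooffset_t foff, struct ucred *cred, u_short *color)
{

	/*
	 * Called once when the pager object is created; a driver would
	 * validate the range and take any references it needs here.
	 */
	*color = 0;
	return (0);
}

static void
examplefb_pg_dtor(void *handle)
{

	/* Release whatever examplefb_pg_ctor() acquired. */
}

static int
examplefb_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot,
    vm_page_t *mres)
{
	struct examplefb_softc *sc;
	vm_page_t page;
	vm_paddr_t paddr;

	/* Entered with the object locked; must return with it locked. */
	sc = object->handle;
	if (offset >= sc->fb_size)
		return (VM_PAGER_FAIL);
	paddr = sc->fb_base + offset;

	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
		/* Re-fault on a fake page we already handed out. */
		vm_page_updatefake(*mres, paddr, VM_MEMATTR_WRITE_COMBINING);
	} else {
		/*
		 * Replace the placeholder page with a fake page for the
		 * device memory; vm_page_getfake() may sleep, so drop the
		 * object lock around it.  dev_pager_getpages() links *mres
		 * into devp_pglist after we return VM_PAGER_OK.
		 */
		VM_OBJECT_UNLOCK(object);
		page = vm_page_getfake(paddr, VM_MEMATTR_WRITE_COMBINING);
		VM_OBJECT_LOCK(object);
		vm_page_lock(*mres);
		vm_page_free(*mres);
		vm_page_unlock(*mres);
		*mres = page;
		vm_page_insert(page, object, OFF_TO_IDX(offset));
	}
	(*mres)->valid = VM_PAGE_BITS_ALL;
	return (VM_PAGER_OK);
}

static struct cdev_pager_ops examplefb_pager_ops = {
	.cdev_pg_ctor =	examplefb_pg_ctor,
	.cdev_pg_dtor =	examplefb_pg_dtor,
	.cdev_pg_fault = examplefb_pg_fault
};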
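
Continuing the sketch, the pager would be created from the driver's
d_mmap_single cdevsw method, which is the intended call site for
cdev_pager_allocate().  Again, the examplefb_* names and the si_drv1 softc
convention are hypothetical:

static int
examplefb_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
    vm_size_t size, struct vm_object **object, int nprot)
{
	struct examplefb_softc *sc;
	vm_object_t obj;

	sc = cdev->si_drv1;
	if (*offset + size > sc->fb_size)
		return (EINVAL);

	/*
	 * The softc is used as the pager handle, so repeated mappings of
	 * the same device share one VM object via the pager list lookup.
	 */
	obj = cdev_pager_allocate(sc, OBJT_DEVICE, &examplefb_pager_ops,
	    size, nprot, *offset, curthread->td_ucred);
	if (obj == NULL)
		return (EINVAL);
	*object = obj;
	return (0);
}

static struct cdevsw examplefb_cdevsw = {
	.d_version =	D_VERSION,
	.d_name =	"examplefb",
	.d_mmap_single = examplefb_mmap_single,
};

With this arrangement a failing fault handler surfaces to the caller as
VM_PAGER_FAIL through the rewritten dev_pager_getpages(), instead of the
KASSERT panic that the old d_mmap-based path triggered.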