Index: sys/sys/pmem.h
===================================================================
RCS file: sys/sys/pmem.h
diff -N sys/sys/pmem.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/sys/pmem.h	27 Nov 2008 08:55:39 -0000
@@ -0,0 +1,224 @@
+/*	$NetBSD: $	*/
+/*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christoph Egger.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _SYS_PMEM_H_
+#define _SYS_PMEM_H_
+
+#include <sys/types.h>
+#include <sys/vmem.h>
+
+enum pmem_type {
+	/* physical RAM types */
+	PMEM_T_HOTSPARE	= 0x0001,	/* reserved to replace RAM that is
+					 * in a critical or defective state
+					 */
+	PMEM_T_CRITICAL	= 0x0004,	/* usable, but may become unusable,
+					 * e.g. when accessed too frequently
+					 * or when it cannot run at lower
+					 * power
+					 */
+	PMEM_T_DEFECT	= 0x0008,	/* really broken, unusable */
+	PMEM_T_OFFLINE	= 0x0010,	/* unusable, but not necessarily
+					 * defective
+					 */
+	PMEM_T_SERIALNR	= 0x0020,	/* serial number is available */
+
+	PMEM_T_MASK	= 0xffff,	/* physical type mask */
+
+	/* logical *use* RAM types */
+	PMEM_U_TEXT	= 0x000010000,	/* code */
+	PMEM_U_DMABUF	= 0x000020000,	/* DMA buffer */
+	PMEM_U_FIRMWARE	= 0x000040000,	/* firmware data (e.g. ACPI) */
+	PMEM_U_RAM	= 0x000080000,	/* normal usable RAM */
+	PMEM_U_ROM	= 0x000100000,	/* any ROM */
+	PMEM_U_MMIO	= 0x000200000,	/* any MMIO (e.g. PCI memory) */
+
+	PMEM_U_MIRROR	= 0x000400000,	/* mirrors another range to provide
+					 * a valid copy in case of memory
+					 * errors during access.  This allows,
+					 * for example, turning an
+					 * uncorrectable machine-check error
+					 * into a correctable one.
+					 */
+	PMEM_U_PTP	= 0x000800000,	/* pagetable pages (e.g. MMU, IOMMU) */
+	PMEM_U_MASK	= 0xffff0000,	/* logical use mask */
+};
+
+enum pmem_prot {			/* hardware implementation */
+	PMEM_PROT_READ	= 0x01,		/* PCI bus bridge, IOMMU */
+	PMEM_PROT_WRITE	= 0x02,		/* PCI bus bridge, IOMMU, MTRR,
+					 * AMD Elan SC520 PAR
+					 */
+	PMEM_PROT_EXEC	= 0x04,		/* AMD Elan SC520 PAR */
+};
+
+enum pmem_props {			/* hardware implementation */
+	PMEM_P_WTHRU	= 0x01,		/* MTRR */
+	PMEM_P_WBACK	= 0x02,		/* MTRR */
+	PMEM_P_WCOMB	= 0x04,		/* MTRR */
+	PMEM_P_UNCACHED	= 0x08,		/* MTRR, AMD Elan SC520 PAR */
+	PMEM_P_PREFETCH	= 0x10,		/* PCI bus bridge */
+	PMEM_P_32BIT	= 0x20,		/* 32-bit access */
+	PMEM_P_64BIT	= 0x40,		/* 64-bit access */
+	PMEM_P_DMA	= 0x80,		/* DMA-safe memory */
+};
+
+
+typedef enum pmem_type	pmem_type_t;
+typedef enum pmem_prot	pmem_prot_t;
+typedef enum pmem_props	pmem_props_t;
+
+
+struct numa_info;
+struct pmem_arena;			/* one arena per NUMA node */
+struct pmem_mapping;
+
+/* Describes a memory DIMM that is physically present in the machine.
+ * Some of this information may be provided by MD bootstrap code,
+ * most of it by spdmem(4).
+ */
+struct pmem_dimm_spec {
+	paddr_t		d_start;
+	paddr_t		d_end;
+	pmem_type_t	d_type;
+	uint32_t	d_serial;	/* DIMM serial number */
+};
+
+struct pmem_region_spec {
+	paddr_t		r_start;
+	paddr_t		r_end;
+	pmem_prot_t	r_prot;
+	pmem_props_t	r_props;
+	pmem_type_t	r_type;
+};
+
+struct pmem_region {
+	struct pmem_region_spec	r_spec;
+
+	u_int		r_refcount;
+	vmem_t		*r_vmem;
+};
+
+struct pmem_mapping {
+	int	dummy;	/* TBD */
+};
+
+typedef uint32_t pmem_metric_t;
+
+
+/* Create an arena.  [start, end) describes the address range
+ * of the arena, including all holes.
+ */
+struct pmem_arena *
+pmem_arena_create(paddr_t start, paddr_t end);
+
+/* Add a new memory module to the arena. */
+int
+pmem_arena_add_dimm(struct pmem_arena *arena, struct pmem_dimm_spec *dimm);
+
+/* Load physical addresses [start, end) having the given default properties.
+ */
+int
+pmem_region_create(paddr_t start, paddr_t end,
+    pmem_type_t type, pmem_prot_t prot, pmem_props_t props);
+
+/* Connect loaded physical addresses with this arena. */
+int
+pmem_arena_add_regions(struct pmem_arena *arena);
+
+/* Load the arena with physical addresses [start, end) having the given
+ * default properties.  This does the same as pmem_region_create() and
+ * pmem_arena_add_regions() in one step, but cannot be used during very
+ * early MD bootstrap.
+ */
+int
+pmem_arena_prime(struct pmem_arena *arena, paddr_t start, paddr_t end,
+    pmem_type_t type, pmem_prot_t prot, pmem_props_t props);
+
+/* Connect two arenas. */
+int
+pmem_arena_connect(struct pmem_arena *left, struct pmem_arena *right,
+    struct pmem_mapping *m, pmem_metric_t metric);
+
+/* Reserve a region in the arena that meets the given criteria.
+ * The region is returned with a reference count of at least 1.
+ */
+struct pmem_region *
+pmem_alloc(struct pmem_arena *arena, paddr_t minaddr, paddr_t maxaddr,
+    pmem_prot_t prot, pmem_props_t props, pmem_type_t type,
+    size_t align, size_t phase, size_t size, size_t nocross,
+    vm_flag_t flags, pmem_metric_t maxmetric);
+
+int
+pmem_free(struct pmem_region **r);
+
+/* Get/set properties on the region `r'. */
+int
+pmem_get(struct pmem_region *r, pmem_prot_t *prot, pmem_props_t *props,
+    pmem_type_t *type);
+
+int
+pmem_set(struct pmem_region *r, pmem_prot_t prot, pmem_props_t props,
+    pmem_type_t type);
+
+/* Count another reference to region `r'. */
+void
+pmem_incref(struct pmem_region *r);
+
+/* Reduce the reference count on `r' by one.
+ * pmem_decref may reclaim the resources held by `r'.
+ */
+void
+pmem_decref(struct pmem_region *r);
+
+/* Map region `r' into arena `a'.
+ *
+ * Returns NULL on failure.  `paddr' is undefined on failure.
+ *
+ * On success, return `r' if region `r' belongs to arena `a', or else
+ * return an alias for region `r' in `a'.  The returned region's reference
+ * count is increased by one.  Set `paddr' to the physical address of
+ * the start of the region `r' in arena `a'.
+ */
+struct pmem_region *
+pmem_map(struct pmem_arena *arena, struct pmem_region *r, paddr_t *paddr);
+
+/* Remove a mapping of `r' from its arena.  Decrease the reference count
+ * by one.
+ */
+void
+pmem_unmap(struct pmem_region *r);
+
+
+#endif /* _SYS_PMEM_H_ */
Index: sys/kern/subr_pmem.c
===================================================================
RCS file: sys/kern/subr_pmem.c
diff -N sys/kern/subr_pmem.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/kern/subr_pmem.c	27 Nov 2008 08:55:39 -0000
@@ -0,0 +1,479 @@
+/*	$NetBSD: $	*/
+
+/*
+ * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christoph Egger.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: $");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kmem.h>
+#include <sys/queue.h>
+#include <sys/pmem.h>
+
+#include <uvm/uvm_extern.h>	/* for PAGE_SIZE */
+
+/* structs */
+
+struct pmem_dimm {
+	struct pmem_arena *d_pmarena;	/* pointer to pmem_arena */
+	vmem_t		*d_vmem;
+	vmem_addr_t	d_vmemaddr;
+	TAILQ_ENTRY(pmem_dimm) d_entry;
+	TAILQ_ENTRY(pmem_dimm) d_arena_entry;
+
+	struct pmem_dimm_spec d_spec;
+};
+
+struct pmem_phys_region {
+	struct pmem_arena *r_pmarena;	/* pointer to pmem_arena */
+	vmem_t		*r_vmem;
+	vmem_addr_t	r_vmemaddr;
+	TAILQ_ENTRY(pmem_phys_region) r_entry;
+	TAILQ_ENTRY(pmem_phys_region) r_arena_entry;
+
+	struct pmem_region_spec r_spec;
+};
+
+struct pmem_arena {
+	paddr_t		pa_start;
+	paddr_t		pa_end;
+
+	struct numa_info *numa_info;
+
+	TAILQ_ENTRY(pmem_arena) pa_entry;
+
+	TAILQ_HEAD(, pmem_dimm) pa_dimms;
+	TAILQ_HEAD(, pmem_phys_region) pa_regions;
+};
+
+/* lists */
+
+static TAILQ_HEAD(pmem_dimm_head, pmem_dimm) pmem_dimm_head =
+    TAILQ_HEAD_INITIALIZER(pmem_dimm_head);
+
+static TAILQ_HEAD(pmem_region_head, pmem_phys_region) pmem_region_head =
+    TAILQ_HEAD_INITIALIZER(pmem_region_head);
+
+static TAILQ_HEAD(pmem_arena_head, pmem_arena) pmem_arena_head =
+    TAILQ_HEAD_INITIALIZER(pmem_arena_head);
+
+/* macros */
+
+#define DIMM_ADD(dimm) \
+	TAILQ_INSERT_TAIL(&(pmem_dimm_head), (dimm), d_entry)
+#define DIMM_REMOVE(dimm) \
+	TAILQ_REMOVE(&(pmem_dimm_head), (dimm), d_entry)
+
+
+#define REGION_ADD(region) \
+	TAILQ_INSERT_TAIL(&(pmem_region_head), (region), r_entry)
+#define REGION_REMOVE(region) \
+	TAILQ_REMOVE(&(pmem_region_head), (region), r_entry)
+#define REGION_FOREACH(idx) \
+	TAILQ_FOREACH((idx), &(pmem_region_head), r_entry)
+
+
+#define ARENA_ADD(arena) \
+	TAILQ_INSERT_TAIL(&(pmem_arena_head), (arena), pa_entry)
+#define ARENA_REMOVE(arena) \
+	TAILQ_REMOVE(&(pmem_arena_head), (arena), pa_entry)
+
+
+#define ARENA_DIMM_INIT(arena) \
+	TAILQ_INIT(&(arena)->pa_dimms)
+#define ARENA_DIMM_ADD(arena, dimm) \
+	TAILQ_INSERT_TAIL(&(arena)->pa_dimms, (dimm), d_arena_entry)
+#define ARENA_DIMM_REMOVE(arena, dimm) \
+	TAILQ_REMOVE(&(arena)->pa_dimms, (dimm), d_arena_entry)
+#define ARENA_DIMM_EMPTY(arena) \
+	TAILQ_EMPTY(&(arena)->pa_dimms)
+
+#define ARENA_REGION_INIT(arena) \
+	TAILQ_INIT(&(arena)->pa_regions)
+#define ARENA_REGION_ADD(arena, region) \
+	TAILQ_INSERT_TAIL(&(arena)->pa_regions, (region), r_arena_entry)
+#define ARENA_REGION_REMOVE(arena, region) \
+	TAILQ_REMOVE(&(arena)->pa_regions, (region), r_arena_entry)
+#define ARENA_REGION_EMPTY(arena) \
+	TAILQ_EMPTY(&(arena)->pa_regions)
+#define ARENA_REGION_FOREACH(arena, region) \
+	TAILQ_FOREACH((region), &(arena)->pa_regions, r_arena_entry)
+
+
+/* internal */
+
+static struct pmem_phys_region *
+pmem_region_alloc(paddr_t start, paddr_t end,
+    pmem_type_t type, pmem_prot_t prot, pmem_props_t props)
+{
+	struct pmem_phys_region *region;
+	struct pmem_region_spec region_spec;
+
+	KASSERT(end > start);
+
+	region_spec.r_start = start;
+	region_spec.r_end = end;
+	region_spec.r_prot = prot;
+	region_spec.r_props = props;
+	region_spec.r_type = type;
+
+	region = kmem_zalloc(sizeof(struct pmem_phys_region), KM_NOSLEEP);
+	if (region == NULL)
+		return NULL;
+
+	memcpy(&region->r_spec, &region_spec, sizeof(struct pmem_region_spec));
+
+	return region;
+}
+
+static int
+pmem_arena_add_region(struct pmem_arena *arena,
+    struct pmem_phys_region *region)
+{
+	int error;
+
+	KASSERT(arena != NULL);
+	KASSERT(region != NULL);
+
+	region->r_vmem = vmem_create("pmem_region",
+	    region->r_spec.r_start,
+	    region->r_spec.r_end - region->r_spec.r_start,
+	    PAGE_SIZE,
+	    NULL, NULL,
+	    NULL,		/* vmem backend */
+	    0,			/* qcache_max */
+	    VM_NOSLEEP, IPL_NONE);
+	if (region->r_vmem == NULL) {
+		error = ENOMEM;
+		goto err0;
+	}
+
+	region->r_pmarena = arena;
+	ARENA_REGION_ADD(arena, region);
+
+	/* XXX register callback handler */
+
+	return 0;
+
+err0:
+	return error;
+}
+
+/* API */
+
+/* Load physical addresses [start, end) having the given default properties.
+ */
+int
+pmem_region_create(paddr_t start, paddr_t end,
+    pmem_type_t type, pmem_prot_t prot, pmem_props_t props)
+{
+	struct pmem_phys_region *region;
+
+	KASSERT(end > start);
+
+	region = pmem_region_alloc(start, end, type, prot, props);
+	if (region == NULL)
+		return ENOMEM;
+
+	REGION_ADD(region);
+
+	return 0;
+}
+
+/* Connect loaded physical addresses with this arena. */
+int
+pmem_arena_add_regions(struct pmem_arena *arena)
+{
+	int error = 0;
+	struct pmem_phys_region *region;
+
+	KASSERT(arena != NULL);
+
+	REGION_FOREACH(region) {
+		if (region->r_pmarena != NULL)
+			continue;
+		KASSERT(region->r_vmem == NULL);
+		if (region->r_spec.r_start < arena->pa_start)
+			continue;
+		if (region->r_spec.r_end > arena->pa_end)
+			continue;
+
+		error = pmem_arena_add_region(arena, region);
+		if (error)
+			goto out;
+	}
+
+out:
+	return error;
+}
+
+int
+pmem_arena_add_dimm(struct pmem_arena *arena, struct pmem_dimm_spec *dimm_spec)
+{
+	int error;
+	struct pmem_dimm *dimm;
+
+	KASSERT(arena != NULL);
+	KASSERT(dimm_spec != NULL);
+	KASSERT(dimm_spec->d_end > dimm_spec->d_start);
+
+	dimm = kmem_zalloc(sizeof(struct pmem_dimm), KM_NOSLEEP);
+	if (dimm == NULL) {
+		error = ENOMEM;
+		goto err0;
+	}
+
+	dimm->d_pmarena = arena;
+
+	memcpy(&dimm->d_spec, dimm_spec, sizeof(struct pmem_dimm_spec));
+
+	DIMM_ADD(dimm);
+	ARENA_DIMM_ADD(arena, dimm);
+
+	/* XXX register callback handler */
+
+	return 0;
+
+err0:
+	return error;
+}
+
+struct pmem_arena *
+pmem_arena_create(paddr_t start, paddr_t end)
+{
+	struct pmem_arena *arena;
+
+	KASSERT(end > start);
+
+	arena = kmem_zalloc(sizeof(struct pmem_arena), KM_NOSLEEP);
+	if (arena == NULL)
+		goto err0;
+
+	arena->pa_start = start;
+	arena->pa_end = end;
+	ARENA_DIMM_INIT(arena);
+	ARENA_REGION_INIT(arena);
+
+	ARENA_ADD(arena);
+
+	return arena;
+
+err0:
+	return NULL;
+}
+
+int
+pmem_arena_prime(struct pmem_arena *arena, paddr_t start, paddr_t end,
+    pmem_type_t type, pmem_prot_t prot, pmem_props_t props)
+{
+	int error;
+	struct pmem_phys_region *region;
+
+	KASSERT(arena != NULL);
+	KASSERT(end > start);
+
+	region = pmem_region_alloc(start, end, type, prot, props);
+	if (region == NULL) {
+		error = ENOMEM;
+		goto err0;
+	}
+
+	error = pmem_arena_add_region(arena, region);
+	if (error)
+		goto err1;
+
+	REGION_ADD(region);
+	return 0;
+
+err1:
+	kmem_free(region, sizeof(struct pmem_phys_region));
+err0:
+	return error;
+}
+
+/* Connect two arenas. */
+int
+pmem_arena_connect(struct pmem_arena *left, struct pmem_arena *right,
+    struct pmem_mapping *m, pmem_metric_t metric)
+{
+	KASSERT(left != NULL);
+	KASSERT(right != NULL);
+
+	/* XXX: not yet implemented */
+
+	return 0;
+}
+
+/* Reserve a region in the arena that meets the given criteria.
+ * The region is returned with a reference count of at least 1.
+ */
+struct pmem_region *
+pmem_alloc(struct pmem_arena *arena, paddr_t minaddr, paddr_t maxaddr,
+    pmem_prot_t prot, pmem_props_t props, pmem_type_t type,
+    size_t align, size_t phase, size_t size, size_t nocross,
+    vm_flag_t flags, pmem_metric_t maxmetric)
+{
+	struct pmem_region *r = NULL;
+	struct pmem_phys_region *tmp;
+	vmem_addr_t addr;
+
+	KASSERT(arena != NULL);
+	KASSERT(maxaddr > minaddr);
+	KASSERT(size > 0);
+	KASSERT((maxaddr - minaddr) >= size);
+	KASSERT((flags & (VM_BESTFIT|VM_INSTANTFIT)) != 0);
+	KASSERT((~flags & (VM_BESTFIT|VM_INSTANTFIT)) != 0);
+
+	/* Check whether the search area overlaps the arena at all. */
+	if (arena->pa_start >= maxaddr)
+		return NULL;
+	if (arena->pa_end <= minaddr)
+		return NULL;
+
+	/* Clip the search area to the arena. */
+	if (arena->pa_start > minaddr)
+		minaddr = arena->pa_start;
+	if (arena->pa_end < maxaddr)
+		maxaddr = arena->pa_end;
+
+	/* Check whether the clipped area is still large enough. */
+	if ((maxaddr - minaddr) < size)
+		return NULL;
+
+	r = kmem_zalloc(sizeof(struct pmem_region), KM_NOSLEEP);
+	if (r == NULL)
+		return NULL;
+
+	ARENA_REGION_FOREACH(arena, tmp) {
+		if (tmp->r_spec.r_prot != prot)
+			continue;
+		if (tmp->r_spec.r_props != props)
+			continue;
+		if (tmp->r_spec.r_type != type)
+			continue;
+
+		addr = vmem_xalloc(tmp->r_vmem, size, align, phase,
+		    nocross, minaddr, maxaddr, flags | VM_NOSLEEP);
+		if (addr == VMEM_ADDR_NULL)
+			continue;
+
+		goto found;
+	}
+
+	kmem_free(r, sizeof(struct pmem_region));
+	return NULL;
+
+found:
+	r->r_spec.r_start = addr;
+	r->r_spec.r_end = addr + size;
+	r->r_spec.r_prot = tmp->r_spec.r_prot;
+	r->r_spec.r_props = tmp->r_spec.r_props;
+	r->r_spec.r_type = tmp->r_spec.r_type;
+	r->r_refcount = 1;
+	r->r_vmem = tmp->r_vmem;
+
+	return r;
+}
+
+int
+pmem_free(struct pmem_region **r)
+{
+	struct pmem_region *r1;
+
+	KASSERT(r != NULL);
+	KASSERT(*r != NULL);
+
+	r1 = *r;
+
+	/* still referenced by others */
+	if (r1->r_refcount > 1)
+		return EBUSY;
+
+	vmem_xfree(r1->r_vmem, r1->r_spec.r_start,
+	    r1->r_spec.r_end - r1->r_spec.r_start);
+	kmem_free(r1, sizeof(struct pmem_region));
+
+	*r = NULL;
+	return 0;
+}
+
+/* Get/set properties on the region `r'. */
+int
+pmem_get(struct pmem_region *r, pmem_prot_t *prot, pmem_props_t *props,
+    pmem_type_t *type)
+{
+	KASSERT(r != NULL);
+	KASSERT(prot != NULL);
+	KASSERT(props != NULL);
+	KASSERT(type != NULL);
+
+	*prot = r->r_spec.r_prot;
+	*props = r->r_spec.r_props;
+	*type = r->r_spec.r_type;
+
+	return 0;
+}
+
+int
+pmem_set(struct pmem_region *r, pmem_prot_t prot, pmem_props_t props,
+    pmem_type_t type)
+{
+	KASSERT(r != NULL);
+
+	/* XXX: not yet implemented */
+
+	return 0;
+}
+
+/* Count another reference to region `r'. */
+void
+pmem_incref(struct pmem_region *r)
+{
+	KASSERT(r != NULL);
+
+	r->r_refcount++;
+}
+
+void
+pmem_decref(struct pmem_region *r)
+{
+	KASSERT(r != NULL);
+	KASSERT(r->r_refcount > 0);
+
+	r->r_refcount--;
+	/* XXX: reclaim resources when the count drops to zero */
+}
+
+struct pmem_region *
+pmem_map(struct pmem_arena *arena, struct pmem_region *r, paddr_t *paddr)
+{
+	KASSERT(arena != NULL);
+	KASSERT(r != NULL);
+	KASSERT(paddr != NULL);
+
+	/* XXX: not yet implemented */
+
+	return NULL;
+}
+
+void
+pmem_unmap(struct pmem_region *r)
+{
+	KASSERT(r != NULL);
+
+	/* XXX: not yet implemented */
+}
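
For illustration, here is a minimal sketch of how early MD code might
drive this interface once the patch is applied.  The function name
(example_md_bootstrap), the address ranges and the chosen properties
are made up for the example, and error handling is reduced to panic();
it only shows the intended call order: create an arena, prime it with
usable RAM, then carve allocations out of it.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pmem.h>

/*
 * Hypothetical example: bring up one arena and carve a DMA-safe
 * buffer out of it.  All ranges and properties are invented.
 */
void
example_md_bootstrap(void)
{
	struct pmem_arena *arena;
	struct pmem_region *r;
	int error;

	/* One arena for the node, holes included: [0, 1GB). */
	arena = pmem_arena_create(0, 0x40000000);
	if (arena == NULL)
		panic("pmem: cannot create arena");

	/* Load the usable RAM above 1MB in one step. */
	error = pmem_arena_prime(arena, 0x100000, 0x40000000,
	    PMEM_U_RAM, PMEM_PROT_READ | PMEM_PROT_WRITE,
	    PMEM_P_WBACK | PMEM_P_DMA);
	if (error)
		panic("pmem: cannot prime arena: %d", error);

	/* Reserve 64KB of page-aligned, DMA-safe RAM below 16MB. */
	r = pmem_alloc(arena, 0, 0x1000000,
	    PMEM_PROT_READ | PMEM_PROT_WRITE,
	    PMEM_P_WBACK | PMEM_P_DMA, PMEM_U_RAM,
	    PAGE_SIZE, 0, 65536, 0,
	    VM_INSTANTFIT | VM_NOSLEEP, 0);
	if (r == NULL)
		panic("pmem: allocation failed");

	/* ... hand the region to a driver; drop it when done. */
	error = pmem_free(&r);
	if (error)
		panic("pmem: free failed: %d", error);
}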