/* * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 * The President and Fellows of Harvard College. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include /* * Kernel malloc. */ /* * Fill a block with 0xdeadbeef. */ static void fill_deadbeef(void *vptr, size_t len) { uint32_t *ptr = vptr; size_t i; for (i=0; ipageaddr_and_blocktype & PAGE_FRAME) #define PR_BLOCKTYPE(pr) ((pr)->pageaddr_and_blocktype & ~PAGE_FRAME) #define MKPAB(pa, blk) (((pa)&PAGE_FRAME) | ((blk) & ~PAGE_FRAME)) //////////////////////////////////////// /* * Use one spinlock for the whole thing. Making parts of the kmalloc * logic per-cpu is worthwhile for scalability; however, for the time * being at least we won't, because it adds a lot of complexity and in * OS/161 performance and scalability aren't super-critical. */ static struct spinlock kmalloc_spinlock = SPINLOCK_INITIALIZER; //////////////////////////////////////// /* * We can only allocate whole pages of pageref structure at a time. * This is a struct type for such a page. * * Each pageref page contains 256 pagerefs, which can manage up to * 256 * 4K = 1M of kernel heap. */ #define NPAGEREFS_PER_PAGE (PAGE_SIZE / sizeof(struct pageref)) struct pagerefpage { struct pageref refs[NPAGEREFS_PER_PAGE]; }; /* * This structure holds a pointer to a pageref page and also its * bitmap of free entries. */ #define INUSE_WORDS (NPAGEREFS_PER_PAGE / 32) struct kheap_root { struct pagerefpage *page; uint32_t pagerefs_inuse[INUSE_WORDS]; unsigned numinuse; }; /* * It would be better to make this dynamically sizeable. However, * since we only actually run on System/161 and System/161 is * specifically limited to 16M of RAM, we'll just adopt that as a * static size limit. * * FUTURE: it would be better to pick this number based on the RAM * size we find at boot time. */ #define NUM_PAGEREFPAGES 16 #define TOTAL_PAGEREFS (NUM_PAGEREFPAGES * NPAGEREFS_PER_PAGE) static struct kheap_root kheaproots[NUM_PAGEREFPAGES]; /* * Allocate a page to hold pagerefs. 
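 *
 * The free/in-use state of the pagerefs on such a page is tracked in
 * the kheap_root bitmap above, one bit per pageref in 32-bit words:
 * for pageref index n the word is pagerefs_inuse[n / 32] and the bit
 * is 1 << (n % 32); for example, pageref 40 is bit 8 of word 1. The
 * allocation and release paths in allocpageref() and freepageref()
 * below use exactly this arithmetic.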
*/ static void allocpagerefpage(struct kheap_root *root) { vaddr_t va; KASSERT(root->page == NULL); /* * We release the spinlock while calling alloc_kpages. This * avoids deadlock if alloc_kpages needs to come back here. * Note that this means things can change behind our back... */ spinlock_release(&kmalloc_spinlock); va = alloc_kpages(1); spinlock_acquire(&kmalloc_spinlock); if (va == 0) { kprintf("kmalloc: Couldn't get a pageref page\n"); return; } KASSERT(va % PAGE_SIZE == 0); if (root->page != NULL) { /* Oops, somebody else allocated it. */ spinlock_release(&kmalloc_spinlock); free_kpages(va); spinlock_acquire(&kmalloc_spinlock); /* Once allocated it isn't ever freed. */ KASSERT(root->page != NULL); return; } root->page = (struct pagerefpage *)va; } /* * Allocate a pageref structure. */ static struct pageref * allocpageref(void) { unsigned i,j; uint32_t k; unsigned whichroot; struct kheap_root *root; for (whichroot=0; whichroot < NUM_PAGEREFPAGES; whichroot++) { root = &kheaproots[whichroot]; if (root->numinuse >= NPAGEREFS_PER_PAGE) { continue; } /* * This should probably not be a linear search. */ for (i=0; ipagerefs_inuse[i]==0xffffffff) { /* full */ continue; } for (k=1,j=0; k!=0; k<<=1,j++) { if ((root->pagerefs_inuse[i] & k)==0) { root->pagerefs_inuse[i] |= k; root->numinuse++; if (root->page == NULL) { allocpagerefpage(root); } if (root->page == NULL) { return NULL; } return &root->page->refs[i*32 + j]; } } KASSERT(0); } } /* ran out */ return NULL; } /* * Release a pageref structure. */ static void freepageref(struct pageref *p) { size_t i, j; uint32_t k; unsigned whichroot; struct kheap_root *root; struct pagerefpage *page; for (whichroot=0; whichroot < NUM_PAGEREFPAGES; whichroot++) { root = &kheaproots[whichroot]; page = root->page; if (page == NULL) { KASSERT(root->numinuse == 0); continue; } j = p-page->refs; /* note: j is unsigned, don't test < 0 */ if (j < NPAGEREFS_PER_PAGE) { /* on this page */ i = j/32; k = ((uint32_t)1) << (j%32); KASSERT((root->pagerefs_inuse[i] & k) != 0); root->pagerefs_inuse[i] &= ~k; KASSERT(root->numinuse > 0); root->numinuse--; return; } } /* pageref wasn't on any of the pages */ KASSERT(0); } //////////////////////////////////////// /* * Each pageref is on two linked lists: one list of pages of blocks of * that same size, and one of all blocks. */ static struct pageref *sizebases[NSIZES]; static struct pageref *allbase; //////////////////////////////////////// #ifdef GUARDS /* Space returned to the client is filled with GUARD_RETBYTE */ #define GUARD_RETBYTE 0xa9 /* Padding space (internal fragmentation loss) is filled with GUARD_FILLBYTE */ #define GUARD_FILLBYTE 0xba /* The guard bands on an allocated block should contain GUARD_HALFWORD */ #define GUARD_HALFWORD 0xb0b0 /* The guard scheme uses 8 bytes per block. */ #define GUARD_OVERHEAD 8 /* Pointers are offset by 4 bytes when guards are in use. */ #define GUARD_PTROFFSET 4 /* * Set up the guard values in a block we're about to return. 
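 *
 * The layout this produces within one block (offsets in bytes,
 * following the pointer arithmetic below) is:
 *
 *    0                GUARD_HALFWORD           (2 bytes)
 *    2                clientsize               (2 bytes)
 *    4                client data, pre-filled with GUARD_RETBYTE
 *    4 + clientsize   padding, filled with GUARD_FILLBYTE
 *    blocksize - 4    GUARD_HALFWORD           (2 bytes)
 *    blocksize - 2    size field               (2 bytes)
 *
 * which is where GUARD_PTROFFSET (4) and GUARD_OVERHEAD (8) come
 * from: the client sees block + 4, and 8 bytes of each block go to
 * the guard halfwords and size fields.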
*/ static void * establishguardband(void *block, size_t clientsize, size_t blocksize) { vaddr_t lowguard, lowsize, data, enddata, highguard, highsize, i; KASSERT(clientsize + GUARD_OVERHEAD <= blocksize); KASSERT(clientsize < 65536U); lowguard = (vaddr_t)block; lowsize = lowguard + 2; data = lowsize + 2; enddata = data + clientsize; highguard = lowguard + blocksize - 4; highsize = highguard + 2; *(uint16_t *)lowguard = GUARD_HALFWORD; *(uint16_t *)lowsize = clientsize; for (i=data; i smallerblocksize); KASSERT(clientsize + GUARD_OVERHEAD <= blocksize); enddata = data + clientsize; for (i=enddata; ifreelist_offset == INVALID_OFFSET) { KASSERT(pr->nfree==0); return; } prpage = PR_PAGEADDR(pr); blktype = PR_BLOCKTYPE(pr); KASSERT(blktype >= 0 && blktype < NSIZES); blocksize = sizes[blktype]; #ifdef CHECKGUARDS smallerblocksize = blktype > 0 ? sizes[blktype - 1] : 0; for (i=0; i= MIPS_KSEG0); KASSERT(prpage < MIPS_KSEG1); #endif KASSERT(pr->freelist_offset < PAGE_SIZE); KASSERT(pr->freelist_offset % blocksize == 0); fla = prpage + pr->freelist_offset; fl = (struct freelist *)fla; for (; fl != NULL; fl = fl->next) { fla = (vaddr_t)fl; KASSERT(fla >= prpage && fla < prpage + PAGE_SIZE); KASSERT((fla-prpage) % blocksize == 0); #ifdef CHECKBEEF checkdeadbeef(fl, blocksize); #endif #ifdef CHECKGUARDS blocknum = (fla-prpage) / blocksize; mask = 1U << (blocknum % 32); KASSERT((isfree[blocknum / 32] & mask) == 0); isfree[blocknum / 32] |= mask; #endif KASSERT(fl->next != fl); nfree++; } KASSERT(nfree==pr->nfree); #ifdef CHECKGUARDS numblocks = PAGE_SIZE / blocksize; for (i=0; inext_samesize) { checksubpage(pr); KASSERT(sc < TOTAL_PAGEREFS); sc++; } } for (pr = allbase; pr != NULL; pr = pr->next_all) { checksubpage(pr); KASSERT(ac < TOTAL_PAGEREFS); ac++; } KASSERT(sc==ac); } #else #define checksubpages() #endif //////////////////////////////////////// #ifdef LABELS #define LABEL_PTROFFSET sizeof(struct malloclabel) #define LABEL_OVERHEAD LABEL_PTROFFSET struct malloclabel { vaddr_t label; unsigned generation; }; static unsigned mallocgeneration; /* * Label a block of memory. 
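 *
 * The label is the address of the kmalloc() call site, captured in
 * kmalloc() with __builtin_return_address(0). It is stored, together
 * with the current mallocgeneration, in a struct malloclabel at the
 * start of the block, and the pointer handed back to the caller is
 * advanced past it; that is why LABEL_PTROFFSET is
 * sizeof(struct malloclabel). kheap_dump() later walks the heap pages
 * and prints the label of every block of the current generation that
 * is still allocated.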
*/ static void * establishlabel(void *block, vaddr_t label) { struct malloclabel *ml; ml = block; ml->label = label; ml->generation = mallocgeneration; ml++; return ml; } static void dump_subpage(struct pageref *pr, unsigned generation) { unsigned blocksize = sizes[PR_BLOCKTYPE(pr)]; unsigned numblocks = PAGE_SIZE / blocksize; unsigned numfreewords = DIVROUNDUP(numblocks, 32); uint32_t isfree[numfreewords], mask; vaddr_t prpage; struct freelist *fl; vaddr_t blockaddr; struct malloclabel *ml; unsigned i; for (i=0; ifreelist_offset); for (; fl != NULL; fl = fl->next) { i = ((vaddr_t)fl - prpage) / blocksize; mask = 1U << (i % 32); isfree[i / 32] |= mask; } for (i=0; igeneration != generation) { continue; } kprintf("%5zu bytes at %p, allocated at %p\n", blocksize, (void *)blockaddr, (void *)ml->label); } } static void dump_subpages(unsigned generation) { struct pageref *pr; int i; kprintf("Remaining allocations from generation %u:\n", generation); for (i=0; inext_samesize) { dump_subpage(pr, generation); } } } #else #define LABEL_OVERHEAD 0 #endif /* LABELS */ void kheap_nextgeneration(void) { #ifdef LABELS spinlock_acquire(&kmalloc_spinlock); mallocgeneration++; spinlock_release(&kmalloc_spinlock); #endif } void kheap_dump(void) { #ifdef LABELS /* print the whole thing with interrupts off */ spinlock_acquire(&kmalloc_spinlock); dump_subpages(mallocgeneration); spinlock_release(&kmalloc_spinlock); #else kprintf("Enable LABELS in kmalloc.c to use this functionality.\n"); #endif } void kheap_dumpall(void) { #ifdef LABELS unsigned i; /* print the whole thing with interrupts off */ spinlock_acquire(&kmalloc_spinlock); for (i=0; i<=mallocgeneration; i++) { dump_subpages(i); } spinlock_release(&kmalloc_spinlock); #else kprintf("Enable LABELS in kmalloc.c to use this functionality.\n"); #endif } //////////////////////////////////////// /* * Print the allocated/freed map of a single kernel heap page. */ static void subpage_stats(struct pageref *pr) { vaddr_t prpage, fla; struct freelist *fl; int blktype; unsigned i, n, index; uint32_t freemap[PAGE_SIZE / (SMALLEST_SUBPAGE_SIZE*32)]; checksubpage(pr); KASSERT(spinlock_do_i_hold(&kmalloc_spinlock)); /* clear freemap[] */ for (i=0; i= 0 && blktype < NSIZES); /* compute how many bits we need in freemap and assert we fit */ n = PAGE_SIZE / sizes[blktype]; KASSERT(n <= 32 * ARRAYCOUNT(freemap)); if (pr->freelist_offset != INVALID_OFFSET) { fla = prpage + pr->freelist_offset; fl = (struct freelist *)fla; for (; fl != NULL; fl = fl->next) { fla = (vaddr_t)fl; index = (fla-prpage) / sizes[blktype]; KASSERT(indexnfree, n); kprintf(" "); for (i=0; inext_all) { subpage_stats(pr); } spinlock_release(&kmalloc_spinlock); } //////////////////////////////////////// /* * Remove a pageref from both lists that it's on. */ static void remove_lists(struct pageref *pr, int blktype) { struct pageref **guy; KASSERT(blktype>=0 && blktypenext_samesize) { checksubpage(*guy); if (*guy == pr) { *guy = pr->next_samesize; break; } } for (guy = &allbase; *guy; guy = &(*guy)->next_all) { checksubpage(*guy); if (*guy == pr) { *guy = pr->next_all; break; } } } /* * Given a requested client size, return the block type, that is, the * index into the sizes[] array for the block size to use. 
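 *
 * For example, assuming a sizes[] table of small powers of two such
 * as 16, 32, 64, ..., a 20-byte request maps to the 32-byte block
 * type, the smallest entry that can hold it. Requests too large for
 * the table never get here: kmalloc() sends anything whose size plus
 * guard/label overhead reaches LARGEST_SUBPAGE_SIZE straight to
 * alloc_kpages().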
*/ static inline int blocktype(size_t clientsz) { unsigned i; for (i=0; inext_samesize) { /* check for corruption */ KASSERT(PR_BLOCKTYPE(pr) == blktype); checksubpage(pr); if (pr->nfree > 0) { doalloc: /* comes here after getting a whole fresh page */ KASSERT(pr->freelist_offset < PAGE_SIZE); prpage = PR_PAGEADDR(pr); fla = prpage + pr->freelist_offset; fl = (struct freelist *)fla; retptr = fl; fl = fl->next; pr->nfree--; if (fl != NULL) { KASSERT(pr->nfree > 0); fla = (vaddr_t)fl; KASSERT(fla - prpage < PAGE_SIZE); pr->freelist_offset = fla - prpage; } else { KASSERT(pr->nfree == 0); pr->freelist_offset = INVALID_OFFSET; } #ifdef GUARDS retptr = establishguardband(retptr, clientsz, sz); #endif #ifdef LABELS retptr = establishlabel(retptr, label); #endif checksubpages(); spinlock_release(&kmalloc_spinlock); return retptr; } } /* * No page of the right size available. * Make a new one. * * We release the spinlock while calling alloc_kpages. This * avoids deadlock if alloc_kpages needs to come back here. * Note that this means things can change behind our back... */ spinlock_release(&kmalloc_spinlock); prpage = alloc_kpages(1); if (prpage==0) { /* Out of memory. */ kprintf("kmalloc: Subpage allocator couldn't get a page\n"); return NULL; } KASSERT(prpage % PAGE_SIZE == 0); #ifdef CHECKBEEF /* deadbeef the whole page, as it probably starts zeroed */ fill_deadbeef((void *)prpage, PAGE_SIZE); #endif spinlock_acquire(&kmalloc_spinlock); pr = allocpageref(); if (pr==NULL) { /* Couldn't allocate accounting space for the new page. */ spinlock_release(&kmalloc_spinlock); free_kpages(prpage); kprintf("kmalloc: Subpage allocator couldn't get pageref\n"); return NULL; } pr->pageaddr_and_blocktype = MKPAB(prpage, blktype); pr->nfree = PAGE_SIZE / sizes[blktype]; /* * Note: fl is volatile because the MIPS toolchain we were * using in spring 2001 attempted to optimize this loop and * blew it. Making fl volatile inhibits the optimization. */ fla = prpage; fl = (struct freelist *)fla; fl->next = NULL; for (i=1; infree; i++) { fl = (struct freelist *)(fla + i*sizes[blktype]); fl->next = (struct freelist *)(fla + (i-1)*sizes[blktype]); KASSERT(fl != fl->next); } fla = (vaddr_t) fl; pr->freelist_offset = fla - prpage; KASSERT(pr->freelist_offset == (pr->nfree-1)*sizes[blktype]); pr->next_samesize = sizebases[blktype]; sizebases[blktype] = pr; pr->next_all = allbase; allbase = pr; /* This is kind of cheesy, but avoids duplicating the alloc code. */ goto doalloc; } /* * Free a pointer previously returned from subpage_kmalloc. If the * pointer is not on any heap page we recognize, return -1. */ static int subpage_kfree(void *ptr) { int blktype; // index into sizes[] that we're using vaddr_t ptraddr; // same as ptr struct pageref *pr; // pageref for page we're freeing in vaddr_t prpage; // PR_PAGEADDR(pr) vaddr_t fla; // free list entry address struct freelist *fl; // free list entry vaddr_t offset; // offset into page #ifdef GUARDS size_t blocksize, smallerblocksize; #endif ptraddr = (vaddr_t)ptr; #ifdef GUARDS if (ptraddr % PAGE_SIZE == 0) { /* * With guard bands, all client-facing subpage * pointers are offset by GUARD_PTROFFSET (which is 4) * from the underlying blocks and are therefore not * page-aligned. So a page-aligned pointer is not one * of ours. Catch this up front, as otherwise * subtracting GUARD_PTROFFSET could give a pointer on * a page we *do* own, and then we'll panic because * it's not a valid one. 
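		 *
		 * Concretely: a whole-page allocation from kmalloc() hands
		 * the client a page-aligned pointer. Subtracting 4 from it
		 * on kfree() would point at the last four bytes of the
		 * preceding page; if that page happens to be one of our
		 * subpage heap pages, the positioning/alignment check below
		 * would reject what is really a legitimate free of a large
		 * block.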
		 */
		return -1;
	}
	ptraddr -= GUARD_PTROFFSET;
#endif
#ifdef LABELS
	if (ptraddr % PAGE_SIZE == 0) {
		/* ditto */
		return -1;
	}
	ptraddr -= LABEL_PTROFFSET;
#endif

	spinlock_acquire(&kmalloc_spinlock);
	checksubpages();

	/* Silence warnings with gcc 4.8 -Og (but not -O2) */
	prpage = 0;
	blktype = 0;

	for (pr = allbase; pr; pr = pr->next_all) {
		prpage = PR_PAGEADDR(pr);
		blktype = PR_BLOCKTYPE(pr);
		KASSERT(blktype >= 0 && blktype < NSIZES);

		/* check for corruption */
		KASSERT(blktype>=0 && blktype<NSIZES);
		checksubpage(pr);

		if (ptraddr >= prpage && ptraddr < prpage + PAGE_SIZE) {
			break;
		}
	}

	if (pr==NULL) {
		/* Not on any of our pages - not a subpage allocation */
		spinlock_release(&kmalloc_spinlock);
		return -1;
	}

	offset = ptraddr - prpage;

	/* Check for proper positioning and alignment */
	if (offset >= PAGE_SIZE || offset % sizes[blktype] != 0) {
		panic("kfree: subpage free of invalid addr %p\n", ptr);
	}

#ifdef GUARDS
	blocksize = sizes[blktype];
	smallerblocksize = blktype > 0 ? sizes[blktype - 1] : 0;
	checkguardband(ptraddr, smallerblocksize, blocksize);
#endif

	/*
	 * Clear the block to 0xdeadbeef to make it easier to detect
	 * uses of dangling pointers.
	 */
	fill_deadbeef((void *)ptraddr, sizes[blktype]);

	/*
	 * We probably ought to check for free twice by seeing if the block
	 * is already on the free list. But that's expensive, so we don't.
	 */

	fla = prpage + offset;
	fl = (struct freelist *)fla;
	if (pr->freelist_offset == INVALID_OFFSET) {
		fl->next = NULL;
	}
	else {
		fl->next = (struct freelist *)(prpage + pr->freelist_offset);

		/* this block should not already be on the free list! */
#ifdef SLOW
		{
			struct freelist *fl2;

			for (fl2 = fl->next; fl2 != NULL; fl2 = fl2->next) {
				KASSERT(fl2 != fl);
			}
		}
#else
		/* check just the head */
		KASSERT(fl != fl->next);
#endif
	}
	pr->freelist_offset = offset;
	pr->nfree++;

	KASSERT(pr->nfree <= PAGE_SIZE / sizes[blktype]);
	if (pr->nfree == PAGE_SIZE / sizes[blktype]) {
		/* Whole page is free. */
		remove_lists(pr, blktype);
		freepageref(pr);
		/* Call free_kpages without kmalloc_spinlock. */
		spinlock_release(&kmalloc_spinlock);
		free_kpages(prpage);
	}
	else {
		spinlock_release(&kmalloc_spinlock);
	}

#ifdef SLOWER
	/* Don't get the lock unless checksubpages does something. */
	spinlock_acquire(&kmalloc_spinlock);
	checksubpages();
	spinlock_release(&kmalloc_spinlock);
#endif

	return 0;
}

//
////////////////////////////////////////////////////////////

/*
 * Allocate a block of size SZ. Redirect either to subpage_kmalloc or
 * alloc_kpages depending on how big SZ is.
 */
void *
kmalloc(size_t sz)
{
	size_t checksz;
#ifdef LABELS
	vaddr_t label;
#endif

#ifdef LABELS
#ifdef __GNUC__
	label = (vaddr_t)__builtin_return_address(0);
#else
#error "Don't know how to get return address with this compiler"
#endif /* __GNUC__ */
#endif /* LABELS */

	checksz = sz + GUARD_OVERHEAD + LABEL_OVERHEAD;
	if (checksz >= LARGEST_SUBPAGE_SIZE) {
		unsigned long npages;
		vaddr_t address;

		/* Round up to a whole number of pages. */
		npages = (sz + PAGE_SIZE - 1)/PAGE_SIZE;

		address = alloc_kpages(npages);
		if (address==0) {
			return NULL;
		}
		KASSERT(address % PAGE_SIZE == 0);

		return (void *)address;
	}

#ifdef LABELS
	return subpage_kmalloc(sz, label);
#else
	return subpage_kmalloc(sz);
#endif
}

/*
 * Free a block previously returned from kmalloc.
 */
void
kfree(void *ptr)
{
	/*
	 * Try subpage first; if that fails, assume it's a big allocation.
	 */
	if (ptr == NULL) {
		return;
	}
	else if (subpage_kfree(ptr)) {
		KASSERT((vaddr_t)ptr%PAGE_SIZE==0);
		free_kpages((vaddr_t)ptr);
	}
}
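
/*
 * Illustrative sketch (not part of the original code, and kept under
 * #if 0 so it is never compiled): a minimal example of how the
 * interfaces above fit together. The function name
 * kmalloc_usage_sketch is made up for the example.
 */
#if 0
static
void
kmalloc_usage_sketch(void)
{
	void *small;
	void *big;

	/* Start a fresh labeling generation (a no-op unless LABELS is on). */
	kheap_nextgeneration();

	/* A small request is served by the subpage allocator. */
	small = kmalloc(40);
	KASSERT(small != NULL);

	/* A large request is rounded up to whole pages via alloc_kpages. */
	big = kmalloc(3 * PAGE_SIZE);
	KASSERT(big != NULL);
	KASSERT((vaddr_t)big % PAGE_SIZE == 0);

	/*
	 * With LABELS enabled, this reports the blocks of the current
	 * generation that are still allocated, tagged with the address
	 * of their kmalloc() call site.
	 */
	kheap_dump();

	/* kfree() routes each pointer back to the allocator it came from. */
	kfree(small);
	kfree(big);

	/* Freeing NULL is a no-op. */
	kfree(NULL);
}
#endif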