From eb18645f17877de4ced951eed5abac61bdfcd5c5 Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 5 Aug 2010 12:10:54 -0400 Subject: [PATCH 1/7] fix allocuvm() to handle sbrk() with non-page-granularity argument (maybe this never worked, but it works now) --- Makefile | 2 +- defs.h | 2 -- mmu.h | 4 ++++ usertests.c | 34 ++++++++++++++++++++++++++++++++++ vm.c | 36 ++++++++++++++++++++++++++---------- 5 files changed, 65 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index a0c901d..ef9b66d 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ AS = $(TOOLPREFIX)gas LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) ASFLAGS = -m32 -gdwarf-2 # FreeBSD ld wants ``elf_i386_fbsd'' diff --git a/defs.h b/defs.h index 86268b2..b414a5f 100644 --- a/defs.h +++ b/defs.h @@ -153,8 +153,6 @@ void uartintr(void); void uartputc(int); // vm.c -#define PGROUNDUP(sz) ((sz+PGSIZE-1) & ~(PGSIZE-1)) -extern pde_t *kpgdir; void pminit(void); void ksegment(void); void kvmalloc(void); diff --git a/mmu.h b/mmu.h index 378ae22..e3ea46f 100644 --- a/mmu.h +++ b/mmu.h @@ -126,6 +126,9 @@ struct segdesc { #define PTXSHIFT 12 // offset of PTX in a linear address #define PDXSHIFT 22 // offset of PDX in a linear address +#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) ((char*)((((unsigned int)a) & ~(PGSIZE-1)))) + // Page table/directory entry flags. #define PTE_P 0x001 // Present #define PTE_W 0x002 // Writeable @@ -148,6 +151,7 @@ struct segdesc { #define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF) typedef uint pte_t; +extern pde_t *kpgdir; // Control Register flags #define CR0_PE 0x00000001 // Protection Enable diff --git a/usertests.c b/usertests.c index cc2601c..2bd21ba 100644 --- a/usertests.c +++ b/usertests.c @@ -1229,6 +1229,38 @@ forktest(void) printf(1, "fork test OK\n"); } +void +sbrktest(void) +{ + printf(stdout, "sbrk test\n"); + char *a = sbrk(0); + int i; + for(i = 0; i < 5000; i++){ + char *b = sbrk(1); + if(b != a){ + printf(stdout, "sbrk test failed %d %x %x\n", i, a, b); + exit(); + } + *b = 1; + a = b + 1; + } + int pid = fork(); + if(pid < 0){ + printf(stdout, "sbrk test fork failed\n"); + exit(); + } + char *c = sbrk(1); + c = sbrk(1); + if(c != a + 1){ + printf(stdout, "sbrk test failed post-fork\n"); + exit(); + } + if(pid == 0) + exit(); + wait(); + printf(stdout, "sbrk test OK\n"); +} + int main(int argc, char *argv[]) { @@ -1240,6 +1272,8 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + sbrktest(); + opentest(); writetest(); writetest1(); diff --git a/vm.c b/vm.c index 231e133..b89efd5 100644 --- a/vm.c +++ b/vm.c @@ -54,6 +54,9 @@ printpgdir(pde_t *pgdir) cprintf("printpgdir done\n", pgdir); } +// return the address of the PTE in page table pgdir +// that corresponds to linear address va. if create!=0, +// create any required page table pages. static pte_t * walkpgdir(pde_t *pgdir, const void *va, int create) { @@ -80,6 +83,8 @@ walkpgdir(pde_t *pgdir, const void *va, int create) return &pgtab[PTX(va)]; } +// create PTEs for linear addresses starting at la that refer to +// physical addresses starting at pa. static int mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) { @@ -89,6 +94,8 @@ mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) for (i = 0; i < size; i += PGSIZE) { if (!(pte = walkpgdir(pgdir, (void*)(la + i), 1))) return 0; + if(*pte & PTE_P) + panic("remap"); *pte = (pa + i) | perm | PTE_P; } return 1; @@ -177,21 +184,30 @@ uva2ka(pde_t *pgdir, char *uva) return (char *)pa; } +// allocate sz bytes more memory for a process starting at the +// given user address; allocates physical memory and page +// table entries. addr and sz need not be page-aligned. +// it is a no-op for any parts of the requested memory +// that are already allocated. int allocuvm(pde_t *pgdir, char *addr, uint sz) { - uint i, n; - char *mem; - - n = PGROUNDUP(sz); - if (addr + n >= USERTOP) + if (addr + sz >= (char*)USERTOP) return 0; - for (i = 0; i < n; i += PGSIZE) { - if (!(mem = kalloc(PGSIZE))) { // XXX cleanup what we did? - return 0; + char *start = PGROUNDDOWN(addr); + char *last = PGROUNDDOWN(addr + sz - 1); + char *a; + for(a = start; a <= last; a += PGSIZE){ + pte_t *pte = walkpgdir(pgdir, a, 0); + if(pte == 0 || (*pte & PTE_P) == 0){ + char *mem = kalloc(PGSIZE); + if(mem == 0){ + // XXX clean up? + return 0; + } + memset(mem, 0, PGSIZE); + mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U); } - memset(mem, 0, PGSIZE); - mappages(pgdir, addr + i, PGSIZE, PADDR(mem), PTE_W|PTE_U); } return 1; } From 2cf6b32d4dbc915f5d3b2d7b0e382c0ad20299be Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 5 Aug 2010 14:15:03 -0400 Subject: [PATCH 2/7] move jkstack to main.c replace jstack with asm()s --- defs.h | 2 -- main.c | 22 ++++++++++++++++------ runoff.list | 1 + sh.c | 1 - swtch.S | 8 -------- vm.c | 11 ----------- 6 files changed, 17 insertions(+), 28 deletions(-) diff --git a/defs.h b/defs.h index b414a5f..6a19244 100644 --- a/defs.h +++ b/defs.h @@ -110,7 +110,6 @@ void yield(void); // swtch.S void swtch(struct context**, struct context*); -void jstack(uint); // spinlock.c void acquire(struct spinlock*); @@ -157,7 +156,6 @@ void pminit(void); void ksegment(void); void kvmalloc(void); void vminit(void); -void jkstack(); void printstack(void); void printpgdir(pde_t *); pde_t* setupkvm(void); diff --git a/main.c b/main.c index 78cd334..8be66e3 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,8 @@ static void bootothers(void); static void mpmain(void); -void jkstack(void) __attribute__((noreturn)); +void jkstack(void) __attribute__((noreturn)); +void mainc(void); // Bootstrap processor starts running C code here. int @@ -21,13 +22,24 @@ main(void) consoleinit(); // I/O devices & their interrupts uartinit(); // serial port pminit(); // physical memory for kernel - jkstack(); // Jump to mainc on a proper-allocated kernel stack + jkstack(); // Jump to mainc on a properly-allocated stack +} + +void +jkstack(void) +{ + char *kstack = kalloc(PGSIZE); + if (!kstack) + panic("jkstack\n"); + char *top = kstack + PGSIZE; + asm volatile("movl %0,%%esp" : : "r" (top)); + asm volatile("call mainc"); + panic("jkstack"); } void mainc(void) { - cprintf("cpus %p cpu %p\n", cpus, cpu); cprintf("\ncpu%d: starting xv6\n\n", cpu->id); kvmalloc(); // allocate the kernel page table pinit(); // process table @@ -52,14 +64,12 @@ mpmain(void) { if(cpunum() != mpbcpu()) { ksegment(); - cprintf("other cpu\n"); lapicinit(cpunum()); } vminit(); // Run with paging on each processor - cprintf("cpu%d: mpmain\n", cpu->id); + cprintf("cpu%d: starting\n", cpu->id); idtinit(); xchg(&cpu->booted, 1); - cprintf("cpu%d: scheduling\n", cpu->id); scheduler(); } diff --git a/runoff.list b/runoff.list index 6bbd386..3258398 100644 --- a/runoff.list +++ b/runoff.list @@ -22,6 +22,7 @@ proc.h proc.c swtch.S kalloc.c +vm.c # system calls traps.h diff --git a/sh.c b/sh.c index e8d65f0..16e325b 100644 --- a/sh.c +++ b/sh.c @@ -420,7 +420,6 @@ parseexec(char **ps, char *es) int tok, argc; struct execcmd *cmd; struct cmd *ret; - int *x = (int *) peek; if(peek(ps, es, "(")) return parseblock(ps, es); diff --git a/swtch.S b/swtch.S index 49efdf9..8751317 100644 --- a/swtch.S +++ b/swtch.S @@ -26,11 +26,3 @@ swtch: popl %ebx popl %ebp ret - -# Jump on a new stack, fake C calling conventions -.globl jstack -jstack: - movl 4(%esp), %esp - subl $16, %esp # space for arguments - movl $0, %ebp # terminate functions that follow ebp's - call mainc # continue at mainc diff --git a/vm.c b/vm.c index b89efd5..6689b82 100644 --- a/vm.c +++ b/vm.c @@ -324,17 +324,6 @@ pminit(void) kinit((char *)kernend, freesz); } -// Jump to mainc on a properly-allocated kernel stack -void -jkstack(void) -{ - char *kstack = kalloc(PGSIZE); - if (!kstack) - panic("jkstack\n"); - char *top = kstack + PGSIZE; - jstack((uint) top); -} - // Allocate one page table for the machine for the kernel address // space for scheduler processes. void From c99599784e950169d85bf1e4446e7dbfb1a40f59 Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 5 Aug 2010 16:00:59 -0400 Subject: [PATCH 3/7] remove some unused vm #defines fix corner cases with alignment when mapping kernel ELF file --- defs.h | 2 -- mmu.h | 30 ++++------------------------- proc.c | 2 +- vm.c | 60 ++++++++++++++++++++++------------------------------------ 4 files changed, 28 insertions(+), 66 deletions(-) diff --git a/defs.h b/defs.h index 6a19244..4a63154 100644 --- a/defs.h +++ b/defs.h @@ -156,8 +156,6 @@ void pminit(void); void ksegment(void); void kvmalloc(void); void vminit(void); -void printstack(void); -void printpgdir(pde_t *); pde_t* setupkvm(void); char* uva2ka(pde_t*, char*); int allocuvm(pde_t*, char*, uint); diff --git a/mmu.h b/mmu.h index e3ea46f..76a6f1b 100644 --- a/mmu.h +++ b/mmu.h @@ -85,32 +85,20 @@ struct segdesc { // | Page Directory | Page Table | Offset within Page | // | Index | Index | | // +----------------+----------------+---------------------+ -// \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/ -// \----------- PPN(la) -----------/ -// -// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown. -// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la), -// use PGADDR(PDX(la), PTX(la), PGOFF(la)). - -// page number field of address -#define PPN(la) (((uint) (la)) >> PTXSHIFT) -#define VPN(la) PPN(la) // used to index into vpt[] +// \--- PDX(la) --/ \--- PTX(la) --/ // page directory index #define PDX(la) ((((uint) (la)) >> PDXSHIFT) & 0x3FF) -#define VPD(la) PDX(la) // used to index into vpd[] // page table index #define PTX(la) ((((uint) (la)) >> PTXSHIFT) & 0x3FF) -// offset in page -#define PGOFF(la) (((uint) (la)) & 0xFFF) - // construct linear address from indexes and offset #define PGADDR(d, t, o) ((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) -// mapping from physical addresses to virtual addresses is the identity one -// (really linear addresses, but we map linear to physical also directly) +// turn a kernel linear address into a physical address. +// all of the kernel data structures have linear and +// physical addresses that are equal. #define PADDR(a) ((uint) a) // Page directory and page table constants. @@ -120,9 +108,6 @@ struct segdesc { #define PGSIZE 4096 // bytes mapped by a page #define PGSHIFT 12 // log2(PGSIZE) -#define PTSIZE (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry -#define PTSHIFT 22 // log2(PTSIZE) - #define PTXSHIFT 12 // offset of PTX in a linear address #define PDXSHIFT 22 // offset of PDX in a linear address @@ -140,13 +125,6 @@ struct segdesc { #define PTE_PS 0x080 // Page Size #define PTE_MBZ 0x180 // Bits must be zero -// The PTE_AVAIL bits aren't used by the kernel or interpreted by the -// hardware, so user processes are allowed to set them arbitrarily. -#define PTE_AVAIL 0xE00 // Available for software use - -// Only flags in PTE_USER may be used in system calls. -#define PTE_USER (PTE_AVAIL | PTE_P | PTE_W | PTE_U) - // Address in page table or page directory entry #define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF) diff --git a/proc.c b/proc.c index c1faec6..dd6f27e 100644 --- a/proc.c +++ b/proc.c @@ -414,9 +414,9 @@ wait(void) // Found one. pid = p->pid; kfree(p->kstack, KSTACKSIZE); + p->kstack = 0; freevm(p->pgdir); p->state = UNUSED; - p->kstack = 0; p->pid = 0; p->parent = 0; p->name[0] = 0; diff --git a/vm.c b/vm.c index 6689b82..c94e9a3 100644 --- a/vm.c +++ b/vm.c @@ -33,27 +33,6 @@ static uint kernend; static uint freesz; pde_t *kpgdir; // One kernel page table for scheduler procs -void -printpgdir(pde_t *pgdir) -{ - uint i; - uint j; - - cprintf("printpgdir 0x%x\n", pgdir); - for (i = 0; i < NPDENTRIES; i++) { - if (pgdir[i] != 0 && i < 100) { - cprintf("pgdir %d, v=0x%x\n", i, pgdir[i]); - pte_t *pgtab = (pte_t*) PTE_ADDR(pgdir[i]); - for (j = 0; j < NPTENTRIES; j++) { - if (pgtab[j] != 0) - cprintf("pgtab %d, v=0x%x, addr=0x%x\n", j, PGADDR(i, j, 0), - PTE_ADDR(pgtab[j])); - } - } - } - cprintf("printpgdir done\n", pgdir); -} - // return the address of the PTE in page table pgdir // that corresponds to linear address va. if create!=0, // create any required page table pages. @@ -84,19 +63,25 @@ walkpgdir(pde_t *pgdir, const void *va, int create) } // create PTEs for linear addresses starting at la that refer to -// physical addresses starting at pa. +// physical addresses starting at pa. la and size might not +// be page-aligned. static int mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) { - uint i; - pte_t *pte; - - for (i = 0; i < size; i += PGSIZE) { - if (!(pte = walkpgdir(pgdir, (void*)(la + i), 1))) + char *first = PGROUNDDOWN(la); + char *last = PGROUNDDOWN(la + size - 1); + char *a = first; + while(1){ + pte_t *pte = walkpgdir(pgdir, a, 1); + if(pte == 0) return 0; if(*pte & PTE_P) panic("remap"); - *pte = (pa + i) | perm | PTE_P; + *pte = pa | perm | PTE_P; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; } return 1; } @@ -160,10 +145,10 @@ setupkvm(void) // Map IO space from 640K to 1Mbyte if (!mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W)) return 0; - // Map kernel text from kern text addr read-only + // Map kernel text read-only if (!mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0)) return 0; - // Map kernel data form kern data addr R/W + // Map kernel data read/write if (!mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W)) return 0; // Map dynamically-allocated memory read/write (kernel stacks, user mem) @@ -194,10 +179,10 @@ allocuvm(pde_t *pgdir, char *addr, uint sz) { if (addr + sz >= (char*)USERTOP) return 0; - char *start = PGROUNDDOWN(addr); + char *first = PGROUNDDOWN(addr); char *last = PGROUNDDOWN(addr + sz - 1); char *a; - for(a = start; a <= last; a += PGSIZE){ + for(a = first; a <= last; a += PGSIZE){ pte_t *pte = walkpgdir(pgdir, a, 0); if(pte == 0 || (*pte & PTE_P) == 0){ char *mem = kalloc(PGSIZE); @@ -212,6 +197,8 @@ allocuvm(pde_t *pgdir, char *addr, uint sz) return 1; } +// free a page table and all the physical memory pages +// in the user part. void freevm(pde_t *pgdir) { @@ -227,9 +214,8 @@ freevm(pde_t *pgdir) if (pgtab[j] != 0) { uint pa = PTE_ADDR(pgtab[j]); uint va = PGADDR(i, j, 0); - if (va >= USERTOP) // done with user part? - break; - kfree((void *) pa, PGSIZE); + if (va < USERTOP) // user memory + kfree((void *) pa, PGSIZE); pgtab[j] = 0; } } @@ -314,8 +300,8 @@ pminit(void) kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1); kerntext = ph[0].va; kerndata = ph[1].va; - kerntsz = kerndata - kerntext; - kerndsz = kernend - kerndata; + kerntsz = ph[0].memsz; + kerndsz = ph[1].memsz; freesz = PHYSTOP - kernend; cprintf("kerntext@0x%x(sz=0x%x), kerndata@0x%x(sz=0x%x), kernend 0x%x freesz = 0x%x\n", From 1afc9d3fcaa7c5992659bb8b69f639b746dda2bc Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Thu, 5 Aug 2010 21:16:55 -0400 Subject: [PATCH 4/7] add some comments find out the hard way why user and kernel must have separate segment descriptors --- asm.h | 2 ++ bootasm.S | 6 ++++-- bootother.S | 6 ++++-- main.c | 22 ++++++++++++---------- proc.h | 4 ++-- vm.c | 9 ++++++--- 6 files changed, 30 insertions(+), 19 deletions(-) diff --git a/asm.h b/asm.h index 0c052db..68210d7 100644 --- a/asm.h +++ b/asm.h @@ -6,6 +6,8 @@ .word 0, 0; \ .byte 0, 0, 0, 0 +// The 0xC0 means the limit is in 4096-byte units +// and (for executable segments) 32-bit mode. #define SEG_ASM(type,base,lim) \ .word (((lim) >> 12) & 0xffff), ((base) & 0xffff); \ .byte (((base) >> 16) & 0xff), (0x90 | (type)), \ diff --git a/bootasm.S b/bootasm.S index f6af255..56175ce 100644 --- a/bootasm.S +++ b/bootasm.S @@ -51,8 +51,10 @@ seta20.2: orl $CR0_PE, %eax movl %eax, %cr0 - # Jump to next instruction, but in 32-bit code segment. - # Switches processor into 32-bit mode. + # This ljmp is how you load the CS (Code Segment) register. + # SEG_ASM produces segment descriptors with the 32-bit mode + # flag set (the D flag), so addresses and word operands will + # default to 32 bits after this jump. ljmp $(SEG_KCODE<<3), $start32 .code32 # Assemble for 32-bit mode diff --git a/bootother.S b/bootother.S index 11d32f1..899669a 100644 --- a/bootother.S +++ b/bootother.S @@ -45,8 +45,10 @@ start: orl $CR0_PE, %eax movl %eax, %cr0 - # Jump to next instruction, but in 32-bit code segment. - # Switches processor into 32-bit mode. + # This ljmp is how you load the CS (Code Segment) register. + # SEG_ASM produces segment descriptors with the 32-bit mode + # flag set (the D flag), so addresses and word operands will + # default to 32 bits after this jump. ljmp $(SEG_KCODE<<3), $start32 .code32 # Assemble for 32-bit mode diff --git a/main.c b/main.c index 8be66e3..a6088cb 100644 --- a/main.c +++ b/main.c @@ -16,13 +16,13 @@ main(void) { mpinit(); // collect info about this machine lapicinit(mpbcpu()); - ksegment(); + ksegment(); // set up segments picinit(); // interrupt controller ioapicinit(); // another interrupt controller consoleinit(); // I/O devices & their interrupts uartinit(); // serial port - pminit(); // physical memory for kernel - jkstack(); // Jump to mainc on a properly-allocated stack + pminit(); // discover how much memory there is + jkstack(); // call mainc() on a properly-allocated stack } void @@ -41,7 +41,7 @@ void mainc(void) { cprintf("\ncpu%d: starting xv6\n\n", cpu->id); - kvmalloc(); // allocate the kernel page table + kvmalloc(); // initialze the kernel page table pinit(); // process table tvinit(); // trap vectors binit(); // buffer cache @@ -57,8 +57,9 @@ mainc(void) mpmain(); } -// Bootstrap processor gets here after setting up the hardware. -// Additional processors start here. +// Common CPU setup code. +// Bootstrap CPU comes here from mainc(). +// Other CPUs jump here from bootother.S. static void mpmain(void) { @@ -66,11 +67,11 @@ mpmain(void) ksegment(); lapicinit(cpunum()); } - vminit(); // Run with paging on each processor + vminit(); // turn on paging cprintf("cpu%d: starting\n", cpu->id); - idtinit(); + idtinit(); // load idt register xchg(&cpu->booted, 1); - scheduler(); + scheduler(); // start running processes } static void @@ -85,6 +86,7 @@ bootothers(void) // placed the start of bootother.S there. code = (uchar *) 0x7000; memmove(code, _binary_bootother_start, (uint)_binary_bootother_size); + for(c = cpus; c < cpus+ncpu; c++){ if(c == cpus+cpunum()) // We've started already. continue; @@ -95,7 +97,7 @@ bootothers(void) *(void**)(code-8) = mpmain; lapicstartap(c->id, (uint)code); - // Wait for cpu to get through bootstrap. + // Wait for cpu to finish mpmain() while(c->booted == 0) ; } diff --git a/proc.h b/proc.h index ebc42f1..5e5d031 100644 --- a/proc.h +++ b/proc.h @@ -3,8 +3,8 @@ #define SEG_KCODE 1 // kernel code #define SEG_KDATA 2 // kernel data+stack #define SEG_KCPU 3 // kernel per-cpu data -#define SEG_UCODE 4 -#define SEG_UDATA 5 +#define SEG_UCODE 4 // user code +#define SEG_UDATA 5 // user data+stack #define SEG_TSS 6 // this process's task state #define NSEGS 7 diff --git a/vm.c b/vm.c index c94e9a3..9ea5e92 100644 --- a/vm.c +++ b/vm.c @@ -93,12 +93,15 @@ ksegment(void) { struct cpu *c; - // Map once virtual addresses to linear addresses using identity map + // Map virtual addresses to linear addresses using identity map. + // Cannot share a CODE descriptor for both kernel and user + // because it would have to have DPL_USR, but the CPU forbids + // an interrupt from CPL=0 to DPL=3. c = &cpus[cpunum()]; c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); - c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0x0, 0xffffffff, DPL_USER); - c->gdt[SEG_UDATA] = SEG(STA_W, 0x0, 0xffffffff, DPL_USER); + c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); + c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); // map cpu, and curproc c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); From c4cc10da7ef6d65f0f654445e0af35b8309f16c2 Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Fri, 6 Aug 2010 11:12:18 -0400 Subject: [PATCH 5/7] fix corner cases in exec of ELF put an invalid page below the stack have fork() handle invalid pages --- defs.h | 3 ++- exec.c | 7 ++++-- kalloc.c | 5 ++-- mmu.h | 1 - proc.c | 10 ++++---- proc.h | 5 ++-- usertests.c | 24 +++++++++++++++++++ vm.c | 66 ++++++++++++++++++++++++++++++++++------------------- 8 files changed, 84 insertions(+), 37 deletions(-) diff --git a/defs.h b/defs.h index 4a63154..b691099 100644 --- a/defs.h +++ b/defs.h @@ -163,7 +163,8 @@ void freevm(pde_t*); void inituvm(pde_t*, char*, char*, uint); int loaduvm(pde_t*, char*, struct inode *ip, uint, uint); pde_t* copyuvm(pde_t*,uint); -void loadvm(struct proc*); +void switchuvm(struct proc*); +void switchkvm(); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/exec.c b/exec.c index 8a92e99..4f11695 100644 --- a/exec.c +++ b/exec.c @@ -43,13 +43,16 @@ exec(char *path, char **argv) goto bad; if (!allocuvm(pgdir, (char *)ph.va, ph.memsz)) goto bad; - sz += PGROUNDUP(ph.memsz); + if(ph.va + ph.memsz > sz) + sz = ph.va + ph.memsz; if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz)) goto bad; } iunlockput(ip); // Allocate and initialize stack at sz + sz = PGROUNDUP(sz); + sz += PGSIZE; // leave an invalid page if (!allocuvm(pgdir, (char *)sz, PGSIZE)) goto bad; mem = uva2ka(pgdir, (char *)sz); @@ -95,7 +98,7 @@ exec(char *path, char **argv) proc->tf->eip = elf.entry; // main proc->tf->esp = sp; - loadvm(proc); + switchuvm(proc); freevm(oldpgdir); diff --git a/kalloc.c b/kalloc.c index 5661105..7695006 100644 --- a/kalloc.c +++ b/kalloc.c @@ -1,9 +1,8 @@ // Physical memory allocator, intended to allocate -// memory for user processes. Allocates in 4096-byte "pages". +// memory for user processes. Allocates in 4096-byte pages. // Free list is kept sorted and combines adjacent pages into // long runs, to make it easier to allocate big segments. -// One reason the page size is 4k is that the x86 segment size -// granularity is 4k. +// This combining is not useful now that xv6 uses paging. #include "types.h" #include "defs.h" diff --git a/mmu.h b/mmu.h index 76a6f1b..f4fc732 100644 --- a/mmu.h +++ b/mmu.h @@ -129,7 +129,6 @@ struct segdesc { #define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF) typedef uint pte_t; -extern pde_t *kpgdir; // Control Register flags #define CR0_PE 0x00000001 // Protection Enable diff --git a/proc.c b/proc.c index dd6f27e..f799a4d 100644 --- a/proc.c +++ b/proc.c @@ -145,7 +145,7 @@ growproc(int n) if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) return -1; proc->sz += n; - loadvm(proc); + switchuvm(proc); return 0; } @@ -214,9 +214,10 @@ scheduler(void) // to release ptable.lock and then reacquire it // before jumping back to us. proc = p; - loadvm(p); + switchuvm(p); p->state = RUNNING; swtch(&cpu->scheduler, proc->context); + switchkvm(); // Process is done running for now. // It should have changed its p->state before coming back. @@ -242,7 +243,6 @@ sched(void) panic("sched running"); if(readeflags()&FL_IF) panic("sched interruptible"); - lcr3(PADDR(kpgdir)); // Switch to the kernel page table intena = cpu->intena; swtch(&proc->context, cpu->scheduler); cpu->intena = intena; @@ -414,8 +414,8 @@ wait(void) // Found one. pid = p->pid; kfree(p->kstack, KSTACKSIZE); - p->kstack = 0; - freevm(p->pgdir); + p->kstack = 0; + freevm(p->pgdir); p->state = UNUSED; p->pid = 0; p->parent = 0; diff --git a/proc.h b/proc.h index 5e5d031..7d97dfa 100644 --- a/proc.h +++ b/proc.h @@ -16,7 +16,7 @@ // Contexts are stored at the bottom of the stack they // describe; the stack pointer is the address of the context. // The layout of the context matches the layout of the stack in swtch.S -// at "Switch stacks" comment. Switch itself doesn't save eip explicitly, +// at the "Switch stacks" comment. Switch doesn't save eip explicitly, // but it is on the stack and allocproc() manipulates it. struct context { uint edi; @@ -31,7 +31,7 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; // Per-process state struct proc { uint sz; // Size of process memory (bytes) - pde_t* pgdir; // linear address of proc's pgdir + pde_t* pgdir; // Linear address of proc's pgdir char *kstack; // Bottom of kernel stack for this process enum procstate state; // Process state volatile int pid; // Process ID @@ -48,6 +48,7 @@ struct proc { // Process memory is laid out contiguously, low addresses first: // text // original data and bss +// invalid page // fixed-size stack // expandable heap diff --git a/usertests.c b/usertests.c index 2bd21ba..247cc95 100644 --- a/usertests.c +++ b/usertests.c @@ -1261,6 +1261,29 @@ sbrktest(void) printf(stdout, "sbrk test OK\n"); } +void +stacktest(void) +{ + printf(stdout, "stack test\n"); + char dummy = 1; + char *p = &dummy; + int ppid = getpid(); + int pid = fork(); + if(pid < 0){ + printf(stdout, "fork failed\n"); + exit(); + } + if(pid == 0){ + // should cause a trap: + p[-4096] = 'z'; + kill(ppid); + printf(stdout, "stack test failed: page before stack was writeable\n"); + exit(); + } + wait(); + printf(stdout, "stack test OK\n"); +} + int main(int argc, char *argv[]) { @@ -1272,6 +1295,7 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + stacktest(); sbrktest(); opentest(); diff --git a/vm.c b/vm.c index 9ea5e92..6914dd3 100644 --- a/vm.c +++ b/vm.c @@ -8,13 +8,20 @@ // The mappings from logical to linear are one to one (i.e., // segmentation doesn't do anything). -// The mapping from linear to physical are one to one for the kernel. -// The mappings for the kernel include all of physical memory (until -// PHYSTOP), including the I/O hole, and the top of physical address -// space, where additional devices are located. -// The kernel itself is linked to be at 1MB, and its physical memory -// is also at 1MB. -// Physical memory for user programs is allocated from physical memory +// There is one page table per process, plus one that's used +// when a CPU is not running any process (kpgdir). +// A user process uses the same page table as the kernel; the +// page protection bits prevent it from using anything other +// than its memory. +// +// setupkvm() and exec() set up every page table like this: +// 0..640K : user memory (text, data, stack, heap) +// 640K..1M : mapped direct (for IO space) +// 1M..kernend : mapped direct (for the kernel's text and data) +// kernend..PHYSTOP : mapped direct (kernel heap and user pages) +// 0xfe000000..0 : mapped direct (devices such as ioapic) +// +// The kernel allocates memory for its heap and for user memory // between kernend and the end of physical memory (PHYSTOP). // The virtual address space of each user program includes the kernel // (which is inaccessible in user mode). The user program addresses @@ -31,7 +38,7 @@ static uint kerndata; static uint kerndsz; static uint kernend; static uint freesz; -pde_t *kpgdir; // One kernel page table for scheduler procs +static pde_t *kpgdir; // for use in scheduler() // return the address of the PTE in page table pgdir // that corresponds to linear address va. if create!=0, @@ -114,9 +121,9 @@ ksegment(void) proc = 0; } -// Setup address space and current process task state. +// Switch h/w page table and TSS registers to point to process p. void -loadvm(struct proc *p) +switchuvm(struct proc *p) { pushcli(); @@ -128,14 +135,21 @@ loadvm(struct proc *p) ltr(SEG_TSS << 3); if (p->pgdir == 0) - panic("loadvm: no pgdir\n"); + panic("switchuvm: no pgdir\n"); lcr3(PADDR(p->pgdir)); // switch to new address space popcli(); } -// Setup kernel part of a page table. Linear adresses map one-to-one -// on physical addresses. +// Switch h/w page table register to the kernel-only page table, for when +// no process is running. +void +switchkvm() +{ + lcr3(PADDR(kpgdir)); // Switch to the kernel page table +} + +// Set up kernel part of a page table. pde_t* setupkvm(void) { @@ -163,6 +177,10 @@ setupkvm(void) return pgdir; } +// return the physical address that a given user address +// maps to. the result is also a kernel logical address, +// since the kernel maps the physical memory allocated to user +// processes directly. char* uva2ka(pde_t *pgdir, char *uva) { @@ -266,6 +284,8 @@ inituvm(pde_t *pgdir, char *addr, char *init, uint sz) } } +// given a parent process's page table, create a copy +// of it for a child. pde_t* copyuvm(pde_t *pgdir, uint sz) { @@ -278,17 +298,20 @@ copyuvm(pde_t *pgdir, uint sz) for (i = 0; i < sz; i += PGSIZE) { if (!(pte = walkpgdir(pgdir, (void *)i, 0))) panic("copyuvm: pte should exist\n"); - pa = PTE_ADDR(*pte); - if (!(mem = kalloc(PGSIZE))) - return 0; - memmove(mem, (char *)pa, PGSIZE); - if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) - return 0; + if(*pte & PTE_P){ + pa = PTE_ADDR(*pte); + if (!(mem = kalloc(PGSIZE))) + return 0; + memmove(mem, (char *)pa, PGSIZE); + if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) + return 0; + } } return d; } -// Gather about physical memory layout. Called once during boot. +// Gather information about physical memory layout. +// Called once during boot. void pminit(void) { @@ -307,9 +330,6 @@ pminit(void) kerndsz = ph[1].memsz; freesz = PHYSTOP - kernend; - cprintf("kerntext@0x%x(sz=0x%x), kerndata@0x%x(sz=0x%x), kernend 0x%x freesz = 0x%x\n", - kerntext, kerntsz, kerndata, kerndsz, kernend, freesz); - kinit((char *)kernend, freesz); } From 83d2db91f75460e1275d67847adec0fca5a9800b Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Tue, 10 Aug 2010 17:08:41 -0400 Subject: [PATCH 6/7] allow sbrk(-x) to de-allocate user memory --- defs.h | 1 + proc.c | 9 +++++++-- usertests.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++- vm.c | 26 +++++++++++++++++++++++- 4 files changed, 89 insertions(+), 4 deletions(-) diff --git a/defs.h b/defs.h index b691099..a051522 100644 --- a/defs.h +++ b/defs.h @@ -159,6 +159,7 @@ void vminit(void); pde_t* setupkvm(void); char* uva2ka(pde_t*, char*); int allocuvm(pde_t*, char*, uint); +int deallocuvm(pde_t *pgdir, char *addr, uint sz); void freevm(pde_t*); void inituvm(pde_t*, char*, char*, uint); int loaduvm(pde_t*, char*, struct inode *ip, uint, uint); diff --git a/proc.c b/proc.c index f799a4d..e69bacf 100644 --- a/proc.c +++ b/proc.c @@ -142,8 +142,13 @@ userinit(void) int growproc(int n) { - if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) - return -1; + if(n > 0){ + if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) + return -1; + } else if(n < 0){ + if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n)) + return -1; + } proc->sz += n; switchuvm(proc); return 0; diff --git a/usertests.c b/usertests.c index 247cc95..9ad6448 100644 --- a/usertests.c +++ b/usertests.c @@ -1232,7 +1232,11 @@ forktest(void) void sbrktest(void) { + int pid; + printf(stdout, "sbrk test\n"); + + // can one sbrk() less than a page? char *a = sbrk(0); int i; for(i = 0; i < 5000; i++){ @@ -1244,7 +1248,7 @@ sbrktest(void) *b = 1; a = b + 1; } - int pid = fork(); + pid = fork(); if(pid < 0){ printf(stdout, "sbrk test fork failed\n"); exit(); @@ -1258,6 +1262,57 @@ sbrktest(void) if(pid == 0) exit(); wait(); + + // can one allocate the full 640K? + a = sbrk(0); + uint amt = (640 * 1024) - (uint) a; + char *p = sbrk(amt); + if(p != a){ + printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a); + exit(); + } + char *lastaddr = (char *)(640 * 1024 - 1); + *lastaddr = 99; + + // is one forbidden from allocating more than 640K? + c = sbrk(4096); + if(c != (char *) 0xffffffff){ + printf(stdout, "sbrk allocated more than 640K, c %x\n", c); + exit(); + } + + // can one de-allocate? + a = sbrk(0); + c = sbrk(-4096); + if(c == (char *) 0xffffffff){ + printf(stdout, "sbrk could not deallocate\n"); + exit(); + } + c = sbrk(0); + if(c != a - 4096){ + printf(stdout, "sbrk deallocation produced wrong address, a %x c %x\n", a, c); + exit(); + } + + // can one re-allocate that page? + a = sbrk(0); + c = sbrk(4096); + if(c != a || sbrk(0) != a + 4096){ + printf(stdout, "sbrk re-allocation failed, a %x c %x\n", a, c); + exit(); + } + if(*lastaddr == 99){ + // should be zero + printf(stdout, "sbrk de-allocation didn't really deallocate\n"); + exit(); + } + + c = sbrk(4096); + if(c != (char *) 0xffffffff){ + printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c); + exit(); + } + printf(stdout, "sbrk test OK\n"); } diff --git a/vm.c b/vm.c index 6914dd3..8755b82 100644 --- a/vm.c +++ b/vm.c @@ -198,7 +198,7 @@ uva2ka(pde_t *pgdir, char *uva) int allocuvm(pde_t *pgdir, char *addr, uint sz) { - if (addr + sz >= (char*)USERTOP) + if (addr + sz > (char*)USERTOP) return 0; char *first = PGROUNDDOWN(addr); char *last = PGROUNDDOWN(addr + sz - 1); @@ -218,6 +218,30 @@ allocuvm(pde_t *pgdir, char *addr, uint sz) return 1; } +// deallocate some of the user pages, in response to sbrk() +// with a negative argument. if addr is not page-aligned, +// then only deallocates starting at the next page boundary. +int +deallocuvm(pde_t *pgdir, char *addr, uint sz) +{ + if (addr + sz > (char*)USERTOP) + return 0; + char *first = (char*) PGROUNDUP((uint)addr); + char *last = PGROUNDDOWN(addr + sz - 1); + char *a; + for(a = first; a <= last; a += PGSIZE){ + pte_t *pte = walkpgdir(pgdir, a, 0); + if(pte && (*pte & PTE_P) != 0){ + uint pa = PTE_ADDR(*pte); + if(pa == 0) + panic("deallocuvm"); + kfree((void *) pa, PGSIZE); + *pte = 0; + } + } + return 1; +} + // free a page table and all the physical memory pages // in the user part. void From 789b508d538e6faf635e49f268a4f1f9e9b65f05 Mon Sep 17 00:00:00 2001 From: Robert Morris Date: Wed, 11 Aug 2010 14:34:45 -0400 Subject: [PATCH 7/7] uptime() sys call for benchmarking increase PHYSTOP --- defs.h | 2 +- kalloc.c | 4 ---- syscall.c | 2 ++ syscall.h | 1 + sysproc.c | 16 +++++++++++++++- trap.c | 2 +- usertests.c | 24 +++++++++++++++++++++++- usys.S | 1 + vm.c | 4 +++- 9 files changed, 47 insertions(+), 9 deletions(-) diff --git a/defs.h b/defs.h index a051522..0197e70 100644 --- a/defs.h +++ b/defs.h @@ -142,7 +142,7 @@ void timerinit(void); // trap.c void idtinit(void); -extern int ticks; +extern uint ticks; void tvinit(void); extern struct spinlock tickslock; diff --git a/kalloc.c b/kalloc.c index 7695006..ca87018 100644 --- a/kalloc.c +++ b/kalloc.c @@ -23,14 +23,10 @@ struct { int nfreemem; // Initialize free list of physical pages. -// This code cheats by just considering one megabyte of -// pages after end. Real systems would determine the -// amount of memory available in the system and use it all. void kinit(char *p, uint len) { initlock(&kmem.lock, "kmem"); - cprintf("end 0x%x free = %d(0x%x)\n", p, len); nfreemem = 0; kfree(p, len); } diff --git a/syscall.c b/syscall.c index ce79dbd..9296cff 100644 --- a/syscall.c +++ b/syscall.c @@ -100,6 +100,7 @@ extern int sys_sleep(void); extern int sys_unlink(void); extern int sys_wait(void); extern int sys_write(void); +extern int sys_uptime(void); static int (*syscalls[])(void) = { [SYS_chdir] sys_chdir, @@ -122,6 +123,7 @@ static int (*syscalls[])(void) = { [SYS_unlink] sys_unlink, [SYS_wait] sys_wait, [SYS_write] sys_write, +[SYS_uptime] sys_uptime, }; void diff --git a/syscall.h b/syscall.h index f4b7807..3a0fbca 100644 --- a/syscall.h +++ b/syscall.h @@ -19,3 +19,4 @@ #define SYS_getpid 18 #define SYS_sbrk 19 #define SYS_sleep 20 +#define SYS_uptime 21 diff --git a/sysproc.c b/sysproc.c index 11770ff..efaa372 100644 --- a/sysproc.c +++ b/sysproc.c @@ -57,7 +57,8 @@ sys_sbrk(void) int sys_sleep(void) { - int n, ticks0; + int n; + uint ticks0; if(argint(0, &n) < 0) return -1; @@ -73,3 +74,16 @@ sys_sleep(void) release(&tickslock); return 0; } + +// return how many clock tick interrupts have occurred +// since boot. +int +sys_uptime(void) +{ + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} diff --git a/trap.c b/trap.c index 1f35708..daee22f 100644 --- a/trap.c +++ b/trap.c @@ -11,7 +11,7 @@ struct gatedesc idt[256]; extern uint vectors[]; // in vectors.S: array of 256 entry pointers struct spinlock tickslock; -int ticks; +uint ticks; void tvinit(void) diff --git a/usertests.c b/usertests.c index 9ad6448..670a4a8 100644 --- a/usertests.c +++ b/usertests.c @@ -322,8 +322,9 @@ void mem(void) { void *m1, *m2; - int pid; + int pid, ppid; + ppid = getpid(); if((pid = fork()) == 0){ m1 = 0; while((m2 = malloc(10001)) != 0) { @@ -338,6 +339,7 @@ mem(void) m1 = malloc(1024*20); if(m1 == 0) { printf(1, "couldn't allocate mem?!!\n"); + kill(ppid); exit(); } free(m1); @@ -1233,6 +1235,7 @@ void sbrktest(void) { int pid; + char *oldbrk = sbrk(0); printf(stdout, "sbrk test\n"); @@ -1313,6 +1316,25 @@ sbrktest(void) exit(); } + // can we read the kernel's memory? + for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){ + int ppid = getpid(); + int pid = fork(); + if(pid < 0){ + printf(stdout, "fork failed\n"); + exit(); + } + if(pid == 0){ + printf(stdout, "oops could read %x = %x\n", a, *a); + kill(ppid); + exit(); + } + wait(); + } + + if(sbrk(0) > oldbrk) + sbrk(-(sbrk(0) - oldbrk)); + printf(stdout, "sbrk test OK\n"); } diff --git a/usys.S b/usys.S index 2291b02..8bfd8a1 100644 --- a/usys.S +++ b/usys.S @@ -28,3 +28,4 @@ SYSCALL(dup) SYSCALL(getpid) SYSCALL(sbrk) SYSCALL(sleep) +SYSCALL(uptime) diff --git a/vm.c b/vm.c index 8755b82..98ac108 100644 --- a/vm.c +++ b/vm.c @@ -29,7 +29,7 @@ // (both in physical memory and in the kernel's virtual address // space). -#define PHYSTOP 0x300000 +#define PHYSTOP 0x1000000 #define USERTOP 0xA0000 static uint kerntext; // Linker starts kernel at 1MB @@ -336,6 +336,8 @@ copyuvm(pde_t *pgdir, uint sz) // Gather information about physical memory layout. // Called once during boot. +// Really should find out how much physical memory +// there is rather than assuming PHYSTOP. void pminit(void) {