From f32f3638f4c34fbf2fc4398878e6304612bb3283 Mon Sep 17 00:00:00 2001 From: rsc Date: Tue, 21 Aug 2007 19:22:08 +0000 Subject: [PATCH] Various cleanup: - Got rid of dummy proc[0]. Now proc[0] is init. - Added initcode.S to exec /init, so that /init is just a regular binary. - Moved exec out of sysfile to exec.c - Moved code dealing with fs guts (like struct inode) from sysfile.c to fs.c. Code dealing with system call arguments stays in sysfile.c - Refactored directory routines in fs.c; should be simpler. - Changed iget to return *unlocked* inode structure. This solves the lookup-then-use race in namei without introducing deadlocks. It also enabled getting rid of the dummy proc[0]. --- BUGS | 24 +++-- Makefile | 13 ++- defs.h | 8 +- exec.c | 136 ++++++++++++++++++++++++++++ fs.c | 253 +++++++++++++++++++++++++++++++---------------------- fsvar.h | 7 +- initcode.S | 28 ++++++ main.c | 120 +++++++++---------------- proc.c | 50 +++++------ string.c | 10 +++ syscall.c | 1 - sysfile.c | 221 +++++++--------------------------------------- 12 files changed, 456 insertions(+), 415 deletions(-) create mode 100644 exec.c create mode 100644 initcode.S diff --git a/BUGS b/BUGS index ef4213f..1d2bd37 100644 --- a/BUGS +++ b/BUGS @@ -11,14 +11,15 @@ proc.c: factor out switching and scheduling code from process code -kalloc.c - more cleanups + shuffle for formatting -ide.c: synchronous disk write -> polling disk write. search for - (a)synchronous; xv6 doesn't have asynchronous writes. +syscall.c: + cannot convince runoff1 to split the extern lists to fill previous page completely. -fs.c: split all name operations off in name.c? (starting with namei but move +fs.c: split all name operations off in name.c? (starting with namei but wdir keep in fs.c) + locking? + shuffle for formatting pipe.c: more comments? @@ -31,6 +32,19 @@ sysfile.c: general: sizeof parens? +bio.c: + decide odd or even + bwrite doesn't need a second argument + +file.c: + move fileincref onto page 1? + L=$HOME/mit/l (for i in *.c; do xoc -x xgnu -x ./nodecleq.zeta --typesonly $i; done) 2>&1 | grep warning +saw random sharedfd failure. + +why does fdalloc consume reference? + +why mkdir and create? + diff --git a/Makefile b/Makefile index 2606f4c..ac696dd 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,7 @@ OBJS = \ vectors.o\ bio.o\ fs.o\ + exec.o\ 8253pit.o\ # Cross-compiling (e.g., on Mac OS X) @@ -34,7 +35,7 @@ LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump # On newer gcc you may need to add -fno-stack-protector to $(CFLAGS) -CFLAGS = -fno-builtin -O2 -Wall -MD +CFLAGS = -fno-builtin -O2 -Wall -MD -ggdb -fno-stack-protector AS = $(TOOLPREFIX)gas xv6.img : bootblock kernel fs.img @@ -50,12 +51,16 @@ bootblock : bootasm.S bootmain.c $(OBJCOPY) -S -O binary bootblock.o bootblock ./sign.pl bootblock -kernel : $(OBJS) bootother.S _init +kernel : $(OBJS) bootother.S initcode.S $(CC) -nostdinc -I. -c bootother.S $(LD) -N -e start -Ttext 0x7000 -o bootother.out bootother.o $(OBJCOPY) -S -O binary bootother.out bootother $(OBJDUMP) -S bootother.o > bootother.asm - $(LD) -Ttext 0x100000 -e main0 -o kernel $(OBJS) -b binary bootother _init + $(CC) -nostdinc -I. -c initcode.S + $(LD) -N -e start -Ttext 0 -o initcode.out initcode.o + $(OBJCOPY) -S -O binary initcode.out initcode + $(OBJDUMP) -S initcode.o > initcode.asm + $(LD) -Ttext 0x100000 -e main0 -o kernel $(OBJS) -b binary initcode bootother $(OBJDUMP) -S kernel > kernel.asm $(OBJDUMP) -t kernel | awk '/SYMBOL TABLE/ { go=1; next } go {print $$1, $$NF}' >kernel.sym @@ -132,7 +137,7 @@ PRINT = \ proc.h proc.c setjmp.S kalloc.c\ syscall.h trapasm.S traps.h trap.c vectors.pl syscall.c sysproc.c\ buf.h dev.h fcntl.h stat.h file.h fs.h fsvar.h file.c fs.c bio.c ide.c sysfile.c\ - pipe.c\ + pipe.c exec.c\ mp.h ioapic.h mp.c lapic.c ioapic.c picirq.c\ console.c\ string.c\ diff --git a/defs.h b/defs.h index f2f8d73..24fd52b 100644 --- a/defs.h +++ b/defs.h @@ -40,6 +40,7 @@ int memcmp(const void*, const void*, uint); void* memmove(void*, const void*, uint); int strncmp(const char*, const char*, uint); char* safestrcpy(char*, const char*, int); +int strlen(const char*); // syscall.c void syscall(void); @@ -135,11 +136,16 @@ int readi(struct inode*, char*, uint, uint); int writei(struct inode*, char*, uint, uint); struct inode* mknod(char*, short, short, short); struct inode* dircreat(struct inode*, char*, int, short, short, short); -int dirlookup(struct inode*, char*, int, uint*, uint*); +struct inode* dirlookup(struct inode*, char*, int, uint*); int unlink(char*); void iupdate(struct inode*); int link(char*, char*); struct inode* igetroot(void); +int mkdir(char *path); +struct inode* create(char *path); + +// exec.c +int exec(char*, char**); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/exec.c b/exec.c new file mode 100644 index 0000000..1f8b1af --- /dev/null +++ b/exec.c @@ -0,0 +1,136 @@ +#include "types.h" +#include "stat.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "defs.h" +#include "x86.h" +#include "traps.h" +#include "syscall.h" +#include "spinlock.h" +#include "buf.h" +#include "fs.h" +#include "fsvar.h" +#include "elf.h" +#include "file.h" +#include "fcntl.h" + +int +exec(char *path, char **argv) +{ + uint sz, sp, p1, p2; + int i, nargs, argbytes, len; + struct inode *ip; + struct elfhdr elf; + struct proghdr ph; + char *mem; + char *s, *last; + + sz = 0; + mem = 0; + + if((ip = namei(path)) == 0) + return -1; + + if(readi(ip, (char*)&elf, 0, sizeof(elf)) < sizeof(elf)) + goto bad; + + if(elf.magic != ELF_MAGIC) + goto bad; + + for(i = 0; i < elf.phnum; i++){ + if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph), + sizeof(ph)) != sizeof(ph)) + goto bad; + if(ph.type != ELF_PROG_LOAD) + continue; + if(ph.memsz < ph.filesz) + goto bad; + sz += ph.memsz; + } + + sz += 4096 - (sz % 4096); + sz += 4096; + + mem = kalloc(sz); + if(mem == 0) + goto bad; + memset(mem, 0, sz); + + argbytes = 0; + for(i = 0; argv[i]; i++){ + len = strlen(argv[i]); + argbytes += len + 1; + } + nargs = i; + + // argn\0 + // ... + // arg0\0 + // 0 + // ptr to argn + // ... + // 12: ptr to arg0 + // 8: argv (points to ptr to arg0) + // 4: argc + // 0: fake return pc + sp = sz - argbytes - (nargs+1)*4 - 4 - 4 - 4; + *(uint*)(mem + sp) = 0xffffffff; + *(uint*)(mem + sp + 4) = nargs; + *(uint*)(mem + sp + 8) = (uint)(sp + 12); + + p1 = sp + 12; + p2 = sp + 12 + (nargs + 1) * 4; + for(i = 0; i < nargs; i++){ + len = strlen(argv[i]); + memmove(mem + p2, argv[i], len + 1); + *(uint*)(mem + p1) = p2; + p1 += 4; + p2 += len + 1; + } + *(uint*)(mem + p1) = 0; + + // Save name for debugging. + for(last=s=path; *s; s++) + if(*s == '/') + last = s+1; + safestrcpy(cp->name, last, sizeof cp->name); + + // commit to the new image. + kfree(cp->mem, cp->sz); + cp->sz = sz; + cp->mem = mem; + mem = 0; + + for(i = 0; i < elf.phnum; i++){ + if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph), + sizeof(ph)) != sizeof(ph)) + goto bad2; + if(ph.type != ELF_PROG_LOAD) + continue; + if(ph.va + ph.memsz > sz) + goto bad2; + if(readi(ip, cp->mem + ph.va, ph.offset, ph.filesz) != ph.filesz) + goto bad2; + memset(cp->mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz); + } + + iput(ip); + + cp->tf->eip = elf.entry; + cp->tf->esp = sp; + setupsegs(cp); + + return 0; + + bad: + if(mem) + kfree(mem, sz); + iput(ip); + return -1; + + bad2: + iput(ip); + proc_exit(); + return 0; +} diff --git a/fs.c b/fs.c index 02ca8e9..4731b5f 100644 --- a/fs.c +++ b/fs.c @@ -25,8 +25,6 @@ #define min(a, b) ((a) < (b) ? (a) : (b)) -static void ifree(struct inode*); - // Blocks. // Allocate a disk block. @@ -116,30 +114,25 @@ iinit(void) } // Find the inode with number inum on device dev -// and return an in-memory copy. Loads the inode -// from disk into the in-core table if necessary. -// The returned inode is locked and has its ref count incremented. -// Caller must iput the return value when done with it. +// and return the in-memory copy. The returned inode +// has its reference count incremented (and thus must be +// idecref'ed), but is *unlocked*, meaning that none of the fields +// except dev and inum are guaranteed to be initialized. +// This convention gives the caller maximum control over blocking; +// it also guarantees that iget will not sleep, which is useful in +// the early igetroot and when holding other locked inodes. struct inode* iget(uint dev, uint inum) { struct inode *ip, *empty; - struct dinode *dip; - struct buf *bp; acquire(&icache.lock); - loop: // Try for cached inode. empty = 0; for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ - if(ip->busy){ - sleep(ip, &icache.lock); - goto loop; - } ip->ref++; - ip->busy = 1; release(&icache.lock); return ip; } @@ -155,52 +148,61 @@ iget(uint dev, uint inum) ip->dev = dev; ip->inum = inum; ip->ref = 1; - ip->busy = 1; + ip->flags = 0; release(&icache.lock); - bp = bread(dev, IBLOCK(inum)); - dip = &((struct dinode*)(bp->data))[inum % IPB]; - ip->type = dip->type; - ip->major = dip->major; - ip->minor = dip->minor; - ip->nlink = dip->nlink; - ip->size = dip->size; - memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); - brelse(bp); - return ip; } // Iget the inode for the file system root (/). +// This gets called before there is a current process: it cannot sleep! struct inode* igetroot(void) { - return iget(ROOTDEV, 1); + struct inode *ip; + ip = iget(ROOTDEV, 1); + return ip; } // Lock the given inode. void ilock(struct inode *ip) { + struct buf *bp; + struct dinode *dip; + if(ip->ref < 1) panic("ilock"); acquire(&icache.lock); - while(ip->busy) + while(ip->flags & I_BUSY) sleep(ip, &icache.lock); - ip->busy = 1; + ip->flags |= I_BUSY; release(&icache.lock); + + if(!(ip->flags & I_VALID)){ + bp = bread(ip->dev, IBLOCK(ip->inum)); + dip = &((struct dinode*)(bp->data))[ip->inum % IPB]; + ip->type = dip->type; + ip->major = dip->major; + ip->minor = dip->minor; + ip->nlink = dip->nlink; + ip->size = dip->size; + memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); + brelse(bp); + ip->flags |= I_VALID; + } } // Unlock the given inode. void iunlock(struct inode *ip) { - if(ip->busy != 1 || ip->ref < 1) + if(!(ip->flags & I_BUSY) || ip->ref < 1) panic("iunlock"); acquire(&icache.lock); - ip->busy = 0; + ip->flags &= ~I_BUSY; wakeup(ip); release(&icache.lock); } @@ -209,19 +211,8 @@ iunlock(struct inode *ip) void iput(struct inode *ip) { - if(ip->ref < 1 || ip->busy != 1) - panic("iput"); - - if((ip->ref == 1) && (ip->nlink == 0)) { - itrunc(ip); - ifree(ip); - } - - acquire(&icache.lock); - ip->ref -= 1; - ip->busy = 0; - wakeup(ip); - release(&icache.lock); + iunlock(ip); + idecref(ip); } // Increment reference count for ip. @@ -229,31 +220,42 @@ iput(struct inode *ip) struct inode* iincref(struct inode *ip) { - ilock(ip); + acquire(&icache.lock); ip->ref++; - iunlock(ip); + release(&icache.lock); return ip; } -// Caller holds reference to unlocked ip. -// Drop reference. +// Caller holds reference to unlocked ip. Drop reference. void idecref(struct inode *ip) { - ilock(ip); - iput(ip); + acquire(&icache.lock); + if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0) { + // inode is no longer used: truncate and free inode. + if(ip->flags & I_BUSY) + panic("idecref busy"); + ip->flags |= I_BUSY; + release(&icache.lock); + // XXX convince rsc that no one will come find this inode. + itrunc(ip); + ip->type = 0; + iupdate(ip); + acquire(&icache.lock); + ip->flags &= ~I_BUSY; + } + ip->ref--; + release(&icache.lock); } // Allocate a new inode with the given type on device dev. struct inode* ialloc(uint dev, short type) { - struct inode *ip; + int inum, ninodes; + struct buf *bp; struct dinode *dip; struct superblock *sb; - int ninodes; - int inum; - struct buf *bp; bp = bread(dev, 1); sb = (struct superblock*)bp->data; @@ -268,8 +270,7 @@ ialloc(uint dev, short type) dip->type = type; bwrite(bp, IBLOCK(inum)); // mark it allocated on the disk brelse(bp); - ip = iget(dev, inum); - return ip; + return iget(dev, inum); } brelse(bp); } @@ -295,15 +296,6 @@ iupdate(struct inode *ip) brelse(bp); } -// Free (delete) the given inode. -// Caller must have ip locked. -static void -ifree(struct inode *ip) -{ - ip->type = 0; - iupdate(ip); -} - // Inode contents // // The contents (data) associated with each inode is stored @@ -465,15 +457,15 @@ writei(struct inode *ip, char *src, uint off, uint n) // set *poff to the byte offset of the directory entry // set *pinum to the inode number // return 0. -int -dirlookup(struct inode *dp, char *name, int namelen, uint *poff, uint *pinum) +struct inode* +dirlookup(struct inode *dp, char *name, int namelen, uint *poff) { - uint off; + uint off, inum; struct buf *bp; struct dirent *de; if(dp->type != T_DIR) - return -1; + return 0; for(off = 0; off < dp->size; off += BSIZE){ bp = bread(dp->dev, bmap(dp, off / BSIZE, 0)); @@ -487,24 +479,30 @@ dirlookup(struct inode *dp, char *name, int namelen, uint *poff, uint *pinum) // entry matches path element if(poff) *poff = off + (uchar*)de - bp->data; - if(pinum) - *pinum = de->inum; + inum = de->inum; brelse(bp); - return 0; + return iget(dp->dev, inum); } } brelse(bp); } - return -1; + return 0; } // Write a new directory entry (name, ino) into the directory dp. // Caller must have locked dp. -void -dirwrite(struct inode *dp, char *name, int namelen, uint ino) +int +dirlink(struct inode *dp, char *name, int namelen, uint ino) { int off; struct dirent de; + struct inode *ip; + + // Double-check that name is not present. + if((ip = dirlookup(dp, name, namelen, 0)) != 0){ + idecref(ip); + return -1; + } // Look for an empty dirent. for(off = 0; off < dp->size; off += sizeof(de)){ @@ -519,9 +517,10 @@ dirwrite(struct inode *dp, char *name, int namelen, uint ino) namelen = DIRSIZ; memmove(de.name, name, namelen); memset(de.name+namelen, 0, DIRSIZ-namelen); - if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) panic("dirwrite"); + + return 0; } // Create a new inode named name inside dp @@ -535,13 +534,19 @@ dircreat(struct inode *dp, char *name, int namelen, short type, short major, sho ip = ialloc(dp->dev, type); if(ip == 0) return 0; + ilock(ip); ip->major = major; ip->minor = minor; ip->size = 0; ip->nlink = 1; iupdate(ip); - - dirwrite(dp, name, namelen, ip->inum); + + if(dirlink(dp, name, namelen, ip->inum) < 0){ + ip->nlink = 0; + iupdate(ip); + iput(ip); + return 0; + } return ip; } @@ -590,17 +595,16 @@ skipelem(char *path, char **name, int *len) struct inode* _namei(char *path, int parent, char **pname, int *pnamelen) { - struct inode *dp; + struct inode *dp, *ip; char *name; int namelen; - uint off, dev, inum; + uint off; if(*path == '/') dp = igetroot(); - else { + else dp = iincref(cp->cwd); - ilock(dp); - } + ilock(dp); while((path = skipelem(path, &name, &namelen)) != 0){ // Truncate names in path to DIRSIZ chars. @@ -617,12 +621,12 @@ _namei(char *path, int parent, char **pname, int *pnamelen) return dp; } - if(dirlookup(dp, name, namelen, &off, &inum) < 0) + if((ip = dirlookup(dp, name, namelen, &off)) == 0) goto fail; - dev = dp->dev; iput(dp); - dp = iget(dev, inum); + ilock(ip); + dp = ip; if(dp->type == 0 || dp->nlink < 1) panic("namei"); } @@ -660,10 +664,6 @@ mknod(char *path, short type, short major, short minor) if((dp = nameiparent(path, &name, &namelen)) == 0) return 0; - if(dirlookup(dp, name, namelen, 0, 0) >= 0){ - iput(dp); - return 0; - } ip = dircreat(dp, name, namelen, type, major, minor); iput(dp); return ip; @@ -675,13 +675,13 @@ unlink(char *path) { struct inode *ip, *dp; struct dirent de; - uint off, inum, dev; + uint off; char *name; int namelen; if((dp = nameiparent(path, &name, &namelen)) == 0) return -1; - if(dirlookup(dp, name, namelen, &off, 0) < 0){ + if((ip = dirlookup(dp, name, namelen, &off)) == 0){ iput(dp); return -1; } @@ -691,20 +691,17 @@ unlink(char *path) // Cannot remove "." or ".." - the 2 and 3 count the trailing NUL. if(memcmp(de.name, ".", 2) == 0 || memcmp(de.name, "..", 3) == 0){ + idecref(ip); iput(dp); return -1; } - inum = de.inum; - memset(&de, 0, sizeof(de)); if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) panic("unlink dir write"); - - dev = dp->dev; iput(dp); - ip = iget(dev, inum); + ilock(ip); if(ip->nlink < 1) panic("unlink nlink < 1"); ip->nlink--; @@ -729,30 +726,76 @@ link(char *old, char *new) return -1; } iunlock(ip); - + if((dp = nameiparent(new, &name, &namelen)) == 0){ idecref(ip); return -1; } - if(dirlookup(dp, name, namelen, 0, 0) >= 0){ - iput(dp); - idecref(ip); - return -1; - } - if(dp->dev != ip->dev){ + if(dp->dev != ip->dev || dirlink(dp, name, namelen, ip->inum) < 0){ idecref(ip); iput(dp); return -1; } + iput(dp); - // LOCKING ERROR HERE! TWO LOCKS HELD AT ONCE. + // XXX write ordering wrong here too. ilock(ip); ip->nlink++; iupdate(ip); + iput(ip); + return 0; +} - dirwrite(dp, name, namelen, ip->inum); +int +mkdir(char *path) +{ + struct inode *dp, *ip; + char *name; + int namelen; + + // XXX write ordering is screwy here- do we care? + if((dp = nameiparent(path, &name, &namelen)) == 0) + return -1; + + if((ip = dircreat(dp, name, namelen, T_DIR, 0, 0)) == 0){ + iput(dp); + return -1; + } + dp->nlink++; + iupdate(dp); + + if(dirlink(ip, ".", 1, ip->inum) < 0 || dirlink(ip, "..", 2, dp->inum) < 0) + panic("mkdir"); iput(dp); iput(ip); return 0; } + +struct inode* +create(char *path) +{ + struct inode *dp, *ip; + char *name; + int namelen; + + if((dp = nameiparent(path, &name, &namelen)) == 0) + return 0; + + if((ip = dirlookup(dp, name, namelen, 0)) != 0){ + iput(dp); + ilock(ip); + if(ip->type == T_DIR){ + iput(ip); + return 0; + } + return ip; + } + if((ip = dircreat(dp, name, namelen, T_FILE, 0, 0)) == 0){ + iput(dp); + return 0; + } + iput(dp); + return ip; +} + diff --git a/fsvar.h b/fsvar.h index 449bf3d..8609c2a 100644 --- a/fsvar.h +++ b/fsvar.h @@ -4,7 +4,7 @@ struct inode { uint dev; // Device number uint inum; // Inode number int ref; // Reference count - int busy; // Is the inode "locked"? + int flags; // I_BUSY, I_VALID short type; // copy of disk inode short major; @@ -16,6 +16,5 @@ struct inode { #define ROOTDEV 1 // Device number of root file system -#define NAMEI_LOOKUP 1 -#define NAMEI_CREATE 2 -#define NAMEI_DELETE 3 +#define I_BUSY 0x1 +#define I_VALID 0x2 diff --git a/initcode.S b/initcode.S new file mode 100644 index 0000000..c87c4b1 --- /dev/null +++ b/initcode.S @@ -0,0 +1,28 @@ +# Initial process execs /init. + +#include "syscall.h" +#include "traps.h" + +# exec(init, argv) +start: + pushl $argv + pushl $init + pushl $0 + movl $SYS_exec, %eax + int $T_SYSCALL + +# for(;;) exit(); +exit: + movl $SYS_exit, %eax + int $T_SYSCALL + jmp exit + +# "/init\0" +init: + .string "/init\0" + +.p2align 2 +argv: + .long init + .long 0 + diff --git a/main.c b/main.c index fb7a69a..2774636 100644 --- a/main.c +++ b/main.c @@ -11,9 +11,8 @@ #include "spinlock.h" extern char edata[], end[]; -extern uchar _binary__init_start[], _binary__init_size[]; -void process0(); +void proc0init(); // Bootstrap processor starts running C code here. // This is called main0 not main so that it can have @@ -24,7 +23,6 @@ main0(void) { int i; static int bcpu; // cannot be on stack - struct proc *p; // clear BSS memset(edata, 0, end - edata); @@ -54,15 +52,6 @@ main0(void) fileinit(); iinit(); // i-node table - // initialize process 0 - p = &proc[0]; - p->state = RUNNABLE; - p->kstack = kalloc(KSTACKSIZE); - - // cause proc[0] to start in kernel at process0 - p->jmpbuf.eip = (uint) process0; - p->jmpbuf.esp = (uint) (p->kstack + KSTACKSIZE - 4); - // make sure there's a TSS setupsegs(0); @@ -86,6 +75,9 @@ main0(void) cpus[cpu()].nlock--; sti(); + // initialize process 0 + proc0init(); + scheduler(); } @@ -114,77 +106,45 @@ mpmain(void) scheduler(); } -// proc[0] starts here, called by scheduler() in the ordinary way. -void -process0(void) -{ - extern struct spinlock proc_table_lock; - struct proc *p0, *p1; - struct trapframe tf; - - release(&proc_table_lock); - - p0 = &proc[0]; - p0->cwd = igetroot(); - iunlock(p0->cwd); - - // Dummy user memory to make copyproc() happy. - // Must be big enough to hold the init binary and stack. - p0->sz = 2*PAGE; - p0->mem = kalloc(p0->sz); - - // Fake a trap frame as if a user process had made a system - // call, so that copyproc will have a place for the new - // process to return to. - p0->tf = &tf; - memset(p0->tf, 0, sizeof(struct trapframe)); - p0->tf->es = p0->tf->ds = p0->tf->ss = (SEG_UDATA << 3) | DPL_USER; - p0->tf->cs = (SEG_UCODE << 3) | DPL_USER; - p0->tf->eflags = FL_IF; - p0->tf->esp = p0->sz; +char initcode[] = { + /* push ptr to argv */ 0x6a, 0x1c, + /* push ptr to "/init" */ 0x6a, 0x16, + /* push fake ret addr */ 0x6a, 0x00, + /* mov $SYS_exec, %eax */ 0xb8, 0x09, 0x00, 0x00, 0x00, + /* int $0x30 */ 0xcd, 0x30, + /* Lx: */ + /* mov $SYS_exit, %eax */ 0xb8, 0x02, 0x00, 0x00, 0x00, + /* int $0x30 */ 0xcd, 0x30, + /* jmp Lx */ 0xeb, 0xf7, - // Push bogus return address, both to cause problems - // if main returns and also because gcc can generate - // function prologs that expect to be able to read the - // return address off the stack without causing a fault. - p0->tf->esp -= 4; - *(uint*)(p0->mem + p0->tf->esp) = 0xefefefef; - - p1 = copyproc(p0); - - load_icode(p1, _binary__init_start, (uint) _binary__init_size); - p1->state = RUNNABLE; - safestrcpy(p1->name, "init", sizeof p1->name); - - proc_wait(); - panic("init exited"); -} + /* "/init\0" */ 0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, + /* ptr to "/init" */ 0x16, 0x00, 0x00, 0x00, + /* 0 */ 0x00, 0x00, 0x00, 0x00 +}; void -load_icode(struct proc *p, uchar *binary, uint size) +proc0init(void) { - int i; - struct elfhdr *elf; - struct proghdr *ph; + struct proc *p; + extern uchar _binary_initcode_start[], _binary_initcode_size[]; + + p = copyproc(0); + p->sz = PAGE; + p->mem = kalloc(p->sz); + p->cwd = igetroot(); + memset(&p->tf, 0, sizeof p->tf); + p->tf->es = p->tf->ds = p->tf->ss = (SEG_UDATA << 3) | DPL_USER; + p->tf->cs = (SEG_UCODE << 3) | DPL_USER; + p->tf->eflags = FL_IF; + p->tf->esp = p->sz; + + // Push dummy return address to placate gcc. + p->tf->esp -= 4; + *(uint*)(p->mem + p->tf->esp) = 0xefefefef; - elf = (struct elfhdr*) binary; - if(elf->magic != ELF_MAGIC) - panic("load_icode: not an ELF binary"); - - p->tf->eip = elf->entry; - - // Map and load segments as directed. - ph = (struct proghdr*) (binary + elf->phoff); - for(i = 0; i < elf->phnum; i++, ph++) { - if(ph->type != ELF_PROG_LOAD) - continue; - if(ph->va + ph->memsz < ph->va) - panic("load_icode: overflow in proghdr"); - if(ph->va + ph->memsz >= p->sz) - panic("load_icode: icode too large"); - - // Load/clear the segment - memmove(p->mem + ph->va, binary + ph->offset, ph->filesz); - memset(p->mem + ph->va + ph->filesz, 0, ph->memsz - ph->filesz); - } + p->tf->eip = 0; + memmove(p->mem, _binary_initcode_start, (int)_binary_initcode_size); + safestrcpy(p->name, "initcode", sizeof p->name); + p->state = RUNNABLE; } + diff --git a/proc.c b/proc.c index b09b738..c86f88f 100644 --- a/proc.c +++ b/proc.c @@ -109,47 +109,43 @@ copyproc(struct proc *p) return 0; } np->pid = next_pid++; - np->ppid = p->pid; release(&proc_table_lock); - // Copy user memory. - np->sz = p->sz; - np->mem = kalloc(np->sz); - if(np->mem == 0){ - np->state = UNUSED; - return 0; - } - memmove(np->mem, p->mem, np->sz); - // Allocate kernel stack. - np->kstack = kalloc(KSTACKSIZE); - if(np->kstack == 0){ - kfree(np->mem, np->sz); - np->mem = 0; + if((np->kstack = kalloc(KSTACKSIZE)) == 0){ np->state = UNUSED; return 0; } - - // Copy trapframe registers from parent. np->tf = (struct trapframe*)(np->kstack + KSTACKSIZE) - 1; - memmove(np->tf, p->tf, sizeof(*np->tf)); - // Clear %eax so that fork system call returns 0 in child. - np->tf->eax = 0; + if(p){ // Copy process state from p. + np->ppid = p->pid; + memmove(np->tf, p->tf, sizeof *np->tf); + + np->sz = p->sz; + if((np->mem = kalloc(np->sz)) == 0){ + kfree(np->kstack, KSTACKSIZE); + np->kstack = 0; + np->state = UNUSED; + return 0; + } + memmove(np->mem, p->mem, np->sz); + + for(i = 0; i < NOFILE; i++){ + np->ofile[i] = p->ofile[i]; + if(np->ofile[i]) + fileincref(np->ofile[i]); + } + np->cwd = iincref(p->cwd); + } // Set up new jmpbuf to start executing at forkret (see below). memset(&np->jmpbuf, 0, sizeof np->jmpbuf); np->jmpbuf.eip = (uint)forkret; np->jmpbuf.esp = (uint)np->tf - 4; - // Copy file descriptors - for(i = 0; i < NOFILE; i++){ - np->ofile[i] = p->ofile[i]; - if(np->ofile[i]) - fileincref(np->ofile[i]); - } - - np->cwd = iincref(p->cwd); + // Clear %eax so that fork system call returns 0 in child. + np->tf->eax = 0; return np; } diff --git a/string.c b/string.c index a871b68..0a92cca 100644 --- a/string.c +++ b/string.c @@ -75,3 +75,13 @@ safestrcpy(char *s, const char *t, int n) return os; } +int +strlen(const char *s) +{ + int n; + + for(n = 0; s[n]; n++) + ; + return n; +} + diff --git a/syscall.c b/syscall.c index 7f4caa1..b18b62c 100644 --- a/syscall.c +++ b/syscall.c @@ -53,7 +53,6 @@ fetchstr(struct proc *p, uint addr, char **pp) int argint(int argno, int *ip) { - return fetchint(cp, cp->tf->esp + 4 + 4*argno, ip); } diff --git a/sysfile.c b/sysfile.c index cd00494..fd28002 100644 --- a/sysfile.c +++ b/sysfile.c @@ -114,42 +114,22 @@ sys_close(void) int sys_open(void) { - struct inode *ip, *dp; - char *path, *name; - int namelen; - int omode; - int fd, dev; - uint inum; + char *path; + int fd, omode; struct file *f; + struct inode *ip; if(argstr(0, &path) < 0 || argint(1, &omode) < 0) return -1; - switch(omode & O_CREATE){ - default: - case 0: // regular open - if((ip = namei(path)) == 0) - return -1; - break; - - case O_CREATE: - if((dp = nameiparent(path, &name, &namelen)) == 0) - return -1; - if(dirlookup(dp, name, namelen, 0, &inum) >= 0){ - dev = dp->dev; - iput(dp); - ip = iget(dev, inum); - }else{ - if((ip = dircreat(dp, name, namelen, T_FILE, 0, 0)) == 0){ - iput(dp); - return -1; - } - iput(dp); - } - break; - } + if(omode & O_CREATE) + ip = create(path); + else + ip = namei(path); + if(ip == 0) + return -1; - if(ip->type == T_DIR && (omode & (O_RDWR|O_WRONLY|O_CREATE))){ + if(ip->type == T_DIR && (omode & (O_RDWR|O_WRONLY))){ iput(ip); return -1; } @@ -194,6 +174,7 @@ sys_mknod(void) argint(2, &major) < 0 || argint(3, &minor) < 0) return -1; + // XXX why this check? if(len >= DIRSIZ) return -1; @@ -206,45 +187,11 @@ sys_mknod(void) int sys_mkdir(void) { - struct inode *nip; - struct inode *dp; - char *name, *path; - struct dirent de; - int namelen; + char *path; if(argstr(0, &path) < 0) return -1; - - dp = nameiparent(path, &name, &namelen); - if(dp == 0) - return -1; - if(dirlookup(dp, name, namelen, 0, 0) >= 0){ - iput(dp); - return -1; - } - - nip = dircreat(dp, name, namelen, T_DIR, 0, 0); - if(nip == 0){ - iput(dp); - return -1; - } - - dp->nlink++; - iupdate(dp); - - memset(de.name, '\0', DIRSIZ); - de.name[0] = '.'; - de.inum = nip->inum; - writei(nip, (char*) &de, 0, sizeof(de)); - - de.inum = dp->inum; - de.name[1] = '.'; - writei(nip, (char*) &de, sizeof(de), sizeof(de)); - - iput(dp); - iput(nip); - - return 0; + return mkdir(path); } int @@ -315,132 +262,30 @@ sys_link(void) return link(old, new); } +#define ARGMAX 10 + int sys_exec(void) { - uint sz=0, ap, sp, p1, p2; - int i, nargs, argbytes, len; - struct inode *ip; - struct elfhdr elf; - struct proghdr ph; - char *mem = 0; - char *path, *s, *last; - uint argv; - - if(argstr(0, &path) < 0 || argint(1, (int*)&argv) < 0) + char *path, *argv[ARGMAX]; + int i; + uint uargv, uarg; + + if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) return -1; - - if((ip = namei(path)) == 0) - return -1; - - if(readi(ip, (char*)&elf, 0, sizeof(elf)) < sizeof(elf)) - goto bad; - - if(elf.magic != ELF_MAGIC) - goto bad; - - sz = 0; - for(i = 0; i < elf.phnum; i++){ - if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph), - sizeof(ph)) != sizeof(ph)) - goto bad; - if(ph.type != ELF_PROG_LOAD) - continue; - if(ph.memsz < ph.filesz) - goto bad; - sz += ph.memsz; - } - - sz += 4096 - (sz % 4096); - sz += 4096; - - mem = kalloc(sz); - if(mem == 0) - goto bad; - memset(mem, 0, sz); - - nargs = 0; - argbytes = 0; - for(i = 0;; i++){ - if(fetchint(cp, argv + 4*i, (int*)&ap) < 0) - goto bad; - if(ap == 0) + memset(argv, 0, sizeof argv); + for(i=0;; i++){ + if(i >= ARGMAX) + return -1; + if(fetchint(cp, uargv+4*i, (int*)&uarg) < 0) + return -1; + if(uarg == 0){ + argv[i] = 0; break; - len = fetchstr(cp, ap, &s); - if(len < 0) - goto bad; - nargs++; - argbytes += len + 1; + } + if(fetchstr(cp, uarg, &argv[i]) < 0) + return -1; } - - // argn\0 - // ... - // arg0\0 - // 0 - // ptr to argn - // ... - // 12: ptr to arg0 - // 8: argv (points to ptr to arg0) - // 4: argc - // 0: fake return pc - sp = sz - argbytes - (nargs+1)*4 - 4 - 4 - 4; - *(uint*)(mem + sp) = 0xffffffff; - *(uint*)(mem + sp + 4) = nargs; - *(uint*)(mem + sp + 8) = (uint)(sp + 12); - - p1 = sp + 12; - p2 = sp + 12 + (nargs + 1) * 4; - for(i = 0; i < nargs; i++){ - fetchint(cp, argv + 4*i, (int*)&ap); - len = fetchstr(cp, ap, &s); - memmove(mem + p2, s, len + 1); - *(uint*)(mem + p1) = p2; - p1 += 4; - p2 += len + 1; - } - *(uint*)(mem + p1) = 0; - - // Save name for debugging. - for(last=s=path; *s; s++) - if(*s == '/') - last = s+1; - safestrcpy(cp->name, last, sizeof cp->name); - - // commit to the new image. - kfree(cp->mem, cp->sz); - cp->sz = sz; - cp->mem = mem; - mem = 0; - - for(i = 0; i < elf.phnum; i++){ - if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph), - sizeof(ph)) != sizeof(ph)) - goto bad2; - if(ph.type != ELF_PROG_LOAD) - continue; - if(ph.va + ph.memsz > sz) - goto bad2; - if(readi(ip, cp->mem + ph.va, ph.offset, ph.filesz) != ph.filesz) - goto bad2; - memset(cp->mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz); - } - - iput(ip); - - cp->tf->eip = elf.entry; - cp->tf->esp = sp; - setupsegs(cp); - - return 0; - - bad: - if(mem) - kfree(mem, sz); - iput(ip); - return -1; - - bad2: - iput(ip); - proc_exit(); - return 0; + return exec(path, argv); } +