Various cleanup:

- Got rid of dummy proc[0].  Now proc[0] is init.
- Added initcode.S to exec /init, so that /init is
  just a regular binary.
- Moved exec out of sysfile.c into exec.c.
- Moved code dealing with fs guts (like struct inode)
  from sysfile.c to fs.c.  Code dealing with system call
  arguments stays in sysfile.c.
- Refactored directory routines in fs.c; should be simpler.
- Changed iget to return an *unlocked* inode structure.
  This solves the lookup-then-use race in namei
  without introducing deadlocks.
  It also enabled getting rid of the dummy proc[0].
This commit is contained in:
rsc 2007-08-21 19:22:08 +00:00
parent 2d61a40b20
commit f32f3638f4
12 changed files with 456 additions and 415 deletions

24
BUGS
View file

@ -11,14 +11,15 @@ proc.c:
factor out switching and scheduling code from process code
kalloc.c
more cleanups
shuffle for formatting
ide.c: synchronous disk write -> polling disk write. search for
(a)synchronous; xv6 doesn't have asynchronous writes.
syscall.c:
cannot convince runoff1 to split the extern lists to fill previous page completely.
fs.c: split all name operations off in name.c? (starting with namei but move
fs.c: split all name operations off in name.c? (starting with namei but
wdir keep in fs.c)
locking?
shuffle for formatting
pipe.c:
more comments?
@ -31,6 +32,19 @@ sysfile.c:
general:
sizeof parens?
bio.c:
decide odd or even
bwrite doesn't need a second argument
file.c:
move fileincref onto page 1?
L=$HOME/mit/l
(for i in *.c; do xoc -x xgnu -x ./nodecleq.zeta --typesonly $i; done) 2>&1 | grep warning
saw random sharedfd failure.
why does fdalloc consume reference?
why mkdir and create?

View file

@ -21,6 +21,7 @@ OBJS = \
vectors.o\
bio.o\
fs.o\
exec.o\
8253pit.o\
# Cross-compiling (e.g., on Mac OS X)
@ -34,7 +35,7 @@ LD = $(TOOLPREFIX)ld
OBJCOPY = $(TOOLPREFIX)objcopy
OBJDUMP = $(TOOLPREFIX)objdump
# On newer gcc you may need to add -fno-stack-protector to $(CFLAGS)
CFLAGS = -fno-builtin -O2 -Wall -MD
CFLAGS = -fno-builtin -O2 -Wall -MD -ggdb -fno-stack-protector
AS = $(TOOLPREFIX)gas
xv6.img : bootblock kernel fs.img
@ -50,12 +51,16 @@ bootblock : bootasm.S bootmain.c
$(OBJCOPY) -S -O binary bootblock.o bootblock
./sign.pl bootblock
kernel : $(OBJS) bootother.S _init
kernel : $(OBJS) bootother.S initcode.S
$(CC) -nostdinc -I. -c bootother.S
$(LD) -N -e start -Ttext 0x7000 -o bootother.out bootother.o
$(OBJCOPY) -S -O binary bootother.out bootother
$(OBJDUMP) -S bootother.o > bootother.asm
$(LD) -Ttext 0x100000 -e main0 -o kernel $(OBJS) -b binary bootother _init
$(CC) -nostdinc -I. -c initcode.S
$(LD) -N -e start -Ttext 0 -o initcode.out initcode.o
$(OBJCOPY) -S -O binary initcode.out initcode
$(OBJDUMP) -S initcode.o > initcode.asm
$(LD) -Ttext 0x100000 -e main0 -o kernel $(OBJS) -b binary initcode bootother
$(OBJDUMP) -S kernel > kernel.asm
$(OBJDUMP) -t kernel | awk '/SYMBOL TABLE/ { go=1; next } go {print $$1, $$NF}' >kernel.sym
@ -132,7 +137,7 @@ PRINT = \
proc.h proc.c setjmp.S kalloc.c\
syscall.h trapasm.S traps.h trap.c vectors.pl syscall.c sysproc.c\
buf.h dev.h fcntl.h stat.h file.h fs.h fsvar.h file.c fs.c bio.c ide.c sysfile.c\
pipe.c\
pipe.c exec.c\
mp.h ioapic.h mp.c lapic.c ioapic.c picirq.c\
console.c\
string.c\

8
defs.h
View file

@ -40,6 +40,7 @@ int memcmp(const void*, const void*, uint);
void* memmove(void*, const void*, uint);
int strncmp(const char*, const char*, uint);
char* safestrcpy(char*, const char*, int);
int strlen(const char*);
// syscall.c
void syscall(void);
@ -135,11 +136,16 @@ int readi(struct inode*, char*, uint, uint);
int writei(struct inode*, char*, uint, uint);
struct inode* mknod(char*, short, short, short);
struct inode* dircreat(struct inode*, char*, int, short, short, short);
int dirlookup(struct inode*, char*, int, uint*, uint*);
struct inode* dirlookup(struct inode*, char*, int, uint*);
int unlink(char*);
void iupdate(struct inode*);
int link(char*, char*);
struct inode* igetroot(void);
int mkdir(char *path);
struct inode* create(char *path);
// exec.c
int exec(char*, char**);
// number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))

136
exec.c Normal file
View file

@ -0,0 +1,136 @@
#include "types.h"
#include "stat.h"
#include "param.h"
#include "mmu.h"
#include "proc.h"
#include "defs.h"
#include "x86.h"
#include "traps.h"
#include "syscall.h"
#include "spinlock.h"
#include "buf.h"
#include "fs.h"
#include "fsvar.h"
#include "elf.h"
#include "file.h"
#include "fcntl.h"
// Replace the current process's user memory image with the ELF
// program stored at path, and hand it the 0-terminated argument
// vector argv (kernel pointers to NUL-terminated strings).
//
// Returns 0 on success; the saved trapframe is rewritten so the
// process resumes at the program's entry point on the new stack.
// Returns -1 if anything fails before the old image is discarded.
// Once the old image has been freed there is nothing to return to,
// so a later failure terminates the process through proc_exit().
int
exec(char *path, char **argv)
{
  uint sz, sp, p1, p2;
  int i, nargs, argbytes, len;
  struct inode *ip;
  struct elfhdr elf;
  struct proghdr ph;
  char *mem;
  char *s, *last;

  sz = 0;
  mem = 0;

  if((ip = namei(path)) == 0)
    return -1;

  // Compare with != rather than <: readi returns an int, and in a
  // comparison against sizeof (unsigned) an error return of -1 would
  // convert to a huge value and slip past a "less than" test.
  if(readi(ip, (char*)&elf, 0, sizeof(elf)) != sizeof(elf))
    goto bad;
  if(elf.magic != ELF_MAGIC)
    goto bad;

  // First pass over the program headers: add up the memory needed
  // by the loadable segments.
  for(i = 0; i < elf.phnum; i++){
    if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
             sizeof(ph)) != sizeof(ph))
      goto bad;
    if(ph.type != ELF_PROG_LOAD)
      continue;
    if(ph.memsz < ph.filesz)
      goto bad;
    sz += ph.memsz;
  }

  // Round up past the next 4096-byte boundary, then add one more
  // 4096-byte page that holds the initial stack and the arguments.
  sz += 4096 - (sz % 4096);
  sz += 4096;

  mem = kalloc(sz);
  if(mem == 0)
    goto bad;
  memset(mem, 0, sz);

  // Count the arguments and the bytes their strings occupy
  // (including each terminating NUL).
  argbytes = 0;
  for(i = 0; argv[i]; i++){
    len = strlen(argv[i]);
    argbytes += len + 1;
  }
  nargs = i;

  // Everything built below must fit in the final stack page.
  // Without this check a large argument list would place sp inside
  // the program image, or wrap sp below zero and write outside mem.
  if(argbytes + (nargs+1)*4 + 4 + 4 + 4 > 4096)
    goto bad;

  // argn\0
  // ...
  // arg0\0
  // 0
  // ptr to argn
  // ...
  // 12: ptr to arg0
  //  8: argv (points to ptr to arg0)
  //  4: argc
  //  0: fake return pc
  sp = sz - argbytes - (nargs+1)*4 - 4 - 4 - 4;
  *(uint*)(mem + sp) = 0xffffffff;
  *(uint*)(mem + sp + 4) = nargs;
  *(uint*)(mem + sp + 8) = (uint)(sp + 12);

  // p1 walks the pointer table, p2 walks the string area above it.
  p1 = sp + 12;
  p2 = sp + 12 + (nargs + 1) * 4;
  for(i = 0; i < nargs; i++){
    len = strlen(argv[i]);
    memmove(mem + p2, argv[i], len + 1);
    *(uint*)(mem + p1) = p2;
    p1 += 4;
    p2 += len + 1;
  }
  *(uint*)(mem + p1) = 0;

  // Save name for debugging: the last component of path.
  for(last=s=path; *s; s++)
    if(*s == '/')
      last = s+1;
  safestrcpy(cp->name, last, sizeof cp->name);

  // Commit to the new image.  From here on failure is fatal (bad2).
  kfree(cp->mem, cp->sz);
  cp->sz = sz;
  cp->mem = mem;
  mem = 0;

  // Second pass: copy each loadable segment into place.
  for(i = 0; i < elf.phnum; i++){
    if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
             sizeof(ph)) != sizeof(ph))
      goto bad2;
    if(ph.type != ELF_PROG_LOAD)
      continue;
    // Reject a segment whose end wraps around the address space
    // before testing it against the image size.
    if(ph.va + ph.memsz < ph.va || ph.va + ph.memsz > sz)
      goto bad2;
    if(readi(ip, cp->mem + ph.va, ph.offset, ph.filesz) != ph.filesz)
      goto bad2;
    memset(cp->mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz);
  }

  iput(ip);

  cp->tf->eip = elf.entry;
  cp->tf->esp = sp;
  setupsegs(cp);

  return 0;

 bad:
  // Old image is still intact: release what was acquired and fail.
  if(mem)
    kfree(mem, sz);
  iput(ip);
  return -1;

 bad2:
  // Old image is already gone: the process cannot continue.
  iput(ip);
  proc_exit();
  return 0;
}

253
fs.c
View file

@ -25,8 +25,6 @@
#define min(a, b) ((a) < (b) ? (a) : (b))
static void ifree(struct inode*);
// Blocks.
// Allocate a disk block.
@ -116,30 +114,25 @@ iinit(void)
}
// Find the inode with number inum on device dev
// and return an in-memory copy. Loads the inode
// from disk into the in-core table if necessary.
// The returned inode is locked and has its ref count incremented.
// Caller must iput the return value when done with it.
// and return the in-memory copy. The returned inode
// has its reference count incremented (and thus must be
// idecref'ed), but is *unlocked*, meaning that none of the fields
// except dev and inum are guaranteed to be initialized.
// This convention gives the caller maximum control over blocking;
// it also guarantees that iget will not sleep, which is useful in
// the early igetroot and when holding other locked inodes.
struct inode*
iget(uint dev, uint inum)
{
struct inode *ip, *empty;
struct dinode *dip;
struct buf *bp;
acquire(&icache.lock);
loop:
// Try for cached inode.
empty = 0;
for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
if(ip->busy){
sleep(ip, &icache.lock);
goto loop;
}
ip->ref++;
ip->busy = 1;
release(&icache.lock);
return ip;
}
@ -155,52 +148,61 @@ iget(uint dev, uint inum)
ip->dev = dev;
ip->inum = inum;
ip->ref = 1;
ip->busy = 1;
ip->flags = 0;
release(&icache.lock);
bp = bread(dev, IBLOCK(inum));
dip = &((struct dinode*)(bp->data))[inum % IPB];
ip->type = dip->type;
ip->major = dip->major;
ip->minor = dip->minor;
ip->nlink = dip->nlink;
ip->size = dip->size;
memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
brelse(bp);
return ip;
}
// Iget the inode for the file system root (/).
// This gets called before there is a current process: it cannot sleep!
struct inode*
igetroot(void)
{
return iget(ROOTDEV, 1);
struct inode *ip;
ip = iget(ROOTDEV, 1);
return ip;
}
// Lock the given inode.
void
ilock(struct inode *ip)
{
struct buf *bp;
struct dinode *dip;
if(ip->ref < 1)
panic("ilock");
acquire(&icache.lock);
while(ip->busy)
while(ip->flags & I_BUSY)
sleep(ip, &icache.lock);
ip->busy = 1;
ip->flags |= I_BUSY;
release(&icache.lock);
if(!(ip->flags & I_VALID)){
bp = bread(ip->dev, IBLOCK(ip->inum));
dip = &((struct dinode*)(bp->data))[ip->inum % IPB];
ip->type = dip->type;
ip->major = dip->major;
ip->minor = dip->minor;
ip->nlink = dip->nlink;
ip->size = dip->size;
memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
brelse(bp);
ip->flags |= I_VALID;
}
}
// Unlock the given inode.
void
iunlock(struct inode *ip)
{
if(ip->busy != 1 || ip->ref < 1)
if(!(ip->flags & I_BUSY) || ip->ref < 1)
panic("iunlock");
acquire(&icache.lock);
ip->busy = 0;
ip->flags &= ~I_BUSY;
wakeup(ip);
release(&icache.lock);
}
@ -209,19 +211,8 @@ iunlock(struct inode *ip)
void
iput(struct inode *ip)
{
if(ip->ref < 1 || ip->busy != 1)
panic("iput");
if((ip->ref == 1) && (ip->nlink == 0)) {
itrunc(ip);
ifree(ip);
}
acquire(&icache.lock);
ip->ref -= 1;
ip->busy = 0;
wakeup(ip);
release(&icache.lock);
iunlock(ip);
idecref(ip);
}
// Increment reference count for ip.
@ -229,31 +220,42 @@ iput(struct inode *ip)
struct inode*
iincref(struct inode *ip)
{
ilock(ip);
acquire(&icache.lock);
ip->ref++;
iunlock(ip);
release(&icache.lock);
return ip;
}
// Caller holds reference to unlocked ip.
// Drop reference.
// Caller holds reference to unlocked ip. Drop reference.
void
idecref(struct inode *ip)
{
ilock(ip);
iput(ip);
acquire(&icache.lock);
if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0) {
// inode is no longer used: truncate and free inode.
if(ip->flags & I_BUSY)
panic("idecref busy");
ip->flags |= I_BUSY;
release(&icache.lock);
// XXX convince rsc that no one will come find this inode.
itrunc(ip);
ip->type = 0;
iupdate(ip);
acquire(&icache.lock);
ip->flags &= ~I_BUSY;
}
ip->ref--;
release(&icache.lock);
}
// Allocate a new inode with the given type on device dev.
struct inode*
ialloc(uint dev, short type)
{
struct inode *ip;
int inum, ninodes;
struct buf *bp;
struct dinode *dip;
struct superblock *sb;
int ninodes;
int inum;
struct buf *bp;
bp = bread(dev, 1);
sb = (struct superblock*)bp->data;
@ -268,8 +270,7 @@ ialloc(uint dev, short type)
dip->type = type;
bwrite(bp, IBLOCK(inum)); // mark it allocated on the disk
brelse(bp);
ip = iget(dev, inum);
return ip;
return iget(dev, inum);
}
brelse(bp);
}
@ -295,15 +296,6 @@ iupdate(struct inode *ip)
brelse(bp);
}
// Free (delete) the given inode.
// Caller must have ip locked.
static void
ifree(struct inode *ip)
{
ip->type = 0;
iupdate(ip);
}
// Inode contents
//
// The contents (data) associated with each inode is stored
@ -465,15 +457,15 @@ writei(struct inode *ip, char *src, uint off, uint n)
// set *poff to the byte offset of the directory entry
// set *pinum to the inode number
// return 0.
int
dirlookup(struct inode *dp, char *name, int namelen, uint *poff, uint *pinum)
struct inode*
dirlookup(struct inode *dp, char *name, int namelen, uint *poff)
{
uint off;
uint off, inum;
struct buf *bp;
struct dirent *de;
if(dp->type != T_DIR)
return -1;
return 0;
for(off = 0; off < dp->size; off += BSIZE){
bp = bread(dp->dev, bmap(dp, off / BSIZE, 0));
@ -487,24 +479,30 @@ dirlookup(struct inode *dp, char *name, int namelen, uint *poff, uint *pinum)
// entry matches path element
if(poff)
*poff = off + (uchar*)de - bp->data;
if(pinum)
*pinum = de->inum;
inum = de->inum;
brelse(bp);
return 0;
return iget(dp->dev, inum);
}
}
brelse(bp);
}
return -1;
return 0;
}
// Write a new directory entry (name, ino) into the directory dp.
// Caller must have locked dp.
void
dirwrite(struct inode *dp, char *name, int namelen, uint ino)
int
dirlink(struct inode *dp, char *name, int namelen, uint ino)
{
int off;
struct dirent de;
struct inode *ip;
// Double-check that name is not present.
if((ip = dirlookup(dp, name, namelen, 0)) != 0){
idecref(ip);
return -1;
}
// Look for an empty dirent.
for(off = 0; off < dp->size; off += sizeof(de)){
@ -519,9 +517,10 @@ dirwrite(struct inode *dp, char *name, int namelen, uint ino)
namelen = DIRSIZ;
memmove(de.name, name, namelen);
memset(de.name+namelen, 0, DIRSIZ-namelen);
if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
panic("dirwrite");
return 0;
}
// Create a new inode named name inside dp
@ -535,13 +534,19 @@ dircreat(struct inode *dp, char *name, int namelen, short type, short major, sho
ip = ialloc(dp->dev, type);
if(ip == 0)
return 0;
ilock(ip);
ip->major = major;
ip->minor = minor;
ip->size = 0;
ip->nlink = 1;
iupdate(ip);
dirwrite(dp, name, namelen, ip->inum);
if(dirlink(dp, name, namelen, ip->inum) < 0){
ip->nlink = 0;
iupdate(ip);
iput(ip);
return 0;
}
return ip;
}
@ -590,17 +595,16 @@ skipelem(char *path, char **name, int *len)
struct inode*
_namei(char *path, int parent, char **pname, int *pnamelen)
{
struct inode *dp;
struct inode *dp, *ip;
char *name;
int namelen;
uint off, dev, inum;
uint off;
if(*path == '/')
dp = igetroot();
else {
else
dp = iincref(cp->cwd);
ilock(dp);
}
ilock(dp);
while((path = skipelem(path, &name, &namelen)) != 0){
// Truncate names in path to DIRSIZ chars.
@ -617,12 +621,12 @@ _namei(char *path, int parent, char **pname, int *pnamelen)
return dp;
}
if(dirlookup(dp, name, namelen, &off, &inum) < 0)
if((ip = dirlookup(dp, name, namelen, &off)) == 0)
goto fail;
dev = dp->dev;
iput(dp);
dp = iget(dev, inum);
ilock(ip);
dp = ip;
if(dp->type == 0 || dp->nlink < 1)
panic("namei");
}
@ -660,10 +664,6 @@ mknod(char *path, short type, short major, short minor)
if((dp = nameiparent(path, &name, &namelen)) == 0)
return 0;
if(dirlookup(dp, name, namelen, 0, 0) >= 0){
iput(dp);
return 0;
}
ip = dircreat(dp, name, namelen, type, major, minor);
iput(dp);
return ip;
@ -675,13 +675,13 @@ unlink(char *path)
{
struct inode *ip, *dp;
struct dirent de;
uint off, inum, dev;
uint off;
char *name;
int namelen;
if((dp = nameiparent(path, &name, &namelen)) == 0)
return -1;
if(dirlookup(dp, name, namelen, &off, 0) < 0){
if((ip = dirlookup(dp, name, namelen, &off)) == 0){
iput(dp);
return -1;
}
@ -691,20 +691,17 @@ unlink(char *path)
// Cannot remove "." or ".." - the 2 and 3 count the trailing NUL.
if(memcmp(de.name, ".", 2) == 0 || memcmp(de.name, "..", 3) == 0){
idecref(ip);
iput(dp);
return -1;
}
inum = de.inum;
memset(&de, 0, sizeof(de));
if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
panic("unlink dir write");
dev = dp->dev;
iput(dp);
ip = iget(dev, inum);
ilock(ip);
if(ip->nlink < 1)
panic("unlink nlink < 1");
ip->nlink--;
@ -729,30 +726,76 @@ link(char *old, char *new)
return -1;
}
iunlock(ip);
if((dp = nameiparent(new, &name, &namelen)) == 0){
idecref(ip);
return -1;
}
if(dirlookup(dp, name, namelen, 0, 0) >= 0){
iput(dp);
idecref(ip);
return -1;
}
if(dp->dev != ip->dev){
if(dp->dev != ip->dev || dirlink(dp, name, namelen, ip->inum) < 0){
idecref(ip);
iput(dp);
return -1;
}
iput(dp);
// LOCKING ERROR HERE! TWO LOCKS HELD AT ONCE.
// XXX write ordering wrong here too.
ilock(ip);
ip->nlink++;
iupdate(ip);
iput(ip);
return 0;
}
dirwrite(dp, name, namelen, ip->inum);
// Create a new, empty directory named by path.
// Looks up the parent directory, creates a T_DIR inode inside it,
// bumps the parent's link count, and adds the "." and ".." entries
// to the new directory.
// Returns 0 on success, -1 if the parent cannot be found or the
// new inode cannot be created.  Panics if "." or ".." cannot be
// written.
int
mkdir(char *path)
{
  struct inode *parent, *dir;
  char *elem;
  int elemlen;

  // XXX write ordering is screwy here- do we care?
  parent = nameiparent(path, &elem, &elemlen);
  if(parent == 0)
    return -1;

  dir = dircreat(parent, elem, elemlen, T_DIR, 0, 0);
  if(dir == 0){
    iput(parent);
    return -1;
  }

  // One more link on the parent, matching the ".." entry
  // added to the new directory just below.
  parent->nlink++;
  iupdate(parent);

  if(dirlink(dir, ".", 1, dir->inum) < 0)
    panic("mkdir");
  if(dirlink(dir, "..", 2, parent->inum) < 0)
    panic("mkdir");

  iput(parent);
  iput(dir);
  return 0;
}
// Open-or-create helper: return the inode named by path, creating
// a T_FILE inode in the parent directory if no entry exists yet.
// An existing entry is locked with ilock before being returned.
// Returns 0 if the parent cannot be found, if the existing entry
// is a directory, or if creating the new inode fails.
struct inode*
create(char *path)
{
  struct inode *parent, *file;
  char *elem;
  int elemlen;

  parent = nameiparent(path, &elem, &elemlen);
  if(parent == 0)
    return 0;

  file = dirlookup(parent, elem, elemlen, 0);
  if(file == 0){
    // No such entry: make one.  The parent is released whether
    // or not dircreat succeeds; file is 0 on failure.
    file = dircreat(parent, elem, elemlen, T_FILE, 0, 0);
    iput(parent);
    return file;
  }

  // Entry already exists: done with the parent, lock the target.
  iput(parent);
  ilock(file);
  if(file->type == T_DIR){
    iput(file);
    return 0;
  }
  return file;
}

View file

@ -4,7 +4,7 @@ struct inode {
uint dev; // Device number
uint inum; // Inode number
int ref; // Reference count
int busy; // Is the inode "locked"?
int flags; // I_BUSY, I_VALID
short type; // copy of disk inode
short major;
@ -16,6 +16,5 @@ struct inode {
#define ROOTDEV 1 // Device number of root file system
#define NAMEI_LOOKUP 1
#define NAMEI_CREATE 2
#define NAMEI_DELETE 3
#define I_BUSY 0x1
#define I_VALID 0x2

28
initcode.S Normal file
View file

@ -0,0 +1,28 @@
# Initial process execs /init.
#include "syscall.h"
#include "traps.h"
# exec(init, argv)
# Lay out the call frame by hand, pushing right to left:
# the address of the argv table, the address of the path string,
# and finally a zero word in the slot where a return address
# would normally sit.
start:
pushl $argv
pushl $init
pushl $0
# Put the exec system call number in %eax and trap.
movl $SYS_exec, %eax
int $T_SYSCALL
# for(;;) exit();
# Execution falls through to here when the trap above returns.
exit:
movl $SYS_exit, %eax
int $T_SYSCALL
jmp exit
# "/init\0"
init:
.string "/init\0"
# Align the pointer table that follows on a 4-byte boundary.
.p2align 2
# Argument vector: pointer to the path string, then a 0 terminator.
argv:
.long init
.long 0

120
main.c
View file

@ -11,9 +11,8 @@
#include "spinlock.h"
extern char edata[], end[];
extern uchar _binary__init_start[], _binary__init_size[];
void process0();
void proc0init();
// Bootstrap processor starts running C code here.
// This is called main0 not main so that it can have
@ -24,7 +23,6 @@ main0(void)
{
int i;
static int bcpu; // cannot be on stack
struct proc *p;
// clear BSS
memset(edata, 0, end - edata);
@ -54,15 +52,6 @@ main0(void)
fileinit();
iinit(); // i-node table
// initialize process 0
p = &proc[0];
p->state = RUNNABLE;
p->kstack = kalloc(KSTACKSIZE);
// cause proc[0] to start in kernel at process0
p->jmpbuf.eip = (uint) process0;
p->jmpbuf.esp = (uint) (p->kstack + KSTACKSIZE - 4);
// make sure there's a TSS
setupsegs(0);
@ -86,6 +75,9 @@ main0(void)
cpus[cpu()].nlock--;
sti();
// initialize process 0
proc0init();
scheduler();
}
@ -114,77 +106,45 @@ mpmain(void)
scheduler();
}
// proc[0] starts here, called by scheduler() in the ordinary way.
void
process0(void)
{
extern struct spinlock proc_table_lock;
struct proc *p0, *p1;
struct trapframe tf;
release(&proc_table_lock);
p0 = &proc[0];
p0->cwd = igetroot();
iunlock(p0->cwd);
// Dummy user memory to make copyproc() happy.
// Must be big enough to hold the init binary and stack.
p0->sz = 2*PAGE;
p0->mem = kalloc(p0->sz);
// Fake a trap frame as if a user process had made a system
// call, so that copyproc will have a place for the new
// process to return to.
p0->tf = &tf;
memset(p0->tf, 0, sizeof(struct trapframe));
p0->tf->es = p0->tf->ds = p0->tf->ss = (SEG_UDATA << 3) | DPL_USER;
p0->tf->cs = (SEG_UCODE << 3) | DPL_USER;
p0->tf->eflags = FL_IF;
p0->tf->esp = p0->sz;
char initcode[] = {
/* push ptr to argv */ 0x6a, 0x1c,
/* push ptr to "/init" */ 0x6a, 0x16,
/* push fake ret addr */ 0x6a, 0x00,
/* mov $SYS_exec, %eax */ 0xb8, 0x09, 0x00, 0x00, 0x00,
/* int $0x30 */ 0xcd, 0x30,
/* Lx: */
/* mov $SYS_exit, %eax */ 0xb8, 0x02, 0x00, 0x00, 0x00,
/* int $0x30 */ 0xcd, 0x30,
/* jmp Lx */ 0xeb, 0xf7,
// Push bogus return address, both to cause problems
// if main returns and also because gcc can generate
// function prologs that expect to be able to read the
// return address off the stack without causing a fault.
p0->tf->esp -= 4;
*(uint*)(p0->mem + p0->tf->esp) = 0xefefefef;
p1 = copyproc(p0);
load_icode(p1, _binary__init_start, (uint) _binary__init_size);
p1->state = RUNNABLE;
safestrcpy(p1->name, "init", sizeof p1->name);
proc_wait();
panic("init exited");
}
/* "/init\0" */ 0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00,
/* ptr to "/init" */ 0x16, 0x00, 0x00, 0x00,
/* 0 */ 0x00, 0x00, 0x00, 0x00
};
void
load_icode(struct proc *p, uchar *binary, uint size)
proc0init(void)
{
int i;
struct elfhdr *elf;
struct proghdr *ph;
struct proc *p;
extern uchar _binary_initcode_start[], _binary_initcode_size[];
p = copyproc(0);
p->sz = PAGE;
p->mem = kalloc(p->sz);
p->cwd = igetroot();
memset(&p->tf, 0, sizeof p->tf);
p->tf->es = p->tf->ds = p->tf->ss = (SEG_UDATA << 3) | DPL_USER;
p->tf->cs = (SEG_UCODE << 3) | DPL_USER;
p->tf->eflags = FL_IF;
p->tf->esp = p->sz;
// Push dummy return address to placate gcc.
p->tf->esp -= 4;
*(uint*)(p->mem + p->tf->esp) = 0xefefefef;
elf = (struct elfhdr*) binary;
if(elf->magic != ELF_MAGIC)
panic("load_icode: not an ELF binary");
p->tf->eip = elf->entry;
// Map and load segments as directed.
ph = (struct proghdr*) (binary + elf->phoff);
for(i = 0; i < elf->phnum; i++, ph++) {
if(ph->type != ELF_PROG_LOAD)
continue;
if(ph->va + ph->memsz < ph->va)
panic("load_icode: overflow in proghdr");
if(ph->va + ph->memsz >= p->sz)
panic("load_icode: icode too large");
// Load/clear the segment
memmove(p->mem + ph->va, binary + ph->offset, ph->filesz);
memset(p->mem + ph->va + ph->filesz, 0, ph->memsz - ph->filesz);
}
p->tf->eip = 0;
memmove(p->mem, _binary_initcode_start, (int)_binary_initcode_size);
safestrcpy(p->name, "initcode", sizeof p->name);
p->state = RUNNABLE;
}

50
proc.c
View file

@ -109,47 +109,43 @@ copyproc(struct proc *p)
return 0;
}
np->pid = next_pid++;
np->ppid = p->pid;
release(&proc_table_lock);
// Copy user memory.
np->sz = p->sz;
np->mem = kalloc(np->sz);
if(np->mem == 0){
np->state = UNUSED;
return 0;
}
memmove(np->mem, p->mem, np->sz);
// Allocate kernel stack.
np->kstack = kalloc(KSTACKSIZE);
if(np->kstack == 0){
kfree(np->mem, np->sz);
np->mem = 0;
if((np->kstack = kalloc(KSTACKSIZE)) == 0){
np->state = UNUSED;
return 0;
}
// Copy trapframe registers from parent.
np->tf = (struct trapframe*)(np->kstack + KSTACKSIZE) - 1;
memmove(np->tf, p->tf, sizeof(*np->tf));
// Clear %eax so that fork system call returns 0 in child.
np->tf->eax = 0;
if(p){ // Copy process state from p.
np->ppid = p->pid;
memmove(np->tf, p->tf, sizeof *np->tf);
np->sz = p->sz;
if((np->mem = kalloc(np->sz)) == 0){
kfree(np->kstack, KSTACKSIZE);
np->kstack = 0;
np->state = UNUSED;
return 0;
}
memmove(np->mem, p->mem, np->sz);
for(i = 0; i < NOFILE; i++){
np->ofile[i] = p->ofile[i];
if(np->ofile[i])
fileincref(np->ofile[i]);
}
np->cwd = iincref(p->cwd);
}
// Set up new jmpbuf to start executing at forkret (see below).
memset(&np->jmpbuf, 0, sizeof np->jmpbuf);
np->jmpbuf.eip = (uint)forkret;
np->jmpbuf.esp = (uint)np->tf - 4;
// Copy file descriptors
for(i = 0; i < NOFILE; i++){
np->ofile[i] = p->ofile[i];
if(np->ofile[i])
fileincref(np->ofile[i]);
}
np->cwd = iincref(p->cwd);
// Clear %eax so that fork system call returns 0 in child.
np->tf->eax = 0;
return np;
}

View file

@ -75,3 +75,13 @@ safestrcpy(char *s, const char *t, int n)
return os;
}
// Return the number of bytes in s before its terminating NUL.
int
strlen(const char *s)
{
  const char *end;

  // Walk a cursor to the terminator; the distance travelled
  // is the length.
  end = s;
  while(*end != '\0')
    end++;
  return (int)(end - s);
}

View file

@ -53,7 +53,6 @@ fetchstr(struct proc *p, uint addr, char **pp)
// Fetch system call argument number argno of the current process
// as an int, storing it through ip.
// Returns whatever fetchint returns for that address.
int
argint(int argno, int *ip)
{
  unsigned int addr;

  // Arguments sit on the user stack above the word at esp,
  // one 4-byte slot each.
  addr = cp->tf->esp + 4 + 4*argno;
  return fetchint(cp, addr, ip);
}

221
sysfile.c
View file

@ -114,42 +114,22 @@ sys_close(void)
int
sys_open(void)
{
struct inode *ip, *dp;
char *path, *name;
int namelen;
int omode;
int fd, dev;
uint inum;
char *path;
int fd, omode;
struct file *f;
struct inode *ip;
if(argstr(0, &path) < 0 || argint(1, &omode) < 0)
return -1;
switch(omode & O_CREATE){
default:
case 0: // regular open
if((ip = namei(path)) == 0)
return -1;
break;
case O_CREATE:
if((dp = nameiparent(path, &name, &namelen)) == 0)
return -1;
if(dirlookup(dp, name, namelen, 0, &inum) >= 0){
dev = dp->dev;
iput(dp);
ip = iget(dev, inum);
}else{
if((ip = dircreat(dp, name, namelen, T_FILE, 0, 0)) == 0){
iput(dp);
return -1;
}
iput(dp);
}
break;
}
if(omode & O_CREATE)
ip = create(path);
else
ip = namei(path);
if(ip == 0)
return -1;
if(ip->type == T_DIR && (omode & (O_RDWR|O_WRONLY|O_CREATE))){
if(ip->type == T_DIR && (omode & (O_RDWR|O_WRONLY))){
iput(ip);
return -1;
}
@ -194,6 +174,7 @@ sys_mknod(void)
argint(2, &major) < 0 || argint(3, &minor) < 0)
return -1;
// XXX why this check?
if(len >= DIRSIZ)
return -1;
@ -206,45 +187,11 @@ sys_mknod(void)
int
sys_mkdir(void)
{
struct inode *nip;
struct inode *dp;
char *name, *path;
struct dirent de;
int namelen;
char *path;
if(argstr(0, &path) < 0)
return -1;
dp = nameiparent(path, &name, &namelen);
if(dp == 0)
return -1;
if(dirlookup(dp, name, namelen, 0, 0) >= 0){
iput(dp);
return -1;
}
nip = dircreat(dp, name, namelen, T_DIR, 0, 0);
if(nip == 0){
iput(dp);
return -1;
}
dp->nlink++;
iupdate(dp);
memset(de.name, '\0', DIRSIZ);
de.name[0] = '.';
de.inum = nip->inum;
writei(nip, (char*) &de, 0, sizeof(de));
de.inum = dp->inum;
de.name[1] = '.';
writei(nip, (char*) &de, sizeof(de), sizeof(de));
iput(dp);
iput(nip);
return 0;
return mkdir(path);
}
int
@ -315,132 +262,30 @@ sys_link(void)
return link(old, new);
}
#define ARGMAX 10
int
sys_exec(void)
{
uint sz=0, ap, sp, p1, p2;
int i, nargs, argbytes, len;
struct inode *ip;
struct elfhdr elf;
struct proghdr ph;
char *mem = 0;
char *path, *s, *last;
uint argv;
if(argstr(0, &path) < 0 || argint(1, (int*)&argv) < 0)
char *path, *argv[ARGMAX];
int i;
uint uargv, uarg;
if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0)
return -1;
if((ip = namei(path)) == 0)
return -1;
if(readi(ip, (char*)&elf, 0, sizeof(elf)) < sizeof(elf))
goto bad;
if(elf.magic != ELF_MAGIC)
goto bad;
sz = 0;
for(i = 0; i < elf.phnum; i++){
if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
sizeof(ph)) != sizeof(ph))
goto bad;
if(ph.type != ELF_PROG_LOAD)
continue;
if(ph.memsz < ph.filesz)
goto bad;
sz += ph.memsz;
}
sz += 4096 - (sz % 4096);
sz += 4096;
mem = kalloc(sz);
if(mem == 0)
goto bad;
memset(mem, 0, sz);
nargs = 0;
argbytes = 0;
for(i = 0;; i++){
if(fetchint(cp, argv + 4*i, (int*)&ap) < 0)
goto bad;
if(ap == 0)
memset(argv, 0, sizeof argv);
for(i=0;; i++){
if(i >= ARGMAX)
return -1;
if(fetchint(cp, uargv+4*i, (int*)&uarg) < 0)
return -1;
if(uarg == 0){
argv[i] = 0;
break;
len = fetchstr(cp, ap, &s);
if(len < 0)
goto bad;
nargs++;
argbytes += len + 1;
}
if(fetchstr(cp, uarg, &argv[i]) < 0)
return -1;
}
// argn\0
// ...
// arg0\0
// 0
// ptr to argn
// ...
// 12: ptr to arg0
// 8: argv (points to ptr to arg0)
// 4: argc
// 0: fake return pc
sp = sz - argbytes - (nargs+1)*4 - 4 - 4 - 4;
*(uint*)(mem + sp) = 0xffffffff;
*(uint*)(mem + sp + 4) = nargs;
*(uint*)(mem + sp + 8) = (uint)(sp + 12);
p1 = sp + 12;
p2 = sp + 12 + (nargs + 1) * 4;
for(i = 0; i < nargs; i++){
fetchint(cp, argv + 4*i, (int*)&ap);
len = fetchstr(cp, ap, &s);
memmove(mem + p2, s, len + 1);
*(uint*)(mem + p1) = p2;
p1 += 4;
p2 += len + 1;
}
*(uint*)(mem + p1) = 0;
// Save name for debugging.
for(last=s=path; *s; s++)
if(*s == '/')
last = s+1;
safestrcpy(cp->name, last, sizeof cp->name);
// commit to the new image.
kfree(cp->mem, cp->sz);
cp->sz = sz;
cp->mem = mem;
mem = 0;
for(i = 0; i < elf.phnum; i++){
if(readi(ip, (char*)&ph, elf.phoff + i * sizeof(ph),
sizeof(ph)) != sizeof(ph))
goto bad2;
if(ph.type != ELF_PROG_LOAD)
continue;
if(ph.va + ph.memsz > sz)
goto bad2;
if(readi(ip, cp->mem + ph.va, ph.offset, ph.filesz) != ph.filesz)
goto bad2;
memset(cp->mem + ph.va + ph.filesz, 0, ph.memsz - ph.filesz);
}
iput(ip);
cp->tf->eip = elf.entry;
cp->tf->esp = sp;
setupsegs(cp);
return 0;
bad:
if(mem)
kfree(mem, sz);
iput(ip);
return -1;
bad2:
iput(ip);
proc_exit();
return 0;
return exec(path, argv);
}