From ad329b5f81fec4634dbc8842cbd62418042b4e63 Mon Sep 17 00:00:00 2001
From: Jack Halford <jack@0x5.be>
Date: Fri, 27 Dec 2019 11:02:29 +0100
Subject: [PATCH] Step 11: Preemptive scheduler

---
 src/arch/x86/idt.zig              |  24 ++-
 src/arch/x86/interrupt.zig        |  23 +--
 src/arch/x86/isr.s                |  19 ++-
 src/arch/x86/lib/instructions.zig |  12 ++
 src/arch/x86/paging.zig           |  10 +-
 src/console.zig                   |   5 +-
 src/main.zig                      |   1 +
 src/task.zig                      | 233 ++++++++++++++++++------------
 src/time.zig                      |  15 +-
 src/vga.zig                       |   4 +-
 src/vmem.zig                      |   4 +-
 11 files changed, 213 insertions(+), 137 deletions(-)

diff --git a/src/arch/x86/idt.zig b/src/arch/x86/idt.zig
index 64d9381..df88ad1 100644
--- a/src/arch/x86/idt.zig
+++ b/src/arch/x86/idt.zig
@@ -57,9 +57,31 @@ pub fn initialize() void {
     isr.install_exceptions();
     isr.install_irqs();
     isr.install_syscalls();
-    interrupt.registerIRQ(0, interrupt.pit_handler);
+    interrupt.registerIRQ(0, kernel.time.increment);
     interrupt.registerIRQ(1, kernel.ps2.keyboard_handler);
+    interrupt.register(1, debug_trap);
+    interrupt.register(13, general_protection_fault);
+    interrupt.register(14, page_fault);
 
     // load IDT
     lidt(@ptrToInt(&idtr));
 }
+
+fn general_protection_fault() void {
+    kernel.println("general protection fault");
+}
+
+fn debug_trap() void {
+    kernel.println("debug fault/trap");
+    kernel.println("dr7: 0b{b}", dr7());
+}
+
+fn page_fault() void {
+    const vaddr = cr2();
+    kernel.println("cr2: 0x{x}", vaddr);
+    kernel.println("phy: 0x{x}", paging.translate(vaddr));
+    kernel.println("pde: 0x{x} ({})", paging.pde(vaddr), vaddr >> 22);
+    kernel.println("pte: 0x{x} ({})", paging.pte(vaddr), vaddr >> 12);
+    paging.format();
+    while (true) asm volatile ("hlt");
+}
diff --git a/src/arch/x86/interrupt.zig b/src/arch/x86/interrupt.zig
index e3dfbd9..edacc3f 100644
--- a/src/arch/x86/interrupt.zig
+++ b/src/arch/x86/interrupt.zig
@@ -57,7 +57,11 @@ export fn interruptDispatch() void {
     switch (n) {
         // Exceptions.
         EXCEPTION_0...EXCEPTION_31 => {
-            handlers[n]();
+            kernel.println("");
+            kernel.println("num: {}", isr.context.interrupt_n);
+            kernel.println("err: {}", isr.context.error_code);
+            kernel.println("ip:  0x{x}", isr.context.eip);
+            return handlers[n]();
         },
 
         // IRQs.
@@ -181,27 +185,14 @@ pub fn maskIRQ(irq: u8, mask: bool) void {
 }
 
 // configures the chan0 with a rate generator, which will trigger irq0
+pub const divisor = 2685;
+pub const tick = 2251; // us per IRQ0: divisor / 1.193182 MHz, TODO: turn into a function
 pub fn configPIT() void {
     const chanNum = 0;
     const chan = PIT_CHAN0;
-    const divisor = 2685;
     const LOHI = 0b11; // bit4 | bit5
     const PITMODE_RATE_GEN = 0x2;
     outb(PIT_CMD, chanNum << 6 | LOHI << 4 | PITMODE_RATE_GEN << 1);
     outb(PIT_CHAN0, divisor & 0xff);
     outb(PIT_CHAN0, divisor >> 8);
 }
-
-pub fn pit_handler() void {
-    // pit freq = 1.193182 MHz
-    // chan0 divisor = 2685
-    // PIT_RATE in us
-    kernel.time.increment(2251);
-    kernel.task.sleeping_tasks.decrement(2251);
-    while (kernel.task.sleeping_tasks.popZero()) |sleepnode| {
-        const tasknode = sleepnode.data;
-        tasknode.data.state = .ReadyToRun;
-        kernel.vmem.free(@ptrToInt(sleepnode));
-        kernel.task.ready_tasks.prepend(tasknode);
-    }
-}
diff --git a/src/arch/x86/isr.s b/src/arch/x86/isr.s
index 118d4c2..ad970c1 100644
--- a/src/arch/x86/isr.s
+++ b/src/arch/x86/isr.s
@@ -1,5 +1,5 @@
 // Kernel stack for interrupt handling.
-KERNEL_STACK = 0x10000
+// KERNEL_STACK = 0x10000
 // GDT selectors.
 KERNEL_DS = 0x10
 
@@ -27,24 +27,23 @@ isrCommon:
     pusha  // Save the registers state.
 
     // Setup kernel data segment.
-    mov $KERNEL_DS, %ax
-    mov %ax, %ds
-    mov %ax, %es
+    // mov $KERNEL_DS, %ax
+    // mov %ax, %ds
+    // mov %ax, %es
 
     // Save the pointer to the current context and switch to the kernel stack.
     mov %esp, context
-    mov $KERNEL_STACK, %esp
+    // mov $KERNEL_STACK, %esp
 
     call interruptDispatch  // Handle the interrupt event.
 
     // Restore the pointer to the context (of a different thread, potentially).
-    mov context, %esp
+    // mov context, %esp
 
     // Setup user data segment.
-    mov $USER_DS, %ax
-    mov %ax, %ds
-    mov %ax, %es
-
+    // mov $USER_DS, %ax
+    // mov %ax, %ds
+    // mov %ax, %es
 
     popa          // Restore the registers state.
     add $8, %esp  // Remove interrupt number and error code from stack.
diff --git a/src/arch/x86/lib/instructions.zig b/src/arch/x86/lib/instructions.zig
index f93edbe..1a638f5 100644
--- a/src/arch/x86/lib/instructions.zig
+++ b/src/arch/x86/lib/instructions.zig
@@ -33,3 +33,15 @@ pub inline fn lidt(idtr: usize) void {
         : [idtr] "r" (idtr)
     );
 }
+
+pub fn cr2() usize {
+    return asm volatile ("movl %%cr2, %[result]"
+        : [result] "=r" (-> usize)
+    );
+}
+
+pub fn dr7() usize {
+    return asm volatile ("movl %%dr7, %[result]"
+        : [result] "=r" (-> usize)
+    );
+}
diff --git a/src/arch/x86/paging.zig b/src/arch/x86/paging.zig
index f2cd879..e101918 100644
--- a/src/arch/x86/paging.zig
+++ b/src/arch/x86/paging.zig
@@ -15,19 +15,14 @@ const HUGE = 0x80;
 
 pub var pageDirectory: [1024]PageEntry align(4096) linksection(".bss") = [_]PageEntry{0} ** 1024;
 
-fn pageFault() void {
-    kernel.println("pagefault");
-    while (true) asm volatile ("hlt");
-}
-
 // TODO: inline these
 fn pageBase(virt: usize) usize {
     return virt & (~PAGE_SIZE +% 1);
 }
-fn pde(virt: usize) *PageEntry {
+pub fn pde(virt: usize) *PageEntry {
     return &PD[virt >> 22]; //relies on recursive mapping
 }
-fn pte(virt: usize) *PageEntry {
+pub fn pte(virt: usize) *PageEntry {
     return &PT[virt >> 12]; //relies on recursive mapping
 }
 
@@ -64,7 +59,6 @@ pub fn initialize() void {
     // TODO: verify is this a hack?
     assert(pmem.stack_end < kernel.layout.IDENTITY);
 
-    interrupt.register(14, pageFault);
     setupPaging(@ptrToInt(&pageDirectory[0])); //asm routine
 }
 
diff --git a/src/console.zig b/src/console.zig
index ad9d6bc..68d46a3 100644
--- a/src/console.zig
+++ b/src/console.zig
@@ -65,9 +65,6 @@ pub fn loop() void {
             keypress(input_ring_buffer[input_read_index]);
             input_read_index +%= 1;
         }
-
-        task.lock_scheduler();
-        task.schedule();
-        task.unlock_scheduler();
+        // task.usleep(10 * 1000) catch unreachable;
     }
 }
diff --git a/src/main.zig b/src/main.zig
index 994cafa..89fd027 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -26,6 +26,7 @@ export fn kmain(magic: u32, info: *const multiboot.MultibootInfo) noreturn {
     pci.scan();
 
     task.new(@ptrToInt(topbar)) catch unreachable;
+    task.preempt();
 
     console.loop();
     unreachable;
diff --git a/src/task.zig b/src/task.zig
index 9297f94..96b937f 100644
--- a/src/task.zig
+++ b/src/task.zig
@@ -17,6 +17,7 @@ var timer_last_count: u64 = 0;
 pub fn update_time_used() void {
     const current_count = time.offset_us;
     const elapsed = current_count - timer_last_count;
+    // if (current_task.data.tid == 1) println("{} adding {} time", current_task.data.tid, elapsed);
     timer_last_count = current_count;
     current_task.data.time_used += elapsed;
 }
@@ -24,8 +25,9 @@ pub fn update_time_used() void {
 pub const TaskState = enum {
     Running,
     ReadyToRun,
-    Blocked,
+    Paused,
     Sleeping,
+    Terminated,
 };
 
 pub const Task = struct {
@@ -59,76 +61,63 @@ pub const Task = struct {
         return t;
     }
 
-    // responsible for calling the task entrypoint
     pub fn destroy(self: *Task) void {
         vmem.free(self.esp);
         vmem.free(@ptrToInt(self));
     }
 };
 
-///ASM
-// extern fn jmp_to_entrypoint(entrypoint: usize) void;
-// // this is only run once on the first execution of a task
-// pub fn birth() void {
-//     println("birth!");
-//     unlock_scheduler();
-//     const entrypoint = current_task.data.entrypoint;
-//     jmp_to_entrypoint(entrypoint);
-//     // comptime asm ("jmp %[entrypoint]"
-//     //     :
-//     //     : [entrypoint] "{ecx}" (entrypoint)
-//     // );
-// }
 ///ASM
 extern fn switch_tasks(new_esp: usize, old_esp_addr: usize) void;
 
 pub fn new(entrypoint: usize) !void {
+    task.lock_scheduler();
+    defer task.unlock_scheduler();
+
     const node = try vmem.create(TaskNode);
     node.data = try Task.create(entrypoint);
     ready_tasks.prepend(node);
 }
 
+// Ages the sleep queue by `tick`, readies every expired sleeper, and preempts if any woke.
+pub fn sleep_tick(tick: usize) void {
+    task.lock_scheduler();
+    defer task.unlock_scheduler();
+
+    task.sleeping_tasks.decrement(tick);
+    var woke_any = false;
+    while (task.sleeping_tasks.popZero()) |expired| {
+        // keep the task, drop the SleepNode that wrapped it
+        const sleeper = expired.data;
+        sleeper.data.state = .ReadyToRun;
+        vmem.free(@ptrToInt(expired));
+        task.ready_tasks.prepend(sleeper);
+        woke_any = true;
+    }
+    if (woke_any) preempt();
+}
+
 // TODO: make a sleep without malloc
 pub fn usleep(usec: u64) !void {
+    assert(current_task.data.state == .Running);
+
     const node = try vmem.create(SleepNode);
 
+    update_time_used();
+
     lock_scheduler();
+    defer unlock_scheduler();
+
     current_task.data.state = .Sleeping;
     node.data = current_task;
     node.counter = usec;
     sleeping_tasks.insert(node);
     schedule();
-    unlock_scheduler();
 }
 
-pub fn block(state: TaskState) void {
-    assert(state != .Running);
-    assert(state != .ReadyToRun);
-
-    lock_scheduler();
-    current_task.data.state = state;
-    blocked_tasks.append(current_task);
-    schedule();
-    unlock_scheduler();
-}
-
-pub fn unblock(node: *TaskNode) void {
-    lock_scheduler();
-    node.data.state = .ReadyToRun;
-    blocked_tasks.remove(node);
-    if (ready_tasks.first == null) {
-        // Only one task was running before, so pre-empt
-        switch_to(node);
-    } else {
-        // There's at least one task on the "ready to run" queue already, so don't pre-empt
-        ready_tasks.append(node);
-        unlock_scheduler();
-    }
-}
-
-var IRQ_disable_counter: usize = 0;
-var postpone_task_switches_counter: isize = 0; // this counter can go negative when we are scheduling after a postpone
-var postpone_task_switches_flag: bool = false;
+pub var IRQ_disable_counter: usize = 0;
+pub var postpone_task_switches_counter: isize = 0; // unlock_scheduler() asserts this is > 0 before decrementing, so it no longer goes negative
+pub var postpone_task_switches_flag: bool = false;
 pub fn lock_scheduler() void {
     if (constants.SMP == false) {
         x86.cli();
@@ -138,38 +127,44 @@ pub fn lock_scheduler() void {
 }
 pub fn unlock_scheduler() void {
     if (constants.SMP == false) {
+        assert(IRQ_disable_counter > 0);
+        assert(postpone_task_switches_counter > 0);
         postpone_task_switches_counter -= 1;
-        if (postpone_task_switches_flag == true and postpone_task_switches_counter == 0) {
-            // in this section, postpone counter will go to -1 during the task
+        if (postpone_task_switches_flag == true and postpone_task_switches_counter == 1) {
             postpone_task_switches_flag = false;
+            notify("AFTER POSTPONE");
             schedule();
         }
         IRQ_disable_counter -= 1;
-        if (IRQ_disable_counter == 0) {
-            x86.sti();
-            x86.hlt();
-        }
+        // must be the last instruction because we do interrupts inside interrupts
+        if (IRQ_disable_counter == 0) x86.sti();
     }
 }
 
+// Yields the CPU to the next ready task; does nothing unless the current task is .Running.
+pub fn preempt() void {
+    if (current_task.data.state != .Running) return;
+    update_time_used();
+    if (ready_tasks.first != null) {
+        lock_scheduler();
+        defer unlock_scheduler();
+        schedule();
+    } else {
+        // nobody else is queued: zero the slice so the timer stops counting it down
+        notify("NO PREEMPT SINGLE TASK");
+        time.task_slice_remaining = 0;
+    }
+}
+
 // expects:
 //  - chosen is .ReadyToRun
 //  - chosen is not in any scheduler lists
+//  - current_task has been moved to a queue
 //  - scheduler is locked
 //  - the tasks being switched to will unlock_scheduler()
 pub fn switch_to(chosen: *TaskNode) void {
     assert(chosen.data.state == .ReadyToRun);
-
-    if (postpone_task_switches_counter != 0) {
-        postpone_task_switches_flag = true;
-        return;
-    }
-
-    // in case of self preemption, shouldn't happen really
-    if (current_task.data.state == .Running) {
-        current_task.data.state = .ReadyToRun;
-        ready_tasks.append(current_task);
-    }
+    assert(current_task.data.state != .Running);
 
     // save old stack
     const old_task_esp_addr = &current_task.data.esp;
@@ -177,7 +172,10 @@ pub fn switch_to(chosen: *TaskNode) void {
     // switch states
     chosen.data.state = .Running;
     current_task = chosen;
+    if (ready_tasks.first == null) time.task_slice_remaining = 0;
+    if (ready_tasks.first != null) time.task_slice_remaining = time.TASK_SLICE;
 
+    // we don't have any startup code for tasks, so i do it here
     if (current_task.data.born == false) {
         current_task.data.born = true;
         unlock_scheduler();
@@ -189,65 +187,83 @@ pub fn switch_to(chosen: *TaskNode) void {
 
 pub var CPU_idle_time: u64 = 0;
 pub var CPU_idle_start_time: u64 = 0;
+// expects:
+//  lock_scheduler should be called before
+//  unlock_scheduler should be called after
+//  current_task is blocked or running (preemption)
 pub fn schedule() void {
     assert(IRQ_disable_counter > 0);
+    assert(current_task.data.state != .ReadyToRun);
 
-    if (postpone_task_switches_counter != 0) {
+    // postponed
+    if (postpone_task_switches_counter != 1 and current_task.data.state == .Running) {
         postpone_task_switches_flag = true;
+        notify("POSTPONING SCHEDULE");
         return;
     }
-
-    update_time_used();
-
-    // format();
+    // next task
     if (ready_tasks.popFirst()) |t| {
-        // somebody is ready to run
-        // std doesn't do this, for developer flexibility maybe?
         t.prev = null;
         t.next = null;
-        switch_to(t);
-    } else if (current_task.data.state == .Running) {
-        // single task mode, carry on
+
+        // notify("SWITCHING TO 0x{x}", t.data.esp);
+        if (current_task.data.state == .Running) {
+            current_task.data.state = .ReadyToRun;
+            ready_tasks.append(current_task);
+        }
+        return switch_to(t);
+    }
+    // single task
+    if (current_task.data.state == .Running) {
+        notify("SINGLE TASK");
+        time.task_slice_remaining = 0;
         return;
-    } else {
-        // idle mode
-        notify_idle();
+    }
+    // no tasks
+    idle_mode();
+}
 
-        // borrow the current task
-        const borrow = current_task;
+fn idle_mode() void {
+    assert(ready_tasks.first == null);
+    assert(current_task.data.state != .Running);
+    assert(current_task.data.state != .ReadyToRun);
 
-        CPU_idle_start_time = time.offset_us; //for power management
+    notify("IDLE");
 
-        while (true) { // idle loop
-            if (ready_tasks.popFirst()) |t| { // found a new task
-                CPU_idle_time += time.offset_us - CPU_idle_start_time; // count time as idle
-                timer_last_count = time.offset_us; // don't count time as used
-                // println("went into idle mode for {}usecs", time.offset_us - CPU_idle_start_time);
+    // borrow the current task
+    const borrow = current_task;
 
-                if (t == borrow) {
-                    t.data.state = .Running;
-                    return; //no need to ctx_switch we are already running this
-                }
-                return switch_to(t);
-            } else { // no tasks ready, let the timer fire
-                x86.sti(); // enable interrupts to allow the timer to fire
-                x86.hlt(); // halt and wait for the timer to fire
-                x86.cli(); // disable interrupts again to see if there is something to do
+    CPU_idle_start_time = time.offset_us; //for power management
+
+    while (true) { // idle loop
+        if (ready_tasks.popFirst()) |t| { // found a new task
+            CPU_idle_time += time.offset_us - CPU_idle_start_time; // count time as idle
+            timer_last_count = time.offset_us; // don't count time as used
+            // println("went into idle mode for {}usecs", time.offset_us - CPU_idle_start_time);
+
+            if (t == borrow) {
+                t.data.state = .Running;
+                return; //no need to ctx_switch we are already running this
             }
+            return switch_to(t);
+        } else { // no tasks ready, let the timer fire
+            x86.sti(); // enable interrupts to allow the timer to fire
+            x86.hlt(); // halt and wait for the timer to fire
+            x86.cli(); // disable interrupts again to see if there is something to do
         }
     }
 }
 
-fn notify_idle() void {
+pub fn notify(comptime message: []const u8, args: ...) void {
     const bg = vga.background;
     const fg = vga.foreground;
     const cursor = vga.cursor;
     vga.background = fg;
     vga.foreground = bg;
-    vga.cursor = 80 - 4;
+    vga.cursor = 80 - message.len - 10;
     vga.cursor_enabled = false;
 
-    print("IDLE");
+    print(message, args);
 
     vga.cursor_enabled = true;
     vga.cursor = cursor;
@@ -271,3 +287,32 @@ pub fn format() void {
     var sit = sleeping_tasks.first;
     while (sit) |node| : (sit = node.next) println("{} {}", node.data.data, node.counter);
 }
+
+// pub fn block(state: TaskState) void {
+//     assert(current_task.data.state == .Running);
+
+//     assert(state != .Running);
+//     assert(state != .ReadyToRun);
+
+//     lock_scheduler();
+//     defer unlock_scheduler();
+
+//     update_time_used();
+//     current_task.data.state = state;
+//     blocked_tasks.append(current_task);
+//     schedule();
+// }
+
+// pub fn unblock(node: *TaskNode) void {
+//     lock_scheduler();
+//     node.data.state = .ReadyToRun;
+//     blocked_tasks.remove(node);
+//     if (ready_tasks.first == null) {
+//         // Only one task was running before, so pre-empt
+//         switch_to(node);
+//     } else {
+//         // There's at least one task on the "ready to run" queue already, so don't pre-empt
+//         ready_tasks.append(node);
+//         unlock_scheduler();
+//     }
+// }
diff --git a/src/time.zig b/src/time.zig
index b3607d7..0527f39 100644
--- a/src/time.zig
+++ b/src/time.zig
@@ -1,8 +1,19 @@
 usingnamespace @import("index.zig");
 
 pub var offset_us: u64 = 0;
-pub fn increment(value: u32) void {
-    offset_us += value;
+pub var task_slice_remaining: u64 = 0;
+pub var TASK_SLICE: u64 = 50 * 1000;
+pub fn increment() void {
+    // PIT period in microseconds; also fed to the sleep queue below.
+    const elapsed_us = x86.interrupt.tick;
+    offset_us += elapsed_us;
+    task.sleep_tick(elapsed_us);
+
+    // A remaining slice of 0 means no time slice is being counted.
+    if (task_slice_remaining == 0) return;
+    // Slice used up: let the scheduler pick the next task.
+    if (task_slice_remaining <= elapsed_us) return task.preempt();
+    task_slice_remaining -= elapsed_us;
 }
 
 pub fn uptime() void {
diff --git a/src/vga.zig b/src/vga.zig
index 94fe865..23a3f77 100644
--- a/src/vga.zig
+++ b/src/vga.zig
@@ -64,6 +64,7 @@ pub fn clear() void {
 }
 pub fn topbar() void {
     const bg = vga.background;
+    // println("topbar1");
     while (true) {
         const cursor = vga.cursor;
         vga.background = Color.Red;
@@ -73,13 +74,14 @@ pub fn topbar() void {
         time.uptime();
         print(" | ");
         task.format_short();
+        // print(" ({})", task.IRQ_disable_counter);
         println("");
 
         vga.cursor_enabled = true;
         vga.cursor = cursor;
         vga.background = bg;
 
-        task.usleep(500 * 1000) catch unreachable; // 60ms
+        task.usleep(50 * 1000) catch unreachable; // 50ms
     }
 }
 
diff --git a/src/vmem.zig b/src/vmem.zig
index a38a9db..9962d6f 100644
--- a/src/vmem.zig
+++ b/src/vmem.zig
@@ -15,7 +15,9 @@ pub fn available() usize {
 }
 
 pub fn malloc(size: usize) !usize {
-    if (available() == 0) return error.OutOfMemory;
+    if (available() == 0) {
+        return error.OutOfMemory;
+    }
     stack_index -= 1;
     var vaddr: usize = stack[stack_index];
     try x86.paging.mmap(vaddr, null);