/******************************************************************************
 * libc.c - minimal subset of C runtime library for os2ahci
 *
 * Copyright (c) 2010 Christian Mueller. Parts copied from/inspired by the
 * Linux AHCI driver; those parts are (c) Linux AHCI/ATA maintainers
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "os2ahci.h"

/* -------------------------- macros and constants ------------------------- */

#define MSG_REPLACEMENT_STRING 1178   /* empty message with a single %; used
                                       * for printing custom messages via
                                       * DevHelp_Save_Message(); referenced
                                       * by 'init_msgtbl' */

/* heap management constants */
#define HEAP_SIZE             8192    /* size of 'heap_buf' in bytes */
#define HEAP_UNIT              128    /* allocation granularity in bytes */
#define HEAP_UNIT_CNT         (HEAP_SIZE / HEAP_UNIT)
                                      /* number of entries in 'heap_units[]';
                                       * each entry is a u8, so this count
                                       * must stay below 256 */

/* ------------------------ typedefs and structures ------------------------ */

/* states of the mdelay() busy loop calibration; the numeric values are
 * spelled out because 'mdelay_cal_status' is initialized with a plain 0 */
typedef enum {
  MD_NOT_CALIBRATED    = 0,           /* no calibration run started yet */
  MD_CALIBRATION_START = 1,           /* calibration run in progress */
  MD_CALIBRATION_END   = 2,           /* timer fired; mdelay() must finish up */
  MD_CALIBRATION_DONE  = 3            /* mdelay() has stored the final value */
} MDCAL;

/* -------------------------- function prototypes -------------------------- */

/* number-to-ASCII conversion used by vsprintf() */
static void      long_to_asc            (long val, char _far *buf, int base,
                                         int zero, int flen);
/* timer callback which flags the end of the mdelay() calibration run */
static void _far mdelay_timer_callback  (ULONG timer_handle, ULONG parm1,
                                         ULONG parm2);
/* returns != 0 while 'mdelay_cal_status' is MD_CALIBRATION_END */
static int       mdelay_cal_end         (void);

/* ------------------------ global/static variables ------------------------ */

/* debug COM port base address (I/O port); all register accesses in
 * init_com1() and vprintf() are relative to this value */
u16 com_base = 0x03f8;

/* digit characters indexed by digit value; used by long_to_asc() */
static char  hex_digits[] = "0123456789abcdef";

/* spinlock handle created in init_com1() and held by vprintf() while a
 * message is formatted and transmitted */
static ULONG com_lock;

/* message table for DevHelp_Save_Message() which prints the first string;
 * cprintf() stores the text to print in MsgStrings[0] before each call.
 * NOTE(review): the initializers are presumably message ID, string count
 * and first string pointer -- confirm against the MSGTABLE declaration. */
static MSGTABLE init_msgtbl = {
  MSG_REPLACEMENT_STRING,
  1,
  0
};

/* COM port initialization sequence: (register offset, value) pairs which
 * init_com1() writes to 'com_base + reg' in order. The list is terminated
 * by an entry with reg == -1.
 *
 * While the divisor latch overlay is active (value 0x80 written to register
 * 3), register 0 holds the low byte and register 1 the high byte of the baud
 * rate divisor; a divisor of 1 selects 115200 baud.
 */
static struct {
  int reg;
  u8  data;
} com1_init_sequence[] = {
  {  3, 0x80 },   /* overlay divisor latch register at 0x3f8 and 0x3f9 */
  {  0, 0x01 },   /* set low byte of divisor to 1 (115200 baud) */
  {  1, 0x00 },   /* set high byte of divisor to 0 */
  {  3, 0x03 },   /* reset divisor latch register overlay and set 8,n,1 */
  {  1, 0x00 },   /* disable interrupts */
  {  4, 0x0f },   /* modem control register */
  { -1, 0x00 }    /* end of list */
};

/* delay loop calibration data; both variables are volatile because they are
 * read inside the mdelay() busy loop while the calibration timer callback
 * may change the status asynchronously */
volatile MDCAL mdelay_cal_status = 0;     /* delay loop calibration status
                                           * (0 == MD_NOT_CALIBRATED) */
volatile u32   mdelay_loops_per_ms = 0;   /* delay loop counter: number of
                                           * inner loop iterations mdelay()
                                           * runs per millisecond */

/* very small heap for dynamic memory management */
static u8 heap_buf[HEAP_SIZE];            /* the memory handed out by malloc() */
static u8 heap_units[HEAP_UNIT_CNT];      /* per-unit bookkeeping: 0 if the
                                           * unit is free, otherwise number of
                                           * units from this one to the end of
                                           * its allocation */

/* ----------------------------- start of code ----------------------------- */

/******************************************************************************
 * Initialize the debug COM port at 'com_base' by writing the register/value
 * pairs of 'com1_init_sequence[]' (8,n,1; the table documents its divisor
 * setting as 115200 baud) and create the spinlock which serializes debug
 * output in vprintf().
 */
void init_com1(void)
{
  int i;

  /* lock taken by vprintf() around formatting and transmitting a message */
  DevHelp_CreateSpinLock(&com_lock);

  /* walk the init sequence until the terminating entry (reg == -1) */
  for (i = 0; com1_init_sequence[i].reg != -1; i++) {
    u16 port = com_base + com1_init_sequence[i].reg;
    u8  data = com1_init_sequence[i].data;
    /* byte-wide port write; done in assembler for the same reason as in
     * vprintf() (the DDK's outp() only handles words) */
    _asm {
      mov  dx, port;
      mov  al, data;
      out  dx, al;
    }
  }
}

/******************************************************************************
 * Print a formatted message into a string buffer. This is very basic,
 * supporting only the following conversions:
 *
 *   %s   near string            %Fs  far string  (NULL prints "[null]")
 *   %c   single character
 *   %d   16-bit signed decimal  %ld  32-bit signed decimal
 *   %x   16-bit hex             %lx  32-bit hex
 *   %p   near pointer (4 hex digits)
 *   %Fp  far pointer, printed as "seg:off" (%lp is treated the same way)
 *
 * %d and %x accept a single-digit field width (1..9) -- 32-bit numbers don't
 * need more than 9 characters -- with an optional '0' in front to pad with
 * zeros instead of blanks. Any other character following the '%' is copied
 * to the output as-is (thus "%%" prints a single '%').
 *
 * A '\n' in the format string (not in string arguments) is expanded to
 * "\r\n".
 *
 * The function does no bounds checking; 'buf' must be large enough for the
 * resulting string including the terminating zero.
 *
 * Returns the number of characters stored in 'buf', not counting the
 * terminating zero.
 */
int vsprintf(char _far *buf, const char *fmt, va_list va)
{
  char _far *orig = buf;
  char _far *s;
  int lmod;
  int fptr;
  int zero;
  int flen;

  for (; *fmt != '\0'; fmt++) {
    switch (*fmt) {

    case '%':
      fmt++;
      zero = flen = 0;
      if (*fmt >= '0' && *fmt <= '9') {
        /* formatting length modifiers */
        zero = (*fmt == '0') ? 1 : 0;
        fmt += zero;
        if (*fmt >= '1' && *fmt <= '9') {
          /* only take over the field length if it really is a digit;
           * otherwise something like "%0x" would end up with a bogus
           * field length derived from the conversion character
           */
          flen = *fmt - '0';
          fmt++;
        }
      }

      /* data type modifiers */
      lmod = (*fmt == 'l') ? 1 : 0;
      fptr = (*fmt == 'F') ? 1 : 0;
      fmt += lmod + fptr;

      switch (*fmt) {

      case '\0':
        /* Format string ends in the middle of a conversion; drop the
         * incomplete conversion and step back so the increment of the
         * outer loop stops at the terminating zero rather than beyond it.
         */
        fmt--;
        break;

      case 's':
        if (fptr) {
          char _far *p = va_arg(va, char _far *);
          s = (p == 0) ? "[null]" : p;
        } else {
          char *p = va_arg(va, char *);
          s = (p == 0) ? "[null]" : p;
        }
        while ((*buf = *(s++)) != '\0') {
          buf++;
        }
        break;

      case 'c':
        *(buf++) = (char) va_arg(va, int);
        break;

      case 'd':
        long_to_asc((lmod) ? va_arg(va, long)
                           : va_arg(va, int), buf, 10, zero, flen);
        buf += strlen(buf);
        break;

      case 'x':
        long_to_asc((lmod) ? va_arg(va, u32)
                           : va_arg(va, u16), buf, 16, zero, flen);
        buf += strlen(buf);
        break;

      case 'p':
        if (fptr || lmod) {
          /* far pointer: offset is fetched first, then the segment */
          u16 off = va_arg(va, u16);
          u16 seg = va_arg(va, u16);
          long_to_asc(seg, buf, 16, 1, 4);
          buf += strlen(buf);
          *(buf++) = ':';
          long_to_asc(off, buf, 16, 1, 4);
          buf += strlen(buf);
        } else {
          long_to_asc(va_arg(va, u16), buf, 16, 1, 4);
          buf += strlen(buf);
        }
        break;

      default:
        /* unknown conversion; print the character itself */
        *(buf++) = *fmt;
        break;
      }
      break;

    case '\n':
      *(buf++) = '\r';
      *(buf++) = '\n';
      break;

    default:
      *(buf++) = *fmt;
      break;

    }
  }

  *buf = '\0';
  return((int) (buf - orig));
}

/*******************************************************************************
 * Print a formatted message into a string buffer. Relies on vsprintf(), thus
 * the same format restrictions apply and 'buf' must be large enough for the
 * result.
 *
 * Returns the number of characters stored in 'buf' as reported by vsprintf().
 */
int sprintf(char _far *buf, const char *fmt, ...)
{
  va_list va;
  int len;

  va_start(va, fmt);
  len = vsprintf(buf, fmt, va);
  va_end(va);     /* every va_start() needs a matching va_end() */

  return(len);
}

/******************************************************************************
 * Print messages to COM
 *
 * The message is formatted with vsprintf() into a static buffer and then
 * written to the COM port at 'com_base' one character at a time, polling the
 * port's status register (base + 5, bit 0x20) before each character. The
 * spinlock 'com_lock' is held from formatting until the last character has
 * been written because the static buffer is shared by all callers.
 *
 * NOTES: This function uses a 1K buffer for the resulting message. Thus,
 *        messages should never exceed 1024 bytes (vsprintf() does no bounds
 *        checking).
 */
void vprintf(const char *fmt, va_list va)
{
  static char buf[1024];
  char *s;

  spin_lock(com_lock);

  vsprintf(buf, fmt, va);

  /* write debug message to COM1 */
  for (s = buf; *s != '\0'; s++) {

    /* inp() and outp() are redefined by the DDK in an incompatible
     * way (only words). Instead of messing around with those
     * definitions, it's safer and easier to put the whole thing
     * into an _asm block.
     *
     * The C equivalent would look like this:
     *
     *   while (!(inp(com_base + 5) & 0x20));
     *   outp(com_base, *s);
     */

    _asm {
      /* wait until COM transmitter is idle */
      mov  dx, com_base;
      add  dx, 5;
    transmitter_not_idle:
      in   al, dx;
      and  al, 0x20;
      jz   transmitter_not_idle;

      /* output character to be sent */
      mov  dx, com_base;
      mov  bx, s;
      mov  al, [bx];
      out  dx, al;
    };
  }

  spin_unlock(com_lock);
}

/******************************************************************************
 * Print messages to COM port. Thin variadic front end of vprintf(); see
 * vsprintf() for the supported format specifiers.
 */
void printf(const char *fmt, ...)
{
  va_list va;

  va_start(va, fmt);
  vprintf(fmt, va);
  va_end(va);     /* every va_start() needs a matching va_end() */
}

/******************************************************************************
 * Print a message to the system console. This works only during device driver
 * initialization.
 *
 * If the global 'debug' flag is set, the message is sent to the COM port as
 * well.
 *
 * NOTE: This function uses a 1K buffer for the resulting message. Thus,
 *       messages should never exceed 1024 bytes. The buffer is static and
 *       not protected by a lock.
 */
void cprintf(const char *fmt, ...)
{
  static char buf[1024];
  va_list va;
  size_t len;

  va_start(va, fmt);
  vsprintf(buf, fmt, va);
  va_end(va);     /* every va_start() needs a matching va_end() */

  if (debug) {
    /* print the same message to COM1 as well */
    printf("%s", buf);
  }

  /* remove trailing CR/LF (DevHelp_Save_Message() will add it again) */
  if ((len = strlen(buf)) >= 2 && buf[len-1] == '\n' && buf[len-2] == '\r') {
    buf[len-2] = '\0';
  }

  /* the message table's only string is the text that was just formatted */
  init_msgtbl.MsgStrings[0] = buf;
  DevHelp_Save_Message((NPBYTE) &init_msgtbl);
}

/******************************************************************************
 * Print hex buffer to COM port.
 *
 * Prints a header line built from 'fmt' and the variable arguments, followed
 * by a dump of 'len' bytes starting at 'p'. Each line of the dump shows the
 * far address, up to 16 bytes in hex (with a '-' in front of the 9th byte)
 * and the same bytes as characters, where bytes outside the range 32..127
 * are shown as '.'.
 *
 * Does nothing unless the global 'debug' flag is set.
 */
void phex(const void _far *p, int len, const char *fmt, ...)
{
  va_list va;
  const unsigned char _far *buf = p;
  int i;

  if (!debug) {
    return;
  }

  /* print header */
  va_start(va, fmt);
  vprintf(fmt, va);
  va_end(va);     /* every va_start() needs a matching va_end() */

  /* print hex block, 16 bytes per line */
  while (len > 0) {
    printf("%Fp ", buf);

    /* hex columns; a short last line is filled up with blanks */
    for (i = 0; i < 16; i++) {
      if (i < len) {
        printf("%c%02x", ((i == 8) ? '-' : ' '), buf[i]);
      } else {
        printf("   ");
      }
    }

    /* print ASCII block */
    printf("   ");
    for (i = 0; i < ((len > 16) ? 16 : len); i++) {
      printf("%c", (buf[i] >= 32 && buf[i] < 128) ? buf[i] : '.');
    }
    printf("\n");

    buf += 16;
    len -= 16;
  }
}

/******************************************************************************
 * Return length of zero-terminated string, not counting the terminating
 * zero. The length is counted in a size_t to match the return type; a signed
 * counter would overflow on strings longer than INT_MAX characters.
 */
size_t strlen(const char _far *s)
{
  size_t len;

  for (len = 0; *s != '\0'; s++) {
    len++;
  }
  return(len);
}

/******************************************************************************
 * Copy the zero-terminated string 'src', including its terminating zero, to
 * 'dst' and return 'dst'. The destination must be large enough; no bounds
 * checking is done.
 */
char _far *strcpy(char _far *dst, const char _far *src)
{
  char _far *out = dst;
  char c;

  do {
    c = *src++;
    *out++ = c;
  } while (c != '\0');

  return(dst);
}

/******************************************************************************
 * Compare blocks of memory
 *
 * Returns 0 if the first 'len' bytes of both blocks are identical (or 'len'
 * is 0), otherwise the difference between the first pair of differing bytes.
 * The bytes are compared as unsigned values, as required for the standard C
 * memcmp(), so that the sign of the result is correct for bytes >= 0x80.
 */
int memcmp(void _far *p1, void _far *p2, size_t len)
{
  register const unsigned char _far *s1 = p1;
  register const unsigned char _far *s2 = p2;

  for (; len > 0; len--, s1++, s2++) {
    if (*s1 != *s2) {
      return((int) *s1 - (int) *s2);
    }
  }
  return(0);
}

/******************************************************************************
 * Convert a string to a long value using the specified base
 *
 * Leading blanks and tabs are skipped, followed by an optional '+' or '-'
 * sign. Conversion stops at the first character which is not a digit in
 * 'base' (letters are accepted in upper and lower case). If 'ep' is not
 * NULL, '*ep' is set to point to that character. There is no overflow
 * detection and no automatic base detection (no "0x" prefix handling).
 */
long strtol(const char _far *buf, const char _far * _far *ep, int base)
{
  register const char _far *cur = buf;
  long result = 0;
  int minus = 0;
  int digit;

  /* leading blanks and tabs don't count */
  while (*cur == ' ' || *cur == '\t') {
    cur++;
  }

  /* optional sign */
  switch (*cur) {

  case '-':
    minus = 1;
    cur++;
    break;

  case '+':
    cur++;
    break;

  default:
    break;
  }

  /* accumulate digits until we hit something that isn't one */
  for (;;) {
    if (*cur <= '9') {
      digit = *cur - '0';
    } else {
      digit = tolower(*cur) - 'a' + 10;
    }
    if (digit < 0 || digit >= base) {
      break;
    }
    result = result * base + digit;
    cur++;
  }

  if (ep != NULL) {
    *ep = cur;
  }
  return((minus) ? -result : result);
}

/******************************************************************************
 * Extremely simple and stupid implementation of malloc(). The heap is very
 * small, only 8K at the moment, and the memory blocks are managed using a
 * simple array of "number of heap units allocated", zero meaning this unit is
 * available. Each heap unit is currently 128 bytes.
 *
 * Dynamic memory is primarily used for things like ATA identify, ATAPI
 * sense buffers, etc. and should be freed as soon as possible, otherwise
 * we'll quickly run out of memory.
 *
 * NOTE: This function is not reentrant, thus must be called with the driver-
 *       level spinlock held. The main reason for this design is that most
 *       functions that need dynamic memory are already holding the spinlock.
 */
void *malloc(size_t len)
{
  u16 units = (len + HEAP_UNIT - 1) / HEAP_UNIT;
  u16 i;
  u16 n;

  /* find a sequence of free heap units big enough for the requested length */
  for (i = 0; i < HEAP_UNIT_CNT; i++) {
    if (heap_units[i] == 0) {
      for (n = i; n < i + units && n < HEAP_UNIT_CNT; n++) {
        if (heap_units[n] != 0) {
          break;
        }
      }
      if (n == i + units) {
        /* found a chunk large enough; update 'heap_units[]' */
        for (; i < n; i++) {
          heap_units[i] = (u8) (n - i);
        }
        return(heap_buf + (n - units) * HEAP_UNIT);
      }
      
      /* keep searching... */
      i = n;
    } else {
      /* skip occupied heap units */
      i += heap_units[i] - 1;
    }
  }

  /* out of memory */
  dprintf("malloc(%d): out of memory\n", len);
  return(NULL);
}

/******************************************************************************
 * Free block of memory allocated by malloc().
 *
 * Passing NULL is a no-op, as with the standard C free(). Pointers outside
 * of the heap, or not aligned to the start of a heap unit, are rejected with
 * a debug message. Freeing a block twice is harmless because the unit count
 * of a free unit is 0.
 *
 * NOTE: A pointer to a heap unit in the middle of an allocation is not
 *       detected; since each unit stores the number of units up to the end
 *       of its allocation, this would release only the tail of the block.
 *
 * NOTE: This function is not reentrant, thus must be called with the driver-
 *       level spinlock held. The main reason for this design is that most
 *       functions that need dynamic memory are already holding the spinlock.
 */
void free(void *ptr)
{
  u8 *p = (u8 *) ptr;
  u16 first_unit;
  u16 units;
  u16 i;

  if (ptr == NULL) {
    /* nothing to free; not worth an "invalid pointer" message */
    return;
  }

  if (p < heap_buf || p >= heap_buf + sizeof(heap_buf) ||
      (u16) (p - heap_buf) % HEAP_UNIT != 0) {
    dprintf("free(0x%p): invalid pointer (heap_buf = 0x%p)\n",
            (u16) p, (u16) heap_buf);
    return;
  }

  /* clear unit allocation counters in heap_units[] */
  first_unit = (u16) (p - heap_buf) / HEAP_UNIT;
  units = heap_units[first_unit];
  for (i = first_unit; i < first_unit + units; i++) {
    heap_units[i] = 0;
  }
}

/******************************************************************************
 * Calibrate 'mdelay()' loop. This is done by setting up a 1 second timer
 * with a callback that sets 'mdelay_cal_status' to MD_CALIBRATION_END. Then
 * it calls mdelay() with a large millisecond value and an initial delay loop
 * counter of 100,000. When the timer triggers, 'mdelay()' will stop and
 * update the delay loop counter.
 *
 * This function needs to be called at device driver init time. Since it uses
 * ADD timers, it must be called with interrupts enabled. All this is not very
 * precise (we should wait for a clock tick before starting, ...) but we don't
 * really need precise timers.
 *
 * NOTE(review): the result of ADD_StartTimerMS() is not checked; if the
 *               timer doesn't start, the mdelay() call below is only ended
 *               by its own (extremely long) loop count.
 */
void mdelay_cal(void)
{
  ULONG timer_handle;

  dprintf("calibrating delay loop... ");

  /* initial guess for the loop counter; mdelay() replaces it with the
   * measured value when the calibration run ends */
  mdelay_loops_per_ms = 100000;
  mdelay_cal_status = MD_CALIBRATION_START;

  /* the timer callback ends the mdelay() call after 1000 milliseconds */
  ADD_StartTimerMS(&timer_handle, 1000, (PFN) mdelay_timer_callback, 0, 0);
  mdelay(999999999);
  ADD_CancelTimer(timer_handle);

  dprintf("done (loops per ms = %ld)\n", mdelay_loops_per_ms);
}

/******************************************************************************
 * Wait specified number of milliseconds. This is implemented using a busy
 * loop and is only good for delays in the millisecond range but never for more
 * than a few milliseconds and only in situations where a proper timer won't do.
 * As a rule of thumb, don't call this function and use ADD timers, instead.
 *
 * NOTES:
 *
 *  - Timers are problematic on x86 platforms because there's no reliable
 *    hardware timer on all architectures and the CPU clock speed may change
 *    while executing delay loops (AMD Cool&Quiet and Intel SpeedStep), thus
 *    calibration routines won't really be sufficient. But this usually only
 *    extends the delay and we don't really need a high precision timer. The
 *    exception are things like notebooks that are clocked slower when on
 *    battery and which got booted while on battery. Should still be OK,
 *    though, because our requirements are not that strict.
 *
 *  - The code in this function is inefficient by design to make sure it
 *    will work with future CPUs which might otherwise be too fast for
 *    our loop counters. Part of this design is using volatile variables to
 *    force memory operations.
 *
 *  - Before using this function, call mdelay_cal() to determine the
 *    number of inner loops required per millisecond.
 *
 *  - The same loop doubles as the calibration run: when the calibration
 *    timer fires while the loop is running, the function stops waiting and
 *    derives 'mdelay_loops_per_ms' from the number of outer loop iterations
 *    completed so far.
 */
void mdelay(u32 millies)
{
  volatile u32 i;
  volatile u32 n;

  /* outer loop: one iteration per millisecond; inner loop: the calibrated
   * number of iterations which take one millisecond */
  for (i = 0; i < millies; i++) {
    for (n = 0; n < mdelay_loops_per_ms; n++) {
      if (mdelay_cal_end()) {
        /* this is a calibration run that just ended */
        goto complete_calibration;
      }
    }
  }
  return;

complete_calibration:
  /* complete calibration cycle; 'i' is the number of complete outer loop
   * iterations at the time the calibration timer fired (iterations of the
   * interrupted inner loop are not taken into account) */
  if (i < 1000) {
    /* Initial value for delay loop was too high; interpolate results for
     * an assumed initial delay loop divided by 1000.
     *
     * NOTE(review): the remainder term is 0 for the initial value of
     * 100000 used by mdelay_cal(); confirm the intention for other values.
     */
    i = i * 1000 + mdelay_loops_per_ms % 1000;
    mdelay_loops_per_ms /= 1000;
  }
  /* the calibration timer runs for 1000 milliseconds (see mdelay_cal()),
   * thus divide the total loop count by 1000 */
  mdelay_loops_per_ms = (mdelay_loops_per_ms * i) / 1000;
  mdelay_cal_status = MD_CALIBRATION_DONE;
}

/******************************************************************************
 * Sleep specified number of milliseconds. This is implemented by yielding the
 * CPU until the millisecond counter in the global info segment has passed
 * the end of the interval. If the global info segment can't be obtained,
 * the function falls back to the mdelay() busy loop. This function can
 * only be called at task time, or from a context hook.
 *
 * NOTE: The accuracy is limited by the OS/2 timer interrupt frequency which
 *       can lead to intervals up to 55ms (18.2 timer interrupts per second).
 */
void msleep(u32 millies)
{
  volatile PGINFOSEG gis;
  ULONG t_begin;
  ULONG t_done;
  PSEL sel;

  if (DevHelp_GetDOSVar(DHGETDOSV_SYSINFOSEG, 0, (PPVOID) &sel) != 0) {
    /* no global info segment; use mdelay() */
    mdelay(millies);
    return;
  }

  /* build a far pointer (selector:0) to the global info segment */
  gis = (PGINFOSEG) ((u32) *sel << 16);
  t_begin = gis->msecs;
  t_done = t_begin + millies;

  if (t_done < t_begin) {
    /* end of interval wrapped around; let 'msecs' wrap around first */
    for (;;) {
      if (gis->msecs < t_begin) {
        break;
      }
      DevHelp_Yield();
    }
  }

  /* give up the CPU until the counter has passed the end of the interval */
  for (;;) {
    if (gis->msecs > t_done) {
      break;
    }
    DevHelp_Yield();
  }
}

/******************************************************************************
 * Stop processing by reporting 'msg' as an internal error to the kernel.
 * This is a last resort and should only be called when the system state is
 * corrupt.
 */
void panic(char *msg)
{
  size_t msg_len = strlen(msg);

  DevHelp_InternalError(msg, msg_len);
}

/******************************************************************************
 * Disable interrupts. The reason for using a separate function for this is
 * that the presence of _asm statements will disable compiler optimizations.
 * In order to support nested calls, this function will return 0 if the
 * interrupts were already disabled or != 0, if not.
 *
 * The return value is the "interrupts enabled" bit (0x0200) of the flags
 * register as it was before interrupts were disabled.
 *
 * NOTE: SMP systems must use spinlocks, thus this function will only be
 *       compiled on non-SMP builds.
 */
#ifndef OS2AHCI_SMP
int disable(void)
{
  int rc = 0;

  /* fetch the flags register via the stack, keep only the interrupt bit
   * as return code and then disable interrupts */
  _asm {
    pushf
    pop   ax
    and   ax, 0x0200;  /* "interrupts enabled" bit */
    mov   rc, ax;
    cli
  }

  return(rc);
}
#endif

/******************************************************************************
 * Enable interrupts. The reason for using a separate function for this is
 * that the presence of _asm statements will disable compiler optimizations.
 *
 * Interrupts are enabled unconditionally; callers which need to support
 * nesting can use the return code of disable() to decide whether to call
 * this function.
 *
 * NOTE: SMP systems must use spinlocks, thus this function will only be
 *       compiled on non-SMP builds.
 */
#ifndef OS2AHCI_SMP
void enable(void)
{
  _asm sti;
}
#endif

/******************************************************************************
 * Convert 'long' to ASCII with the specified base
 *
 *  val   value to convert; treated as signed if 'base' is decimal or less,
 *        otherwise as unsigned (e.g. hex numbers never get a minus sign)
 *  buf   receives the zero-terminated result; must be large enough
 *  base  number base, 2..16; other values yield the string "[EVAL]"
 *  zero  if != 0, pad with zeros instead of blanks
 *  flen  minimum field length including a minus sign, if any; the number is
 *        right-aligned and never truncated
 *
 * With zero padding, the minus sign is placed in front of the zeros
 * ("-0042"); with blank padding, it goes right in front of the digits
 * ("  -42").
 */
static void long_to_asc(long val, char _far *buf, int base, int zero, int flen)
{
  register unsigned long abs_val;
  int negative;
  char tmp[80];
  char _far *ptmp = tmp;
  char _far *s;

  if (base < 2 || base > 16) {
    /* no digits for bases above 16, division by zero for bases below 2 */
    sprintf(buf, "[EVAL]");
    return;
  }

  /* Only decimal (or smaller) bases are printed with a sign. The negation
   * is done on the unsigned value because '-val' overflows for the most
   * negative number.
   */
  negative = (val < 0 && base <= 10) ? 1 : 0;
  abs_val = (unsigned long) val;
  if (negative) {
    abs_val = 0UL - abs_val;
  }

  /* generate the digits from right to left at the end of 'tmp' */
  tmp[sizeof(tmp) - 1] = '\0';
  for (s = ptmp + sizeof(tmp) - 2; s > ptmp; s--) {
    *s = hex_digits[abs_val % base];
    flen--;
    if ((abs_val /= base) == 0) {
      break;
    }
  }

  if (negative) {
    /* the minus sign takes up one column of the field */
    flen--;
    if (!zero) {
      *(--s) = '-';
    }
  }

  /* left-pad the resulting number with zeros or spaces up to 'flen'; never
   * pad beyond the start of 'tmp' and leave room for the sign */
  while (flen > 0 && s > ptmp + 1) {
    *(--s) = (zero) ? '0' : ' ';
    flen--;
  }

  if (negative && zero) {
    *(--s) = '-';
  }

  strcpy(buf, s);
}

/******************************************************************************
 * Timer callback handler for 'mdelay_cal()'. Flags the end of the
 * calibration run; the busy loop in mdelay() picks this up via
 * mdelay_cal_end() and completes the calibration. None of the arguments
 * is used.
 */
static void _far mdelay_timer_callback(ULONG timer_handle, ULONG parm1,
                                       ULONG parm2)
{
  mdelay_cal_status = MD_CALIBRATION_END;
}

/******************************************************************************
 * Tell whether the calibration timer has fired, i.e. whether the current
 * mdelay() call is a calibration run which needs to be completed. Returns 1
 * if so, otherwise 0. This is kept in a function of its own to prevent
 * overzealous optimizers from removing the whole delay loop in mdelay().
 */
static int mdelay_cal_end(void)
{
  if (mdelay_cal_status == MD_CALIBRATION_END) {
    return(1);
  }
  return(0);
}
