/**
 * ctxhook.c - context hooks (kernel thread functions) for os2ahci
 *
 * Copyright (c) 2011 thi.guten Software Development
 * Copyright (c) 2011 Mensys B.V.
 * Copyright (c) 2013-2023 David Azarewicz
 *
 * Authors: Christian Mueller, Markus Thielen
 *
 * Parts copied from/inspired by the Linux AHCI driver;
 * those parts are (c) Linux AHCI/ATA maintainers
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "os2ahci.h"
#include "ata.h"
#include "atapi.h"

/* port restart context hook and input data */
ULONG RestartCtxHook_h;
volatile u32 ports_to_restart[MAX_AD];

/* port reset context hook and input data */
ULONG ResetCtxHook_h;
ULONG th_watchdog;
volatile u32 ports_to_reset[MAX_AD];
IORB_QUEUE abort_queue;

/* trigger engine context hook and input data */
ULONG engine_ctxhook_h;

#define QUEUEDEPTH 8

static struct _ctxq_ {
  ULONG ulHandle;
  ULONG ulArg;
} CtxQueue[QUEUEDEPTH] = {0};

static ULONG ulCtxStatusFlag = 0;

void SafeArmCtxHook(ULONG ulHandle, ULONG armData)
{
  USHORT i;

  i = LockInc(&ulCtxStatusFlag);
  if (i) {
    i--;
    if (i < QUEUEDEPTH) {
      CtxQueue[i].ulHandle = ulHandle;
      CtxQueue[i].ulArg = armData;
    }
  } else {
    KernArmHook(ulHandle, armData, 0);
  }
}

void ClearThreadStatus(ULONG ulHandle)
{
  USHORT i;

  i = LockDec(&ulCtxStatusFlag);
  if (i) {
    i--;
    if (i < QUEUEDEPTH) {
      KernArmHook(CtxQueue[i].ulHandle, CtxQueue[i].ulArg, 0);
    } else {
      KernArmHook(ulHandle, 0, 0);
    }
  }
}
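
/*
 * Illustrative sketch (not part of the driver build): how the deferred-arm
 * mechanism above is intended to be used. The producer side -- typically
 * interrupt-time code, presumably while holding drv_lock -- flags the
 * affected port and arms the restart hook via SafeArmCtxHook(); if a hook
 * request is already pending, the arm is queued in CtxQueue[] instead, and
 * ClearThreadStatus(), called at the end of each hook, re-arms the next
 * queued request. The function name and parameters below are assumptions
 * made only for this example.
 */
#if 0
static void example_flag_port_for_restart(int ad_no, int port)
{
  /* record which port needs task-time attention ... */
  ports_to_restart[ad_no] |= (1UL << port);

  /* ... and request the restart hook; SafeArmCtxHook() either arms it via
   * KernArmHook() or queues the request if one is already outstanding */
  SafeArmCtxHook(RestartCtxHook_h, 0);
}
#endif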
/******************************************************************************
 * Port restart context hook. This context hook is executed at task time and
 * will handle ports which are stopped due to a device error condition.
 *
 * The following conditions may exist:
 *
 * - Only a single non-NCQ command is executed by the AHCI adapter at any
 *   given time (even if more are outstanding). This is the case for single
 *   devices or port multipliers without FIS-based command switching. Error
 *   recovery is simple because we know which command has failed and that
 *   all other commands have not yet started executing. Thus, we can requeue
 *   all of them, replacing the failing command with a "request sense"
 *   command to get error details.
 *
 * - Multiple non-NCQ commands are executed on different devices behind a
 *   port multiplier which supports FIS-based command switching. This is
 *   more difficult to recover from but currently not an issue because we
 *   don't yet support FIS-based command switching (the FIS receive areas
 *   would become too large for the current data model).
 *
 * - One or more NCQ commands were active at the time of the error, with or
 *   without FIS-based command switching. We would have to interrogate the
 *   corresponding devices to find out which command has failed, but if this
 *   is combined with FIS-based command switching, even the AHCI spec
 *   recommends resetting the port. This leads to a much simpler approach:
 *   requeue all NCQ commands (they are idempotent by definition, otherwise
 *   they couldn't be reordered by the device) with the 'no_ncq' flag set
 *   in the IORB and reset the port. Those commands will then be executed as
 *   regular commands. The error, if it reoccurs, can then be handled by
 *   one of the above cases.
 *
 * The upstream code guarantees that we will never have a mix of NCQ and
 * non-NCQ commands active at the same time in order to reduce complexity
 * in the interrupt and error handlers.
 */
void _Syscall RestartCtxHook(ULONG parm)
{
  IORB_QUEUE done_queue;
  AD_INFO *ai;
  IORBH FAR16DATA *vProblemIorb;
  IORBH FAR16DATA *vIorb;
  IORBH FAR16DATA *vNext;
  IORBH *pIorb;
  ADD_WORKSPACE *aws;
  u8 *port_mmio;
  int rearm_ctx_hook;
  int need_reset;
  int ccs;
  int a;
  int p;

  D32ThunkStackTo32();
  vNext = FAR16NULL;
  rearm_ctx_hook = 0;
  AhciStats.ulSoftErrorCount++;

  DPRINTF(0, DBG_PREFIX": BEG\n");
  memset(&done_queue, 0x00, sizeof(done_queue));

  spin_lock(drv_lock);

  if (th_watchdog != 0) {
    /* watchdog timer still active -- just reset it */
    Timer_CancelTimer(th_watchdog);
    th_watchdog = 0;
  }

  for (a = 0; a < ad_info_cnt; a++) {
    ai = ad_infos + a;

    if (ai->busy) {
      /* this adapter is busy; leave it alone for now */
      rearm_ctx_hook = 1;
      continue;
    }

    for (p = 0; p <= ai->port_max; p++) {
      if (ports_to_restart[a] & (1UL << p)) {
        ports_to_restart[a] &= ~(1UL << p);

        /* restart this port */
        port_mmio = port_base(ai, p);
        vProblemIorb = FAR16NULL;
        need_reset = 0;

        DPRINTF(DBG_DETAILED, DBG_PREFIX": port=%d TF_DATA=0x%x\n", p, readl(port_mmio + PORT_TFDATA));

        /* get "current command slot"; only valid if there are no NCQ cmds */
        ccs = (int) ((readl(port_mmio + PORT_CMD) >> 8) & 0x1f);
        DPRINTF(DBG_DETAILED, DBG_PREFIX": PORT_CMD=0x%x\n", ccs);

        for (vIorb = ai->ports[p].iorb_queue.vRoot; vIorb != FAR16NULL; vIorb = vNext) {
          pIorb = Far16ToFlat(vIorb);
          aws = add_workspace(pIorb);
          vNext = pIorb->f16NxtIORB;

          if (aws->queued_hw) {
            if (aws->timer != 0) {
              Timer_CancelTimer(aws->timer);
              aws->timer = 0;
            }

            if (ai->ports[p].ncq_cmds & (1UL << aws->cmd_slot)) {
              /* NCQ command; force non-NCQ mode and trigger port reset */
              ai->ports[p].ncq_cmds &= ~(1UL << aws->cmd_slot);
              aws->no_ncq = 1;
              need_reset = 1;
              DPRINTF(0, DBG_PREFIX": failing IORB: %x NCQ slot=%x\n", vIorb, aws->cmd_slot);
#ifdef DEBUG
              DumpIorb(pIorb, vIorb);
#endif
            } else {
              /* regular command; clear cmd bit and identify problem IORB */
              ai->ports[p].reg_cmds &= ~(1UL << aws->cmd_slot);
              if (aws->cmd_slot == ccs) {
                /* this is the non-NCQ command that failed */
                DPRINTF(0, DBG_PREFIX": failing IORB: %x slot=%x\n", vIorb, aws->cmd_slot);
#ifdef DEBUG
                DumpIorb(pIorb, vIorb);
#endif
                vProblemIorb = vIorb;
              }
            }

            /* we can requeue all IORBs unconditionally (see function comment) */
            if (aws->retries++ < MAX_RETRIES) {
              iorb_requeue(pIorb);
            } else {
              /* retry count exceeded; consider IORB aborted */
              iorb_seterr(pIorb, IOERR_CMD_ABORTED);
              iorb_queue_del(&ai->ports[p].iorb_queue, vIorb);
              iorb_queue_add(&done_queue, vIorb, pIorb);
              if (vIorb == vProblemIorb) {
                /* no further analysis -- we're done with this one */
                vProblemIorb = FAR16NULL;
              }
            }
          }
        }

        /* sanity check: issued command bitmaps should be 0 now */
        if (ai->ports[p].ncq_cmds != 0 || ai->ports[p].reg_cmds != 0) {
          DPRINTF(0, DBG_PREFIX": warning: commands issued not 0 (%08x/%08x); resetting...\n",
                  ai->ports[p].ncq_cmds, ai->ports[p].reg_cmds);
          need_reset = 1;
        }

        if (!need_reset) {
          if ((readl(port_mmio + PORT_TFDATA) & 0x88) != 0) {
            /* device is not in an idle state */
            need_reset = 1;
          }
        }

        /* restart/reset port */
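        /* At this point every command that had reached the hardware has been
         * requeued or moved to the local done queue, so the port can be
         * restarted without losing track of any IORB. A full reset is needed
         * only if NCQ commands were involved, the issued-command bitmaps did
         * not clear, or the device did not return to an idle state. The
         * spinlock is released for the slow restart/reset; marking the
         * adapter 'busy' keeps other task-time code away in the meantime. */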
        ai->busy = 1;
        spin_unlock(drv_lock);
        if (need_reset) {
          ahci_reset_port(ai, p, 1);
        } else {
          ahci_stop_port(ai, p);
          ahci_start_port(ai, p, 1);
        }
        spin_lock(drv_lock);
        ai->busy = 0;

        /* reset internal port status */
        ai->ports[p].ncq_cmds = 0;
        ai->ports[p].reg_cmds = 0;
        ai->ports[p].cmd_slot = 0;

        if (vProblemIorb != FAR16NULL) {
          IORBH *pProblemIorb = Far16ToFlat(vProblemIorb);

          /* get details about the error that caused this IORB to fail */
          if (need_reset) {
            /* no way to retrieve error details after a reset */
            iorb_seterr(pProblemIorb, IOERR_DEVICE_NONSPECIFIC);
            iorb_queue_del(&ai->ports[p].iorb_queue, vProblemIorb);
            iorb_queue_add(&done_queue, vProblemIorb, pProblemIorb);
          } else {
            /* get sense information */
            ADD_WORKSPACE *aws = add_workspace(pProblemIorb);
            int d = iorb_unit_device(pProblemIorb);
            int (*req_sense)(IORBH FAR16DATA *, IORBH *, int) =
              (ai->ports[p].devs[d].atapi) ? atapi_req_sense : ata_req_sense;

            aws->processing = 1;
            aws->queued_hw = 1;

            if (req_sense(vProblemIorb, pProblemIorb, 0) == 0) {
              /* execute request sense on slot #0 before anything else comes along */
              Timer_StartTimerMS(&aws->timer, 5000, timeout_callback, CastFar16ToULONG(vProblemIorb));
              aws->cmd_slot = 0;
              ai->ports[p].reg_cmds = 1;
              writel(port_mmio + PORT_CMD_ISSUE, 1);
              readl(port_mmio); /* flush */
            } else {
              /* IORB is expected to contain the error code; just move to done queue */
              iorb_queue_del(&ai->ports[p].iorb_queue, vProblemIorb);
              iorb_queue_add(&done_queue, vProblemIorb, pProblemIorb);
            }
          }
        }
      }
    }
  }

  spin_unlock(drv_lock);
  DPRINTF(0, DBG_PREFIX": Resuming\n");

  /* call notification routine on all IORBs which have completed */
  for (vIorb = done_queue.vRoot; vIorb != FAR16NULL; vIorb = vNext) {
    pIorb = Far16ToFlat(vIorb);
    vNext = pIorb->f16NxtIORB;
    spin_lock(drv_lock);
    aws_free(add_workspace(pIorb));
    spin_unlock(drv_lock);
    iorb_complete(vIorb, pIorb);
  }

  /* restart engine to resume IORB processing */
  spin_lock(drv_lock);
  trigger_engine();
  spin_unlock(drv_lock);

  DPRINTF(0, DBG_PREFIX": END Rearm=%x\n", rearm_ctx_hook);

  /* Check whether we have to rearm ourselves because some adapters were busy
   * when we wanted to restart ports on them.
   */
  if (rearm_ctx_hook) {
    /* We cannot rearm ourselves because we would execute immediately, leaving
     * no time to process and clear the reason we need to rearm. Therefore
     * we set the timer again.
     */
    Timer_StartTimerMS(&th_watchdog, 250, WatchdogTimer, RestartCtxHook_h);
  }

  ClearThreadStatus(RestartCtxHook_h);
  KernThunkStackTo16();
}
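
/*
 * Illustrative sketch (not part of the driver build): the typical path by
 * which a timed-out command reaches ResetCtxHook() below. A command timer is
 * started with CastFar16ToULONG(vIorb) as its user argument (see the
 * request-sense path above); when it fires, the timeout handler arms the
 * reset hook with that value, and ResetCtxHook() converts it back via
 * CastULONGToFar16() and moves the IORB to the abort queue. The callback
 * signature and body shown here are assumptions; the real timeout_callback()
 * is defined elsewhere in the driver.
 */
#if 0
static void example_timeout_callback(ULONG hTimer, ULONG ulUser)
{
  /* ulUser carries the FAR16 address of the timed-out IORB; hand it to the
   * reset/abort hook, which runs at task time */
  SafeArmCtxHook(ResetCtxHook_h, ulUser);
}
#endif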
/******************************************************************************
 * Reset and abort context hook. This function runs at task time and takes
 * care of port resets and their side effects. Inputs to this function are:
 *
 * ports_to_reset[] - array of port bitmaps, each bit indicating which port
 *                    should be reset unconditionally. This is primarily
 *                    used by the error interrupt handler.
 *
 * abort_queue      - queue with IORBs to be aborted (timed out, ...). If
 *                    any of these commands has reached the hardware, the
 *                    corresponding port is reset to interrupt command
 *                    execution. This is primarily used for timeout
 *                    handling and when IORBs are requested to be aborted.
 *
 * After resetting the requested ports, all remaining active IORBs on those
 * ports have to be retried or aborted. Whether a retry is attempted depends
 * on the kind of IORB -- those which are idempotent are retried, all others
 * are aborted. This is different from the port restart hook because the
 * restart hook can assume it is called with the port in an error state, thus
 * the controller will have stopped executing commands. The reset handler can
 * be called at any time and we can't tell what's going on in the controller.
 *
 * The IORBs in the global abort_queue are expected to have their error code
 * set (aborted, timeout, ...) but must not be marked as 'done'; otherwise,
 * the upstream code might reuse the IORBs before we're done with them.
 */
void _Syscall ResetCtxHook(ULONG ulArg)
{
  IORB_QUEUE done_queue;
  AD_INFO *ai;
  IORBH FAR16DATA *vIorb;
  IORBH FAR16DATA *vNext;
  IORBH *pIorb;
  ADD_WORKSPACE *aws;
  int rearm_ctx_hook;
  int a;
  int p;

  D32ThunkStackTo32();
  vNext = FAR16NULL;
  rearm_ctx_hook = 0;
  AhciStats.ulHardErrorCount++;

  DPRINTF(0, DBG_PREFIX": BEG Arg=%x\n", ulArg);
  memset(&done_queue, 0x00, sizeof(done_queue));

  if (th_watchdog != 0) {
    /* watchdog timer still active -- just reset it */
    Timer_CancelTimer(th_watchdog);
    th_watchdog = 0;
  }

  spin_lock(drv_lock);

  if (ulArg) {
    /* Move the timed-out IORB to the abort queue. Since it's possible that the
     * IORB has completed after the timeout expired but before we got to this
     * line of code, we check the return code of iorb_queue_del(): if it
     * returns an error, the IORB must have completed a few microseconds ago
     * and there is no timeout.
     */
    vIorb = (IORBH FAR16DATA *) CastULONGToFar16(ulArg);
    pIorb = Far16ToFlat(vIorb);
    a = iorb_unit_adapter(pIorb);
    p = iorb_unit_port(pIorb);

    if (iorb_queue_del(&ad_infos[a].ports[p].iorb_queue, vIorb) == 0) {
      pIorb = Far16ToFlat(vIorb);
      iorb_queue_add(&abort_queue, vIorb, pIorb);
      pIorb->ErrorCode = IOERR_ADAPTER_TIMEOUT;
    }
  }

  /* add ports of active IORBs from the abort queue to ports_to_reset[] */
  for (vIorb = abort_queue.vRoot; vIorb != FAR16NULL; vIorb = vNext) {
    pIorb = Far16ToFlat(vIorb);
    vNext = pIorb->f16NxtIORB;
    a = iorb_unit_adapter(pIorb);
    p = iorb_unit_port(pIorb);
    ai = ad_infos + a;
    aws = add_workspace(pIorb);

    if (ai->busy) {
      /* this adapter is busy; leave it alone for now */
      rearm_ctx_hook = 1;
      continue;
    }

    /* move IORB to the local 'done' queue */
    iorb_queue_del(&abort_queue, vIorb);
    iorb_queue_add(&done_queue, vIorb, pIorb);

    /* reset port if the IORB has already been queued to hardware */
    if (aws->queued_hw) {
      if (aws->timer != 0) {
        Timer_CancelTimer(aws->timer);
        aws->timer = 0;
      }
      /* prepare port reset */
      ports_to_reset[a] |= (1UL << p);
    }
  }

  /* reset all ports in 'ports_to_reset[]' */
  for (a = 0; a < ad_info_cnt; a++) {
    ai = ad_infos + a;

    if (ai->busy) {
      /* this adapter is busy; leave it alone for now */
      rearm_ctx_hook = 1;
      continue;
    }

    for (p = 0; p <= ai->port_max; p++) {
      if (ports_to_reset[a] & (1UL << p)) {
        ports_to_reset[a] &= ~(1UL << p);
        ai->ports[p].ulResetCount++;

        /* Reset this port. Since this is a rather slow operation, we'll
         * release the spinlock while doing so. The adapter is marked as
         * 'busy' to prevent similar routines (e.g. an AHCI port scan) from
         * interfering.
         */
        ai->busy = 1;
        spin_unlock(drv_lock);
        ahci_reset_port(ai, p, 1);
        spin_lock(drv_lock);
        ai->busy = 0;

        /* reset port status */
        ai->ports[p].ncq_cmds = 0;
        ai->ports[p].reg_cmds = 0;
        ai->ports[p].cmd_slot = 0;

        /* retry or abort all remaining active commands on this port */
        for (vIorb = ai->ports[p].iorb_queue.vRoot; vIorb != FAR16NULL; vIorb = vNext) {
          IORBH *pIorb = Far16ToFlat(vIorb);

          vNext = pIorb->f16NxtIORB;
          aws = add_workspace(pIorb);

          if (aws->queued_hw) {
            /* this IORB had already been queued to HW when we reset the port */
            if (aws->idempotent && aws->retries++ < MAX_RETRIES) {
              /* we can retry this IORB */
              iorb_requeue(pIorb);
            } else {
              /* we cannot retry this IORB; consider it aborted */
              pIorb->ErrorCode = IOERR_CMD_ABORTED;
              iorb_queue_del(&ai->ports[p].iorb_queue, vIorb);
              iorb_queue_add(&done_queue, vIorb, pIorb);
            }
          }
        }
      }
    }
  }

  spin_unlock(drv_lock);
  DPRINTF(0, DBG_PREFIX": Resuming\n");

  /* complete all aborted IORBs */
  for (vIorb = done_queue.vRoot; vIorb != FAR16NULL; vIorb = vNext) {
    pIorb = Far16ToFlat(vIorb);
    vNext = pIorb->f16NxtIORB;
    spin_lock(drv_lock);
    aws_free(add_workspace(pIorb));
    spin_unlock(drv_lock);
    pIorb->Status |= IORB_ERROR;
    iorb_complete(vIorb, pIorb);
  }

  /* restart engine to resume IORB processing */
  spin_lock(drv_lock);
  trigger_engine();
  spin_unlock(drv_lock);

  DPRINTF(0, DBG_PREFIX": END Rearm=%x\n", rearm_ctx_hook);

  /* Check whether we have to rearm ourselves because some adapters were busy
   * when we wanted to reset ports on them.
   */
  if (rearm_ctx_hook) {
    /* We cannot rearm ourselves because we would execute immediately, leaving
     * no time to process and clear the reason we need to rearm. Therefore
     * we set the timer again.
     */
    Timer_StartTimerMS(&th_watchdog, 250, WatchdogTimer, ResetCtxHook_h);
  }

  ClearThreadStatus(ResetCtxHook_h);
  KernThunkStackTo16();
}

/******************************************************************************
 * IORB Engine context hook. This hook is executed if trigger_engine() came
 * to the conclusion that some of the IORBs keep bouncing, most likely due to
 * some condition on the adapter such as being busy. It could also be a very
 * busy system. Either way, this requires some task-time help.
 */
void _Syscall engine_ctxhook(ULONG parm)
{
  int iorbs_sent;
  int i;

  D32ThunkStackTo32();
  DPRINTF(DBG_FUNCBEG, DBG_PREFIX": engine_ctxhook() started\n");

  if (resume_sleep_flag) {
    msleep(resume_sleep_flag);
    resume_sleep_flag = 0;
  }

  spin_lock(drv_lock);
  for (i = 0; i < 10; i++) {
    if ((iorbs_sent = trigger_engine_1()) == 0) break;
  }
  spin_unlock(drv_lock);

  DPRINTF(DBG_FUNCEND, DBG_PREFIX": engine_ctxhook() completed\n");

  if (iorbs_sent != 0) {
    /* need to rearm ourselves for another run */
    msleep(250);
    KernArmHook(engine_ctxhook_h, 0, 0);
  }

  KernThunkStackTo16();
}
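
/*
 * Illustrative sketch (not part of the driver build): the expected producer
 * side of engine_ctxhook() above. Per the header comment, the hook is armed
 * when trigger_engine() concludes that IORBs keep bouncing; a caller along
 * those lines might look as follows. The retry limit and the function name
 * are assumptions made for this example only; the real trigger_engine()
 * lives in another module and callers presumably hold drv_lock, as
 * engine_ctxhook() above does when it calls trigger_engine_1().
 */
#if 0
static void example_trigger_engine(void)
{
  int i;

  /* try a few times at request/interrupt time ... */
  for (i = 0; i < 3; i++) {
    if (trigger_engine_1() == 0) {
      return;   /* everything queued; no task-time help needed */
    }
  }

  /* ... then defer to task time, where engine_ctxhook() can sleep and retry */
  KernArmHook(engine_ctxhook_h, 0, 0);
}
#endif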