source: trunk/src/os2ahci/ctxhook.c@ 121

Last change on this file since 121 was 121, checked in by cjm, 14 years ago

OS2AHCI Version 1.19
====================

  • Added retry counters to all commands (IORBs) in order to prevent infinite retry loops. This was necessary because Virtualbox 4.x doesn't seem to set the "current command index" in specific ATAPI error situations, causing the failing command to be retried indefinitely instead of asking for a sense buffer
  • Minor changes to debug logging
File size: 15.7 KB
Line 
1/******************************************************************************
2 * ctxhook.c - context hooks (kernel thread functions) for os2ahci
3 *
4 * Copyright (c) 2011 thi.guten Software Development
5 * Copyright (c) 2011 Mensys B.V.
6 *
7 * Authors: Christian Mueller, Markus Thielen
8 *
9 * Parts copied from/inspired by the Linux AHCI driver;
10 * those parts are (c) Linux AHCI/ATA maintainers
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include "os2ahci.h"
28#include "ata.h"
29#include "atapi.h"
30
31/* -------------------------- macros and constants ------------------------- */
32
33/* ------------------------ typedefs and structures ------------------------ */
34
35/* -------------------------- function prototypes -------------------------- */
36
37/* ------------------------ global/static variables ------------------------ */
38
39/* port restart context hook and input data:
 *
 *   restart_ctxhook_h   - context hook handle; restart_ctxhook() passes it to
 *                         DevHelp_ArmCtxHook() to rearm itself
 *   ports_to_restart[a] - port bitmap for adapter 'a'; bit 'p' set means port
 *                         'p' is to be restarted by restart_ctxhook(), which
 *                         clears the bit when it picks the port up
 */
40ULONG restart_ctxhook_h;
41volatile u32 ports_to_restart[MAX_AD];
42
43/* port reset context hook and input data:
 *
 *   reset_ctxhook_h     - context hook handle; reset_ctxhook() passes it to
 *                         DevHelp_ArmCtxHook() to rearm itself
 *   th_reset_watchdog   - timer handle; if non-zero when reset_ctxhook()
 *                         starts, the timer is cancelled and the handle is
 *                         set back to 0
 *   ports_to_reset[a]   - port bitmap for adapter 'a'; bit 'p' set means port
 *                         'p' is to be reset by reset_ctxhook()
 *   abort_queue         - IORBs to be aborted; drained by reset_ctxhook()
 */
44ULONG reset_ctxhook_h;
45ULONG th_reset_watchdog;
46volatile u32 ports_to_reset[MAX_AD];
47IORB_QUEUE abort_queue;
48
49/* trigger engine context hook and input data:
 *
 *   engine_ctxhook_h    - context hook handle; engine_ctxhook() passes it to
 *                         DevHelp_ArmCtxHook() to rearm itself
 */
50ULONG engine_ctxhook_h;
51
52/* ----------------------------- start of code ----------------------------- */
53
54/******************************************************************************
55 * Port restart context hook. This context hook is executed at task time and
56 * will handle ports which are stopped due to a device error condition.
57 *
58 * The following conditions may exist:
59 *
60 * - Only a single non-NCQ command is executed by the AHCI adapter at any
61 * given time (even if more are outstanding). This is the case for single
62 * devices or port multipliers without FIS-based command switching. Error
63 * recovery is simple because we know which command has failed and that
64 * all other commands have not yet started executing. Thus, we can requeue
65 * all of them, replacing the failing command with a "request sense"
66 * command to get error details.
67 *
68 * - Multiple non-NCQ commands are executed on different devices behind a
69 * port multiplier which supports FIS-based command switching. This is
70 * more difficult to recover from but currently not an issue because we
71 * don't yet support FIS-based command switching (the FIS receive areas
72 * would become too large for the current data model).
73 *
74 * - One or more NCQ commands were active at the time of the error, with or
75 * without FIS-based command switching. We would have to interrogate the
76 * corresponding devices to find out which command has failed but if this
77 * is combined with FIS-based command switching, even the AHCI spec
78 * recommends to reset the port. This leads to a much simpler approach:
79 * requeue all NCQ commands (they are idempotent per definition, otherwise
80 * they couldn't be reordered by the device) with the 'no_ncq' flag set
81 * in the IORB and reset the port. Then those comands will be executed as
82 * regular commands. The error, if it reoccurs, can then be handled by
83 * one of the above cases.
84 *
85 * The upstream code will guarantee that we will never have a mix of NCQ and
86 * non-NCQ commands active at the same time in order to reduce complexity
87 * in the interrupt and error handlers.
88 */
89void restart_ctxhook(ULONG parm)
90{
91 IORB_QUEUE done_queue;
92 AD_INFO *ai;
93 IORBH _far *problem_iorb;
94 IORBH _far *iorb;
95 IORBH _far *next = NULL;
96 u8 _far *port_mmio;
97 int rearm_ctx_hook = 0;
98 int need_reset;
99 int ccs;
100 int a;
101 int p;
102
103 dprintf("restart_ctxhook() started\n");
104 memset(&done_queue, 0x00, sizeof(done_queue));
105
106 spin_lock(drv_lock);
107
108 for (a = 0; a < ad_info_cnt; a++) {
109 ai = ad_infos + a;
110
111 if (ai->busy) {
112 /* this adapter is busy; leave it alone for now */
113 rearm_ctx_hook = 1;
114 continue;
115 }
116
117 for (p = 0; p <= ai->port_max; p++) {
118 if (ports_to_restart[a] & (1UL << p)) {
119 ports_to_restart[a] &= ~(1UL << p);
120
121 /* restart this port */
122 port_mmio = port_base(ai, p);
123 problem_iorb = NULL;
124 need_reset = 0;
125
126 dprintf("port %d, TF_DATA: 0x%lx\n", p, readl(port_mmio + PORT_TFDATA));
127
128 /* get "current command slot"; only valid if there are no NCQ cmds */
129 ccs = (int) ((readl(port_mmio + PORT_CMD) >> 8) & 0x0f);
130 ddprintf(" PORT_CMD = 0x%x\n", ccs);
131
132 for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
133 ADD_WORKSPACE _far *aws = add_workspace(iorb);
134 next = iorb->pNxtIORB;
135
136 if (aws->queued_hw) {
137 if (ai->ports[p].ncq_cmds & (1UL << aws->cmd_slot)) {
138 /* NCQ command; force non-NCQ mode and trigger port reset */
139 ai->ports[p].ncq_cmds &= ~(1UL << aws->cmd_slot);
140 aws->no_ncq = 1;
141 need_reset = 1;
142 } else {
143 /* regular command; clear cmd bit and identify problem IORB */
144 ai->ports[p].reg_cmds &= ~(1UL << aws->cmd_slot);
145 if (aws->cmd_slot == ccs) {
146 /* this is the non-NCQ command that failed */
147 problem_iorb = iorb;
148 }
149 }
150 /* we can requeue all IORBs unconditionally (see function comment) */
151 if (aws->retries++ < MAX_RETRIES) {
152 iorb_requeue(iorb);
153
154 } else {
155 /* retry count exceeded; consider IORB aborted */
156 iorb_seterr(iorb, IOERR_CMD_ABORTED);
157 iorb_queue_del(&ai->ports[p].iorb_queue, iorb);
158 iorb_queue_add(&done_queue, iorb);
159 if (iorb == problem_iorb) {
160 /* no further analysis -- we're done with this one */
161 problem_iorb = NULL;
162 }
163 }
164 }
165 }
166
167 /* sanity check: issued command bitmaps should be 0 now */
168 if (ai->ports[p].ncq_cmds != 0 || ai->ports[p].reg_cmds != 0) {
169 dprintf("warning: commands issued not 0 (%08lx/%08lx); resetting...\n",
170 ai->ports[p].ncq_cmds, ai->ports[p].reg_cmds);
171 need_reset = 1;
172 }
173
174 if (!need_reset) {
175 if ((readl(port_mmio + PORT_TFDATA) & 0x88) != 0) {
176 /* device is not in an idle state */
177 need_reset = 1;
178 }
179 }
180
181 /* restart/reset port */
182 ai->busy = 1;
183 spin_unlock(drv_lock);
184 if (need_reset) {
185 ahci_reset_port(ai, p, 1);
186 } else {
187 ahci_stop_port(ai, p);
188 ahci_start_port(ai, p, 1);
189 }
190 spin_lock(drv_lock);
191 ai->busy = 0;
192
193 /* reset internal port status */
194 ai->ports[p].ncq_cmds = 0;
195 ai->ports[p].reg_cmds = 0;
196 ai->ports[p].cmd_slot = 0;
197
198 if (problem_iorb != NULL) {
199 /* get details about the error that caused this IORB to fail */
200 if (need_reset) {
201 /* no way to retrieve error details after a reset */
202 iorb_seterr(problem_iorb, IOERR_DEVICE_NONSPECIFIC);
203 iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
204 iorb_queue_add(&done_queue, problem_iorb);
205
206 } else {
207 /* get sense information */
208 ADD_WORKSPACE _far *aws = add_workspace(problem_iorb);
209 int d = iorb_unit_device(problem_iorb);
210 int (*req_sense)(IORBH _far *, int) = (ai->ports[p].devs[d].atapi) ?
211 atapi_req_sense : ata_req_sense;
212
213 aws->processing = 1;
214 aws->queued_hw = 1;
215
216 if (req_sense(problem_iorb, 0) == 0) {
217 /* execute request sense on slot #0 before anything else comes along */
218 ADD_StartTimerMS(&aws->timer, 5000, (PFN) timeout_callback, iorb, 0);
219 aws->cmd_slot = 0;
220 ai->ports[p].reg_cmds = 1;
221 writel(port_mmio + PORT_CMD_ISSUE, 1);
222 readl(port_mmio); /* flush */
223
224 } else {
225 /* IORB is expected to contain the error code; just move to done queue */
226 iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
227 iorb_queue_add(&done_queue, problem_iorb);
228 }
229 }
230 }
231 }
232 }
233 }
234
235 spin_unlock(drv_lock);
236
237 /* call notification routine on all IORBs which have completed */
238 for (iorb = done_queue.root; iorb != NULL; iorb = next) {
239 next = iorb->pNxtIORB;
240
241 spin_lock(drv_lock);
242 aws_free(add_workspace(iorb));
243 spin_unlock(drv_lock);
244
245 iorb_complete(iorb);
246 }
247
248 /* restart engine to resume IORB processing */
249 spin_lock(drv_lock);
250 trigger_engine();
251 spin_unlock(drv_lock);
252
253 dprintf("restart_ctxhook() completed\n");
254
255 /* Check whether we have to rearm ourselves because some adapters were busy
256 * when we wanted to restart ports on them.
257 */
258 if (rearm_ctx_hook) {
259 msleep(250);
260 DevHelp_ArmCtxHook(0, restart_ctxhook_h);
261 }
262}
263
264/******************************************************************************
265 * Reset and abort context hook. This function runs at task time and takes
266 * care of port resets and their side effects. Input to this function are:
267 *
268 * ports_to_reset[] - array of port bitmaps, each bit indicating which port
269 * should be reset unconditionally. This is primarily
270 * used by the error interrupt handler.
271 *
272 * abort_queue - queue with IORBs to be arborted (timed-out, ...) If
273 * any of these commands have reached the hardware, the
274 * corresponding port is reset to interrupt command
275 * execution. This is primarily used for timeout
276 * handling and when IORBs are requested to be aborted.
277 *
278 * After resetting the requested ports, all remaining active IORBs on those
279 * ports have to be retried or aborted. Whether a retry is attempted depends
280 * on the kind of IORB -- those which are idempotent are retried, all others
281 * are aborted. This is different from the port restart hook because the
282 * restart hook can assume it is called with the port in error state, thus
283 * the controller will have stopped executing commands. The reset handler can
284 * be called at any time and we can't tell what's going on in the controller.
285 *
286 * The IORBs in the global abort_queue are expected to have their error code
287 * set (aborted, timeout, ...) but must not be marked as 'done'; otherwise,
288 * the upstream code might reuse the IORBs before we're done with them.
289 */
290void reset_ctxhook(ULONG parm)
291{
292 IORB_QUEUE done_queue;
293 AD_INFO *ai;
294 IORBH _far *iorb;
295 IORBH _far *next = NULL;
296 int rearm_ctx_hook = 0;
297 int a;
298 int p;
299
300 dprintf("reset_ctxhook() started\n");
301 memset(&done_queue, 0x00, sizeof(done_queue));
302
303 spin_lock(drv_lock);
304
305 if (th_reset_watchdog != 0) {
306 /* watchdog timer still active -- just reset it */
307 ADD_CancelTimer(th_reset_watchdog);
308 th_reset_watchdog = 0;
309 }
310
311 /* add ports of active IORBs from the abort queue to ports_to_reset[] */
312 for (iorb = abort_queue.root; iorb != NULL; iorb = next) {
313 next = iorb->pNxtIORB;
314 a = iorb_unit_adapter(iorb);
315 p = iorb_unit_port(iorb);
316 ai = ad_infos + a;
317
318 if (ai->busy) {
319 /* this adapter is busy; leave it alone for now */
320 rearm_ctx_hook = 1;
321 continue;
322 }
323
324 /* move IORB to the local 'done' queue */
325 iorb_queue_del(&abort_queue, iorb);
326 iorb_queue_add(&done_queue, iorb);
327
328 /* reset port if the IORB has already been queued to hardware */
329 if (add_workspace(iorb)->queued_hw) {
330 /* prepare port reset */
331 ports_to_reset[a] |= (1UL << p);
332 }
333 }
334
335 /* reset all ports in 'ports_to_reset[]' */
336 for (a = 0; a < ad_info_cnt; a++) {
337 ai = ad_infos + a;
338
339 if (ai->busy) {
340 /* this adapter is busy; leave it alone for now */
341 rearm_ctx_hook = 1;
342 continue;
343 }
344
345 for (p = 0; p <= ai->port_max; p++) {
346 if (ports_to_reset[a] & (1UL << p)) {
347 ports_to_reset[a] &= ~(1UL << p);
348
349 /* Reset this port. Since this is a rather slow operation, we'll
350 * release the spinlock while doing so. The adapter is marked as
351 * 'busy' to prevent similar routines (e.g. an ahci port scan) from
352 * interfering.
353 */
354 ai->busy = 1;
355 spin_unlock(drv_lock);
356 ahci_reset_port(ai, p, 1);
357 spin_lock(drv_lock);
358 ai->busy = 0;
359
360 /* reset port status */
361 ai->ports[p].ncq_cmds = 0;
362 ai->ports[p].reg_cmds = 0;
363 ai->ports[p].cmd_slot = 0;
364
365 /* retry or abort all remaining active commands on this port */
366 for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
367 ADD_WORKSPACE _far *aws = add_workspace(iorb);
368 next = iorb->pNxtIORB;
369
370 if (aws->queued_hw) {
371 /* this IORB had already been queued to HW when we reset the port */
372 if (aws->idempotent && aws->retries++ < MAX_RETRIES) {
373 /* we can retry this IORB */
374 iorb_requeue(iorb);
375
376 } else {
377 /* we cannot retry this IORB; consider it aborted */
378 iorb->ErrorCode = IOERR_CMD_ABORTED;
379 iorb_queue_del(&ai->ports[p].iorb_queue, iorb);
380 iorb_queue_add(&done_queue, iorb);
381 }
382 }
383 }
384 }
385 }
386 }
387
388 spin_unlock(drv_lock);
389
390 /* complete all aborted IORBs */
391 for (iorb = done_queue.root; iorb != NULL; iorb = next) {
392 next = iorb->pNxtIORB;
393
394 spin_lock(drv_lock);
395 aws_free(add_workspace(iorb));
396 spin_unlock(drv_lock);
397
398 iorb->Status |= IORB_ERROR;
399 iorb_complete(iorb);
400 }
401
402 /* restart engine to resume IORB processing */
403 spin_lock(drv_lock);
404 trigger_engine();
405 spin_unlock(drv_lock);
406
407 dprintf("reset_ctxhook() completed\n");
408
409 /* Check whether we have to rearm ourselves because some adapters were busy
410 * when we wanted to reset ports on them.
411 */
412 if (rearm_ctx_hook) {
413 msleep(250);
414 DevHelp_ArmCtxHook(0, reset_ctxhook_h);
415 }
416}
417
418/******************************************************************************
419 * IORB Engine context hook. This hook is executed if trigger_engine() came
420 * to the conclusion that some of the IORBs keep bouncing, most likely due to
421 * some condition on the adapter such as being busy. It could also be a very
422 * busy system. Either way, this requires some task-time help.
423 */
424void engine_ctxhook(ULONG parm)
425{
426 int iorbs_sent;
427 int i;
428
429 dprintf("engine_ctxhook() started\n");
430
431 spin_lock(drv_lock);
432 for (i = 0; i < 10; i++) {
433 if ((iorbs_sent = trigger_engine_1()) == 0) {
434 break;
435 }
436 }
437 spin_unlock(drv_lock);
438
439 dprintf("engine_ctxhook() completed\n");
440
441 if (iorbs_sent != 0) {
442 /* need to rearm ourselves for another run */
443 msleep(250);
444 DevHelp_ArmCtxHook(0, engine_ctxhook_h);
445 }
446}
447
Note: See TracBrowser for help on using the repository browser.