source: trunk/src/os2ahci/ctxhook.c@ 125

Last change on this file since 125 was 125, checked in by cjm, 14 years ago

Version 1.21


  • Triggered by reports of performance loss with NCQ
  • New command line flag "/F" to force using write buffers even when upstream I/O requested non-buffered I/O; the primary purpose of this flag is to debug the NCQ performance drop but the flag may or may not remain.
File size: 15.7 KB
Line 
1/******************************************************************************
2 * ctxhook.c - context hooks (kernel thread functions) for os2ahci
3 *
4 * Copyright (c) 2011 thi.guten Software Development
5 * Copyright (c) 2011 Mensys B.V.
6 *
7 * Authors: Christian Mueller, Markus Thielen
8 *
9 * Parts copied from/inspired by the Linux AHCI driver;
10 * those parts are (c) Linux AHCI/ATA maintainers
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26
27#include "os2ahci.h"
28#include "ata.h"
29#include "atapi.h"
30
31/* -------------------------- macros and constants ------------------------- */
32
33/* ------------------------ typedefs and structures ------------------------ */
34
35/* -------------------------- function prototypes -------------------------- */
36
37/* ------------------------ global/static variables ------------------------ */
38
39/* port restart context hook: hook handle and per-adapter input bitmaps */
40ULONG restart_ctxhook_h; /* handle passed to DevHelp_ArmCtxHook() to (re)arm restart_ctxhook() */
41volatile u32 ports_to_restart[MAX_AD]; /* indexed by adapter; bit p set = port p is to be restarted */
42
43/* port reset context hook: hook handle, watchdog timer and input data */
44ULONG reset_ctxhook_h; /* handle passed to DevHelp_ArmCtxHook() to (re)arm reset_ctxhook() */
45ULONG th_reset_watchdog; /* watchdog timer handle; 0 = no timer pending (cancelled by reset_ctxhook()) */
46volatile u32 ports_to_reset[MAX_AD]; /* indexed by adapter; bit p set = port p is to be reset */
47IORB_QUEUE abort_queue; /* IORBs waiting to be aborted by reset_ctxhook() */
48
49/* trigger engine context hook: hook handle */
50ULONG engine_ctxhook_h; /* handle passed to DevHelp_ArmCtxHook() to (re)arm engine_ctxhook() */
51
52/* ----------------------------- start of code ----------------------------- */
53
54/******************************************************************************
55 * Port restart context hook. This context hook is executed at task time and
56 * will handle ports which are stopped due to a device error condition.
57 *
58 * The following conditions may exist:
59 *
60 * - Only a single non-NCQ command is executed by the AHCI adapter at any
61 * given time (even if more are outstanding). This is the case for single
62 * devices or port multipliers without FIS-based command switching. Error
63 * recovery is simple because we know which command has failed and that
64 * all other commands have not yet started executing. Thus, we can requeue
65 * all of them, replacing the failing command with a "request sense"
66 * command to get error details.
67 *
68 * - Multiple non-NCQ commands are executed on different devices behind a
69 * port multiplier which supports FIS-based command switching. This is
70 * more difficult to recover from but currently not an issue because we
71 * don't yet support FIS-based command switching (the FIS receive areas
72 * would become too large for the current data model).
73 *
74 * - One or more NCQ commands were active at the time of the error, with or
75 * without FIS-based command switching. We would have to interrogate the
76 * corresponding devices to find out which command has failed but if this
77 * is combined with FIS-based command switching, even the AHCI spec
78 * recommends to reset the port. This leads to a much simpler approach:
79 * requeue all NCQ commands (they are idempotent per definition, otherwise
80 * they couldn't be reordered by the device) with the 'no_ncq' flag set
81 * in the IORB and reset the port. Then those comands will be executed as
82 * regular commands. The error, if it reoccurs, can then be handled by
83 * one of the above cases.
84 *
85 * The upstream code will guarantee that we will never have a mix of NCQ and
86 * non-NCQ commands active at the same time in order to reduce complexity
87 * in the interrupt and error handlers.
88 */
89void restart_ctxhook(ULONG parm)
90{
91 IORB_QUEUE done_queue;
92 AD_INFO *ai;
93 IORBH _far *problem_iorb;
94 IORBH _far *iorb;
95 IORBH _far *next = NULL;
96 u8 _far *port_mmio;
97 int rearm_ctx_hook = 0;
98 int need_reset;
99 int ccs;
100 int a;
101 int p;
102
103 dprintf("restart_ctxhook() started\n");
104 memset(&done_queue, 0x00, sizeof(done_queue));
105
106 spin_lock(drv_lock);
107
108 for (a = 0; a < ad_info_cnt; a++) {
109 ai = ad_infos + a;
110
111 if (ai->busy) {
112 /* this adapter is busy; leave it alone for now */
113 rearm_ctx_hook = 1;
114 continue;
115 }
116
117 for (p = 0; p <= ai->port_max; p++) {
118 if (ports_to_restart[a] & (1UL << p)) {
119 ports_to_restart[a] &= ~(1UL << p);
120
121 /* restart this port */
122 port_mmio = port_base(ai, p);
123 problem_iorb = NULL;
124 need_reset = 0;
125
126 dprintf("port %d, TF_DATA: 0x%lx\n", p, readl(port_mmio + PORT_TFDATA));
127
128 /* get "current command slot"; only valid if there are no NCQ cmds */
129 ccs = (int) ((readl(port_mmio + PORT_CMD) >> 8) & 0x1f);
130 ddprintf(" PORT_CMD = 0x%x\n", ccs);
131
132 for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
133 ADD_WORKSPACE _far *aws = add_workspace(iorb);
134 next = iorb->pNxtIORB;
135
136 if (aws->queued_hw) {
137 if (ai->ports[p].ncq_cmds & (1UL << aws->cmd_slot)) {
138 /* NCQ command; force non-NCQ mode and trigger port reset */
139 ai->ports[p].ncq_cmds &= ~(1UL << aws->cmd_slot);
140 aws->no_ncq = 1;
141 need_reset = 1;
142 } else {
143 /* regular command; clear cmd bit and identify problem IORB */
144 ai->ports[p].reg_cmds &= ~(1UL << aws->cmd_slot);
145 if (aws->cmd_slot == ccs) {
146 /* this is the non-NCQ command that failed */
147 ddprintf("failing IORB: %Fp\n", iorb);
148 problem_iorb = iorb;
149 }
150 }
151 /* we can requeue all IORBs unconditionally (see function comment) */
152 if (aws->retries++ < MAX_RETRIES) {
153 iorb_requeue(iorb);
154
155 } else {
156 /* retry count exceeded; consider IORB aborted */
157 iorb_seterr(iorb, IOERR_CMD_ABORTED);
158 iorb_queue_del(&ai->ports[p].iorb_queue, iorb);
159 iorb_queue_add(&done_queue, iorb);
160 if (iorb == problem_iorb) {
161 /* no further analysis -- we're done with this one */
162 problem_iorb = NULL;
163 }
164 }
165 }
166 }
167
168 /* sanity check: issued command bitmaps should be 0 now */
169 if (ai->ports[p].ncq_cmds != 0 || ai->ports[p].reg_cmds != 0) {
170 dprintf("warning: commands issued not 0 (%08lx/%08lx); resetting...\n",
171 ai->ports[p].ncq_cmds, ai->ports[p].reg_cmds);
172 need_reset = 1;
173 }
174
175 if (!need_reset) {
176 if ((readl(port_mmio + PORT_TFDATA) & 0x88) != 0) {
177 /* device is not in an idle state */
178 need_reset = 1;
179 }
180 }
181
182 /* restart/reset port */
183 ai->busy = 1;
184 spin_unlock(drv_lock);
185 if (need_reset) {
186 ahci_reset_port(ai, p, 1);
187 } else {
188 ahci_stop_port(ai, p);
189 ahci_start_port(ai, p, 1);
190 }
191 spin_lock(drv_lock);
192 ai->busy = 0;
193
194 /* reset internal port status */
195 ai->ports[p].ncq_cmds = 0;
196 ai->ports[p].reg_cmds = 0;
197 ai->ports[p].cmd_slot = 0;
198
199 if (problem_iorb != NULL) {
200 /* get details about the error that caused this IORB to fail */
201 if (need_reset) {
202 /* no way to retrieve error details after a reset */
203 iorb_seterr(problem_iorb, IOERR_DEVICE_NONSPECIFIC);
204 iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
205 iorb_queue_add(&done_queue, problem_iorb);
206
207 } else {
208 /* get sense information */
209 ADD_WORKSPACE _far *aws = add_workspace(problem_iorb);
210 int d = iorb_unit_device(problem_iorb);
211 int (*req_sense)(IORBH _far *, int) = (ai->ports[p].devs[d].atapi) ?
212 atapi_req_sense : ata_req_sense;
213
214 aws->processing = 1;
215 aws->queued_hw = 1;
216
217 if (req_sense(problem_iorb, 0) == 0) {
218 /* execute request sense on slot #0 before anything else comes along */
219 ADD_StartTimerMS(&aws->timer, 5000, (PFN) timeout_callback, iorb, 0);
220 aws->cmd_slot = 0;
221 ai->ports[p].reg_cmds = 1;
222 writel(port_mmio + PORT_CMD_ISSUE, 1);
223 readl(port_mmio); /* flush */
224
225 } else {
226 /* IORB is expected to contain the error code; just move to done queue */
227 iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
228 iorb_queue_add(&done_queue, problem_iorb);
229 }
230 }
231 }
232 }
233 }
234 }
235
236 spin_unlock(drv_lock);
237
238 /* call notification routine on all IORBs which have completed */
239 for (iorb = done_queue.root; iorb != NULL; iorb = next) {
240 next = iorb->pNxtIORB;
241
242 spin_lock(drv_lock);
243 aws_free(add_workspace(iorb));
244 spin_unlock(drv_lock);
245
246 iorb_complete(iorb);
247 }
248
249 /* restart engine to resume IORB processing */
250 spin_lock(drv_lock);
251 trigger_engine();
252 spin_unlock(drv_lock);
253
254 dprintf("restart_ctxhook() completed\n");
255
256 /* Check whether we have to rearm ourselves because some adapters were busy
257 * when we wanted to restart ports on them.
258 */
259 if (rearm_ctx_hook) {
260 msleep(250);
261 DevHelp_ArmCtxHook(0, restart_ctxhook_h);
262 }
263}
264
265/******************************************************************************
266 * Reset and abort context hook. This function runs at task time and takes
267 * care of port resets and their side effects. Input to this function are:
268 *
269 * ports_to_reset[] - array of port bitmaps, each bit indicating which port
270 * should be reset unconditionally. This is primarily
271 * used by the error interrupt handler.
272 *
273 * abort_queue - queue with IORBs to be arborted (timed-out, ...) If
274 * any of these commands have reached the hardware, the
275 * corresponding port is reset to interrupt command
276 * execution. This is primarily used for timeout
277 * handling and when IORBs are requested to be aborted.
278 *
279 * After resetting the requested ports, all remaining active IORBs on those
280 * ports have to be retried or aborted. Whether a retry is attempted depends
281 * on the kind of IORB -- those which are idempotent are retried, all others
282 * are aborted. This is different from the port restart hook because the
283 * restart hook can assume it is called with the port in error state, thus
284 * the controller will have stopped executing commands. The reset handler can
285 * be called at any time and we can't tell what's going on in the controller.
286 *
287 * The IORBs in the global abort_queue are expected to have their error code
288 * set (aborted, timeout, ...) but must not be marked as 'done'; otherwise,
289 * the upstream code might reuse the IORBs before we're done with them.
290 */
291void reset_ctxhook(ULONG parm)
292{
293 IORB_QUEUE done_queue;
294 AD_INFO *ai;
295 IORBH _far *iorb;
296 IORBH _far *next = NULL;
297 int rearm_ctx_hook = 0;
298 int a;
299 int p;
300
301 dprintf("reset_ctxhook() started\n");
302 memset(&done_queue, 0x00, sizeof(done_queue));
303
304 spin_lock(drv_lock);
305
306 if (th_reset_watchdog != 0) {
307 /* watchdog timer still active -- just reset it */
308 ADD_CancelTimer(th_reset_watchdog);
309 th_reset_watchdog = 0;
310 }
311
312 /* add ports of active IORBs from the abort queue to ports_to_reset[] */
313 for (iorb = abort_queue.root; iorb != NULL; iorb = next) {
314 next = iorb->pNxtIORB;
315 a = iorb_unit_adapter(iorb);
316 p = iorb_unit_port(iorb);
317 ai = ad_infos + a;
318
319 if (ai->busy) {
320 /* this adapter is busy; leave it alone for now */
321 rearm_ctx_hook = 1;
322 continue;
323 }
324
325 /* move IORB to the local 'done' queue */
326 iorb_queue_del(&abort_queue, iorb);
327 iorb_queue_add(&done_queue, iorb);
328
329 /* reset port if the IORB has already been queued to hardware */
330 if (add_workspace(iorb)->queued_hw) {
331 /* prepare port reset */
332 ports_to_reset[a] |= (1UL << p);
333 }
334 }
335
336 /* reset all ports in 'ports_to_reset[]' */
337 for (a = 0; a < ad_info_cnt; a++) {
338 ai = ad_infos + a;
339
340 if (ai->busy) {
341 /* this adapter is busy; leave it alone for now */
342 rearm_ctx_hook = 1;
343 continue;
344 }
345
346 for (p = 0; p <= ai->port_max; p++) {
347 if (ports_to_reset[a] & (1UL << p)) {
348 ports_to_reset[a] &= ~(1UL << p);
349
350 /* Reset this port. Since this is a rather slow operation, we'll
351 * release the spinlock while doing so. The adapter is marked as
352 * 'busy' to prevent similar routines (e.g. an ahci port scan) from
353 * interfering.
354 */
355 ai->busy = 1;
356 spin_unlock(drv_lock);
357 ahci_reset_port(ai, p, 1);
358 spin_lock(drv_lock);
359 ai->busy = 0;
360
361 /* reset port status */
362 ai->ports[p].ncq_cmds = 0;
363 ai->ports[p].reg_cmds = 0;
364 ai->ports[p].cmd_slot = 0;
365
366 /* retry or abort all remaining active commands on this port */
367 for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
368 ADD_WORKSPACE _far *aws = add_workspace(iorb);
369 next = iorb->pNxtIORB;
370
371 if (aws->queued_hw) {
372 /* this IORB had already been queued to HW when we reset the port */
373 if (aws->idempotent && aws->retries++ < MAX_RETRIES) {
374 /* we can retry this IORB */
375 iorb_requeue(iorb);
376
377 } else {
378 /* we cannot retry this IORB; consider it aborted */
379 iorb->ErrorCode = IOERR_CMD_ABORTED;
380 iorb_queue_del(&ai->ports[p].iorb_queue, iorb);
381 iorb_queue_add(&done_queue, iorb);
382 }
383 }
384 }
385 }
386 }
387 }
388
389 spin_unlock(drv_lock);
390
391 /* complete all aborted IORBs */
392 for (iorb = done_queue.root; iorb != NULL; iorb = next) {
393 next = iorb->pNxtIORB;
394
395 spin_lock(drv_lock);
396 aws_free(add_workspace(iorb));
397 spin_unlock(drv_lock);
398
399 iorb->Status |= IORB_ERROR;
400 iorb_complete(iorb);
401 }
402
403 /* restart engine to resume IORB processing */
404 spin_lock(drv_lock);
405 trigger_engine();
406 spin_unlock(drv_lock);
407
408 dprintf("reset_ctxhook() completed\n");
409
410 /* Check whether we have to rearm ourselves because some adapters were busy
411 * when we wanted to reset ports on them.
412 */
413 if (rearm_ctx_hook) {
414 msleep(250);
415 DevHelp_ArmCtxHook(0, reset_ctxhook_h);
416 }
417}
418
419/******************************************************************************
420 * IORB Engine context hook. This hook is executed if trigger_engine() came
421 * to the conclusion that some of the IORBs keep bouncing, most likely due to
422 * some condition on the adapter such as being busy. It could also be a very
423 * busy system. Either way, this requires some task-time help.
424 */
425void engine_ctxhook(ULONG parm)
426{
427 int iorbs_sent;
428 int i;
429
430 dprintf("engine_ctxhook() started\n");
431
432 spin_lock(drv_lock);
433 for (i = 0; i < 10; i++) {
434 if ((iorbs_sent = trigger_engine_1()) == 0) {
435 break;
436 }
437 }
438 spin_unlock(drv_lock);
439
440 dprintf("engine_ctxhook() completed\n");
441
442 if (iorbs_sent != 0) {
443 /* need to rearm ourselves for another run */
444 msleep(250);
445 DevHelp_ArmCtxHook(0, engine_ctxhook_h);
446 }
447}
448
Note: See TracBrowser for help on using the repository browser.