source: trunk/src/os2ahci/ctxhook.c@ 80

Last change on this file since 80 was 80, checked in by chris, 14 years ago

Version 1.06
============

  • Finally came across a BIOS which accesses the ICH7/8 controller via SATA registers (i.e. not AHCI mode). This required a few changes to the code at boot time because it turned out that COMRESETs are required whenever switching to/from AHCI mode to allow the AHCI or SATA controller to re-discover the attached devices:
  • 'init_reset' will now be forced on when finding a controller in non-AHCI mode at boot time.
  • A COMRESET is initiated for each implemented port after turning off AHCI mode when restoring the BIOS configuration; this is done only for Intel controllers at this point because they map the AHCI port SCR MMIO registers even when not in AHCI mode.
  • apm_suspend() has been adjusted to restore the BIOS configuration to prevent needless timeouts when the BIOS takes over during suspend or power-off operations.
  • Small changes to the functions which save/restore BIOS/port settings to avoid pitfalls; among others, the port save/restore code now also saves and restores the port's engine status.
  • Improvements to debug logging around port resets.
  • Moved the code which clears pending interrupts from ahci_reset_port() to ahci_stop_port() because both need it and resetting a port involves stopping it first.
  • NCQ mode has been found to cause problems on a Dell D630. This may be related to the hard disk used for the test but since I've never seen more than one queued command regardless of the I/O load (even during simultaneous xcopy operations), NCQ mode is now off by default and needs to be turned on via the /N switch (i.e. the /N switch now has a reversed meaning).
  • Removed the code which attempts to establish another MMIO base address in case the one assigned by the BIOS can't be reserved via resource manager; if there's a conflict, it's extremely unlikely we would ever be able to restore the BIOS MMIO address at boot time without the BIOS clashing with whatever conflicts with the MMIO address, thus there's no point trying to do any of this.
  • Implemented a reset context hook watchdog; in the early boot phase, some components apparently don't yield the CPU so the context hook will never execute without the watchdog. Now we'll give the context hook 10 seconds to execute, otherwise the watchdog will expire and we'll call the context hook directly from the corresponding timer callback.
File size: 15.1 KB
Line 
1/******************************************************************************
2 * ctxhook.c - context hooks (kernel thread functions) for os2ahci
3 *
4 * Copyright (c) 2010 Christian Mueller. Parts copied from/inspired by the
5 * Linux AHCI driver; those parts are (c) Linux AHCI/ATA maintainers
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include "os2ahci.h"
23#include "ata.h"
24#include "atapi.h"
25
26/* -------------------------- macros and constants ------------------------- */
27
28/* ------------------------ typedefs and structures ------------------------ */
29
30/* -------------------------- function prototypes -------------------------- */
31
32/* ------------------------ global/static variables ------------------------ */
33
34/* Port restart context hook handle and its input data.
 *
 * restart_ctxhook_h is the handle restart_ctxhook() passes to
 * DevHelp_ArmCtxHook() when it needs to re-arm itself. ports_to_restart[a] is
 * a bitmap for adapter 'a': bit 'p' set means port 'p' is waiting to be
 * restarted; restart_ctxhook() clears the bit when it picks the port up.
 */
35ULONG restart_ctxhook_h;
36volatile u32 ports_to_restart[MAX_AD];
37
38/* Port reset context hook handle and its input data.
 *
 * reset_ctxhook_h is the handle reset_ctxhook() passes to
 * DevHelp_ArmCtxHook() when it needs to re-arm itself. th_reset_watchdog is a
 * timer handle; reset_ctxhook() cancels the timer and zeroes the handle if it
 * is non-zero on entry. ports_to_reset[a] is a bitmap of ports on adapter 'a'
 * to be reset unconditionally, and abort_queue holds the IORBs which are to
 * be aborted (see the reset_ctxhook() function comment).
 */
39ULONG reset_ctxhook_h;
40ULONG th_reset_watchdog;
41volatile u32 ports_to_reset[MAX_AD];
42IORB_QUEUE abort_queue;
43
44/* Trigger engine context hook handle; engine_ctxhook() passes it to
 * DevHelp_ArmCtxHook() when it needs another run.
 */
45ULONG engine_ctxhook_h;
46
47/* ----------------------------- start of code ----------------------------- */
48
49/******************************************************************************
50 * Port restart context hook. This context hook is executed at task time and
51 * will handle ports which are stopped due to a device error condition.
52 *
53 * The following conditions may exist:
54 *
55 * - Only a single non-NCQ command is executed by the AHCI adapter at any
56 * given time (even if more are outstanding). This is the case for single
57 * devices or port multipliers without FIS-based command switching. Error
58 * recovery is simple because we know which command has failed and that
59 * all other commands have not yet started executing. Thus, we can requeue
60 * all of them, replacing the failing command with a "request sense"
61 * command to get error details.
62 *
63 * - Multiple non-NCQ commands are executed on different devices behind a
64 * port multiplier which supports FIS-based command switching. This is
65 * more difficult to recover from but currently not an issue because we
66 * don't yet support FIS-based command switching (the FIS receive areas
67 * would become too large for the current data model).
68 *
69 * - One or more NCQ commands were active at the time of the error, with or
70 * without FIS-based command switching. We would have to interrogate the
71 * corresponding devices to find out which command has failed but if this
72 * is combined with FIS-based command switching, even the AHCI spec
73 * recommends to reset the port. This leads to a much simpler approach:
74 * requeue all NCQ commands (they are idempotent per definition, otherwise
75 * they couldn't be reordered by the device) with the 'no_ncq' flag set
76 * in the IORB and reset the port. Then those comands will be executed as
77 * regular commands. The error, if it reoccurrs, can be then handled by
78 * one of the above cases.
79 *
80 * The upstream code will guarantee that we will never have a mix of NCQ and
81 * non-NCQ commands active at the same time in order to reduce complexity
82 * in the interrupt and error handlers.
83 */
84void restart_ctxhook(ULONG parm)
85{
86 IORB_QUEUE done_queue;
87 AD_INFO *ai;
88 IORBH _far *problem_iorb;
89 IORBH _far *iorb;
90 IORBH _far *next = NULL;
91 u8 _far *port_mmio;
92 int rearm_ctx_hook = 0;
93 int need_reset;
94 int ccs;
95 int a;
96 int p;
97
98 dprintf("restart_ctxhook() started\n");
99 memset(&done_queue, 0x00, sizeof(done_queue));
100
101 spin_lock(drv_lock);
102
103 for (a = 0; a < ad_info_cnt; a++) {
104 ai = ad_infos + a;
105
106 if (ai->busy) {
107 /* this adapter is busy; leave it alone for now */
108 rearm_ctx_hook = 1;
109 continue;
110 }
111
112 for (p = 0; p <= ai->port_max; p++) {
113 if (ports_to_restart[a] & (1UL << p)) {
114 ports_to_restart[a] &= ~(1UL << p);
115
116 /* restart this port */
117 port_mmio = port_base(ai, p);
118 problem_iorb = NULL;
119 need_reset = 0;
120
121 dprintf("port %d, TF_DATA: 0x%lx\n", p, readl(port_mmio + PORT_TFDATA));
122
123 /* get "current command slot"; only valid if there are no NCQ cmds */
124 ccs = (int) ((readl(port_mmio + PORT_CMD) >> 8) & 0x0f);
125
126 for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
127 ADD_WORKSPACE _far *aws = add_workspace(iorb);
128 next = iorb->pNxtIORB;
129
130 if (aws->queued_hw) {
131 if (ai->ports[p].ncq_cmds & (1UL << aws->cmd_slot)) {
132 /* NCQ command; force non-NCQ mode and trigger port reset */
133 ai->ports[p].ncq_cmds &= ~(1UL << aws->cmd_slot);
134 aws->no_ncq = 1;
135 need_reset = 1;
136 } else {
137 /* regular command; clear cmd bit and identify problem IORB */
138 ai->ports[p].reg_cmds &= ~(1UL << aws->cmd_slot);
139 if (aws->cmd_slot == ccs) {
140 /* this is the non-NCQ comand that failed */
141 problem_iorb = iorb;
142 }
143 }
144 /* we can requeue all IORBs unconditionally (see function comment) */
145 iorb_requeue(iorb);
146 }
147 }
148
149 /* sanity check: issued command bitmaps should be 0 now */
150 if (ai->ports[p].ncq_cmds != 0 || ai->ports[p].reg_cmds != 0) {
151 dprintf("warning: commands issued not 0 (%08lx/%08lx); resetting...\n",
152 ai->ports[p].ncq_cmds, ai->ports[p].reg_cmds);
153 need_reset = 1;
154 }
155
156 if (!need_reset) {
157 if ((readl(port_mmio + PORT_TFDATA) & 0x88) != 0) {
158 /* device is not in an idle state */
159 need_reset = 1;
160 }
161 }
162
163 /* restart/reset port */
164 ai->busy = 1;
165 spin_unlock(drv_lock);
166 if (need_reset) {
167 ahci_reset_port(ai, p, 1);
168 } else {
169 ahci_stop_port(ai, p);
170 ahci_start_port(ai, p, 1);
171 }
172 spin_lock(drv_lock);
173 ai->busy = 0;
174
175 /* reset internal port status */
176 ai->ports[p].ncq_cmds = 0;
177 ai->ports[p].reg_cmds = 0;
178 ai->ports[p].cmd_slot = 0;
179
180 if (problem_iorb != NULL) {
181 /* get details about the error that caused this IORB to fail */
182 if (need_reset) {
183 /* no way to retrieve error details after a reset */
184 iorb_seterr(problem_iorb, IOERR_DEVICE_NONSPECIFIC);
185 iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
186 iorb_queue_add(&done_queue, problem_iorb);
187
188 } else {
189 /* get sense information */
190 ADD_WORKSPACE _far *aws = add_workspace(problem_iorb);
191 int d = iorb_unit_device(problem_iorb);
192 int (*req_sense)(IORBH _far *, int) = (ai->ports[p].devs[d].atapi) ?
193 atapi_req_sense : ata_req_sense;
194
195 aws->processing = 1;
196 aws->queued_hw = 1;
197
198 if (req_sense(problem_iorb, 0) == 0) {
199 /* execute request sense on slot #0 before anything else comes along */
200 ADD_StartTimerMS(&aws->timer, 5000, (PFN) timeout_callback, iorb, 0);
201 aws->cmd_slot = 0;
202 ai->ports[p].reg_cmds = 1;
203 writel(port_mmio + PORT_CMD_ISSUE, 1);
204 readl(port_mmio); /* flush */
205
206 } else {
207 /* IORB is expected to contain the error code; just move to done queue */
208 iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
209 iorb_queue_add(&done_queue, problem_iorb);
210 }
211 }
212 }
213 }
214 }
215 }
216
217 spin_unlock(drv_lock);
218
219 /* call notification routine on all IORBs which have completed */
220 for (iorb = done_queue.root; iorb != NULL; iorb = next) {
221 next = iorb->pNxtIORB;
222
223 spin_lock(drv_lock);
224 aws_free(add_workspace(iorb));
225 spin_unlock(drv_lock);
226
227 iorb_complete(iorb);
228 }
229
230 /* restart engine to resume IORB processing */
231 spin_lock(drv_lock);
232 trigger_engine();
233 spin_unlock(drv_lock);
234
235 dprintf("restart_ctxhook() completed\n");
236
237 /* Check whether we have to rearm ourselves because some adapters were busy
238 * when we wanted to restart ports on them.
239 */
240 if (rearm_ctx_hook) {
241 msleep(250);
242 DevHelp_ArmCtxHook(0, restart_ctxhook_h);
243 }
244}
245
246/******************************************************************************
247 * Reset and abort context hook. This function runs at task time and takes
248 * care of port resets and their side effects. Input to this function are:
249 *
250 * ports_to_reset[] - array of port bitmaps, each bit indicating which port
251 * should be reset unconditionally. This is primarily
252 * used by the error interrupt handler.
253 *
254 * abort_queue - queue with IORBs to be arborted (timed-out, ...) If
255 * any of these commands have reached the hardware, the
256 * corresponding port is reset to interrupt command
257 * execution. This is primarily used for timeout
258 * handling and when IORBs are requested to be aborted.
259 *
260 * After resetting the requested ports, all remaining active IORBs on those
261 * ports have to be retried or aborted. Whether a retry is attempted depends
262 * on the kind of IORB -- those which are idempotent are retried, all others
263 * are aborted. This is different from the port restart hook because the
264 * restart hook can assume it is called with the port in error state, thus
265 * the controller will have stopped executing commands. The reset handler can
266 * be called at any time and we can't tell what's going on in the controller.
267 *
268 * The IORBs in the global abort_queue are expected to have their error code
269 * set (aborted, timeout, ...) but must not be marked as 'done'; otherwise,
270 * the upstream code might reuse the IORBs before we're done with them.
271 */
272void reset_ctxhook(ULONG parm)
273{
274 IORB_QUEUE done_queue;
275 AD_INFO *ai;
276 IORBH _far *iorb;
277 IORBH _far *next = NULL;
278 int rearm_ctx_hook = 0;
279 int a;
280 int p;
281
282 dprintf("reset_ctxhook() started\n");
283 memset(&done_queue, 0x00, sizeof(done_queue));
284
285 spin_lock(drv_lock);
286
287 if (th_reset_watchdog != 0) {
288 /* watchdog timer still active -- just reset it */
289 ADD_CancelTimer(th_reset_watchdog);
290 th_reset_watchdog = 0;
291 }
292
293 /* add ports of active IORBs from the abort queue to ports_to_reset[] */
294 for (iorb = abort_queue.root; iorb != NULL; iorb = next) {
295 next = iorb->pNxtIORB;
296 a = iorb_unit_adapter(iorb);
297 p = iorb_unit_port(iorb);
298 ai = ad_infos + a;
299
300 if (ai->busy) {
301 /* this adapter is busy; leave it alone for now */
302 rearm_ctx_hook = 1;
303 continue;
304 }
305
306 /* move IORB to the local 'done' queue */
307 iorb_queue_del(&abort_queue, iorb);
308 iorb_queue_add(&done_queue, iorb);
309
310 /* reset port if the IORB has already been queued to hardware */
311 if (add_workspace(iorb)->queued_hw) {
312 /* prepare port reset */
313 ports_to_reset[a] |= (1UL << p);
314 }
315 }
316
317 /* reset all ports in 'ports_to_reset[]' */
318 for (a = 0; a < ad_info_cnt; a++) {
319 ai = ad_infos + a;
320
321 if (ai->busy) {
322 /* this adapter is busy; leave it alone for now */
323 rearm_ctx_hook = 1;
324 continue;
325 }
326
327 for (p = 0; p <= ai->port_max; p++) {
328 if (ports_to_reset[a] & (1UL << p)) {
329 ports_to_reset[a] &= ~(1UL << p);
330
331 /* Reset this port. Since this is a rather slow operation, we'll
332 * release the spinlock while doing so. The adapter is marked as
333 * 'busy' to prevent similar routines (e.g. an ahci port scan) from
334 * interfering.
335 */
336 ai->busy = 1;
337 spin_unlock(drv_lock);
338 ahci_reset_port(ai, p, 1);
339 spin_lock(drv_lock);
340 ai->busy = 0;
341
342 /* reset port status */
343 ai->ports[p].ncq_cmds = 0;
344 ai->ports[p].reg_cmds = 0;
345 ai->ports[p].cmd_slot = 0;
346
347 /* retry or abort all remaining active commands on this port */
348 for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
349 next = iorb->pNxtIORB;
350 if (add_workspace(iorb)->queued_hw) {
351 /* this IORB had already been queued to HW when we reset the port */
352 if (add_workspace(iorb)->idempotent) {
353 /* We can retry this IORB; just reset its status and it will be
354 * picked up by subsequent trigger_engine() calls.
355 */
356 iorb_requeue(iorb);
357
358 } else {
359 /* we cannot retry this IORB; consider it aborted */
360 iorb->ErrorCode = IOERR_CMD_ABORTED;
361 iorb_queue_del(&ai->ports[p].iorb_queue, iorb);
362 iorb_queue_add(&done_queue, iorb);
363 }
364 }
365 }
366 }
367 }
368 }
369
370 spin_unlock(drv_lock);
371
372 /* complete all aborted IORBs */
373 for (iorb = done_queue.root; iorb != NULL; iorb = next) {
374 next = iorb->pNxtIORB;
375
376 spin_lock(drv_lock);
377 aws_free(add_workspace(iorb));
378 spin_unlock(drv_lock);
379
380 iorb->Status |= IORB_ERROR;
381 iorb_complete(iorb);
382 }
383
384 /* restart engine to resume IORB processing */
385 spin_lock(drv_lock);
386 trigger_engine();
387 spin_unlock(drv_lock);
388
389 dprintf("reset_ctxhook() completed\n");
390
391 /* Check whether we have to rearm ourselves because some adapters were busy
392 * when we wanted to reset ports on them.
393 */
394 if (rearm_ctx_hook) {
395 msleep(250);
396 DevHelp_ArmCtxHook(0, reset_ctxhook_h);
397 }
398}
399
400/******************************************************************************
401 * IORB Engine context hook. This hook is executed if trigger_engine() came
402 * to the conclusion that some of the IORBs keep bouncing, most likely due to
403 * some condition on the adapter such as being busy. It could also be a very
404 * busy system. Either way, this requires some task-time help.
405 */
406void engine_ctxhook(ULONG parm)
407{
408 int iorbs_sent;
409 int i;
410
411 dprintf("engine_ctxhook() started\n");
412
413 spin_lock(drv_lock);
414 for (i = 0; i < 10; i++) {
415 if ((iorbs_sent = trigger_engine_1()) == 0) {
416 break;
417 }
418 }
419 spin_unlock(drv_lock);
420
421 dprintf("engine_ctxhook() completed\n");
422
423 if (iorbs_sent != 0) {
424 /* need to rearm ourselves for another run */
425 msleep(250);
426 DevHelp_ArmCtxHook(0, engine_ctxhook_h);
427 }
428}
429
Note: See TracBrowser for help on using the repository browser.