Context Navigation

source: trunk/src/os2ahci/ctxhook.c@ 125

Last change on this file since 125 was 125, checked in by cjm, 14 years ago

Version 1.21

Triggered by reports of performance loss with NCQ
New command line flag "/F" to force using write buffers even when upstream I/O requested non-buffered I/O; the primary purpose of this flag is to debug the NCQ performance drop but the flag may or may not remain.

File size: 15.7 KB

Line
1	/******************************************************************************
2	* ctxhook.c - context hooks (kernel thread functions) for os2ahci
3	*
4	* Copyright (c) 2011 thi.guten Software Development
5	* Copyright (c) 2011 Mensys B.V.
6	*
7	* Authors: Christian Mueller, Markus Thielen
8	*
9	* Parts copied from/inspired by the Linux AHCI driver;
10	* those parts are (c) Linux AHCI/ATA maintainers
11	*
12	* This program is free software; you can redistribute it and/or modify
13	* it under the terms of the GNU General Public License as published by
14	* the Free Software Foundation; either version 2 of the License, or
15	* (at your option) any later version.
16	*
17	* This program is distributed in the hope that it will be useful,
18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	* GNU General Public License for more details.
21	*
22	* You should have received a copy of the GNU General Public License
23	* along with this program; if not, write to the Free Software
24	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25	*/
26
27	#include "os2ahci.h"
28	#include "ata.h"
29	#include "atapi.h"
30
31	/* -------------------------- macros and constants ------------------------- */
32
33	/* ------------------------ typedefs and structures ------------------------ */
34
35	/* -------------------------- function prototypes -------------------------- */
36
37	/* ------------------------ global/static variables ------------------------ */
38
39	/* port restart context hook and input data:
	 *  restart_ctxhook_h  - hook handle passed to DevHelp_ArmCtxHook() when
	 *                       restart_ctxhook() rearms itself
	 *  ports_to_restart[] - one bitmap per adapter; bit p set means port p of
	 *                       that adapter is waiting to be restarted
	 */
40	ULONG restart_ctxhook_h;
41	volatile u32 ports_to_restart[MAX_AD];
42
43	/* port reset context hook and input data:
	 *  reset_ctxhook_h    - hook handle passed to DevHelp_ArmCtxHook() when
	 *                       reset_ctxhook() rearms itself
	 *  th_reset_watchdog  - watchdog timer handle; reset_ctxhook() cancels the
	 *                       timer and zeroes the handle when it is non-zero
	 *  ports_to_reset[]   - one bitmap per adapter; bit p set means port p of
	 *                       that adapter is waiting to be reset
	 *  abort_queue        - IORBs waiting to be aborted by reset_ctxhook()
	 */
44	ULONG reset_ctxhook_h;
45	ULONG th_reset_watchdog;
46	volatile u32 ports_to_reset[MAX_AD];
47	IORB_QUEUE abort_queue;
48
49	/* trigger engine context hook and input data:
	 *  engine_ctxhook_h   - hook handle passed to DevHelp_ArmCtxHook() when
	 *                       engine_ctxhook() rearms itself
	 */
50	ULONG engine_ctxhook_h;
51
52	/* ----------------------------- start of code ----------------------------- */
53
54	/******************************************************************************
55	* Port restart context hook. This context hook is executed at task time and
56	* will handle ports which are stopped due to a device error condition.
57	*
58	* The following conditions may exist:
59	*
60	* - Only a single non-NCQ command is executed by the AHCI adapter at any
61	* given time (even if more are outstanding). This is the case for single
62	* devices or port multipliers without FIS-based command switching. Error
63	* recovery is simple because we know which command has failed and that
64	* all other commands have not yet started executing. Thus, we can requeue
65	* all of them, replacing the failing command with a "request sense"
66	* command to get error details.
67	*
68	* - Multiple non-NCQ commands are executed on different devices behind a
69	* port multiplier which supports FIS-based command switching. This is
70	* more difficult to recover from but currently not an issue because we
71	* don't yet support FIS-based command switching (the FIS receive areas
72	* would become too large for the current data model).
73	*
74	* - One or more NCQ commands were active at the time of the error, with or
75	* without FIS-based command switching. We would have to interrogate the
76	* corresponding devices to find out which command has failed but if this
77	* is combined with FIS-based command switching, even the AHCI spec
78	* recommends to reset the port. This leads to a much simpler approach:
79	* requeue all NCQ commands (they are idempotent per definition, otherwise
80	* they couldn't be reordered by the device) with the 'no_ncq' flag set
81	* in the IORB and reset the port. Then those commands will be executed as
82	* regular commands. The error, if it reoccurs, can then be handled by
83	* one of the above cases.
84	*
85	* The upstream code will guarantee that we will never have a mix of NCQ and
86	* non-NCQ commands active at the same time in order to reduce complexity
87	* in the interrupt and error handlers.
88	*/
89	void restart_ctxhook(ULONG parm)
90	{
91	IORB_QUEUE done_queue;
92	AD_INFO *ai;
93	IORBH _far *problem_iorb;
94	IORBH _far *iorb;
95	IORBH _far *next = NULL;
96	u8 _far *port_mmio;
97	int rearm_ctx_hook = 0;
98	int need_reset;
99	int ccs;
100	int a;
101	int p;
102
103	dprintf("restart_ctxhook() started\n");
104	memset(&done_queue, 0x00, sizeof(done_queue));
105
106	spin_lock(drv_lock);
107
108	for (a = 0; a < ad_info_cnt; a++) {
109	ai = ad_infos + a;
110
111	if (ai->busy) {
112	/* this adapter is busy; leave it alone for now */
113	rearm_ctx_hook = 1;
114	continue;
115	}
116
117	for (p = 0; p <= ai->port_max; p++) {
118	if (ports_to_restart[a] & (1UL << p)) {
119	ports_to_restart[a] &= ~(1UL << p);
120
121	/* restart this port */
122	port_mmio = port_base(ai, p);
123	problem_iorb = NULL;
124	need_reset = 0;
125
126	dprintf("port %d, TF_DATA: 0x%lx\n", p, readl(port_mmio + PORT_TFDATA));
127
128	/* get "current command slot"; only valid if there are no NCQ cmds */
129	ccs = (int) ((readl(port_mmio + PORT_CMD) >> 8) & 0x1f);
130	ddprintf(" PORT_CMD = 0x%x\n", ccs);
131
132	for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
133	ADD_WORKSPACE _far *aws = add_workspace(iorb);
134	next = iorb->pNxtIORB;
135
136	if (aws->queued_hw) {
137	if (ai->ports[p].ncq_cmds & (1UL << aws->cmd_slot)) {
138	/* NCQ command; force non-NCQ mode and trigger port reset */
139	ai->ports[p].ncq_cmds &= ~(1UL << aws->cmd_slot);
140	aws->no_ncq = 1;
141	need_reset = 1;
142	} else {
143	/* regular command; clear cmd bit and identify problem IORB */
144	ai->ports[p].reg_cmds &= ~(1UL << aws->cmd_slot);
145	if (aws->cmd_slot == ccs) {
146	/* this is the non-NCQ command that failed */
147	ddprintf("failing IORB: %Fp\n", iorb);
148	problem_iorb = iorb;
149	}
150	}
151	/* we can requeue all IORBs unconditionally (see function comment) */
152	if (aws->retries++ < MAX_RETRIES) {
153	iorb_requeue(iorb);
154
155	} else {
156	/* retry count exceeded; consider IORB aborted */
157	iorb_seterr(iorb, IOERR_CMD_ABORTED);
158	iorb_queue_del(&ai->ports[p].iorb_queue, iorb);
159	iorb_queue_add(&done_queue, iorb);
160	if (iorb == problem_iorb) {
161	/* no further analysis -- we're done with this one */
162	problem_iorb = NULL;
163	}
164	}
165	}
166	}
167
168	/* sanity check: issued command bitmaps should be 0 now */
169	if (ai->ports[p].ncq_cmds != 0 \|\| ai->ports[p].reg_cmds != 0) {
170	dprintf("warning: commands issued not 0 (%08lx/%08lx); resetting...\n",
171	ai->ports[p].ncq_cmds, ai->ports[p].reg_cmds);
172	need_reset = 1;
173	}
174
175	if (!need_reset) {
176	if ((readl(port_mmio + PORT_TFDATA) & 0x88) != 0) {
177	/* device is not in an idle state */
178	need_reset = 1;
179	}
180	}
181
182	/* restart/reset port */
183	ai->busy = 1;
184	spin_unlock(drv_lock);
185	if (need_reset) {
186	ahci_reset_port(ai, p, 1);
187	} else {
188	ahci_stop_port(ai, p);
189	ahci_start_port(ai, p, 1);
190	}
191	spin_lock(drv_lock);
192	ai->busy = 0;
193
194	/* reset internal port status */
195	ai->ports[p].ncq_cmds = 0;
196	ai->ports[p].reg_cmds = 0;
197	ai->ports[p].cmd_slot = 0;
198
199	if (problem_iorb != NULL) {
200	/* get details about the error that caused this IORB to fail */
201	if (need_reset) {
202	/* no way to retrieve error details after a reset */
203	iorb_seterr(problem_iorb, IOERR_DEVICE_NONSPECIFIC);
204	iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
205	iorb_queue_add(&done_queue, problem_iorb);
206
207	} else {
208	/* get sense information */
209	ADD_WORKSPACE _far *aws = add_workspace(problem_iorb);
210	int d = iorb_unit_device(problem_iorb);
211	int (req_sense)(IORBH _far , int) = (ai->ports[p].devs[d].atapi) ?
212	atapi_req_sense : ata_req_sense;
213
214	aws->processing = 1;
215	aws->queued_hw = 1;
216
217	if (req_sense(problem_iorb, 0) == 0) {
218	/* execute request sense on slot #0 before anything else comes along */
219	ADD_StartTimerMS(&aws->timer, 5000, (PFN) timeout_callback, iorb, 0);
220	aws->cmd_slot = 0;
221	ai->ports[p].reg_cmds = 1;
222	writel(port_mmio + PORT_CMD_ISSUE, 1);
223	readl(port_mmio); /* flush */
224
225	} else {
226	/* IORB is expected to contain the error code; just move to done queue */
227	iorb_queue_del(&ai->ports[p].iorb_queue, problem_iorb);
228	iorb_queue_add(&done_queue, problem_iorb);
229	}
230	}
231	}
232	}
233	}
234	}
235
236	spin_unlock(drv_lock);
237
238	/* call notification routine on all IORBs which have completed */
239	for (iorb = done_queue.root; iorb != NULL; iorb = next) {
240	next = iorb->pNxtIORB;
241
242	spin_lock(drv_lock);
243	aws_free(add_workspace(iorb));
244	spin_unlock(drv_lock);
245
246	iorb_complete(iorb);
247	}
248
249	/* restart engine to resume IORB processing */
250	spin_lock(drv_lock);
251	trigger_engine();
252	spin_unlock(drv_lock);
253
254	dprintf("restart_ctxhook() completed\n");
255
256	/* Check whether we have to rearm ourselves because some adapters were busy
257	* when we wanted to restart ports on them.
258	*/
259	if (rearm_ctx_hook) {
260	msleep(250);
261	DevHelp_ArmCtxHook(0, restart_ctxhook_h);
262	}
263	}
264
265	/******************************************************************************
266	* Reset and abort context hook. This function runs at task time and takes
267	* care of port resets and their side effects. Input to this function are:
268	*
269	* ports_to_reset[] - array of port bitmaps, each bit indicating which port
270	* should be reset unconditionally. This is primarily
271	* used by the error interrupt handler.
272	*
273	* abort_queue - queue with IORBs to be aborted (timed-out, ...). If
274	* any of these commands have reached the hardware, the
275	* corresponding port is reset to interrupt command
276	* execution. This is primarily used for timeout
277	* handling and when IORBs are requested to be aborted.
278	*
279	* After resetting the requested ports, all remaining active IORBs on those
280	* ports have to be retried or aborted. Whether a retry is attempted depends
281	* on the kind of IORB -- those which are idempotent are retried, all others
282	* are aborted. This is different from the port restart hook because the
283	* restart hook can assume it is called with the port in error state, thus
284	* the controller will have stopped executing commands. The reset handler can
285	* be called at any time and we can't tell what's going on in the controller.
286	*
287	* The IORBs in the global abort_queue are expected to have their error code
288	* set (aborted, timeout, ...) but must not be marked as 'done'; otherwise,
289	* the upstream code might reuse the IORBs before we're done with them.
290	*/
291	void reset_ctxhook(ULONG parm)
292	{
293	IORB_QUEUE done_queue;
294	AD_INFO *ai;
295	IORBH _far *iorb;
296	IORBH _far *next = NULL;
297	int rearm_ctx_hook = 0;
298	int a;
299	int p;
300
301	dprintf("reset_ctxhook() started\n");
302	memset(&done_queue, 0x00, sizeof(done_queue));
303
304	spin_lock(drv_lock);
305
306	if (th_reset_watchdog != 0) {
307	/* watchdog timer still active -- just reset it */
308	ADD_CancelTimer(th_reset_watchdog);
309	th_reset_watchdog = 0;
310	}
311
312	/* add ports of active IORBs from the abort queue to ports_to_reset[] */
313	for (iorb = abort_queue.root; iorb != NULL; iorb = next) {
314	next = iorb->pNxtIORB;
315	a = iorb_unit_adapter(iorb);
316	p = iorb_unit_port(iorb);
317	ai = ad_infos + a;
318
319	if (ai->busy) {
320	/* this adapter is busy; leave it alone for now */
321	rearm_ctx_hook = 1;
322	continue;
323	}
324
325	/* move IORB to the local 'done' queue */
326	iorb_queue_del(&abort_queue, iorb);
327	iorb_queue_add(&done_queue, iorb);
328
329	/* reset port if the IORB has already been queued to hardware */
330	if (add_workspace(iorb)->queued_hw) {
331	/* prepare port reset */
332	ports_to_reset[a] \|= (1UL << p);
333	}
334	}
335
336	/* reset all ports in 'ports_to_reset[]' */
337	for (a = 0; a < ad_info_cnt; a++) {
338	ai = ad_infos + a;
339
340	if (ai->busy) {
341	/* this adapter is busy; leave it alone for now */
342	rearm_ctx_hook = 1;
343	continue;
344	}
345
346	for (p = 0; p <= ai->port_max; p++) {
347	if (ports_to_reset[a] & (1UL << p)) {
348	ports_to_reset[a] &= ~(1UL << p);
349
350	/* Reset this port. Since this is a rather slow operation, we'll
351	* release the spinlock while doing so. The adapter is marked as
352	* 'busy' to prevent similar routines (e.g. an ahci port scan) from
353	* interfering.
354	*/
355	ai->busy = 1;
356	spin_unlock(drv_lock);
357	ahci_reset_port(ai, p, 1);
358	spin_lock(drv_lock);
359	ai->busy = 0;
360
361	/* reset port status */
362	ai->ports[p].ncq_cmds = 0;
363	ai->ports[p].reg_cmds = 0;
364	ai->ports[p].cmd_slot = 0;
365
366	/* retry or abort all remaining active commands on this port */
367	for (iorb = ai->ports[p].iorb_queue.root; iorb != NULL; iorb = next) {
368	ADD_WORKSPACE _far *aws = add_workspace(iorb);
369	next = iorb->pNxtIORB;
370
371	if (aws->queued_hw) {
372	/* this IORB had already been queued to HW when we reset the port */
373	if (aws->idempotent && aws->retries++ < MAX_RETRIES) {
374	/* we can retry this IORB */
375	iorb_requeue(iorb);
376
377	} else {
378	/* we cannot retry this IORB; consider it aborted */
379	iorb->ErrorCode = IOERR_CMD_ABORTED;
380	iorb_queue_del(&ai->ports[p].iorb_queue, iorb);
381	iorb_queue_add(&done_queue, iorb);
382	}
383	}
384	}
385	}
386	}
387	}
388
389	spin_unlock(drv_lock);
390
391	/* complete all aborted IORBs */
392	for (iorb = done_queue.root; iorb != NULL; iorb = next) {
393	next = iorb->pNxtIORB;
394
395	spin_lock(drv_lock);
396	aws_free(add_workspace(iorb));
397	spin_unlock(drv_lock);
398
399	iorb->Status \|= IORB_ERROR;
400	iorb_complete(iorb);
401	}
402
403	/* restart engine to resume IORB processing */
404	spin_lock(drv_lock);
405	trigger_engine();
406	spin_unlock(drv_lock);
407
408	dprintf("reset_ctxhook() completed\n");
409
410	/* Check whether we have to rearm ourselves because some adapters were busy
411	* when we wanted to reset ports on them.
412	*/
413	if (rearm_ctx_hook) {
414	msleep(250);
415	DevHelp_ArmCtxHook(0, reset_ctxhook_h);
416	}
417	}
418
419	/******************************************************************************
420	* IORB Engine context hook. This hook is executed if trigger_engine() came
421	* to the conclusion that some of the IORBs keep bouncing, most likely due to
422	* some condition on the adapter such as being busy. It could also be a very
423	* busy system. Either way, this requires some task-time help.
424	*/
425	void engine_ctxhook(ULONG parm)
426	{
427	int iorbs_sent;
428	int i;
429
430	dprintf("engine_ctxhook() started\n");
431
432	spin_lock(drv_lock);
433	for (i = 0; i < 10; i++) {
434	if ((iorbs_sent = trigger_engine_1()) == 0) {
435	break;
436	}
437	}
438	spin_unlock(drv_lock);
439
440	dprintf("engine_ctxhook() completed\n");
441
442	if (iorbs_sent != 0) {
443	/* need to rearm ourselves for another run */
444	msleep(250);
445	DevHelp_ArmCtxHook(0, engine_ctxhook_h);
446	}
447	}
448

Note: See TracBrowser for help on using the repository browser.

Download in other formats: