source: trunk/src/3rdparty/sqlite/pager.c@ 205

Last change on this file since 205 was 205, checked in by rudi, 14 years ago

Added SQLite 2.8.17 sources. This allows to build at least one of the sql drivers / plugins.

File size: 71.7 KB
Line 
1/*
2** 2001 September 15
3**
4** The author disclaims copyright to this source code. In place of
5** a legal notice, here is a blessing:
6**
7** May you do good and not evil.
8** May you find forgiveness for yourself and forgive others.
9** May you share freely, never taking more than you give.
10**
11*************************************************************************
12** This is the implementation of the page cache subsystem or "pager".
13**
14** The pager is used to access a database disk file. It implements
15** atomic commit and rollback through the use of a journal file that
16** is separate from the database file. The pager also implements file
17** locking to prevent two processes from writing the same database
18** file simultaneously, or one process from reading the database while
19** another is writing.
20**
21** @(#) $Id: pager.c,v 1.101.2.1 2005/12/19 17:37:10 drh Exp $
22*/
23#include "os.h" /* Must be first to enable large file support */
24#include "sqliteInt.h"
25#include "pager.h"
26#include <assert.h>
27#include <string.h>
28
/*
** Macros for troubleshooting.  Normally turned off.
**
** When the "#if 0" below is changed to "#if 1", SET_PAGER() records the
** first pager it is given in mainPager, CLR_PAGER() forgets it again, and
** the TRACEn() macros print to stderr only while the local variable
** "pPager" at the point of use refers to that pager.  In the normal
** (disabled) configuration every one of these macros expands to nothing.
**
** NOTE(review): the enabled forms expand to a bare "if" statement, so
** they must not be used as the unbraced body of an if/else.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  if( mainPager==0 ) mainPager = (X)
#define CLR_PAGER(X)  if( mainPager==(X) ) mainPager = 0
#define TRACE1(X)     if( pPager==mainPager ) fprintf(stderr,X)
#define TRACE2(X,Y)   if( pPager==mainPager ) fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif
46
47
/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK     The page cache is not currently reading or
**                     writing the database file.  There is no
**                     data held in memory.  This is the initial
**                     state.
**
**   SQLITE_READLOCK   The page cache is reading the database.
**                     Writing is not permitted.  There can be
**                     multiple readers accessing the same database
**                     file at the same time.
**
**   SQLITE_WRITELOCK  The page cache is writing the database.
**                     Access is exclusive.  No other processes or
**                     threads can be reading or writing while one
**                     process is writing.
**
** The page cache comes up in SQLITE_UNLOCK.  The first time a
** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to SQLITE_UNLOCK.  The first time
** that sqlite_page_write() is called, the state transitions to
** SQLITE_WRITELOCK.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)
** The sqlite_page_rollback() and sqlite_page_commit() functions
** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.
**
** NOTE(review): the sqlite_page_*() names above are historical.  The
** routines referenced elsewhere in this file use the "sqlitepager_"
** prefix (for example sqlitepager_rollback() and sqlitepager_write()).
**
** The numeric values are ordered: code in this file compares states
** with "<" and ">=" (e.g. "state>=SQLITE_WRITELOCK").
*/
#define SQLITE_UNLOCK      0
#define SQLITE_READLOCK    1
#define SQLITE_WRITELOCK   2
81
82
/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module.  The client
** code that calls pager sees only the data that follows the header.
**
** Client code should call sqlitepager_write() on a page prior to making
** any modifications to that page.  The first time sqlitepager_write()
** is called, the original page contents are written into the rollback
** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
** the journal page has made it onto the disk surface, PgHdr.needSync
** is cleared.  The modified page cannot be written back into the original
** database file until the journal pages have been synced to disk and
** PgHdr.needSync has been cleared.
**
** The PgHdr.dirty flag is set when sqlitepager_write() is called and
** is cleared again when the page content is written back to the original
** database file.
**
** Memory layout of one cached page (see PGHDR_TO_DATA and
** PGHDR_TO_EXTRA): this header, then SQLITE_PAGE_SIZE bytes of page
** data, then Pager.nExtra bytes of client-private data.
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  int nRef;                      /* Number of users of this page */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */
  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
  /* SQLITE_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};
119
120
/*
** A macro used for invoking the codec if there is one.
**
**   P   Pager whose xCodec/pCodecArg fields are consulted
**   D   Pointer to the page data handed to the codec
**   N   Page number
**   X   Operation code passed through to the codec unchanged
**
** When SQLITE_HAS_CODEC is not defined the macro expands to nothing.
**
** The enabled form is wrapped in do{...}while(0) and every argument is
** parenthesized so that "CODEC(...);" behaves as a single statement,
** including when it is the unbraced body of an if/else, and so that
** expression arguments cannot be mis-parsed.  Note that P is evaluated
** more than once.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC(P,D,N,X) \
    do{ \
      if( (P)->xCodec ){ (P)->xCodec((P)->pCodecArg,(D),(N),(X)); } \
    }while(0)
#else
# define CODEC(P,D,N,X)
#endif
129
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)   Address immediately after the header P, which is
**                      where the page data begins.
**   DATA_TO_PGHDR(D)   Inverse of the above: the header that sits
**                      immediately before the data pointer D.
**   PGHDR_TO_EXTRA(P)  Address SQLITE_PAGE_SIZE bytes past the start of
**                      the page data, where the extra area begins.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])
137
/*
** How big to make the hash table used for locating in-memory pages
** by page number.  pager_hash() below masks with (N_PG_HASH-1), so this
** value must remain a power of two.
*/
#define N_PG_HASH 2048

/*
** Hash a page number: keep the low-order bits of PN so that the result
** is a valid index into an array of N_PG_HASH entries.
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))
148
/*
** An open page cache is an instance of the following structure.
**
** The fields fall into rough groups: file names and descriptors, size
** bookkeeping for the database/journal/checkpoint journal, cache
** statistics and limits, the optional codec hook, one-byte state flags,
** and the heads of the various page lists plus the page-number hash.
*/
struct Pager {
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  char *zDirectory;           /* Directory holding database and journal files */
  OsFile fd, jfd;             /* File descriptors for database and journal */
  OsFile cpfd;                /* File descriptor for the checkpoint journal */
  int dbSize;                 /* Number of pages in the file (pager_reset() sets -1) */
  int origDbSize;             /* dbSize before the current change */
  int ckptSize;               /* Size of database (in pages) at ckpt_begin() */
  off_t ckptJSize;            /* Size of journal at ckpt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int ckptNRec;               /* Number of records in the checkpoint journal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True we are in a checkpoint */
  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* One of several kinds of errors (PAGER_ERR_* bits) */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};
196
/*
** These are bits that can be set in Pager.errMask.  Several bits may be
** set at once; pager_errcode() translates the mask into a single
** SQLITE_* result code.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */
205
/*
** The journal file contains page records in the following
** format.
**
** Actually, this structure is the complete page record for pager
** formats less than 3.  Beginning with format 3, each record is
** followed in the journal by a 32-bit checksum (see JOURNAL_PG_SZ and
** pager_playback_one_page()).
*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                      /* The page number */
  char aData[SQLITE_PAGE_SIZE];   /* Original data for page pgno */
};
219
/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far).  The 1st journal format writes
** 32-bit integers in the byte-order of the host machine.  Newer
** formats write integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
**
** The 3rd journal format (added for 2.8.0) adds additional sanity
** checking information to the journal.  If the power fails while the
** journal is being written, semi-random garbage data might appear in
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
**
** The three magic strings differ only in their final byte
** (0xd4, 0xd5, 0xd6 for formats 1, 2 and 3 respectively).
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,
};
#define JOURNAL_FORMAT_1 1
#define JOURNAL_FORMAT_2 2
#define JOURNAL_FORMAT_3 3
261
/*
** The following integer determines what format to use when creating
** new primary journal files.  By default we always use format 3.
** When testing, we can set this value to older journal formats in order to
** make sure that newer versions of the library are able to rollback older
** journal files.
**
** Note that checkpoint journals always use format 2 and omit the header.
**
** In SQLITE_TEST builds this is a writable global variable; otherwise it
** is a preprocessor constant, so comparisons against it are resolved at
** compile time.
*/
#ifdef SQLITE_TEST
int journal_format = 3;
#else
# define journal_format 3
#endif
276
/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
**
**   JOURNAL_HDR_SZ(X)  Magic string plus one Pgno; formats >= 3 add two
**                      more 32-bit fields.
**   JOURNAL_PG_SZ(X)   One page of data plus its page number; formats
**                      >= 3 add one 32-bit checksum.
**
** Both results have an unsigned (sizeof-derived) type.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))
#define JOURNAL_PG_SZ(X) \
   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))
286
/*
** Enable reference count tracking here:
**
** In SQLITE_TEST builds, setting pager_refinfo_enable to a non-zero
** value makes REFINFO(p) print the page number, the address of the page
** data and the current reference count of page p to standard output.
** In all other builds REFINFO() expands to nothing.
**
** The address is printed with %p.  Casting the pointer to int, as was
** done previously, truncates the address on platforms where pointers
** are wider than int.
*/
#ifdef SQLITE_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=%p nRef=%d\n",
       (int)p->pgno, PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif
305
306/*
307** Read a 32-bit integer from the given file descriptor. Store the integer
308** that is read in *pRes. Return SQLITE_OK if everything worked, or an
309** error code is something goes wrong.
310**
311** If the journal format is 2 or 3, read a big-endian integer. If the
312** journal format is 1, read an integer in the native byte-order of the
313** host machine.
314*/
315static int read32bits(int format, OsFile *fd, u32 *pRes){
316 u32 res;
317 int rc;
318 rc = sqliteOsRead(fd, &res, sizeof(res));
319 if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){
320 unsigned char ac[4];
321 memcpy(ac, &res, 4);
322 res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
323 }
324 *pRes = res;
325 return rc;
326}
327
328/*
329** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK
330** on success or an error code is something goes wrong.
331**
332** If the journal format is 2 or 3, write the integer as 4 big-endian
333** bytes. If the journal format is 1, write the integer in the native
334** byte order. In normal operation, only formats 2 and 3 are used.
335** Journal format 1 is only used for testing.
336*/
337static int write32bits(OsFile *fd, u32 val){
338 unsigned char ac[4];
339 if( journal_format<=1 ){
340 return sqliteOsWrite(fd, &val, 4);
341 }
342 ac[0] = (val>>24) & 0xff;
343 ac[1] = (val>>16) & 0xff;
344 ac[2] = (val>>8) & 0xff;
345 ac[3] = val & 0xff;
346 return sqliteOsWrite(fd, ac, 4);
347}
348
349/*
350** Write a 32-bit integer into a page header right before the
351** page data. This will overwrite the PgHdr.pDirty pointer.
352**
353** The integer is big-endian for formats 2 and 3 and native byte order
354** for journal format 1.
355*/
356static void store32bits(u32 val, PgHdr *p, int offset){
357 unsigned char *ac;
358 ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
359 if( journal_format<=1 ){
360 memcpy(ac, &val, 4);
361 }else{
362 ac[0] = (val>>24) & 0xff;
363 ac[1] = (val>>16) & 0xff;
364 ac[2] = (val>>8) & 0xff;
365 ac[3] = val & 0xff;
366 }
367}
368
369
370/*
371** Convert the bits in the pPager->errMask into an approprate
372** return code.
373*/
374static int pager_errcode(Pager *pPager){
375 int rc = SQLITE_OK;
376 if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL;
377 if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR;
378 if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL;
379 if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM;
380 if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;
381 return rc;
382}
383
384/*
385** Add or remove a page from the list of all pages that are in the
386** checkpoint journal.
387**
388** The Pager keeps a separate list of pages that are currently in
389** the checkpoint journal. This helps the sqlitepager_ckpt_commit()
390** routine run MUCH faster for the common case where there are many
391** pages in memory but only a few are in the checkpoint journal.
392*/
393static void page_add_to_ckpt_list(PgHdr *pPg){
394 Pager *pPager = pPg->pPager;
395 if( pPg->inCkpt ) return;
396 assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
397 pPg->pPrevCkpt = 0;
398 if( pPager->pCkpt ){
399 pPager->pCkpt->pPrevCkpt = pPg;
400 }
401 pPg->pNextCkpt = pPager->pCkpt;
402 pPager->pCkpt = pPg;
403 pPg->inCkpt = 1;
404}
405static void page_remove_from_ckpt_list(PgHdr *pPg){
406 if( !pPg->inCkpt ) return;
407 if( pPg->pPrevCkpt ){
408 assert( pPg->pPrevCkpt->pNextCkpt==pPg );
409 pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;
410 }else{
411 assert( pPg->pPager->pCkpt==pPg );
412 pPg->pPager->pCkpt = pPg->pNextCkpt;
413 }
414 if( pPg->pNextCkpt ){
415 assert( pPg->pNextCkpt->pPrevCkpt==pPg );
416 pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;
417 }
418 pPg->pNextCkpt = 0;
419 pPg->pPrevCkpt = 0;
420 pPg->inCkpt = 0;
421}
422
423/*
424** Find a page in the hash table given its page number. Return
425** a pointer to the page or NULL if not found.
426*/
427static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
428 PgHdr *p = pPager->aHash[pager_hash(pgno)];
429 while( p && p->pgno!=pgno ){
430 p = p->pNextHash;
431 }
432 return p;
433}
434
435/*
436** Unlock the database and clear the in-memory cache. This routine
437** sets the state of the pager back to what it was when it was first
438** opened. Any outstanding pages are invalidated and subsequent attempts
439** to access those pages will likely result in a coredump.
440*/
441static void pager_reset(Pager *pPager){
442 PgHdr *pPg, *pNext;
443 for(pPg=pPager->pAll; pPg; pPg=pNext){
444 pNext = pPg->pNextAll;
445 sqliteFree(pPg);
446 }
447 pPager->pFirst = 0;
448 pPager->pFirstSynced = 0;
449 pPager->pLast = 0;
450 pPager->pAll = 0;
451 memset(pPager->aHash, 0, sizeof(pPager->aHash));
452 pPager->nPage = 0;
453 if( pPager->state>=SQLITE_WRITELOCK ){
454 sqlitepager_rollback(pPager);
455 }
456 sqliteOsUnlock(&pPager->fd);
457 pPager->state = SQLITE_UNLOCK;
458 pPager->dbSize = -1;
459 pPager->nRef = 0;
460 assert( pPager->journalOpen==0 );
461}
462
463/*
464** When this routine is called, the pager has the journal file open and
465** a write lock on the database. This routine releases the database
466** write lock and acquires a read lock in its place. The journal file
467** is deleted and closed.
468**
469** TODO: Consider keeping the journal file open for temporary databases.
470** This might give a performance improvement on windows where opening
471** a file is an expensive operation.
472*/
473static int pager_unwritelock(Pager *pPager){
474 int rc;
475 PgHdr *pPg;
476 if( pPager->state<SQLITE_WRITELOCK ) return SQLITE_OK;
477 sqlitepager_ckpt_commit(pPager);
478 if( pPager->ckptOpen ){
479 sqliteOsClose(&pPager->cpfd);
480 pPager->ckptOpen = 0;
481 }
482 if( pPager->journalOpen ){
483 sqliteOsClose(&pPager->jfd);
484 pPager->journalOpen = 0;
485 sqliteOsDelete(pPager->zJournal);
486 sqliteFree( pPager->aInJournal );
487 pPager->aInJournal = 0;
488 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
489 pPg->inJournal = 0;
490 pPg->dirty = 0;
491 pPg->needSync = 0;
492 }
493 }else{
494 assert( pPager->dirtyFile==0 || pPager->useJournal==0 );
495 }
496 rc = sqliteOsReadLock(&pPager->fd);
497 if( rc==SQLITE_OK ){
498 pPager->state = SQLITE_READLOCK;
499 }else{
500 /* This can only happen if a process does a BEGIN, then forks and the
501 ** child process does the COMMIT. Because of the semantics of unix
502 ** file locking, the unlock will fail.
503 */
504 pPager->state = SQLITE_UNLOCK;
505 }
506 return rc;
507}
508
509/*
510** Compute and return a checksum for the page of data.
511**
512** This is not a real checksum. It is really just the sum of the
513** random initial value and the page number. We considered do a checksum
514** of the database, but that was found to be too slow.
515*/
516static u32 pager_cksum(Pager *pPager, Pgno pgno, const char *aData){
517 u32 cksum = pPager->cksumInit + pgno;
518 return cksum;
519}
520
521/*
522** Read a single page from the journal file opened on file descriptor
523** jfd. Playback this one page.
524**
525** There are three different journal formats. The format parameter determines
526** which format is used by the journal that is played back.
527*/
528static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int format){
529 int rc;
530 PgHdr *pPg; /* An existing page in the cache */
531 PageRecord pgRec;
532 u32 cksum;
533
534 rc = read32bits(format, jfd, &pgRec.pgno);
535 if( rc!=SQLITE_OK ) return rc;
536 rc = sqliteOsRead(jfd, &pgRec.aData, sizeof(pgRec.aData));
537 if( rc!=SQLITE_OK ) return rc;
538
539 /* Sanity checking on the page. This is more important that I originally
540 ** thought. If a power failure occurs while the journal is being written,
541 ** it could cause invalid data to be written into the journal. We need to
542 ** detect this invalid data (with high probability) and ignore it.
543 */
544 if( pgRec.pgno==0 ){
545 return SQLITE_DONE;
546 }
547 if( pgRec.pgno>(unsigned)pPager->dbSize ){
548 return SQLITE_OK;
549 }
550 if( format>=JOURNAL_FORMAT_3 ){
551 rc = read32bits(format, jfd, &cksum);
552 if( rc ) return rc;
553 if( pager_cksum(pPager, pgRec.pgno, pgRec.aData)!=cksum ){
554 return SQLITE_DONE;
555 }
556 }
557
558 /* Playback the page. Update the in-memory copy of the page
559 ** at the same time, if there is one.
560 */
561 pPg = pager_lookup(pPager, pgRec.pgno);
562 TRACE2("PLAYBACK %d\n", pgRec.pgno);
563 sqliteOsSeek(&pPager->fd, (pgRec.pgno-1)*(off_t)SQLITE_PAGE_SIZE);
564 rc = sqliteOsWrite(&pPager->fd, pgRec.aData, SQLITE_PAGE_SIZE);
565 if( pPg ){
566 /* No page should ever be rolled back that is in use, except for page
567 ** 1 which is held in use in order to keep the lock on the database
568 ** active.
569 */
570 assert( pPg->nRef==0 || pPg->pgno==1 );
571 memcpy(PGHDR_TO_DATA(pPg), pgRec.aData, SQLITE_PAGE_SIZE);
572 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
573 pPg->dirty = 0;
574 pPg->needSync = 0;
575 CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
576 }
577 return rc;
578}
579
/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.
**
** The journal file format is as follows:
**
**    *  8 byte prefix.  One of the aJournalMagic123 vectors defined
**       above.  The format of the journal file is determined by which
**       of the three prefix vectors is seen.
**    *  4 byte big-endian integer which is the number of valid page records
**       in the journal.  If this value is 0xffffffff, then compute the
**       number of page records from the journal size.  This field appears
**       in format 3 only.
**    *  4 byte big-endian integer which is the initial value for the
**       sanity checksum.  This field appears in format 3 only.
**    *  4 byte integer which is the number of pages to truncate the
**       database to during a rollback.
**    *  Zero or more pages instances, each as follows:
**        +  4 byte page number.
**        +  SQLITE_PAGE_SIZE bytes of data.
**        +  4 byte checksum (format 3 only)
**
** When we speak of the journal header, we mean the first 4 bullets above.
** Each entry in the journal is an instance of the 5th bullet.  Note that
** bullets 2 and 3 only appear in format-3 journals.
**
** Call the value from the second bullet "nRec".  nRec is the number of
** valid page entries in the journal.  In most cases, you can compute the
** value of nRec from the size of the journal file.  But if a power
** failure occurred while the journal was being written, it could be the
** case that the size of the journal file had already been increased but
** the extra entries had not yet made it safely to disk.  In such a case,
** the value of nRec computed from the file size would be too large.  For
** that reason, we normally use the nRec value in the header.
**
** If the nRec value is 0xffffffff it means that nRec should be computed
** from the file size.  This value is used when the user selects the
** no-sync option for the journal.  A power failure could lead to corruption
** in this case.  But for things like temporary table (which will be
** deleted when the power is restored) we don't care.  The same fallback
** is taken for format-3 journals when the useJournalSize argument is
** non-zero.
**
** Journal formats 1 and 2 do not have an nRec value in the header so we
** have to compute nRec from the file size.  This has risks (as described
** above) which is why all persistent tables have been changed to use
** format 3.
**
** If the file opened as the journal file is not a well-formed
** journal file then the database will likely already be
** corrupted, so the PAGER_ERR_CORRUPT bit is set in pPager->errMask
** and SQLITE_CORRUPT is returned.  If it all works, then this routine
** returns the result of pager_unwritelock(), normally SQLITE_OK.
**
** Notes on the error path: every failure that reaches end_playback with
** rc!=SQLITE_OK is reported as SQLITE_CORRUPT, so the intermediate
** SQLITE_PROTOCOL assignments below never reach the caller.  A journal
** that is too short to hold a header is not an error: rc is still
** SQLITE_OK at that point and the journal is simply discarded.
** pager_unwritelock() is called on every path.
*/
static int pager_playback(Pager *pPager, int useJournalSize){
  off_t szJ;               /* Size of the journal file in bytes */
  int nRec;                /* Number of Records in the journal */
  int i;                   /* Loop counter */
  Pgno mxPg = 0;           /* Size of the original file in pages */
  int format;              /* Format of the journal file. */
  unsigned char aMagic[sizeof(aJournalMagic1)];
  int rc;

  /* Figure out how many records are in the journal.  Abort early if
  ** the journal is empty.
  */
  assert( pPager->journalOpen );
  sqliteOsSeek(&pPager->jfd, 0);
  rc = sqliteOsFileSize(&pPager->jfd, &szJ);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }

  /* If the journal file is too small to contain a complete header,
  ** it must mean that the process that created the journal was just
  ** beginning to write the journal file when it died.  In that case,
  ** the database file should have still been completely unchanged.
  ** Nothing needs to be rolled back.  We can safely ignore this journal.
  */
  if( szJ < sizeof(aMagic)+sizeof(Pgno) ){
    goto end_playback;
  }

  /* Read the beginning of the journal and truncate the
  ** database file back to its original size.
  */
  rc = sqliteOsRead(&pPager->jfd, aMagic, sizeof(aMagic));
  if( rc!=SQLITE_OK ){
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  /* Identify the journal format from the magic string */
  if( memcmp(aMagic, aJournalMagic3, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_3;
  }else if( memcmp(aMagic, aJournalMagic2, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_2;
  }else if( memcmp(aMagic, aJournalMagic1, sizeof(aMagic))==0 ){
    format = JOURNAL_FORMAT_1;
  }else{
    rc = SQLITE_PROTOCOL;
    goto end_playback;
  }
  if( format>=JOURNAL_FORMAT_3 ){
    if( szJ < sizeof(aMagic) + 3*sizeof(u32) ){
      /* Ignore the journal if it is too small to contain a complete
      ** header.  We already did this test once above, but at the prior
      ** test, we did not know the journal format and so we had to assume
      ** the smallest possible header.  Now we know the header is bigger
      ** than the minimum so we test again.
      */
      goto end_playback;
    }
    rc = read32bits(format, &pPager->jfd, (u32*)&nRec);
    if( rc ) goto end_playback;
    rc = read32bits(format, &pPager->jfd, &pPager->cksumInit);
    if( rc ) goto end_playback;
    if( nRec==0xffffffff || useJournalSize ){
      /* Header count unavailable or not trusted: derive it from the size */
      nRec = (szJ - JOURNAL_HDR_SZ(3))/JOURNAL_PG_SZ(3);
    }
  }else{
    /* Formats 1 and 2 carry no record count; derive it from the size */
    nRec = (szJ - JOURNAL_HDR_SZ(2))/JOURNAL_PG_SZ(2);
    assert( nRec*JOURNAL_PG_SZ(2)+JOURNAL_HDR_SZ(2)==szJ );
  }
  /* Last header field: the size of the database before the transaction */
  rc = read32bits(format, &pPager->jfd, &mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
  rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
  if( rc!=SQLITE_OK ){
    goto end_playback;
  }
  pPager->dbSize = mxPg;

  /* Copy original pages out of the journal and back into the database file.
  ** SQLITE_DONE from the helper means "stop here" and is not an error.
  */
  for(i=0; i<nRec; i++){
    rc = pager_playback_one_page(pPager, &pPager->jfd, format);
    if( rc!=SQLITE_OK ){
      if( rc==SQLITE_DONE ){
        rc = SQLITE_OK;
      }
      break;
    }
  }

  /* Pages that have been written to the journal but never synced
  ** were not restored by the loop above.  We have to restore those
  ** pages by reading them back from the original database.
  */
  if( rc==SQLITE_OK ){
    PgHdr *pPg;
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      char zBuf[SQLITE_PAGE_SIZE];
      if( !pPg->dirty ) continue;
      if( (int)pPg->pgno <= pPager->origDbSize ){
        sqliteOsSeek(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)(pPg->pgno-1));
        rc = sqliteOsRead(&pPager->fd, zBuf, SQLITE_PAGE_SIZE);
        TRACE2("REFETCH %d\n", pPg->pgno);
        CODEC(pPager, zBuf, pPg->pgno, 2);
        if( rc ) break;
      }else{
        /* Page did not exist before the transaction: it reverts to zeros */
        memset(zBuf, 0, SQLITE_PAGE_SIZE);
      }
      /* Replace the cached content (and clear the extra area) when the
      ** page is unreferenced or its content differs from what is on disk.
      */
      if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE) ){
        memcpy(PGHDR_TO_DATA(pPg), zBuf, SQLITE_PAGE_SIZE);
        memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
      }
      pPg->needSync = 0;
      pPg->dirty = 0;
    }
  }

end_playback:
  if( rc!=SQLITE_OK ){
    /* Any failure is reported as corruption; the lock is still dropped */
    pager_unwritelock(pPager);
    pPager->errMask |= PAGER_ERR_CORRUPT;
    rc = SQLITE_CORRUPT;
  }else{
    rc = pager_unwritelock(pPager);
  }
  return rc;
}
760
761/*
762** Playback the checkpoint journal.
763**
764** This is similar to playing back the transaction journal but with
765** a few extra twists.
766**
767** (1) The number of pages in the database file at the start of
768** the checkpoint is stored in pPager->ckptSize, not in the
769** journal file itself.
770**
771** (2) In addition to playing back the checkpoint journal, also
772** playback all pages of the transaction journal beginning
773** at offset pPager->ckptJSize.
774*/
775static int pager_ckpt_playback(Pager *pPager){
776 off_t szJ; /* Size of the full journal */
777 int nRec; /* Number of Records */
778 int i; /* Loop counter */
779 int rc;
780
781 /* Truncate the database back to its original size.
782 */
783 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->ckptSize);
784 pPager->dbSize = pPager->ckptSize;
785
786 /* Figure out how many records are in the checkpoint journal.
787 */
788 assert( pPager->ckptInUse && pPager->journalOpen );
789 sqliteOsSeek(&pPager->cpfd, 0);
790 nRec = pPager->ckptNRec;
791
792 /* Copy original pages out of the checkpoint journal and back into the
793 ** database file. Note that the checkpoint journal always uses format
794 ** 2 instead of format 3 since it does not need to be concerned with
795 ** power failures corrupting the journal and can thus omit the checksums.
796 */
797 for(i=nRec-1; i>=0; i--){
798 rc = pager_playback_one_page(pPager, &pPager->cpfd, 2);
799 assert( rc!=SQLITE_DONE );
800 if( rc!=SQLITE_OK ) goto end_ckpt_playback;
801 }
802
803 /* Figure out how many pages need to be copied out of the transaction
804 ** journal.
805 */
806 rc = sqliteOsSeek(&pPager->jfd, pPager->ckptJSize);
807 if( rc!=SQLITE_OK ){
808 goto end_ckpt_playback;
809 }
810 rc = sqliteOsFileSize(&pPager->jfd, &szJ);
811 if( rc!=SQLITE_OK ){
812 goto end_ckpt_playback;
813 }
814 nRec = (szJ - pPager->ckptJSize)/JOURNAL_PG_SZ(journal_format);
815 for(i=nRec-1; i>=0; i--){
816 rc = pager_playback_one_page(pPager, &pPager->jfd, journal_format);
817 if( rc!=SQLITE_OK ){
818 assert( rc!=SQLITE_DONE );
819 goto end_ckpt_playback;
820 }
821 }
822
823end_ckpt_playback:
824 if( rc!=SQLITE_OK ){
825 pPager->errMask |= PAGER_ERR_CORRUPT;
826 rc = SQLITE_CORRUPT;
827 }
828 return rc;
829}
830
831/*
832** Change the maximum number of in-memory pages that are allowed.
833**
834** The maximum number is the absolute value of the mxPage parameter.
835** If mxPage is negative, the noSync flag is also set. noSync bypasses
836** calls to sqliteOsSync(). The pager runs much faster with noSync on,
837** but if the operating system crashes or there is an abrupt power
838** failure, the database file might be left in an inconsistent and
839** unrepairable state.
840*/
841void sqlitepager_set_cachesize(Pager *pPager, int mxPage){
842 if( mxPage>=0 ){
843 pPager->noSync = pPager->tempFile;
844 if( pPager->noSync==0 ) pPager->needSync = 0;
845 }else{
846 pPager->noSync = 1;
847 mxPage = -mxPage;
848 }
849 if( mxPage>10 ){
850 pPager->mxPage = mxPage;
851 }
852}
853
854/*
855** Adjust the robustness of the database to damage due to OS crashes
856** or power failures by changing the number of syncs()s when writing
857** the rollback journal. There are three levels:
858**
859** OFF sqliteOsSync() is never called. This is the default
860** for temporary and transient files.
861**
862** NORMAL The journal is synced once before writes begin on the
863** database. This is normally adequate protection, but
864** it is theoretically possible, though very unlikely,
865** that an inopertune power failure could leave the journal
866** in a state which would cause damage to the database
867** when it is rolled back.
868**
869** FULL The journal is synced twice before writes begin on the
870** database (with some additional information - the nRec field
871** of the journal header - being written in between the two
872** syncs). If we assume that writing a
873** single disk sector is atomic, then this mode provides
874** assurance that the journal will not be corrupted to the
875** point of causing damage to the database during rollback.
876**
877** Numeric values associated with these states are OFF==1, NORMAL=2,
878** and FULL=3.
879*/
880void sqlitepager_set_safety_level(Pager *pPager, int level){
881 pPager->noSync = level==1 || pPager->tempFile;
882 pPager->fullSync = level==3 && !pPager->tempFile;
883 if( pPager->noSync==0 ) pPager->needSync = 0;
884}
885
886/*
887** Open a temporary file. Write the name of the file into zName
888** (zName must be at least SQLITE_TEMPNAME_SIZE bytes long.) Write
889** the file descriptor into *fd. Return SQLITE_OK on success or some
890** other error code if we fail.
891**
892** The OS will automatically delete the temporary file when it is
893** closed.
894*/
895static int sqlitepager_opentemp(char *zFile, OsFile *fd){
896 int cnt = 8;
897 int rc;
898 do{
899 cnt--;
900 sqliteOsTempFileName(zFile);
901 rc = sqliteOsOpenExclusive(zFile, fd, 1);
902 }while( cnt>0 && rc!=SQLITE_OK );
903 return rc;
904}
905
906/*
907** Create a new page cache and put a pointer to the page cache in *ppPager.
908** The file to be cached need not exist. The file is not locked until
909** the first call to sqlitepager_get() and is only held open until the
910** last page is released using sqlitepager_unref().
911**
912** If zFilename is NULL then a randomly-named temporary file is created
913** and used as the file to be cached. The file will be deleted
914** automatically when it is closed.
915*/
916int sqlitepager_open(
917 Pager **ppPager, /* Return the Pager structure here */
918 const char *zFilename, /* Name of the database file to open */
919 int mxPage, /* Max number of in-memory cache pages */
920 int nExtra, /* Extra bytes append to each in-memory page */
921 int useJournal /* TRUE to use a rollback journal on this file */
922){
923 Pager *pPager;
924 char *zFullPathname;
925 int nameLen;
926 OsFile fd;
927 int rc, i;
928 int tempFile;
929 int readOnly = 0;
930 char zTemp[SQLITE_TEMPNAME_SIZE];
931
932 *ppPager = 0;
933 if( sqlite_malloc_failed ){
934 return SQLITE_NOMEM;
935 }
936 if( zFilename && zFilename[0] ){
937 zFullPathname = sqliteOsFullPathname(zFilename);
938 rc = sqliteOsOpenReadWrite(zFullPathname, &fd, &readOnly);
939 tempFile = 0;
940 }else{
941 rc = sqlitepager_opentemp(zTemp, &fd);
942 zFilename = zTemp;
943 zFullPathname = sqliteOsFullPathname(zFilename);
944 tempFile = 1;
945 }
946 if( sqlite_malloc_failed ){
947 return SQLITE_NOMEM;
948 }
949 if( rc!=SQLITE_OK ){
950 sqliteFree(zFullPathname);
951 return SQLITE_CANTOPEN;
952 }
953 nameLen = strlen(zFullPathname);
954 pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
955 if( pPager==0 ){
956 sqliteOsClose(&fd);
957 sqliteFree(zFullPathname);
958 return SQLITE_NOMEM;
959 }
960 SET_PAGER(pPager);
961 pPager->zFilename = (char*)&pPager[1];
962 pPager->zDirectory = &pPager->zFilename[nameLen+1];
963 pPager->zJournal = &pPager->zDirectory[nameLen+1];
964 strcpy(pPager->zFilename, zFullPathname);
965 strcpy(pPager->zDirectory, zFullPathname);
966 for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
967 if( i>0 ) pPager->zDirectory[i-1] = 0;
968 strcpy(pPager->zJournal, zFullPathname);
969 sqliteFree(zFullPathname);
970 strcpy(&pPager->zJournal[nameLen], "-journal");
971 pPager->fd = fd;
972 pPager->journalOpen = 0;
973 pPager->useJournal = useJournal;
974 pPager->ckptOpen = 0;
975 pPager->ckptInUse = 0;
976 pPager->nRef = 0;
977 pPager->dbSize = -1;
978 pPager->ckptSize = 0;
979 pPager->ckptJSize = 0;
980 pPager->nPage = 0;
981 pPager->mxPage = mxPage>5 ? mxPage : 10;
982 pPager->state = SQLITE_UNLOCK;
983 pPager->errMask = 0;
984 pPager->tempFile = tempFile;
985 pPager->readOnly = readOnly;
986 pPager->needSync = 0;
987 pPager->noSync = pPager->tempFile || !useJournal;
988 pPager->pFirst = 0;
989 pPager->pFirstSynced = 0;
990 pPager->pLast = 0;
991 pPager->nExtra = nExtra;
992 memset(pPager->aHash, 0, sizeof(pPager->aHash));
993 *ppPager = pPager;
994 return SQLITE_OK;
995}
996
997/*
998** Set the destructor for this pager. If not NULL, the destructor is called
999** when the reference count on each page reaches zero. The destructor can
1000** be used to clean up information in the extra segment appended to each page.
1001**
1002** The destructor is not called as a result sqlitepager_close().
1003** Destructors are only called by sqlitepager_unref().
1004*/
1005void sqlitepager_set_destructor(Pager *pPager, void (*xDesc)(void*)){
1006 pPager->xDestructor = xDesc;
1007}
1008
1009/*
1010** Return the total number of pages in the disk file associated with
1011** pPager.
1012*/
1013int sqlitepager_pagecount(Pager *pPager){
1014 off_t n;
1015 assert( pPager!=0 );
1016 if( pPager->dbSize>=0 ){
1017 return pPager->dbSize;
1018 }
1019 if( sqliteOsFileSize(&pPager->fd, &n)!=SQLITE_OK ){
1020 pPager->errMask |= PAGER_ERR_DISK;
1021 return 0;
1022 }
1023 n /= SQLITE_PAGE_SIZE;
1024 if( pPager->state!=SQLITE_UNLOCK ){
1025 pPager->dbSize = n;
1026 }
1027 return n;
1028}
1029
1030/*
1031** Forward declaration
1032*/
1033static int syncJournal(Pager*);
1034
1035/*
1036** Truncate the file to the number of pages specified.
1037*/
1038int sqlitepager_truncate(Pager *pPager, Pgno nPage){
1039 int rc;
1040 if( pPager->dbSize<0 ){
1041 sqlitepager_pagecount(pPager);
1042 }
1043 if( pPager->errMask!=0 ){
1044 rc = pager_errcode(pPager);
1045 return rc;
1046 }
1047 if( nPage>=(unsigned)pPager->dbSize ){
1048 return SQLITE_OK;
1049 }
1050 syncJournal(pPager);
1051 rc = sqliteOsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)nPage);
1052 if( rc==SQLITE_OK ){
1053 pPager->dbSize = nPage;
1054 }
1055 return rc;
1056}
1057
1058/*
1059** Shutdown the page cache. Free all memory and close all files.
1060**
1061** If a transaction was in progress when this routine is called, that
1062** transaction is rolled back. All outstanding pages are invalidated
1063** and their memory is freed. Any attempt to use a page associated
1064** with this page cache after this function returns will likely
1065** result in a coredump.
1066*/
1067int sqlitepager_close(Pager *pPager){
1068 PgHdr *pPg, *pNext;
1069 switch( pPager->state ){
1070 case SQLITE_WRITELOCK: {
1071 sqlitepager_rollback(pPager);
1072 sqliteOsUnlock(&pPager->fd);
1073 assert( pPager->journalOpen==0 );
1074 break;
1075 }
1076 case SQLITE_READLOCK: {
1077 sqliteOsUnlock(&pPager->fd);
1078 break;
1079 }
1080 default: {
1081 /* Do nothing */
1082 break;
1083 }
1084 }
1085 for(pPg=pPager->pAll; pPg; pPg=pNext){
1086 pNext = pPg->pNextAll;
1087 sqliteFree(pPg);
1088 }
1089 sqliteOsClose(&pPager->fd);
1090 assert( pPager->journalOpen==0 );
1091 /* Temp files are automatically deleted by the OS
1092 ** if( pPager->tempFile ){
1093 ** sqliteOsDelete(pPager->zFilename);
1094 ** }
1095 */
1096 CLR_PAGER(pPager);
1097 if( pPager->zFilename!=(char*)&pPager[1] ){
1098 assert( 0 ); /* Cannot happen */
1099 sqliteFree(pPager->zFilename);
1100 sqliteFree(pPager->zJournal);
1101 sqliteFree(pPager->zDirectory);
1102 }
1103 sqliteFree(pPager);
1104 return SQLITE_OK;
1105}
1106
1107/*
1108** Return the page number for the given page data.
1109*/
1110Pgno sqlitepager_pagenumber(void *pData){
1111 PgHdr *p = DATA_TO_PGHDR(pData);
1112 return p->pgno;
1113}
1114
1115/*
1116** Increment the reference count for a page. If the page is
1117** currently on the freelist (the reference count is zero) then
1118** remove it from the freelist.
1119*/
1120#define page_ref(P) ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
1121static void _page_ref(PgHdr *pPg){
1122 if( pPg->nRef==0 ){
1123 /* The page is currently on the freelist. Remove it. */
1124 if( pPg==pPg->pPager->pFirstSynced ){
1125 PgHdr *p = pPg->pNextFree;
1126 while( p && p->needSync ){ p = p->pNextFree; }
1127 pPg->pPager->pFirstSynced = p;
1128 }
1129 if( pPg->pPrevFree ){
1130 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1131 }else{
1132 pPg->pPager->pFirst = pPg->pNextFree;
1133 }
1134 if( pPg->pNextFree ){
1135 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1136 }else{
1137 pPg->pPager->pLast = pPg->pPrevFree;
1138 }
1139 pPg->pPager->nRef++;
1140 }
1141 pPg->nRef++;
1142 REFINFO(pPg);
1143}
1144
1145/*
1146** Increment the reference count for a page. The input pointer is
1147** a reference to the page data.
1148*/
1149int sqlitepager_ref(void *pData){
1150 PgHdr *pPg = DATA_TO_PGHDR(pData);
1151 page_ref(pPg);
1152 return SQLITE_OK;
1153}
1154
1155/*
1156** Sync the journal. In other words, make sure all the pages that have
1157** been written to the journal have actually reached the surface of the
1158** disk. It is not safe to modify the original database file until after
1159** the journal has been synced. If the original database is modified before
1160** the journal is synced and a power failure occurs, the unsynced journal
1161** data would be lost and we would be unable to completely rollback the
1162** database changes. Database corruption would occur.
1163**
1164** This routine also updates the nRec field in the header of the journal.
1165** (See comments on the pager_playback() routine for additional information.)
1166** If the sync mode is FULL, two syncs will occur. First the whole journal
1167** is synced, then the nRec field is updated, then a second sync occurs.
1168**
1169** For temporary databases, we do not care if we are able to rollback
1170** after a power failure, so sync occurs.
1171**
1172** This routine clears the needSync field of every page current held in
1173** memory.
1174*/
1175static int syncJournal(Pager *pPager){
1176 PgHdr *pPg;
1177 int rc = SQLITE_OK;
1178
1179 /* Sync the journal before modifying the main database
1180 ** (assuming there is a journal and it needs to be synced.)
1181 */
1182 if( pPager->needSync ){
1183 if( !pPager->tempFile ){
1184 assert( pPager->journalOpen );
1185 /* assert( !pPager->noSync ); // noSync might be set if synchronous
1186 ** was turned off after the transaction was started. Ticket #615 */
1187#ifndef NDEBUG
1188 {
1189 /* Make sure the pPager->nRec counter we are keeping agrees
1190 ** with the nRec computed from the size of the journal file.
1191 */
1192 off_t hdrSz, pgSz, jSz;
1193 hdrSz = JOURNAL_HDR_SZ(journal_format);
1194 pgSz = JOURNAL_PG_SZ(journal_format);
1195 rc = sqliteOsFileSize(&pPager->jfd, &jSz);
1196 if( rc!=0 ) return rc;
1197 assert( pPager->nRec*pgSz+hdrSz==jSz );
1198 }
1199#endif
1200 if( journal_format>=3 ){
1201 /* Write the nRec value into the journal file header */
1202 off_t szJ;
1203 if( pPager->fullSync ){
1204 TRACE1("SYNC\n");
1205 rc = sqliteOsSync(&pPager->jfd);
1206 if( rc!=0 ) return rc;
1207 }
1208 sqliteOsSeek(&pPager->jfd, sizeof(aJournalMagic1));
1209 rc = write32bits(&pPager->jfd, pPager->nRec);
1210 if( rc ) return rc;
1211 szJ = JOURNAL_HDR_SZ(journal_format) +
1212 pPager->nRec*JOURNAL_PG_SZ(journal_format);
1213 sqliteOsSeek(&pPager->jfd, szJ);
1214 }
1215 TRACE1("SYNC\n");
1216 rc = sqliteOsSync(&pPager->jfd);
1217 if( rc!=0 ) return rc;
1218 pPager->journalStarted = 1;
1219 }
1220 pPager->needSync = 0;
1221
1222 /* Erase the needSync flag from every page.
1223 */
1224 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1225 pPg->needSync = 0;
1226 }
1227 pPager->pFirstSynced = pPager->pFirst;
1228 }
1229
1230#ifndef NDEBUG
1231 /* If the Pager.needSync flag is clear then the PgHdr.needSync
1232 ** flag must also be clear for all pages. Verify that this
1233 ** invariant is true.
1234 */
1235 else{
1236 for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
1237 assert( pPg->needSync==0 );
1238 }
1239 assert( pPager->pFirstSynced==pPager->pFirst );
1240 }
1241#endif
1242
1243 return rc;
1244}
1245
1246/*
1247** Given a list of pages (connected by the PgHdr.pDirty pointer) write
1248** every one of those pages out to the database file and mark them all
1249** as clean.
1250*/
1251static int pager_write_pagelist(PgHdr *pList){
1252 Pager *pPager;
1253 int rc;
1254
1255 if( pList==0 ) return SQLITE_OK;
1256 pPager = pList->pPager;
1257 while( pList ){
1258 assert( pList->dirty );
1259 sqliteOsSeek(&pPager->fd, (pList->pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1260 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
1261 TRACE2("STORE %d\n", pList->pgno);
1262 rc = sqliteOsWrite(&pPager->fd, PGHDR_TO_DATA(pList), SQLITE_PAGE_SIZE);
1263 CODEC(pPager, PGHDR_TO_DATA(pList), pList->pgno, 0);
1264 if( rc ) return rc;
1265 pList->dirty = 0;
1266 pList = pList->pDirty;
1267 }
1268 return SQLITE_OK;
1269}
1270
1271/*
1272** Collect every dirty page into a dirty list and
1273** return a pointer to the head of that list. All pages are
1274** collected even if they are still in use.
1275*/
1276static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
1277 PgHdr *p, *pList;
1278 pList = 0;
1279 for(p=pPager->pAll; p; p=p->pNextAll){
1280 if( p->dirty ){
1281 p->pDirty = pList;
1282 pList = p;
1283 }
1284 }
1285 return pList;
1286}
1287
1288/*
1289** Acquire a page.
1290**
1291** A read lock on the disk file is obtained when the first page is acquired.
1292** This read lock is dropped when the last page is released.
1293**
1294** A _get works for any page number greater than 0. If the database
1295** file is smaller than the requested page, then no actual disk
1296** read occurs and the memory image of the page is initialized to
1297** all zeros. The extra data appended to a page is always initialized
1298** to zeros the first time a page is loaded into memory.
1299**
1300** The acquisition might fail for several reasons. In all cases,
1301** an appropriate error code is returned and *ppPage is set to NULL.
1302**
1303** See also sqlitepager_lookup(). Both this routine and _lookup() attempt
1304** to find a page in the in-memory cache first. If the page is not already
1305** in memory, this routine goes to disk to read it in whereas _lookup()
1306** just returns 0. This routine acquires a read-lock the first time it
1307** has to go to disk, and could also playback an old journal if necessary.
1308** Since _lookup() never goes to disk, it never has to deal with locks
1309** or journal files.
1310*/
1311int sqlitepager_get(Pager *pPager, Pgno pgno, void **ppPage){
1312 PgHdr *pPg;
1313 int rc;
1314
1315 /* Make sure we have not hit any critical errors.
1316 */
1317 assert( pPager!=0 );
1318 assert( pgno!=0 );
1319 *ppPage = 0;
1320 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1321 return pager_errcode(pPager);
1322 }
1323
1324 /* If this is the first page accessed, then get a read lock
1325 ** on the database file.
1326 */
1327 if( pPager->nRef==0 ){
1328 rc = sqliteOsReadLock(&pPager->fd);
1329 if( rc!=SQLITE_OK ){
1330 return rc;
1331 }
1332 pPager->state = SQLITE_READLOCK;
1333
1334 /* If a journal file exists, try to play it back.
1335 */
1336 if( pPager->useJournal && sqliteOsFileExists(pPager->zJournal) ){
1337 int rc;
1338
1339 /* Get a write lock on the database
1340 */
1341 rc = sqliteOsWriteLock(&pPager->fd);
1342 if( rc!=SQLITE_OK ){
1343 if( sqliteOsUnlock(&pPager->fd)!=SQLITE_OK ){
1344 /* This should never happen! */
1345 rc = SQLITE_INTERNAL;
1346 }
1347 return rc;
1348 }
1349 pPager->state = SQLITE_WRITELOCK;
1350
1351 /* Open the journal for reading only. Return SQLITE_BUSY if
1352 ** we are unable to open the journal file.
1353 **
1354 ** The journal file does not need to be locked itself. The
1355 ** journal file is never open unless the main database file holds
1356 ** a write lock, so there is never any chance of two or more
1357 ** processes opening the journal at the same time.
1358 */
1359 rc = sqliteOsOpenReadOnly(pPager->zJournal, &pPager->jfd);
1360 if( rc!=SQLITE_OK ){
1361 rc = sqliteOsUnlock(&pPager->fd);
1362 assert( rc==SQLITE_OK );
1363 return SQLITE_BUSY;
1364 }
1365 pPager->journalOpen = 1;
1366 pPager->journalStarted = 0;
1367
1368 /* Playback and delete the journal. Drop the database write
1369 ** lock and reacquire the read lock.
1370 */
1371 rc = pager_playback(pPager, 0);
1372 if( rc!=SQLITE_OK ){
1373 return rc;
1374 }
1375 }
1376 pPg = 0;
1377 }else{
1378 /* Search for page in cache */
1379 pPg = pager_lookup(pPager, pgno);
1380 }
1381 if( pPg==0 ){
1382 /* The requested page is not in the page cache. */
1383 int h;
1384 pPager->nMiss++;
1385 if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 ){
1386 /* Create a new page */
1387 pPg = sqliteMallocRaw( sizeof(*pPg) + SQLITE_PAGE_SIZE
1388 + sizeof(u32) + pPager->nExtra );
1389 if( pPg==0 ){
1390 pager_unwritelock(pPager);
1391 pPager->errMask |= PAGER_ERR_MEM;
1392 return SQLITE_NOMEM;
1393 }
1394 memset(pPg, 0, sizeof(*pPg));
1395 pPg->pPager = pPager;
1396 pPg->pNextAll = pPager->pAll;
1397 if( pPager->pAll ){
1398 pPager->pAll->pPrevAll = pPg;
1399 }
1400 pPg->pPrevAll = 0;
1401 pPager->pAll = pPg;
1402 pPager->nPage++;
1403 }else{
1404 /* Find a page to recycle. Try to locate a page that does not
1405 ** require us to do an fsync() on the journal.
1406 */
1407 pPg = pPager->pFirstSynced;
1408
1409 /* If we could not find a page that does not require an fsync()
1410 ** on the journal file then fsync the journal file. This is a
1411 ** very slow operation, so we work hard to avoid it. But sometimes
1412 ** it can't be helped.
1413 */
1414 if( pPg==0 ){
1415 int rc = syncJournal(pPager);
1416 if( rc!=0 ){
1417 sqlitepager_rollback(pPager);
1418 return SQLITE_IOERR;
1419 }
1420 pPg = pPager->pFirst;
1421 }
1422 assert( pPg->nRef==0 );
1423
1424 /* Write the page to the database file if it is dirty.
1425 */
1426 if( pPg->dirty ){
1427 assert( pPg->needSync==0 );
1428 pPg->pDirty = 0;
1429 rc = pager_write_pagelist( pPg );
1430 if( rc!=SQLITE_OK ){
1431 sqlitepager_rollback(pPager);
1432 return SQLITE_IOERR;
1433 }
1434 }
1435 assert( pPg->dirty==0 );
1436
1437 /* If the page we are recycling is marked as alwaysRollback, then
1438 ** set the global alwaysRollback flag, thus disabling the
1439 ** sqlite_dont_rollback() optimization for the rest of this transaction.
1440 ** It is necessary to do this because the page marked alwaysRollback
1441 ** might be reloaded at a later time but at that point we won't remember
1442 ** that is was marked alwaysRollback. This means that all pages must
1443 ** be marked as alwaysRollback from here on out.
1444 */
1445 if( pPg->alwaysRollback ){
1446 pPager->alwaysRollback = 1;
1447 }
1448
1449 /* Unlink the old page from the free list and the hash table
1450 */
1451 if( pPg==pPager->pFirstSynced ){
1452 PgHdr *p = pPg->pNextFree;
1453 while( p && p->needSync ){ p = p->pNextFree; }
1454 pPager->pFirstSynced = p;
1455 }
1456 if( pPg->pPrevFree ){
1457 pPg->pPrevFree->pNextFree = pPg->pNextFree;
1458 }else{
1459 assert( pPager->pFirst==pPg );
1460 pPager->pFirst = pPg->pNextFree;
1461 }
1462 if( pPg->pNextFree ){
1463 pPg->pNextFree->pPrevFree = pPg->pPrevFree;
1464 }else{
1465 assert( pPager->pLast==pPg );
1466 pPager->pLast = pPg->pPrevFree;
1467 }
1468 pPg->pNextFree = pPg->pPrevFree = 0;
1469 if( pPg->pNextHash ){
1470 pPg->pNextHash->pPrevHash = pPg->pPrevHash;
1471 }
1472 if( pPg->pPrevHash ){
1473 pPg->pPrevHash->pNextHash = pPg->pNextHash;
1474 }else{
1475 h = pager_hash(pPg->pgno);
1476 assert( pPager->aHash[h]==pPg );
1477 pPager->aHash[h] = pPg->pNextHash;
1478 }
1479 pPg->pNextHash = pPg->pPrevHash = 0;
1480 pPager->nOvfl++;
1481 }
1482 pPg->pgno = pgno;
1483 if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
1484 sqliteCheckMemory(pPager->aInJournal, pgno/8);
1485 assert( pPager->journalOpen );
1486 pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
1487 pPg->needSync = 0;
1488 }else{
1489 pPg->inJournal = 0;
1490 pPg->needSync = 0;
1491 }
1492 if( pPager->aInCkpt && (int)pgno<=pPager->ckptSize
1493 && (pPager->aInCkpt[pgno/8] & (1<<(pgno&7)))!=0 ){
1494 page_add_to_ckpt_list(pPg);
1495 }else{
1496 page_remove_from_ckpt_list(pPg);
1497 }
1498 pPg->dirty = 0;
1499 pPg->nRef = 1;
1500 REFINFO(pPg);
1501 pPager->nRef++;
1502 h = pager_hash(pgno);
1503 pPg->pNextHash = pPager->aHash[h];
1504 pPager->aHash[h] = pPg;
1505 if( pPg->pNextHash ){
1506 assert( pPg->pNextHash->pPrevHash==0 );
1507 pPg->pNextHash->pPrevHash = pPg;
1508 }
1509 if( pPager->nExtra>0 ){
1510 memset(PGHDR_TO_EXTRA(pPg), 0, pPager->nExtra);
1511 }
1512 if( pPager->dbSize<0 ) sqlitepager_pagecount(pPager);
1513 if( pPager->errMask!=0 ){
1514 sqlitepager_unref(PGHDR_TO_DATA(pPg));
1515 rc = pager_errcode(pPager);
1516 return rc;
1517 }
1518 if( pPager->dbSize<(int)pgno ){
1519 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1520 }else{
1521 int rc;
1522 sqliteOsSeek(&pPager->fd, (pgno-1)*(off_t)SQLITE_PAGE_SIZE);
1523 rc = sqliteOsRead(&pPager->fd, PGHDR_TO_DATA(pPg), SQLITE_PAGE_SIZE);
1524 TRACE2("FETCH %d\n", pPg->pgno);
1525 CODEC(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
1526 if( rc!=SQLITE_OK ){
1527 off_t fileSize;
1528 if( sqliteOsFileSize(&pPager->fd,&fileSize)!=SQLITE_OK
1529 || fileSize>=pgno*SQLITE_PAGE_SIZE ){
1530 sqlitepager_unref(PGHDR_TO_DATA(pPg));
1531 return rc;
1532 }else{
1533 memset(PGHDR_TO_DATA(pPg), 0, SQLITE_PAGE_SIZE);
1534 }
1535 }
1536 }
1537 }else{
1538 /* The requested page is in the page cache. */
1539 pPager->nHit++;
1540 page_ref(pPg);
1541 }
1542 *ppPage = PGHDR_TO_DATA(pPg);
1543 return SQLITE_OK;
1544}
1545
1546/*
1547** Acquire a page if it is already in the in-memory cache. Do
1548** not read the page from disk. Return a pointer to the page,
1549** or 0 if the page is not in cache.
1550**
1551** See also sqlitepager_get(). The difference between this routine
1552** and sqlitepager_get() is that _get() will go to the disk and read
1553** in the page if the page is not already in cache. This routine
1554** returns NULL if the page is not in cache or if a disk I/O error
1555** has ever happened.
1556*/
1557void *sqlitepager_lookup(Pager *pPager, Pgno pgno){
1558 PgHdr *pPg;
1559
1560 assert( pPager!=0 );
1561 assert( pgno!=0 );
1562 if( pPager->errMask & ~(PAGER_ERR_FULL) ){
1563 return 0;
1564 }
1565 /* if( pPager->nRef==0 ){
1566 ** return 0;
1567 ** }
1568 */
1569 pPg = pager_lookup(pPager, pgno);
1570 if( pPg==0 ) return 0;
1571 page_ref(pPg);
1572 return PGHDR_TO_DATA(pPg);
1573}
1574
1575/*
1576** Release a page.
1577**
1578** If the number of references to the page drop to zero, then the
1579** page is added to the LRU list. When all references to all pages
1580** are released, a rollback occurs and the lock on the database is
1581** removed.
1582*/
1583int sqlitepager_unref(void *pData){
1584 PgHdr *pPg;
1585
1586 /* Decrement the reference count for this page
1587 */
1588 pPg = DATA_TO_PGHDR(pData);
1589 assert( pPg->nRef>0 );
1590 pPg->nRef--;
1591 REFINFO(pPg);
1592
1593 /* When the number of references to a page reach 0, call the
1594 ** destructor and add the page to the freelist.
1595 */
1596 if( pPg->nRef==0 ){
1597 Pager *pPager;
1598 pPager = pPg->pPager;
1599 pPg->pNextFree = 0;
1600 pPg->pPrevFree = pPager->pLast;
1601 pPager->pLast = pPg;
1602 if( pPg->pPrevFree ){
1603 pPg->pPrevFree->pNextFree = pPg;
1604 }else{
1605 pPager->pFirst = pPg;
1606 }
1607 if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
1608 pPager->pFirstSynced = pPg;
1609 }
1610 if( pPager->xDestructor ){
1611 pPager->xDestructor(pData);
1612 }
1613
1614 /* When all pages reach the freelist, drop the read lock from
1615 ** the database file.
1616 */
1617 pPager->nRef--;
1618 assert( pPager->nRef>=0 );
1619 if( pPager->nRef==0 ){
1620 pager_reset(pPager);
1621 }
1622 }
1623 return SQLITE_OK;
1624}
1625
1626/*
1627** Create a journal file for pPager. There should already be a write
1628** lock on the database file when this routine is called.
1629**
1630** Return SQLITE_OK if everything. Return an error code and release the
1631** write lock if anything goes wrong.
1632*/
1633static int pager_open_journal(Pager *pPager){
1634 int rc;
1635 assert( pPager->state==SQLITE_WRITELOCK );
1636 assert( pPager->journalOpen==0 );
1637 assert( pPager->useJournal );
1638 sqlitepager_pagecount(pPager);
1639 pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
1640 if( pPager->aInJournal==0 ){
1641 sqliteOsReadLock(&pPager->fd);
1642 pPager->state = SQLITE_READLOCK;
1643 return SQLITE_NOMEM;
1644 }
1645 rc = sqliteOsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
1646 if( rc!=SQLITE_OK ){
1647 sqliteFree(pPager->aInJournal);
1648 pPager->aInJournal = 0;
1649 sqliteOsReadLock(&pPager->fd);
1650 pPager->state = SQLITE_READLOCK;
1651 return SQLITE_CANTOPEN;
1652 }
1653 sqliteOsOpenDirectory(pPager->zDirectory, &pPager->jfd);
1654 pPager->journalOpen = 1;
1655 pPager->journalStarted = 0;
1656 pPager->needSync = 0;
1657 pPager->alwaysRollback = 0;
1658 pPager->nRec = 0;
1659 if( pPager->errMask!=0 ){
1660 rc = pager_errcode(pPager);
1661 return rc;
1662 }
1663 pPager->origDbSize = pPager->dbSize;
1664 if( journal_format==JOURNAL_FORMAT_3 ){
1665 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic3, sizeof(aJournalMagic3));
1666 if( rc==SQLITE_OK ){
1667 rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
1668 }
1669 if( rc==SQLITE_OK ){
1670 sqliteRandomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
1671 rc = write32bits(&pPager->jfd, pPager->cksumInit);
1672 }
1673 }else if( journal_format==JOURNAL_FORMAT_2 ){
1674 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic2, sizeof(aJournalMagic2));
1675 }else{
1676 assert( journal_format==JOURNAL_FORMAT_1 );
1677 rc = sqliteOsWrite(&pPager->jfd, aJournalMagic1, sizeof(aJournalMagic1));
1678 }
1679 if( rc==SQLITE_OK ){
1680 rc = write32bits(&pPager->jfd, pPager->dbSize);
1681 }
1682 if( pPager->ckptAutoopen && rc==SQLITE_OK ){
1683 rc = sqlitepager_ckpt_begin(pPager);
1684 }
1685 if( rc!=SQLITE_OK ){
1686 rc = pager_unwritelock(pPager);
1687 if( rc==SQLITE_OK ){
1688 rc = SQLITE_FULL;
1689 }
1690 }
1691 return rc;
1692}
1693
1694/*
1695** Acquire a write-lock on the database. The lock is removed when
1696** the any of the following happen:
1697**
1698** * sqlitepager_commit() is called.
1699** * sqlitepager_rollback() is called.
1700** * sqlitepager_close() is called.
1701** * sqlitepager_unref() is called to on every outstanding page.
1702**
1703** The parameter to this routine is a pointer to any open page of the
1704** database file. Nothing changes about the page - it is used merely
1705** to acquire a pointer to the Pager structure and as proof that there
1706** is already a read-lock on the database.
1707**
1708** A journal file is opened if this is not a temporary file. For
1709** temporary files, the opening of the journal file is deferred until
1710** there is an actual need to write to the journal.
1711**
1712** If the database is already write-locked, this routine is a no-op.
1713*/
1714int sqlitepager_begin(void *pData){
1715 PgHdr *pPg = DATA_TO_PGHDR(pData);
1716 Pager *pPager = pPg->pPager;
1717 int rc = SQLITE_OK;
1718 assert( pPg->nRef>0 );
1719 assert( pPager->state!=SQLITE_UNLOCK );
1720 if( pPager->state==SQLITE_READLOCK ){
1721 assert( pPager->aInJournal==0 );
1722 rc = sqliteOsWriteLock(&pPager->fd);
1723 if( rc!=SQLITE_OK ){
1724 return rc;
1725 }
1726 pPager->state = SQLITE_WRITELOCK;
1727 pPager->dirtyFile = 0;
1728 TRACE1("TRANSACTION\n");
1729 if( pPager->useJournal && !pPager->tempFile ){
1730 rc = pager_open_journal(pPager);
1731 }
1732 }
1733 return rc;
1734}
1735
1736/*
1737** Mark a data page as writeable. The page is written into the journal
1738** if it is not there already. This routine must be called before making
1739** changes to a page.
1740**
1741** The first time this routine is called, the pager creates a new
1742** journal and acquires a write lock on the database. If the write
1743** lock could not be acquired, this routine returns SQLITE_BUSY. The
1744** calling routine must check for that return value and be careful not to
1745** change any page data until this routine returns SQLITE_OK.
1746**
1747** If the journal file could not be written because the disk is full,
1748** then this routine returns SQLITE_FULL and does an immediate rollback.
1749** All subsequent write attempts also return SQLITE_FULL until there
1750** is a call to sqlitepager_commit() or sqlitepager_rollback() to
1751** reset.
1752*/
1753int sqlitepager_write(void *pData){
1754 PgHdr *pPg = DATA_TO_PGHDR(pData);
1755 Pager *pPager = pPg->pPager;
1756 int rc = SQLITE_OK;
1757
1758 /* Check for errors
1759 */
1760 if( pPager->errMask ){
1761 return pager_errcode(pPager);
1762 }
1763 if( pPager->readOnly ){
1764 return SQLITE_PERM;
1765 }
1766
1767 /* Mark the page as dirty. If the page has already been written
1768 ** to the journal then we can return right away.
1769 */
1770 pPg->dirty = 1;
1771 if( pPg->inJournal && (pPg->inCkpt || pPager->ckptInUse==0) ){
1772 pPager->dirtyFile = 1;
1773 return SQLITE_OK;
1774 }
1775
1776 /* If we get this far, it means that the page needs to be
1777 ** written to the transaction journal or the ckeckpoint journal
1778 ** or both.
1779 **
1780 ** First check to see that the transaction journal exists and
1781 ** create it if it does not.
1782 */
1783 assert( pPager->state!=SQLITE_UNLOCK );
1784 rc = sqlitepager_begin(pData);
1785 if( rc!=SQLITE_OK ){
1786 return rc;
1787 }
1788 assert( pPager->state==SQLITE_WRITELOCK );
1789 if( !pPager->journalOpen && pPager->useJournal ){
1790 rc = pager_open_journal(pPager);
1791 if( rc!=SQLITE_OK ) return rc;
1792 }
1793 assert( pPager->journalOpen || !pPager->useJournal );
1794 pPager->dirtyFile = 1;
1795
1796 /* The transaction journal now exists and we have a write lock on the
1797 ** main database file. Write the current page to the transaction
1798 ** journal if it is not there already.
1799 */
1800 if( !pPg->inJournal && pPager->useJournal ){
1801 if( (int)pPg->pgno <= pPager->origDbSize ){
1802 int szPg;
1803 u32 saved;
1804 if( journal_format>=JOURNAL_FORMAT_3 ){
1805 u32 cksum = pager_cksum(pPager, pPg->pgno, pData);
1806 saved = *(u32*)PGHDR_TO_EXTRA(pPg);
1807 store32bits(cksum, pPg, SQLITE_PAGE_SIZE);
1808 szPg = SQLITE_PAGE_SIZE+8;
1809 }else{
1810 szPg = SQLITE_PAGE_SIZE+4;
1811 }
1812 store32bits(pPg->pgno, pPg, -4);
1813 CODEC(pPager, pData, pPg->pgno, 7);
1814 rc = sqliteOsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
1815 TRACE3("JOURNAL %d %d\n", pPg->pgno, pPg->needSync);
1816 CODEC(pPager, pData, pPg->pgno, 0);
1817 if( journal_format>=JOURNAL_FORMAT_3 ){
1818 *(u32*)PGHDR_TO_EXTRA(pPg) = saved;
1819 }
1820 if( rc!=SQLITE_OK ){
1821 sqlitepager_rollback(pPager);
1822 pPager->errMask |= PAGER_ERR_FULL;
1823 return rc;
1824 }
1825 pPager->nRec++;
1826 assert( pPager->aInJournal!=0 );
1827 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1828 pPg->needSync = !pPager->noSync;
1829 pPg->inJournal = 1;
1830 if( pPager->ckptInUse ){
1831 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1832 page_add_to_ckpt_list(pPg);
1833 }
1834 }else{
1835 pPg->needSync = !pPager->journalStarted && !pPager->noSync;
1836 TRACE3("APPEND %d %d\n", pPg->pgno, pPg->needSync);
1837 }
1838 if( pPg->needSync ){
1839 pPager->needSync = 1;
1840 }
1841 }
1842
1843 /* If the checkpoint journal is open and the page is not in it,
1844 ** then write the current page to the checkpoint journal. Note that
1845 ** the checkpoint journal always uses the simpler format 2 that lacks
1846 ** checksums. The header is also omitted from the checkpoint journal.
1847 */
1848 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
1849 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1850 store32bits(pPg->pgno, pPg, -4);
1851 CODEC(pPager, pData, pPg->pgno, 7);
1852 rc = sqliteOsWrite(&pPager->cpfd, &((char*)pData)[-4], SQLITE_PAGE_SIZE+4);
1853 TRACE2("CKPT-JOURNAL %d\n", pPg->pgno);
1854 CODEC(pPager, pData, pPg->pgno, 0);
1855 if( rc!=SQLITE_OK ){
1856 sqlitepager_rollback(pPager);
1857 pPager->errMask |= PAGER_ERR_FULL;
1858 return rc;
1859 }
1860 pPager->ckptNRec++;
1861 assert( pPager->aInCkpt!=0 );
1862 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1863 page_add_to_ckpt_list(pPg);
1864 }
1865
1866 /* Update the database size and return.
1867 */
1868 if( pPager->dbSize<(int)pPg->pgno ){
1869 pPager->dbSize = pPg->pgno;
1870 }
1871 return rc;
1872}
1873
1874/*
1875** Return TRUE if the page given in the argument was previously passed
1876** to sqlitepager_write(). In other words, return TRUE if it is ok
1877** to change the content of the page.
1878*/
1879int sqlitepager_iswriteable(void *pData){
1880 PgHdr *pPg = DATA_TO_PGHDR(pData);
1881 return pPg->dirty;
1882}
1883
1884/*
1885** Replace the content of a single page with the information in the third
1886** argument.
1887*/
1888int sqlitepager_overwrite(Pager *pPager, Pgno pgno, void *pData){
1889 void *pPage;
1890 int rc;
1891
1892 rc = sqlitepager_get(pPager, pgno, &pPage);
1893 if( rc==SQLITE_OK ){
1894 rc = sqlitepager_write(pPage);
1895 if( rc==SQLITE_OK ){
1896 memcpy(pPage, pData, SQLITE_PAGE_SIZE);
1897 }
1898 sqlitepager_unref(pPage);
1899 }
1900 return rc;
1901}
1902
1903/*
1904** A call to this routine tells the pager that it is not necessary to
1905** write the information on page "pgno" back to the disk, even though
1906** that page might be marked as dirty.
1907**
1908** The overlying software layer calls this routine when all of the data
1909** on the given page is unused. The pager marks the page as clean so
1910** that it does not get written to disk.
1911**
1912** Tests show that this optimization, together with the
1913** sqlitepager_dont_rollback() below, more than double the speed
1914** of large INSERT operations and quadruple the speed of large DELETEs.
1915**
1916** When this routine is called, set the alwaysRollback flag to true.
1917** Subsequent calls to sqlitepager_dont_rollback() for the same page
1918** will thereafter be ignored. This is necessary to avoid a problem
1919** where a page with data is added to the freelist during one part of
1920** a transaction then removed from the freelist during a later part
1921** of the same transaction and reused for some other purpose. When it
1922** is first added to the freelist, this routine is called. When reused,
1923** the dont_rollback() routine is called. But because the page contains
1924** critical data, we still need to be sure it gets rolled back in spite
1925** of the dont_rollback() call.
1926*/
1927void sqlitepager_dont_write(Pager *pPager, Pgno pgno){
1928 PgHdr *pPg;
1929
1930 pPg = pager_lookup(pPager, pgno);
1931 pPg->alwaysRollback = 1;
1932 if( pPg && pPg->dirty && !pPager->ckptInUse ){
1933 if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
1934 /* If this pages is the last page in the file and the file has grown
1935 ** during the current transaction, then do NOT mark the page as clean.
1936 ** When the database file grows, we must make sure that the last page
1937 ** gets written at least once so that the disk file will be the correct
1938 ** size. If you do not write this page and the size of the file
1939 ** on the disk ends up being too small, that can lead to database
1940 ** corruption during the next transaction.
1941 */
1942 }else{
1943 TRACE2("DONT_WRITE %d\n", pgno);
1944 pPg->dirty = 0;
1945 }
1946 }
1947}
1948
1949/*
1950** A call to this routine tells the pager that if a rollback occurs,
1951** it is not necessary to restore the data on the given page. This
1952** means that the pager does not have to record the given page in the
1953** rollback journal.
1954*/
1955void sqlitepager_dont_rollback(void *pData){
1956 PgHdr *pPg = DATA_TO_PGHDR(pData);
1957 Pager *pPager = pPg->pPager;
1958
1959 if( pPager->state!=SQLITE_WRITELOCK || pPager->journalOpen==0 ) return;
1960 if( pPg->alwaysRollback || pPager->alwaysRollback ) return;
1961 if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
1962 assert( pPager->aInJournal!=0 );
1963 pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1964 pPg->inJournal = 1;
1965 if( pPager->ckptInUse ){
1966 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1967 page_add_to_ckpt_list(pPg);
1968 }
1969 TRACE2("DONT_ROLLBACK %d\n", pPg->pgno);
1970 }
1971 if( pPager->ckptInUse && !pPg->inCkpt && (int)pPg->pgno<=pPager->ckptSize ){
1972 assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
1973 assert( pPager->aInCkpt!=0 );
1974 pPager->aInCkpt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
1975 page_add_to_ckpt_list(pPg);
1976 }
1977}
1978
1979/*
1980** Commit all changes to the database and release the write lock.
1981**
1982** If the commit fails for any reason, a rollback attempt is made
1983** and an error code is returned. If the commit worked, SQLITE_OK
1984** is returned.
1985*/
1986int sqlitepager_commit(Pager *pPager){
1987 int rc;
1988 PgHdr *pPg;
1989
1990 if( pPager->errMask==PAGER_ERR_FULL ){
1991 rc = sqlitepager_rollback(pPager);
1992 if( rc==SQLITE_OK ){
1993 rc = SQLITE_FULL;
1994 }
1995 return rc;
1996 }
1997 if( pPager->errMask!=0 ){
1998 rc = pager_errcode(pPager);
1999 return rc;
2000 }
2001 if( pPager->state!=SQLITE_WRITELOCK ){
2002 return SQLITE_ERROR;
2003 }
2004 TRACE1("COMMIT\n");
2005 if( pPager->dirtyFile==0 ){
2006 /* Exit early (without doing the time-consuming sqliteOsSync() calls)
2007 ** if there have been no changes to the database file. */
2008 assert( pPager->needSync==0 );
2009 rc = pager_unwritelock(pPager);
2010 pPager->dbSize = -1;
2011 return rc;
2012 }
2013 assert( pPager->journalOpen );
2014 rc = syncJournal(pPager);
2015 if( rc!=SQLITE_OK ){
2016 goto commit_abort;
2017 }
2018 pPg = pager_get_all_dirty_pages(pPager);
2019 if( pPg ){
2020 rc = pager_write_pagelist(pPg);
2021 if( rc || (!pPager->noSync && sqliteOsSync(&pPager->fd)!=SQLITE_OK) ){
2022 goto commit_abort;
2023 }
2024 }
2025 rc = pager_unwritelock(pPager);
2026 pPager->dbSize = -1;
2027 return rc;
2028
2029 /* Jump here if anything goes wrong during the commit process.
2030 */
2031commit_abort:
2032 rc = sqlitepager_rollback(pPager);
2033 if( rc==SQLITE_OK ){
2034 rc = SQLITE_FULL;
2035 }
2036 return rc;
2037}
2038
2039/*
2040** Rollback all changes. The database falls back to read-only mode.
2041** All in-memory cache pages revert to their original data contents.
2042** The journal is deleted.
2043**
2044** This routine cannot fail unless some other process is not following
2045** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
2046** process is writing trash into the journal file (SQLITE_CORRUPT) or
2047** unless a prior malloc() failed (SQLITE_NOMEM). Appropriate error
2048** codes are returned for all these occasions. Otherwise,
2049** SQLITE_OK is returned.
2050*/
2051int sqlitepager_rollback(Pager *pPager){
2052 int rc;
2053 TRACE1("ROLLBACK\n");
2054 if( !pPager->dirtyFile || !pPager->journalOpen ){
2055 rc = pager_unwritelock(pPager);
2056 pPager->dbSize = -1;
2057 return rc;
2058 }
2059
2060 if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
2061 if( pPager->state>=SQLITE_WRITELOCK ){
2062 pager_playback(pPager, 1);
2063 }
2064 return pager_errcode(pPager);
2065 }
2066 if( pPager->state!=SQLITE_WRITELOCK ){
2067 return SQLITE_OK;
2068 }
2069 rc = pager_playback(pPager, 1);
2070 if( rc!=SQLITE_OK ){
2071 rc = SQLITE_CORRUPT;
2072 pPager->errMask |= PAGER_ERR_CORRUPT;
2073 }
2074 pPager->dbSize = -1;
2075 return rc;
2076}
2077
2078/*
2079** Return TRUE if the database file is opened read-only. Return FALSE
2080** if the database is (in theory) writable.
2081*/
2082int sqlitepager_isreadonly(Pager *pPager){
2083 return pPager->readOnly;
2084}
2085
2086/*
2087** This routine is used for testing and analysis only.
2088*/
2089int *sqlitepager_stats(Pager *pPager){
2090 static int a[9];
2091 a[0] = pPager->nRef;
2092 a[1] = pPager->nPage;
2093 a[2] = pPager->mxPage;
2094 a[3] = pPager->dbSize;
2095 a[4] = pPager->state;
2096 a[5] = pPager->errMask;
2097 a[6] = pPager->nHit;
2098 a[7] = pPager->nMiss;
2099 a[8] = pPager->nOvfl;
2100 return a;
2101}
2102
2103/*
2104** Set the checkpoint.
2105**
2106** This routine should be called with the transaction journal already
2107** open. A new checkpoint journal is created that can be used to rollback
2108** changes of a single SQL command within a larger transaction.
2109*/
2110int sqlitepager_ckpt_begin(Pager *pPager){
2111 int rc;
2112 char zTemp[SQLITE_TEMPNAME_SIZE];
2113 if( !pPager->journalOpen ){
2114 pPager->ckptAutoopen = 1;
2115 return SQLITE_OK;
2116 }
2117 assert( pPager->journalOpen );
2118 assert( !pPager->ckptInUse );
2119 pPager->aInCkpt = sqliteMalloc( pPager->dbSize/8 + 1 );
2120 if( pPager->aInCkpt==0 ){
2121 sqliteOsReadLock(&pPager->fd);
2122 return SQLITE_NOMEM;
2123 }
2124#ifndef NDEBUG
2125 rc = sqliteOsFileSize(&pPager->jfd, &pPager->ckptJSize);
2126 if( rc ) goto ckpt_begin_failed;
2127 assert( pPager->ckptJSize ==
2128 pPager->nRec*JOURNAL_PG_SZ(journal_format)+JOURNAL_HDR_SZ(journal_format) );
2129#endif
2130 pPager->ckptJSize = pPager->nRec*JOURNAL_PG_SZ(journal_format)
2131 + JOURNAL_HDR_SZ(journal_format);
2132 pPager->ckptSize = pPager->dbSize;
2133 if( !pPager->ckptOpen ){
2134 rc = sqlitepager_opentemp(zTemp, &pPager->cpfd);
2135 if( rc ) goto ckpt_begin_failed;
2136 pPager->ckptOpen = 1;
2137 pPager->ckptNRec = 0;
2138 }
2139 pPager->ckptInUse = 1;
2140 return SQLITE_OK;
2141
2142ckpt_begin_failed:
2143 if( pPager->aInCkpt ){
2144 sqliteFree(pPager->aInCkpt);
2145 pPager->aInCkpt = 0;
2146 }
2147 return rc;
2148}
2149
2150/*
2151** Commit a checkpoint.
2152*/
2153int sqlitepager_ckpt_commit(Pager *pPager){
2154 if( pPager->ckptInUse ){
2155 PgHdr *pPg, *pNext;
2156 sqliteOsSeek(&pPager->cpfd, 0);
2157 /* sqliteOsTruncate(&pPager->cpfd, 0); */
2158 pPager->ckptNRec = 0;
2159 pPager->ckptInUse = 0;
2160 sqliteFree( pPager->aInCkpt );
2161 pPager->aInCkpt = 0;
2162 for(pPg=pPager->pCkpt; pPg; pPg=pNext){
2163 pNext = pPg->pNextCkpt;
2164 assert( pPg->inCkpt );
2165 pPg->inCkpt = 0;
2166 pPg->pPrevCkpt = pPg->pNextCkpt = 0;
2167 }
2168 pPager->pCkpt = 0;
2169 }
2170 pPager->ckptAutoopen = 0;
2171 return SQLITE_OK;
2172}
2173
2174/*
2175** Rollback a checkpoint.
2176*/
2177int sqlitepager_ckpt_rollback(Pager *pPager){
2178 int rc;
2179 if( pPager->ckptInUse ){
2180 rc = pager_ckpt_playback(pPager);
2181 sqlitepager_ckpt_commit(pPager);
2182 }else{
2183 rc = SQLITE_OK;
2184 }
2185 pPager->ckptAutoopen = 0;
2186 return rc;
2187}
2188
2189/*
2190** Return the full pathname of the database file.
2191*/
2192const char *sqlitepager_filename(Pager *pPager){
2193 return pPager->zFilename;
2194}
2195
2196/*
2197** Set the codec for this pager
2198*/
2199void sqlitepager_set_codec(
2200 Pager *pPager,
2201 void (*xCodec)(void*,void*,Pgno,int),
2202 void *pCodecArg
2203){
2204 pPager->xCodec = xCodec;
2205 pPager->pCodecArg = pCodecArg;
2206}
2207
#ifdef SQLITE_TEST
/*
** Print a listing of all referenced pages and their ref count.
**
** Walks the pager's list of all pages and writes one line to standard
** output for every page whose nRef is greater than zero.  The address of
** the page data is printed with %p so that it is not truncated on
** platforms where a pointer is wider than an int.
*/
void sqlitepager_refdump(Pager *pPager){
  PgHdr *pPg;
  for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
    if( pPg->nRef<=0 ) continue;
    printf("PAGE %3d addr=%p nRef=%d\n",
       (int)pPg->pgno, (void*)PGHDR_TO_DATA(pPg), pPg->nRef);
  }
}
#endif
Note: See TracBrowser for help on using the repository browser.