Ignore:
Timestamp:
Mar 18, 2016, 7:53:30 PM (9 years ago)
Author:
Silvan Scherrer
Message:

tdb: Use DosSetFileLocks directly for db locks.

This is to overcome some fcntl() API incompleteness in kLIBC
(like inability to upgrade locks or join adjacent lock regions
into one). It made the torture test run a bit better but there
are still two major problems both coming from DosSetFileLocks
impl (which fcntl() is currently based upon too): a) inability to
detect deadlocks and b) missing atomic unlock/lock support if
unlock/lock regions don't match.

With the current implementation, tdbtorture works fine for 1 or 2
worker processes but hangs when there are 3 or more. [Before that,
it would only work with 1 process and would likely corrupt
the database and terminate if there were 2 or more processes].

Author: Dmitriy Kuminov (@dmik).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/server/lib/tdb/common/lock.c

    r878 r895  
    3333}
    3434
     35#ifdef __OS2__
     36enum os2_fl {
     37    OS2_FL_LOCK = 0x1,
     38    OS2_FL_RW = 0x2,
     39    OS2_FL_WAIT = 0x4,
     40    OS2_FL_UPGRADE = 0x8,
     41};
     42
     43static int os2_set_file_locks(struct tdb_context *tdb, enum os2_fl flags,
     44                              off_t start, off_t offset, off_t len)
     45{
     46    FILELOCKL locks[2] = {0};
     47    ULONG fl;
     48    BOOL dowait;
     49    APIRET arc;
     50
     51    if ((!(flags & OS2_FL_UPGRADE) && (start > offset || start < 0)) ||
     52        offset < 0 || offset + len < 0) {
     53        errno = EINVAL;
     54        return -1;
     55    }
     56
     57    fl = (flags & OS2_FL_RW) ? 0 : 1; /* excluive when rw, shared otherwise */
     58
     59    if (flags & OS2_FL_UPGRADE) {
     60        locks[0].lOffset = offset;
     61        locks[0].lRange = len == 0 ? OFF_MAX : len;
     62        locks[1] = locks[0];
     63        fl |= 2; /* atomic */
     64    } else {
     65        if (start != offset) {
     66            locks[0].lOffset = start;
     67            locks[0].lRange = offset - start;
     68            fl |= 2; /* atomic */
     69        }
     70        locks[1].lOffset = start;
     71        locks[1].lRange = len == 0 ? OFF_MAX : (locks[0].lRange + len);
     72    }
     73
     74    dowait = (flags & (OS2_FL_LOCK | OS2_FL_UPGRADE)) && (flags & OS2_FL_WAIT);
     75
     76    TDB_LOG((tdb, TDB_DEBUG_TRACE, "os2_file_locks: fd=%d lock_fd=%d flags=%x start=%lld offset=%lld len=%lld pid=%d\n",
     77             tdb->fd, tdb->lock_fd, flags, start, offset, len, getpid()));
     78
     79    arc = DosSetFileLocksL(tdb->lock_fd,
     80                           &locks[(flags & OS2_FL_LOCK) ? 0 : 1],
     81                           &locks[(flags & OS2_FL_LOCK) ? 1 : 0],
     82                           dowait ? SEM_INDEFINITE_WAIT : SEM_IMMEDIATE_RETURN,
     83                           fl);
     84
     85    TDB_LOG((tdb, TDB_DEBUG_TRACE, "os2_file_locks: arc=%d pid=%d\n", arc, getpid()));
     86
     87    if (arc) {
     88        switch (arc) {
     89            case ERROR_LOCK_VIOLATION:
     90                errno = EACCES;
     91                break;
     92            case ERROR_INTERRUPT:
     93                errno = EINTR;
     94                break;
     95            case ERROR_TIMEOUT:
     96                errno = EDEADLK;
     97                break;
     98            default:
     99                TDB_LOG((tdb, TDB_DEBUG_ERROR, "os2_file_locks failed, lock_fd=%d flags=%x start=%d off=%d len=%d (arc=%d)\n",
     100                         tdb->lock_fd, flags, start, offset, len, arc));
     101        }
     102        return -1;
     103    }
     104
     105    return 0;
     106}
     107#else /* __OS2__ */
    35108static int fcntl_lock(struct tdb_context *tdb,
    36109                      int rw, off_t off, off_t len, bool waitflag)
     
    44117        fl.l_pid = 0;
    45118
    46 #ifdef __OS2__
    47         int rc = 0;
    48         int lockFile = 0;
    49 
    50         if (off == ACTIVE_LOCK || off == OPEN_LOCK || off == TRANSACTION_LOCK)
    51                 lockFile = tdb->hActiveLock;
    52         else
    53                 lockFile = tdb->fd;
    54 
    55         int cmd = 0;
    56         if (waitflag)
    57                 cmd = F_SETLKW;
    58         else
    59                 cmd = F_SETLK;
    60 
    61         rc = fcntl(lockFile, cmd, &fl);
    62         // if the first lock doesn't work and it's a complete lock,
    63         // we split it in 2 parts. first hash size*4 and then the rest
    64         if (rc != 0 && off == FREELIST_TOP && len == 0) {
    65                 fl.l_len = tdb->header.hash_size * 4;
    66                 rc = fcntl(lockFile, cmd, &fl);
    67                 if (rc == 0) {
    68                         fl.l_start = off + tdb->header.hash_size * 4;
    69                         fl.l_len = 0;
    70                         rc = fcntl(lockFile, cmd, &fl);
    71                 }
    72         }
    73 
    74         TDB_LOG((tdb, TDB_DEBUG_TRACE,"fcntl_lock: (fd=%d) offset=%lld rw_type=%d len=%lld waitflag=%d (rc=%d) pid=%d\n",
    75                 lockFile, off, rw, len, waitflag, rc, getpid()));
    76 
    77         return rc;
    78 #else
    79119        if (waitflag)
    80120                return fcntl(tdb->fd, F_SETLKW, &fl);
    81121        else
    82122                return fcntl(tdb->fd, F_SETLK, &fl);
    83 #endif
    84123}
    85124
     
    151190        fl.l_pid = 0;
    152191
    153 #ifdef __OS2__
    154         int rc = 0;
    155         int lockFile = 0;
    156         if (off == ACTIVE_LOCK || off == OPEN_LOCK || off == TRANSACTION_LOCK)
    157                 lockFile = tdb->hActiveLock;
    158         else
    159                 lockFile = tdb->fd;
    160 
    161         rc = fcntl(lockFile, F_SETLKW, &fl);
    162         // if the first unlock doesn't work and it's a complete unlock,
    163         // we split it in 2 parts. first hash size*4 and then the rest
    164         // as it was locked that way as well. and it seems fcntl() doesn't care
    165         if (rc != 0 && off == FREELIST_TOP && len == 0) {
    166                 fl.l_len = tdb->header.hash_size * 4;
    167                 rc = fcntl(lockFile, F_SETLKW, &fl);
    168                 if (rc == 0) {
    169                         fl.l_start = off + tdb->header.hash_size * 4;
    170                         fl.l_len = 0;
    171                         rc = fcntl(lockFile, F_SETLKW, &fl);
    172                 }
    173         }
    174 
    175         TDB_LOG((tdb, TDB_DEBUG_TRACE,"fcntl_unlock: (fd=%d) offset=%lld rw_type=%d len=%lld (rc=%d) pid=%d\n",
    176                  lockFile, off, rw, len, rc, getpid()));
    177 
    178         return rc;
    179 #else
    180192        return fcntl(tdb->fd, F_SETLKW, &fl);
    181 #endif
    182 }
     193}
     194#endif /* __OS2__ */
    183195
    184196/* list -1 is the alloc list, otherwise a hash chain. */
     
    196208   note that a len of zero means lock to end of file
    197209*/
     210#ifdef __OS2__
     211static int tdb_brlock_ex(struct tdb_context *tdb,
     212                         int rw_type, tdb_off_t start,
     213                         tdb_off_t offset, size_t len,
     214                         enum tdb_lock_flags flags)
     215#else
    198216int tdb_brlock(struct tdb_context *tdb,
    199217               int rw_type, tdb_off_t offset, size_t len,
    200218               enum tdb_lock_flags flags)
     219#endif
    201220{
    202221        int ret;
     
    215234        }
    216235
     236#ifdef __OS2__
     237        int os2_flags = OS2_FL_LOCK;
     238        if (rw_type == F_WRLCK) {
     239            os2_flags |= OS2_FL_RW;
     240        }
     241        if (flags & TDB_LOCK_WAIT) {
     242            os2_flags |= OS2_FL_WAIT;
     243        }
     244        if (flags & TDB_LOCK_UPGRADE) {
     245            os2_flags |= OS2_FL_UPGRADE;
     246        }
     247#endif
     248
    217249        do {
     250#ifdef __OS2__
     251                ret = os2_set_file_locks(tdb, os2_flags, start, offset, len);
     252#else
    218253                ret = fcntl_lock(tdb, rw_type, offset, len,
    219254                                 flags & TDB_LOCK_WAIT);
     255#endif
    220256                /* Check for a sigalarm break. */
    221257                if (ret == -1 && errno == EINTR &&
     
    232268                 * locks. */
    233269                if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) {
    234                         TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d flags=%d len=%d\n",
    235                                  tdb->fd, offset, rw_type, flags, (int)len));
     270                        TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %d rw_type=%d flags=%d len=%d (errno=%d)\n",
     271                                 tdb->fd, offset, rw_type, flags, (int)len, errno));
    236272                }
    237273                return -1;
     
    240276}
    241277
     278#ifdef __OS2__
     279int tdb_brlock(struct tdb_context *tdb,
     280               int rw_type, tdb_off_t offset, size_t len,
     281               enum tdb_lock_flags flags)
     282{
     283    return tdb_brlock_ex(tdb, rw_type, offset, offset, len, flags);
     284}
     285#endif
     286
     287#ifdef __OS2__
     288static int tdb_brunlock_ex(struct tdb_context *tdb,
     289                           int rw_type, tdb_off_t start,
     290                           tdb_off_t offset, size_t len)
     291#else
    242292int tdb_brunlock(struct tdb_context *tdb,
    243293                 int rw_type, tdb_off_t offset, size_t len)
     294#endif
    244295{
    245296        int ret;
     
    250301
    251302        do {
     303#ifdef __OS2__
     304                ret = os2_set_file_locks(tdb, rw_type == F_WRLCK ? OS2_FL_RW : 0,
     305                                         start, offset, len);
     306#else
    252307                ret = fcntl_unlock(tdb, rw_type, offset, len);
     308#endif
    253309        } while (ret == -1 && errno == EINTR);
    254310
    255311        if (ret == -1) {
    256                 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %d rw_type=%d len=%d\n",
    257                          tdb->fd, offset, rw_type, (int)len));
     312                TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %d rw_type=%d len=%d (errno=%d)\n",
     313                         tdb->fd, offset, rw_type, (int)len, errno));
    258314        }
    259315        return ret;
    260316}
     317
     318#ifdef __OS2__
     319int tdb_brunlock(struct tdb_context *tdb,
     320                 int rw_type, tdb_off_t offset, size_t len)
     321{
     322        return tdb_brunlock_ex(tdb, rw_type, offset, offset, len);
     323}
     324#endif
    261325
    262326/*
     
    285349        while (count--) {
    286350                struct timeval tv;
    287 #ifdef __OS2__
    288                 // we need to remove locks, as upgrade doesn't work
    289                 tdb_brunlock(tdb, F_RDLCK, FREELIST_TOP, 0);
    290 #endif
    291351                if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0,
    292                                TDB_LOCK_WAIT|TDB_LOCK_PROBE) == 0) {
     352                               TDB_LOCK_WAIT|TDB_LOCK_PROBE|TDB_LOCK_UPGRADE) == 0) {
    293353                        tdb->allrecord_lock.ltype = F_WRLCK;
    294354                        tdb->allrecord_lock.off = 0;
     
    580640}
    581641
     642#ifdef __OS2__
     643#define TDB_ADJLOCK_START_DECL size_t start,
     644#define TDB_ADJLOCK_START start,
     645#define TDB_ADJLOCK(tdb, rw_type, start, offset, len, flags) \
     646        tdb_brlock_ex(tdb, rw_type, start, offset, len, flags)
     647#define TDB_ADJUNLOCK(tdb, rw_type, start, offset, len) \
     648        tdb_brunlock_ex(tdb, rw_type, start, offset, len)
     649#define TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start, offset, len) \
     650        tdb_chainlock_gradual(tdb, ltype, flags, start, offset, len)
     651#else
     652#define TDB_ADJLOCK_START_DECL
     653#define TDB_ADJLOCK_START_REF
     654#define TDB_ADJLOCK(tdb, rw_type, start, offset, len, flags) \
     655        tdb_brlock(tdb, rw_type, offset, len, flags)
     656#define TDB_ADJUNLOCK(tdb, rw_type, start, offset, len) \
     657        tdb_brunlock(tdb, rw_type, offset, len)
     658#define TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start, offset, len) \
     659        tdb_chainlock_gradual(tdb, ltype, flags, offset, len)
     660#endif
     661
    582662/* We only need to lock individual bytes, but Linux merges consecutive locks
    583663 * so we lock in contiguous ranges. */
    584664static int tdb_chainlock_gradual(struct tdb_context *tdb,
    585665                                 int ltype, enum tdb_lock_flags flags,
    586                                  size_t off, size_t len)
     666                                 TDB_ADJLOCK_START_DECL size_t off, size_t len)
    587667{
    588668        int ret;
     
    591671        if (len <= 4) {
    592672                /* Single record.  Just do blocking lock. */
    593                 return tdb_brlock(tdb, ltype, off, len, flags);
     673                return TDB_ADJLOCK(tdb, ltype, start, off, len, flags);
    594674        }
    595675
    596676        /* First we try non-blocking. */
    597         ret = tdb_brlock(tdb, ltype, off, len, nb_flags);
     677        ret = TDB_ADJLOCK(tdb, ltype, start, off, len, nb_flags);
    598678        if (ret == 0) {
    599679                return 0;
     
    601681
    602682        /* Try locking first half, then second. */
    603         ret = tdb_chainlock_gradual(tdb, ltype, flags, off, len / 2);
     683        ret = TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start, off, len / 2);
    604684        if (ret == -1)
    605685                return -1;
    606686
    607         ret = tdb_chainlock_gradual(tdb, ltype, flags,
     687        ret = TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, start,
    608688                                    off + len / 2, len - len / 2);
    609689        if (ret == -1) {
    610                 tdb_brunlock(tdb, ltype, off, len / 2);
     690                TDB_ADJUNLOCK(tdb, ltype, start, off, len / 2);
    611691                return -1;
    612692        }
     
    634714         * It is (1) which cause the starvation problem, so we're only
    635715         * gradual for that. */
    636         if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP,
     716        if (TDB_CHAINLOCK_GRADUAL(tdb, ltype, flags, FREELIST_TOP, FREELIST_TOP,
    637717                                  tdb->header.hash_size * 4) == -1) {
    638718                return -1;
     
    640720
    641721        /* Grab individual record locks. */
    642         if (tdb_brlock(tdb, ltype, lock_offset(tdb->header.hash_size), 0,
    643                        flags) == -1) {
     722        if (TDB_ADJLOCK(tdb, ltype, FREELIST_TOP, lock_offset(tdb->header.hash_size), 0,
     723                        flags) == -1) {
    644724                tdb_brunlock(tdb, ltype, FREELIST_TOP,
    645725                             tdb->header.hash_size * 4);
Note: See TracChangeset for help on using the changeset viewer.