[391] | 1 | /* Drop in replacement for heapq.py
|
---|
[2] | 2 |
|
---|
| 3 | C implementation derived directly from heapq.py in Py2.3
|
---|
| 4 | which was written by Kevin O'Connor, augmented by Tim Peters,
|
---|
[391] | 5 | annotated by François Pinard, and converted to C by Raymond Hettinger.
|
---|
[2] | 6 |
|
---|
| 7 | */
|
---|
| 8 |
|
---|
| 9 | #include "Python.h"
|
---|
| 10 |
|
---|
| 11 | /* Older implementations of heapq used Py_LE for comparisons. Now, it uses
|
---|
| 12 | Py_LT so it will match min(), sorted(), and bisect(). Unfortunately, some
|
---|
| 13 | client code (Twisted for example) relied on Py_LE, so this little function
|
---|
[391] | 14 | restores compatibility by trying both.
|
---|
[2] | 15 | */
|
---|
| 16 | static int
|
---|
| 17 | cmp_lt(PyObject *x, PyObject *y)
|
---|
| 18 | {
|
---|
[391] | 19 | int cmp;
|
---|
| 20 | static PyObject *lt = NULL;
|
---|
[2] | 21 |
|
---|
[391] | 22 | if (lt == NULL) {
|
---|
| 23 | lt = PyString_FromString("__lt__");
|
---|
| 24 | if (lt == NULL)
|
---|
| 25 | return -1;
|
---|
| 26 | }
|
---|
| 27 | if (PyObject_HasAttr(x, lt))
|
---|
| 28 | return PyObject_RichCompareBool(x, y, Py_LT);
|
---|
| 29 | cmp = PyObject_RichCompareBool(y, x, Py_LE);
|
---|
| 30 | if (cmp != -1)
|
---|
| 31 | cmp = 1 - cmp;
|
---|
| 32 | return cmp;
|
---|
[2] | 33 | }
|
---|
| 34 |
|
---|
| 35 | static int
|
---|
| 36 | _siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
|
---|
| 37 | {
|
---|
[391] | 38 | PyObject *newitem, *parent, *olditem;
|
---|
| 39 | int cmp;
|
---|
| 40 | Py_ssize_t parentpos;
|
---|
| 41 | Py_ssize_t size;
|
---|
[2] | 42 |
|
---|
[391] | 43 | assert(PyList_Check(heap));
|
---|
| 44 | size = PyList_GET_SIZE(heap);
|
---|
| 45 | if (pos >= size) {
|
---|
| 46 | PyErr_SetString(PyExc_IndexError, "index out of range");
|
---|
| 47 | return -1;
|
---|
| 48 | }
|
---|
[2] | 49 |
|
---|
[391] | 50 | newitem = PyList_GET_ITEM(heap, pos);
|
---|
| 51 | Py_INCREF(newitem);
|
---|
| 52 | /* Follow the path to the root, moving parents down until finding
|
---|
| 53 | a place newitem fits. */
|
---|
| 54 | while (pos > startpos){
|
---|
| 55 | parentpos = (pos - 1) >> 1;
|
---|
| 56 | parent = PyList_GET_ITEM(heap, parentpos);
|
---|
| 57 | cmp = cmp_lt(newitem, parent);
|
---|
| 58 | if (cmp == -1) {
|
---|
| 59 | Py_DECREF(newitem);
|
---|
| 60 | return -1;
|
---|
| 61 | }
|
---|
| 62 | if (size != PyList_GET_SIZE(heap)) {
|
---|
| 63 | Py_DECREF(newitem);
|
---|
| 64 | PyErr_SetString(PyExc_RuntimeError,
|
---|
| 65 | "list changed size during iteration");
|
---|
| 66 | return -1;
|
---|
| 67 | }
|
---|
| 68 | if (cmp == 0)
|
---|
| 69 | break;
|
---|
| 70 | Py_INCREF(parent);
|
---|
| 71 | olditem = PyList_GET_ITEM(heap, pos);
|
---|
| 72 | PyList_SET_ITEM(heap, pos, parent);
|
---|
| 73 | Py_DECREF(olditem);
|
---|
| 74 | pos = parentpos;
|
---|
| 75 | if (size != PyList_GET_SIZE(heap)) {
|
---|
| 76 | PyErr_SetString(PyExc_RuntimeError,
|
---|
| 77 | "list changed size during iteration");
|
---|
| 78 | return -1;
|
---|
| 79 | }
|
---|
| 80 | }
|
---|
| 81 | Py_DECREF(PyList_GET_ITEM(heap, pos));
|
---|
| 82 | PyList_SET_ITEM(heap, pos, newitem);
|
---|
| 83 | return 0;
|
---|
[2] | 84 | }
|
---|
| 85 |
|
---|
| 86 | static int
|
---|
| 87 | _siftup(PyListObject *heap, Py_ssize_t pos)
|
---|
| 88 | {
|
---|
[391] | 89 | Py_ssize_t startpos, endpos, childpos, rightpos;
|
---|
| 90 | int cmp;
|
---|
| 91 | PyObject *newitem, *tmp, *olditem;
|
---|
| 92 | Py_ssize_t size;
|
---|
[2] | 93 |
|
---|
[391] | 94 | assert(PyList_Check(heap));
|
---|
| 95 | size = PyList_GET_SIZE(heap);
|
---|
| 96 | endpos = size;
|
---|
| 97 | startpos = pos;
|
---|
| 98 | if (pos >= endpos) {
|
---|
| 99 | PyErr_SetString(PyExc_IndexError, "index out of range");
|
---|
| 100 | return -1;
|
---|
| 101 | }
|
---|
| 102 | newitem = PyList_GET_ITEM(heap, pos);
|
---|
| 103 | Py_INCREF(newitem);
|
---|
[2] | 104 |
|
---|
[391] | 105 | /* Bubble up the smaller child until hitting a leaf. */
|
---|
| 106 | childpos = 2*pos + 1; /* leftmost child position */
|
---|
| 107 | while (childpos < endpos) {
|
---|
| 108 | /* Set childpos to index of smaller child. */
|
---|
| 109 | rightpos = childpos + 1;
|
---|
| 110 | if (rightpos < endpos) {
|
---|
| 111 | cmp = cmp_lt(
|
---|
| 112 | PyList_GET_ITEM(heap, childpos),
|
---|
| 113 | PyList_GET_ITEM(heap, rightpos));
|
---|
| 114 | if (cmp == -1) {
|
---|
| 115 | Py_DECREF(newitem);
|
---|
| 116 | return -1;
|
---|
| 117 | }
|
---|
| 118 | if (cmp == 0)
|
---|
| 119 | childpos = rightpos;
|
---|
| 120 | }
|
---|
| 121 | if (size != PyList_GET_SIZE(heap)) {
|
---|
| 122 | Py_DECREF(newitem);
|
---|
| 123 | PyErr_SetString(PyExc_RuntimeError,
|
---|
| 124 | "list changed size during iteration");
|
---|
| 125 | return -1;
|
---|
| 126 | }
|
---|
| 127 | /* Move the smaller child up. */
|
---|
| 128 | tmp = PyList_GET_ITEM(heap, childpos);
|
---|
| 129 | Py_INCREF(tmp);
|
---|
| 130 | olditem = PyList_GET_ITEM(heap, pos);
|
---|
| 131 | PyList_SET_ITEM(heap, pos, tmp);
|
---|
| 132 | Py_DECREF(olditem);
|
---|
| 133 | pos = childpos;
|
---|
| 134 | childpos = 2*pos + 1;
|
---|
| 135 | if (size != PyList_GET_SIZE(heap)) {
|
---|
| 136 | PyErr_SetString(PyExc_RuntimeError,
|
---|
| 137 | "list changed size during iteration");
|
---|
| 138 | return -1;
|
---|
| 139 | }
|
---|
| 140 | }
|
---|
[2] | 141 |
|
---|
[391] | 142 | /* The leaf at pos is empty now. Put newitem there, and bubble
|
---|
| 143 | it up to its final resting place (by sifting its parents down). */
|
---|
| 144 | Py_DECREF(PyList_GET_ITEM(heap, pos));
|
---|
| 145 | PyList_SET_ITEM(heap, pos, newitem);
|
---|
| 146 | return _siftdown(heap, startpos, pos);
|
---|
[2] | 147 | }
|
---|
| 148 |
|
---|
| 149 | static PyObject *
|
---|
| 150 | heappush(PyObject *self, PyObject *args)
|
---|
| 151 | {
|
---|
[391] | 152 | PyObject *heap, *item;
|
---|
[2] | 153 |
|
---|
[391] | 154 | if (!PyArg_UnpackTuple(args, "heappush", 2, 2, &heap, &item))
|
---|
| 155 | return NULL;
|
---|
[2] | 156 |
|
---|
[391] | 157 | if (!PyList_Check(heap)) {
|
---|
| 158 | PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
|
---|
| 159 | return NULL;
|
---|
| 160 | }
|
---|
[2] | 161 |
|
---|
[391] | 162 | if (PyList_Append(heap, item) == -1)
|
---|
| 163 | return NULL;
|
---|
[2] | 164 |
|
---|
[391] | 165 | if (_siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1) == -1)
|
---|
| 166 | return NULL;
|
---|
| 167 | Py_INCREF(Py_None);
|
---|
| 168 | return Py_None;
|
---|
[2] | 169 | }
|
---|
| 170 |
|
---|
| 171 | PyDoc_STRVAR(heappush_doc,
|
---|
[391] | 172 | "heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
|
---|
[2] | 173 |
|
---|
| 174 | static PyObject *
|
---|
| 175 | heappop(PyObject *self, PyObject *heap)
|
---|
| 176 | {
|
---|
[391] | 177 | PyObject *lastelt, *returnitem;
|
---|
| 178 | Py_ssize_t n;
|
---|
[2] | 179 |
|
---|
[391] | 180 | if (!PyList_Check(heap)) {
|
---|
| 181 | PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
|
---|
| 182 | return NULL;
|
---|
| 183 | }
|
---|
[2] | 184 |
|
---|
[391] | 185 | /* # raises appropriate IndexError if heap is empty */
|
---|
| 186 | n = PyList_GET_SIZE(heap);
|
---|
| 187 | if (n == 0) {
|
---|
| 188 | PyErr_SetString(PyExc_IndexError, "index out of range");
|
---|
| 189 | return NULL;
|
---|
| 190 | }
|
---|
[2] | 191 |
|
---|
[391] | 192 | lastelt = PyList_GET_ITEM(heap, n-1) ;
|
---|
| 193 | Py_INCREF(lastelt);
|
---|
| 194 | PyList_SetSlice(heap, n-1, n, NULL);
|
---|
| 195 | n--;
|
---|
[2] | 196 |
|
---|
[391] | 197 | if (!n)
|
---|
| 198 | return lastelt;
|
---|
| 199 | returnitem = PyList_GET_ITEM(heap, 0);
|
---|
| 200 | PyList_SET_ITEM(heap, 0, lastelt);
|
---|
| 201 | if (_siftup((PyListObject *)heap, 0) == -1) {
|
---|
| 202 | Py_DECREF(returnitem);
|
---|
| 203 | return NULL;
|
---|
| 204 | }
|
---|
| 205 | return returnitem;
|
---|
[2] | 206 | }
|
---|
| 207 |
|
---|
| 208 | PyDoc_STRVAR(heappop_doc,
|
---|
| 209 | "Pop the smallest item off the heap, maintaining the heap invariant.");
|
---|
| 210 |
|
---|
| 211 | static PyObject *
|
---|
| 212 | heapreplace(PyObject *self, PyObject *args)
|
---|
| 213 | {
|
---|
[391] | 214 | PyObject *heap, *item, *returnitem;
|
---|
[2] | 215 |
|
---|
[391] | 216 | if (!PyArg_UnpackTuple(args, "heapreplace", 2, 2, &heap, &item))
|
---|
| 217 | return NULL;
|
---|
[2] | 218 |
|
---|
[391] | 219 | if (!PyList_Check(heap)) {
|
---|
| 220 | PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
|
---|
| 221 | return NULL;
|
---|
| 222 | }
|
---|
[2] | 223 |
|
---|
[391] | 224 | if (PyList_GET_SIZE(heap) < 1) {
|
---|
| 225 | PyErr_SetString(PyExc_IndexError, "index out of range");
|
---|
| 226 | return NULL;
|
---|
| 227 | }
|
---|
[2] | 228 |
|
---|
[391] | 229 | returnitem = PyList_GET_ITEM(heap, 0);
|
---|
| 230 | Py_INCREF(item);
|
---|
| 231 | PyList_SET_ITEM(heap, 0, item);
|
---|
| 232 | if (_siftup((PyListObject *)heap, 0) == -1) {
|
---|
| 233 | Py_DECREF(returnitem);
|
---|
| 234 | return NULL;
|
---|
| 235 | }
|
---|
| 236 | return returnitem;
|
---|
[2] | 237 | }
|
---|
| 238 |
|
---|
| 239 | PyDoc_STRVAR(heapreplace_doc,
|
---|
[391] | 240 | "heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
|
---|
[2] | 241 | \n\
|
---|
| 242 | This is more efficient than heappop() followed by heappush(), and can be\n\
|
---|
| 243 | more appropriate when using a fixed-size heap. Note that the value\n\
|
---|
| 244 | returned may be larger than item! That constrains reasonable uses of\n\
|
---|
| 245 | this routine unless written as part of a conditional replacement:\n\n\
|
---|
[391] | 246 | if item > heap[0]:\n\
|
---|
| 247 | item = heapreplace(heap, item)\n");
|
---|
[2] | 248 |
|
---|
| 249 | static PyObject *
|
---|
| 250 | heappushpop(PyObject *self, PyObject *args)
|
---|
| 251 | {
|
---|
[391] | 252 | PyObject *heap, *item, *returnitem;
|
---|
| 253 | int cmp;
|
---|
[2] | 254 |
|
---|
[391] | 255 | if (!PyArg_UnpackTuple(args, "heappushpop", 2, 2, &heap, &item))
|
---|
| 256 | return NULL;
|
---|
[2] | 257 |
|
---|
[391] | 258 | if (!PyList_Check(heap)) {
|
---|
| 259 | PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
|
---|
| 260 | return NULL;
|
---|
| 261 | }
|
---|
[2] | 262 |
|
---|
[391] | 263 | if (PyList_GET_SIZE(heap) < 1) {
|
---|
| 264 | Py_INCREF(item);
|
---|
| 265 | return item;
|
---|
| 266 | }
|
---|
[2] | 267 |
|
---|
[391] | 268 | cmp = cmp_lt(PyList_GET_ITEM(heap, 0), item);
|
---|
| 269 | if (cmp == -1)
|
---|
| 270 | return NULL;
|
---|
| 271 | if (cmp == 0) {
|
---|
| 272 | Py_INCREF(item);
|
---|
| 273 | return item;
|
---|
| 274 | }
|
---|
[2] | 275 |
|
---|
[391] | 276 | returnitem = PyList_GET_ITEM(heap, 0);
|
---|
| 277 | Py_INCREF(item);
|
---|
| 278 | PyList_SET_ITEM(heap, 0, item);
|
---|
| 279 | if (_siftup((PyListObject *)heap, 0) == -1) {
|
---|
| 280 | Py_DECREF(returnitem);
|
---|
| 281 | return NULL;
|
---|
| 282 | }
|
---|
| 283 | return returnitem;
|
---|
[2] | 284 | }
|
---|
| 285 |
|
---|
| 286 | PyDoc_STRVAR(heappushpop_doc,
|
---|
[391] | 287 | "heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
|
---|
[2] | 288 | from the heap. The combined action runs more efficiently than\n\
|
---|
| 289 | heappush() followed by a separate call to heappop().");
|
---|
| 290 |
|
---|
| 291 | static PyObject *
|
---|
| 292 | heapify(PyObject *self, PyObject *heap)
|
---|
| 293 | {
|
---|
[391] | 294 | Py_ssize_t i, n;
|
---|
[2] | 295 |
|
---|
[391] | 296 | if (!PyList_Check(heap)) {
|
---|
| 297 | PyErr_SetString(PyExc_TypeError, "heap argument must be a list");
|
---|
| 298 | return NULL;
|
---|
| 299 | }
|
---|
[2] | 300 |
|
---|
[391] | 301 | n = PyList_GET_SIZE(heap);
|
---|
| 302 | /* Transform bottom-up. The largest index there's any point to
|
---|
| 303 | looking at is the largest with a child index in-range, so must
|
---|
| 304 | have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
|
---|
| 305 | (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
|
---|
| 306 | n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
|
---|
| 307 | and that's again n//2-1.
|
---|
| 308 | */
|
---|
| 309 | for (i=n/2-1 ; i>=0 ; i--)
|
---|
| 310 | if(_siftup((PyListObject *)heap, i) == -1)
|
---|
| 311 | return NULL;
|
---|
| 312 | Py_INCREF(Py_None);
|
---|
| 313 | return Py_None;
|
---|
[2] | 314 | }
|
---|
| 315 |
|
---|
| 316 | PyDoc_STRVAR(heapify_doc,
|
---|
| 317 | "Transform list into a heap, in-place, in O(len(heap)) time.");
|
---|
| 318 |
|
---|
| 319 | static PyObject *
|
---|
| 320 | nlargest(PyObject *self, PyObject *args)
|
---|
| 321 | {
|
---|
[391] | 322 | PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
|
---|
| 323 | Py_ssize_t i, n;
|
---|
| 324 | int cmp;
|
---|
[2] | 325 |
|
---|
[391] | 326 | if (!PyArg_ParseTuple(args, "nO:nlargest", &n, &iterable))
|
---|
| 327 | return NULL;
|
---|
[2] | 328 |
|
---|
[391] | 329 | it = PyObject_GetIter(iterable);
|
---|
| 330 | if (it == NULL)
|
---|
| 331 | return NULL;
|
---|
[2] | 332 |
|
---|
[391] | 333 | heap = PyList_New(0);
|
---|
| 334 | if (heap == NULL)
|
---|
| 335 | goto fail;
|
---|
[2] | 336 |
|
---|
[391] | 337 | for (i=0 ; i<n ; i++ ){
|
---|
| 338 | elem = PyIter_Next(it);
|
---|
| 339 | if (elem == NULL) {
|
---|
| 340 | if (PyErr_Occurred())
|
---|
| 341 | goto fail;
|
---|
| 342 | else
|
---|
| 343 | goto sortit;
|
---|
| 344 | }
|
---|
| 345 | if (PyList_Append(heap, elem) == -1) {
|
---|
| 346 | Py_DECREF(elem);
|
---|
| 347 | goto fail;
|
---|
| 348 | }
|
---|
| 349 | Py_DECREF(elem);
|
---|
| 350 | }
|
---|
| 351 | if (PyList_GET_SIZE(heap) == 0)
|
---|
| 352 | goto sortit;
|
---|
[2] | 353 |
|
---|
[391] | 354 | for (i=n/2-1 ; i>=0 ; i--)
|
---|
| 355 | if(_siftup((PyListObject *)heap, i) == -1)
|
---|
| 356 | goto fail;
|
---|
[2] | 357 |
|
---|
[391] | 358 | sol = PyList_GET_ITEM(heap, 0);
|
---|
| 359 | while (1) {
|
---|
| 360 | elem = PyIter_Next(it);
|
---|
| 361 | if (elem == NULL) {
|
---|
| 362 | if (PyErr_Occurred())
|
---|
| 363 | goto fail;
|
---|
| 364 | else
|
---|
| 365 | goto sortit;
|
---|
| 366 | }
|
---|
| 367 | cmp = cmp_lt(sol, elem);
|
---|
| 368 | if (cmp == -1) {
|
---|
| 369 | Py_DECREF(elem);
|
---|
| 370 | goto fail;
|
---|
| 371 | }
|
---|
| 372 | if (cmp == 0) {
|
---|
| 373 | Py_DECREF(elem);
|
---|
| 374 | continue;
|
---|
| 375 | }
|
---|
| 376 | oldelem = PyList_GET_ITEM(heap, 0);
|
---|
| 377 | PyList_SET_ITEM(heap, 0, elem);
|
---|
| 378 | Py_DECREF(oldelem);
|
---|
| 379 | if (_siftup((PyListObject *)heap, 0) == -1)
|
---|
| 380 | goto fail;
|
---|
| 381 | sol = PyList_GET_ITEM(heap, 0);
|
---|
| 382 | }
|
---|
[2] | 383 | sortit:
|
---|
[391] | 384 | if (PyList_Sort(heap) == -1)
|
---|
| 385 | goto fail;
|
---|
| 386 | if (PyList_Reverse(heap) == -1)
|
---|
| 387 | goto fail;
|
---|
| 388 | Py_DECREF(it);
|
---|
| 389 | return heap;
|
---|
[2] | 390 |
|
---|
| 391 | fail:
|
---|
[391] | 392 | Py_DECREF(it);
|
---|
| 393 | Py_XDECREF(heap);
|
---|
| 394 | return NULL;
|
---|
[2] | 395 | }
|
---|
| 396 |
|
---|
| 397 | PyDoc_STRVAR(nlargest_doc,
|
---|
| 398 | "Find the n largest elements in a dataset.\n\
|
---|
| 399 | \n\
|
---|
| 400 | Equivalent to: sorted(iterable, reverse=True)[:n]\n");
|
---|
| 401 |
|
---|
| 402 | static int
|
---|
| 403 | _siftdownmax(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
|
---|
| 404 | {
|
---|
[391] | 405 | PyObject *newitem, *parent;
|
---|
| 406 | int cmp;
|
---|
| 407 | Py_ssize_t parentpos;
|
---|
[2] | 408 |
|
---|
[391] | 409 | assert(PyList_Check(heap));
|
---|
| 410 | if (pos >= PyList_GET_SIZE(heap)) {
|
---|
| 411 | PyErr_SetString(PyExc_IndexError, "index out of range");
|
---|
| 412 | return -1;
|
---|
| 413 | }
|
---|
[2] | 414 |
|
---|
[391] | 415 | newitem = PyList_GET_ITEM(heap, pos);
|
---|
| 416 | Py_INCREF(newitem);
|
---|
| 417 | /* Follow the path to the root, moving parents down until finding
|
---|
| 418 | a place newitem fits. */
|
---|
| 419 | while (pos > startpos){
|
---|
| 420 | parentpos = (pos - 1) >> 1;
|
---|
| 421 | parent = PyList_GET_ITEM(heap, parentpos);
|
---|
| 422 | cmp = cmp_lt(parent, newitem);
|
---|
| 423 | if (cmp == -1) {
|
---|
| 424 | Py_DECREF(newitem);
|
---|
| 425 | return -1;
|
---|
| 426 | }
|
---|
| 427 | if (cmp == 0)
|
---|
| 428 | break;
|
---|
| 429 | Py_INCREF(parent);
|
---|
| 430 | Py_DECREF(PyList_GET_ITEM(heap, pos));
|
---|
| 431 | PyList_SET_ITEM(heap, pos, parent);
|
---|
| 432 | pos = parentpos;
|
---|
| 433 | }
|
---|
| 434 | Py_DECREF(PyList_GET_ITEM(heap, pos));
|
---|
| 435 | PyList_SET_ITEM(heap, pos, newitem);
|
---|
| 436 | return 0;
|
---|
[2] | 437 | }
|
---|
| 438 |
|
---|
| 439 | static int
|
---|
| 440 | _siftupmax(PyListObject *heap, Py_ssize_t pos)
|
---|
| 441 | {
|
---|
[391] | 442 | Py_ssize_t startpos, endpos, childpos, rightpos;
|
---|
| 443 | int cmp;
|
---|
| 444 | PyObject *newitem, *tmp;
|
---|
[2] | 445 |
|
---|
[391] | 446 | assert(PyList_Check(heap));
|
---|
| 447 | endpos = PyList_GET_SIZE(heap);
|
---|
| 448 | startpos = pos;
|
---|
| 449 | if (pos >= endpos) {
|
---|
| 450 | PyErr_SetString(PyExc_IndexError, "index out of range");
|
---|
| 451 | return -1;
|
---|
| 452 | }
|
---|
| 453 | newitem = PyList_GET_ITEM(heap, pos);
|
---|
| 454 | Py_INCREF(newitem);
|
---|
[2] | 455 |
|
---|
[391] | 456 | /* Bubble up the smaller child until hitting a leaf. */
|
---|
| 457 | childpos = 2*pos + 1; /* leftmost child position */
|
---|
| 458 | while (childpos < endpos) {
|
---|
| 459 | /* Set childpos to index of smaller child. */
|
---|
| 460 | rightpos = childpos + 1;
|
---|
| 461 | if (rightpos < endpos) {
|
---|
| 462 | cmp = cmp_lt(
|
---|
| 463 | PyList_GET_ITEM(heap, rightpos),
|
---|
| 464 | PyList_GET_ITEM(heap, childpos));
|
---|
| 465 | if (cmp == -1) {
|
---|
| 466 | Py_DECREF(newitem);
|
---|
| 467 | return -1;
|
---|
| 468 | }
|
---|
| 469 | if (cmp == 0)
|
---|
| 470 | childpos = rightpos;
|
---|
| 471 | }
|
---|
| 472 | /* Move the smaller child up. */
|
---|
| 473 | tmp = PyList_GET_ITEM(heap, childpos);
|
---|
| 474 | Py_INCREF(tmp);
|
---|
| 475 | Py_DECREF(PyList_GET_ITEM(heap, pos));
|
---|
| 476 | PyList_SET_ITEM(heap, pos, tmp);
|
---|
| 477 | pos = childpos;
|
---|
| 478 | childpos = 2*pos + 1;
|
---|
| 479 | }
|
---|
[2] | 480 |
|
---|
[391] | 481 | /* The leaf at pos is empty now. Put newitem there, and bubble
|
---|
| 482 | it up to its final resting place (by sifting its parents down). */
|
---|
| 483 | Py_DECREF(PyList_GET_ITEM(heap, pos));
|
---|
| 484 | PyList_SET_ITEM(heap, pos, newitem);
|
---|
| 485 | return _siftdownmax(heap, startpos, pos);
|
---|
[2] | 486 | }
|
---|
| 487 |
|
---|
| 488 | static PyObject *
|
---|
| 489 | nsmallest(PyObject *self, PyObject *args)
|
---|
| 490 | {
|
---|
[391] | 491 | PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
|
---|
| 492 | Py_ssize_t i, n;
|
---|
| 493 | int cmp;
|
---|
[2] | 494 |
|
---|
[391] | 495 | if (!PyArg_ParseTuple(args, "nO:nsmallest", &n, &iterable))
|
---|
| 496 | return NULL;
|
---|
[2] | 497 |
|
---|
[391] | 498 | it = PyObject_GetIter(iterable);
|
---|
| 499 | if (it == NULL)
|
---|
| 500 | return NULL;
|
---|
[2] | 501 |
|
---|
[391] | 502 | heap = PyList_New(0);
|
---|
| 503 | if (heap == NULL)
|
---|
| 504 | goto fail;
|
---|
[2] | 505 |
|
---|
[391] | 506 | for (i=0 ; i<n ; i++ ){
|
---|
| 507 | elem = PyIter_Next(it);
|
---|
| 508 | if (elem == NULL) {
|
---|
| 509 | if (PyErr_Occurred())
|
---|
| 510 | goto fail;
|
---|
| 511 | else
|
---|
| 512 | goto sortit;
|
---|
| 513 | }
|
---|
| 514 | if (PyList_Append(heap, elem) == -1) {
|
---|
| 515 | Py_DECREF(elem);
|
---|
| 516 | goto fail;
|
---|
| 517 | }
|
---|
| 518 | Py_DECREF(elem);
|
---|
| 519 | }
|
---|
| 520 | n = PyList_GET_SIZE(heap);
|
---|
| 521 | if (n == 0)
|
---|
| 522 | goto sortit;
|
---|
[2] | 523 |
|
---|
[391] | 524 | for (i=n/2-1 ; i>=0 ; i--)
|
---|
| 525 | if(_siftupmax((PyListObject *)heap, i) == -1)
|
---|
| 526 | goto fail;
|
---|
[2] | 527 |
|
---|
[391] | 528 | los = PyList_GET_ITEM(heap, 0);
|
---|
| 529 | while (1) {
|
---|
| 530 | elem = PyIter_Next(it);
|
---|
| 531 | if (elem == NULL) {
|
---|
| 532 | if (PyErr_Occurred())
|
---|
| 533 | goto fail;
|
---|
| 534 | else
|
---|
| 535 | goto sortit;
|
---|
| 536 | }
|
---|
| 537 | cmp = cmp_lt(elem, los);
|
---|
| 538 | if (cmp == -1) {
|
---|
| 539 | Py_DECREF(elem);
|
---|
| 540 | goto fail;
|
---|
| 541 | }
|
---|
| 542 | if (cmp == 0) {
|
---|
| 543 | Py_DECREF(elem);
|
---|
| 544 | continue;
|
---|
| 545 | }
|
---|
[2] | 546 |
|
---|
[391] | 547 | oldelem = PyList_GET_ITEM(heap, 0);
|
---|
| 548 | PyList_SET_ITEM(heap, 0, elem);
|
---|
| 549 | Py_DECREF(oldelem);
|
---|
| 550 | if (_siftupmax((PyListObject *)heap, 0) == -1)
|
---|
| 551 | goto fail;
|
---|
| 552 | los = PyList_GET_ITEM(heap, 0);
|
---|
| 553 | }
|
---|
[2] | 554 |
|
---|
| 555 | sortit:
|
---|
[391] | 556 | if (PyList_Sort(heap) == -1)
|
---|
| 557 | goto fail;
|
---|
| 558 | Py_DECREF(it);
|
---|
| 559 | return heap;
|
---|
[2] | 560 |
|
---|
| 561 | fail:
|
---|
[391] | 562 | Py_DECREF(it);
|
---|
| 563 | Py_XDECREF(heap);
|
---|
| 564 | return NULL;
|
---|
[2] | 565 | }
|
---|
| 566 |
|
---|
| 567 | PyDoc_STRVAR(nsmallest_doc,
|
---|
| 568 | "Find the n smallest elements in a dataset.\n\
|
---|
| 569 | \n\
|
---|
| 570 | Equivalent to: sorted(iterable)[:n]\n");
|
---|
| 571 |
|
---|
| 572 | static PyMethodDef heapq_methods[] = {
|
---|
[391] | 573 | {"heappush", (PyCFunction)heappush,
|
---|
| 574 | METH_VARARGS, heappush_doc},
|
---|
| 575 | {"heappushpop", (PyCFunction)heappushpop,
|
---|
| 576 | METH_VARARGS, heappushpop_doc},
|
---|
| 577 | {"heappop", (PyCFunction)heappop,
|
---|
| 578 | METH_O, heappop_doc},
|
---|
| 579 | {"heapreplace", (PyCFunction)heapreplace,
|
---|
| 580 | METH_VARARGS, heapreplace_doc},
|
---|
| 581 | {"heapify", (PyCFunction)heapify,
|
---|
| 582 | METH_O, heapify_doc},
|
---|
| 583 | {"nlargest", (PyCFunction)nlargest,
|
---|
| 584 | METH_VARARGS, nlargest_doc},
|
---|
| 585 | {"nsmallest", (PyCFunction)nsmallest,
|
---|
| 586 | METH_VARARGS, nsmallest_doc},
|
---|
| 587 | {NULL, NULL} /* sentinel */
|
---|
[2] | 588 | };
|
---|
| 589 |
|
---|
| 590 | PyDoc_STRVAR(module_doc,
|
---|
| 591 | "Heap queue algorithm (a.k.a. priority queue).\n\
|
---|
| 592 | \n\
|
---|
| 593 | Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
|
---|
| 594 | all k, counting elements from 0. For the sake of comparison,\n\
|
---|
| 595 | non-existing elements are considered to be infinite. The interesting\n\
|
---|
| 596 | property of a heap is that a[0] is always its smallest element.\n\
|
---|
| 597 | \n\
|
---|
| 598 | Usage:\n\
|
---|
| 599 | \n\
|
---|
| 600 | heap = [] # creates an empty heap\n\
|
---|
| 601 | heappush(heap, item) # pushes a new item on the heap\n\
|
---|
| 602 | item = heappop(heap) # pops the smallest item from the heap\n\
|
---|
| 603 | item = heap[0] # smallest item on the heap without popping it\n\
|
---|
| 604 | heapify(x) # transforms list into a heap, in-place, in linear time\n\
|
---|
| 605 | item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
|
---|
| 606 | # new item; the heap size is unchanged\n\
|
---|
| 607 | \n\
|
---|
| 608 | Our API differs from textbook heap algorithms as follows:\n\
|
---|
| 609 | \n\
|
---|
| 610 | - We use 0-based indexing. This makes the relationship between the\n\
|
---|
| 611 | index for a node and the indexes for its children slightly less\n\
|
---|
| 612 | obvious, but is more suitable since Python uses 0-based indexing.\n\
|
---|
| 613 | \n\
|
---|
| 614 | - Our heappop() method returns the smallest item, not the largest.\n\
|
---|
| 615 | \n\
|
---|
| 616 | These two make it possible to view the heap as a regular Python list\n\
|
---|
| 617 | without surprises: heap[0] is the smallest item, and heap.sort()\n\
|
---|
| 618 | maintains the heap invariant!\n");
|
---|
| 619 |
|
---|
| 620 |
|
---|
| 621 | PyDoc_STRVAR(__about__,
|
---|
| 622 | "Heap queues\n\
|
---|
| 623 | \n\
|
---|
| 624 | [explanation by François Pinard]\n\
|
---|
| 625 | \n\
|
---|
| 626 | Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
|
---|
| 627 | all k, counting elements from 0. For the sake of comparison,\n\
|
---|
| 628 | non-existing elements are considered to be infinite. The interesting\n\
|
---|
| 629 | property of a heap is that a[0] is always its smallest element.\n"
|
---|
| 630 | "\n\
|
---|
| 631 | The strange invariant above is meant to be an efficient memory\n\
|
---|
| 632 | representation for a tournament. The numbers below are `k', not a[k]:\n\
|
---|
| 633 | \n\
|
---|
| 634 | 0\n\
|
---|
| 635 | \n\
|
---|
| 636 | 1 2\n\
|
---|
| 637 | \n\
|
---|
| 638 | 3 4 5 6\n\
|
---|
| 639 | \n\
|
---|
| 640 | 7 8 9 10 11 12 13 14\n\
|
---|
| 641 | \n\
|
---|
| 642 | 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
|
---|
| 643 | \n\
|
---|
| 644 | \n\
|
---|
| 645 | In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
|
---|
| 646 | an usual binary tournament we see in sports, each cell is the winner\n\
|
---|
| 647 | over the two cells it tops, and we can trace the winner down the tree\n\
|
---|
| 648 | to see all opponents s/he had. However, in many computer applications\n\
|
---|
| 649 | of such tournaments, we do not need to trace the history of a winner.\n\
|
---|
| 650 | To be more memory efficient, when a winner is promoted, we try to\n\
|
---|
| 651 | replace it by something else at a lower level, and the rule becomes\n\
|
---|
| 652 | that a cell and the two cells it tops contain three different items,\n\
|
---|
| 653 | but the top cell \"wins\" over the two topped cells.\n"
|
---|
| 654 | "\n\
|
---|
| 655 | If this heap invariant is protected at all time, index 0 is clearly\n\
|
---|
| 656 | the overall winner. The simplest algorithmic way to remove it and\n\
|
---|
| 657 | find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
|
---|
| 658 | diagram above) into the 0 position, and then percolate this new 0 down\n\
|
---|
| 659 | the tree, exchanging values, until the invariant is re-established.\n\
|
---|
| 660 | This is clearly logarithmic on the total number of items in the tree.\n\
|
---|
| 661 | By iterating over all items, you get an O(n ln n) sort.\n"
|
---|
| 662 | "\n\
|
---|
| 663 | A nice feature of this sort is that you can efficiently insert new\n\
|
---|
| 664 | items while the sort is going on, provided that the inserted items are\n\
|
---|
| 665 | not \"better\" than the last 0'th element you extracted. This is\n\
|
---|
| 666 | especially useful in simulation contexts, where the tree holds all\n\
|
---|
| 667 | incoming events, and the \"win\" condition means the smallest scheduled\n\
|
---|
| 668 | time. When an event schedule other events for execution, they are\n\
|
---|
| 669 | scheduled into the future, so they can easily go into the heap. So, a\n\
|
---|
| 670 | heap is a good structure for implementing schedulers (this is what I\n\
|
---|
| 671 | used for my MIDI sequencer :-).\n"
|
---|
| 672 | "\n\
|
---|
| 673 | Various structures for implementing schedulers have been extensively\n\
|
---|
| 674 | studied, and heaps are good for this, as they are reasonably speedy,\n\
|
---|
| 675 | the speed is almost constant, and the worst case is not much different\n\
|
---|
| 676 | than the average case. However, there are other representations which\n\
|
---|
| 677 | are more efficient overall, yet the worst cases might be terrible.\n"
|
---|
| 678 | "\n\
|
---|
| 679 | Heaps are also very useful in big disk sorts. You most probably all\n\
|
---|
| 680 | know that a big sort implies producing \"runs\" (which are pre-sorted\n\
|
---|
| 681 | sequences, which size is usually related to the amount of CPU memory),\n\
|
---|
| 682 | followed by a merging passes for these runs, which merging is often\n\
|
---|
| 683 | very cleverly organised[1]. It is very important that the initial\n\
|
---|
| 684 | sort produces the longest runs possible. Tournaments are a good way\n\
|
---|
| 685 | to that. If, using all the memory available to hold a tournament, you\n\
|
---|
| 686 | replace and percolate items that happen to fit the current run, you'll\n\
|
---|
| 687 | produce runs which are twice the size of the memory for random input,\n\
|
---|
| 688 | and much better for input fuzzily ordered.\n"
|
---|
| 689 | "\n\
|
---|
| 690 | Moreover, if you output the 0'th item on disk and get an input which\n\
|
---|
| 691 | may not fit in the current tournament (because the value \"wins\" over\n\
|
---|
| 692 | the last output value), it cannot fit in the heap, so the size of the\n\
|
---|
| 693 | heap decreases. The freed memory could be cleverly reused immediately\n\
|
---|
| 694 | for progressively building a second heap, which grows at exactly the\n\
|
---|
| 695 | same rate the first heap is melting. When the first heap completely\n\
|
---|
| 696 | vanishes, you switch heaps and start a new run. Clever and quite\n\
|
---|
| 697 | effective!\n\
|
---|
| 698 | \n\
|
---|
| 699 | In a word, heaps are useful memory structures to know. I use them in\n\
|
---|
| 700 | a few applications, and I think it is good to keep a `heap' module\n\
|
---|
| 701 | around. :-)\n"
|
---|
| 702 | "\n\
|
---|
| 703 | --------------------\n\
|
---|
| 704 | [1] The disk balancing algorithms which are current, nowadays, are\n\
|
---|
| 705 | more annoying than clever, and this is a consequence of the seeking\n\
|
---|
| 706 | capabilities of the disks. On devices which cannot seek, like big\n\
|
---|
| 707 | tape drives, the story was quite different, and one had to be very\n\
|
---|
| 708 | clever to ensure (far in advance) that each tape movement will be the\n\
|
---|
| 709 | most effective possible (that is, will best participate at\n\
|
---|
| 710 | \"progressing\" the merge). Some tapes were even able to read\n\
|
---|
| 711 | backwards, and this was also used to avoid the rewinding time.\n\
|
---|
| 712 | Believe me, real good tape sorts were quite spectacular to watch!\n\
|
---|
| 713 | From all times, sorting has always been a Great Art! :-)\n");
|
---|
| 714 |
|
---|
| 715 | PyMODINIT_FUNC
|
---|
| 716 | init_heapq(void)
|
---|
| 717 | {
|
---|
[391] | 718 | PyObject *m;
|
---|
[2] | 719 |
|
---|
[391] | 720 | m = Py_InitModule3("_heapq", heapq_methods, module_doc);
|
---|
| 721 | if (m == NULL)
|
---|
| 722 | return;
|
---|
| 723 | PyModule_AddObject(m, "__about__", PyString_FromString(__about__));
|
---|
[2] | 724 | }
|
---|
| 725 |
|
---|