LCOV - code coverage report
Current view: top level - gdk - gdk_bbp.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1278 1830 69.8 %
Date: 2021-09-14 19:48:19 Functions: 76 77 98.7 %

          Line data    Source code
       1             : /*
       2             :  * This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       5             :  *
       6             :  * Copyright 1997 - July 2008 CWI, August 2008 - 2021 MonetDB B.V.
       7             :  */
       8             : 
       9             : /*
      10             :  * @a M. L. Kersten, P. Boncz, N. J. Nes
      11             :  * @* BAT Buffer Pool (BBP)
      12             :  * The BATs created and loaded are collected in a BAT buffer pool.
      13             :  * The Bat Buffer Pool has a number of functions:
      14             :  * @table @code
      15             :  *
      16             :  * @item administration and lookup
      17             :  * The BBP is a directory which contains status information about all
      18             :  * known BATs.  This interface may be used very heavily, by
      19             :  * data-intensive applications.  To eliminate all overhead, read-only
      20             :  * access to the BBP may be done by table-lookups. The integer index
      21             :  * type for these lookups is @emph{bat}, as retrieved by
      22             :  * @emph{b->batCacheid}. The @emph{bat} zero is reserved for the nil
      23             :  * bat.
      24             :  *
      25             :  * @item persistence
      26             :  * The BBP is made persistent by saving it to the dictionary file
      27             :  * called @emph{BBP.dir} in the database.
      28             :  *
      29             :  * When the number of BATs rises, having all files in one directory
      30             :  * becomes a bottleneck.  The BBP therefore implements a scheme that
      31             :  * distributes all BATs in a growing directory tree with at most 64
      32             :  * BATs stored in one node.
      33             :  *
      34             :  * @item buffer management
      35             :  * The BBP is responsible for loading and saving of BATs to disk. It
      36             :  * also contains routines to unload BATs from memory when memory
      37             :  * resources get scarce. For this purpose, it administers BAT memory
      38             :  * reference counts (to know which BATs can be unloaded) and BAT usage
      39             :  * statistics (it unloads the least recently used BATs).
      40             :  *
      41             :  * @item recovery
      42             :  * When the database is closed or during a run-time syncpoint, the
      43             :  * system tables must be written to disk in a safe way, that is, immune
      44             :  * to system failures (like disk full). To do so, the BBP implements
      45             :  * an atomic commit and recovery protocol: first all files to be
      46             :  * overwritten are moved to a BACKUP/ dir. If that succeeds, the
      47             :  * writes are done. If that also fully succeeds the BACKUP/ dir is
      48             :  * renamed to DELETE_ME/ and subsequently deleted.  If not, all files
      49             :  * in BACKUP/ are moved back to their original location.
      50             :  *
      51             :  * @item unloading
      52             :  * Bats which have a logical reference (ie. a lrefs > 0) but no memory
      53             :  * reference (refcnt == 0) can be unloaded. Unloading dirty bats
      54             :  * means, moving the original (committed version) to the BACKUP/ dir
      55             :  * and saving the bat. This complicates the commit and recovery/abort
      56             :  * issues.  The commit has to check if the bat is already moved, and
      57             :  * the recovery always has to move back the files from the BACKUP/
      58             :  * dir.
      59             :  *
      60             :  * @item reference counting
      61             :  * Bats have two kinds of references: logical and physical
      62             :  * (pointer) ones.  The logical references are administered by
      63             :  * BBPretain/BBPrelease, the physical ones by BBPfix/BBPunfix.
      64             :  *
      65             :  * @item share counting
      66             :  * Views use the heaps of their parent bats. To safeguard this, the
      67             :  * parent has a shared counter, which is incremented and decremented
      68             :  * using BBPshare and BBPunshare. These functions make sure the parent
      69             :  * is memory resident as required because of the 'pointer' sharing.
      70             :  * @end table
      71             :  */
      72             : 
      73             : #include "monetdb_config.h"
      74             : #include "gdk.h"
      75             : #include "gdk_private.h"
      76             : #include "mutils.h"
      77             : #ifdef HAVE_FCNTL_H
      78             : #include <fcntl.h>
      79             : #endif
      80             : 
      81             : #ifndef F_OK
      82             : #define F_OK 0
      83             : #endif
      84             : #ifndef S_ISDIR
      85             : #define S_ISDIR(mode)   (((mode) & _S_IFMT) == _S_IFDIR)
      86             : #endif
      87             : 
      88             : /*
      89             :  * The BBP has a fixed address, so re-allocation due to a growing BBP
      90             :  * caused by one thread does not disturb reads to the old entries by
      91             :  * another.  This is implemented using anonymous virtual memory;
      92             :  * extensions on the same address are guaranteed because a large
      93             :  * non-committed VM area is requested initially. New slots in the BBP
      94             :  * are found in O(1) by keeping a freelist that uses the 'next' field
      95             :  * in the BBPrec records.
      96             :  */
      97             : BBPrec *BBP[N_BBPINIT];         /* BBP array, kept as N_BBPINIT chunk pointers; BBPextend() allocates each chunk of BBPINIT records on demand */
      98             : bat BBPlimit = 0;               /* number of BBP slots currently allocated; BBPextend() grows it in steps of BBPINIT */
      99             : static ATOMIC_TYPE BBPsize = ATOMIC_VAR_INIT(0); /* current used size of BBP array; read through getBBPsize() */
     100             : 
     101             : struct BBPfarm_t BBPfarms[MAXFARMS]; /* farm table; BBPselectfarm() tests the roles bitmask of each entry */
     102             : 
     103             : #define KITTENNAP 1             /* used to suspend processing */
     104             : #define BBPNONAME "."         /* filler for no name in BBP.dir */
     105             : /*
     106             :  * The hash index uses a bucket array (of bat ids) that is indexed by
     107             :  * the name hash reduced with BBP_mask; it is sized with the aim of
     108             :  * needing a single lookup.  The bucket chain uses the 'next' field
     109             :  * in the BBPrec records.
     109             :  */
     110             : static MT_Lock BBPnameLock = MT_LOCK_INITIALIZER(BBPnameLock);
     111             : static bat *BBP_hash = NULL;            /* BBP logical name hash buckets */
     112             : static bat BBP_mask = 0;                /* bucket count minus one (set in BBPinithash); bucket = hash & BBP_mask */
     113             : #define BBP_THREADMASK  0               /* originally: 63; with 0, GDKbbpLock below has exactly one entry */
     114             : #if SIZEOF_SIZE_T == 8
     115             : #define threadmask(y)   ((int) (mix_lng(y) & BBP_THREADMASK))
     116             : #else
     117             : #define threadmask(y)   ((int) (mix_int(y) & BBP_THREADMASK))
     118             : #endif
     119             : static struct {
     120             :         MT_Lock cache; /* accessed as GDKcacheLock(y) */
     121             :         bat free; /* head of a free-slot list chained through BBP_next; accessed as BBP_free(y) */
     122             : } GDKbbpLock[BBP_THREADMASK + 1];
     123             : #define GDKcacheLock(y) GDKbbpLock[y].cache
     124             : #define BBP_free(y)     GDKbbpLock[y].free
     125             : 
     126             : static gdk_return BBPfree(BAT *b); /* forward declarations of file-local functions (definitions are not in this part of the listing) */
     127             : static void BBPdestroy(BAT *b);
     128             : static void BBPuncacheit(bat bid, bool unloaddesc);
     129             : static gdk_return BBPprepare(bool subcommit);
     130             : static BAT *getBBPdescriptor(bat i, bool lock);
     131             : static gdk_return BBPbackup(BAT *b, bool subcommit);
     132             : static gdk_return BBPdir_init(void);
     133             : 
     134             : static lng BBPlogno;            /* two lngs of extra info in BBP.dir; this one is returned by getBBPlogno() */
     135             : static lng BBPtransid;          /* the second one; returned by getBBPtransid() */
     136             : 
     137             : #ifdef HAVE_HGE
     138             : /* start out by saying we have no hge, but as soon as we've seen one,
     139             :  * we'll always say we do have it (heapinit() sets the flag when a
     139             :  * BBP.dir entry names the type "hge") */
     140             : static bool havehge = false;
     141             : #endif
     142             : 
     143             : #define BBPtmpcheck(s)  (strncmp(s, "tmp_", 4) == 0) /* true iff s starts with "tmp_" */
     144             : 
     145             : #define BBPnamecheck(s) (BBPtmpcheck(s) ? strtol((s) + 4, NULL, 8) : 0) /* for "tmp_" names: the number after the prefix, parsed as OCTAL; 0 for any other name */
     146             : 
     147             : static void /* Push BAT i onto the front of the name-hash bucket chain selected by its logical name. */
     148       24828 : BBP_insert(bat i)
     149             : { /* NOTE(review): no lock is taken here; presumably callers serialize access to BBP_hash -- confirm */
     150       24828 :         bat idx = (bat) (strHash(BBP_logical(i)) & BBP_mask); /* bucket = name hash reduced with BBP_mask */
     151             : 
     152       24828 :         BBP_next(i) = BBP_hash[idx]; /* previous chain head becomes i's successor */
     153       24828 :         BBP_hash[idx] = i; /* i is the new chain head */
     154       24828 : }
     155             : 
     156             : static void /* Unlink from the name hash the first chain entry whose logical name equals that of BAT i. */
     157       13098 : BBP_delete(bat i)
     158             : {
     159       13098 :         bat *h = BBP_hash;
     160       13098 :         const char *s = BBP_logical(i); /* name to look for; fetched before i is reused as the chain cursor */
     161       13098 :         bat idx = (bat) (strHash(s) & BBP_mask);
     162             : 
     163       13098 :         for (h += idx; (i = *h) != 0; h = &BBP_next(i)) { /* h points at the link to patch; a 0 link ends the chain */
     164       13098 :                 if (strcmp(BBP_logical(i), s) == 0) { /* match is by name string, not by bat id */
     165       13098 :                         *h = BBP_next(i); /* bypass the matching entry */
     166       13098 :                         break;
     167             :                 }
     168             :         }
     169       13098 : } /* when no entry matches, the chain is left unchanged */
     170             : 
     171             : bat /* Return the current value of the atomic BBPsize counter, cast to bat. */
     172   492493561 : getBBPsize(void)
     173             : {
     174   492493561 :         return (bat) ATOMIC_GET(&BBPsize); /* atomic read; no lock is taken */
     175             : }
     176             : 
     177             : lng /* Return the file-static BBPlogno value (one of the two extra lngs kept with BBP.dir). */
     178         889 : getBBPlogno(void)
     179             : {
     180         889 :         return BBPlogno; /* plain read; no lock is taken */
     181             : }
     182             : 
     183             : lng /* Return the file-static BBPtransid value (one of the two extra lngs kept with BBP.dir). */
     184         889 : getBBPtransid(void)
     185             : {
     186         889 :         return BBPtransid; /* plain read; no lock is taken */
     187             : }
     188             : 
     189             : 
     190             : /*
     191             :  * @+ BBP Consistency and Concurrency
     192             :  * While GDK provides the basic building blocks for an ACID system, in
     193             :  * itself it is not such a system, as this would entail too much
     194             :  * overhead that is often not needed. Hence, some consistency control
     195             :  * is left to the user. The first important user constraint is that if
     196             :  * a user updates a BAT, (s)he must ensure that no-one else
     197             :  * accesses this BAT.
     198             :  *
     199             :  * Concerning buffer management, the BBP carries out a swapping
     200             :  * policy.  BATs are kept in memory till the memory is full. If the
     201             :  * memory is full, the malloc functions initiate BBP trim actions,
     202             :  * that unload the coldest BATs that have a zero reference count. The
     203             :  * second important user constraint is therefore that a user may only
     204             :  * manipulate live BAT data in memory if it is sure that there is at
     205             :  * least one reference count to that BAT.
     206             :  *
     207             :  * The main BBP array is protected by two locks:
     208             :  * @table @code
     209             :  * @item GDKcacheLock
     210             :  * this lock guards the free slot management in the BBP array.  The
     211             :  * BBP operations that allocate a new slot for a new BAT
     212             :  * (@emph{BBPinit},@emph{BBPcacheit}), delete the slot of a destroyed
     213             :  * BAT (@emph{BBPreclaim}), or rename a BAT (@emph{BBPrename}), hold
     214             :  * this lock. It also protects all BAT (re)naming actions, including
     215             :  * reads and writes of the hash table with BAT names.
     216             :  * @item GDKswapLock
     217             :  * this lock guards the swap (loaded/unloaded) status of the
     218             :  * BATs. Hence, all BBP routines that influence the swapping policy,
     219             :  * or actually carry out the swapping policy itself, acquire this lock
     220             :  * (e.g. @emph{BBPfix},@emph{BBPunfix}).  Note that this also means
     221             :  * that updates to the BBP_status indicator array must be protected by
     222             :  * GDKswapLock.
     223             :  *
     224             :  * To reduce contention GDKswapLock was split into multiple locks; it
     225             :  * is now an array of lock pointers which is accessed by
     226             :  * GDKswapLock(bat)
     227             :  * @end table
     228             :  *
     229             :  * Routines that need both locks should first acquire the locks in the
     230             :  * GDKswapLock array (in ascending order) and then GDKcacheLock (and
     231             :  * release them in reverse order).
     232             :  *
     233             :  * To obtain maximum speed, read operations to existing elements in
     234             :  * the BBP are unguarded. As said, it is the user's responsibility that
     235             :  * the BAT that is being read is not being modified. BBP update
     236             :  * actions that modify the BBP data structure itself are locked by the
     237             :  * BBP functions themselves. Hence, multiple concurrent BBP read
     238             :  * operations may be ongoing while at the same time at most one BBP
     239             :  * write operation @strong{on a different BAT} is executing.  This
     240             :  * holds for accesses to the public (quasi-) arrays @emph{BBPcache},
     241             :  * @emph{BBPstatus} and @emph{BBPrefs}.
     242             :  * These arrays are called quasi as now they are
     243             :  * actually stored together in one big BBPrec array called BBP, that
     244             :  * is allocated in anonymous VM space, so we can reallocate this
     245             :  * structure without changing the base address (a crucial feature if
     246             :  * read actions are to go on unlocked while other entries in the BBP
     247             :  * may be modified).
     248             :  */
     249             : static volatile MT_Id locked_by = 0; /* set to MT_getpid() at the end of BBPlock(); reset to 0 by BBPunlock() */
     250             : 
     251             : /* use a lock instead of atomic instructions so that we wait for
     252             :  * BBPlock/BBPunlock: BBPlock() keeps GDKunloadLock held while it
     252             :  * acquires all its other locks, so these macros block meanwhile */
     253             : #define BBP_unload_inc()                        \
     254             :         do {                                    \
     255             :                 MT_lock_set(&GDKunloadLock);        \
     256             :                 BBPunloadCnt++;                 \
     257             :                 MT_lock_unset(&GDKunloadLock);      \
     258             :         } while (0)
     259             : 
     260             : #define BBP_unload_dec()                        \
     261             :         do {                                    \
     262             :                 MT_lock_set(&GDKunloadLock);        \
     263             :                 --BBPunloadCnt;                 \
     264             :                 assert(BBPunloadCnt >= 0);   \
     265             :                 MT_lock_unset(&GDKunloadLock);      \
     266             :         } while (0)
     267             : 
     268             : static int BBPunloadCnt = 0; /* count maintained by BBP_unload_inc()/BBP_unload_dec(); BBPlock() waits until it is 0 */
     269             : static MT_Lock GDKunloadLock = MT_LOCK_INITIALIZER(GDKunloadLock); /* guards BBPunloadCnt */
     270             : 
     271             : void /* Wait until BBPunloadCnt is 0, then acquire GDKtmLock, every GDKcacheLock and every GDKswapLock, and record the caller in locked_by. Undone by BBPunlock(). */
     272         272 : BBPlock(void)
     273             : {
     274             :         int i;
     275             : 
     276             :         /* wait for all pending unloads to finish */
     277         272 :         MT_lock_set(&GDKunloadLock);
     278         272 :         while (BBPunloadCnt > 0) {
     279           0 :                 MT_lock_unset(&GDKunloadLock); /* drop the lock while sleeping so BBP_unload_dec() can get in */
     280           0 :                 MT_sleep_ms(1);
     281           0 :                 MT_lock_set(&GDKunloadLock);
     282             :         }
     283             : 
     284         272 :         MT_lock_set(&GDKtmLock); /* GDKunloadLock is still held here, so BBP_unload_inc() cannot proceed meanwhile */
     285         544 :         for (i = 0; i <= BBP_THREADMASK; i++)
     286         272 :                 MT_lock_set(&GDKcacheLock(i)); /* NOTE(review): cache locks are taken BEFORE the swap locks, whereas the file comment above says swap locks first -- confirm which order is intended */
     287     2228496 :         for (i = 0; i <= BBP_BATMASK; i++)
     288     2228224 :                 MT_lock_set(&GDKswapLock(i)); /* ascending index order */
     289         272 :         locked_by = MT_getpid();
     290             : 
     291         272 :         MT_lock_unset(&GDKunloadLock); /* released only after all other locks are held */
     292         272 : }
     293             : 
     295             : void /* Release the locks taken by BBPlock(), in the reverse of its acquisition order, and clear locked_by. */
     296          18 : BBPunlock(void)
     297             : {
     298             :         int i;
     299             : 
     300      147474 :         for (i = BBP_BATMASK; i >= 0; i--)
     301      147456 :                 MT_lock_unset(&GDKswapLock(i)); /* swap locks first, descending index */
     302          36 :         for (i = BBP_THREADMASK; i >= 0; i--)
     303          18 :                 MT_lock_unset(&GDKcacheLock(i));
     304          18 :         locked_by = 0; /* cleared while GDKtmLock is still held */
     305          18 :         MT_lock_unset(&GDKtmLock); /* GDKunloadLock is not touched: BBPlock() already released it */
     306          18 : }
     306             : 
     307             : 
     308             : static gdk_return /* (Re)build the name hash and the free-slot lists from BBP slots size-1 down to 1. j is the free list that receives the first free slot. Returns GDK_FAIL only if the bucket array cannot be allocated. */
     309         266 : BBPinithash(int j, bat size)
     310             : {
     311         266 :         assert(j >= 0 && j <= BBP_THREADMASK);
     312        3990 :         for (BBP_mask = 1; (BBP_mask << 1) <= BBPlimit; BBP_mask <<= 1) /* largest power of two not exceeding BBPlimit (1 if BBPlimit < 2) */
     313             :                 ;
     314         266 :         BBP_hash = (bat *) GDKzalloc(BBP_mask * sizeof(bat)); /* zeroed: 0 marks an empty bucket / end of chain */
     315         266 :         if (BBP_hash == NULL) {
     316             :                 return GDK_FAIL;
     317             :         }
     318         266 :         BBP_mask--; /* from here on BBP_mask is the bit mask, not the bucket count */
     319             : 
     320       84720 :         while (--size > 0) { /* slot 0 is never visited */
     321       84454 :                 const char *s = BBP_logical(size);
     322             : 
     323       84454 :                 if (s) {
     324       21774 :                         if (*s != '.' && !BBPtmpcheck(s)) { /* names starting with '.' or "tmp_" are not hashed */
     325        1116 :                                 BBP_insert(size);
     326             :                         }
     327             :                 } else {
     328       62680 :                         BBP_next(size) = BBP_free(j); /* slot without a logical name: push it on free list j */
     329       62680 :                         BBP_free(j) = size;
     330       62680 :                         if (++j > BBP_THREADMASK) /* distribute free slots round-robin over the lists */
     331             :                                 j = 0;
     332             :                 }
     333             :         }
     334             :         return GDK_SUCCEED;
     335             : }
     336             : 
     337             : int /* Return the index of the first BBPfarms entry whose roles bitmask has the bit for role set; 0 when GDKinmemory(0) is true; -1 when no farm matches. */
     338    13510333 : BBPselectfarm(role_t role, int type, enum heaptype hptype)
     339             : {
     340             :         int i;
     341             : 
     342             :         (void) type;            /* may use in future */
     343             :         (void) hptype;          /* only used below when PERSISTENTHASH / PERSISTENTIDX are not defined */
     344             : 
     345    13510333 :         if (GDKinmemory(0))
     346             :                 return 0;
     347             : 
     348             : #ifndef PERSISTENTHASH
     349             :         if (hptype == hashheap) /* without PERSISTENTHASH, hash heaps are always placed as TRANSIENT */
     350             :                 role = TRANSIENT;
     351             : #endif
     352             : #ifndef PERSISTENTIDX
     353             :         if (hptype == orderidxheap) /* likewise for order-index heaps without PERSISTENTIDX */
     354             :                 role = TRANSIENT;
     355             : #endif
     356    26775123 :         for (i = 0; i < MAXFARMS; i++)
     357    26775123 :                 if (BBPfarms[i].roles & (1U << (int) role)) /* roles is a bitmask with one bit per role value */
     358    13511948 :                         return i;
     359             :         /* must be able to find farms for TRANSIENT and PERSISTENT */
     360           0 :         assert(role != TRANSIENT && role != PERSISTENT);
     361             :         return -1; /* reached only when no farm serves the requested role */
     362             : }
     363             : 
     364             : static gdk_return /* Grow the BBP so that BBPlimit >= newsize, allocating BBPINIT-record chunks as needed; when buildhash is true, also rebuild the name hash and free lists via BBPinithash(idx, newsize). Returns GDK_FAIL on limit overflow or allocation failure. */
     365         266 : BBPextend(int idx, bool buildhash, bat newsize)
     366             : {
     367         266 :         if (newsize >= N_BBPINIT * BBPINIT) { /* would not fit in the N_BBPINIT chunk pointers of BBP[] */
     368           0 :                 GDKerror("trying to extend BAT pool beyond the "
     369             :                          "limit (%d)\n", N_BBPINIT * BBPINIT);
     370           0 :                 return GDK_FAIL;
     371             :         }
     372             : 
     373             :         /* grow BBPlimit in steps of BBPINIT until it is at least newsize */
     374         532 :         while (BBPlimit < newsize) {
     375         266 :                 BUN limit = BBPlimit >> BBPINITLOG; /* index of the next chunk to allocate */
     376         266 :                 assert(BBP[limit] == NULL);
     377         266 :                 BBP[limit] = GDKzalloc(BBPINIT * sizeof(BBPrec));
     378         266 :                 if (BBP[limit] == NULL) {
     379           0 :                         GDKerror("failed to extend BAT pool\n");
     380           0 :                         return GDK_FAIL; /* chunks allocated in earlier iterations stay in place and BBPlimit already accounts for them */
     381             :                 }
     382     4358410 :                 for (BUN i = 0; i < BBPINIT; i++) {
     383     4358144 :                         ATOMIC_INIT(&BBP[limit][i].status, 0);
     384     4358144 :                         BBP[limit][i].pid = ~(MT_Id)0; /* all-ones value; NOTE(review): presumably means "no owning thread" -- confirm */
     385             :                 }
     386         266 :                 BBPlimit += BBPINIT;
     387             :         }
     388             : 
     389         266 :         if (buildhash) {
     390             :                 int i;
     391             : 
     392           0 :                 GDKfree(BBP_hash); /* discard the old bucket array; BBPinithash allocates a new one */
     393           0 :                 BBP_hash = NULL;
     394           0 :                 for (i = 0; i <= BBP_THREADMASK; i++)
     395           0 :                         BBP_free(i) = 0; /* empty every free list before it is rebuilt */
     396           0 :                 if (BBPinithash(idx, newsize) != GDK_SUCCEED)
     397           0 :                         return GDK_FAIL;
     398             :         }
     399             :         return GDK_SUCCEED;
     400             : }
     401             : 
     402             : static gdk_return /* Move BBP.dir from BAKDIR to BATDIR in farm farmid and return the result of that move. When direxists is true, first try to keep the BBP.dir currently in BATDIR as BBP.bak. */
     403          78 : recover_dir(int farmid, bool direxists)
     404             : {
     405          78 :         if (direxists) {
     406             :                 /* just try; don't care about these non-vital files */
     407           0 :                 if (GDKunlink(farmid, BATDIR, "BBP", "bak") != GDK_SUCCEED) /* remove a stale BBP.bak; failure only logs a warning */
     408           0 :                         TRC_WARNING(GDK, "unlink of BBP.bak failed\n");
     409           0 :                 if (GDKmove(farmid, BATDIR, "BBP", "dir", BATDIR, "BBP", "bak", false) != GDK_SUCCEED) /* BBP.dir -> BBP.bak within BATDIR; failure only logs a warning */
     410           0 :                         TRC_WARNING(GDK, "rename of BBP.dir to BBP.bak failed\n");
     411             :         }
     412          78 :         return GDKmove(farmid, BAKDIR, "BBP", "dir", BATDIR, "BBP", "dir", true); /* this is the only step whose failure is reported to the caller */
     413             : }
     414             : 
     415             : static gdk_return BBPrecover(int farmid);
     416             : static gdk_return BBPrecover_subdir(void);
     417             : static bool BBPdiskscan(const char *, size_t);
     418             : 
     419             : static int /* Parse the tail-column fields of one BBP.dir line from buf into BAT descriptor b and its theap. Returns the number of characters of buf consumed (from %n), or -1 after logging a critical error. *hashash receives bit 2 of the "var" field. */
     420       21774 : heapinit(BAT *b, const char *buf, int *hashash, unsigned bbpversion, bat bid, const char *filename, int lineno)
     421             : {
     422             :         int t;
     423             :         char type[33];
     424             :         uint16_t width;
     425             :         uint16_t var;
     426             :         uint16_t properties;
     427             :         uint64_t nokey0;
     428             :         uint64_t nokey1;
     429             :         uint64_t nosorted;
     430             :         uint64_t norevsorted;
     431             :         uint64_t base;
     432             :         uint64_t free;
     433             :         uint64_t size;
     434             :         uint16_t storage; /* scanned from the line but not used further in this function */
     435             :         uint64_t minpos, maxpos;
     436             :         int n;
     437             : 
     438             :         (void) bbpversion;      /* redundant here: bbpversion is used just below to select the line format */
     439             : 
     440       21774 :         minpos = maxpos = (uint64_t) oid_nil; /* for GDKLIBRARY_MINMAX_POS case */
     441       43548 :         if (bbpversion <= GDKLIBRARY_MINMAX_POS ? /* versions up to GDKLIBRARY_MINMAX_POS: 12 fields, no minpos/maxpos; otherwise 14 fields */
     442        2574 :             sscanf(buf,
     443             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     444             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     445             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16
     446             :                    "%n",
     447             :                    type, &width, &var, &properties, &nokey0,
     448             :                    &nokey1, &nosorted, &norevsorted, &base,
     449             :                    &free, &size, &storage,
     450             :                    &n) < 12 :
     451       19200 :             sscanf(buf,
     452             :                    " %10s %" SCNu16 " %" SCNu16 " %" SCNu16 " %" SCNu64
     453             :                    " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64
     454             :                    " %" SCNu64 " %" SCNu64 " %" SCNu16 " %" SCNu64 " %" SCNu64
     455             :                    "%n",
     456             :                    type, &width, &var, &properties, &nokey0,
     457             :                    &nokey1, &nosorted, &norevsorted, &base,
     458             :                    &free, &size, &storage, &minpos, &maxpos,
     459             :                    &n) < 14) {
     460           0 :                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     461           0 :                 return -1;
     462             :         }
     463             : 
     464       21774 :         if (properties & ~0x0F81) { /* only bits 0x0001, 0x0080, 0x0100, 0x0200, 0x0400 and 0x0800 are accepted */
     465           0 :                 TRC_CRITICAL(GDK, "unknown properties are set: incompatible database on line %d of BBP.dir\n", lineno);
     466           0 :                 return -1;
     467             :         }
     468       21774 :         *hashash = var & 2; /* bit 2 of the var field is handed back to the caller ... */
     469       21774 :         var &= ~2; /* ... and stripped so that var is a plain 0/1 varsized flag below */
     470             : #ifdef HAVE_HGE
     471       21774 :         if (strcmp(type, "hge") == 0)
     472           0 :                 havehge = true;
     473             : #endif
     474       21774 :         if ((t = ATOMindex(type)) < 0) { /* type name not known as an atom: fall back to ATOMunknown_find() */
     475         295 :                 if ((t = ATOMunknown_find(type)) == 0) {
     476           0 :                         TRC_CRITICAL(GDK, "no space for atom %s", type);
     477           0 :                         return -1;
     478             :                 }
     479       37414 :         } else if (var != (t == TYPE_void || BATatoms[t].atomPut != NULL)) { /* known atom: var must be 1 exactly for void or for atoms that have an atomPut function */
     480           0 :                 TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tvarsized mismatch for BAT %d on line %d\n", (int) bid, lineno);
     481           0 :                 return -1;
     482       42958 :         } else if (var && t != 0 ? /* var-sized, t != 0: width must not exceed ATOMsize(t) and must be 1, 2, 4 (or 8); otherwise width must equal ATOMsize(t) */
     483        5544 :                    ATOMsize(t) < width ||
     484        5544 :                    (width != 1 && width != 2 && width != 4
     485             : #if SIZEOF_VAR_T == 8
     486           0 :                     && width != 8
     487             : #endif
     488             :                            ) :
     489       15935 :                    ATOMsize(t) != width) {
     490           0 :                 TRC_CRITICAL(GDK, "inconsistent entry in BBP.dir: tsize mismatch for BAT %d on line %d\n", (int) bid, lineno);
     491           0 :                 return -1;
     492             :         }
     493       21774 :         b->ttype = t;
     494       21774 :         b->twidth = width;
     495       21774 :         b->tvarsized = var != 0;
     496       21774 :         b->tshift = ATOMelmshift(width);
     497       21774 :         assert_shift_width(b->tshift,b->twidth);
     498       21774 :         b->tnokey[0] = (BUN) nokey0;
     499       21774 :         b->tnokey[1] = (BUN) nokey1;
     500       21774 :         b->tsorted = (bit) ((properties & 0x0001) != 0);
     501       21774 :         b->trevsorted = (bit) ((properties & 0x0080) != 0);
     502       21774 :         b->tkey = (properties & 0x0100) != 0;
     503       21774 :         b->tnonil = (properties & 0x0400) != 0;
     504       21774 :         b->tnil = (properties & 0x0800) != 0;
     505       21774 :         b->tnosorted = (BUN) nosorted;
     506       21774 :         b->tnorevsorted = (BUN) norevsorted;
     507             :         /* (properties & 0x0200) is the old tdense flag */
     508       21774 :         b->tseqbase = (properties & 0x0200) == 0 || base >= (uint64_t) oid_nil ? oid_nil : (oid) base; /* base is kept only when the 0x0200 bit is set and base is below oid_nil */
     509       21774 :         b->theap->free = (size_t) free;
     510       21774 :         b->theap->size = (size_t) size;
     511       21774 :         b->theap->base = NULL; /* no heap memory is attached by this function */
     512       21774 :         settailname(b->theap, filename, t, width);
     513       21774 :         b->theap->storage = STORE_INVALID;
     514       21774 :         b->theap->newstorage = STORE_INVALID;
     515       21774 :         b->theap->farmid = BBPselectfarm(PERSISTENT, b->ttype, offheap);
     516       21774 :         b->theap->dirty = false;
     517       21774 :         b->theap->parentid = b->batCacheid;
     518       21774 :         if (minpos < b->batCount) /* positions at or beyond batCount (including the oid_nil default) are not recorded */
     519        4443 :                 BATsetprop_nolock(b, GDK_MIN_POS, TYPE_oid, &(oid){(oid)minpos});
     520       21774 :         if (maxpos < b->batCount)
     521        4547 :                 BATsetprop_nolock(b, GDK_MAX_POS, TYPE_oid, &(oid){(oid)maxpos});
     522       21774 :         if (b->theap->free > b->theap->size) { /* checked only after all fields (and any min/max props) have been stored in b */
     523           0 :                 TRC_CRITICAL(GDK, "\"free\" value larger than \"size\" in heap of bat %d on line %d\n", (int) bid, lineno);
     524           0 :                 return -1;
     525             :         }
     526       21774 :         return n; /* characters of buf consumed by the sscanf above */
     527             : }
     528             : 
     529             : static int /* Parse the var-heap fields of one BBP.dir entry from buf and attach a freshly allocated Heap to b->tvheap; returns the number of characters consumed (0 when b gets no var heap) or -1 on error. */
     530       21774 : vheapinit(BAT *b, const char *buf, int hashash, bat bid, const char *filename, int lineno)
     531             : {
     532       21774 :         int n = 0; /* characters consumed from buf; stays 0 when nothing is parsed */
     533             :         uint64_t free, size;
     534             :         uint16_t storage; /* scanned from the line but not used any further in this function */
     535             : 
     536       21774 :         if (b->tvarsized && b->ttype != TYPE_void) { /* only var-sized, non-void tails carry a var heap */
     537        5812 :                 if (sscanf(buf,
     538             :                            " %" SCNu64 " %" SCNu64 " %" SCNu16
     539             :                            "%n",
     540             :                            &free, &size, &storage, &n) < 3) { /* %n does not count towards the sscanf result, so 3 means all fields were read */
     541           0 :                         TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     542           0 :                         return -1;
     543             :                 }
     544        5812 :                 b->tvheap = GDKmalloc(sizeof(Heap));
     545        5812 :                 if (b->tvheap == NULL) {
     546           0 :                         TRC_CRITICAL(GDK, "cannot allocate memory for heap.");
     547           0 :                         return -1;
     548             :                 }
     549        5812 :                 *b->tvheap = (Heap) { /* members not named here are zero-initialised by the compound literal */
     550        5812 :                         .free = (size_t) free,
     551        5812 :                         .size = (size_t) size,
     552             :                         .base = NULL, /* nothing is loaded into memory here */
     553             :                         .storage = STORE_INVALID,
     554        5812 :                         .hashash = hashash != 0, /* flag handed in by the caller (BBPreadEntries passes the value heapinit produced) */
     555             :                         .cleanhash = true,
     556             :                         .newstorage = STORE_INVALID,
     557             :                         .dirty = false,
     558             :                         .parentid = bid,
     559        5812 :                         .farmid = BBPselectfarm(PERSISTENT, b->ttype, varheap),
     560             :                 };
     561        5812 :                 strconcat_len(b->tvheap->filename, sizeof(b->tvheap->filename),
     562             :                               filename, ".theap", NULL); /* heap file name is the entry's filename with ".theap" appended */
     563        5812 :                 ATOMIC_INIT(&b->tvheap->refs, 1);
     564        5812 :                 if (b->tvheap->free > b->tvheap->size) { /* sanity check on the values read from BBP.dir */
     565           0 :                         TRC_CRITICAL(GDK, "\"free\" value larger than \"size\" in var heap of bat %d on line %d\n", (int) bid, lineno);
     566           0 :                         return -1; /* NOTE(review): b->tvheap stays attached on this path; presumably the caller's BATdestroy releases it - confirm */
     567             :                 }
     568             :         }
     569       21774 :         return n;
     570             : }
     571             : 
     572             : static gdk_return /* Read BAT entries from fp, one per line, build a BAT descriptor for each and register it in the BBP; returns GDK_FAIL on the first malformed line or failed allocation, GDK_SUCCEED once fgets returns NULL. */
     573         265 : BBPreadEntries(FILE *fp, unsigned bbpversion, int lineno)
     574             : {
     575             :         bat bid = 0;
     576             :         char buf[4096]; /* holds one line; NOTE(review): a longer line would be split over several fgets calls - confirm such lines cannot occur */
     577             : 
     578             :         /* read the BBP.dir and insert the BATs into the BBP */
     579       22039 :         while (fgets(buf, sizeof(buf), fp) != NULL) { /* a NULL result (end of file or read error, not distinguished here) ends the loop */
     580             :                 BAT *bn;
     581             :                 uint64_t batid;
     582             :                 uint16_t status; /* scanned from the line; not used again in this function */
     583             :                 char headname[129]; /* matches the %128s conversion below plus the terminator */
     584             :                 char filename[sizeof(BBP_physical(0))]; /* filled by %19s below; assumes this is at least 20 bytes - TODO confirm */
     585             :                 unsigned int properties;
     586             :                 int nread, n;
     587             :                 char *s, *options = NULL;
     588             :                 char logical[1024];
     589       21774 :                 uint64_t count, capacity, base = 0;
     590             :                 int Thashash;
     591             : 
     592       21774 :                 lineno++; /* lineno is only used in the diagnostics below */
     593       21774 :                 if ((s = strchr(buf, '\r')) != NULL) {
     594             :                         /* convert \r\n into just \n */
     595           0 :                         if (s[1] != '\n') { /* a carriage return that is not part of \r\n is rejected */
     596           0 :                                 TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     597           0 :                                 return GDK_FAIL;
     598             :                         }
     599           0 :                         *s++ = '\n';
     600           0 :                         *s = 0;
     601             :                 }
     602             : 
     603       21774 :                 if (sscanf(buf,
     604             :                            "%" SCNu64 " %" SCNu16 " %128s %19s %u %" SCNu64
     605             :                            " %" SCNu64 " %" SCNu64
     606             :                            "%n",
     607             :                            &batid, &status, headname, filename,
     608             :                            &properties,
     609             :                            &count, &capacity, &base,
     610             :                            &nread) < 8) { /* eight fields are required; %n (nread) does not count towards the result */
     611           0 :                         TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     612           0 :                         return GDK_FAIL;
     613             :                 }
     614             : 
     615       21774 :                 if (batid >= N_BBPINIT * BBPINIT) { /* NOTE(review): only the upper bound is checked; an id of 0 (reserved for the nil bat) is not rejected here */
     616           0 :                         TRC_CRITICAL(GDK, "bat ID (%" PRIu64 ") too large to accomodate (max %d), on line %d.", batid, N_BBPINIT * BBPINIT - 1, lineno);
     617           0 :                         return GDK_FAIL;
     618             :                 }
     619             : 
     620             :                 /* convert both / and \ path separators to our own DIR_SEP */
     621             : #if DIR_SEP != '/'
     622             :                 s = filename;
     623             :                 while ((s = strchr(s, '/')) != NULL)
     624             :                         *s++ = DIR_SEP;
     625             : #endif
     626             : #if DIR_SEP != '\\'
     627             :                 s = filename;
     628       21774 :                 while ((s = strchr(s, '\\')) != NULL)
     629           0 :                         *s++ = DIR_SEP;
     630             : #endif
     631             : 
     632       21774 :                 bid = (bat) batid; /* in range for the BBP because of the bound check above */
     633       21774 :                 if (batid >= (uint64_t) ATOMIC_GET(&BBPsize)) { /* entry lies beyond the current BBP size */
     634           0 :                         if ((bat) ATOMIC_GET(&BBPsize) + 1 >= BBPlimit && /* NOTE(review): tests the current size, not bid + 1, against BBPlimit - confirm this is sufficient */
     635           0 :                             BBPextend(0, false, bid + 1) != GDK_SUCCEED)
     636             :                                 return GDK_FAIL;
     637           0 :                         ATOMIC_SET(&BBPsize, bid + 1);
     638             :                 }
     639       21774 :                 if (BBP_desc(bid) != NULL) { /* a descriptor is already registered under this id */
     640           0 :                         TRC_CRITICAL(GDK, "duplicate entry in BBP.dir (ID = "
     641             :                                      "%" PRIu64 ") on line %d.", batid, lineno);
     642           0 :                         return GDK_FAIL;
     643             :                 }
     644       21774 :                 if ((bn = GDKzalloc(sizeof(BAT))) == NULL ||
     645       21774 :                     (bn->theap = GDKzalloc(sizeof(Heap))) == NULL) {
     646           0 :                         GDKfree(bn); /* bn may be NULL here when the first allocation failed */
     647           0 :                         TRC_CRITICAL(GDK, "cannot allocate memory for BAT.");
     648           0 :                         return GDK_FAIL;
     649             :                 }
     650       21774 :                 bn->batCacheid = bid;
     651       21774 :                 if (BATroles(bn, NULL) != GDK_SUCCEED) {
     652           0 :                         GDKfree(bn->theap);
     653           0 :                         GDKfree(bn);
     654           0 :                         TRC_CRITICAL(GDK, "BATroles failed.");
     655           0 :                         return GDK_FAIL;
     656             :                 }
     657       21774 :                 bn->batTransient = false;
     658       21774 :                 bn->batCopiedtodisk = true;
     659       21774 :                 bn->batRestricted = (properties & 0x06) >> 1; /* bits 1 and 2 of properties */
     660       21774 :                 bn->batCount = (BUN) count;
     661       21774 :                 bn->batInserted = bn->batCount;
     662       21774 :                 bn->batCapacity = (BUN) capacity;
     663             :                 char name[MT_NAME_LEN]; /* scratch buffer reused for the three lock names below */
     664       21774 :                 snprintf(name, sizeof(name), "heaplock%d", bn->batCacheid); /* fits */
     665       21774 :                 MT_lock_init(&bn->theaplock, name);
     666       21774 :                 snprintf(name, sizeof(name), "BATlock%d", bn->batCacheid); /* fits */
     667       21774 :                 MT_lock_init(&bn->batIdxLock, name);
     668       21774 :                 snprintf(name, sizeof(name), "hashlock%d", bn->batCacheid); /* fits */
     669       21774 :                 MT_rwlock_init(&bn->thashlock, name);
     670       21774 :                 ATOMIC_INIT(&bn->theap->refs, 1);
     671             : 
     672       21774 :                 if (base > (uint64_t) GDK_oid_max) {
     673           0 :                         BATdestroy(bn);
     674           0 :                         TRC_CRITICAL(GDK, "head seqbase out of range (ID = %" PRIu64 ", seq = %" PRIu64 ") on line %d.", batid, base, lineno);
     675           0 :                         return GDK_FAIL;
     676             :                 }
     677       21774 :                 bn->hseqbase = (oid) base;
     678       21774 :                 n = heapinit(bn, buf + nread, &Thashash, bbpversion, bid, filename, lineno); /* parses the tail heap fields that follow the first eight */
     679       21774 :                 if (n < 0) {
     680           0 :                         BATdestroy(bn);
     681           0 :                         return GDK_FAIL;
     682             :                 }
     683       21774 :                 nread += n;
     684       21774 :                 n = vheapinit(bn, buf + nread, Thashash, bid, filename, lineno); /* parses the var heap fields; 0 when the BAT has no var heap */
     685       21774 :                 if (n < 0) {
     686           0 :                         BATdestroy(bn);
     687           0 :                         return GDK_FAIL;
     688             :                 }
     689       21774 :                 nread += n;
     690             : 
     691       21774 :                 if (buf[nread] != '\n' && buf[nread] != ' ') { /* the parsed fields must be followed by end of line or by an options string */
     692           0 :                         BATdestroy(bn);
     693           0 :                         TRC_CRITICAL(GDK, "invalid format for BBP.dir on line %d", lineno);
     694           0 :                         return GDK_FAIL;
     695             :                 }
     696       21774 :                 if (buf[nread] == ' ')
     697           0 :                         options = buf + nread + 1; /* rest of the line, including whatever line terminator is left in buf */
     698             : 
     699       21774 :                 if (snprintf(BBP_bak(bid), sizeof(BBP_bak(bid)), "tmp_%o", (unsigned) bid) >= (int) sizeof(BBP_bak(bid))) { /* default name: "tmp_" followed by the id in octal */
     700           0 :                         BATdestroy(bn);
     701           0 :                         TRC_CRITICAL(GDK, "BBP logical filename directory is too large, on line %d\n", lineno);
     702           0 :                         return GDK_FAIL;
     703             :                 }
     704       21774 :                 if ((s = strchr(headname, '~')) != NULL && s == headname) { /* the name starts with '~': use the default tmp_ name */
     705             :                         /* sizeof(logical) > sizeof(BBP_bak(bid)), so
     706             :                          * this fits */
     707           0 :                         strcpy(logical, BBP_bak(bid));
     708             :                 } else {
     709       21774 :                         if (s)
     710           0 :                                 *s = 0; /* a '~' further on cuts the name off at that point */
     711       21774 :                         strcpy_len(logical, headname, sizeof(logical));
     712             :                 }
     713       21774 :                 if (strcmp(logical, BBP_bak(bid)) == 0) {
     714       20658 :                         BBP_logical(bid) = BBP_bak(bid); /* share the BBP_bak storage instead of allocating a copy */
     715             :                 } else {
     716        1116 :                         BBP_logical(bid) = GDKstrdup(logical);
     717        1116 :                         if (BBP_logical(bid) == NULL) {
     718           0 :                                 BATdestroy(bn);
     719           0 :                                 TRC_CRITICAL(GDK, "GDKstrdup failed\n");
     720           0 :                                 return GDK_FAIL;
     721             :                         }
     722             :                 }
     723             :                 /* tailname is ignored */
     724       21774 :                 strcpy_len(BBP_physical(bid), filename, sizeof(BBP_physical(bid)));
     725             : #ifdef __COVERITY__
     726             :                 /* help coverity */
     727             :                 BBP_physical(bid)[sizeof(BBP_physical(bid)) - 1] = 0;
     728             : #endif
     729       21774 :                 BBP_options(bid) = NULL;
     730       21774 :                 if (options) {
     731           0 :                         BBP_options(bid) = GDKstrdup(options);
     732           0 :                         if (BBP_options(bid) == NULL) {
     733           0 :                                 BATdestroy(bn); /* NOTE(review): a BBP_logical(bid) allocated above is not released on this path - presumably acceptable because the caller gives up; confirm */
     734           0 :                                 TRC_CRITICAL(GDK, "GDKstrdup failed\n");
     735           0 :                                 return GDK_FAIL;
     736             :                         }
     737             :                 }
     738       21774 :                 BBP_refs(bid) = 0;
     739       21774 :                 BBP_lrefs(bid) = 1;     /* any BAT we encounter here is persistent, so has a logical reference */
     740       21774 :                 BBP_desc(bid) = bn; /* from here on the descriptor is reachable through the BBP */
     741       21774 :                 BBP_pid(bid) = 0;
     742       21774 :                 BBP_status_set(bid, BBPEXISTING);       /* do we need other status bits? */
     743             :         }
     744             :         return GDK_SUCCEED;
     745             : }
     746             : 
     747             : /* check that the necessary files for all BATs exist and are large
     748             :  * enough; returns GDK_FAIL at the first missing or too small file */
     749             : static gdk_return
     750         266 : BBPcheckbats(unsigned bbpversion)
     751             : {
     752             :         (void) bbpversion; /* only referenced in the disabled #else variant below */
     753       84720 :         for (bat bid = 1, size = (bat) ATOMIC_GET(&BBPsize); bid < size; bid++) { /* the scan starts at 1 and reads BBPsize once */
     754             :                 struct stat statb;
     755             :                 BAT *b;
     756             :                 char *path;
     757             : 
     758       84454 :                 if ((b = BBP_desc(bid)) == NULL) {
     759             :                         /* not a valid BAT */
     760       62680 :                         continue;
     761             :                 }
     762       21774 :                 if (b->ttype == TYPE_void) {
     763             :                         /* no files needed */
     764           0 :                         continue;
     765             :                 }
     766       21774 :                 if (b->theap->free > 0) { /* a tail heap with no bytes in use is not checked */
     767       13424 :                         path = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
     768       13424 :                         if (path == NULL)
     769           0 :                                 return GDK_FAIL;
     770             : #if 1
     771             :                         /* first check string offset heap with width,
     772             :                          * then without */
     773       13424 :                         if (MT_stat(path, &statb) < 0) {
     774             : #ifdef GDKLIBRARY_TAILN
     775         430 :                                 if (b->ttype == TYPE_str &&
     776         430 :                                     b->twidth < SIZEOF_VAR_T) {
     777         430 :                                         size_t taillen = strlen(path) - 1; /* index of the last character of the path */
     778         430 :                                         char tailsave = path[taillen]; /* kept only for the error message below */
     779         430 :                                         path[taillen] = 0; /* retry with the last character dropped (presumably the width digit of .tailN - confirm) */
     780         430 :                                         if (MT_stat(path, &statb) < 0) {
     781           0 :                                                 GDKsyserror("cannot stat file %s%c or %s (expected size %zu)\n",
     782             :                                                             path, tailsave, path, b->theap->free);
     783           0 :                                                 GDKfree(path);
     784             :                                                 return GDK_FAIL;
     785             :                                         }
     786             :                                 } else
     787             : #endif
     788             :                                 {
     789           0 :                                         GDKsyserror("cannot stat file %s (expected size %zu)\n",
     790             :                                                     path, b->theap->free);
     791           0 :                                         GDKfree(path);
     792             :                                         return GDK_FAIL;
     793             :                                 }
     794             :                         }
     795             : #else
     796             :                         /* first check string offset heap without width,
     797             :                          * then with (this variant is compiled out by the #if 1 above) */
     798             : #ifdef GDKLIBRARY_TAILN
     799             :                         /* if bbpversion > GDKLIBRARY_TAILN, the offset heap can
     800             :                          * exist with either name .tail1 (etc) or .tail, if <=
     801             :                          * GDKLIBRARY_TAILN, only with .tail */
     802             :                         char tailsave = 0;
     803             :                         size_t taillen = 0;
     804             :                         if (b->ttype == TYPE_str &&
     805             :                             b->twidth < SIZEOF_VAR_T) {
     806             :                                 /* old version: .tail, not .tail1, .tail2, .tail4 */
     807             :                                 taillen = strlen(path) - 1;
     808             :                                 tailsave = path[taillen];
     809             :                                 path[taillen] = 0;
     810             :                         }
     811             : #endif
     812             :                         if (MT_stat(path, &statb) < 0
     813             : #ifdef GDKLIBRARY_TAILN
     814             :                             && bbpversion > GDKLIBRARY_TAILN
     815             :                             && b->ttype == TYPE_str
     816             :                             && b->twidth < SIZEOF_VAR_T
     817             :                             && (path[taillen] = tailsave) != 0
     818             :                             && MT_stat(path, &statb) < 0
     819             : #endif
     820             :                                 ) {
     821             : 
     822             :                                 GDKsyserror("cannot stat file %s (expected size %zu)\n",
     823             :                                             path, b->theap->free);
     824             :                                 GDKfree(path);
     825             :                                 return GDK_FAIL;
     826             :                         }
     827             : #endif
     828       13424 :                         if ((size_t) statb.st_size < b->theap->free) { /* after a successful fallback stat, path is still the shortened name, so the message names the file that was measured */
     829           0 :                                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->theap->free, (size_t) statb.st_size);
     830           0 :                                 GDKfree(path);
     831           0 :                                 return GDK_FAIL;
     832             :                         }
     833       13424 :                         GDKfree(path);
     834             :                 }
     835       21774 :                 if (b->tvheap != NULL && b->tvheap->free > 0) { /* same existence and size check for the var heap file */
     836        4494 :                         path = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "theap");
     837        4494 :                         if (path == NULL)
     838             :                                 return GDK_FAIL;
     839        4494 :                         if (MT_stat(path, &statb) < 0) {
     840           0 :                                 GDKsyserror("cannot stat file %s\n",
     841             :                                             path);
     842           0 :                                 GDKfree(path);
     843             :                                 return GDK_FAIL;
     844             :                         }
     845        4494 :                         if ((size_t) statb.st_size < b->tvheap->free) {
     846           0 :                                 GDKerror("file %s too small (expected %zu, actual %zu)\n", path, b->tvheap->free, (size_t) statb.st_size);
     847           0 :                                 GDKfree(path);
     848           0 :                                 return GDK_FAIL;
     849             :                         }
     850        4494 :                         GDKfree(path);
     851             :                 }
     852             :         }
     853             :         return GDK_SUCCEED;
     854             : }
     855             : 
     856             : #ifdef HAVE_HGE /* SIZEOF_MAX_INT is the upper bound BBPheader accepts for the integer size recorded in BBP.dir */
     857             : #define SIZEOF_MAX_INT SIZEOF_HGE
     858             : #else /* without HAVE_HGE the bound is SIZEOF_LNG */
     859             : #define SIZEOF_MAX_INT SIZEOF_LNG
     860             : #endif
     861             : 
     862             : static unsigned /* Read and validate the header lines of BBP.dir from fp; returns the BBP version found, or 0 after reporting an error. */
     863         265 : BBPheader(FILE *fp, int *lineno, bat *bbpsize)
     864             : {
     865             :         char buf[BUFSIZ]; /* reused for every header line */
     866             :         int sz, ptrsize, oidsize, intsize;
     867             :         unsigned bbpversion;
     868             : 
     869         265 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
     870           0 :                 TRC_CRITICAL(GDK, "BBP.dir is empty");
     871           0 :                 return 0;
     872             :         }
     873         265 :         ++*lineno;
     874         265 :         if (sscanf(buf, "BBP.dir, GDKversion %u\n", &bbpversion) != 1) { /* first line must carry the version number */
     875           0 :                 GDKerror("old BBP without version number; "
     876             :                          "dump the database using a compatible version, "
     877             :                          "then restore into new database using this version.\n");
     878           0 :                 return 0;
     879             :         }
     880         265 :         if (bbpversion != GDKLIBRARY && /* exactly three versions are accepted */
     881         265 :             bbpversion != GDKLIBRARY_TAILN &&
     882             :             bbpversion != GDKLIBRARY_MINMAX_POS) {
     883           0 :                 TRC_CRITICAL(GDK, "incompatible BBP version: expected 0%o, got 0%o. "
     884             :                              "This database was probably created by a %s version of MonetDB.",
     885             :                              GDKLIBRARY, bbpversion,
     886             :                              bbpversion > GDKLIBRARY ? "newer" : "too old");
     887           0 :                 return 0;
     888             :         }
     889         265 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
     890           0 :                 TRC_CRITICAL(GDK, "short BBP");
     891           0 :                 return 0;
     892             :         }
     893         265 :         ++*lineno;
     894         265 :         if (sscanf(buf, "%d %d %d", &ptrsize, &oidsize, &intsize) != 3) { /* second line: pointer, OID and max. integer sizes */
     895           0 :                 TRC_CRITICAL(GDK, "BBP.dir has incompatible format: pointer, OID, and max. integer sizes are missing on line %d", *lineno);
     896           0 :                 return 0;
     897             :         }
     898         265 :         if (ptrsize != SIZEOF_SIZE_T || oidsize != SIZEOF_OID) { /* pointer and OID sizes must match this build exactly */
     899           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
     900             :                              "expected pointer size %d, got %d, expected OID size %d, got %d.",
     901             :                              SIZEOF_SIZE_T, ptrsize, SIZEOF_OID, oidsize);
     902           0 :                 return 0;
     903             :         }
     904         265 :         if (intsize > SIZEOF_MAX_INT) { /* a smaller recorded integer size is accepted, a larger one is not */
     905           0 :                 TRC_CRITICAL(GDK, "database created with incompatible server: "
     906             :                              "expected max. integer size %d, got %d.",
     907             :                              SIZEOF_MAX_INT, intsize);
     908           0 :                 return 0;
     909             :         }
     910         265 :         if (fgets(buf, sizeof(buf), fp) == NULL) {
     911           0 :                 TRC_CRITICAL(GDK, "short BBP");
     912           0 :                 return 0;
     913             :         }
     914         265 :         ++*lineno;
     915         265 :         if (sscanf(buf, "BBPsize=%d", &sz) != 1) { /* third line: recorded BBP size */
     916           0 :                 TRC_CRITICAL(GDK, "no BBPsize value found\n");
     917           0 :                 return 0;
     918             :         }
     919         265 :         sz = (int) (sz * BATMARGIN); /* NOTE(review): sz comes straight from the file and is not range-checked before scaling - confirm negative or huge values cannot occur */
     920         265 :         if (sz > *bbpsize) /* *bbpsize is only ever raised, never lowered */
     921          78 :                 *bbpsize = sz;
     922         265 :         if (bbpversion > GDKLIBRARY_MINMAX_POS) { /* versions above GDKLIBRARY_MINMAX_POS carry one more header line */
     923         257 :                 if (fgets(buf, sizeof(buf), fp) == NULL) { /* NOTE(review): *lineno is not incremented for this line, unlike the three lines above - confirm intended */
     924           0 :                         TRC_CRITICAL(GDK, "short BBP");
     925           0 :                         return 0;
     926             :                 }
     927         257 :                 if (sscanf(buf, "BBPinfo=" LLSCN " " LLSCN, &BBPlogno, &BBPtransid) != 2) { /* stores both values directly into BBPlogno and BBPtransid */
     928           0 :                         TRC_CRITICAL(GDK, "no info value found\n");
     929           0 :                         return 0;
     930             :                 }
     931             :         }
     932         265 :         return bbpversion;
     933             : }
     934             : 
     935             : bool /* Report whether the given farm has no directory name (BBPfarms[farmid].dirname == NULL). */
     936   138824376 : GDKinmemory(int farmid)
     937             : {
     938   138824376 :         if (farmid == NOFARM)
     939             :                 farmid = 0; /* NOFARM is answered on behalf of farm 0 */
     940   138824376 :         assert(farmid >= 0 && farmid < MAXFARMS); /* range is only enforced in builds with assertions enabled */
     941   138824376 :         return BBPfarms[farmid].dirname == NULL;
     942             : }
     943             : 
     944             : /* register dirname as a farm for the roles in rolemask, using the first free slot of BBPfarms; all errors are fatal */
     945             : gdk_return
     946         524 : BBPaddfarm(const char *dirname, uint32_t rolemask, bool logerror)
     947             : {
     948             :         struct stat st;
     949             :         int i;
     950             : 
     951         524 :         if (dirname == NULL) { /* no directory at all: only record the roles on farm 0 */
     952           1 :                 assert(BBPfarms[0].dirname == NULL);
     953           1 :                 assert(rolemask & 1);
     954           1 :                 assert(BBPfarms[0].roles == 0);
     955           1 :                 BBPfarms[0].roles = rolemask;
     956           1 :                 return GDK_SUCCEED;
     957             :         }
     958         523 :         if (strchr(dirname, '\n') != NULL) {
     959           0 :                 if (logerror) /* messages are emitted only when the caller asked for them */
     960           0 :                         GDKerror("no newline allowed in directory name\n");
     961           0 :                 return GDK_FAIL;
     962             :         }
     963         523 :         if (rolemask == 0 || (rolemask & 1 && BBPfarms[0].dirname != NULL)) { /* an empty mask is rejected, and so is role bit 0 once farm 0 already has a directory */
     964           0 :                 if (logerror)
     965           0 :                         GDKerror("bad rolemask\n");
     966           0 :                 return GDK_FAIL;
     967             :         }
     968         523 :         if (strcmp(dirname, "in-memory") == 0 ||
     969         523 :             /* backward compatibility: */ strcmp(dirname, ":memory:") == 0) {
     970             :                 dirname = NULL; /* these names select a farm without a directory */
     971         523 :         } else if (MT_mkdir(dirname) < 0) {
     972         459 :                 if (errno == EEXIST) { /* something already exists under that name: it must be a directory */
     973         459 :                         if (MT_stat(dirname, &st) == -1 || !S_ISDIR(st.st_mode)) {
     974           0 :                                 if (logerror)
     975           0 :                                         GDKerror("%s: not a directory\n", dirname);
     976           0 :                                 return GDK_FAIL;
     977             :                         }
     978             :                 } else {
     979           0 :                         if (logerror)
     980           0 :                                 GDKsyserror("%s: cannot create directory\n", dirname);
     981           0 :                         return GDK_FAIL;
     982             :                 }
     983             :         }
     984         779 :         for (i = 0; i < MAXFARMS; i++) {
     985         779 :                 if (BBPfarms[i].roles == 0) { /* a slot without roles counts as free */
     986         523 :                         if (dirname) {
     987         523 :                                 BBPfarms[i].dirname = GDKstrdup(dirname);
     988         523 :                                 if (BBPfarms[i].dirname == NULL)
     989             :                                         return GDK_FAIL;
     990             :                         }
     991         523 :                         BBPfarms[i].roles = rolemask; /* NOTE(review): the GDK_FAIL returns below leave this slot claimed (roles and dirname set) - presumably fine because errors are fatal; confirm */
     992         523 :                         if ((rolemask & 1) == 0 && dirname != NULL) { /* extra checks apply only to a directory-backed farm without role bit 0 */
     993             :                                 char *bbpdir;
     994             :                                 int j;
     995             : 
     996         401 :                                 for (j = 0; j < i; j++) /* the same directory in an earlier slot: accept without further checks */
     997         256 :                                         if (BBPfarms[j].dirname != NULL &&
     998         256 :                                             strcmp(BBPfarms[i].dirname,
     999             :                                                    BBPfarms[j].dirname) == 0)
    1000             :                                                 return GDK_SUCCEED;
    1001             :                                 /* if an extra farm, make sure we
    1002             :                                  * don't find a BBP.dir there that
    1003             :                                  * might belong to an existing
    1004             :                                  * database */
    1005         145 :                                 bbpdir = GDKfilepath(i, BATDIR, "BBP", "dir");
    1006         145 :                                 if (bbpdir == NULL) {
    1007             :                                         return GDK_FAIL;
    1008             :                                 }
    1009         145 :                                 if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) { /* only a failure with ENOENT passes; a successful stat or any other error is rejected */
    1010           0 :                                         GDKfree(bbpdir);
    1011           0 :                                         if (logerror)
    1012           0 :                                                 GDKerror("%s is a database\n", dirname);
    1013           0 :                                         return GDK_FAIL;
    1014             :                                 }
    1015         145 :                                 GDKfree(bbpdir);
    1016         145 :                                 bbpdir = GDKfilepath(i, BAKDIR, "BBP", "dir"); /* same check for the BBP.dir under BAKDIR */
    1017         145 :                                 if (bbpdir == NULL) {
    1018             :                                         return GDK_FAIL;
    1019             :                                 }
    1020         145 :                                 if (MT_stat(bbpdir, &st) != -1 || errno != ENOENT) {
    1021           0 :                                         GDKfree(bbpdir);
    1022           0 :                                         if (logerror)
    1023           0 :                                                 GDKerror("%s is a database\n", dirname);
    1024           0 :                                         return GDK_FAIL;
    1025             :                                 }
    1026         145 :                                 GDKfree(bbpdir);
    1027             :                         }
    1028         412 :                         return GDK_SUCCEED;
    1029             :                 }
    1030             :         }
    1031           0 :         if (logerror) /* reached only when every slot already has roles */
    1032           0 :                 GDKerror("too many farms\n");
    1033             :         return GDK_FAIL;
    1034             : }
    1035             : 
    1036             : #ifdef GDKLIBRARY_TAILN
    1037             : static gdk_return
    1038           8 : movestrbats(void)
    1039             : {
    1040       13853 :         for (bat bid = 1, nbat = (bat) ATOMIC_GET(&BBPsize); bid < nbat; bid++) {
    1041       13845 :                 BAT *b = BBP_desc(bid);
    1042       13845 :                 if (b == NULL) {
    1043             :                         /* not a valid BAT */
    1044       11271 :                         continue;
    1045             :                 }
    1046        2574 :                 if (b->ttype != TYPE_str || b->twidth == SIZEOF_VAR_T || b->batCount == 0)
    1047        2144 :                         continue;
    1048         430 :                 char *oldpath = GDKfilepath(0, BATDIR, BBP_physical(b->batCacheid), "tail");
    1049         430 :                 char *newpath = GDKfilepath(0, BATDIR, b->theap->filename, NULL);
    1050             :                 int ret = -1;
    1051         430 :                 if (oldpath != NULL && newpath != NULL) {
    1052             :                         struct stat oldst, newst;
    1053             :                         bool oldexist = MT_stat(oldpath, &oldst) == 0;
    1054             :                         bool newexist = MT_stat(newpath, &newst) == 0;
    1055         430 :                         if (newexist) {
    1056           0 :                                 if (oldexist) {
    1057           0 :                                         if (oldst.st_mtime > newst.st_mtime) {
    1058           0 :                                                 GDKerror("both %s and %s exist with %s unexpectedly newer: manual intervention required\n", oldpath, newpath, oldpath);
    1059             :                                                 ret = -1;
    1060             :                                         } else {
    1061           0 :                                                 TRC_WARNING(GDK, "both %s and %s exist, removing %s\n", oldpath, newpath, oldpath);
    1062             :                                                 ret = MT_remove(oldpath);
    1063             :                                         }
    1064             :                                 } else {
    1065             :                                         /* already good */
    1066             :                                         ret = 0;
    1067             :                                 }
    1068         430 :                         } else if (oldexist) {
    1069         430 :                                 TRC_DEBUG(IO_, "rename %s to %s\n", oldpath, newpath);
    1070             :                                 ret = MT_rename(oldpath, newpath);
    1071             :                         } else {
    1072             :                                 /* neither file exists: may be ok, but
    1073             :                                  * will be checked later */
    1074             :                                 ret = 0;
    1075             :                         }
    1076             :                 }
    1077         430 :                 GDKfree(oldpath);
    1078         430 :                 GDKfree(newpath);
    1079         430 :                 if (ret == -1)
    1080             :                         return GDK_FAIL;
    1081             :         }
    1082             :         return GDK_SUCCEED;
    1083             : }
    1084             : #endif
    1085             : 
    1086             : static void
    1087          47 : BBPtrim(bool aggressive)
    1088             : {
    1089             :         int n = 0;
    1090             :         unsigned flag = BBPUNLOADING | BBPSYNCING | BBPSAVING;
    1091          47 :         if (!aggressive)
    1092             :                 flag |= BBPHOT;
    1093       55993 :         for (bat bid = 1, nbat = (bat) ATOMIC_GET(&BBPsize); bid < nbat; bid++) {
    1094       55946 :                 MT_lock_set(&GDKswapLock(bid));
    1095             :                 BAT *b = NULL;
    1096             :                 bool swap = false;
    1097       55946 :                 if (BBP_refs(bid) == 0 &&
    1098       53062 :                     BBP_lrefs(bid) != 0 &&
    1099       35561 :                     (b = BBP_cache(bid)) != NULL &&
    1100       28762 :                     b->batSharecnt == 0 &&
    1101       28507 :                     (!BATdirty(b) || (aggressive && b->theap->storage == STORE_MMAP && (b->tvheap == NULL || b->tvheap->storage == STORE_MMAP))) &&
    1102       12610 :                     !(BBP_status(bid) & flag) /*&&
    1103             :                     (BBP_status(bid) & BBPPERSISTENT ||
    1104             :                     (b->batRole == PERSISTENT && BBP_lrefs(bid) == 1)) */) {
    1105        2286 :                         BBP_status_on(bid, BBPUNLOADING);
    1106             :                         swap = true;
    1107             :                 }
    1108       55946 :                 MT_lock_unset(&GDKswapLock(bid));
    1109       55946 :                 if (swap) {
    1110        2286 :                         TRC_DEBUG(BAT_, "unload and free bat %d\n", bid);
    1111        2286 :                         if (BBPfree(b) != GDK_SUCCEED)
    1112           0 :                                 GDKerror("unload failed for bat %d", bid);
    1113        2286 :                         n++;
    1114             :                 }
    1115             :         }
    1116          47 :         TRC_DEBUG(BAT_, "unloaded %d bats%s\n", n, aggressive ? " (also hot)" : "");
    1117          47 : }
    1118             : 
    1119             : static void
    1120         266 : BBPmanager(void *dummy)
    1121             : {
    1122             :         (void) dummy;
    1123             : 
    1124             :         for (;;) {
    1125             :                 int n = 0;
    1126      140901 :                 for (bat bid = 1, nbat = (bat) ATOMIC_GET(&BBPsize); bid < nbat; bid++) {
    1127      140588 :                         MT_lock_set(&GDKswapLock(bid));
    1128      140588 :                         if (BBP_refs(bid) == 0 && BBP_lrefs(bid) != 0) {
    1129       57373 :                                 n += (BBP_status(bid) & BBPHOT) != 0;
    1130       57373 :                                 BBP_status_off(bid, BBPHOT);
    1131             :                         }
    1132      140588 :                         MT_lock_unset(&GDKswapLock(bid));
    1133             :                 }
    1134         313 :                 TRC_DEBUG(BAT_, "cleared HOT bit from %d bats\n", n);
    1135         313 :                 size_t cur = GDKvm_cursize();
    1136        9350 :                 for (int i = 0, n = cur > GDK_vm_maxsize / 2 ? 1 : cur > GDK_vm_maxsize / 4 ? 10 : 100; i < n; i++) {
    1137        8992 :                         MT_sleep_ms(100);
    1138        8990 :                         if (GDKexiting())
    1139             :                                 return;
    1140             :                 }
    1141          47 :                 BBPtrim(false);
    1142          47 :                 if (GDKexiting())
    1143             :                         return;
    1144             :         }
    1145             : }
    1146             : 
    1147             : static MT_Id manager;
    1148             : 
    1149             : gdk_return
    1150         266 : BBPinit(bool first)
    1151             : {
    1152             :         FILE *fp = NULL;
    1153             :         struct stat st;
    1154             :         unsigned bbpversion = 0;
    1155             :         int i;
    1156         266 :         int lineno = 0;
    1157             : 
    1158             :         /* the maximum number of BATs allowed in the system and the
    1159             :          * size of the "physical" array are linked in a complicated
    1160             :          * manner.  The expression below shows the relationship */
    1161             :         static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * ((sizeof(BBP[0][0].physical) + 2) * 2 / 5))), "\"physical\" array in BBPrec is too small");
    1162             :         /* similarly, the maximum number of BATs allowed also has a
    1163             :          * (somewhat simpler) relation with the size of the "bak"
    1164             :          * array */
    1165             :         static_assert((uint64_t) N_BBPINIT * BBPINIT < (UINT64_C(1) << (3 * (sizeof(BBP[0][0].bak) - 5))), "\"bak\" array in BBPrec is too small");
    1166             : 
    1167         266 :         if (first) {
    1168           0 :                 for (i = 0; i <= BBP_THREADMASK; i++) {
    1169             :                         char name[MT_NAME_LEN];
    1170           0 :                         snprintf(name, sizeof(name), "GDKcacheLock%d", i);
    1171           0 :                         MT_lock_init(&GDKbbpLock[i].cache, name);
    1172           0 :                         GDKbbpLock[i].free = 0;
    1173             :                 }
    1174             :         }
    1175         266 :         if (!GDKinmemory(0)) {
    1176             :                 str bbpdirstr, backupbbpdirstr;
    1177             : 
    1178         265 :                 MT_lock_set(&GDKtmLock);
    1179             : 
    1180         265 :                 if (!(bbpdirstr = GDKfilepath(0, BATDIR, "BBP", "dir"))) {
    1181           0 :                         TRC_CRITICAL(GDK, "GDKmalloc failed\n");
    1182           0 :                         MT_lock_unset(&GDKtmLock);
    1183           0 :                         return GDK_FAIL;
    1184             :                 }
    1185             : 
    1186         265 :                 if (!(backupbbpdirstr = GDKfilepath(0, BAKDIR, "BBP", "dir"))) {
    1187           0 :                         GDKfree(bbpdirstr);
    1188           0 :                         TRC_CRITICAL(GDK, "GDKmalloc failed\n");
    1189           0 :                         MT_lock_unset(&GDKtmLock);
    1190           0 :                         return GDK_FAIL;
    1191             :                 }
    1192             : 
    1193         265 :                 if (GDKremovedir(0, TEMPDIR) != GDK_SUCCEED) {
    1194           0 :                         GDKfree(bbpdirstr);
    1195           0 :                         GDKfree(backupbbpdirstr);
    1196           0 :                         TRC_CRITICAL(GDK, "cannot remove directory %s\n", TEMPDIR);
    1197           0 :                         MT_lock_unset(&GDKtmLock);
    1198           0 :                         return GDK_FAIL;
    1199             :                 }
    1200             : 
    1201         265 :                 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) {
    1202           0 :                         GDKfree(bbpdirstr);
    1203           0 :                         GDKfree(backupbbpdirstr);
    1204           0 :                         TRC_CRITICAL(GDK, "cannot remove directory %s\n", DELDIR);
    1205           0 :                         MT_lock_unset(&GDKtmLock);
    1206           0 :                         return GDK_FAIL;
    1207             :                 }
    1208             : 
    1209             :                 /* first move everything from SUBDIR to BAKDIR (its parent) */
    1210         265 :                 if (BBPrecover_subdir() != GDK_SUCCEED) {
    1211           0 :                         GDKfree(bbpdirstr);
    1212           0 :                         GDKfree(backupbbpdirstr);
    1213           0 :                         TRC_CRITICAL(GDK, "cannot properly recover_subdir process %s.", SUBDIR);
    1214           0 :                         MT_lock_unset(&GDKtmLock);
    1215           0 :                         return GDK_FAIL;
    1216             :                 }
    1217             : 
    1218             :                 /* try to obtain a BBP.dir from bakdir */
    1219         265 :                 if (MT_stat(backupbbpdirstr, &st) == 0) {
    1220             :                         /* backup exists; *must* use it */
    1221          78 :                         if (recover_dir(0, MT_stat(bbpdirstr, &st) == 0) != GDK_SUCCEED) {
    1222           0 :                                 GDKfree(bbpdirstr);
    1223           0 :                                 GDKfree(backupbbpdirstr);
    1224           0 :                                 MT_lock_unset(&GDKtmLock);
    1225           0 :                                 goto bailout;
    1226             :                         }
    1227          78 :                         if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1228           0 :                                 GDKfree(bbpdirstr);
    1229           0 :                                 GDKfree(backupbbpdirstr);
    1230           0 :                                 TRC_CRITICAL(GDK, "cannot open recovered BBP.dir.");
    1231           0 :                                 MT_lock_unset(&GDKtmLock);
    1232           0 :                                 return GDK_FAIL;
    1233             :                         }
    1234         187 :                 } else if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1235             :                         /* there was no BBP.dir either. Panic! try to use a
    1236             :                          * BBP.bak */
    1237         187 :                         if (MT_stat(backupbbpdirstr, &st) < 0) {
    1238             :                                 /* no BBP.bak (nor BBP.dir or BACKUP/BBP.dir):
    1239             :                                  * create a new one */
    1240         187 :                                 TRC_DEBUG(IO_, "initializing BBP.\n");
    1241         187 :                                 if (BBPdir_init() != GDK_SUCCEED) {
    1242           0 :                                         GDKfree(bbpdirstr);
    1243           0 :                                         GDKfree(backupbbpdirstr);
    1244           0 :                                         MT_lock_unset(&GDKtmLock);
    1245           0 :                                         goto bailout;
    1246             :                                 }
    1247           0 :                         } else if (GDKmove(0, BATDIR, "BBP", "bak", BATDIR, "BBP", "dir", true) == GDK_SUCCEED)
    1248           0 :                                 TRC_DEBUG(IO_, "reverting to dir saved in BBP.bak.\n");
    1249             : 
    1250         187 :                         if ((fp = GDKfilelocate(0, "BBP", "r", "dir")) == NULL) {
    1251           0 :                                 GDKsyserror("cannot open BBP.dir");
    1252           0 :                                 GDKfree(bbpdirstr);
    1253           0 :                                 GDKfree(backupbbpdirstr);
    1254           0 :                                 MT_lock_unset(&GDKtmLock);
    1255           0 :                                 goto bailout;
    1256             :                         }
    1257             :                 }
    1258         265 :                 assert(fp != NULL);
    1259         265 :                 GDKfree(bbpdirstr);
    1260         265 :                 GDKfree(backupbbpdirstr);
    1261         265 :                 MT_lock_unset(&GDKtmLock);
    1262             :         }
    1263             : 
    1264             :         /* scan the BBP.dir to obtain current size */
    1265         266 :         BBPlimit = 0;
    1266         266 :         memset(BBP, 0, sizeof(BBP));
    1267             : 
    1268             :         bat bbpsize;
    1269         266 :         bbpsize = 1;
    1270         266 :         if (GDKinmemory(0)) {
    1271             :                 bbpversion = GDKLIBRARY;
    1272             :         } else {
    1273         265 :                 bbpversion = BBPheader(fp, &lineno, &bbpsize);
    1274         265 :                 if (bbpversion == 0)
    1275             :                         return GDK_FAIL;
    1276             :         }
    1277             : 
    1278             :         /* allocate BBP records */
    1279         266 :         if (BBPextend(0, false, bbpsize) != GDK_SUCCEED)
    1280             :                 return GDK_FAIL;
    1281         266 :         ATOMIC_SET(&BBPsize, bbpsize);
    1282             : 
    1283         266 :         if (!GDKinmemory(0)) {
    1284         265 :                 if (BBPreadEntries(fp, bbpversion, lineno) != GDK_SUCCEED)
    1285             :                         return GDK_FAIL;
    1286         265 :                 fclose(fp);
    1287             :         }
    1288             : 
    1289         266 :         MT_lock_set(&BBPnameLock);
    1290         266 :         if (BBPinithash(0, (bat) ATOMIC_GET(&BBPsize)) != GDK_SUCCEED) {
    1291           0 :                 TRC_CRITICAL(GDK, "BBPinithash failed");
    1292           0 :                 MT_lock_unset(&BBPnameLock);
    1293           0 :                 return GDK_FAIL;
    1294             :         }
    1295         266 :         MT_lock_unset(&BBPnameLock);
    1296             : 
    1297             :         /* will call BBPrecover if needed */
    1298         266 :         if (!GDKinmemory(0)) {
    1299         265 :                 MT_lock_set(&GDKtmLock);
    1300         265 :                 gdk_return rc = BBPprepare(false);
    1301         265 :                 MT_lock_unset(&GDKtmLock);
    1302         265 :                 if (rc != GDK_SUCCEED) {
    1303           0 :                         TRC_CRITICAL(GDK, "cannot properly prepare process %s.", BAKDIR);
    1304           0 :                         return rc;
    1305             :                 }
    1306             :         }
    1307             : 
    1308         266 :         if (BBPcheckbats(bbpversion) != GDK_SUCCEED)
    1309             :                 return GDK_FAIL;
    1310             : 
    1311             : #ifdef GDKLIBRARY_TAILN
    1312             :         char *needstrbatmove;
    1313         266 :         if (GDKinmemory(0)) {
    1314             :                 needstrbatmove = NULL;
    1315             :         } else {
    1316         265 :                 needstrbatmove = GDKfilepath(0, BATDIR, "needstrbatmove", NULL);
    1317         265 :                 if (bbpversion <= GDKLIBRARY_TAILN) {
    1318             :                         /* create signal file that we need to rename string
    1319             :                          * offset heaps */
    1320             :                         int fd = MT_open(needstrbatmove, O_WRONLY | O_CREAT);
    1321           8 :                         if (fd < 0) {
    1322           0 :                                 TRC_CRITICAL(GDK, "cannot create signal file needstrbatmove.\n");
    1323           0 :                                 GDKfree(needstrbatmove);
    1324           0 :                                 return GDK_FAIL;
    1325             :                         }
    1326           8 :                         close(fd);
    1327             :                 } else {
    1328             :                         /* check signal file whether we need to rename string
    1329             :                          * offset heaps */
    1330             :                         int fd = MT_open(needstrbatmove, O_RDONLY);
    1331         257 :                         if (fd >= 0) {
    1332             :                                 /* yes, we do */
    1333           0 :                                 close(fd);
    1334         257 :                         } else if (errno == ENOENT) {
    1335             :                                 /* no, we don't: set var to NULL */
    1336         257 :                                 GDKfree(needstrbatmove);
    1337             :                                 needstrbatmove = NULL;
    1338             :                         } else {
    1339           0 :                                 GDKsyserror("unexpected error opening %s\n", needstrbatmove);
    1340           0 :                                 GDKfree(needstrbatmove);
    1341             :                                 return GDK_FAIL;
    1342             :                         }
    1343             :                 }
    1344             :         }
    1345             : #endif
    1346             : 
    1347         266 :         if (bbpversion < GDKLIBRARY && TMcommit() != GDK_SUCCEED) {
    1348           0 :                 TRC_CRITICAL(GDK, "TMcommit failed\n");
    1349           0 :                 return GDK_FAIL;
    1350             :         }
    1351             : 
    1352             : #ifdef GDKLIBRARY_TAILN
    1353             :         /* we rename the offset heaps after the above commit: in this
    1354             :          * version we accept both the old and new names, but we want to
    1355             :          * convert so that future versions only have the new name */
    1356         266 :         if (needstrbatmove) {
    1357             :                 /* note, if renaming fails, nothing is lost: a next
    1358             :                  * invocation will just try again; an older version of
    1359             :                  * mserver will not work because of the TMcommit
    1360             :                  * above */
    1361           8 :                 if (movestrbats() != GDK_SUCCEED) {
    1362           0 :                         GDKfree(needstrbatmove);
    1363           0 :                         return GDK_FAIL;
    1364             :                 }
    1365             :                 MT_remove(needstrbatmove);
    1366           8 :                 GDKfree(needstrbatmove);
    1367             :                 needstrbatmove = NULL;
    1368             :         }
    1369             : #endif
    1370             : 
    1371             :         /* cleanup any leftovers (must be done after BBPrecover) */
    1372         785 :         for (i = 0; i < MAXFARMS && BBPfarms[i].dirname != NULL; i++) {
    1373             :                 int j;
    1374         664 :                 for (j = 0; j < i; j++) {
    1375             :                         /* don't clean a directory twice */
    1376         254 :                         if (BBPfarms[j].dirname &&
    1377         254 :                             strcmp(BBPfarms[i].dirname,
    1378             :                                    BBPfarms[j].dirname) == 0)
    1379             :                                 break;
    1380             :                 }
    1381         519 :                 if (j == i) {
    1382         410 :                         char *d = GDKfilepath(i, NULL, BATDIR, NULL);
    1383         410 :                         if (d == NULL) {
    1384             :                                 return GDK_FAIL;
    1385             :                         }
    1386         410 :                         BBPdiskscan(d, strlen(d) - strlen(BATDIR));
    1387         410 :                         GDKfree(d);
    1388             :                 }
    1389             :         }
    1390             : 
    1391         266 :         manager = THRcreate(BBPmanager, NULL, MT_THR_DETACHED, "BBPmanager");
    1392         266 :         return GDK_SUCCEED;
    1393             : 
    1394           0 :   bailout:
    1395             :         /* now it is time for real panic */
    1396           0 :         TRC_CRITICAL(GDK, "could not write %s%cBBP.dir.", BATDIR, DIR_SEP);
    1397           0 :         return GDK_FAIL;
    1398             : }
    1399             : 
    1400             : /*
    1401             :  * During the exit phase all non-persistent BATs are removed.  Upon
    1402             :  * exit the status of the BBP tables is saved on disk.  This function
    1403             :  * is called once and during the shutdown of the server. Since
    1404             :  * shutdown may be issued from any thread (dangerous) it may lead to
    1405             :  * interference in a parallel session.
    1406             :  */
    1407             : 
    1408             : static int backup_files = 0, backup_dir = 0, backup_subdir = 0;
    1409             : 
    1410             : void
    1411         264 : BBPexit(void)
    1412             : {
    1413             :         bat i;
    1414             :         bool skipped;
    1415             : 
    1416         264 :         BBPlock();      /* stop all threads ever touching more descriptors */
    1417             : 
    1418             :         /* free all memory (just for leak-checking in Purify) */
    1419             :         do {
    1420             :                 skipped = false;
    1421      222207 :                 for (i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
    1422      221943 :                         if (BBPvalid(i)) {
    1423       83057 :                                 BAT *b = BBP_desc(i);
    1424             : 
    1425       83057 :                                 if (b) {
    1426       83057 :                                         if (b->batSharecnt > 0) {
    1427             :                                                 skipped = true;
    1428           0 :                                                 continue;
    1429             :                                         }
    1430       83057 :                                         if (isVIEW(b)) {
    1431             :                                                 /* "manually"
    1432             :                                                  * decrement parent
    1433             :                                                  * references, since
    1434             :                                                  * VIEWdestroy doesn't
    1435             :                                                  * (and can't here due
    1436             :                                                  * to locks) do it */
    1437           0 :                                                 bat tp = VIEWtparent(b);
    1438           0 :                                                 bat vtp = VIEWvtparent(b);
    1439           0 :                                                 if (tp) {
    1440           0 :                                                         BBP_desc(tp)->batSharecnt--;
    1441           0 :                                                         --BBP_lrefs(tp);
    1442             :                                                 }
    1443           0 :                                                 if (vtp) {
    1444           0 :                                                         BBP_desc(vtp)->batSharecnt--;
    1445           0 :                                                         --BBP_lrefs(vtp);
    1446             :                                                 }
    1447           0 :                                                 VIEWdestroy(b);
    1448             :                                         } else {
    1449       83057 :                                                 PROPdestroy(b);
    1450       83057 :                                                 BATfree(b);
    1451             :                                         }
    1452             :                                 }
    1453       83057 :                                 BBP_pid(i) = 0;
    1454       83057 :                                 BBPuncacheit(i, true);
    1455       83057 :                                 if (BBP_logical(i) != BBP_bak(i))
    1456       11635 :                                         GDKfree(BBP_logical(i));
    1457       83057 :                                 BBP_logical(i) = NULL;
    1458             :                         }
    1459             :                 }
    1460         264 :         } while (skipped);
    1461         264 :         GDKfree(BBP_hash);
    1462         264 :         BBP_hash = NULL;
    1463             :         // these need to be NULL, otherwise no new ones get created
    1464         264 :         backup_files = 0;
    1465         264 :         backup_dir = 0;
    1466         264 :         backup_subdir = 0;
    1467             : 
    1468         264 : }
    1469             : 
    1470             : /*
    1471             :  * The routine BBPdir creates the BAT pool dictionary file.  It
    1472             :  * includes some information about the current state of affair in the
    1473             :  * pool.  The location in the buffer pool is saved for later use as
    1474             :  * well.  This is merely done for ease of debugging and of no
    1475             :  * importance to front-ends.  The tail of non-used entries is
    1476             :  * reclaimed as well.
    1477             :  */
    1478             : static inline int
    1479     4397913 : heap_entry(FILE *fp, BAT *b, BUN size)
    1480             : {
    1481             :         const ValRecord *minprop, *maxprop;
    1482     4397913 :         minprop = BATgetprop_nolock(b, GDK_MIN_POS);
    1483     4397913 :         maxprop = BATgetprop_nolock(b, GDK_MAX_POS);
    1484     4397913 :         size_t free = b->theap->free;
    1485     4397913 :         if (size < BUN_NONE) {
    1486     4397913 :                 if ((b->ttype >= 0 && ATOMstorage(b->ttype) == TYPE_msk)) {
    1487      689395 :                         BUN bytes = ((size + 31) / 32) * 4;
    1488             :                         if (free > bytes)
    1489             :                                 free = bytes;
    1490     3708518 :                 } else if (b->twidth > 0 && free / b->twidth > size)
    1491      282355 :                         free = size << b->tshift;
    1492             :         }
    1493    14769188 :         return fprintf(fp, " %s %d %d %d " BUNFMT " " BUNFMT " " BUNFMT " "
    1494             :                        BUNFMT " " OIDFMT " %zu %zu %d " OIDFMT " " OIDFMT,
    1495     4397913 :                        b->ttype >= 0 ? BATatoms[b->ttype].name : ATOMunknown_name(b->ttype),
    1496     4397913 :                        b->twidth,
    1497     1039919 :                        b->tvarsized | (b->tvheap ? b->tvheap->hashash << 1 : 0),
    1498     4397913 :                        (unsigned short) b->tsorted |
    1499     4397913 :                            ((unsigned short) b->trevsorted << 7) |
    1500     8795826 :                            (((unsigned short) b->tkey & 0x01) << 8) |
    1501     4397913 :                            ((unsigned short) BATtdense(b) << 9) |
    1502     4397913 :                            ((unsigned short) b->tnonil << 10) |
    1503     4397913 :                            ((unsigned short) b->tnil << 11),
    1504     2449013 :                        b->tnokey[0] >= size || b->tnokey[1] >= size ? 0 : b->tnokey[0],
    1505     4397913 :                        b->tnokey[0] >= size || b->tnokey[1] >= size ? 0 : b->tnokey[1],
    1506     4397913 :                        b->tnosorted >= size ? 0 : b->tnosorted,
    1507     4397913 :                        b->tnorevsorted >= size ? 0 : b->tnorevsorted,
    1508             :                        b->tseqbase,
    1509             :                        free,
    1510             :                        b->theap->size,
    1511             :                        0,
    1512     1304607 :                        minprop && minprop->val.oval < b->hseqbase + size ? minprop->val.oval : oid_nil,
    1513     1526379 :                        maxprop && maxprop->val.oval < b->hseqbase + size ? maxprop->val.oval : oid_nil);
    1514             : }
    1515             : 
    1516             : static inline int
    1517     4397913 : vheap_entry(FILE *fp, Heap *h)
    1518             : {
    1519     4397913 :         if (h == NULL)
    1520             :                 return 0;
    1521     1039919 :         return fprintf(fp, " %zu %zu %d", h->free, h->size, 0);
    1522             : }
    1523             : 
    1524             : static gdk_return
    1525     4397913 : new_bbpentry(FILE *fp, bat i, BUN size)
    1526             : {
    1527             : #ifndef NDEBUG
    1528     4397913 :         assert(i > 0);
    1529     4397913 :         assert(i < (bat) ATOMIC_GET(&BBPsize));
    1530     4397913 :         assert(BBP_desc(i));
    1531     4397913 :         assert(BBP_desc(i)->batCacheid == i);
    1532     4397913 :         assert(BBP_desc(i)->batRole == PERSISTENT);
    1533     4397913 :         assert(0 <= BBP_desc(i)->theap->farmid && BBP_desc(i)->theap->farmid < MAXFARMS);
    1534     4397913 :         assert(BBPfarms[BBP_desc(i)->theap->farmid].roles & (1U << PERSISTENT));
    1535     4397913 :         if (BBP_desc(i)->tvheap) {
    1536     1039919 :                 assert(0 <= BBP_desc(i)->tvheap->farmid && BBP_desc(i)->tvheap->farmid < MAXFARMS);
    1537     1039919 :                 assert(BBPfarms[BBP_desc(i)->tvheap->farmid].roles & (1U << PERSISTENT));
    1538             :         }
    1539             : #endif
    1540             : 
    1541     4397913 :         if (size > BBP_desc(i)->batCount)
    1542             :                 size = BBP_desc(i)->batCount;
    1543     4397913 :         if (fprintf(fp, "%d %u %s %s %d " BUNFMT " " BUNFMT " " OIDFMT,
    1544             :                     /* BAT info */
    1545             :                     (int) i,
    1546     4397913 :                     BBP_status(i) & BBPPERSISTENT,
    1547             :                     BBP_logical(i),
    1548     4397913 :                     BBP_physical(i),
    1549     4397913 :                     BBP_desc(i)->batRestricted << 1,
    1550             :                     size,
    1551             :                     BBP_desc(i)->batCapacity,
    1552     4397913 :                     BBP_desc(i)->hseqbase) < 0 ||
    1553     8795826 :             heap_entry(fp, BBP_desc(i), size) < 0 ||
    1554     4397913 :             vheap_entry(fp, BBP_desc(i)->tvheap) < 0 ||
    1555     8795826 :             (BBP_options(i) && fprintf(fp, " %s", BBP_options(i)) < 0) ||
    1556     4397913 :             fprintf(fp, "\n") < 0) {
    1557           0 :                 GDKsyserror("new_bbpentry: Writing BBP.dir entry failed\n");
    1558             :                 return GDK_FAIL;
    1559             :         }
    1560             : 
    1561             :         return GDK_SUCCEED;
    1562             : }
    1563             : 
    1564             : static gdk_return
    1565       16508 : BBPdir_header(FILE *f, int n, lng logno, lng transid)
    1566             : {
    1567       16508 :         if (fprintf(f, "BBP.dir, GDKversion %u\n%d %d %d\nBBPsize=%d\nBBPinfo=" LLFMT " " LLFMT "\n",
    1568             :                     GDKLIBRARY, SIZEOF_SIZE_T, SIZEOF_OID,
    1569             : #ifdef HAVE_HGE
    1570       16508 :                     havehge ? SIZEOF_HGE :
    1571             : #endif
    1572       16508 :                     SIZEOF_LNG, n, logno, transid) < 0 ||
    1573       16508 :             ferror(f)) {
    1574           0 :                 GDKsyserror("Writing BBP.dir header failed\n");
    1575             :                 return GDK_FAIL;
    1576             :         }
    1577             :         return GDK_SUCCEED;
    1578             : }
    1579             : 
    1580             : static gdk_return
    1581       16508 : BBPdir_first(bool subcommit, lng logno, lng transid,
    1582             :              FILE **obbpfp, FILE **nbbpfp)
    1583             : {
    1584             :         FILE *obbpf = NULL, *nbbpf = NULL;
    1585       16508 :         int n = 0;
    1586             :         lng ologno, otransid;
    1587             : 
    1588       16508 :         if (obbpfp)
    1589       16321 :                 *obbpfp = NULL;
    1590       16508 :         *nbbpfp = NULL;
    1591             : 
    1592       16508 :         if ((nbbpf = GDKfilelocate(0, "BBP", "w", "dir")) == NULL) {
    1593             :                 return GDK_FAIL;
    1594             :         }
    1595             : 
    1596       16508 :         if (subcommit) {
    1597             :                 char buf[512];
    1598             : 
    1599       16313 :                 assert(obbpfp != NULL);
    1600             :                 /* we need to copy the backup BBP.dir to the new, but
    1601             :                  * replacing the entries for the subcommitted bats */
    1602       16313 :                 if ((obbpf = GDKfileopen(0, SUBDIR, "BBP", "dir", "r")) == NULL &&
    1603           0 :                     (obbpf = GDKfileopen(0, BAKDIR, "BBP", "dir", "r")) == NULL) {
    1604           0 :                         GDKsyserror("subcommit attempted without backup BBP.dir.");
    1605           0 :                         goto bailout;
    1606             :                 }
    1607             :                 /* read first three lines */
    1608       32626 :                 if (fgets(buf, sizeof(buf), obbpf) == NULL || /* BBP.dir, GDKversion %d */
    1609       32626 :                     fgets(buf, sizeof(buf), obbpf) == NULL || /* SIZEOF_SIZE_T SIZEOF_OID SIZEOF_MAX_INT */
    1610       16313 :                     fgets(buf, sizeof(buf), obbpf) == NULL) { /* BBPsize=%d */
    1611           0 :                         GDKerror("subcommit attempted with invalid backup BBP.dir.");
    1612           0 :                         goto bailout;
    1613             :                 }
    1614             :                 /* third line contains BBPsize */
    1615       16313 :                 if (sscanf(buf, "BBPsize=%d", &n) != 1) {
    1616           0 :                         GDKerror("cannot read BBPsize in backup BBP.dir.");
    1617           0 :                         goto bailout;
    1618             :                 }
    1619             :                 /* fourth line contains BBPinfo */
    1620       16313 :                 if (fgets(buf, sizeof(buf), obbpf) == NULL ||
    1621       16313 :                     sscanf(buf, "BBPinfo=" LLSCN " " LLSCN, &ologno, &otransid) != 2) {
    1622           0 :                         GDKerror("cannot read BBPinfo in backup BBP.dir.");
    1623           0 :                         goto bailout;
    1624             :                 }
    1625             :         }
    1626             : 
    1627       16508 :         if (n < (bat) ATOMIC_GET(&BBPsize))
    1628        2493 :                 n = (bat) ATOMIC_GET(&BBPsize);
    1629             : 
    1630       16508 :         TRC_DEBUG(IO_, "writing BBP.dir (%d bats).\n", n);
    1631             : 
    1632       16508 :         if (BBPdir_header(nbbpf, n, logno, transid) != GDK_SUCCEED) {
    1633           0 :                 goto bailout;
    1634             :         }
    1635             : 
    1636       16508 :         if (obbpfp)
    1637       16321 :                 *obbpfp = obbpf;
    1638       16508 :         *nbbpfp = nbbpf;
    1639             : 
    1640       16508 :         return GDK_SUCCEED;
    1641             : 
    1642           0 :   bailout:
    1643           0 :         if (obbpf != NULL)
    1644           0 :                 fclose(obbpf);
    1645             :         if (nbbpf != NULL)
    1646           0 :                 fclose(nbbpf);
    1647           0 :         return GDK_FAIL;
    1648             : }
    1649             : 
    1650             : static bat
    1651     4439158 : BBPdir_step(bat bid, BUN size, int n, char *buf, size_t bufsize,
    1652             :             FILE **obbpfp, FILE *nbbpf)
    1653             : {
    1654     4439158 :         if (n < -1)          /* safety catch */
    1655             :                 return n;
    1656     8940610 :         while (n >= 0 && n < bid) {
    1657     4501452 :                 if (n > 0 && fputs(buf, nbbpf) == EOF) {
    1658           0 :                         GDKerror("Writing BBP.dir file failed.\n");
    1659           0 :                         goto bailout;
    1660             :                 }
    1661     4501452 :                 if (fgets(buf, (int) bufsize, *obbpfp) == NULL) {
    1662        3118 :                         if (ferror(*obbpfp)) {
    1663           0 :                                 GDKerror("error reading backup BBP.dir.");
    1664           0 :                                 goto bailout;
    1665             :                         }
    1666        3118 :                         n = -1;
    1667        3118 :                         if (fclose(*obbpfp) == EOF) {
    1668           0 :                                 GDKsyserror("Closing backup BBP.dir file failed.\n");
    1669           0 :                                 GDKclrerr(); /* ignore error */
    1670             :                         }
    1671        3118 :                         *obbpfp = NULL;
    1672             :                 } else {
    1673     4498334 :                         if (sscanf(buf, "%d", &n) != 1 || n <= 0) {
    1674           0 :                                 GDKerror("subcommit attempted with invalid backup BBP.dir.");
    1675           0 :                                 goto bailout;
    1676             :                         }
    1677             :                 }
    1678             :         }
    1679     4439158 :         if (BBP_status(bid) & BBPPERSISTENT) {
    1680     4397913 :                 if (new_bbpentry(nbbpf, bid, size) != GDK_SUCCEED)
    1681           0 :                         goto bailout;
    1682             :         }
    1683     4439158 :         return n == -1 ? -1 : n == bid ? 0 : n;
    1684             : 
    1685           0 :   bailout:
    1686           0 :         if (*obbpfp)
    1687           0 :                 fclose(*obbpfp);
    1688           0 :         fclose(nbbpf);
    1689           0 :         return -2;
    1690             : }
    1691             : 
    1692             : static gdk_return
    1693       16508 : BBPdir_last(int n, char *buf, size_t bufsize, FILE *obbpf, FILE *nbbpf)
    1694             : {
    1695       16508 :         if (n > 0 && fputs(buf, nbbpf) == EOF) {
    1696           0 :                 GDKerror("Writing BBP.dir file failed.\n");
    1697           0 :                 goto bailout;
    1698             :         }
    1699       98386 :         while (obbpf) {
    1700       95073 :                 if (fgets(buf, (int) bufsize, obbpf) == NULL) {
    1701       13195 :                         if (ferror(obbpf)) {
    1702           0 :                                 GDKerror("error reading backup BBP.dir.");
    1703           0 :                                 goto bailout;
    1704             :                         }
    1705       13195 :                         if (fclose(obbpf) == EOF) {
    1706           0 :                                 GDKsyserror("Closing backup BBP.dir file failed.\n");
    1707           0 :                                 GDKclrerr(); /* ignore error */
    1708             :                         }
    1709             :                         obbpf = NULL;
    1710             :                 } else {
    1711       81878 :                         if (fputs(buf, nbbpf) == EOF) {
    1712           0 :                                 GDKerror("Writing BBP.dir file failed.\n");
    1713           0 :                                 goto bailout;
    1714             :                         }
    1715             :                 }
    1716             :         }
    1717       16508 :         if (fflush(nbbpf) == EOF ||
    1718       16508 :             (!(GDKdebug & NOSYNCMASK)
    1719             : #if defined(NATIVE_WIN32)
    1720             :              && _commit(_fileno(nbbpf)) < 0
    1721             : #elif defined(HAVE_FDATASYNC)
    1722         200 :              && fdatasync(fileno(nbbpf)) < 0
    1723             : #elif defined(HAVE_FSYNC)
    1724             :              && fsync(fileno(nbbpf)) < 0
    1725             : #endif
    1726             :                     )) {
    1727           0 :                 GDKsyserror("Syncing BBP.dir file failed\n");
    1728           0 :                 goto bailout;
    1729             :         }
    1730       16508 :         if (fclose(nbbpf) == EOF) {
    1731           0 :                 GDKsyserror("Closing BBP.dir file failed\n");
    1732           0 :                 goto bailout;
    1733             :         }
    1734             : 
    1735       16508 :         TRC_DEBUG(IO_, "end\n");
    1736             : 
    1737             :         return GDK_SUCCEED;
    1738             : 
    1739           0 :   bailout:
    1740           0 :         if (obbpf != NULL)
    1741           0 :                 fclose(obbpf);
    1742           0 :         if (nbbpf != NULL)
    1743           0 :                 fclose(nbbpf);
    1744             :         return GDK_FAIL;
    1745             : }
    1746             : 
    1747             : gdk_return
    1748         187 : BBPdir_init(void)
    1749             : {
    1750             :         FILE *fp;
    1751             :         gdk_return rc;
    1752             : 
    1753         187 :         rc = BBPdir_first(false, 0, 0, NULL, &fp);
    1754         187 :         if (rc == GDK_SUCCEED)
    1755         187 :                 rc = BBPdir_last(-1, NULL, 0, NULL, fp);
    1756         187 :         return rc;
    1757             : }
    1758             : 
    1759             : /* function used for debugging */
    1760             : void
    1761           0 : BBPdump(void)
    1762             : {
    1763             :         size_t mem = 0, vm = 0;
    1764             :         size_t cmem = 0, cvm = 0;
    1765             :         int n = 0, nc = 0;
    1766             : 
    1767           0 :         for (bat i = 0; i < (bat) ATOMIC_GET(&BBPsize); i++) {
    1768           0 :                 if (BBP_refs(i) == 0 && BBP_lrefs(i) == 0)
    1769           0 :                         continue;
    1770           0 :                 BAT *b = BBP_desc(i);
    1771           0 :                 unsigned status = BBP_status(i);
    1772           0 :                 fprintf(stderr,
    1773             :                         "# %d: " ALGOOPTBATFMT " "
    1774             :                         "refs=%d lrefs=%d "
    1775             :                         "status=%u%s",
    1776             :                         i,
    1777           0 :                         ALGOOPTBATPAR(b),
    1778             :                         BBP_refs(i),
    1779             :                         BBP_lrefs(i),
    1780             :                         status,
    1781           0 :                         BBP_cache(i) ? "" : " not cached");
    1782           0 :                 if (b == NULL) {
    1783           0 :                         fprintf(stderr, ", no descriptor\n");
    1784           0 :                         continue;
    1785             :                 }
    1786           0 :                 if (b->batSharecnt > 0)
    1787           0 :                         fprintf(stderr, " shares=%d", b->batSharecnt);
    1788           0 :                 if (b->batDirtydesc)
    1789           0 :                         fprintf(stderr, " DirtyDesc");
    1790           0 :                 if (b->theap) {
    1791           0 :                         if (b->theap->parentid != b->batCacheid) {
    1792           0 :                                 fprintf(stderr, " Theap -> %d", b->theap->parentid);
    1793             :                         } else {
    1794           0 :                                 fprintf(stderr,
    1795             :                                         " Theap=[%zu,%zu,f=%d]%s%s",
    1796             :                                         b->theap->free,
    1797             :                                         b->theap->size,
    1798           0 :                                         b->theap->farmid,
    1799           0 :                                         b->theap->base == NULL ? "X" : b->theap->storage == STORE_MMAP ? "M" : "",
    1800           0 :                                         status & BBPSWAPPED ? "(Swapped)" : b->theap->dirty ? "(Dirty)" : "");
    1801           0 :                                 if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
    1802           0 :                                         cmem += HEAPmemsize(b->theap);
    1803           0 :                                         cvm += HEAPvmsize(b->theap);
    1804           0 :                                         nc++;
    1805             :                                 } else {
    1806           0 :                                         mem += HEAPmemsize(b->theap);
    1807           0 :                                         vm += HEAPvmsize(b->theap);
    1808           0 :                                         n++;
    1809             :                                 }
    1810             :                         }
    1811             :                 }
    1812           0 :                 if (b->tvheap) {
    1813           0 :                         if (b->tvheap->parentid != b->batCacheid) {
    1814           0 :                                 fprintf(stderr,
    1815             :                                         " Tvheap -> %d",
    1816             :                                         b->tvheap->parentid);
    1817             :                         } else {
    1818           0 :                                 fprintf(stderr,
    1819             :                                         " Tvheap=[%zu,%zu,f=%d]%s%s",
    1820             :                                         b->tvheap->free,
    1821             :                                         b->tvheap->size,
    1822           0 :                                         b->tvheap->farmid,
    1823           0 :                                         b->tvheap->base == NULL ? "X" : b->tvheap->storage == STORE_MMAP ? "M" : "",
    1824           0 :                                         b->tvheap->dirty ? "(Dirty)" : "");
    1825           0 :                                 if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
    1826           0 :                                         cmem += HEAPmemsize(b->tvheap);
    1827           0 :                                         cvm += HEAPvmsize(b->tvheap);
    1828             :                                 } else {
    1829           0 :                                         mem += HEAPmemsize(b->tvheap);
    1830           0 :                                         vm += HEAPvmsize(b->tvheap);
    1831             :                                 }
    1832             :                         }
    1833             :                 }
    1834           0 :                 if (MT_rwlock_rdtry(&b->thashlock)) {
    1835           0 :                         if (b->thash && b->thash != (Hash *) 1) {
    1836           0 :                                 size_t m = HEAPmemsize(&b->thash->heaplink) + HEAPmemsize(&b->thash->heapbckt);
    1837           0 :                                 size_t v = HEAPvmsize(&b->thash->heaplink) + HEAPvmsize(&b->thash->heapbckt);
    1838           0 :                                 fprintf(stderr, " Thash=[%zu,%zu,f=%d/%d]", m, v,
    1839           0 :                                         b->thash->heaplink.farmid,
    1840           0 :                                         b->thash->heapbckt.farmid);
    1841           0 :                                 if (BBP_logical(i) && BBP_logical(i)[0] == '.') {
    1842           0 :                                         cmem += m;
    1843           0 :                                         cvm += v;
    1844             :                                 } else {
    1845           0 :                                         mem += m;
    1846           0 :                                         vm += v;
    1847             :                                 }
    1848             :                         }
    1849           0 :                         MT_rwlock_rdunlock(&b->thashlock);
    1850             :                 }
    1851           0 :                 fprintf(stderr, " role: %s\n",
    1852           0 :                         b->batRole == PERSISTENT ? "persistent" : "transient");
    1853             :         }
    1854           0 :         fprintf(stderr,
    1855             :                 "# %d bats: mem=%zu, vm=%zu %d cached bats: mem=%zu, vm=%zu\n",
    1856             :                 n, mem, vm, nc, cmem, cvm);
    1857           0 :         fflush(stderr);
    1858           0 : }
    1859             : 
    1860             : /*
    1861             :  * @+ BBP Readonly Interface
    1862             :  *
    1863             :  * These interface functions do not change the BBP tables. If they
    1864             :  * only access one specific BAT, the caller must have ensured that no
    1865             :  * other thread is modifying that BAT, therefore such functions do not
    1866             :  * need locking.
    1867             :  *
    1868             :  * BBP index lookup by BAT name:
    1869             :  */
    1870             : static inline bat
    1871       38588 : BBP_find(const char *nme, bool lock)
    1872             : {
    1873       38588 :         bat i = BBPnamecheck(nme);
    1874             : 
    1875       11505 :         if (i != 0) {
    1876             :                 /* for tmp_X BATs, we already know X */
    1877             :                 const char *s;
    1878             : 
    1879       11505 :                 if (i >= (bat) ATOMIC_GET(&BBPsize) || (s = BBP_logical(i)) == NULL || strcmp(s, nme)) {
    1880             :                         i = 0;
    1881             :                 }
    1882       27083 :         } else if (*nme != '.') {
    1883             :                 /* must lock since hash-lookup traverses other BATs */
    1884       27083 :                 if (lock)
    1885        3368 :                         MT_lock_set(&BBPnameLock);
    1886       27111 :                 for (i = BBP_hash[strHash(nme) & BBP_mask]; i; i = BBP_next(i)) {
    1887        1192 :                         if (strcmp(BBP_logical(i), nme) == 0)
    1888             :                                 break;
    1889             :                 }
    1890       27083 :                 if (lock)
    1891        3368 :                         MT_lock_unset(&BBPnameLock);
    1892             :         }
    1893       38588 :         return i;
    1894             : }
    1895             : 
    1896             : bat
    1897        3368 : BBPindex(const char *nme)
    1898             : {
    1899        3368 :         return BBP_find(nme, true);
    1900             : }
    1901             : 
    1902             : /*
    1903             :  * @+ BBP Update Interface
    1904             :  * Operations to insert, delete, clear, and modify BBP entries.
    1905             :  * Our policy for the BBP is to provide unlocked BBP access for
    1906             :  * speed, but still write operations have to be locked.
    1907             :  * #ifdef DEBUG_THREADLOCAL_BATS
    1908             :  * Create the shadow version (reversed) of a bat.
    1909             :  *
    1910             :  * An existing BAT is inserted into the BBP
    1911             :  */
    1912             : static inline str
    1913      180141 : BBPsubdir_recursive(str s, bat i)
    1914             : {
    1915      180141 :         i >>= 6;
    1916      180141 :         if (i >= 0100) {
    1917        8997 :                 s = BBPsubdir_recursive(s, i);
    1918        8998 :                 *s++ = DIR_SEP;
    1919             :         }
    1920      180142 :         i &= 077;
    1921      180142 :         *s++ = '0' + (i >> 3);
    1922      180142 :         *s++ = '0' + (i & 7);
    1923      180142 :         return s;
    1924             : }
    1925             : 
    1926             : static inline void
    1927      199261 : BBPgetsubdir(str s, bat i)
    1928             : {
    1929      199261 :         if (i >= 0100) {
    1930      171236 :                 s = BBPsubdir_recursive(s, i);
    1931             :         }
    1932      199242 :         *s = 0;
    1933      199242 : }
    1934             : 
    1935             : /* There are BBP_THREADMASK+1 (64) free lists, and ours (idx) is
    1936             :  * empty.  Here we find a longish free list (at least 20 entries), and
    1937             :  * if we can find one, we take one entry from that list.  If no long
    1938             :  * enough list can be found, we create a new entry by either just
    1939             :  * increasing BBPsize (up to BBPlimit) or extending the BBP (which
    1940             :  * increases BBPlimit).  Every time this function is called we start
    1941             :  * searching in a following free list (variable "last").
    1942             :  *
    1943             :  * Note that this is the only place in normal, multi-threaded operation
    1944             :  * where BBPsize is assigned a value (never decreasing), that the
    1945             :  * assignment happens after any necessary memory was allocated and
    1946             :  * initialized, and that this happens when the BBPnameLock is held. */
    1947             : static gdk_return
    1948      138936 : maybeextend(int idx)
    1949             : {
    1950             : #if BBP_THREADMASK > 0
    1951             :         int t, m;
    1952             :         int n, l;
    1953             :         bat i;
    1954             :         static int last = 0;
    1955             : 
    1956             :         l = 0;                  /* length of longest list */
    1957             :         m = 0;                  /* index of longest list */
    1958             :         /* find a longish free list */
    1959             :         for (t = 0; t <= BBP_THREADMASK && l <= 20; t++) {
    1960             :                 n = 0;
    1961             :                 for (i = BBP_free((t + last) & BBP_THREADMASK);
    1962             :                      i != 0 && n <= 20;
    1963             :                      i = BBP_next(i))
    1964             :                         n++;
    1965             :                 if (n > l) {
    1966             :                         m = (t + last) & BBP_THREADMASK;
    1967             :                         l = n;
    1968             :                 }
    1969             :         }
    1970             :         if (l > 20) {
    1971             :                 /* list is long enough, get an entry from there */
    1972             :                 i = BBP_free(m);
    1973             :                 BBP_free(m) = BBP_next(i);
    1974             :                 BBP_next(i) = 0;
    1975             :                 BBP_free(idx) = i;
    1976             :         } else {
    1977             : #endif
    1978             :                 /* let the longest list alone, get a fresh entry */
    1979      138936 :                 bat size = (bat) ATOMIC_GET(&BBPsize);
    1980      138936 :                 if (size >= BBPlimit &&
    1981           0 :                     BBPextend(idx, true, size + 1) != GDK_SUCCEED) {
    1982             :                         /* couldn't extend; if there is any
    1983             :                          * free entry, take it from the
    1984             :                          * longest list after all */
    1985             : #if BBP_THREADMASK > 0
    1986             :                         if (l > 0) {
    1987             :                                 i = BBP_free(m);
    1988             :                                 BBP_free(m) = BBP_next(i);
    1989             :                                 BBP_next(i) = 0;
    1990             :                                 BBP_free(idx) = i;
    1991             :                                 GDKclrerr();
    1992             :                         } else
    1993             : #endif
    1994             :                         {
    1995             :                                 /* nothing available */
    1996             :                                 return GDK_FAIL;
    1997             :                         }
    1998             :                 } else {
    1999      138936 :                         ATOMIC_SET(&BBPsize, size + 1);
    2000      138936 :                         BBP_free(idx) = size;
    2001             :                 }
    2002             : #if BBP_THREADMASK > 0
    2003             :         }
    2004             :         last = (last + 1) & BBP_THREADMASK;
    2005             : #endif
    2006      138936 :         return GDK_SUCCEED;
    2007             : }
    2008             : 
    2009             : /* Reserve a BBP slot for the new BAT bn and initialize its bookkeeping.
                     :  *
                     :  * The slot is popped from the free list BBP_free(idx), where idx is
                     :  * threadmask() of the calling thread's id; when that list is empty,
                     :  * maybeextend() is called to make a slot available.  Locks are only
                     :  * taken when the calling thread is not the one recorded in locked_by.
                     :  *
                     :  * On success bn->batCacheid and bn->creator_tid are set and the slot
                     :  * has status BBPDELETING|BBPHOT, one physical reference, no logical
                     :  * references, no descriptor/cache pointer yet, and a logical name
                     :  * that points at the slot's backup name buffer (filled with
                     :  * "tmp_<octal id>" when that buffer was still empty).
                     :  *
                     :  * return new BAT id (> 0); return 0 on failure */
    2010             : bat
    2011    16545666 : BBPinsert(BAT *bn)
    2012             : {
    2013    16545666 :         MT_Id pid = MT_getpid();
    2014    16561556 :         bool lock = locked_by == 0 || locked_by != pid;
    2015             :         char dirname[24];
    2016             :         bat i;
    2017             :         int idx = threadmask(pid), len = 0;
    2018             : 
    2019             :         /* critical section: get a new BBP entry */
    2020    16561556 :         if (lock) {
    2021    16561556 :                 MT_lock_set(&GDKcacheLock(idx));
    2022             :         }
    2023             : 
    2024             :         /* find an empty slot */
    2025    16697610 :         if (BBP_free(idx) <= 0) {
    2026             :                 /* we need to extend the BBP */
    2027             :                 gdk_return r = GDK_SUCCEED;
    2028             : #if BBP_THREADMASK > 0
    2029             :                 if (lock) {
    2030             :                         /* we must take all locks in a consistent
    2031             :                          * order so first unset the one we've already
    2032             :                          * got */
    2033             :                         MT_lock_unset(&GDKcacheLock(idx));
    2034             :                         for (i = 0; i <= BBP_THREADMASK; i++)
    2035             :                                 MT_lock_set(&GDKcacheLock(i));
    2036             :                 }
    2037             : #endif
    2038      138936 :                 MT_lock_set(&BBPnameLock);
    2039             :                 /* check again in case some other thread extended
    2040             :                  * while we were waiting */
    2041      138936 :                 if (BBP_free(idx) <= 0) {
    2042      138936 :                         r = maybeextend(idx);
    2043             :                 }
    2044      138936 :                 MT_lock_unset(&BBPnameLock);
    2045             : #if BBP_THREADMASK > 0
    2046             :                 /* release every cache lock except our own: the free
                     :                  * list of idx is still used below */
    2047             :                 if (lock)
    2048             :                         for (i = BBP_THREADMASK; i >= 0; i--)
    2049             :                                 if (i != idx)
    2050             :                                         MT_lock_unset(&GDKcacheLock(i));
    2051             : #endif
    2052      138936 :                 if (r != GDK_SUCCEED) {
    2053           0 :                         if (lock) {
    2054           0 :                                 MT_lock_unset(&GDKcacheLock(idx));
    2055             :                         }
    2056           0 :                         return 0;
    2057             :                 }
    2058             :         }
    2059    16697610 :         i = BBP_free(idx);
    2060    16697610 :         assert(i > 0);
    2061    16697610 :         BBP_free(idx) = BBP_next(i);    /* pop the slot off the free list */
    2062             : 
    2063    16697610 :         if (lock) {
    2064    16697610 :                 MT_lock_unset(&GDKcacheLock(idx));
    2065             :         }
    2066             :         /* rest of the work outside the lock */
    2067             : 
    2068             :         /* fill in basic BBP fields for the new bat */
    2069             : 
    2070    16690881 :         bn->batCacheid = i;
    2071    16690881 :         bn->creator_tid = MT_getpid();
    2072             : 
    2073    16690325 :         BBP_status_set(i, BBPDELETING|BBPHOT);
    2074    16690325 :         BBP_cache(i) = NULL;
    2075    16690325 :         BBP_desc(i) = NULL;
    2076    16690325 :         BBP_refs(i) = 1;        /* new bats have 1 pin */
    2077    16690325 :         BBP_lrefs(i) = 0;       /* ie. no logical refs */
    2078    16690325 :         BBP_pid(i) = MT_getpid();
    2079             : 
    2080             : #ifdef HAVE_HGE
    2081    16688936 :         if (bn->ttype == TYPE_hge)
    2082      118865 :                 havehge = true;
    2083             : #endif
    2084             : 
    2085             :         /* the backup name is only generated once per slot; len stays
                     :          * 0 when it was already present */
    2086    16688936 :         if (*BBP_bak(i) == 0)
    2087      164749 :                 len = snprintf(BBP_bak(i), sizeof(BBP_bak(i)), "tmp_%o", (unsigned) i);
    2088             :         /* snprintf signals truncation by returning a value >= the
                     :          * size it was given, so test against the destination buffer
                     :          * (as BBPrename does) rather than FILENAME_MAX.
                     :          * NOTE(review): the slot was already taken off the free list
                     :          * and is not put back on this failure path. */
    2089    16688936 :         if (len == -1 || len >= (int) sizeof(BBP_bak(i))) {
    2090           0 :                 GDKerror("impossible error\n");
    2091           0 :                 return 0;
    2092             :         }
    2093    16688936 :         BBP_logical(i) = BBP_bak(i);
    2094             : 
    2095             :         /* Keep the physical location around forever */
    2096    16688936 :         if (!GDKinmemory(0) && *BBP_physical(i) == 0) {
    2097      164122 :                 BBPgetsubdir(dirname, i);
    2098             : 
    2099      163996 :                 if (*dirname)   /* i.e., i >= 0100 */
    2100      149005 :                         len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
    2101             :                                        "%s%c%o", dirname, DIR_SEP, (unsigned) i);
    2102             :                 else
    2103       14991 :                         len = snprintf(BBP_physical(i), sizeof(BBP_physical(i)),
    2104             :                                        "%o", (unsigned) i);
    2105             :                 /* same truncation test as above, against the physical
                     :                  * name buffer */
    2106      163996 :                 if (len == -1 || len >= (int) sizeof(BBP_physical(i)))
    2107             :                         return 0;
    2108             : 
    2109      164068 :                 TRC_DEBUG(BAT_, "%d = new %s(%s)\n", (int) i, BBP_logical(i), ATOMname(bn->ttype));
    2110             :         }
    2111             : 
    2112             :         return i;
    2113             : }
    2110             : /* Register bn as the loaded BAT of its BBP slot.
                     :  *
                     :  * If bn has no cache id yet (batCacheid == 0), a slot is first
                     :  * obtained through BBPinsert() and recorded as parentid in the
                     :  * heaps of bn.  Then, under GDKswapLock(i), both BBP_desc(i) and
                     :  * BBP_cache(i) are pointed at bn and the slot status gets BBPLOADED
                     :  * set and BBPLOADING, BBPDELETING and BBPSWAPPED cleared.
                     :  *
                     :  * lock: when false no lock is taken; when true the lock is taken
                     :  * unless the calling thread is the one recorded in locked_by.
                     :  *
                     :  * Returns GDK_FAIL only if BBPinsert() fails, else GDK_SUCCEED. */
    2111             : gdk_return
    2112    16607938 : BBPcacheit(BAT *bn, bool lock)
    2113             : {
    2114    16607938 :         bat i = bn->batCacheid;
    2115             :         unsigned mode;
    2116             : 
    2117    16607938 :         if (lock)
    2118    16596470 :                 lock = locked_by == 0 || locked_by != MT_getpid();
    2119             : 
    2120    16607938 :         if (i) {
    2121    16607938 :                 assert(i > 0);
    2122             :         } else {
    2123           0 :                 i = BBPinsert(bn);      /* bat was not previously entered */
    2124           0 :                 if (i == 0)
    2125             :                         return GDK_FAIL;
    2126           0 :                 if (bn->theap)
    2127           0 :                         bn->theap->parentid = i;
    2128           0 :                 if (bn->tvheap)
    2129           0 :                         bn->tvheap->parentid = i;
    2130             :         }
    2131             : 
    2132    16607938 :         if (lock)
    2133    16594801 :                 MT_lock_set(&GDKswapLock(i));
    2134             :         /* compute the new status first; it is stored only after the
                     :          * descriptor and cache pointers have been set */
    2135    16704969 :         mode = (BBP_status(i) | BBPLOADED) & ~(BBPLOADING | BBPDELETING | BBPSWAPPED);
    2136    16704969 :         BBP_desc(i) = bn;
    2137             : 
    2138             :         /* cache it! */
    2139    16704969 :         BBP_cache(i) = bn;
    2140             : 
    2141    16704969 :         BBP_status_set(i, mode);
    2142             : 
    2143    16704969 :         if (lock)
    2144    16694779 :                 MT_lock_unset(&GDKswapLock(i));
    2145             :         return GDK_SUCCEED;
    2146             : }
    2146             : 
    2147             : /*
    2148             :  * BBPuncacheit changes the BBP status to swapped out.  Currently only
    2149             :  * used in BBPfree (bat swapped out) and BBPclear (bat destroyed
    2150             :  * forever).
                     :  *
                     :  * A negative id is replaced by its absolute value.  Nothing happens
                     :  * when BBPcheck() rejects the id or the slot has no descriptor.
                     :  * Otherwise, if a cached BAT is present, BBPLOADED is cleared and
                     :  * BBP_cache(i) is reset to NULL; if unloaddesc is true, BBP_desc(i)
                     :  * is reset to NULL as well and the descriptor is passed to
                     :  * BATdestroy().  With unloaddesc false the caller must hold no
                     :  * physical references (asserted).
    2151             :  */
    2152             : 
    2153             : static void
    2154    16541851 : BBPuncacheit(bat i, bool unloaddesc)
    2155             : {
    2156             :         if (i < 0)
    2157             :                 i = -i;
    2158    16541851 :         if (BBPcheck(i)) {
    2159    16555104 :                 BAT *b = BBP_desc(i);
    2160             : 
    2161    16555104 :                 assert(unloaddesc || BBP_refs(i) == 0);
    2162             : 
    2163    16555104 :                 if (b) {
    2164    16555104 :                         if (BBP_cache(i)) {
    2165    16542594 :                                 TRC_DEBUG(BAT_, "uncache %d (%s)\n", (int) i, BBP_logical(i));
    2166             : 
    2167             :                                 /* clearing bits can be done without the lock */
    2168    16542594 :                                 BBP_status_off(i, BBPLOADED);
    2169             : 
    2170    16542594 :                                 BBP_cache(i) = NULL;
    2171             :                         }
    2172    16555104 :                         if (unloaddesc) {
    2173             :                                 /* detach the descriptor from the slot
                     :                                  * before destroying it */
    2174    16651448 :                                 BBP_desc(i) = NULL;
    2175    16651448 :                                 BATdestroy(b);
    2176             :                         }
    2177             :                 }
    2178             :         }
    2179    16599249 : }
    2179             : 
    2180             : /*
    2181             :  * @- BBPclear
    2182             :  * BBPclear removes a BAT from the BBP directory forever.
                     :  *
                     :  * bbpclear is the worker: it uncaches slot i including its
                     :  * descriptor and then, under GDKcacheLock(idx) when lock is true,
                     :  * resets the slot (reference counts, status, logical name) and
                     :  * pushes it onto the free list BBP_free(idx).  For a logical name
                     :  * that BBPtmpcheck() does not accept, BBP_delete(i) is called under
                     :  * BBPnameLock first; a logical name that is not the slot's backup
                     :  * buffer is released with GDKfree().
    2183             :  */
    2184             : static inline void
    2185    16504163 : bbpclear(bat i, int idx, bool lock)
    2186             : {
    2187    16504163 :         TRC_DEBUG(BAT_, "clear %d (%s)\n", (int) i, BBP_logical(i));
    2188    16504163 :         BBPuncacheit(i, true);
    2189    16618414 :         TRC_DEBUG(BAT_, "set to unloading %d\n", i);
    2190    16618414 :         if (lock)
    2191    16618010 :                 MT_lock_set(&GDKcacheLock(idx));
    2192             : 
    2193    16635957 :         BBP_status_set(i, BBPUNLOADING);
    2194    16635957 :         BBP_refs(i) = 0;
    2195    16635957 :         BBP_lrefs(i) = 0;
    2196    16635957 :         if (!BBPtmpcheck(BBP_logical(i))) {
    2197        1593 :                 MT_lock_set(&BBPnameLock);
    2198        1593 :                 BBP_delete(i);
    2199        1593 :                 MT_lock_unset(&BBPnameLock);
    2200             :         }
    2201             :         /* the backup buffer lives inside the slot and must not be freed */
    2202    16635957 :         if (BBP_logical(i) != BBP_bak(i))
    2203        1593 :                 GDKfree(BBP_logical(i));
    2204    16635957 :         BBP_status_set(i, 0);
    2205    16635957 :         BBP_logical(i) = NULL;
    2206             :         /* push the slot onto the free list of idx */
    2207    16635957 :         BBP_next(i) = BBP_free(idx);
    2208    16635957 :         BBP_free(idx) = i;
    2209    16635957 :         BBP_pid(i) = ~(MT_Id)0; /* not zero, not a valid thread id */
    2210    16635957 :         if (lock)
    2211    16635553 :                 MT_lock_unset(&GDKcacheLock(idx));
    2212    16632790 : }
    2211             : /* Public entry point for clearing slot i: does nothing when
                     :  * BBPcheck() rejects i, otherwise calls bbpclear() with the free
                     :  * list index threadmask() of the calling thread.  Locking is
                     :  * suppressed when lock is false or when the calling thread is the
                     :  * one recorded in locked_by. */
    2212             : void
    2213    16483172 : BBPclear(bat i, bool lock)
    2214             : {
    2215    16483172 :         MT_Id pid = MT_getpid();
    2216             : 
    2217    16503616 :         lock &= locked_by == 0 || locked_by != pid;
    2218    16503616 :         if (BBPcheck(i)) {
    2219    16503648 :                 bbpclear(i, threadmask(pid), lock);
    2220             :         }
    2221    16626699 : }
    2222             : 
    2223             : /*
    2224             :  * @- BBP rename
    2225             :  *
    2226             :  * Each BAT has a logical name that is globally unique.
    2227             :  * The batId is the same as the logical BAT name.
    2228             :  *
    2229             :  * The default logical name of a BAT is tmp_X, where X is the
    2230             :  * batCacheid.  Apart from being globally unique, new logical bat
    2231             :  * names cannot be of the form tmp_X, unless X is the batCacheid.
    2232             :  *
    2233             :  * Physical names consist of a directory name followed by a logical
    2234             :  * name suffix.  The directory name is derived from the batCacheid,
    2235             :  * and is currently organized in a hierarchy that puts max 64 bats in
    2236             :  * each directory (see BBPgetsubdir).
    2237             :  *
    2238             :  * Concerning the physical suffix: it is almost always bat_X. This
    2239             :  * saves us a whole lot of trouble, as bat_X is always unique and no
    2240             :  * conflicts can occur.  Other suffixes are supported only for
    2241             :  * backward compatibility with old repositories (you won't see
    2242             :  * them anymore in new repositories).
                     :  *
                     :  * BBPrename gives BAT bid the logical name nme; nme == NULL selects
                     :  * the slot's backup name ("tmp_<octal id>", generated on demand).
                     :  *
                     :  * Returns 0 on success, and also when bid has no descriptor or
                     :  * already carries the requested name.  Otherwise returns
                     :  * BBPRENAME_ILLEGAL (tmp-style name that belongs to another id),
                     :  * BBPRENAME_LONG (name too long), BBPRENAME_ALREADY (name is in
                     :  * use) or BBPRENAME_MEMORY (allocation failure).
    2243             :  */
    2244             : int
    2245       35220 : BBPrename(bat bid, const char *nme)
    2246             : {
    2247       35220 :         BAT *b = BBPdescriptor(bid);
    2248             :         char dirname[24];
    2249             :         bat tmpid = 0, i;
    2250             : 
    2251       35220 :         if (b == NULL)
    2252             :                 return 0;
    2253             : 
    2254       35220 :         if (nme == NULL) {
    2255       11505 :                 if (BBP_bak(bid)[0] == 0 &&
    2256           0 :                     snprintf(BBP_bak(bid), sizeof(BBP_bak(bid)), "tmp_%o", (unsigned) bid) >= (int) sizeof(BBP_bak(bid))) {
    2257             :                         /* cannot happen */
    2258           0 :                         TRC_CRITICAL(GDK, "BBP default filename too long\n");
    2259           0 :                         return BBPRENAME_LONG;
    2260             :                 }
    2261       11505 :                 nme = BBP_bak(bid);
    2262             :         }
    2263             : 
    2264             :         /* If name stays same, do nothing */
    2265       35220 :         if (BBP_logical(bid) && strcmp(BBP_logical(bid), nme) == 0)
    2266             :                 return 0;
    2267             : 
    2268       35220 :         BBPgetsubdir(dirname, bid);
    2269             : 
    2270             :         /* a non-zero tmpid must be bid itself; any other value is
                     :          * rejected as an illegal temporary name */
    2271       35220 :         if ((tmpid = BBPnamecheck(nme)) && tmpid != bid) {
    2272           0 :                 GDKerror("illegal temporary name: '%s'\n", nme);
    2273           0 :                 return BBPRENAME_ILLEGAL;
    2274             :         }
    2275       35220 :         if (strlen(dirname) + strLen(nme) + 1 >= IDLENGTH) {
    2276             :                 /* report the actual problem: this branch returns
                     :                  * BBPRENAME_LONG, the name is not "illegal" */
    2277           0 :                 GDKerror("name too long: '%s'\n", nme);
    2278           0 :                 return BBPRENAME_LONG;
    2279             :         }
    2280             : 
    2281       35220 :         MT_lock_set(&BBPnameLock);
    2282       35220 :         i = BBP_find(nme, false);
    2283       35220 :         if (i != 0) {
    2284           3 :                 MT_lock_unset(&BBPnameLock);
    2285           3 :                 GDKerror("name is in use: '%s'.\n", nme);
    2286           3 :                 return BBPRENAME_ALREADY;
    2287             :         }
    2288             :         /* from here on i == 0 */
    2289             : 
    2290             :         char *nnme;
    2291       35217 :         if (nme == BBP_bak(bid) || strcmp(nme, BBP_bak(bid)) == 0) {
    2292             :                 /* reuse the slot's own buffer, nothing to allocate */
    2293             :                 nnme = BBP_bak(bid);
    2294             :         } else {
    2295       23712 :                 nnme = GDKstrdup(nme);
    2296       23712 :                 if (nnme == NULL) {
    2297           0 :                         MT_lock_unset(&BBPnameLock);
    2298           0 :                         return BBPRENAME_MEMORY;
    2299             :                 }
    2300             :         }
    2301             : 
    2302             :         /* carry through the name change */
    2303       35217 :         if (BBP_logical(bid) && !BBPtmpcheck(BBP_logical(bid))) {
    2304       11505 :                 BBP_delete(bid);
    2305             :         }
    2306             :         /* the backup buffer lives inside the slot and must not be freed */
    2307       35217 :         if (BBP_logical(bid) != BBP_bak(bid))
    2308       11505 :                 GDKfree(BBP_logical(bid));
    2309       35217 :         BBP_logical(bid) = nnme;
    2310       35217 :         if (tmpid == 0) {
    2311       23712 :                 BBP_insert(bid);
    2312             :         }
    2313       35217 :         b->batDirtydesc = true;
    2314       35217 :         if (!b->batTransient) {
    2315       12593 :                 bool lock = locked_by == 0 || locked_by != MT_getpid();
    2316             : 
    2317             :                 /* the status of bid is changed, so the swap lock of
                     :                  * bid must be held; i is always 0 at this point and
                     :                  * would select an unrelated lock */
    2318       12593 :                 if (lock)
    2319       12593 :                         MT_lock_set(&GDKswapLock(bid));
    2320       12593 :                 BBP_status_on(bid, BBPRENAMED);
    2321       12593 :                 if (lock)
    2322       12593 :                         MT_lock_unset(&GDKswapLock(bid));
    2323             :         }
    2324       35217 :         MT_lock_unset(&BBPnameLock);
    2325       35217 :         return 0;
    2326             : }
    2321             : 
    2322             : /*
    2323             :  * @+ BBP swapping Policy
    2324             :  * The BAT can be moved back to disk using the routine BBPfree.  It
    2325             :  * frees the storage for other BATs. After this call BAT* references
    2326             :  * maintained for the BAT are wrong.  We should keep track of dirty
    2327             :  * unloaded BATs. They may have to be committed later on, which may
    2328             :  * include reading them in again.
    2329             :  *
    2330             :  * BBPswappable: may this bat be unloaded?  Only real bats without
    2331             :  * memory references can be unloaded.
                     :  *
                     :  * BBPspin: if i passes BBPcheck() and any status bit in event is
                     :  * set, sleep KITTENNAP milliseconds at a time until none of those
                     :  * bits is set any more.  s is only used to label the debug trace
                     :  * that reports the number of sleep rounds.
    2332             :  */
    2333             : static inline void
    2334     8830382 : BBPspin(bat i, const char *s, unsigned event)
    2335             : {
    2336     8830382 :         if (BBPcheck(i) && (BBP_status(i) & event)) {
    2337             :                 lng spin = LL_CONSTANT(0);
    2338             : 
    2339             :                 do {
    2340        2485 :                         MT_sleep_ms(KITTENNAP);
    2341        2467 :                         spin++;
    2342        2467 :                 } while (BBP_status(i) & event);
    2343        2427 :                 TRC_DEBUG(BAT_, "%d,%s,%u: " LLFMT " loops\n", (int) i, s, event, spin);
    2344             :         }
    2345     8830363 : }
    2346             : /* Clear the BBPHOT status bit of bat i, but only when the bat has
                     :  * neither a cached BAT nor a descriptor, or when its batRole is
                     :  * PERSISTENT.  A nil bat id is ignored. */
    2347             : void
    2348    14584561 : BBPcold(bat i)
    2349             : {
    2350    14584561 :         if (!is_bat_nil(i)) {
    2351    14597585 :                 BAT *b = BBP_cache(i);
    2352             :                 /* fall back to the descriptor when the bat is not cached */
    2353    14597585 :                 if (b == NULL)
    2354           0 :                         b = BBP_desc(i);
    2355    14597585 :                 if (b == NULL || b->batRole == PERSISTENT)
    2356         482 :                         BBP_status_off(i, BBPHOT);
    2357             :         }
    2358    14584561 : }
    2358             : 
    2359             : /* This function can fail if the input parameter (i) is incorrect
    2360             :  * (unlikely), or if the bat is a view, this is a physical (not
    2361             :  * logical) incref (i.e. called through BBPfix()), and it is the first
    2362             :  * reference (refs was 0 and should become 1).  It can fail in this
    2363             :  * case if the parent bat cannot be loaded.
    2364             :  * This means the return value of BBPfix should be checked in these
    2365             :  * circumstances, but not necessarily in others.
                     :  *
                     :  * Returns the incremented reference count (the logical count when
                     :  * logical is true, the physical count otherwise), or 0 on failure.
                     :  * When lock is false the caller is responsible for locking. */
    2366             : static inline int
    2367   157166023 : incref(bat i, bool logical, bool lock)
    2368             : {
    2369             :         int refs;
    2370             :         bat tp = i, tvp = i;
    2371             :         BAT *b, *pb = NULL, *pvb = NULL;
    2372             :         bool load = false;
    2373             : 
    2374   157166023 :         if (!BBPcheck(i))
    2375             :                 return 0;
    2376             : 
    2377             :         /* Before we get the lock and before we do all sorts of
    2378             :          * things, make sure we can load the parent bats if there are
    2379             :          * any.  If we can't load them, we can still easily fail.  If
    2380             :          * this is indeed a view, but not the first physical
    2381             :          * reference, getting the parent BAT descriptor is
    2382             :          * superfluous, but not too expensive, so we do it anyway. */
    2383   157504021 :         if (!logical && (b = BBP_desc(i)) != NULL) {
    2384             :                 /* read the heap owners under the heap lock; a value
                     :                  * different from i marks a parent bat */
    2385   125222676 :                 MT_lock_set(&b->theaplock);
    2386   129439023 :                 tp = b->theap ? b->theap->parentid : i;
    2387   129439023 :                 tvp = b->tvheap ? b->tvheap->parentid : i;
    2388   129439023 :                 MT_lock_unset(&b->theaplock);
    2389   129549788 :                 if (tp != i) {
    2390    11041510 :                         pb = BATdescriptor(tp);
    2391    11053454 :                         if (pb == NULL)
    2392             :                                 return 0;
    2393             :                 }
    2394   129561732 :                 if (tvp != i) {
    2395     4675912 :                         pvb = BATdescriptor(tvp);
    2396     4677881 :                         if (pvb == NULL) {
    2397             :                                 /* give back the fix on the tail parent */
    2398           0 :                                 if (pb)
    2399           0 :                                         BBPunfix(pb->batCacheid);
    2400           0 :                                 return 0;
    2401             :                         }
    2402             :                 }
    2403             :         }
    2404             : 
    2405   161845046 :         if (lock) {
    2406             :                 for (;;) {
    2407   161849602 :                         MT_lock_set(&GDKswapLock(i));
    2408   164655305 :                         if (!(BBP_status(i) & (BBPUNSTABLE|BBPLOADING)))
    2409             :                                 break;
    2410             :                         /* the BAT is "unstable", try again */
    2411        4579 :                         MT_lock_unset(&GDKswapLock(i));
    2412        4579 :                         BBPspin(i, __func__, BBPUNSTABLE|BBPLOADING);
    2413             :                 }
    2414             :         }
    2415             :         /* we have the lock */
    2416             : 
    2417   164650726 :         b = BBP_desc(i);
    2418   164650726 :         if (b == NULL) {
    2419             :                 /* should not have happened */
    2420           0 :                 if (lock)
    2421           0 :                         MT_lock_unset(&GDKswapLock(i));
    2422           0 :                 return 0;
    2423             :         }
    2424             : 
    2425   164650726 :         assert(BBP_refs(i) + BBP_lrefs(i) ||
    2426             :                BBP_status(i) & (BBPDELETED | BBPSWAPPED));
    2427   164650726 :         if (logical) {
    2428             :                 /* parent BATs are not relevant for logical refs */
    2429    34756983 :                 refs = ++BBP_lrefs(i);
    2430    34756983 :                 BBP_pid(i) = 0;
    2431             :         } else {
    2432   129893743 :                 assert(tp >= 0);
    2433   129893743 :                 refs = ++BBP_refs(i);
    2434             :                 unsigned flag = BBPHOT;
    2435   129893743 :                 if (refs == 1 && (tp != i || tvp != i)) {
    2436             :                         /* If this is a view, we must load the parent
    2437             :                          * BATs, but we must do that outside of the
    2438             :                          * lock.  Set the BBPLOADING flag so that
    2439             :                          * other threads will wait until we're
    2440             :                          * done. */
    2441             :                         flag |= BBPLOADING;
    2442             :                         load = true;
    2443             :                 }
    2444   129893743 :                 BBP_status_on(i, flag);
    2445             :         }
    2446   164650726 :         if (lock)
    2447   164710025 :                 MT_lock_unset(&GDKswapLock(i));
    2448             : 
    2449   165051575 :         if (load) {
    2450             :                 /* load the parent BATs */
    2451    12237518 :                 assert(!logical);
    2452    12237518 :                 if (tp != i) {
    2453    10569461 :                         assert(pb != NULL);
    2454             :                         /* load being set implies there is no other
    2455             :                          * thread that has access to this bat, but the
    2456             :                          * parent is a different matter */
    2457    10569461 :                         MT_lock_set(&pb->theaplock);
    2458             :                         /* make the view share the parent's current
                     :                          * tail heap */
    2459    10567812 :                         if (b->theap != pb->theap) {
    2460           0 :                                 HEAPincref(pb->theap);
    2461           0 :                                 HEAPdecref(b->theap, false);
    2462           0 :                                 b->theap = pb->theap;
    2463             :                         }
    2464    10567812 :                         MT_lock_unset(&pb->theaplock);
    2465             :                 }
    2466             :                 /* done loading, release descriptor */
    2467    12236333 :                 BBP_status_off(i, BBPLOADING);
    2468   152814057 :         } else if (!logical) {
    2469             :                 /* this wasn't the first physical reference, so undo
    2470             :                  * the fixes on the parent bats */
    2471   118138164 :                 if (pb)
    2472      495863 :                         BBPunfix(pb->batCacheid);
    2473   118166848 :                 if (pvb)
    2474       82743 :                         BBPunfix(pvb->batCacheid);
    2475             :         }
    2476             :         return refs;
    2477             : }
    2475             : 
    2476             : /* see comment for incref
                     :  *
                     :  * Add one physical reference to bat i by calling incref() with
                     :  * logical == false.  The swap lock is taken unless the calling
                     :  * thread is the one recorded in locked_by.  Returns what incref()
                     :  * returns: the new reference count, or 0 on failure. */
    2477             : int
    2478   119035449 : BBPfix(bat i)
    2479             : {
    2480   119035449 :         bool lock = locked_by == 0 || locked_by != MT_getpid();
    2481             : 
    2482   119035449 :         return incref(i, false, lock);
    2483             : }
    2484             : /* Add one logical reference to bat i by calling incref() with
                     :  * logical == true.  The swap lock is taken unless the calling
                     :  * thread is the one recorded in locked_by.  Returns what incref()
                     :  * returns: the new logical reference count, or 0 on failure. */
    2485             : int
    2486    17750002 : BBPretain(bat i)
    2487             : {
    2488    17750002 :         bool lock = locked_by == 0 || locked_by != MT_getpid();
    2489             : 
    2490    17750002 :         return incref(i, true, lock);
    2491             : }
    2492             : /* Record one more share of bat parent: takes a logical reference,
                     :  * increments batSharecnt of the cached BAT under the swap lock, and
                     :  * then takes a physical reference.  The results of both incref()
                     :  * calls are ignored.  parent must be > 0 (asserted).
                     :  * NOTE(review): BBP_cache(parent) is dereferenced without a NULL
                     :  * check, so the bat is assumed to be loaded -- confirm with callers. */
    2493             : void
    2494     7077026 : BBPshare(bat parent)
    2495             : {
    2496     7077026 :         bool lock = locked_by == 0 || locked_by != MT_getpid();
    2497             : 
    2498     7077026 :         assert(parent > 0);
    2499     7077026 :         (void) incref(parent, true, lock);
    2500     7090036 :         if (lock)
    2501     7093079 :                 MT_lock_set(&GDKswapLock(parent));
    2502     7091862 :         ++BBP_cache(parent)->batSharecnt;
    2503     7091862 :         assert(BBP_refs(parent) > 0);
    2504     7091862 :         if (lock)
    2505     7090939 :                 MT_lock_unset(&GDKswapLock(parent));
    2506     7092557 :         (void) incref(parent, false, lock);
    2507     7088615 : }
    2508             : 
    2509             : static inline int
    2510   181981322 : decref(bat i, bool logical, bool releaseShare, bool lock, const char *func)
    2511             : {
    2512             :         int refs = 0, lrefs;
    2513             :         bool swap = false;
    2514             :         bat tp = 0, tvp = 0;
    2515             :         int farmid = 0;
    2516             :         BAT *b;
    2517             : 
    2518   181981322 :         if (is_bat_nil(i))
    2519             :                 return -1;
    2520   181953727 :         assert(i > 0);
    2521   181953727 :         if (BBPcheck(i) == 0)
    2522             :                 return -1;
    2523             : 
    2524   182378773 :         if (lock)
    2525   182420879 :                 MT_lock_set(&GDKswapLock(i));
    2526   186810208 :         if (releaseShare) {
    2527     7095767 :                 assert(BBP_lrefs(i) > 0);
    2528     7095767 :                 if (BBP_desc(i)->batSharecnt == 0) {
    2529           0 :                         GDKerror("%s: %s does not have any shares.\n", func, BBP_logical(i));
    2530           0 :                         assert(0);
    2531             :                 } else {
    2532     7095767 :                         --BBP_desc(i)->batSharecnt;
    2533             :                 }
    2534     7095767 :                 if (lock)
    2535     7095075 :                         MT_lock_unset(&GDKswapLock(i));
    2536     7095062 :                 return refs;
    2537             :         }
    2538             : 
    2539   179714441 :         while (BBP_status(i) & BBPUNLOADING) {
    2540           0 :                 if (lock)
    2541           0 :                         MT_lock_unset(&GDKswapLock(i));
    2542           0 :                 BBPspin(i, func, BBPUNLOADING);
    2543           0 :                 if (lock)
    2544           0 :                         MT_lock_set(&GDKswapLock(i));
    2545             :         }
    2546             : 
    2547   180366935 :         b = BBP_cache(i);
    2548             : 
    2549             :         /* decrement references by one */
    2550   180366935 :         if (logical) {
    2551    34720399 :                 if (BBP_lrefs(i) == 0) {
    2552           0 :                         GDKerror("%s: %s does not have logical references.\n", func, BBP_logical(i));
    2553           0 :                         assert(0);
    2554             :                 } else {
    2555    34720399 :                         refs = --BBP_lrefs(i);
    2556             :                 }
    2557             :                 /* cannot release last logical ref if still shared */
    2558    34720399 :                 assert(BBP_desc(i)->batSharecnt == 0 || refs > 0);
    2559             :         } else {
    2560   145646536 :                 if (BBP_refs(i) == 0) {
    2561           0 :                         GDKerror("%s: %s does not have pointer fixes.\n", func, BBP_logical(i));
    2562           0 :                         assert(0);
    2563             :                 } else {
    2564   145646536 :                         assert(b == NULL || b->theap == NULL || BBP_refs(b->theap->parentid) > 0);
    2565   145646536 :                         assert(b == NULL || b->tvheap == NULL || BBP_refs(b->tvheap->parentid) > 0);
    2566   145646536 :                         refs = --BBP_refs(i);
    2567   145646536 :                         if (b && refs == 0) {
    2568   117055965 :                                 tp = VIEWtparent(b);
    2569   117055965 :                                 tvp = VIEWvtparent(b);
    2570   117055965 :                                 if (tp || tvp)
    2571    17963284 :                                         BBP_status_on(i, BBPHOT);
    2572             :                         }
    2573             :                 }
    2574             :         }
    2575   180366935 :         if (b) {
    2576   179460153 :                 MT_lock_set(&b->theaplock);
    2577   180841787 :                 if (b->batCount > b->batInserted && !isVIEW(b)) {
    2578             :                         /* if batCount is larger than batInserted and
    2579             :                          * the dirty bits are off, it may be that a
    2580             :                          * (sub)commit happened in parallel to an
    2581             :                          * update; we must undo the turning off of the
    2582             :                          * dirty bits */
    2583    78579769 :                         b->batDirtydesc = true;
    2584    78579769 :                         if (b->theap)
    2585    78579769 :                                 b->theap->dirty = true;
    2586    78579769 :                         if (b->tvheap)
    2587     6686433 :                                 b->tvheap->dirty = true;
    2588             :                 }
    2589   180841787 :                 farmid = b->theap->farmid;
    2590   180841787 :                 MT_lock_unset(&b->theaplock);
    2591             :         }
    2592             : 
    2593             :         /* we destroy transients asap and unload persistent bats only
    2594             :          * if they have been made cold or are not dirty */
    2595             :         unsigned chkflag = BBPSYNCING;
    2596   182172664 :         if (GDKvm_cursize() < GDK_vm_maxsize &&
    2597   180863959 :              ((b && b->theap ? b->theap->size : 0) + (b && b->tvheap ? b->tvheap->size : 0)) < (GDK_vm_maxsize - GDKvm_cursize()) / 32)
    2598             :                 chkflag |= BBPHOT;
    2599             :         /* only consider unloading if refs is 0; if, in addition, lrefs
    2600             :          * is 0, we can definitely unload, else only if some more
    2601             :          * conditions are met */
    2602   309201126 :         if (BBP_refs(i) == 0 &&
    2603   145427964 :             (BBP_lrefs(i) == 0 ||
    2604             :              (b != NULL
    2605    18660062 :               ? (!BATdirty(b) &&
    2606     9419757 :                  !(BBP_status(i) & chkflag) &&
    2607        5662 :                  (BBP_status(i) & BBPPERSISTENT) &&
    2608   128794255 :                  !GDKinmemory(farmid) &&
    2609        2830 :                  b->batSharecnt == 0)
    2610       21202 :               : (BBP_status(i) & BBPTMP)))) {
    2611             :                 /* bat will be unloaded now. set the UNLOADING bit
    2612             :                  * while locked so no other thread thinks it's
    2613             :                  * available anymore */
    2614    16618167 :                 assert((BBP_status(i) & BBPUNLOADING) == 0);
    2615    16618167 :                 TRC_DEBUG(BAT_, "%s set to unloading BAT %d (status %u, lrefs %d)\n", func, i, BBP_status(i), BBP_lrefs(i));
    2616    16618167 :                 BBP_status_on(i, BBPUNLOADING);
    2617             :                 swap = true;
    2618             :         } /* else: bat cannot be swapped out */
    2619   180388499 :         lrefs = BBP_lrefs(i);
    2620             : 
    2621             :         /* unlock before re-locking in unload; as saving a dirty
    2622             :          * persistent bat may take a long time */
    2623   180388499 :         if (lock)
    2624   180676242 :                 MT_lock_unset(&GDKswapLock(i));
    2625             : 
    2626   181349173 :         if (swap && b != NULL) {
    2627    16631506 :                 if (lrefs == 0 && (BBP_status(i) & BBPDELETED) == 0) {
    2628             :                         /* free memory (if loaded) and delete from
    2629             :                          * disk (if transient but saved) */
    2630    16627823 :                         BBPdestroy(b);
    2631             :                 } else {
    2632        3683 :                         TRC_DEBUG(BAT_, "%s unload and free bat %d\n", func, i);
    2633             :                         /* free memory of transient */
    2634        3683 :                         if (BBPfree(b) != GDK_SUCCEED)
    2635             :                                 return -1;      /* indicate failure */
    2636             :                 }
    2637             :         }
    2638   181340937 :         if (tp)
    2639    16200056 :                 decref(tp, false, false, lock, func);
    2640   181330136 :         if (tvp)
    2641     6052892 :                 decref(tvp, false, false, lock, func);
    2642             :         return refs;
    2643             : }
    2644             : 
    2645             : int /* result of decref: the reference count it reports, or -1 if freeing the BAT failed */
    2646   115231798 : BBPunfix(bat i) /* give up one fix on BAT i; thin wrapper around decref */
    2647             : {
    2648   115231798 :         return decref(i, false, false, true, "BBPunfix"); /* 2nd arg false: presumably "not a logical reference" (confirm against decref); 4th arg true: decref does its own swap-lock handling; last arg is the caller name used in messages */
    2649             : }
    2650             : 
    2651             : int /* result of decref: the reference count it reports, or -1 if freeing the BAT failed */
    2652    27654473 : BBPrelease(bat i) /* counterpart of BBPunfix that passes true as decref's 2nd argument */
    2653             : {
    2654    27654473 :         return decref(i, true, false, true, "BBPrelease"); /* 2nd arg true: presumably "logical reference" (confirm against decref); 4th arg true: decref does its own swap-lock handling */
    2655             : }
    2656             : 
    2657             : /*
    2658             :  * M5 often changes the physical ref into a logical reference.  This
    2659             :  * state change consists of the sequence BBPretain(b);BBPunfix(b).
    2660             :  * A faster solution is given below, because it does not trigger the
    2661             :  * BBP management actions, such as garbage collecting the bats.
    2662             :  * [first step, initiate code change]
    2663             :  */
    2664             : void
    2665     9873451 : BBPkeepref(bat i) /* silently does nothing when BBPcheck(i) fails */
    2666             : {
    2667     9873451 :         if (BBPcheck(i)) {
    2668     9876483 :                 bool lock = locked_by == 0 || locked_by != MT_getpid(); /* skip locking only when this thread is the one recorded in locked_by */
    2669             :                 BAT *b;
    2670             : 
    2671     9876483 :                 incref(i, true, lock); /* take the new reference first, so the BAT keeps a reference while the old one is dropped below */
    2672     9939598 :                 if ((b = BBPdescriptor(i)) != NULL) {
    2673     9937860 :                         BATsettrivprop(b);
    2674     9895783 :                         if (GDKdebug & (CHECKMASK | PROPMASK)) /* property checking only when these debug flags are set */
    2675     9840980 :                                 BATassertProps(b);
    2676     9905675 :                         if (BATsetaccess(b, BAT_READ) == NULL)
    2677             :                                 return; /* already decreffed: a NULL result means the fix is gone, so the decref below must be skipped */
    2678             :                 }
    2679             : 
    2680     7468306 :                 assert(BBP_refs(i)); /* the fix we are about to drop must still exist */
    2681     7468306 :                 decref(i, false, false, lock, "BBPkeepref"); /* drop the original fix; the return value is ignored */
    2682             :         }
    2683             : }
    2684             : 
    2685             : static inline void
    2686     7094566 : GDKunshare(bat parent) /* release what a child (view) held on its parent: two decref calls, results ignored */
    2687             : {
    2688     7094566 :         (void) decref(parent, false, true, true, "GDKunshare"); /* 3rd arg true: presumably gives up the share on the parent (confirm against decref) */
    2689     7094764 :         (void) decref(parent, true, false, true, "GDKunshare"); /* 2nd arg true: same argument pattern as BBPrelease */
    2690     7095605 : }
    2691             : 
    2692             : void
    2693         736 : BBPunshare(bat parent) /* non-static entry point that simply forwards to the static inline GDKunshare */
    2694             : {
    2695         736 :         GDKunshare(parent);
    2696         736 : }
    2697             : 
    2698             : /*
    2699             :  * BBPreclaim is a user-exported function; the common way to destroy a
    2700             :  * BAT the hard way.  The caller must hold the only fix (asserted).
    2701             :  *
    2702             :  * Return values:
    2703             :  * -1 = nothing was done because b is NULL
    2704             :  *  0 = decref did not report a failure
    2705             :  *  1 = decref reported failure (it returns -1 when freeing fails)
    2706             :  */
    2707             : int
    2708      239465 : BBPreclaim(BAT *b)
    2709             : {
    2710             :         bat i;
    2711      239465 :         bool lock = locked_by == 0 || locked_by != MT_getpid(); /* skip locking only when this thread is the one recorded in locked_by */
    2712             : 
    2713      239465 :         if (b == NULL)
    2714             :                 return -1;
    2715      239461 :         i = b->batCacheid;
    2716             : 
    2717      239461 :         assert(BBP_refs(i) == 1); /* only checked in builds with assertions enabled */
    2718             : 
    2719      239461 :         return decref(i, false, false, lock, "BBPreclaim") <0; /* the comparison turns decref's result into 0 (ok) or 1 (failed) */
    2720             : }
    2721             : 
    2722             : /*
    2723             :  * getBBPdescriptor (the worker behind BBPdescriptor) returns the cached
    2724             :  * BAT for i, loading it first when it is not in the cache.  You must
    2725             :  * have at least one fix on the BAT before calling this.
    2726             :  */
    2727             : static BAT * /* NULL when BBPcheck fails, the bat is not valid, or loading fails */
    2728     9961338 : getBBPdescriptor(bat i, bool lock) /* lock: whether this function must take the swap lock itself */
    2729             : {
    2730             :         bool load = false; /* set when this thread has claimed the job of loading the BAT */
    2731             :         BAT *b = NULL;
    2732             : 
    2733     9961338 :         assert(i > 0);
    2734     9961338 :         if (!BBPcheck(i)) {
    2735           0 :                 GDKerror("BBPcheck failed for bat id %d\n", i);
    2736           0 :                 return NULL;
    2737             :         }
    2738     9948354 :         assert(BBP_refs(i)); /* the caller's fix, as required by the header comment */
    2739     9948354 :         if (lock)
    2740     9946530 :                 MT_lock_set(&GDKswapLock(i));
    2741     9988614 :         if ((b = BBP_cache(i)) == NULL || BBP_status(i) & BBPWAITING) {
    2742             : 
    2743       14703 :                 while (BBP_status(i) & BBPWAITING) {        /* wait for bat to be loaded by other thread */
    2744           3 :                         if (lock)
    2745           3 :                                 MT_lock_unset(&GDKswapLock(i)); /* do not hold the swap lock while spinning */
    2746           3 :                         BBPspin(i, __func__, BBPWAITING);
    2747           3 :                         if (lock)
    2748           3 :                                 MT_lock_set(&GDKswapLock(i)); /* re-take it before re-testing the status */
    2749             :                 }
    2750       14700 :                 if (BBPvalid(i)) {
    2751       14700 :                         b = BBP_cache(i); /* look again: another thread may have loaded it while we waited */
    2752       14700 :                         if (b == NULL) {
    2753             :                                 load = true;
    2754       14696 :                                 TRC_DEBUG(BAT_, "set to loading BAT %d\n", i);
    2755       14696 :                                 BBP_status_on(i, BBPLOADING); /* set while the lock is held (when lock is true) */
    2756             :                         }
    2757             :                 }
    2758             :         }
    2759     9988614 :         if (lock)
    2760     9975425 :                 MT_lock_unset(&GDKswapLock(i));
    2761    10008693 :         if (load) {
    2762       14697 :                 TRC_DEBUG(IO_, "load %s\n", BBP_logical(i));
    2763             : 
    2764       14697 :                 b = BATload_intern(i, lock); /* the load itself runs after the swap lock has been released above */
    2765             : 
    2766             :                 /* clearing bits can be done without the lock */
    2767       14664 :                 BBP_status_off(i, BBPLOADING);
    2768       14664 :                 CHECKDEBUG if (b != NULL) /* CHECKDEBUG is a macro defined elsewhere; presumably a debug-flag guard */
    2769       11928 :                         BATassertProps(b);
    2770             :         }
    2771             :         return b;
    2772             : }
    2773             : 
    2774             : BAT * /* whatever getBBPdescriptor returns (NULL on failure) */
    2775     9960502 : BBPdescriptor(bat i) /* public wrapper: decides about locking, then delegates to getBBPdescriptor */
    2776             : {
    2777     9960502 :         bool lock = locked_by == 0 || locked_by != MT_getpid(); /* skip locking only when this thread is the one recorded in locked_by */
    2778             : 
    2779     9960502 :         return getBBPdescriptor(i, lock);
    2780             : }
    2781             : 
    2782             : /*
    2783             :  * BBPsave does the actual writing unlocked; it only holds the swap lock
    2784             :  * while setting BBPSAVING in the BBP_status of the BAT, so others that
    2785             :  * want to save or unload this BAT must spin on the BBP_status field.
    2786             :  */
    2787             : gdk_return /* GDK_SUCCEED, or the failure code of BBPbackup / BATsave */
    2788        5520 : BBPsave(BAT *b)
    2789             : {
    2790        5520 :         bool lock = locked_by == 0 || locked_by != MT_getpid(); /* skip locking only when this thread is the one recorded in locked_by */
    2791        5520 :         bat bid = b->batCacheid;
    2792             :         gdk_return ret = GDK_SUCCEED;
    2793             : 
    2794        5520 :         if (BBP_lrefs(bid) == 0 || isVIEW(b) || !BATdirtydata(b)) {
    2795             :                 /* the BAT itself is not saved; only a dirty hash is written out */
    2796        5520 :                 MT_rwlock_rdlock(&b->thashlock);
    2797        5520 :                 if (b->thash && b->thash != (Hash *) 1 && /* (Hash *) 1 is a sentinel value, not a real hash */
    2798         525 :                     (b->thash->heaplink.dirty || b->thash->heapbckt.dirty))
    2799         202 :                         BAThashsave(b, (BBP_status(bid) & BBPPERSISTENT) != 0);
    2800        5520 :                 MT_rwlock_rdunlock(&b->thashlock);
    2801        5520 :                 return GDK_SUCCEED;
    2802             :         }
    2803           0 :         if (lock)
    2804           0 :                 MT_lock_set(&GDKswapLock(bid));
    2805             : 
    2806           0 :         if (BBP_status(bid) & BBPSAVING) {
    2807             :                 /* wait until save in other thread completes */
    2808           0 :                 if (lock)
    2809           0 :                         MT_lock_unset(&GDKswapLock(bid));
    2810           0 :                 BBPspin(bid, __func__, BBPSAVING); /* ret stays GDK_SUCCEED: the other thread's outcome is not reported here */
    2811             :         } else {
    2812             :                 /* save it */
    2813             :                 unsigned flags = BBPSAVING;
    2814             : 
    2815           0 :                 if (DELTAdirty(b)) {
    2816             :                         flags |= BBPSWAPPED;
    2817             :                 }
    2818           0 :                 if (b->batTransient) {
    2819           0 :                         flags |= BBPTMP;
    2820             :                 }
    2821           0 :                 BBP_status_on(bid, flags); /* all status bits are set in one go, before the lock is released */
    2822           0 :                 if (lock)
    2823           0 :                         MT_lock_unset(&GDKswapLock(bid));
    2824             : 
    2825           0 :                 TRC_DEBUG(IO_, "save %s\n", BATgetId(b));
    2826             : 
    2827             :                 /* do the time-consuming work unlocked */
    2828           0 :                 if (BBP_status(bid) & BBPEXISTING)
    2829           0 :                         ret = BBPbackup(b, false); /* back up first; BATsave below runs only if this succeeded */
    2830           0 :                 if (ret == GDK_SUCCEED) {
    2831           0 :                         ret = BATsave(b);
    2832             :                 }
    2833             :                 /* clearing bits can be done without the lock */
    2834           0 :                 BBP_status_off(bid, BBPSAVING); /* cleared on success and on failure alike */
    2835             :         }
    2836             :         return ret;
    2837             : }
    2838             : 
    2839             : /*
    2840             :  * TODO merge BBPfree with BATfree? Its function is to prepare a BAT
    2841             :  * for being unloaded (or even destroyed, if the BAT is not
    2842             :  * persistent).
    2843             :  */
    2844             : static void
    2845    16621782 : BBPdestroy(BAT *b) /* destroy b (view or real BAT), clear its BBP entry, then release its parents */
    2846             : {
    2847    16621782 :         bat tp = VIEWtparent(b); /* parent ids are read up front; they are used only after b has been destroyed */
    2848    16621782 :         bat vtp = VIEWvtparent(b);
    2849             : 
    2850    16621782 :         if (isVIEW(b)) {        /* a physical view */
    2851     5817617 :                 VIEWdestroy(b);
    2852             :         } else {
    2853             :                 /* bats that get destroyed must unfix their atoms */
    2854    10804165 :                 gdk_return (*tunfix) (const void *) = BATatoms[b->ttype].atomUnfix; /* NULL when the type has no unfix routine */
    2855    10804165 :                 assert(b->batSharecnt == 0); /* nothing may still share this BAT */
    2856    10804165 :                 if (tunfix) {
    2857             :                         BUN p, q;
    2858           0 :                         BATiter bi = bat_iterator_nolock(b);
    2859             : 
    2860           0 :                         BATloop(b, p, q) {
    2861             :                                 /* ignore errors */
    2862           0 :                                 (void) (*tunfix)(BUNtail(bi, p));
    2863             :                         }
    2864             :                 }
    2865    10804165 :                 BATdelete(b);   /* handles persistent case also (file deletes) */
    2866             :         }
    2867    16614485 :         BBPclear(b->batCacheid, true);       /* if destroyed; de-register from BBP */
    2868             : 
    2869             :         /* parent released when completely done with child */
    2870    16633225 :         if (tp)
    2871     5639389 :                 GDKunshare(tp);
    2872    16632300 :         if (vtp)
    2873     1456911 :                 GDKunshare(vtp);
    2874    16632291 : }
    2875             : 
    2876             : static gdk_return /* the result of BBPsave; on failure the BAT is left cached and its parents are not released */
    2877        5520 : BBPfree(BAT *b) /* save b if needed, free its memory and remove it from the cache */
    2878             : {
    2879        5520 :         bat bid = b->batCacheid, tp = VIEWtparent(b), vtp = VIEWvtparent(b); /* ids are read before b is freed */
    2880             :         gdk_return ret;
    2881             : 
    2882        5520 :         assert(bid > 0);
    2883        5520 :         assert(BBPswappable(b));
    2884             : 
    2885        5520 :         BBP_unload_inc(); /* paired with BBP_unload_dec() below */
    2886             :         /* write dirty BATs before being unloaded */
    2887        5520 :         ret = BBPsave(b);
    2888        5520 :         if (ret == GDK_SUCCEED) {
    2889        5520 :                 if (isVIEW(b)) {        /* physical view */
    2890           0 :                         VIEWdestroy(b);
    2891             :                 } else {
    2892        5520 :                         if (BBP_cache(bid))
    2893        5520 :                                 BATfree(b);     /* free memory */
    2894             :                 }
    2895        5520 :                 BBPuncacheit(bid, false);
    2896             :         }
    2897             :         /* clearing bits can be done without the lock */
    2898        5520 :         TRC_DEBUG(BAT_, "turn off unloading %d\n", bid);
    2899        5520 :         BBP_status_off(bid, BBPUNLOADING); /* cleared whether or not the save succeeded */
    2900        5520 :         BBP_unload_dec();
    2901             : 
    2902             :         /* parent released when completely done with child */
    2903        5520 :         if (ret == GDK_SUCCEED && tp)
    2904           0 :                 GDKunshare(tp);
    2905        5520 :         if (ret == GDK_SUCCEED && vtp)
    2906           0 :                 GDKunshare(vtp);
    2907        5520 :         return ret;
    2908             : }
    2909             : 
    2910             : /*
    2911             :  * BBPquickdesc returns a BAT descriptor without loading the entire BAT;
    2912             :  * the result may be used only for a *limited* number of
    2913             :  * purposes. Specifically, during the global sync/commit, we do not
    2914             :  * want to load any BATs that are not already loaded, both because
    2915             :  * this costs performance, and because getting into memory shortage
    2916             :  * during a commit is extremely dangerous. Loading a BAT tends not to
    2917             :  * be required, since the commit actions mostly involve moving some
    2918             :  * pointers in the BAT descriptor.
    2919             :  */
    2920             : BAT * /* the cached BAT if there is one, else BBP_desc(bid); NULL when BBPcheck fails */
    2921    27712717 : BBPquickdesc(bat bid)
    2922             : {
    2923             :         BAT *b;
    2924             : 
    2925    27712717 :         if (!BBPcheck(bid)) {
    2926           0 :                 if (!is_bat_nil(bid)) { /* a nil bat id is tolerated silently; any other invalid id is reported */
    2927           0 :                         GDKerror("called with invalid batid.\n");
    2928           0 :                         assert(0);
    2929             :                 }
    2930             :                 return NULL;
    2931             :         }
    2932    27698746 :         if ((b = BBP_cache(bid)) != NULL)
    2933             :                 return b;       /* already cached */
    2934       41465 :         return BBP_desc(bid); /* not cached: hand out the descriptor only */
    2935             : }
    2936             : 
    2937             : /*
    2938             :  * @+ Global Commit
    2939             :  */
    2940             : static BAT * /* the BAT (or just its descriptor) if it needs committing, else NULL */
    2941     8837071 : dirty_bat(bat *i, bool subcommit) /* *i is set to 0 when BATcheckmodes fails; subcommit makes the dirty tests unconditional */
    2942             : {
    2943     8837071 :         if (BBPvalid(*i)) {
    2944             :                 BAT *b;
    2945     8825800 :                 BBPspin(*i, __func__, BBPSAVING); /* wait for a save in progress to finish */
    2946     8825800 :                 b = BBP_cache(*i);
    2947     8825800 :                 if (b != NULL) {
    2948     8553612 :                         if ((BBP_status(*i) & BBPNEW) &&
    2949      165500 :                             BATcheckmodes(b, false) != GDK_SUCCEED) /* check mmap modes */
    2950           0 :                                 *i = 0; /* error; NOTE(review): execution continues and the next test reads BBP_status(*i) with *i == 0 -- confirm this is intended */
    2951     8388112 :                         if ((BBP_status(*i) & BBPPERSISTENT) &&
    2952           0 :                             (subcommit || BATdirty(b)))
    2953     8358542 :                                 return b;       /* the bat is loaded and persistent, and dirty or part of a subcommit */
    2954      437688 :                 } else if (BBP_status(*i) & BBPSWAPPED) {
    2955           0 :                         b = (BAT *) BBPquickdesc(*i);
    2956           0 :                         if (b && (subcommit || b->batDirtydesc))
    2957           0 :                                 return b;       /* only the desc is available, and it is dirty or part of a subcommit */
    2958             :                 }
    2959             :         }
    2960             :         return NULL;
    2961             : }
    2962             : 
    2963             : /*
    2964             :  * @- backup-bat
    2965             :  * Backup-bat moves all files of a BAT to a backup directory. Only
    2966             :  * after this succeeds, it may be saved. If some failure occurs
    2967             :  * halfway saving, we can thus always roll back.
    2968             :  */
    2969             : static gdk_return /* GDK_SUCCEED only when GDKmove succeeds; every other path returns GDK_FAIL */
    2970      155420 : file_move(int farmid, const char *srcdir, const char *dstdir, const char *name, const char *ext) /* move name.ext from srcdir to dstdir within farm farmid */
    2971             : {
    2972      155420 :         if (GDKmove(farmid, srcdir, name, ext, dstdir, name, ext, true) == GDK_SUCCEED) {
    2973             :                 return GDK_SUCCEED;
    2974             :         } else {
    2975             :                 char *path;
    2976             :                 struct stat st;
    2977             : 
    2978           0 :                 path = GDKfilepath(farmid, srcdir, name, ext); /* the move failed: stat the source, only to report a missing file */
    2979           0 :                 if (path == NULL)
    2980           0 :                         return GDK_FAIL;
    2981           0 :                 if (MT_stat(path, &st)) {
    2982             :                         /* source file does not exist; the best
    2983             :                          * recovery is to give an error but continue
    2984             :                          * by considering the BAT as not saved; making
    2985             :                          * sure that this time it does get saved.
    2986             :                          */
    2987           0 :                         GDKsyserror("file_move: cannot stat %s\n", path);
    2988           0 :                         GDKfree(path);
    2989             :                         return GDK_FAIL;        /* fishy, but not fatal */
    2990             :                 }
    2991           0 :                 GDKfree(path);
    2992             :         }
    2993           0 :         return GDK_FAIL; /* the source exists but could not be moved */
    2994             : }
    2995             : 
    2996             : /* returns true if stat succeeds on the file; false if it fails or the path cannot be built */
    2997             : static bool
    2998    30810460 : file_exists(int farmid, const char *dir, const char *name, const char *ext)
    2999             : {
    3000             :         char *path;
    3001             :         struct stat st;
    3002             :         int ret = -1; /* stays -1 (reported as "does not exist") when GDKfilepath returns NULL */
    3003             : 
    3004    30810460 :         path = GDKfilepath(farmid, dir, name, ext);
    3005    30810460 :         if (path) {
    3006             :                 ret = MT_stat(path, &st);
    3007    30810460 :                 TRC_DEBUG(IO_, "stat(%s) = %d\n", path, ret);
    3008    30810460 :                 GDKfree(path);
    3009             :         }
    3010    30810460 :         return (ret == 0);
    3011             : }
    3012             : 
    3013             : static gdk_return /* GDK_SUCCEED if a backup is already present, the kill file was created, or the move succeeded */
    3014      153776 : heap_move(Heap *hp, const char *srcdir, const char *dstdir, const char *nme, const char *ext) /* back up heap file nme.ext from srcdir into dstdir */
    3015             : {
    3016             :         /* see doc at BATsetaccess()/gdk_bat.c for an explanation of
    3017             :          * the mmap heap modes */
    3018      153776 :         if (file_exists(hp->farmid, dstdir, nme, ext)) {
    3019             :                 /* don't overwrite the committed state of the heap
    3020             :                  * that is already in dstdir */
    3021             :                 return GDK_SUCCEED;
    3022      153776 :         } else if (hp->newstorage == STORE_PRIV &&
    3023           0 :                    !file_exists(hp->farmid, srcdir, nme, ext)) {
    3024             : 
    3025             :                 /* In order to prevent half-saved X.new files
    3026             :                  * surviving a recover we create a dummy file in the
    3027             :                  * BACKUP(dstdir) whose presence will trigger
    3028             :                  * BBPrecover to remove them.  Thus, X will prevail
    3029             :                  * where it otherwise wouldn't have.  If X already has
    3030             :                  * a saved X.new, that one is backed up as normal.
    3031             :                  */
    3032             : 
    3033             :                 FILE *fp;
    3034             :                 long_str kill_ext;
    3035             :                 char *path;
    3036             : 
    3037           0 :                 strconcat_len(kill_ext, sizeof(kill_ext), ext, ".kill", NULL); /* dummy file is named nme.<ext>.kill */
    3038           0 :                 path = GDKfilepath(hp->farmid, dstdir, nme, kill_ext);
    3039           0 :                 if (path == NULL)
    3040             :                         return GDK_FAIL;
    3041             :                 fp = MT_fopen(path, "w"); /* an empty file is enough; nothing is written to it */
    3042           0 :                 if (fp == NULL)
    3043           0 :                         GDKsyserror("heap_move: cannot open file %s\n", path);
    3044           0 :                 TRC_DEBUG(IO_, "open %s = %d\n", path, fp ? 0 : -1);
    3045           0 :                 GDKfree(path);
    3046             : 
    3047           0 :                 if (fp != NULL) {
    3048           0 :                         fclose(fp); /* NOTE(review): the result of fclose is not checked */
    3049           0 :                         return GDK_SUCCEED;
    3050             :                 } else {
    3051             :                         return GDK_FAIL;
    3052             :                 }
    3053             :         }
    3054      153776 :         return file_move(hp->farmid, srcdir, dstdir, nme, ext); /* normal case: move the existing heap file into dstdir */
    3055             : }
    3056             : 
    3057             : /*
    3058             :  * @- BBPprepare
    3059             :  *
    3060             :  * this routine makes sure there is a BAKDIR/, and creates one if
    3061             :  * not.  For subcommits, it does the same with SUBDIR.
    3062             :  *
    3063             :  * It is now locked, to get proper file counters, and also to prevent
    3064             :  * concurrent BBPrecovers, etc.
    3065             :  *
    3066             :  * backup_dir == 0 => no backup BBP.dir
    3067             :  * backup_dir == 1 => BBP.dir saved in BACKUP/
    3068             :  * backup_dir == 2 => BBP.dir saved in SUBCOMMIT/
    3069             :  */
    3070             : 
    3071             : static gdk_return /* GDK_SUCCEED, or GDK_FAIL / the failure code of the recover, mkdir or move step */
    3072     4129428 : BBPprepare(bool subcommit)
    3073             : {
    3074             :         bool start_subcommit;
    3075     4129428 :         int set = 1 + subcommit; /* the backup_dir value this call wants: 1 for BAKDIR, 2 for SUBDIR */
    3076             :         str bakdirpath, subdirpath;
    3077             :         gdk_return ret = GDK_SUCCEED;
    3078             : 
    3079     4129428 :         bakdirpath = GDKfilepath(0, NULL, BAKDIR, NULL);
    3080     4129428 :         subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
    3081     4129428 :         if (bakdirpath == NULL || subdirpath == NULL) {
    3082           0 :                 GDKfree(bakdirpath); /* whichever path was allocated is released here */
    3083           0 :                 GDKfree(subdirpath);
    3084           0 :                 return GDK_FAIL;
    3085             :         }
    3086             : 
    3087     4129428 :         start_subcommit = (subcommit && backup_subdir == 0); /* true only for the first subcommit file since backup_subdir was last 0 */
    3088     4129428 :         if (start_subcommit) {
    3089             :                 /* starting a subcommit. Make sure SUBDIR and DELDIR
    3090             :                  * are clean */
    3091       16313 :                 ret = BBPrecover_subdir();
    3092             :         }
    3093     4129428 :         if (backup_files == 0) {
    3094         273 :                 backup_dir = 0;
    3095         273 :                 ret = BBPrecover(0); /* NOTE(review): this overwrites a failure returned by BBPrecover_subdir() above -- confirm that is intended */
    3096         273 :                 if (ret == GDK_SUCCEED) {
    3097         273 :                         if (MT_mkdir(bakdirpath) < 0 && errno != EEXIST) {
    3098           0 :                                 GDKsyserror("cannot create directory %s\n", bakdirpath);
    3099             :                                 ret = GDK_FAIL;
    3100             :                         }
    3101             :                         /* if BAKDIR already exists, don't signal error */
    3102         273 :                         TRC_DEBUG(IO_, "mkdir %s = %d\n", bakdirpath, (int) ret); /* the value logged is ret, not the result of MT_mkdir */
    3103             :                 }
    3104             :         }
    3105     4129428 :         if (ret == GDK_SUCCEED && start_subcommit) {
    3106             :                 /* make a new SUBDIR (subdir of BAKDIR) */
    3107       16313 :                 if (MT_mkdir(subdirpath) < 0) { /* unlike for BAKDIR above, an already existing SUBDIR counts as an error */
    3108           0 :                         GDKsyserror("cannot create directory %s\n", subdirpath);
    3109             :                         ret = GDK_FAIL;
    3110             :                 }
    3111       16313 :                 TRC_DEBUG(IO_, "mkdir %s = %d\n", subdirpath, (int) ret);
    3112             :         }
    3113     4129428 :         if (ret == GDK_SUCCEED && backup_dir != set) {
    3114             :                 /* a valid backup dir *must* at least contain BBP.dir */
    3115       66071 :                 if ((ret = GDKmove(0, backup_dir ? BAKDIR : BATDIR, "BBP", "dir", subcommit ? SUBDIR : BAKDIR, "BBP", "dir", true)) == GDK_SUCCEED) {
    3116       32899 :                         backup_dir = set; /* remember where BBP.dir now lives */
    3117             :                 }
    3118             :         }
    3119             :         /* increase counters */
    3120     4129428 :         if (ret == GDK_SUCCEED) {
    3121     4129428 :                 backup_subdir += subcommit; /* bool promoted to int: adds 1 for a subcommit, 0 otherwise */
    3122     4129428 :                 backup_files++;
    3123             :         }
    3124     4129428 :         GDKfree(bakdirpath);
    3125     4129428 :         GDKfree(subdirpath);
    3126     4129428 :         return ret;
    3127             : }
    3128             : 
    3129             : static gdk_return
    3130     4702622 : do_backup(const char *srcdir, const char *nme, const char *ext,
    3131             :           Heap *h, bool dirty, bool subcommit)
    3132             : { /* Safeguard the existing on-disk file of one heap by moving it into
                     :    * the backup directory.
                     :    *
                     :    * srcdir     directory that currently holds the heap file
                     :    * nme        base name of the heap file (without extension)
                     :    * ext        extension of the heap file; BBPbackup passes
                     :    *            gettailname(b) or "theap"
                     :    * h          the heap; its farmid, storage, newstorage and wasempty
                     :    *            fields are consulted
                     :    * dirty      only a dirty heap without an existing backup is moved
                     :    * subcommit  true to collect the backup in SUBDIR instead of BAKDIR
                     :    *
                     :    * Returns GDK_SUCCEED, or GDK_FAIL when a required move failed. */
    3133             :         gdk_return ret = GDK_SUCCEED;
    3134             :         char extnew[16];
    3135     4702622 :         bool istail = strncmp(ext, "tail", 4) == 0; /* true for any extension that starts with "tail" */
    3136             : 
    3137     4702622 :         if (h->wasempty) { /* heap flagged wasempty: nothing is backed up */
    3138             :                 return GDK_SUCCEED;
    3139             :         }
    3140             : 
    3141             :         /* direct mmap is unprotected (readonly usage, or has WAL
    3142             :          * protection); however, if we're backing up for subcommit
    3143             :          * and a backup already exists in the main backup directory
    3144             :          * (see GDKupgradevarheap), move the file */
    3145     2622463 :         if (subcommit) {
    3146     2622463 :                 strcpy_len(extnew, ext, sizeof(extnew));
    3147     2622463 :                 char *p = extnew + strlen(extnew) - 1; /* last character of the extension */
    3148     2622463 :                 if (*p == 'l') { /* extension ends in 'l' (no trailing digit): step onto
                     :                                   * the terminator and put a second NUL behind it, so
                     :                                   * that a digit can later be stored at *p */
    3149     1515302 :                         p++;
    3150     1515302 :                         p[1] = 0;
    3151             :                 }
    3152             :                 bool exists;
    3153             :                 for (;;) { /* probe BAKDIR for nme.extnew; for tail heaps the character
                     :                             * at *p then steps through '4' (only when
                     :                             * SIZEOF_VAR_T == 8), '2' and '1'; the loop ends as soon
                     :                             * as a file is found or after '1' has been tried */
    3154     7447112 :                         exists = file_exists(h->farmid, BAKDIR, nme, extnew);
    3155     7447112 :                         if (exists)
    3156             :                                 break;
    3157     7445470 :                         if (!istail)
    3158             :                                 break;
    3159     6890408 :                         if (*p == '1')
    3160             :                                 break;
    3161     4824649 :                         if (*p == '2')
    3162     1754360 :                                 *p = '1';
#if SIZEOF_VAR_T == 8
    3164     3070289 :                         else if (*p != '4')
    3165     1515302 :                                 *p = '4';
#endif
    3167             :                         else
    3168     1554987 :                                 *p = '2';
    3169             :                 }
    3170     2624105 :                 if (exists && /* a backup found in BAKDIR is taken along to SUBDIR */
    3171        1642 :                     file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew) != GDK_SUCCEED)
    3172             :                         return GDK_FAIL;
    3173             :         }
    3174     2622463 :         if (h->storage != STORE_MMAP) {
    3175             :                 /* STORE_PRIV saves into X.new files. Two cases could
    3176             :                  * happen. The first is when a valid X.new exists
    3177             :                  * because of an access change or a previous
    3178             :                  * commit. This X.new should be backed up as
    3179             :                  * usual. The second case is when X.new doesn't
    3180             :                  * exist. In that case we could have half written
    3181             :                  * X.new files (after a crash). To protect against
    3182             :                  * these we write X.new.kill files in the backup
    3183             :                  * directory (see heap_move). */
    3184             :                 gdk_return mvret = GDK_SUCCEED; /* result of the move attempted below, judged further down */
    3185             :                 bool exists; /* is any version of this heap already present in BAKDIR? */
    3186             : 
    3187     2595869 :                 if (istail) { /* check every tail variant, with and without ".new" */
    3188     6200901 :                         exists = file_exists(h->farmid, BAKDIR, nme, "tail.new") ||
#if SIZEOF_VAR_T == 8
    3190     4133934 :                                 file_exists(h->farmid, BAKDIR, nme, "tail4.new") ||
#endif
    3192     4133934 :                                 file_exists(h->farmid, BAKDIR, nme, "tail2.new") ||
    3193     4133934 :                                 file_exists(h->farmid, BAKDIR, nme, "tail1.new") ||
    3194     4133934 :                                 file_exists(h->farmid, BAKDIR, nme, "tail") ||
#if SIZEOF_VAR_T == 8
    3196     4133934 :                                 file_exists(h->farmid, BAKDIR, nme, "tail4") ||
#endif
    3198     6200901 :                                 file_exists(h->farmid, BAKDIR, nme, "tail2") ||
    3199     2066967 :                                 file_exists(h->farmid, BAKDIR, nme, "tail1");
    3200             :                 } else {
    3201     1057804 :                         exists = file_exists(h->farmid, BAKDIR, nme, "theap.new") ||
    3202      528902 :                                 file_exists(h->farmid, BAKDIR, nme, "theap");
    3203             :                 }
    3204             : 
    3205     2595869 :                 strconcat_len(extnew, sizeof(extnew), ext, ".new", NULL); /* extnew is now ext followed by ".new" */
    3206     2595869 :                 if (dirty && !exists) {
    3207             :                         /* if the heap is dirty and there is no heap
    3208             :                          * file (with or without .new extension) in
    3209             :                          * the BAKDIR, move the heap (preferably with
    3210             :                          * .new extension) to the correct backup
    3211             :                          * directory */
    3212      155163 :                         if (istail) { /* move the first tail variant found in srcdir; at most one is moved */
    3213      129329 :                                 if (file_exists(h->farmid, srcdir, nme, "tail.new"))
    3214           0 :                                         mvret = heap_move(h, srcdir,
    3215             :                                                           subcommit ? SUBDIR : BAKDIR,
    3216             :                                                           nme, "tail.new");
#if SIZEOF_VAR_T == 8
    3218      129329 :                                 else if (file_exists(h->farmid, srcdir, nme, "tail4.new"))
    3219           0 :                                         mvret = heap_move(h, srcdir,
    3220             :                                                           subcommit ? SUBDIR : BAKDIR,
    3221             :                                                           nme, "tail4.new");
#endif
    3223      129329 :                                 else if (file_exists(h->farmid, srcdir, nme, "tail2.new"))
    3224           0 :                                         mvret = heap_move(h, srcdir,
    3225             :                                                           subcommit ? SUBDIR : BAKDIR,
    3226             :                                                           nme, "tail2.new");
    3227      129329 :                                 else if (file_exists(h->farmid, srcdir, nme, "tail1.new"))
    3228           0 :                                         mvret = heap_move(h, srcdir,
    3229             :                                                           subcommit ? SUBDIR : BAKDIR,
    3230             :                                                           nme, "tail1.new");
    3231      129329 :                                 else if (file_exists(h->farmid, srcdir, nme, "tail"))
    3232      103612 :                                         mvret = heap_move(h, srcdir,
    3233             :                                                           subcommit ? SUBDIR : BAKDIR,
    3234             :                                                           nme, "tail");
#if SIZEOF_VAR_T == 8
    3236       25717 :                                 else if (file_exists(h->farmid, srcdir, nme, "tail4"))
    3237         325 :                                         mvret = heap_move(h, srcdir,
    3238             :                                                           subcommit ? SUBDIR : BAKDIR,
    3239             :                                                           nme, "tail4");
#endif
    3241       25392 :                                 else if (file_exists(h->farmid, srcdir, nme, "tail2"))
    3242       14360 :                                         mvret = heap_move(h, srcdir,
    3243             :                                                           subcommit ? SUBDIR : BAKDIR,
    3244             :                                                           nme, "tail2");
    3245       11032 :                                 else if (file_exists(h->farmid, srcdir, nme, "tail1"))
    3246        9645 :                                         mvret = heap_move(h, srcdir,
    3247             :                                                           subcommit ? SUBDIR : BAKDIR,
    3248             :                                                           nme, "tail1");
    3249       25834 :                         } else if (file_exists(h->farmid, srcdir, nme, extnew)) /* non-tail heap: prefer the ".new" file */
    3250           0 :                                 mvret = heap_move(h, srcdir,
    3251             :                                                   subcommit ? SUBDIR : BAKDIR,
    3252             :                                                   nme, extnew);
    3253             :                         else
    3254       25834 :                                 mvret = heap_move(h, srcdir,
    3255             :                                                   subcommit ? SUBDIR : BAKDIR,
    3256             :                                                   nme, ext);
    3257     2440706 :                 } else if (subcommit) {
    3258             :                         /* if subcommit, we may need to move an
    3259             :                          * already made backup from BAKDIR to
    3260             :                          * SUBDIR */
    3261     2440706 :                         if (file_exists(h->farmid, BAKDIR, nme, extnew))
    3262           0 :                                 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, extnew);
    3263     2440706 :                         else if (file_exists(h->farmid, BAKDIR, nme, ext))
    3264           0 :                                 mvret = file_move(h->farmid, BAKDIR, SUBDIR, nme, ext);
    3265             :                 }
    3266             :                 /* there is a situation where the move may fail,
    3267             :                  * namely if this heap was not supposed to be existing
    3268             :                  * before, i.e. after a BATmaterialize on a persistent
    3269             :                  * bat; as a workaround, do not complain about move
    3270             :                  * failure if the source file is nonexistent
    3271             :                  */
    3272      153776 :                 if (mvret != GDK_SUCCEED && file_exists(h->farmid, srcdir, nme, ext)) {
    3273             :                         ret = GDK_FAIL;
    3274             :                 }
    3275     2595869 :                 if (subcommit &&
    3276     2595869 :                     (h->storage == STORE_PRIV || h->newstorage == STORE_PRIV)) {
    3277             :                         long_str kill_ext; /* ext followed by ".new.kill" */
    3278             : 
    3279           0 :                         strconcat_len(kill_ext, sizeof(kill_ext),
    3280             :                                       ext, ".new.kill", NULL);
    3281           0 :                         if (file_exists(h->farmid, BAKDIR, nme, kill_ext) && /* take an existing kill marker along to SUBDIR */
    3282           0 :                             file_move(h->farmid, BAKDIR, SUBDIR, nme, kill_ext) != GDK_SUCCEED) {
    3283             :                                 ret = GDK_FAIL;
    3284             :                         }
    3285             :                 }
    3286             :         }
    3287             :         return ret;
    3288             : }
    3289             : 
    3290             : static gdk_return
    3291     4096521 : BBPbackup(BAT *b, bool subcommit)
    3292             : { /* Back up the heap files of BAT b.
                     :    *
                     :    * BBPprepare(subcommit) is called first.  A BAT that was never
                     :    * copied to disk, or that is transient, needs no further work.
                     :    * Otherwise do_backup is run on the tail heap (unless the tail
                     :    * type is TYPE_void) and on tvheap when the BAT has one.
                     :    *
                     :    * b          the BAT whose heap files are backed up
                     :    * subcommit  passed on to BBPprepare and do_backup
                     :    *
                     :    * Returns GDK_SUCCEED, or GDK_FAIL if preparing, building the
                     :    * path, or one of the backups failed. */
    3293             :         char *srcdir;
    3294             :         long_str nme;
    3295     4096521 :         const char *s = BBP_physical(b->batCacheid);
    3296             :         size_t slen;
    3297             : 
    3298     4096521 :         if (BBPprepare(subcommit) != GDK_SUCCEED) {
    3299             :                 return GDK_FAIL;
    3300             :         }
    3301     4096521 :         if (!b->batCopiedtodisk || b->batTransient) {
    3302             :                 return GDK_SUCCEED;
    3303             :         }
    3304             :         /* determine location dir and physical suffix */
    3305     3805627 :         if (!(srcdir = GDKfilepath(NOFARM, BATDIR, s, NULL))) /* path built from BATDIR and the physical name; split below */
    3306           0 :                 goto fail;
    3307     3805627 :         s = strrchr(srcdir, DIR_SEP); /* from here on s points into srcdir */
    3308     3805627 :         if (!s)
    3309           0 :                 goto fail;
    3310             : 
    3311     3805627 :         slen = strlen(++s); /* s now points just past the last separator, at the base name */
    3312     3805627 :         if (slen >= sizeof(nme)) /* base name including its terminator must fit in nme */
    3313           0 :                 goto fail;
    3314     3805627 :         memcpy(nme, s, slen + 1);
    3315     3805627 :         srcdir[s - srcdir] = 0; /* cut srcdir directly after the separator, leaving only the directory part */
    3316             : 
    3317     7611254 :         if (b->ttype != TYPE_void &&
    3318     3805627 :             do_backup(srcdir, nme, gettailname(b), b->theap,
    3319     3805627 :                       b->batDirtydesc || b->theap->dirty,
    3320             :                       subcommit) != GDK_SUCCEED)
    3321           0 :                 goto fail;
    3322     4702622 :         if (b->tvheap &&
    3323      896995 :             do_backup(srcdir, nme, "theap", b->tvheap,
    3324      896995 :                       b->batDirtydesc || b->tvheap->dirty,
    3325             :                       subcommit) != GDK_SUCCEED)
    3326           0 :                 goto fail;
    3327     3805627 :         GDKfree(srcdir);
    3328     3805627 :         return GDK_SUCCEED;
    3329           0 :   fail:
    3330           0 :         if(srcdir) /* srcdir is NULL when GDKfilepath itself failed */
    3331           0 :                 GDKfree(srcdir);
    3332             :         return GDK_FAIL;
    3333             : }
    3334             : 
    3335             : /*
    3336             :  * @+ Atomic Write
    3337             :  * The atomic BBPsync() function first safeguards the old images of
    3338             :  * all files to be written in BAKDIR. It then saves all files. If that
    3339             :  * succeeds fully, BAKDIR is renamed to DELDIR. The rename is
    3340             :  * considered an atomic action. If it succeeds, the DELDIR is removed.
    3341             :  * If something fails, the pre-sync status can be obtained by moving
    3342             :  * back all backed up files; this is done by BBPrecover().
    3343             :  *
    3344             :  * The BBP.dir is also moved into the BAKDIR.
    3345             :  */
    3346             : gdk_return
    3347       16321 : BBPsync(int cnt, bat *restrict subcommit, BUN *restrict sizes, lng logno, lng transid)
    3348             : { /* Parameters and result:
                     :    *
                     :    * cnt        upper bound of the index loops: idx runs from 1 up to
                     :    *            cnt - 1
                     :    * subcommit  if not NULL, subcommit[idx] is the bat id handled at
                     :    *            index idx and SUBDIR is used as backup directory; if
                     :    *            NULL, idx itself is the bat id and BAKDIR is used
                     :    * sizes      if not NULL, sizes[idx] is handed to BATsave_locked
                     :    *            (capped at the bat's count) and to BBPdir_step; if
                     :    *            NULL, BUN_NONE is used
                     :    * logno      handed to BBPdir_first and stored in BBPlogno on
                     :    *            success
                     :    * transid    handed to BBPdir_first and stored in BBPtransid on
                     :    *            success
                     :    *
                     :    * Returns GDK_SUCCEED, or GDK_FAIL if any phase failed. */
    3349             :         gdk_return ret = GDK_SUCCEED;
    3350             :         int t0 = 0, t1 = 0;
    3351             :         str bakdir, deldir;
    3352       16321 :         const bool lock = locked_by == 0 || locked_by != MT_getpid(); /* take the swap locks unless locked_by equals MT_getpid() */
    3353             :         char buf[3000];
    3354       16321 :         int n = subcommit ? 0 : -1; /* state threaded through BBPdir_step and BBPdir_last; -2 ends the phase 2 loop */
    3355             :         FILE *obbpf, *nbbpf;
    3356             : 
    3357       16329 :         if(!(bakdir = GDKfilepath(0, NULL, subcommit ? SUBDIR : BAKDIR, NULL)))
    3358             :                 return GDK_FAIL;
    3359       16321 :         if(!(deldir = GDKfilepath(0, NULL, DELDIR, NULL))) {
    3360           0 :                 GDKfree(bakdir);
    3361           0 :                 return GDK_FAIL;
    3362             :         }
    3363             : 
    3364       16321 :         TRC_DEBUG_IF(PERF) t0 = t1 = GDKms();
    3365             : 
    3366       16321 :         ret = BBPprepare(subcommit != NULL);
    3367             : 
    3368             :         /* PHASE 1: safeguard everything in a backup-dir */
    3369       16321 :         if (ret == GDK_SUCCEED) {
    3370             :                 int idx = 0;
    3371             : 
    3372     4455479 :                 while (++idx < cnt) { /* first index visited is 1 */
    3373     4439158 :                         bat i = subcommit ? subcommit[idx] : idx;
    3374     4439158 :                         if (lock)
    3375     4425313 :                                 MT_lock_set(&GDKswapLock(i));
    3376             :                         /* set flag that we're syncing, i.e. that we'll
    3377             :                          * be between moving heap to backup dir and
    3378             :                          * saving the new version, in other words, the
    3379             :                          * heap may not exist in the usual location */
    3380     4439158 :                         BBP_status_on(i, BBPSYNCING);
    3381             :                         /* wait until unloading is finished before
    3382             :                          * attempting to make a backup */
    3383     8878316 :                         while (BBP_status(i) & BBPUNLOADING) { /* the swap lock is released while spinning */
    3384           0 :                                 if (lock)
    3385           0 :                                         MT_lock_unset(&GDKswapLock(i));
    3386           0 :                                 BBPspin(i, __func__, BBPUNLOADING);
    3387           0 :                                 if (lock)
    3388           0 :                                         MT_lock_set(&GDKswapLock(i));
    3389             :                         }
    3390     4439158 :                         BAT *b = dirty_bat(&i, subcommit != NULL); /* receives &i and may change it; i <= 0 is treated as an error below */
    3391     4439158 :                         if (i <= 0) {
    3392           0 :                                 if (lock) /* i was changed, so the lock is released via the original id */
    3393           0 :                                         MT_lock_unset(&GDKswapLock(subcommit ? subcommit[idx] : idx));
    3394           0 :                                 break;
    3395             :                         }
    3396     4439158 :                         if (BBP_status(i) & BBPEXISTING) {
    3397     4315163 :                                 if (b != NULL) {
    3398     4096521 :                                         if (BBPbackup(b, subcommit != NULL) != GDK_SUCCEED) {
    3399           0 :                                                 BBP_status_off(i, BBPSYNCING);
    3400           0 :                                                 if (lock)
    3401           0 :                                                         MT_lock_unset(&GDKswapLock(i));
    3402             :                                                 break;
    3403             :                                         }
    3404             :                                 } else {
    3405             :                                         /* file has not been moved to
    3406             :                                          * backup dir, so no need for
    3407             :                                          * other threads to wait */
    3408      218642 :                                         BBP_status_off(i, BBPSYNCING);
    3409             :                                 }
    3410             :                         } else {
    3411      123995 :                                 BBP_status_off(i, BBPSYNCING);
    3412      123995 :                                 if (subcommit && (b = BBP_desc(i)) && BBP_status(i) & BBPDELETED) { /* deleted bat in a subcommit: take
                     :                                                                                                            * backups of its heaps along
                     :                                                                                                            * from BAKDIR to SUBDIR */
    3413             :                                         char o[10]; /* bat id in octal, used as file base name.  NOTE(review):
                     :                                                      * an unsigned id above 0777777777 needs more than 9
                     :                                                      * digits and would be truncated by snprintf - confirm
                     :                                                      * that ids stay below that */
    3414             :                                         char *f;
    3415       29972 :                                         snprintf(o, sizeof(o), "%o", (unsigned) b->batCacheid);
    3416       29972 :                                         f = GDKfilepath(b->theap->farmid, BAKDIR, o, gettailname(b));
    3417       29972 :                                         if (f == NULL) {
    3418           0 :                                                 if (lock)
    3419           0 :                                                         MT_lock_unset(&GDKswapLock(i));
    3420             :                                                 ret = GDK_FAIL;
    3421           0 :                                                 goto bailout;
    3422             :                                         }
    3423       29972 :                                         if (MT_access(f, F_OK) == 0) /* NOTE(review): result of file_move is not checked here */
    3424           2 :                                                 file_move(b->theap->farmid, BAKDIR, SUBDIR, o, gettailname(b));
    3425       29972 :                                         GDKfree(f);
    3426       29972 :                                         f = GDKfilepath(b->theap->farmid, BAKDIR, o, "theap");
    3427       29972 :                                         if (f == NULL) {
    3428           0 :                                                 if (lock)
    3429           0 :                                                         MT_lock_unset(&GDKswapLock(i));
    3430             :                                                 ret = GDK_FAIL;
    3431           0 :                                                 goto bailout;
    3432             :                                         }
    3433       29972 :                                         if (MT_access(f, F_OK) == 0) /* NOTE(review): result of file_move is not checked here */
    3434           0 :                                                 file_move(b->theap->farmid, BAKDIR, SUBDIR, o, "theap");
    3435       29972 :                                         GDKfree(f);
    3436             :                                 }
    3437             :                         }
    3438     4439158 :                         if (lock)
    3439     4425313 :                                 MT_lock_unset(&GDKswapLock(i));
    3440             :                 }
    3441       16321 :                 if (idx < cnt) /* the loop was left early.  NOTE(review): bats that were backed up
                     :                                 * before the failure still have BBPSYNCING set, and nothing
                     :                                 * further down in this function clears it when ret is not
                     :                                 * GDK_SUCCEED - confirm that this is handled elsewhere */
    3442             :                         ret = GDK_FAIL;
    3443             :         }
    3444       16321 :         TRC_DEBUG(PERF, "move time %d, %d files\n", (t1 = GDKms()) - t0, backup_files);
    3445             : 
    3446             :         /* PHASE 2: save the repository and write new BBP.dir file */
    3447       16321 :         if (ret == GDK_SUCCEED) {
    3448       16321 :                 ret = BBPdir_first(subcommit != NULL, logno, transid,
    3449             :                                    &obbpf, &nbbpf);
    3450             :         }
    3451             : 
    3452       16321 :         if (ret == GDK_SUCCEED) {
    3453             :                 int idx = 0;
    3454             : 
    3455     4455479 :                 while (++idx < cnt) {
    3456     4439158 :                         bat i = subcommit ? subcommit[idx] : idx;
    3457             :                         /* BBP_desc(i) may be NULL */
    3458     4439158 :                         BUN size = sizes ? sizes[idx] : BUN_NONE;
    3459             : 
    3460     4439158 :                         if (BBP_status(i) & BBPPERSISTENT) {
    3461     4397913 :                                 BAT *b = dirty_bat(&i, subcommit != NULL);
    3462     4397913 :                                 if (i <= 0) {
    3463             :                                         break;
    3464             :                                 }
    3465     4397913 :                                 if (b) {
    3466             :                                         /* wait for BBPSAVING so that we
    3467             :                                          * can set it, wait for
    3468             :                                          * BBPUNLOADING before
    3469             :                                          * attempting to save */
    3470             :                                         for (;;) {
    3471     4179271 :                                                 if (lock)
    3472     4179271 :                                                         MT_lock_set(&GDKswapLock(i));
    3473     4179271 :                                                 if (!(BBP_status(i) & (BBPSAVING|BBPUNLOADING)))
    3474             :                                                         break;
    3475           0 :                                                 if (lock)
    3476           0 :                                                         MT_lock_unset(&GDKswapLock(i));
    3477           0 :                                                 BBPspin(i, __func__, BBPSAVING|BBPUNLOADING);
    3478             :                                         }
    3479     4179271 :                                         BBP_status_on(i, BBPSAVING); /* set while the swap lock taken in the loop above is still held */
    3480     4179271 :                                         if (lock)
    3481     4179271 :                                                 MT_lock_unset(&GDKswapLock(i));
    3482     4179271 :                                         BATiter bi = bat_iterator(b);
    3483     4179271 :                                         if (size > bi.count) /* never save more than the bat currently holds */
    3484             :                                                 size = bi.count;
    3485     4179271 :                                         MT_rwlock_rdlock(&b->thashlock);
    3486     4179271 :                                         ret = BATsave_locked(b, &bi, size); /* NOTE(review): a failure here skips BBPdir_step below but
                     :                                                                              * does not end the loop, so a later successful save
                     :                                                                              * assigns ret again and the failure is lost */
    3487     4179271 :                                         MT_rwlock_rdunlock(&b->thashlock);
    3488     4179271 :                                         bat_iterator_end(&bi);
    3489     4179271 :                                         BBP_status_off(i, BBPSAVING);
    3490             :                                 }
    3491             :                         }
    3492     4439158 :                         if (ret == GDK_SUCCEED) {
    3493     4439158 :                                 n = BBPdir_step(i, size, n, buf, sizeof(buf), &obbpf, nbbpf);
    3494             :                         }
    3495     4439158 :                         if (n == -2) /* BBPdir_step result -2 aborts the loop, which is turned into GDK_FAIL below */
    3496             :                                 break;
    3497             :                         /* we once again have a saved heap */
    3498     4439158 :                         BBP_status_off(i, BBPSYNCING);
    3499             :                 }
    3500       16321 :                 if (idx < cnt) /* the loop was left early */
    3501             :                         ret = GDK_FAIL;
    3502             :         }
    3503             : 
    3504       16321 :         TRC_DEBUG(PERF, "write time %d\n", (t0 = GDKms()) - t1);
    3505             : 
    3506       16321 :         if (ret == GDK_SUCCEED) {
    3507       16321 :                 ret = BBPdir_last(n, buf, sizeof(buf), obbpf, nbbpf);
    3508             :         }
    3509             : 
    3510       16321 :         TRC_DEBUG(PERF, "dir time %d, %d bats\n", (t1 = GDKms()) - t0, (bat) ATOMIC_GET(&BBPsize));
    3511             : 
    3512       16321 :         if (ret == GDK_SUCCEED) {
    3513             :                 /* atomic switchover */
    3514             :                 /* this is the big one: this call determines
    3515             :                  * whether the operation of this function
    3516             :                  * succeeded, so no changing of ret after this
    3517             :                  * call anymore */
    3518             : 
    3519       16321 :                 if (MT_rename(bakdir, deldir) < 0 &&
    3520             :                     /* maybe there was an old deldir, so remove and try again */
    3521           0 :                     (GDKremovedir(0, DELDIR) != GDK_SUCCEED ||
    3522             :                      MT_rename(bakdir, deldir) < 0))
    3523             :                         ret = GDK_FAIL;
    3524             :                 if (ret != GDK_SUCCEED)
    3525           0 :                         GDKsyserror("rename(%s,%s) failed.\n", bakdir, deldir);
    3526       16321 :                 TRC_DEBUG(IO_, "rename %s %s = %d\n", bakdir, deldir, (int) ret);
    3527             :         }
    3528             : 
    3529             :         /* AFTERMATH */
    3530       16321 :         if (ret == GDK_SUCCEED) {
    3531       16321 :                 BBPlogno = logno;       /* the new value */
    3532       16321 :                 BBPtransid = transid;
    3533       16321 :                 backup_files = subcommit ? (backup_files - backup_subdir) : 0; /* after a subcommit, keep counting the files that were not in SUBDIR */
    3534       16321 :                 backup_dir = backup_subdir = 0;
    3535       16321 :                 if (GDKremovedir(0, DELDIR) != GDK_SUCCEED) /* failure is only reported, ret stays GDK_SUCCEED */
    3536           0 :                         fprintf(stderr, "#BBPsync: cannot remove directory %s\n", DELDIR);
    3537       16321 :                 (void) BBPprepare(false); /* (try to) remove DELDIR and set up new BAKDIR */
    3538       16321 :                 if (backup_files > 1) {
    3539       16313 :                         TRC_DEBUG(PERF, "backup_files %d > 1\n", backup_files);
    3540       16313 :                         backup_files = 1;
    3541             :                 }
    3542             :         }
    3543       16321 :         TRC_DEBUG(PERF, "%s (ready time %d)\n",
    3544             :                   ret == GDK_SUCCEED ? "" : " failed",
    3545             :                   (t0 = GDKms()) - t1);
    3546       16321 :   bailout: /* also reached directly from phase 1 when GDKfilepath fails */
    3547       16321 :         GDKfree(bakdir);
    3548       16321 :         GDKfree(deldir);
    3549       16321 :         return ret;
    3550             : }
    3551             : 
    3552             : /*
    3553             :  * Recovery just moves all files back to their original location. This
    3554             :  * is an incremental process: if something fails, just stop, leaving the
    3555             :  * remaining files in BACKUP/.  The recovery process can resume
    3556             :  * later with the left over files.
    3557             :  */
    3558             : static gdk_return
    3559           1 : force_move(int farmid, const char *srcdir, const char *dstdir, const char *name)
    3560             : {
    3561             :         const char *p;
    3562             :         char *dstpath, *killfile;
    3563             :         gdk_return ret = GDK_SUCCEED;
    3564             : 
    3565           1 :         if ((p = strrchr(name, '.')) != NULL && strcmp(p, ".kill") == 0) {
    3566             :                 /* Found a X.new.kill file, ie remove the X.new file */
    3567           0 :                 ptrdiff_t len = p - name;
    3568             :                 long_str srcpath;
    3569             : 
    3570           0 :                 strncpy(srcpath, name, len);
    3571           0 :                 srcpath[len] = '\0';
    3572           0 :                 if(!(dstpath = GDKfilepath(farmid, dstdir, srcpath, NULL))) {
    3573             :                         return GDK_FAIL;
    3574             :                 }
    3575             : 
    3576             :                 /* step 1: remove the X.new file that is going to be
    3577             :                  * overridden by X */
    3578           0 :                 if (MT_remove(dstpath) != 0 && errno != ENOENT) {
    3579             :                         /* if it exists and cannot be removed, all
    3580             :                          * this is going to fail */
    3581           0 :                         GDKsyserror("force_move: remove(%s)\n", dstpath);
    3582           0 :                         GDKfree(dstpath);
    3583             :                         return GDK_FAIL;
    3584             :                 }
    3585           0 :                 GDKfree(dstpath);
    3586             : 
    3587             :                 /* step 2: now remove the .kill file. This one is
    3588             :                  * crucial, otherwise we'll never finish recovering */
    3589           0 :                 if(!(killfile = GDKfilepath(farmid, srcdir, name, NULL))) {
    3590             :                         return GDK_FAIL;
    3591             :                 }
    3592           0 :                 if (MT_remove(killfile) != 0) {
    3593             :                         ret = GDK_FAIL;
    3594           0 :                         GDKsyserror("force_move: remove(%s)\n", killfile);
    3595             :                 }
    3596           0 :                 GDKfree(killfile);
    3597           0 :                 return ret;
    3598             :         }
    3599             :         /* try to rename it */
    3600           1 :         ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, false);
    3601             : 
    3602           1 :         if (ret != GDK_SUCCEED) {
    3603             :                 char *srcpath;
    3604             : 
    3605             :                 /* two legal possible causes: file exists or dir
    3606             :                  * doesn't exist */
    3607           0 :                 if(!(dstpath = GDKfilepath(farmid, dstdir, name, NULL)))
    3608             :                         return GDK_FAIL;
    3609           0 :                 if(!(srcpath = GDKfilepath(farmid, srcdir, name, NULL))) {
    3610           0 :                         GDKfree(dstpath);
    3611           0 :                         return GDK_FAIL;
    3612             :                 }
    3613           0 :                 if (MT_remove(dstpath) != 0)    /* clear destination */
    3614             :                         ret = GDK_FAIL;
    3615           0 :                 TRC_DEBUG(IO_, "remove %s = %d\n", dstpath, (int) ret);
    3616             : 
    3617           0 :                 (void) GDKcreatedir(dstdir); /* if fails, move will fail */
    3618           0 :                 ret = GDKmove(farmid, srcdir, name, NULL, dstdir, name, NULL, true);
    3619           0 :                 TRC_DEBUG(IO_, "link %s %s = %d\n", srcpath, dstpath, (int) ret);
    3620           0 :                 GDKfree(dstpath);
    3621           0 :                 GDKfree(srcpath);
    3622             :         }
    3623             :         return ret;
    3624             : }
    3625             : 
    3626             : gdk_return
    3627         273 : BBPrecover(int farmid)
    3628             : {
                               /*
                                * Restore the files that were saved in BAKDIR of farm farmid.
                                *
                                * Every entry of BAKDIR is handled as follows:
                                * - "." and ".." are skipped, any other name that starts
                                *   with a dot is removed;
                                * - "BBP.dir" is only remembered (dirseen) and handled after
                                *   the loop through recover_dir();
                                * - any other file is mapped to a bat id using the part of
                                *   its name before the first '.' (an octal number, or a
                                *   name looked up with BBP_find()) and moved with
                                *   force_move() into that bat's subdirectory below BATDIR,
                                *   or into LEFTDIR when the id is 0, out of range or not
                                *   valid.
                                * When all of that succeeded, BAKDIR itself is removed.
                                *
                                * Returns GDK_SUCCEED when BAKDIR could not be opened
                                * (nothing to recover) or when recovery succeeded, and
                                * GDK_FAIL otherwise.
                                */
    3629             :         str bakdirpath;
    3630             :         str leftdirpath;
    3631             :         DIR *dirp;
    3632             :         struct dirent *dent;
    3633             :         long_str path, dstpath;
    3634             :         bat i;
    3635             :         size_t j = strlen(BATDIR);
    3636             :         gdk_return ret = GDK_SUCCEED;
    3637             :         bool dirseen = false;
    3638             :         str dstdir;
    3639             : 
    3640         273 :         bakdirpath = GDKfilepath(farmid, NULL, BAKDIR, NULL);
    3641         273 :         leftdirpath = GDKfilepath(farmid, NULL, LEFTDIR, NULL);
    3642         273 :         if (bakdirpath == NULL || leftdirpath == NULL) {
    3643           0 :                 GDKfree(bakdirpath);
    3644           0 :                 GDKfree(leftdirpath);
    3645           0 :                 return GDK_FAIL;
    3646             :         }
    3647         273 :         dirp = opendir(bakdirpath);
    3648         273 :         if (dirp == NULL) {
                                       /* NOTE(review): an error other than ENOENT is
                                        * reported but the function still returns
                                        * GDK_SUCCEED -- confirm this is intended */
    3649         195 :                 if (errno != ENOENT)
    3650           0 :                         GDKsyserror("cannot open directory %s\n", bakdirpath);
    3651         195 :                 GDKfree(bakdirpath);
    3652         195 :                 GDKfree(leftdirpath);
    3653         195 :                 return GDK_SUCCEED;     /* nothing to do */
    3654             :         }
                               /* dstpath becomes BATDIR followed by DIR_SEP; dstdir
                                * points just past the separator and is the buffer that
                                * is handed to BBPgetsubdir() for each bat below */
    3655          78 :         memcpy(dstpath, BATDIR, j);
    3656          78 :         dstpath[j] = DIR_SEP;
    3657          78 :         dstpath[++j] = 0;
    3658             :         dstdir = dstpath + j;
    3659          78 :         TRC_DEBUG(IO_, "start\n");
    3660             : 
    3661          78 :         if (MT_mkdir(leftdirpath) < 0 && errno != EEXIST) {
    3662           0 :                 GDKsyserror("cannot create directory %s\n", leftdirpath);
    3663           0 :                 closedir(dirp);
    3664           0 :                 GDKfree(bakdirpath);
    3665           0 :                 GDKfree(leftdirpath);
    3666             :                 return GDK_FAIL;
    3667             :         }
    3668             : 
    3669             :         /* move back all files */
    3670         235 :         while ((dent = readdir(dirp)) != NULL) {
    3671         157 :                 const char *q = strchr(dent->d_name, '.');
    3672             : 
    3673         157 :                 if (q == dent->d_name) {
                                               /* name starts with a dot: skip "." and
                                                * "..", remove anything else */
    3674             :                         char *fn;
    3675             : 
    3676         156 :                         if (strcmp(dent->d_name, ".") == 0 ||
    3677          78 :                             strcmp(dent->d_name, "..") == 0)
    3678         156 :                                 continue;
    3679           0 :                         fn = GDKfilepath(farmid, BAKDIR, dent->d_name, NULL);
    3680           0 :                         if (fn) {
    3681             :                                 int uret = MT_remove(fn);
    3682           0 :                                 TRC_DEBUG(IO_, "remove %s = %d\n",
    3683             :                                           fn, uret);
    3684           0 :                                 GDKfree(fn);
    3685             :                         }
    3686           0 :                         continue;
    3687           1 :                 } else if (strcmp(dent->d_name, "BBP.dir") == 0) {
                                               /* handled after the loop */
    3688             :                         dirseen = true;
    3689           0 :                         continue;
    3690             :                 }
                                       /* copy the part of the name before the first '.'
                                        * (or the whole name if there is none) into path;
                                        * from here on j is reused as that length */
    3691           1 :                 if (q == NULL)
    3692           0 :                         q = dent->d_name + strlen(dent->d_name);
    3693           1 :                 if ((j = q - dent->d_name) + 1 > sizeof(path)) {
    3694             :                         /* name too long: ignore */
    3695           0 :                         continue;
    3696             :                 }
    3697           1 :                 strncpy(path, dent->d_name, j);
    3698           1 :                 path[j] = 0;
                                       /* a leading digit means the name is a bat id
                                        * written in octal; otherwise look the name up
                                        * and use the absolute value of the result */
    3699           1 :                 if (GDKisdigit(*path)) {
    3700           1 :                         i = strtol(path, NULL, 8);
    3701             :                 } else {
    3702           0 :                         i = BBP_find(path, false);
    3703             :                         if (i < 0)
    3704             :                                 i = -i;
    3705             :                 }
    3706           1 :                 if (i == 0 || i >= (bat) ATOMIC_GET(&BBPsize) || !BBPvalid(i)) {
                                               /* NOTE(review): the result of this move is
                                                * not checked -- presumably best effort,
                                                * confirm */
    3707           0 :                         force_move(farmid, BAKDIR, LEFTDIR, dent->d_name);
    3708             :                 } else {
    3709           1 :                         BBPgetsubdir(dstdir, i);
    3710           1 :                         if (force_move(farmid, BAKDIR, dstpath, dent->d_name) != GDK_SUCCEED)
    3711             :                                 ret = GDK_FAIL;
    3712             :                 }
    3713             :         }
    3714          78 :         closedir(dirp);
    3715          78 :         if (dirseen && ret == GDK_SUCCEED) {    /* we have a saved BBP.dir; it should be moved back!! */
    3716             :                 struct stat st;
    3717             :                 char *fn;
    3718             : 
    3719           0 :                 fn = GDKfilepath(farmid, BATDIR, "BBP", "dir");
    3720           0 :                 if (fn == NULL) {
    3721             :                         ret = GDK_FAIL;
    3722             :                 } else {
                                               /* the second argument tells recover_dir()
                                                * whether BBP.dir currently exists in
                                                * BATDIR (MT_stat() succeeded) */
    3723           0 :                         ret = recover_dir(farmid, MT_stat(fn, &st) == 0);
    3724           0 :                         GDKfree(fn);
    3725             :                 }
    3726             :         }
    3727             : 
                               /* BAKDIR is only removed when everything above succeeded */
    3728          78 :         if (ret == GDK_SUCCEED) {
    3729          78 :                 if (MT_rmdir(bakdirpath) < 0) {
    3730           0 :                         GDKsyserror("cannot remove directory %s\n", bakdirpath);
    3731             :                         ret = GDK_FAIL;
    3732             :                 }
    3733          78 :                 TRC_DEBUG(IO_, "rmdir %s = %d\n", bakdirpath, (int) ret);
    3734             :         }
    3735          78 :         if (ret != GDK_SUCCEED)
    3736           0 :                 GDKerror("recovery failed.\n");
    3737             : 
    3738          78 :         TRC_DEBUG(IO_, "end\n");
    3739          78 :         GDKfree(bakdirpath);
    3740          78 :         GDKfree(leftdirpath);
    3741          78 :         return ret;
    3742             : }
    3743             : 
    3744             : /*
    3745             :  * SUBDIR recovery is quite mindlessly moving all files back to the
    3746             :  * parent (BAKDIR).  We do recognize moving back BBP.dir and set
    3747             :  * backup_dir accordingly.
                        *
                        * Only farm 0 is handled.  Entries whose name starts with a dot
                        * are skipped, and the loop stops at the first move that fails.
                        * When every move succeeded, SUBDIR itself is removed.
                        *
                        * Returns GDK_SUCCEED when SUBDIR could not be opened (nothing to
                        * do) or when all moves and the directory removal succeeded, and
                        * GDK_FAIL otherwise.
    3748             :  */
    3749             : gdk_return
    3750       16578 : BBPrecover_subdir(void)
    3751             : {
    3752             :         str subdirpath;
    3753             :         DIR *dirp;
    3754             :         struct dirent *dent;
    3755             :         gdk_return ret = GDK_SUCCEED;
    3756             : 
    3757       16578 :         subdirpath = GDKfilepath(0, NULL, SUBDIR, NULL);
    3758       16578 :         if (subdirpath == NULL)
    3759             :                 return GDK_FAIL;
    3760       16578 :         dirp = opendir(subdirpath);
                               /* NOTE(review): an error other than ENOENT is reported
                                * here but still ends in GDK_SUCCEED below -- confirm
                                * this is intended */
    3761       16578 :         if (dirp == NULL && errno != ENOENT)
    3762           0 :                 GDKsyserror("cannot open directory %s\n", subdirpath);
    3763       16578 :         GDKfree(subdirpath);
    3764       16578 :         if (dirp == NULL) {
    3765             :                 return GDK_SUCCEED;     /* nothing to do */
    3766             :         }
    3767           0 :         TRC_DEBUG(IO_, "start\n");
    3768             : 
    3769             :         /* move back all files */
    3770           0 :         while ((dent = readdir(dirp)) != NULL) {
    3771           0 :                 if (dent->d_name[0] == '.')
    3772           0 :                         continue;
    3773           0 :                 ret = GDKmove(0, SUBDIR, dent->d_name, NULL, BAKDIR, dent->d_name, NULL, true);
                                       /* remember that BBP.dir is back in BAKDIR */
    3774           0 :                 if (ret == GDK_SUCCEED && strcmp(dent->d_name, "BBP.dir") == 0)
    3775           0 :                         backup_dir = 1;
    3776           0 :                 if (ret != GDK_SUCCEED)
    3777             :                         break;
    3778             :         }
    3779           0 :         closedir(dirp);
    3780             : 
    3781             :         /* delete the directory */
    3782           0 :         if (ret == GDK_SUCCEED) {
    3783           0 :                 ret = GDKremovedir(0, SUBDIR);
                                       /* backup_dir still being 2 here means no BBP.dir
                                        * was moved back in the loop above; presumably 2
                                        * stands for "BBP.dir saved in SUBDIR" -- TODO
                                        * confirm against where backup_dir is set */
    3784           0 :                 if (backup_dir == 2) {
    3785           0 :                         TRC_DEBUG(IO_, "%s%cBBP.dir had disappeared!\n", SUBDIR, DIR_SEP);
    3786           0 :                         backup_dir = 0;
    3787             :                 }
    3788             :         }
    3789           0 :         TRC_DEBUG(IO_, "end = %d\n", (int) ret);
    3790             : 
    3791           0 :         if (ret != GDK_SUCCEED)
    3792           0 :                 GDKerror("recovery failed.\n");
    3793             :         return ret;
    3794             : }
    3795             : 
    3796             : /*
    3797             :  * @- The diskscan
    3798             :  * The BBPdiskscan routine walks through the BAT dir, cleans up
    3799             :  * leftovers, and measures disk occupancy.  Leftovers are files that
    3800             :  * cannot belong to a BAT. in order to establish this for [ht]heap
    3801             :  * files, the BAT descriptor is loaded in order to determine whether
    3802             :  * these files are still required.
    3803             :  *
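    3804             :  * The routine gathers all bat sizes in a bat that contains bat-ids
    3805             :  * and bytesizes. The return value is the number of bytes of space
    3806             :  * freed.
                        *
                        * NOTE: the paragraph above looks outdated.  BBPdiskscan as
                        * defined below gathers no sizes and returns a bool: true when
                        * the directory could not be opened, false once it was scanned.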
    3807             :  */
    3808             : static bool
    3809       23068 : persistent_bat(bat bid)
    3810             : {
                               /*
                                * Return true when bid lies in [0, BBPsize), is valid
                                * according to BBPvalid(), and is either not cached
                                * (BBP_cache() yields NULL) or has batCopiedtodisk set.
                                * Return false in every other case.
                                */
    3811       23068 :         if (bid >= 0 && bid < (bat) ATOMIC_GET(&BBPsize) && BBPvalid(bid)) {
    3812       23068 :                 BAT *b = BBP_cache(bid);
    3813             : 
    3814       23068 :                 if (b == NULL || b->batCopiedtodisk) {
    3815       23068 :                         return true;
    3816             :                 }
    3817             :         }
    3818             :         return false;
    3819             : }
    3820             : 
    3821             : static BAT *
    3822       23068 : getdesc(bat bid)
    3823             : {
                               /*
                                * Return the descriptor of bat bid as given by BBP_desc(),
                                * or NULL when bid is nil, is not below BBPsize, has no
                                * logical name, or has no descriptor.  Except for the nil
                                * case, a NULL result also makes this function call
                                * BBPclear(bid, true).
                                */
    3824             :         BAT *b = NULL;
    3825             : 
    3826       23068 :         if (is_bat_nil(bid))
    3827             :                 return NULL;
    3828       23068 :         assert(bid > 0);
    3829       23068 :         if (bid < (bat) ATOMIC_GET(&BBPsize) && BBP_logical(bid))
    3830       23068 :                 b = BBP_desc(bid);
    3831       23068 :         if (b == NULL)
    3832           0 :                 BBPclear(bid, true);
    3833             :         return b;
    3834             : }
    3835             : 
    3836             : static bool
    3837        1531 : BBPdiskscan(const char *parent, size_t baseoff)
    3838             : {
                               /*
                                * Scan directory parent and remove the files in it that are
                                * not needed by a persistent BAT.  Entries without a '.' in
                                * their name are tried as subdirectories by calling this
                                * function recursively.
                                *
                                * parent   path of the directory to scan
                                * baseoff  offset into parent; parent + baseoff is compared
                                *          with BATDIR, BAKDIR and SUBDIR to decide whether
                                *          "BBP.*" files must be left alone (presumably
                                *          the length of the farm directory prefix --
                                *          TODO confirm against the caller)
                                *
                                * Returns true when parent could not be opened as a
                                * directory and false after it was scanned.  The recursive
                                * call below relies on this to tell directories from plain
                                * files.
                                */
    3839        1531 :         DIR *dirp = opendir(parent);
    3840             :         struct dirent *dent;
    3841             :         char fullname[FILENAME_MAX];
    3842             :         str dst = fullname;
    3843             :         size_t dstlen = sizeof(fullname);
    3844             :         const char *src = parent;
    3845             : 
    3846        1531 :         if (dirp == NULL) {
    3847         145 :                 if (errno != ENOENT)
    3848           0 :                         GDKsyserror("cannot open directory %s\n", parent);
    3849         145 :                 return true;    /* nothing to do */
    3850             :         }
    3851             : 
                               /* copy parent into fullname; dst ends up pointing at the
                                * place where each entry name is written and dstlen is
                                * the room that is left.
                                * NOTE(review): nothing checks that parent (plus the
                                * separator) fits in fullname; dstlen would wrap around
                                * if it did not -- confirm callers only pass paths
                                * shorter than FILENAME_MAX */
    3852      153262 :         while (*src) {
    3853      151876 :                 *dst++ = *src++;
    3854      151876 :                 dstlen--;
    3855             :         }
    3856        1386 :         if (dst > fullname && dst[-1] != DIR_SEP) {
    3857        1386 :                 *dst++ = DIR_SEP;
    3858        1386 :                 dstlen--;
    3859             :         }
    3860             : 
    3861       28612 :         while ((dent = readdir(dirp)) != NULL) {
    3862             :                 const char *p;
    3863             :                 bat bid;
    3864             :                 bool ok, delete;
    3865             : 
    3866       27226 :                 if (dent->d_name[0] == '.')
    3867        2772 :                         continue;       /* ignore .dot files and directories (. ..) */
    3868             : 
                                       /* leave BBP.* files alone when scanning BATDIR
                                        * itself or anything whose path starts with
                                        * BAKDIR or SUBDIR */
    3869       24454 :                 if (strncmp(dent->d_name, "BBP.", 4) == 0 &&
    3870         265 :                     (strcmp(parent + baseoff, BATDIR) == 0 ||
    3871         265 :                      strncmp(parent + baseoff, BAKDIR, strlen(BAKDIR)) == 0 ||
    3872           0 :                      strncmp(parent + baseoff, SUBDIR, strlen(SUBDIR)) == 0))
    3873         265 :                         continue;
    3874             : 
                                       /* p points at the first '.' of the name, so p + 1
                                        * is the extension that is examined below */
    3875       24189 :                 p = strchr(dent->d_name, '.');
    3876             : 
    3877       24189 :                 if (strlen(dent->d_name) >= dstlen) {
    3878             :                         /* found a file with too long a name
    3879             :                            (i.e. unknown); stop pruning in this
    3880             :                            subdir */
    3881           0 :                         fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
    3882           0 :                         break;
    3883             :                 }
    3884       24189 :                 strncpy(dst, dent->d_name, dstlen);
    3885       24189 :                 fullname[sizeof(fullname) - 1] = 0;
    3886             : 
                                       /* a name without '.' is tried as a directory; a
                                        * false result means it was one and has been
                                        * scanned.  If it was not, the code below ends up
                                        * with ok == false (p is NULL) and reports it as
                                        * an unexpected file */
    3887       24189 :                 if (p == NULL && !BBPdiskscan(fullname, baseoff)) {
    3888             :                         /* it was a directory */
    3889        1121 :                         continue;
    3890             :                 }
    3891             : 
    3892       23068 :                 if (p && strcmp(p + 1, "tmp") == 0) {
                                               /* *.tmp files are always removed */
    3893             :                         delete = true;
    3894             :                         ok = true;
    3895             :                         bid = 0;
    3896             :                 } else {
                                               /* the name is expected to start with the
                                                * bat id written in octal */
    3897       23068 :                         bid = strtol(dent->d_name, NULL, 8);
    3898       23068 :                         ok = p && bid;
    3899             :                         delete = false;
    3900             : 
    3901       23068 :                         if (!ok || !persistent_bat(bid)) {
    3902             :                                 delete = true;
    3903       23068 :                         } else if (strncmp(p + 1, "tail", 4) == 0) {
                                                       /* keep a tail file only when the
                                                        * BAT has a tail type, was copied
                                                        * to disk, and the extension is
                                                        * the one expected for its type
                                                        * and width */
    3904       15727 :                                 BAT *b = getdesc(bid);
    3905       15727 :                                 delete = (b == NULL || !b->ttype || !b->batCopiedtodisk);
    3906       15727 :                                 if (!delete) {
    3907       15727 :                                         if (b->ttype == TYPE_str) {
                                                                       /* for string BATs the
                                                                        * extension depends on
                                                                        * twidth */
    3908        4019 :                                                 switch (b->twidth) {
    3909        2429 :                                                 case 1:
    3910        2429 :                                                         delete = strcmp(p + 1, "tail1") != 0;
    3911        2429 :                                                         break;
    3912        1336 :                                                 case 2:
    3913        1336 :                                                         delete = strcmp(p + 1, "tail2") != 0;
    3914        1336 :                                                         break;
    3915             : #if SIZEOF_VAR_T == 8
    3916         254 :                                                 case 4:
    3917         254 :                                                         delete = strcmp(p + 1, "tail4") != 0;
    3918         254 :                                                         break;
    3919             : #endif
    3920           0 :                                                 default:
    3921           0 :                                                         delete = strcmp(p + 1, "tail") != 0;
    3922           0 :                                                         break;
    3923             :                                                 }
    3924             :                                         } else {
    3925       11708 :                                                 delete = strcmp(p + 1, "tail") != 0;
    3926             :                                         }
    3927             :                                 }
    3928        7341 :                         } else if (strncmp(p + 1, "theap", 5) == 0) {
                                                       /* keep only when the BAT has a
                                                        * tvheap and was copied to disk */
    3929        4494 :                                 BAT *b = getdesc(bid);
    3930        8988 :                                 delete = (b == NULL || !b->tvheap || !b->batCopiedtodisk);
    3931        2847 :                         } else if (strncmp(p + 1, "thashl", 6) == 0 ||
    3932        1444 :                                    strncmp(p + 1, "thashb", 6) == 0) {
    3933             : #ifdef PERSISTENTHASH
                                                       /* (Hash *) 1 is a non-NULL
                                                        * placeholder, not a real pointer;
                                                        * presumably it tells later code
                                                        * that a hash exists on disk --
                                                        * TODO confirm */
    3934        2806 :                                 BAT *b = getdesc(bid);
    3935        2806 :                                 delete = b == NULL;
    3936        2806 :                                 if (!delete)
    3937        2806 :                                         b->thash = (Hash *) 1;
    3938             : #else
    3939             :                                 delete = true;
    3940             : #endif
    3941          41 :                         } else if (strncmp(p + 1, "thash", 5) == 0) {
    3942             :                                 /* older versions used .thash which we
    3943             :                                  * can simply ignore */
    3944             :                                 delete = true;
    3945          41 :                         } else if (strncmp(p + 1, "thsh", 4) == 0) {
    3946             :                                 /* temporary hash files which we can
    3947             :                                  * simply ignore */
    3948             :                                 delete = true;
    3949          41 :                         } else if (strncmp(p + 1, "timprints", 9) == 0) {
                                                       /* same placeholder scheme as for
                                                        * the hash files above */
    3950          20 :                                 BAT *b = getdesc(bid);
    3951          20 :                                 delete = b == NULL;
    3952          20 :                                 if (!delete)
    3953          20 :                                         b->timprints = (Imprints *) 1;
    3954          21 :                         } else if (strncmp(p + 1, "torderidx", 9) == 0) {
    3955             : #ifdef PERSISTENTIDX
    3956          21 :                                 BAT *b = getdesc(bid);
    3957          21 :                                 delete = b == NULL;
    3958          21 :                                 if (!delete)
    3959          21 :                                         b->torderidx = (Heap *) 1;
    3960             : #else
    3961             :                                 delete = true;
    3962             : #endif
    3963           0 :                         } else if (strncmp(p + 1, "new", 3) != 0) {
                                                       /* extensions starting with "new"
                                                        * are left untouched; anything
                                                        * else is an unknown file */
    3964             :                                 ok = false;
    3965             :                         }
    3966             :                 }
    3967       23068 :                 if (!ok) {
    3968             :                         /* found an unknown file; stop pruning in this
    3969             :                          * subdir */
    3970           0 :                         fprintf(stderr, "unexpected file %s, leaving %s.\n", dent->d_name, parent);
    3971           0 :                         break;
    3972             :                 }
    3973       23068 :                 if (delete) {
                                               /* a failed remove (other than ENOENT) is
                                                * reported and the scan moves on */
    3974         209 :                         if (MT_remove(fullname) != 0 && errno != ENOENT) {
    3975           0 :                                 GDKsyserror("remove(%s)", fullname);
    3976           0 :                                 continue;
    3977             :                         }
    3978         209 :                         TRC_DEBUG(IO_, "remove(%s) = 0\n", fullname);
    3979             :                 }
    3980             :         }
    3981        1386 :         closedir(dirp);
    3982        1386 :         return false;
    3983             : }
    3984             : 
    3985             : void
    3986         264 : gdk_bbp_reset(void)
    3987             : {
                               /*
                                * Reset the BBP state kept in this file: clear the free
                                * field of every GDKbbpLock entry, free the BBP blocks, the
                                * farm directory names and the BBP hash, and set the
                                * remaining counters and flags back to 0.
                                */
    3988             :         int i;
    3989             : 
    3990         528 :         for (i = 0; i <= BBP_THREADMASK; i++) {
    3991         264 :                 GDKbbpLock[i].free = 0;
    3992             :         }
                               /* release the BBP one block of BBPINIT entries at a
                                * time, from the last block down to the first */
    3993         528 :         while (BBPlimit > 0) {
    3994         264 :                 BBPlimit -= BBPINIT;
    3995         264 :                 assert(BBPlimit >= 0);
    3996         264 :                 GDKfree(BBP[BBPlimit >> BBPINITLOG]);
    3997         264 :                 BBP[BBPlimit >> BBPINITLOG] = NULL;
    3998             :         }
    3999         264 :         ATOMIC_SET(&BBPsize, 0);
    4000        8712 :         for (i = 0; i < MAXFARMS; i++)
    4001        8448 :                 GDKfree((void *) BBPfarms[i].dirname); /* loose "const" */
    4002         264 :         memset(BBPfarms, 0, sizeof(BBPfarms));
    4003         264 :         GDKfree(BBP_hash);
    4004         264 :         BBP_hash = NULL;
    4005         264 :         BBP_mask = 0;
    4006             : 
    4007         264 :         locked_by = 0;
    4008         264 :         BBPunloadCnt = 0;
    4009         264 :         backup_files = 0;
    4010         264 :         backup_dir = 0;
    4011         264 :         backup_subdir = 0;
    4012         264 : }

Generated by: LCOV version 1.14