--- db-4.6.21/btree/bt_delete.c 2007-05-18 01:14:46.000000000 +1000 +++ db-4.6.21-safe-si2/btree/bt_delete.c 2007-12-14 23:00:24.186408930 +1100 @@ -525,7 +525,8 @@ goto err; if ((ret = __memp_fget(mpf, &pagep->next_pgno, dbc->txn, DB_MPOOL_DIRTY, &np)) != 0) { - ret = __db_pgerr(dbp, pagep->next_pgno, ret); + if (ret != DB_SNAPSHOT_CONFLICT) + ret = __db_pgerr(dbp, pagep->next_pgno, ret); goto err; } nlsnp = &np->lsn; @@ -536,7 +537,8 @@ goto err; if ((ret = __memp_fget(mpf, &pagep->prev_pgno, dbc->txn, DB_MPOOL_DIRTY, &pp)) != 0) { - ret = __db_pgerr(dbp, pagep->prev_pgno, ret); + if (ret != DB_SNAPSHOT_CONFLICT) + ret = __db_pgerr(dbp, pagep->prev_pgno, ret); goto err; } plsnp = &pp->lsn; --- db-4.6.21/common/db_err.c 2007-05-30 01:23:05.000000000 +1000 +++ db-4.6.21-safe-si2/common/db_err.c 2007-12-14 23:00:24.190408843 +1100 @@ -294,6 +294,12 @@ case DB_SECONDARY_BAD: return ("DB_SECONDARY_BAD: Secondary index inconsistent with primary"); + case DB_SNAPSHOT_CONFLICT: + return + ("DB_SNAPSHOT_CONFLICT: attempt to update an old version"); + case DB_SNAPSHOT_UNSAFE: + return + ("DB_SNAPSHOT_UNSAFE: potential snapshot isolation anomaly found"); case DB_VERIFY_BAD: return ("DB_VERIFY_BAD: Database verification failed"); case DB_VERSION_MISMATCH: --- db-4.6.21/db/db_meta.c 2007-05-18 03:17:41.000000000 +1000 +++ db-4.6.21-safe-si2/db/db_meta.c 2007-12-14 23:00:24.190408843 +1100 @@ -47,6 +47,7 @@ #include "dbinc/db_page.h" #include "dbinc/lock.h" #include "dbinc/mp.h" +#include "dbinc/txn.h" #include "dbinc/db_am.h" static void __db_init_meta __P((DB *, void *, db_pgno_t, u_int32_t)); @@ -715,8 +716,7 @@ if ((ret = __db_lget(dbc, LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) goto err; - if ((ret = __memp_fget(mpf, &pgno, dbc->txn, - DB_MPOOL_DIRTY, &meta)) != 0) + if ((ret = __memp_fget(mpf, &pgno, dbc->txn, 0, &meta)) != 0) goto err; if (last_pgnop != NULL) @@ -749,6 +749,9 @@ } while (pgno != PGNO_INVALID); nelems = (u_int32_t)(lp - list); + if ((ret = 
__memp_dirty(mpf, &meta, dbc->txn, dbc->priority, 0)) != 0) + goto err; + /* Log the current state of the free list */ if (DBC_LOGGING(dbc)) { ddbt.data = list; @@ -936,7 +939,8 @@ DB *dbp; DB_ENV *dbenv; DB_LOCKREQ couple[3], *reqp; - DB_TXN *txn; + DB_TXN *parent, *txn; + TXN_DETAIL *td; int has_timeout, i, ret; dbp = dbc->dbp; @@ -948,8 +952,6 @@ * calling __db_lget to acquire the lock. */ if (CDB_LOCKING(dbenv) || !LOCKING_ON(dbenv) || - (MULTIVERSION(dbp) && mode == DB_LOCK_READ && - dbc->txn != NULL && F_ISSET(dbc->txn, TXN_SNAPSHOT)) || F_ISSET(dbc, DBC_DONTLOCK) || (F_ISSET(dbc, DBC_RECOVER) && (action != LCK_ROLLBACK || IS_REP_CLIENT(dbenv))) || (action != LCK_ALWAYS && F_ISSET(dbc, DBC_OPD))) { @@ -957,6 +959,21 @@ return (0); } + if (MULTIVERSION(dbp) && txn != NULL && F_ISSET(txn, TXN_SNAPSHOT)) { + if (mode == DB_LOCK_READ && !F_ISSET(txn, TXN_SNAPSHOT_SAFE)) { + LOCK_INIT(*lockp); + return (0); + } else { + for (parent = txn; parent->parent != NULL;) + parent = parent->parent; + td = parent->td; + + lkflags |= DB_LOCK_SNAPSHOT_SAFE; + if (mode == DB_LOCK_READ) + mode = DB_LOCK_SIREAD; + } + } + dbc->lock.pgno = pgno; if (lkflags & DB_LOCK_RECORD) dbc->lock.type = DB_RECORD_LOCK; @@ -1042,7 +1059,8 @@ break; } - if (txn != NULL && ret == DB_LOCK_DEADLOCK) + if (txn != NULL && + (ret == DB_LOCK_DEADLOCK || ret == DB_SNAPSHOT_UNSAFE)) F_SET(txn, TXN_DEADLOCK); return ((ret == DB_LOCK_NOTGRANTED && !F_ISSET(dbenv, DB_ENV_TIME_NOTGRANTED)) ? DB_LOCK_DEADLOCK : ret); --- db-4.6.21/dbinc/db.in 2007-06-29 00:23:35.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc/db.in 2007-12-14 23:00:24.194408756 +1100 @@ -206,6 +206,20 @@ }; /* + * A DB_LSN has two parts, a fileid which identifies a specific file, and an + * offset within that file. The fileid is an unsigned 4-byte quantity that + * uniquely identifies a file within the log directory -- currently a simple + * counter inside the log. The offset is also an unsigned 4-byte value. 
The + * log manager guarantees the offset is never more than 4 bytes by switching + * to a new log file before the maximum length imposed by an unsigned 4-byte + * offset is reached. + */ +struct __db_lsn { + u_int32_t file; /* File ID. */ + u_int32_t offset; /* File offset. */ +}; + +/* * Common flags -- * Interfaces which use any of these common flags should never have * interface specific flags in this range. @@ -315,6 +329,7 @@ * Shared flags up to 0x0002000 */ #define DB_TXN_SYNC 0x0004000 /* Always sync log on commit. */ #define DB_TXN_WAIT 0x0008000 /* Always wait for locks in this TXN. */ +#define DB_TXN_SNAPSHOT_SAFE 0x0010000 /* Serializable SI. */ /* * Flags private to DB_ENV->txn_checkpoint. @@ -489,8 +504,9 @@ #define DB_LOCK_NOWAIT 0x002 /* Don't wait on unavailable lock. */ #define DB_LOCK_RECORD 0x004 /* Internal: record lock. */ #define DB_LOCK_SET_TIMEOUT 0x008 /* Internal: set lock timeout. */ -#define DB_LOCK_SWITCH 0x010 /* Internal: switch existing lock. */ -#define DB_LOCK_UPGRADE 0x020 /* Internal: upgrade existing lock. */ +#define DB_LOCK_SNAPSHOT_SAFE 0x010 /* Internal: safe SI write. */ +#define DB_LOCK_SWITCH 0x020 /* Internal: switch existing lock. */ +#define DB_LOCK_UPGRADE 0x040 /* Internal: upgrade existing lock. */ /* Flag values for DbEnv.set_timeout. */ #define DB_SET_LOCK_TIMEOUT 1 /* Set lock timeout */ @@ -514,7 +530,8 @@ DB_LOCK_IREAD=5, /* Intent to share/read. */ DB_LOCK_IWR=6, /* Intent to read and write. */ DB_LOCK_READ_UNCOMMITTED=7, /* Degree 1 isolation. */ - DB_LOCK_WWRITE=8 /* Was Written. */ + DB_LOCK_WWRITE=8, /* Was Written. */ + DB_LOCK_SIREAD=9 /* Snapshot read. */ } db_lockmode_t; /* @@ -546,7 +563,7 @@ * promoted; waiting for the owner * to run and upgrade it to held. */ DB_LSTAT_WAITING=6 /* Lock is on the wait queue. */ -}db_status_t; +} db_status_t; /* Lock statistics structure. 
*/ struct __db_lock_stat { @@ -656,20 +673,6 @@ #define DB_LOG_WRNOSYNC 0x020 /* Write, don't sync log_put */ /* - * A DB_LSN has two parts, a fileid which identifies a specific file, and an - * offset within that file. The fileid is an unsigned 4-byte quantity that - * uniquely identifies a file within the log directory -- currently a simple - * counter inside the log. The offset is also an unsigned 4-byte value. The - * log manager guarantees the offset is never more than 4 bytes by switching - * to a new log file before the maximum length imposed by an unsigned 4-byte - * offset is reached. - */ -struct __db_lsn { - u_int32_t file; /* File ID. */ - u_int32_t offset; /* File offset. */ -}; - -/* * Application-specified log record types start at DB_user_BEGIN, and must not * equal or exceed DB_debug_FLAG. * @@ -1062,8 +1065,9 @@ #define TXN_READ_UNCOMMITTED 0x0400 /* Txn has degree 1 isolation. */ #define TXN_RESTORED 0x0800 /* Txn has been restored. */ #define TXN_SNAPSHOT 0x1000 /* Snapshot Isolation. */ -#define TXN_SYNC 0x2000 /* Write and sync on prepare/commit. */ -#define TXN_WRITE_NOSYNC 0x4000 /* Write only on prepare/commit. */ +#define TXN_SNAPSHOT_SAFE 0x2000 /* Serializable SI. */ +#define TXN_SYNC 0x4000 /* Write and sync on prepare/commit. */ +#define TXN_WRITE_NOSYNC 0x8000 /* Write only on prepare/commit. */ u_int32_t flags; }; @@ -1480,6 +1484,8 @@ #define DB_SECONDARY_BAD (-30974)/* Secondary index corrupt. */ #define DB_VERIFY_BAD (-30973)/* Verify failed; bad format. */ #define DB_VERSION_MISMATCH (-30972)/* Environment version mismatch. */ +#define DB_SNAPSHOT_CONFLICT (-30971)/* Conflicting SI updates detected. */ +#define DB_SNAPSHOT_UNSAFE (-30970)/* Potential SI anomaly detected. */ /* DB (private) error return codes. 
*/ #define DB_ALREADY_ABORTED (-30899) --- db-4.6.21/dbinc/lock.h 2007-05-18 04:46:15.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc/lock.h 2007-12-14 23:00:24.194408756 +1100 @@ -124,6 +124,7 @@ SH_TAILQ_ENTRY dd_links; /* Links for dd list. */ SH_TAILQ_HEAD(__waitl) waiters; /* List of waiting locks. */ SH_TAILQ_HEAD(__holdl) holders; /* List of held locks. */ + SH_TAILQ_HEAD(__sil) sireaders; /* List of sireaders. */ /* Declare room in the object to hold * typical DB lock structures so that * we do not have to allocate them from @@ -139,6 +140,7 @@ pid_t pid; /* Process owning locker ID */ db_threadid_t tid; /* Thread owning locker ID */ + roff_t td_off; /* TXN_DETAIL of locker. */ u_int32_t dd_id; /* Deadlock detector id. */ @@ -162,8 +164,9 @@ #define DB_LOCKER_DELETED 0x0001 #define DB_LOCKER_DIRTY 0x0002 -#define DB_LOCKER_INABORT 0x0004 -#define DB_LOCKER_TIMEOUT 0x0008 +#define DB_LOCKER_FREED 0x0004 +#define DB_LOCKER_INABORT 0x0008 +#define DB_LOCKER_TIMEOUT 0x0010 u_int32_t flags; }; @@ -212,6 +215,22 @@ db_status_t status; /* Status of this lock. */ }; +/* Safe SI helpers. */ +#define LOCKER_TD(dbenv, lockerp) \ + ((TXN_DETAIL *)R_ADDR(&(dbenv)->tx_handle->reginfo, (lockerp)->td_off)) + +#define LOCK_HOLDER(dbenv, lp) \ + ((DB_LOCKER *)R_ADDR(&(dbenv)->lk_handle->reginfo, (lp)->holder)) + +#define LOCK_OWNER(dbenv, lp) \ + LOCKER_TD(dbenv, LOCK_HOLDER(dbenv, lp)) + +#define LOCK_READLSN(dbenv, lp) \ + LOCK_OWNER(dbenv, lp)->read_lsn + +#define LOCK_COMMITLSN(dbenv, lp) \ + LOCK_OWNER(dbenv, lp)->visible_lsn + /* * Flag values for __lock_put_internal: * DB_LOCK_DOALL: Unlock all references in this lock (instead of only 1). 
--- db-4.6.21/dbinc/shqueue.h 2007-05-18 01:15:05.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc/shqueue.h 2007-12-14 23:00:24.194408756 +1100 @@ -213,8 +213,9 @@ SH_LIST_NEXTP(elm, field, type)->field.sle_prev = \ (elm)->field.sle_prev - (elm)->field.sle_next; \ *__SH_LIST_PREV_OFF(elm, field) += (elm)->field.sle_next;\ - } else \ + } else if ((elm)->field.sle_prev != -1) \ *__SH_LIST_PREV_OFF(elm, field) = -1; \ + (elm)->field.sle_next = (elm)->field.sle_prev = -1; \ } while (0) #define SH_LIST_REMOVE_HEAD(head, field, type) do { \ --- db-4.6.21/dbinc/txn.h 2007-05-18 05:33:03.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc/txn.h 2007-12-14 23:00:24.234407883 +1100 @@ -69,8 +69,10 @@ u_int32_t status; /* status of the transaction */ #define TXN_DTL_COLLECTED 0x1 /* collected during txn_recover */ -#define TXN_DTL_RESTORED 0x2 /* prepared txn restored */ -#define TXN_DTL_INMEMORY 0x4 /* uses in memory logs */ +#define TXN_DTL_INMEMORY 0x2 /* uses in memory logs */ +#define TXN_DTL_RESTORED 0x4 /* prepared txn restored */ +#define TXN_DTL_WCONF 0x8 /* MVCC: write end of a conflict */ +#define TXN_DTL_RCONF 0x10 /* MVCC: read end of a conflict */ u_int32_t flags; /* TXN_XA_{ABORTED, DEADLOCKED, ENDED, PREPARED, STARTED, SUSPENDED} */ @@ -134,6 +136,9 @@ DB_LSN last_ckp; /* lsn of the last checkpoint */ time_t time_ckp; /* time of last checkpoint */ + db_mutex_t mtx_oldlsn; /* Protect the oldest active LSN */ + DB_LSN old_lsn; /* LSN of the oldest transaction. */ + DB_TXN_STAT stat; /* Statistics for txns. 
*/ #define TXN_IN_RECOVERY 0x01 /* environment is being recovered */ --- db-4.6.21/dbinc_auto/btree_ext.h 2007-05-04 08:36:40.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/btree_ext.h 2007-12-14 23:00:24.234407883 +1100 @@ -74,35 +74,6 @@ int __ram_ca __P((DBC *, ca_recno_arg, int *)); int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int)); int __ram_writeback __P((DB *)); -int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *)); -int __bam_adjust __P((DBC *, int32_t)); -int __bam_nrecs __P((DBC *, db_recno_t *)); -db_recno_t __bam_total __P((DB *, PAGE *)); -int __bam_get_root __P((DBC *, db_pgno_t, int, u_int32_t, int *)); -int __bam_search __P((DBC *, db_pgno_t, const DBT *, u_int32_t, int, db_recno_t *, int *)); -int __bam_stkrel __P((DBC *, u_int32_t)); -int __bam_stkgrow __P((DB_ENV *, BTREE_CURSOR *)); -int __bam_split __P((DBC *, void *, db_pgno_t *)); -int __bam_pinsert __P((DBC *, EPG *, u_int32_t, PAGE *, PAGE *, int)); -int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t)); -int __bam_stat __P((DBC *, void *, u_int32_t)); -int __bam_stat_print __P((DBC *, u_int32_t)); -int __bam_stat_callback __P((DB *, PAGE *, void *, int *)); -void __bam_print_cursor __P((DBC *)); -int __bam_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t)); -int __bam_traverse __P((DBC *, db_lockmode_t, db_pgno_t, int (*)(DB *, PAGE *, void *, int *), void *)); -int __bam_30_btreemeta __P((DB *, char *, u_int8_t *)); -int __bam_31_btreemeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); -int __bam_31_lbtree __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); -int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, db_pgno_t, u_int32_t)); -int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); -int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); -int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t, int, int, u_int32_t)); -int __bam_vrfy_structure __P((DB *, 
VRFY_DBINFO *, db_pgno_t, u_int32_t)); -int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *)); -int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, PAGE *, void *, int (*)(void *, const void *), DBT *, u_int32_t)); -int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *, DBT *, void *, int (*)(void *, const void *), u_int32_t)); -int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *, u_int32_t, DB *)); int __bam_split_log __P((DB *, DB_TXN *, DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, const DBT *, u_int32_t)); int __bam_split_read __P((DB_ENV *, void *, __bam_split_args **)); int __bam_rsplit_log __P((DB *, DB_TXN *, DB_LSN *, u_int32_t, db_pgno_t, const DBT *, db_pgno_t, db_pgno_t, const DBT *, DB_LSN *)); @@ -143,6 +114,35 @@ int __bam_merge_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __bam_pgno_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __bam_init_print __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); +int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *)); +int __bam_adjust __P((DBC *, int32_t)); +int __bam_nrecs __P((DBC *, db_recno_t *)); +db_recno_t __bam_total __P((DB *, PAGE *)); +int __bam_get_root __P((DBC *, db_pgno_t, int, u_int32_t, int *)); +int __bam_search __P((DBC *, db_pgno_t, const DBT *, u_int32_t, int, db_recno_t *, int *)); +int __bam_stkrel __P((DBC *, u_int32_t)); +int __bam_stkgrow __P((DB_ENV *, BTREE_CURSOR *)); +int __bam_split __P((DBC *, void *, db_pgno_t *)); +int __bam_pinsert __P((DBC *, EPG *, u_int32_t, PAGE *, PAGE *, int)); +int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t)); +int __bam_stat __P((DBC *, void *, u_int32_t)); +int __bam_stat_print __P((DBC *, u_int32_t)); +int __bam_stat_callback __P((DB *, PAGE *, void *, int *)); +void __bam_print_cursor __P((DBC *)); +int 
__bam_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t)); +int __bam_traverse __P((DBC *, db_lockmode_t, db_pgno_t, int (*)(DB *, PAGE *, void *, int *), void *)); +int __bam_30_btreemeta __P((DB *, char *, u_int8_t *)); +int __bam_31_btreemeta __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __bam_31_lbtree __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); +int __bam_vrfy_meta __P((DB *, VRFY_DBINFO *, BTMETA *, db_pgno_t, u_int32_t)); +int __ram_vrfy_leaf __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __bam_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); +int __bam_vrfy_itemorder __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t, int, int, u_int32_t)); +int __bam_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); +int __bam_vrfy_subtree __P((DB *, VRFY_DBINFO *, db_pgno_t, void *, void *, u_int32_t, u_int32_t *, u_int32_t *, u_int32_t *)); +int __bam_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, PAGE *, void *, int (*)(void *, const void *), DBT *, u_int32_t)); +int __bam_salvage_walkdupint __P((DB *, VRFY_DBINFO *, PAGE *, DBT *, void *, int (*)(void *, const void *), u_int32_t)); +int __bam_meta2pgset __P((DB *, VRFY_DBINFO *, BTMETA *, u_int32_t, DB *)); #if defined(__cplusplus) } --- db-4.6.21/dbinc_auto/db_ext.h 2007-05-30 06:52:58.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/db_ext.h 2007-12-14 23:00:24.238407796 +1100 @@ -24,17 +24,6 @@ int __crdel_inmem_create_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __crdel_inmem_rename_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __crdel_inmem_remove_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); -int __db_master_open __P((DB *, DB_TXN *, const char *, u_int32_t, int, DB **)); -int __db_master_update __P((DB *, DB *, DB_TXN *, const char *, DBTYPE, mu_action, const char *, u_int32_t)); -int __db_env_setup __P((DB *, DB_TXN *, const char *, const char *, u_int32_t, 
u_int32_t)); -int __db_env_mpool __P((DB *, const char *, u_int32_t)); -int __db_close __P((DB *, DB_TXN *, u_int32_t)); -int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int)); -int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *)); -int __db_backup_name __P((DB_ENV *, const char *, DB_TXN *, char **)); -#ifdef CONFIG_TEST -int __db_testcopy __P((DB_ENV *, DB *, const char *)); -#endif int __db_cursor_int __P((DB *, DB_TXN *, DBTYPE, db_pgno_t, int, DB_LOCKER *, DBC **)); int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); int __db_del __P((DB *, DB_TXN *, DBT *, u_int32_t)); @@ -90,6 +79,17 @@ int __db_pg_init_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __db_pg_sort_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __db_init_print __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); +int __db_master_open __P((DB *, DB_TXN *, const char *, u_int32_t, int, DB **)); +int __db_master_update __P((DB *, DB *, DB_TXN *, const char *, DBTYPE, mu_action, const char *, u_int32_t)); +int __db_env_setup __P((DB *, DB_TXN *, const char *, const char *, u_int32_t, u_int32_t)); +int __db_env_mpool __P((DB *, const char *, u_int32_t)); +int __db_close __P((DB *, DB_TXN *, u_int32_t)); +int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int)); +int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *)); +int __db_backup_name __P((DB_ENV *, const char *, DB_TXN *, char **)); +#ifdef CONFIG_TEST +int __db_testcopy __P((DB_ENV *, DB *, const char *)); +#endif int __dbc_close __P((DBC *)); int __dbc_destroy __P((DBC *)); int __dbc_count __P((DBC *, db_recno_t *)); --- db-4.6.21/dbinc_auto/dbreg_ext.h 2007-05-04 08:36:40.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/dbreg_ext.h 2007-12-14 23:00:24.238407796 +1100 @@ -6,6 +6,11 @@ extern "C" { #endif +int __dbreg_register_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, const DBT *, const DBT *, int32_t, DBTYPE, 
db_pgno_t, u_int32_t)); +int __dbreg_register_read __P((DB_ENV *, void *, __dbreg_register_args **)); +int __dbreg_init_recover __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); +int __dbreg_register_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); +int __dbreg_init_print __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); int __dbreg_setup __P((DB *, const char *, u_int32_t)); int __dbreg_teardown __P((DB *)); int __dbreg_teardown_int __P((DB_ENV *, FNAME *)); @@ -18,11 +23,6 @@ int __dbreg_close_id_int __P((DB_ENV *, FNAME *, u_int32_t, int)); int __dbreg_log_close __P((DB_ENV *, FNAME *, DB_TXN *, u_int32_t)); int __dbreg_log_id __P((DB *, DB_TXN *, int32_t, int)); -int __dbreg_register_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, const DBT *, const DBT *, int32_t, DBTYPE, db_pgno_t, u_int32_t)); -int __dbreg_register_read __P((DB_ENV *, void *, __dbreg_register_args **)); -int __dbreg_init_recover __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); -int __dbreg_register_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); -int __dbreg_init_print __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); int __dbreg_register_recover __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __dbreg_stat_print __P((DB_ENV *, u_int32_t)); void __dbreg_print_fname __P((DB_ENV *, FNAME *)); --- db-4.6.21/dbinc_auto/hash_ext.h 2007-05-18 03:17:58.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/hash_ext.h 2007-12-14 23:00:24.238407796 +1100 @@ -6,14 +6,6 @@ extern "C" { #endif -int __ham_quick_delete __P((DBC *)); -int __hamc_init __P((DBC *)); -int __hamc_count __P((DBC *, db_recno_t *)); -int __hamc_dup __P((DBC *, DBC *)); -u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, u_int32_t)); -int __ham_init_dbt __P((DB_ENV *, DBT *, u_int32_t, void **, u_int32_t *)); -int __hamc_update __P((DBC *, u_int32_t, 
db_ham_curadj, int)); -int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***)); int __ham_insdel_log __P((DB *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, db_pgno_t, u_int32_t, DB_LSN *, const DBT *, const DBT *)); int __ham_insdel_read __P((DB_ENV *, void *, __ham_insdel_args **)); int __ham_newpage_log __P((DB *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *)); @@ -47,6 +39,14 @@ int __ham_curadj_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __ham_chgpg_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __ham_init_print __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); +int __ham_quick_delete __P((DBC *)); +int __hamc_init __P((DBC *)); +int __hamc_count __P((DBC *, db_recno_t *)); +int __hamc_dup __P((DBC *, DBC *)); +u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, u_int32_t)); +int __ham_init_dbt __P((DB_ENV *, DBT *, u_int32_t, void **, u_int32_t *)); +int __hamc_update __P((DBC *, u_int32_t, db_ham_curadj, int)); +int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***)); int __ham_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); int __ham_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); int __ham_mswap __P((void *)); --- db-4.6.21/dbinc_auto/int_def.in 2007-06-02 02:40:08.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/int_def.in 2007-12-14 23:00:24.242407708 +1100 @@ -20,17 +20,6 @@ #define __crdel_inmem_create_recover __crdel_inmem_create_recover@DB_VERSION_UNIQUE_NAME@ #define __crdel_inmem_rename_recover __crdel_inmem_rename_recover@DB_VERSION_UNIQUE_NAME@ #define __crdel_inmem_remove_recover __crdel_inmem_remove_recover@DB_VERSION_UNIQUE_NAME@ -#define __db_master_open __db_master_open@DB_VERSION_UNIQUE_NAME@ -#define __db_master_update __db_master_update@DB_VERSION_UNIQUE_NAME@ -#define __db_env_setup __db_env_setup@DB_VERSION_UNIQUE_NAME@ -#define __db_env_mpool __db_env_mpool@DB_VERSION_UNIQUE_NAME@ 
-#define __db_close __db_close@DB_VERSION_UNIQUE_NAME@ -#define __db_refresh __db_refresh@DB_VERSION_UNIQUE_NAME@ -#define __db_log_page __db_log_page@DB_VERSION_UNIQUE_NAME@ -#define __db_backup_name __db_backup_name@DB_VERSION_UNIQUE_NAME@ -#ifdef CONFIG_TEST -#define __db_testcopy __db_testcopy@DB_VERSION_UNIQUE_NAME@ -#endif #define __db_cursor_int __db_cursor_int@DB_VERSION_UNIQUE_NAME@ #define __db_put __db_put@DB_VERSION_UNIQUE_NAME@ #define __db_del __db_del@DB_VERSION_UNIQUE_NAME@ @@ -86,6 +75,17 @@ #define __db_pg_init_print __db_pg_init_print@DB_VERSION_UNIQUE_NAME@ #define __db_pg_sort_print __db_pg_sort_print@DB_VERSION_UNIQUE_NAME@ #define __db_init_print __db_init_print@DB_VERSION_UNIQUE_NAME@ +#define __db_master_open __db_master_open@DB_VERSION_UNIQUE_NAME@ +#define __db_master_update __db_master_update@DB_VERSION_UNIQUE_NAME@ +#define __db_env_setup __db_env_setup@DB_VERSION_UNIQUE_NAME@ +#define __db_env_mpool __db_env_mpool@DB_VERSION_UNIQUE_NAME@ +#define __db_close __db_close@DB_VERSION_UNIQUE_NAME@ +#define __db_refresh __db_refresh@DB_VERSION_UNIQUE_NAME@ +#define __db_log_page __db_log_page@DB_VERSION_UNIQUE_NAME@ +#define __db_backup_name __db_backup_name@DB_VERSION_UNIQUE_NAME@ +#ifdef CONFIG_TEST +#define __db_testcopy __db_testcopy@DB_VERSION_UNIQUE_NAME@ +#endif #define __dbc_close __dbc_close@DB_VERSION_UNIQUE_NAME@ #define __dbc_destroy __dbc_destroy@DB_VERSION_UNIQUE_NAME@ #define __dbc_count __dbc_count@DB_VERSION_UNIQUE_NAME@ @@ -357,35 +357,6 @@ #define __ram_ca __ram_ca@DB_VERSION_UNIQUE_NAME@ #define __ram_getno __ram_getno@DB_VERSION_UNIQUE_NAME@ #define __ram_writeback __ram_writeback@DB_VERSION_UNIQUE_NAME@ -#define __bam_rsearch __bam_rsearch@DB_VERSION_UNIQUE_NAME@ -#define __bam_adjust __bam_adjust@DB_VERSION_UNIQUE_NAME@ -#define __bam_nrecs __bam_nrecs@DB_VERSION_UNIQUE_NAME@ -#define __bam_total __bam_total@DB_VERSION_UNIQUE_NAME@ -#define __bam_get_root __bam_get_root@DB_VERSION_UNIQUE_NAME@ -#define __bam_search 
__bam_search@DB_VERSION_UNIQUE_NAME@ -#define __bam_stkrel __bam_stkrel@DB_VERSION_UNIQUE_NAME@ -#define __bam_stkgrow __bam_stkgrow@DB_VERSION_UNIQUE_NAME@ -#define __bam_split __bam_split@DB_VERSION_UNIQUE_NAME@ -#define __bam_pinsert __bam_pinsert@DB_VERSION_UNIQUE_NAME@ -#define __bam_copy __bam_copy@DB_VERSION_UNIQUE_NAME@ -#define __bam_stat __bam_stat@DB_VERSION_UNIQUE_NAME@ -#define __bam_stat_print __bam_stat_print@DB_VERSION_UNIQUE_NAME@ -#define __bam_stat_callback __bam_stat_callback@DB_VERSION_UNIQUE_NAME@ -#define __bam_print_cursor __bam_print_cursor@DB_VERSION_UNIQUE_NAME@ -#define __bam_key_range __bam_key_range@DB_VERSION_UNIQUE_NAME@ -#define __bam_traverse __bam_traverse@DB_VERSION_UNIQUE_NAME@ -#define __bam_30_btreemeta __bam_30_btreemeta@DB_VERSION_UNIQUE_NAME@ -#define __bam_31_btreemeta __bam_31_btreemeta@DB_VERSION_UNIQUE_NAME@ -#define __bam_31_lbtree __bam_31_lbtree@DB_VERSION_UNIQUE_NAME@ -#define __bam_vrfy_meta __bam_vrfy_meta@DB_VERSION_UNIQUE_NAME@ -#define __ram_vrfy_leaf __ram_vrfy_leaf@DB_VERSION_UNIQUE_NAME@ -#define __bam_vrfy __bam_vrfy@DB_VERSION_UNIQUE_NAME@ -#define __bam_vrfy_itemorder __bam_vrfy_itemorder@DB_VERSION_UNIQUE_NAME@ -#define __bam_vrfy_structure __bam_vrfy_structure@DB_VERSION_UNIQUE_NAME@ -#define __bam_vrfy_subtree __bam_vrfy_subtree@DB_VERSION_UNIQUE_NAME@ -#define __bam_salvage __bam_salvage@DB_VERSION_UNIQUE_NAME@ -#define __bam_salvage_walkdupint __bam_salvage_walkdupint@DB_VERSION_UNIQUE_NAME@ -#define __bam_meta2pgset __bam_meta2pgset@DB_VERSION_UNIQUE_NAME@ #define __bam_split_log __bam_split_log@DB_VERSION_UNIQUE_NAME@ #define __bam_split_read __bam_split_read@DB_VERSION_UNIQUE_NAME@ #define __bam_rsplit_log __bam_rsplit_log@DB_VERSION_UNIQUE_NAME@ @@ -426,6 +397,35 @@ #define __bam_merge_print __bam_merge_print@DB_VERSION_UNIQUE_NAME@ #define __bam_pgno_print __bam_pgno_print@DB_VERSION_UNIQUE_NAME@ #define __bam_init_print __bam_init_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_rsearch 
__bam_rsearch@DB_VERSION_UNIQUE_NAME@ +#define __bam_adjust __bam_adjust@DB_VERSION_UNIQUE_NAME@ +#define __bam_nrecs __bam_nrecs@DB_VERSION_UNIQUE_NAME@ +#define __bam_total __bam_total@DB_VERSION_UNIQUE_NAME@ +#define __bam_get_root __bam_get_root@DB_VERSION_UNIQUE_NAME@ +#define __bam_search __bam_search@DB_VERSION_UNIQUE_NAME@ +#define __bam_stkrel __bam_stkrel@DB_VERSION_UNIQUE_NAME@ +#define __bam_stkgrow __bam_stkgrow@DB_VERSION_UNIQUE_NAME@ +#define __bam_split __bam_split@DB_VERSION_UNIQUE_NAME@ +#define __bam_pinsert __bam_pinsert@DB_VERSION_UNIQUE_NAME@ +#define __bam_copy __bam_copy@DB_VERSION_UNIQUE_NAME@ +#define __bam_stat __bam_stat@DB_VERSION_UNIQUE_NAME@ +#define __bam_stat_print __bam_stat_print@DB_VERSION_UNIQUE_NAME@ +#define __bam_stat_callback __bam_stat_callback@DB_VERSION_UNIQUE_NAME@ +#define __bam_print_cursor __bam_print_cursor@DB_VERSION_UNIQUE_NAME@ +#define __bam_key_range __bam_key_range@DB_VERSION_UNIQUE_NAME@ +#define __bam_traverse __bam_traverse@DB_VERSION_UNIQUE_NAME@ +#define __bam_30_btreemeta __bam_30_btreemeta@DB_VERSION_UNIQUE_NAME@ +#define __bam_31_btreemeta __bam_31_btreemeta@DB_VERSION_UNIQUE_NAME@ +#define __bam_31_lbtree __bam_31_lbtree@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_meta __bam_vrfy_meta@DB_VERSION_UNIQUE_NAME@ +#define __ram_vrfy_leaf __ram_vrfy_leaf@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy __bam_vrfy@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_itemorder __bam_vrfy_itemorder@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_structure __bam_vrfy_structure@DB_VERSION_UNIQUE_NAME@ +#define __bam_vrfy_subtree __bam_vrfy_subtree@DB_VERSION_UNIQUE_NAME@ +#define __bam_salvage __bam_salvage@DB_VERSION_UNIQUE_NAME@ +#define __bam_salvage_walkdupint __bam_salvage_walkdupint@DB_VERSION_UNIQUE_NAME@ +#define __bam_meta2pgset __bam_meta2pgset@DB_VERSION_UNIQUE_NAME@ #ifndef HAVE_ATOI #define atoi atoi@DB_VERSION_UNIQUE_NAME@ #endif @@ -601,6 +601,11 @@ #define __db_blockDecrypt __db_blockDecrypt@DB_VERSION_UNIQUE_NAME@ 
#define __db_padDecrypt __db_padDecrypt@DB_VERSION_UNIQUE_NAME@ #define __db_cipherUpdateRounds __db_cipherUpdateRounds@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_register_log __dbreg_register_log@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_register_read __dbreg_register_read@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_init_recover __dbreg_init_recover@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_register_print __dbreg_register_print@DB_VERSION_UNIQUE_NAME@ +#define __dbreg_init_print __dbreg_init_print@DB_VERSION_UNIQUE_NAME@ #define __dbreg_setup __dbreg_setup@DB_VERSION_UNIQUE_NAME@ #define __dbreg_teardown __dbreg_teardown@DB_VERSION_UNIQUE_NAME@ #define __dbreg_teardown_int __dbreg_teardown_int@DB_VERSION_UNIQUE_NAME@ @@ -613,11 +618,6 @@ #define __dbreg_close_id_int __dbreg_close_id_int@DB_VERSION_UNIQUE_NAME@ #define __dbreg_log_close __dbreg_log_close@DB_VERSION_UNIQUE_NAME@ #define __dbreg_log_id __dbreg_log_id@DB_VERSION_UNIQUE_NAME@ -#define __dbreg_register_log __dbreg_register_log@DB_VERSION_UNIQUE_NAME@ -#define __dbreg_register_read __dbreg_register_read@DB_VERSION_UNIQUE_NAME@ -#define __dbreg_init_recover __dbreg_init_recover@DB_VERSION_UNIQUE_NAME@ -#define __dbreg_register_print __dbreg_register_print@DB_VERSION_UNIQUE_NAME@ -#define __dbreg_init_print __dbreg_init_print@DB_VERSION_UNIQUE_NAME@ #define __dbreg_register_recover __dbreg_register_recover@DB_VERSION_UNIQUE_NAME@ #define __dbreg_stat_print __dbreg_stat_print@DB_VERSION_UNIQUE_NAME@ #define __dbreg_print_fname __dbreg_print_fname@DB_VERSION_UNIQUE_NAME@ @@ -771,14 +771,6 @@ #define __fop_read_meta __fop_read_meta@DB_VERSION_UNIQUE_NAME@ #define __fop_dummy __fop_dummy@DB_VERSION_UNIQUE_NAME@ #define __fop_dbrename __fop_dbrename@DB_VERSION_UNIQUE_NAME@ -#define __ham_quick_delete __ham_quick_delete@DB_VERSION_UNIQUE_NAME@ -#define __hamc_init __hamc_init@DB_VERSION_UNIQUE_NAME@ -#define __hamc_count __hamc_count@DB_VERSION_UNIQUE_NAME@ -#define __hamc_dup __hamc_dup@DB_VERSION_UNIQUE_NAME@ 
-#define __ham_call_hash __ham_call_hash@DB_VERSION_UNIQUE_NAME@ -#define __ham_init_dbt __ham_init_dbt@DB_VERSION_UNIQUE_NAME@ -#define __hamc_update __hamc_update@DB_VERSION_UNIQUE_NAME@ -#define __ham_get_clist __ham_get_clist@DB_VERSION_UNIQUE_NAME@ #define __ham_insdel_log __ham_insdel_log@DB_VERSION_UNIQUE_NAME@ #define __ham_insdel_read __ham_insdel_read@DB_VERSION_UNIQUE_NAME@ #define __ham_newpage_log __ham_newpage_log@DB_VERSION_UNIQUE_NAME@ @@ -812,6 +804,14 @@ #define __ham_curadj_print __ham_curadj_print@DB_VERSION_UNIQUE_NAME@ #define __ham_chgpg_print __ham_chgpg_print@DB_VERSION_UNIQUE_NAME@ #define __ham_init_print __ham_init_print@DB_VERSION_UNIQUE_NAME@ +#define __ham_quick_delete __ham_quick_delete@DB_VERSION_UNIQUE_NAME@ +#define __hamc_init __hamc_init@DB_VERSION_UNIQUE_NAME@ +#define __hamc_count __hamc_count@DB_VERSION_UNIQUE_NAME@ +#define __hamc_dup __hamc_dup@DB_VERSION_UNIQUE_NAME@ +#define __ham_call_hash __ham_call_hash@DB_VERSION_UNIQUE_NAME@ +#define __ham_init_dbt __ham_init_dbt@DB_VERSION_UNIQUE_NAME@ +#define __hamc_update __hamc_update@DB_VERSION_UNIQUE_NAME@ +#define __ham_get_clist __ham_get_clist@DB_VERSION_UNIQUE_NAME@ #define __ham_pgin __ham_pgin@DB_VERSION_UNIQUE_NAME@ #define __ham_pgout __ham_pgout@DB_VERSION_UNIQUE_NAME@ #define __ham_mswap __ham_mswap@DB_VERSION_UNIQUE_NAME@ @@ -896,6 +896,7 @@ #define __db_SHA1Update __db_SHA1Update@DB_VERSION_UNIQUE_NAME@ #define __db_SHA1Final __db_SHA1Final@DB_VERSION_UNIQUE_NAME@ #define __lock_vec_pp __lock_vec_pp@DB_VERSION_UNIQUE_NAME@ +#define __lock_sicommit __lock_sicommit@DB_VERSION_UNIQUE_NAME@ #define __lock_vec __lock_vec@DB_VERSION_UNIQUE_NAME@ #define __lock_get_pp __lock_get_pp@DB_VERSION_UNIQUE_NAME@ #define __lock_get __lock_get@DB_VERSION_UNIQUE_NAME@ @@ -951,6 +952,9 @@ #define __lock_ohash __lock_ohash@DB_VERSION_UNIQUE_NAME@ #define __lock_lhash __lock_lhash@DB_VERSION_UNIQUE_NAME@ #define __lock_nomem __lock_nomem@DB_VERSION_UNIQUE_NAME@ +#define 
__log_archive_pp __log_archive_pp@DB_VERSION_UNIQUE_NAME@ +#define __log_get_stable_lsn __log_get_stable_lsn@DB_VERSION_UNIQUE_NAME@ +#define __log_autoremove __log_autoremove@DB_VERSION_UNIQUE_NAME@ #define __log_open __log_open@DB_VERSION_UNIQUE_NAME@ #define __log_find __log_find@DB_VERSION_UNIQUE_NAME@ #define __log_valid __log_valid@DB_VERSION_UNIQUE_NAME@ @@ -967,9 +971,6 @@ #define __log_inmem_copyin __log_inmem_copyin@DB_VERSION_UNIQUE_NAME@ #define __log_set_version __log_set_version@DB_VERSION_UNIQUE_NAME@ #define __log_get_oldversion __log_get_oldversion@DB_VERSION_UNIQUE_NAME@ -#define __log_archive_pp __log_archive_pp@DB_VERSION_UNIQUE_NAME@ -#define __log_get_stable_lsn __log_get_stable_lsn@DB_VERSION_UNIQUE_NAME@ -#define __log_autoremove __log_autoremove@DB_VERSION_UNIQUE_NAME@ #define __log_check_page_lsn __log_check_page_lsn@DB_VERSION_UNIQUE_NAME@ #define __log_printf_capi __log_printf_capi@DB_VERSION_UNIQUE_NAME@ #define __log_printf_pp __log_printf_pp@DB_VERSION_UNIQUE_NAME@ @@ -1133,6 +1134,27 @@ #define __db_win32_mutex_lock __db_win32_mutex_lock@DB_VERSION_UNIQUE_NAME@ #define __db_win32_mutex_unlock __db_win32_mutex_unlock@DB_VERSION_UNIQUE_NAME@ #define __db_win32_mutex_destroy __db_win32_mutex_destroy@DB_VERSION_UNIQUE_NAME@ +#ifndef HAVE_FCLOSE +#define fclose fclose@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_FGETC +#define fgetc fgetc@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_FGETS +#define fgets fgets@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_FOPEN +#define fopen fopen@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_FWRITE +#define fwrite fwrite@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_LOCALTIME +#define localtime localtime@DB_VERSION_UNIQUE_NAME@ +#endif +#ifndef HAVE_TIME +#define time time@DB_VERSION_UNIQUE_NAME@ +#endif #define __os_abort __os_abort@DB_VERSION_UNIQUE_NAME@ #define __os_abspath __os_abspath@DB_VERSION_UNIQUE_NAME@ #define __os_umalloc __os_umalloc@DB_VERSION_UNIQUE_NAME@ @@ -1194,27 +1216,6 @@ #define 
__os_region_unlink __os_region_unlink@DB_VERSION_UNIQUE_NAME@ #define __os_unlink __os_unlink@DB_VERSION_UNIQUE_NAME@ #define __os_yield __os_yield@DB_VERSION_UNIQUE_NAME@ -#ifndef HAVE_FCLOSE -#define fclose fclose@DB_VERSION_UNIQUE_NAME@ -#endif -#ifndef HAVE_FGETC -#define fgetc fgetc@DB_VERSION_UNIQUE_NAME@ -#endif -#ifndef HAVE_FGETS -#define fgets fgets@DB_VERSION_UNIQUE_NAME@ -#endif -#ifndef HAVE_FOPEN -#define fopen fopen@DB_VERSION_UNIQUE_NAME@ -#endif -#ifndef HAVE_FWRITE -#define fwrite fwrite@DB_VERSION_UNIQUE_NAME@ -#endif -#ifndef HAVE_LOCALTIME -#define localtime localtime@DB_VERSION_UNIQUE_NAME@ -#endif -#ifndef HAVE_TIME -#define time time@DB_VERSION_UNIQUE_NAME@ -#endif #ifdef HAVE_QNX #define __os_qnx_region_open __os_qnx_region_open@DB_VERSION_UNIQUE_NAME@ #endif @@ -1229,13 +1230,6 @@ #define __os_get_neterr __os_get_neterr@DB_VERSION_UNIQUE_NAME@ #endif #define __os_mkdir __os_mkdir@DB_VERSION_UNIQUE_NAME@ -#define __qam_position __qam_position@DB_VERSION_UNIQUE_NAME@ -#define __qam_pitem __qam_pitem@DB_VERSION_UNIQUE_NAME@ -#define __qam_append __qam_append@DB_VERSION_UNIQUE_NAME@ -#define __qamc_dup __qamc_dup@DB_VERSION_UNIQUE_NAME@ -#define __qamc_init __qamc_init@DB_VERSION_UNIQUE_NAME@ -#define __qam_truncate __qam_truncate@DB_VERSION_UNIQUE_NAME@ -#define __qam_delete __qam_delete@DB_VERSION_UNIQUE_NAME@ #define __qam_incfirst_log __qam_incfirst_log@DB_VERSION_UNIQUE_NAME@ #define __qam_incfirst_read __qam_incfirst_read@DB_VERSION_UNIQUE_NAME@ #define __qam_mvptr_log __qam_mvptr_log@DB_VERSION_UNIQUE_NAME@ @@ -1253,6 +1247,13 @@ #define __qam_add_print __qam_add_print@DB_VERSION_UNIQUE_NAME@ #define __qam_delext_print __qam_delext_print@DB_VERSION_UNIQUE_NAME@ #define __qam_init_print __qam_init_print@DB_VERSION_UNIQUE_NAME@ +#define __qam_position __qam_position@DB_VERSION_UNIQUE_NAME@ +#define __qam_pitem __qam_pitem@DB_VERSION_UNIQUE_NAME@ +#define __qam_append __qam_append@DB_VERSION_UNIQUE_NAME@ +#define __qamc_dup 
__qamc_dup@DB_VERSION_UNIQUE_NAME@ +#define __qamc_init __qamc_init@DB_VERSION_UNIQUE_NAME@ +#define __qam_truncate __qam_truncate@DB_VERSION_UNIQUE_NAME@ +#define __qam_delete __qam_delete@DB_VERSION_UNIQUE_NAME@ #define __qam_mswap __qam_mswap@DB_VERSION_UNIQUE_NAME@ #define __qam_pgin_out __qam_pgin_out@DB_VERSION_UNIQUE_NAME@ #define __qam_fprobe __qam_fprobe@DB_VERSION_UNIQUE_NAME@ @@ -1708,8 +1709,8 @@ #define _DbInfoDelete _DbInfoDelete@DB_VERSION_UNIQUE_NAME@ #define db_Cmd db_Cmd@DB_VERSION_UNIQUE_NAME@ #define tcl_CompactStat tcl_CompactStat@DB_VERSION_UNIQUE_NAME@ -#define tcl_rep_send tcl_rep_send@DB_VERSION_UNIQUE_NAME@ #define dbc_Cmd dbc_Cmd@DB_VERSION_UNIQUE_NAME@ +#define tcl_rep_send tcl_rep_send@DB_VERSION_UNIQUE_NAME@ #define env_Cmd env_Cmd@DB_VERSION_UNIQUE_NAME@ #define tcl_EnvRemove tcl_EnvRemove@DB_VERSION_UNIQUE_NAME@ #define tcl_EnvIdReset tcl_EnvIdReset@DB_VERSION_UNIQUE_NAME@ @@ -1786,24 +1787,6 @@ #define tcl_TxnTimeout tcl_TxnTimeout@DB_VERSION_UNIQUE_NAME@ #define tcl_TxnRecover tcl_TxnRecover@DB_VERSION_UNIQUE_NAME@ #define bdb_RandCommand bdb_RandCommand@DB_VERSION_UNIQUE_NAME@ -#define __txn_begin_pp __txn_begin_pp@DB_VERSION_UNIQUE_NAME@ -#define __txn_begin __txn_begin@DB_VERSION_UNIQUE_NAME@ -#define __txn_xa_begin __txn_xa_begin@DB_VERSION_UNIQUE_NAME@ -#define __txn_recycle_id __txn_recycle_id@DB_VERSION_UNIQUE_NAME@ -#define __txn_compensate_begin __txn_compensate_begin@DB_VERSION_UNIQUE_NAME@ -#define __txn_continue __txn_continue@DB_VERSION_UNIQUE_NAME@ -#define __txn_commit __txn_commit@DB_VERSION_UNIQUE_NAME@ -#define __txn_abort __txn_abort@DB_VERSION_UNIQUE_NAME@ -#define __txn_discard_int __txn_discard_int@DB_VERSION_UNIQUE_NAME@ -#define __txn_prepare __txn_prepare@DB_VERSION_UNIQUE_NAME@ -#define __txn_id __txn_id@DB_VERSION_UNIQUE_NAME@ -#define __txn_get_name __txn_get_name@DB_VERSION_UNIQUE_NAME@ -#define __txn_set_name __txn_set_name@DB_VERSION_UNIQUE_NAME@ -#define __txn_set_timeout 
__txn_set_timeout@DB_VERSION_UNIQUE_NAME@ -#define __txn_activekids __txn_activekids@DB_VERSION_UNIQUE_NAME@ -#define __txn_force_abort __txn_force_abort@DB_VERSION_UNIQUE_NAME@ -#define __txn_preclose __txn_preclose@DB_VERSION_UNIQUE_NAME@ -#define __txn_reset __txn_reset@DB_VERSION_UNIQUE_NAME@ #define __txn_regop_42_read __txn_regop_42_read@DB_VERSION_UNIQUE_NAME@ #define __txn_regop_log __txn_regop_log@DB_VERSION_UNIQUE_NAME@ #define __txn_regop_read __txn_regop_read@DB_VERSION_UNIQUE_NAME@ @@ -1825,6 +1808,24 @@ #define __txn_xa_regop_print __txn_xa_regop_print@DB_VERSION_UNIQUE_NAME@ #define __txn_recycle_print __txn_recycle_print@DB_VERSION_UNIQUE_NAME@ #define __txn_init_print __txn_init_print@DB_VERSION_UNIQUE_NAME@ +#define __txn_begin_pp __txn_begin_pp@DB_VERSION_UNIQUE_NAME@ +#define __txn_begin __txn_begin@DB_VERSION_UNIQUE_NAME@ +#define __txn_xa_begin __txn_xa_begin@DB_VERSION_UNIQUE_NAME@ +#define __txn_recycle_id __txn_recycle_id@DB_VERSION_UNIQUE_NAME@ +#define __txn_compensate_begin __txn_compensate_begin@DB_VERSION_UNIQUE_NAME@ +#define __txn_continue __txn_continue@DB_VERSION_UNIQUE_NAME@ +#define __txn_commit __txn_commit@DB_VERSION_UNIQUE_NAME@ +#define __txn_abort __txn_abort@DB_VERSION_UNIQUE_NAME@ +#define __txn_discard_int __txn_discard_int@DB_VERSION_UNIQUE_NAME@ +#define __txn_prepare __txn_prepare@DB_VERSION_UNIQUE_NAME@ +#define __txn_id __txn_id@DB_VERSION_UNIQUE_NAME@ +#define __txn_get_name __txn_get_name@DB_VERSION_UNIQUE_NAME@ +#define __txn_set_name __txn_set_name@DB_VERSION_UNIQUE_NAME@ +#define __txn_set_timeout __txn_set_timeout@DB_VERSION_UNIQUE_NAME@ +#define __txn_activekids __txn_activekids@DB_VERSION_UNIQUE_NAME@ +#define __txn_force_abort __txn_force_abort@DB_VERSION_UNIQUE_NAME@ +#define __txn_preclose __txn_preclose@DB_VERSION_UNIQUE_NAME@ +#define __txn_reset __txn_reset@DB_VERSION_UNIQUE_NAME@ #define __txn_checkpoint_pp __txn_checkpoint_pp@DB_VERSION_UNIQUE_NAME@ #define __txn_checkpoint 
__txn_checkpoint@DB_VERSION_UNIQUE_NAME@ #define __txn_getactive __txn_getactive@DB_VERSION_UNIQUE_NAME@ @@ -1856,6 +1857,7 @@ #define __txn_region_mutex_count __txn_region_mutex_count@DB_VERSION_UNIQUE_NAME@ #define __txn_id_set __txn_id_set@DB_VERSION_UNIQUE_NAME@ #define __txn_oldest_reader __txn_oldest_reader@DB_VERSION_UNIQUE_NAME@ +#define __txn_update_oldlsn __txn_update_oldlsn@DB_VERSION_UNIQUE_NAME@ #define __txn_add_buffer __txn_add_buffer@DB_VERSION_UNIQUE_NAME@ #define __txn_remove_buffer __txn_remove_buffer@DB_VERSION_UNIQUE_NAME@ #define __txn_stat_pp __txn_stat_pp@DB_VERSION_UNIQUE_NAME@ --- db-4.6.21/dbinc_auto/lock_ext.h 2007-05-17 07:13:06.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/lock_ext.h 2007-12-14 23:00:24.242407708 +1100 @@ -7,6 +7,7 @@ #endif int __lock_vec_pp __P((DB_ENV *, u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); +int __lock_sicommit __P((DB_ENV *, DB_LOCKER *)); int __lock_vec __P((DB_ENV *, DB_LOCKER *, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); int __lock_get_pp __P((DB_ENV *, u_int32_t, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); int __lock_get __P((DB_ENV *, DB_LOCKER *, u_int32_t, const DBT *, db_lockmode_t, DB_LOCK *)); --- db-4.6.21/dbinc_auto/log_ext.h 2007-05-04 08:36:40.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/log_ext.h 2007-12-14 23:00:24.242407708 +1100 @@ -6,6 +6,9 @@ extern "C" { #endif +int __log_archive_pp __P((DB_ENV *, char **[], u_int32_t)); +int __log_get_stable_lsn __P((DB_ENV *, DB_LSN *)); +void __log_autoremove __P((DB_ENV *)); int __log_open __P((DB_ENV *, int)); int __log_find __P((DB_LOG *, int, u_int32_t *, logfile_validity *)); int __log_valid __P((DB_LOG *, u_int32_t, int, DB_FH **, u_int32_t, logfile_validity *, u_int32_t *)); @@ -22,9 +25,6 @@ void __log_inmem_copyin __P((DB_LOG *, size_t, void *, size_t)); void __log_set_version __P((DB_ENV *, u_int32_t)); int __log_get_oldversion __P((DB_ENV *, u_int32_t *)); -int __log_archive_pp __P((DB_ENV *, char **[], 
u_int32_t)); -int __log_get_stable_lsn __P((DB_ENV *, DB_LSN *)); -void __log_autoremove __P((DB_ENV *)); int __log_check_page_lsn __P((DB_ENV *, DB *, DB_LSN *)); int __log_printf_capi __P((DB_ENV *, DB_TXN *, const char *, ...)) __attribute__ ((__format__ (__printf__, 3, 4))); int __log_printf_pp __P((DB_ENV *, DB_TXN *, const char *, va_list)); --- db-4.6.21/dbinc_auto/os_ext.h 2007-05-04 08:36:40.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/os_ext.h 2007-12-14 23:00:24.242407708 +1100 @@ -6,6 +6,27 @@ extern "C" { #endif +#ifndef HAVE_FCLOSE +int fclose __P((FILE *)); +#endif +#ifndef HAVE_FGETC +int fgetc __P((FILE *)); +#endif +#ifndef HAVE_FGETS +char *fgets __P((char *, int, FILE *)); +#endif +#ifndef HAVE_FOPEN +FILE *fopen __P((const char *, const char *)); +#endif +#ifndef HAVE_FWRITE +size_t fwrite __P((const void *, size_t, size_t, FILE *)); +#endif +#ifndef HAVE_LOCALTIME +struct tm *localtime __P((const time_t *)); +#endif +#ifndef HAVE_TIME +time_t time __P((time_t *)); +#endif void __os_abort __P((void)); int __os_abspath __P((const char *)); int __os_umalloc __P((DB_ENV *, size_t, void *)); @@ -67,27 +88,6 @@ int __os_region_unlink __P((DB_ENV *, const char *)); int __os_unlink __P((DB_ENV *, const char *)); void __os_yield __P((DB_ENV *)); -#ifndef HAVE_FCLOSE -int fclose __P((FILE *)); -#endif -#ifndef HAVE_FGETC -int fgetc __P((FILE *)); -#endif -#ifndef HAVE_FGETS -char *fgets __P((char *, int, FILE *)); -#endif -#ifndef HAVE_FOPEN -FILE *fopen __P((const char *, const char *)); -#endif -#ifndef HAVE_FWRITE -size_t fwrite __P((const void *, size_t, size_t, FILE *)); -#endif -#ifndef HAVE_LOCALTIME -struct tm *localtime __P((const time_t *)); -#endif -#ifndef HAVE_TIME -time_t time __P((time_t *)); -#endif #ifdef HAVE_QNX int __os_qnx_region_open __P((DB_ENV *, const char *, int, int, DB_FH *)); #endif --- db-4.6.21/dbinc_auto/qam_ext.h 2007-05-17 07:13:06.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/qam_ext.h 2007-12-14 
23:00:24.242407708 +1100 @@ -6,13 +6,6 @@ extern "C" { #endif -int __qam_position __P((DBC *, db_recno_t *, db_lockmode_t, u_int32_t, int *)); -int __qam_pitem __P((DBC *, QPAGE *, u_int32_t, db_recno_t, DBT *)); -int __qam_append __P((DBC *, DBT *, DBT *)); -int __qamc_dup __P((DBC *, DBC *)); -int __qamc_init __P((DBC *)); -int __qam_truncate __P((DBC *, u_int32_t *)); -int __qam_delete __P((DBC *, DBT *)); int __qam_incfirst_log __P((DB *, DB_TXN *, DB_LSN *, u_int32_t, db_recno_t, db_pgno_t)); int __qam_incfirst_read __P((DB_ENV *, void *, __qam_incfirst_args **)); int __qam_mvptr_log __P((DB *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, db_recno_t, db_recno_t, db_recno_t, db_recno_t, DB_LSN *, db_pgno_t)); @@ -30,6 +23,13 @@ int __qam_add_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __qam_delext_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __qam_init_print __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), size_t *)); +int __qam_position __P((DBC *, db_recno_t *, db_lockmode_t, u_int32_t, int *)); +int __qam_pitem __P((DBC *, QPAGE *, u_int32_t, db_recno_t, DBT *)); +int __qam_append __P((DBC *, DBT *, DBT *)); +int __qamc_dup __P((DBC *, DBC *)); +int __qamc_init __P((DBC *)); +int __qam_truncate __P((DBC *, u_int32_t *)); +int __qam_delete __P((DBC *, DBT *)); int __qam_mswap __P((PAGE *)); int __qam_pgin_out __P((DB_ENV *, db_pgno_t, void *, DBT *)); int __qam_fprobe __P((DB *, db_pgno_t, DB_TXN *, void *, qam_probe_mode, DB_CACHE_PRIORITY, u_int32_t)); --- db-4.6.21/dbinc_auto/tcl_ext.h 2007-05-04 08:36:40.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/tcl_ext.h 2007-12-14 23:00:24.246407621 +1100 @@ -17,8 +17,8 @@ void _DbInfoDelete __P((Tcl_Interp *, DBTCL_INFO *)); int db_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); int tcl_CompactStat __P((Tcl_Interp *, DBTCL_INFO *)); -int tcl_rep_send __P((DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); int dbc_Cmd 
__P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); +int tcl_rep_send __P((DB_ENV *, const DBT *, const DBT *, const DB_LSN *, int, u_int32_t)); int env_Cmd __P((ClientData, Tcl_Interp *, int, Tcl_Obj * CONST*)); int tcl_EnvRemove __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *, DBTCL_INFO *)); int tcl_EnvIdReset __P((Tcl_Interp *, int, Tcl_Obj * CONST*, DB_ENV *)); --- db-4.6.21/dbinc_auto/txn_ext.h 2007-06-29 10:25:01.000000000 +1000 +++ db-4.6.21-safe-si2/dbinc_auto/txn_ext.h 2007-12-14 23:00:24.246407621 +1100 @@ -6,24 +6,6 @@ extern "C" { #endif -int __txn_begin_pp __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); -int __txn_begin __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); -int __txn_xa_begin __P((DB_ENV *, DB_TXN *)); -int __txn_recycle_id __P((DB_ENV *)); -int __txn_compensate_begin __P((DB_ENV *, DB_TXN **)); -int __txn_continue __P((DB_ENV *, DB_TXN *, TXN_DETAIL *)); -int __txn_commit __P((DB_TXN *, u_int32_t)); -int __txn_abort __P((DB_TXN *)); -int __txn_discard_int __P((DB_TXN *, u_int32_t flags)); -int __txn_prepare __P((DB_TXN *, u_int8_t *)); -u_int32_t __txn_id __P((DB_TXN *)); -int __txn_get_name __P((DB_TXN *, const char **)); -int __txn_set_name __P((DB_TXN *, const char *)); -int __txn_set_timeout __P((DB_TXN *, db_timeout_t, u_int32_t)); -int __txn_activekids __P((DB_ENV *, u_int32_t, DB_TXN *)); -int __txn_force_abort __P((DB_ENV *, u_int8_t *)); -int __txn_preclose __P((DB_ENV *)); -int __txn_reset __P((DB_ENV *)); int __txn_regop_42_read __P((DB_ENV *, void *, __txn_regop_42_args **)); int __txn_regop_log __P((DB_ENV *, DB_TXN *, DB_LSN *, u_int32_t, u_int32_t, int32_t, u_int32_t, const DBT *)); int __txn_regop_read __P((DB_ENV *, void *, __txn_regop_args **)); @@ -45,6 +27,24 @@ int __txn_xa_regop_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __txn_recycle_print __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); int __txn_init_print __P((DB_ENV *, int (***)(DB_ENV *, DBT *, DB_LSN *, db_recops, void *), 
size_t *)); +int __txn_begin_pp __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); +int __txn_begin __P((DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t)); +int __txn_xa_begin __P((DB_ENV *, DB_TXN *)); +int __txn_recycle_id __P((DB_ENV *)); +int __txn_compensate_begin __P((DB_ENV *, DB_TXN **)); +int __txn_continue __P((DB_ENV *, DB_TXN *, TXN_DETAIL *)); +int __txn_commit __P((DB_TXN *, u_int32_t)); +int __txn_abort __P((DB_TXN *)); +int __txn_discard_int __P((DB_TXN *, u_int32_t flags)); +int __txn_prepare __P((DB_TXN *, u_int8_t *)); +u_int32_t __txn_id __P((DB_TXN *)); +int __txn_get_name __P((DB_TXN *, const char **)); +int __txn_set_name __P((DB_TXN *, const char *)); +int __txn_set_timeout __P((DB_TXN *, db_timeout_t, u_int32_t)); +int __txn_activekids __P((DB_ENV *, u_int32_t, DB_TXN *)); +int __txn_force_abort __P((DB_ENV *, u_int8_t *)); +int __txn_preclose __P((DB_ENV *)); +int __txn_reset __P((DB_ENV *)); int __txn_checkpoint_pp __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); int __txn_checkpoint __P((DB_ENV *, u_int32_t, u_int32_t, u_int32_t)); int __txn_getactive __P((DB_ENV *, DB_LSN *)); @@ -76,6 +76,7 @@ u_int32_t __txn_region_mutex_count __P((DB_ENV *)); int __txn_id_set __P((DB_ENV *, u_int32_t, u_int32_t)); int __txn_oldest_reader __P((DB_ENV *, DB_LSN *)); +int __txn_update_oldlsn __P((DB_ENV *, DB_LSN *)); int __txn_add_buffer __P((DB_ENV *, TXN_DETAIL *)); int __txn_remove_buffer __P((DB_ENV *, TXN_DETAIL *, db_mutex_t)); int __txn_stat_pp __P((DB_ENV *, DB_TXN_STAT **, u_int32_t)); --- db-4.6.21/lock/lock.c 2007-05-18 05:33:04.000000000 +1000 +++ db-4.6.21-safe-si2/lock/lock.c 2007-12-14 23:00:24.246407621 +1100 @@ -11,6 +11,7 @@ #include "db_int.h" #include "dbinc/lock.h" #include "dbinc/log.h" +#include "dbinc/txn.h" static int __lock_freelock __P((DB_LOCKTAB *, struct __db_lock *, DB_LOCKER *, u_int32_t)); @@ -25,6 +26,8 @@ static int __lock_put_nolock __P((DB_ENV *, DB_LOCK *, int *, u_int32_t)); static int __lock_remove_waiter __P((DB_LOCKTAB 
*, DB_LOCKOBJ *, struct __db_lock *, db_status_t)); +static int __lock_siclean_obj __P((DB_ENV *, DB_LOCKOBJ *, DB_LSN *)); +static int __lock_sicleanup __P((DB_ENV *)); static int __lock_trade __P((DB_ENV *, DB_LOCK *, DB_LOCKER *)); static int __lock_vec_api __P((DB_ENV *, u_int32_t, u_int32_t, DB_LOCKREQ *, int, DB_LOCKREQ **)); @@ -81,6 +84,148 @@ return (ret); } +static TXN_DETAIL *__lock_owner(DB_ENV *dbenv, struct __db_lock *lp) { + return LOCK_OWNER(dbenv, lp); +} + + +static int __lock_siclean_obj(dbenv, obj, old_lsnp) + DB_ENV *dbenv; + DB_LOCKOBJ *obj; + DB_LSN *old_lsnp; +{ + DB_LOCKER *sh_locker; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_LSN old_lsn; + struct __db_lock *lp, *next_lock; + int ret; + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + ret = 0; + + if (old_lsnp == NULL) { + old_lsnp = &old_lsn; + if ((ret = __txn_oldest_reader(dbenv, old_lsnp)) != 0) + return (ret); + } + + OBJECT_LOCK_NDX(lt, obj->indx); + for (lp = SH_TAILQ_FIRST(&obj->sireaders, __db_lock); + lp != NULL; lp = next_lock) { + next_lock = SH_TAILQ_NEXT(lp, links, __db_lock); + if (LOCK_OWNER(dbenv, lp)->status == TXN_RUNNING) + continue; + if (!(IS_MAX_LSN(LOCK_COMMITLSN(dbenv, lp)) && + LOG_COMPARE(&LOCK_READLSN(dbenv, lp), old_lsnp) > 0) && + LOG_COMPARE(&LOCK_COMMITLSN(dbenv, lp), old_lsnp) > 0) + continue; + + sh_locker = R_ADDR(<->reginfo, lp->holder); + sh_locker->nlocks--; + + if (F_ISSET(sh_locker, DB_LOCKER_FREED) && + sh_locker->nlocks == 0) { + LOCK_LOCKERS(dbenv, region); + ret = __lock_freelocker(lt, region, sh_locker); + UNLOCK_LOCKERS(dbenv, region); + DB_ASSERT(dbenv, ret == 0); + } + + if ((ret = __lock_put_internal(lt, lp, obj->indx, + DB_LOCK_FREE | DB_LOCK_DOALL)) != 0) + goto err; + } +err: OBJECT_UNLOCK(lt, obj->indx); + + return (ret); +} + +static int __lock_sicleanup(dbenv) + DB_ENV *dbenv; +{ + DB_LOCKOBJ *obj, *next_obj; + DB_LOCKREGION *lrp; + DB_LOCKTAB *lt; + DB_LSN old_lsn; + u_int32_t i; + int ret; + + lt = dbenv->lk_handle; + 
lrp = lt->reginfo.primary; + + if ((ret = __txn_oldest_reader(dbenv, &old_lsn)) != 0) + return (ret); + + for (i = 0; i < lrp->object_t_size; i++) + for (obj = SH_TAILQ_FIRST(<->obj_tab[i], __db_lockobj); + obj != NULL; obj = next_obj) { + next_obj = SH_TAILQ_NEXT(obj, links, __db_lockobj); + if ((ret = __lock_siclean_obj(dbenv, obj, &old_lsn)) != 0) + return (ret); + } + + return (0); +} + +/* + * At transaction commit time, set the LSN in SI read locks and detach them + * from the locker. + * + * PUBLIC: int __lock_sicommit __P((DB_ENV *, DB_LOCKER *)); + */ +int +__lock_sicommit(dbenv, sh_locker) + DB_ENV *dbenv; + DB_LOCKER *sh_locker; +{ + struct __db_lock *lp, *next_lock; + DB_LOCKOBJ *obj; + DB_LOCKREGION *region; + DB_LOCKTAB *lt; + DB_LSN old_lsn; + int first_lock, ret = 0; + + /* Check if locks have been globally turned off. */ + if (F_ISSET(dbenv, DB_ENV_NOLOCKING)) + return (0); + + lt = dbenv->lk_handle; + region = lt->reginfo.primary; + + LOCK_SYSTEM_LOCK(dbenv); + + first_lock = 1; + for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); + lp != NULL; lp = next_lock) { + next_lock = SH_LIST_NEXT(lp, locker_links, __db_lock); + if (lp->mode == DB_LOCK_SIREAD) { + SH_LIST_REMOVE(lp, locker_links, __db_lock); +#if 0 /* need to keep lockers around now. */ + if (first_lock) { + if ((ret = __txn_oldest_reader(dbenv, &old_lsn)) != 0) + return (ret); + first_lock = 0; + } + + /* + * We are not letting lock_put_internal unlink the + * lock, so we'll have to update counts here. + */ + lp->holder = DB_LOCK_INVALIDID; + sh_locker->nlocks--; + if ((ret = + __lock_siclean_obj(dbenv, obj, &old_lsn)) != 0) + goto err; +#endif + } + } +err: LOCK_SYSTEM_UNLOCK(dbenv); + + return (ret); +} + /* * __lock_vec -- * DB_ENV->lock_vec. 
@@ -103,7 +248,8 @@ DB_LOCKREQ *list, **elistp; { struct __db_lock *lp, *next_lock; - DB_LOCK lock; DB_LOCKOBJ *sh_obj; + DB_LOCK lock; + DB_LOCKOBJ *sh_obj; DB_LOCKREGION *region; DB_LOCKTAB *lt; DBT *objlist, *np; @@ -191,10 +337,10 @@ /* Now traverse the locks, releasing each one. */ for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); lp != NULL; lp = next_lock) { - sh_obj = (DB_LOCKOBJ *) - ((u_int8_t *)lp + lp->obj); next_lock = SH_LIST_NEXT(lp, locker_links, __db_lock); + sh_obj = (DB_LOCKOBJ *) + ((u_int8_t *)lp + lp->obj); if (writes == 1 || lp->mode == DB_LOCK_READ || lp->mode == DB_LOCK_READ_UNCOMMITTED) { @@ -467,13 +613,13 @@ db_timeout_t timeout; DB_LOCK *lock; { - struct __db_lock *newl, *lp; + struct __db_lock *newl, *lp, *next_lock, *sireadlp; DB_ENV *dbenv; DB_LOCKOBJ *sh_obj; DB_LOCKREGION *region; DB_THREAD_INFO *ip; u_int32_t ndx; - int did_abort, ihold, grant_dirty, no_dd, ret, t_ret; + int did_abort, ihold, grant_dirty, no_dd, ret, rwconf, safe_si, t_ret; roff_t holder, sh_off; /* @@ -503,6 +649,8 @@ no_dd = ret = 0; newl = NULL; sh_obj = NULL; + safe_si = LF_ISSET(DB_LOCK_SNAPSHOT_SAFE) ? 1 : 0; + LF_CLR(DB_LOCK_SNAPSHOT_SAFE); /* Check that the lock mode is valid. */ if (lock_mode >= (db_lockmode_t)region->stat.st_nmodes) { @@ -555,8 +703,7 @@ * list, unless we are upgrading or this is a dirty reader in which * case the locker goes at or near the front of the list. 
*/ - ihold = 0; - grant_dirty = 0; + ihold = grant_dirty = rwconf = 0; holder = 0; /* @@ -576,7 +723,7 @@ DB_ASSERT(dbenv, lp->status != DB_LSTAT_FREE); if (sh_off == lp->holder) { if (lp->mode == lock_mode && - lp->status == DB_LSTAT_HELD) { + (lp->status == DB_LSTAT_HELD)) { if (LF_ISSET(DB_LOCK_UPGRADE)) goto upgrade; @@ -592,6 +739,10 @@ lock->gen = lp->gen; lock->mode = lp->mode; goto done; + } else if (lp->mode == DB_LOCK_WRITE && + lock_mode == DB_LOCK_SIREAD) { + LOCK_INIT(*lock); + goto done; } else { ihold = 1; } @@ -606,6 +757,66 @@ } } + if (safe_si && lp == NULL && (lock_mode == DB_LOCK_WRITE || lock_mode == DB_LOCK_SIREAD)) { + for (sireadlp = SH_TAILQ_FIRST(&sh_obj->sireaders, __db_lock); + sireadlp != NULL; sireadlp = next_lock) { + next_lock = SH_TAILQ_NEXT(sireadlp, links, __db_lock); + if (lock_mode == DB_LOCK_WRITE && + sh_off == sireadlp->holder) { + /* + * We are "upgrading" our own SIREAD lock to a + * WRITE lock. Just drop the SIREAD lock -- + * otherwise we will get lots of unnecessary + * conflicts. + */ + ret = __lock_put_internal(lt, + sireadlp, sireadlp->indx, + DB_LOCK_DOALL | DB_LOCK_UNLINK | DB_LOCK_FREE); + DB_ASSERT(dbenv, ret == 0); + } else if (lock_mode == DB_LOCK_WRITE && + sh_off != sireadlp->holder && + (LOCK_OWNER(dbenv, sireadlp)->status == TXN_RUNNING || + LOG_COMPARE(&LOCK_COMMITLSN(dbenv, sireadlp), &LOCKER_TD(dbenv, sh_locker)->read_lsn) > 0)) { + if (F_ISSET(LOCK_OWNER(dbenv, sireadlp), TXN_DTL_WCONF) && LOCK_OWNER(dbenv, sireadlp)->status == TXN_COMMITTED) { + ret = DB_SNAPSHOT_UNSAFE; + goto err; + } + rwconf = 1; + /* Set the incoming flag on our transaction. 
+ * We can skip this if the other transaction + * will abort */ + if (LOCK_OWNER(dbenv, sireadlp)->status == TXN_COMMITTED || + !F_ISSET(LOCK_OWNER(dbenv, sireadlp), TXN_DTL_WCONF)) { +#if 1 /* optimization */ + if (F_ISSET(LOCKER_TD(dbenv, sh_locker), TXN_DTL_RCONF)) { + ret = DB_SNAPSHOT_UNSAFE; + goto err; + } +#endif + F_SET(LOCKER_TD(dbenv, sh_locker), TXN_DTL_WCONF); + } + F_SET(LOCK_OWNER(dbenv, sireadlp), TXN_DTL_RCONF); + } else if (lock_mode == DB_LOCK_SIREAD && + sh_off == sireadlp->holder) { + sireadlp->refcount++; + lock->off = R_OFFSET(<->reginfo, sireadlp); + lock->gen = sireadlp->gen; + lock->mode = sireadlp->mode; + goto done; + } + } + /* + * If we find an SI read lock that doesn't conflict with this + * write, check whether there are obsolete locks that can be + * reclaimed. This avoids lots of scans of the whole lock + * table. + */ + if (lock_mode == DB_LOCK_WRITE && !rwconf && + !SH_TAILQ_EMPTY(&sh_obj->sireaders) && + (ret = __lock_siclean_obj(dbenv, sh_obj, NULL)) != 0) + goto err; + } + /* * If there are conflicting holders we will have to wait. 
If we * already hold a lock on this object or are doing an upgrade or @@ -695,8 +906,15 @@ LOCK_LOCKS(dbenv, region); if ((newl = SH_TAILQ_FIRST(®ion->free_locks, __db_lock)) == NULL) { - ret = __lock_nomem(dbenv, "locks"); - goto err; + if ((ret = __lock_sicleanup(lt->dbenv)) != 0) { + (void)__lock_nomem(lt->dbenv, "locks"); + goto err; + } + newl = SH_TAILQ_FIRST(®ion->free_locks, __db_lock); + if (newl == NULL) { + ret = __lock_nomem(lt->dbenv, "locks"); + goto err; + } } SH_TAILQ_REMOVE(®ion->free_locks, newl, links, __db_lock); @@ -758,7 +976,10 @@ break; case GRANT: newl->status = DB_LSTAT_HELD; - SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links); + if (lock_mode == DB_LOCK_SIREAD) + SH_TAILQ_INSERT_TAIL(&sh_obj->sireaders, newl, links); + else + SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links); break; case HEAD: case TAIL: @@ -935,6 +1156,7 @@ } OBJECT_UNLOCK(lt, ndx); + return (0); err: if (!LF_ISSET(DB_LOCK_UPGRADE | DB_LOCK_SWITCH)) @@ -1177,20 +1399,23 @@ lt, sh_obj, lockp, DB_LSTAT_ABORTED)) != 0) return (ret); } else { - SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock); + if (lockp->mode == DB_LOCK_SIREAD) + SH_TAILQ_REMOVE(&sh_obj->sireaders, lockp, links, __db_lock); + else + SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock); lockp->links.stqe_prev = -1; } - if (LF_ISSET(DB_LOCK_NOPROMOTE)) + if (LF_ISSET(DB_LOCK_NOPROMOTE) || lockp->mode == DB_LOCK_SIREAD) state_changed = 0; - else - if ((ret = __lock_promote(lt, sh_obj, &state_changed, - LF_ISSET(DB_LOCK_NOWAITERS))) != 0) - return (ret); + else if ((ret = __lock_promote(lt, sh_obj, &state_changed, + LF_ISSET(DB_LOCK_NOWAITERS))) != 0) + return (ret); /* Check if object should be reclaimed. 
*/ - if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL && - SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { + if (SH_TAILQ_EMPTY(&sh_obj->holders) && + SH_TAILQ_EMPTY(&sh_obj->sireaders) && + SH_TAILQ_EMPTY(&sh_obj->waiters)) { SH_TAILQ_REMOVE( <->obj_tab[obj_ndx], sh_obj, links, __db_lockobj); LOCK_OBJECTS(dbenv, region); @@ -1240,12 +1465,18 @@ region = lt->reginfo.primary; if (LF_ISSET(DB_LOCK_UNLINK)) { - SH_LIST_REMOVE(lockp, locker_links, __db_lock); if (lockp->status == DB_LSTAT_HELD) { sh_locker->nlocks--; if (IS_WRITELOCK(lockp->mode)) sh_locker->nwrites--; + if (F_ISSET(sh_locker, DB_LOCKER_FREED) && + sh_locker->nlocks == 0) { + LOCK_LOCKERS(dbenv, region); + ret = __lock_freelocker(lt, region, sh_locker); + UNLOCK_LOCKERS(dbenv, region); + DB_ASSERT(dbenv, ret == 0); + } } } @@ -1319,9 +1550,18 @@ /* Create new object and then insert it into hash table. */ if ((sh_obj = SH_TAILQ_FIRST(®ion->free_objs, __db_lockobj)) == NULL) { + if ((ret = __lock_sicleanup(lt->dbenv)) != 0) { + (void)__lock_nomem(lt->dbenv, "object entries"); + UNLOCK_OBJECTS(dbenv, region); + goto err; + } + sh_obj = SH_TAILQ_FIRST(®ion->free_objs, + __db_lockobj); UNLOCK_OBJECTS(dbenv, region); - ret = __lock_nomem(lt->dbenv, "object entries"); - goto err; + if (sh_obj == NULL) { + ret = __lock_nomem(lt->dbenv, "object entries"); + goto err; + } } /* @@ -1350,6 +1590,7 @@ sh_obj->indx = ndx; SH_TAILQ_INIT(&sh_obj->waiters); SH_TAILQ_INIT(&sh_obj->holders); + SH_TAILQ_INIT(&sh_obj->sireaders); sh_obj->lockobj.size = obj->size; sh_obj->lockobj.off = (roff_t)SH_PTR_TO_OFF(&sh_obj->lockobj, p); @@ -1470,10 +1711,10 @@ } /* - * In order to make it possible for a parent to have - * many, many children who lock the same objects, and - * not require an inordinate number of locks, we try - * to merge the child's locks with its parent's. 
+ * In order to make it possible for a parent to have many, many + * children who lock the same objects, and not require an inordinate + * number of locks, we try to merge the child's locks with its + * parent's. */ poff = R_OFFSET(&lt->reginfo, sh_parent); for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock); @@ -1483,9 +1724,14 @@ /* See if the parent already has a lock. */ obj = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj); - SH_TAILQ_FOREACH(hlp, &obj->holders, links, __db_lock) - if (hlp->holder == poff && lp->mode == hlp->mode) - break; + if (lp->mode == DB_LOCK_SIREAD) { /* braced so the else pairs with this test, not the inner if */ + SH_TAILQ_FOREACH(hlp, &obj->sireaders, links, __db_lock) + if (hlp->holder == poff && lp->mode == hlp->mode) + break; + } else + SH_TAILQ_FOREACH(hlp, &obj->holders, links, __db_lock) + if (hlp->holder == poff && lp->mode == hlp->mode) + break; if (hlp != NULL) { /* Parent already holds lock. */ @@ -1493,7 +1739,10 @@ /* Remove lock from object list and free it. */ DB_ASSERT(dbenv, lp->status == DB_LSTAT_HELD); - SH_TAILQ_REMOVE(&obj->holders, lp, links, __db_lock); + if (lp->mode == DB_LOCK_SIREAD) + SH_TAILQ_REMOVE(&obj->sireaders, lp, links, __db_lock); + else + SH_TAILQ_REMOVE(&obj->holders, lp, links, __db_lock); (void)__lock_freelock(lt, lp, sh_locker, DB_LOCK_FREE); } else { /* Just move lock to parent chains.
*/ @@ -1642,8 +1891,7 @@ if (SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock) == NULL) { LOCK_OBJECTS(lt->dbenv, region); sh_obj->generation++; - SH_TAILQ_REMOVE( - &region->dd_objs, + SH_TAILQ_REMOVE(&region->dd_objs, sh_obj, dd_links, __db_lockobj); UNLOCK_OBJECTS(lt->dbenv, region); } --- db-4.6.21/lock/lock_deadlock.c 2007-05-18 05:33:04.000000000 +1000 +++ db-4.6.21-safe-si2/lock/lock_deadlock.c 2007-12-14 23:00:24.250407534 +1100 @@ -551,6 +551,8 @@ CLEAR_MAP(tmpmap, nentries); SH_TAILQ_FOREACH(lp, &op->holders, links, __db_lock) { + if (lp->mode == DB_LOCK_SIREAD) + continue; lockerp = (DB_LOCKER *)R_ADDR(&lt->reginfo, lp->holder); if (lockerp->dd_id == DD_INVALID_ID) { @@ -799,7 +801,7 @@ * deadlocks that we already have. */ if (__os_realloc(dbenv, - ndeadalloc * sizeof(u_int32_t), + ndeadalloc * sizeof(u_int32_t *), &retp) != 0) { retp[ndead] = NULL; *deadp = retp; --- db-4.6.21/lock/lock_id.c 2007-05-18 03:18:00.000000000 +1000 +++ db-4.6.21-safe-si2/lock/lock_id.c 2007-12-14 23:00:24.250407534 +1100 @@ -11,6 +11,7 @@ #include "db_int.h" #include "dbinc/lock.h" #include "dbinc/log.h" +#include "dbinc/txn.h" /* * __lock_id_pp -- @@ -188,6 +189,14 @@ region = lt->reginfo.primary; ret = 0; + if (sh_locker->nlocks != 0 && !F_ISSET(sh_locker, DB_LOCKER_FREED)) { + /* safe SI: mark freed. */ + ret = __txn_add_buffer(dbenv, LOCKER_TD(dbenv, sh_locker)); + DB_ASSERT(dbenv, ret == 0); + F_SET(sh_locker, DB_LOCKER_FREED); + return (0); + } + if (sh_locker->nlocks != 0) { __db_errx(dbenv, "Locker still has locks"); ret = EINVAL; @@ -447,7 +456,17 @@ DB_LOCKER *sh_locker; { + DB_ENV *dbenv; u_int32_t indx; + + dbenv = lt->dbenv; + + if (F_ISSET(sh_locker, DB_LOCKER_FREED)) { + /* The transaction no longer owns this locker, decrement its reference count.
*/ + int ret = __txn_remove_buffer(dbenv, LOCKER_TD(dbenv, sh_locker), MUTEX_INVALID); + DB_ASSERT(dbenv, ret == 0); + } + LOCKER_HASH(lt, region, sh_locker->id, indx); SH_TAILQ_REMOVE(&lt->locker_tab[indx], sh_locker, links, __db_locker); SH_TAILQ_INSERT_HEAD( --- db-4.6.21/lock/lock_region.c 2007-05-18 01:15:43.000000000 +1000 +++ db-4.6.21-safe-si2/lock/lock_region.c 2007-12-14 23:00:24.250407534 +1100 @@ -19,18 +19,19 @@ * The conflict arrays are set up such that the row is the lock you are * holding and the column is the lock that is desired. */ -#define DB_LOCK_RIW_N 9 +#define DB_LOCK_RIW_N 10 static const u_int8_t db_riw_conflicts[] = { -/* N R W WT IW IR RIW DR WW */ -/* N */ 0, 0, 0, 0, 0, 0, 0, 0, 0, -/* R */ 0, 0, 1, 0, 1, 0, 1, 0, 1, -/* W */ 0, 1, 1, 1, 1, 1, 1, 1, 1, -/* WT */ 0, 0, 0, 0, 0, 0, 0, 0, 0, -/* IW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, -/* IR */ 0, 0, 1, 0, 0, 0, 0, 0, 1, -/* RIW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, -/* DR */ 0, 0, 1, 0, 1, 0, 1, 0, 0, -/* WW */ 0, 1, 1, 0, 1, 1, 1, 0, 1 +/* N R W WT IW IR RIW DR WW SI */ +/* N */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* R */ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, +/* W */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, +/* WT */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/* IW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, +/* IR */ 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, +/* RIW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, +/* DR */ 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, +/* WW */ 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, +/* SI */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /* --- db-4.6.21/lock/lock_stat.c 2007-06-23 03:38:24.000000000 +1000 +++ db-4.6.21-safe-si2/lock/lock_stat.c 2007-12-14 23:00:24.250407534 +1100 @@ -493,6 +493,8 @@ SH_TAILQ_FOREACH(lp, &op->holders, links, __db_lock) __lock_printlock(lt, mbp, lp, 1); + SH_TAILQ_FOREACH(lp, &op->sireaders, links, __db_lock) + __lock_printlock(lt, mbp, lp, 1); SH_TAILQ_FOREACH(lp, &op->waiters, links, __db_lock) __lock_printlock(lt, mbp, lp, 1); } @@ -566,6 +568,9 @@ case DB_LOCK_WAIT: mode = "WAIT"; break; + case DB_LOCK_SIREAD: + mode = "SI_READ"; + break;
default: mode = "UNKNOWN"; break; --- db-4.6.21/mp/mp_alloc.c 2007-09-28 01:28:25.000000000 +1000 +++ db-4.6.21-safe-si2/mp/mp_alloc.c 2007-12-14 23:00:24.254407446 +1100 @@ -40,7 +40,7 @@ size_t freed_space; u_int32_t buckets, buffers, high_priority, priority; u_int32_t put_counter, total_buckets; - int aggressive, alloc_freeze, giveup, got_oldest, ret; + int aggressive, alloc_freeze, giveup, ret; u_int8_t *endp; void *p; @@ -50,7 +50,7 @@ hp_end = &dbht[c_mp->htab_buckets]; buckets = buffers = put_counter = total_buckets = 0; - aggressive = alloc_freeze = giveup = got_oldest = 0; + aggressive = alloc_freeze = giveup = 0; hp_tmp = NULL; STAT(c_mp->stat.st_alloc++); @@ -316,13 +316,9 @@ bhp = oldest_bhp; else if (BH_OBSOLETE(oldest_bhp, hp->old_reader, vlsn)) bhp = oldest_bhp; - else if (!got_oldest && - __txn_oldest_reader(dbenv, &hp->old_reader) == 0) { - got_oldest = 1; - if (BH_OBSOLETE( - oldest_bhp, hp->old_reader, vlsn)) - bhp = oldest_bhp; - } + else if (__txn_oldest_reader(dbenv, &hp->old_reader) == 0 && + BH_OBSOLETE(oldest_bhp, hp->old_reader, vlsn)) + bhp = oldest_bhp; } if (bhp->ref != 0 || (bhp != oldest_bhp && @@ -351,12 +347,15 @@ * oldest reader in the system. */ if (ret == 0 && bh_mfp->multiversion) { - if (!got_oldest && !SH_CHAIN_HASPREV(bhp, vc) && - !BH_OBSOLETE(bhp, hp->old_reader, vlsn)) { + if (!SH_CHAIN_HASPREV(bhp, vc) && + !BH_OBSOLETE(bhp, hp->old_reader, vlsn)) (void)__txn_oldest_reader(dbenv, &hp->old_reader); - got_oldest = 1; - } + /* + * If a buffer is in the middle of a multiversion chain + * or it could be requested by an active transaction, + * we may need to freeze it. 
+ */ if (SH_CHAIN_HASPREV(bhp, vc) || !BH_OBSOLETE(bhp, hp->old_reader, vlsn)) { /* @@ -366,6 +365,7 @@ if (!aggressive || F_ISSET(bhp, BH_FROZEN) || bhp->ref != 0) goto next_hb; + fprintf(stderr, "Freezing, aggressive = %d, old_reader = {%u/%u}\n", aggressive, hp->old_reader.file, hp->old_reader.offset); ret = __memp_bh_freeze(dbmp, infop, hp, bhp, &alloc_freeze); } --- db-4.6.21/mp/mp_fget.c 2007-09-28 01:28:25.000000000 +1000 +++ db-4.6.21-safe-si2/mp/mp_fget.c 2007-12-14 23:00:24.254407446 +1100 @@ -105,7 +105,7 @@ void *addrp; { enum { FIRST_FOUND, FIRST_MISS, SECOND_FOUND, SECOND_MISS } state; - BH *alloc_bhp, *bhp, *frozen_bhp, *oldest_bhp; + BH *alloc_bhp, *bhp, *frozen_bhp, *next_bhp, *oldest_bhp; DB_ENV *dbenv; DB_LSN *read_lsnp, vlsn; DB_MPOOL *dbmp; @@ -113,7 +113,7 @@ MPOOL *c_mp; MPOOLFILE *mfp; REGINFO *infop, *t_infop; - TXN_DETAIL *td; + TXN_DETAIL *newer_td, *td; roff_t mf_offset; u_int32_t st_hsearch; int b_incr, b_locked, dirty, edit, extending, first; @@ -165,10 +165,12 @@ td = (TXN_DETAIL *)txn->td; if (F_ISSET(txn, TXN_SNAPSHOT)) { read_lsnp = &td->read_lsn; - if (IS_MAX_LSN(*read_lsnp) && - (ret = __log_current_lsn(dbenv, read_lsnp, - NULL, NULL)) != 0) - return (ret); + if (IS_MAX_LSN(*read_lsnp)) { + if ((ret = __log_current_lsn(dbenv, read_lsnp, + NULL, NULL)) != 0) + return (ret); + (void)__txn_update_oldlsn(dbenv, read_lsnp); + } } if ((dirty || LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW)) && td->mvcc_mtx == MUTEX_INVALID && (ret = @@ -247,8 +249,16 @@ if (mvcc && !edit && read_lsnp != NULL) { while (bhp != NULL && !BH_OWNED_BY(dbenv, bhp, txn) && - !BH_VISIBLE(dbenv, bhp, read_lsnp, vlsn)) + !BH_VISIBLE(dbenv, bhp, read_lsnp, vlsn)) { bhp = SH_CHAIN_PREV(bhp, vc, __bh); + } + + DB_ASSERT(dbenv, bhp != NULL); + + /* Use more recent, aborted changes, if any. 
*/ + while (SH_CHAIN_HASNEXT(bhp, vc) && + BH_OWNER(dbenv, SH_CHAIN_NEXTP(bhp, vc, __bh))->status == TXN_ABORTED) + bhp = SH_CHAIN_NEXTP(bhp, vc, __bh); DB_ASSERT(dbenv, bhp != NULL); } @@ -275,6 +285,9 @@ } ++bhp->ref; b_incr = 1; +#ifdef HAVE_STATISTICS + ++mfp->stat.st_cache_hit; +#endif /* * BH_LOCKED -- @@ -344,25 +357,50 @@ goto retry; } - /* - * If the buffer we wanted was frozen or thawed while we - * waited, we need to start again. - */ - if (SH_CHAIN_HASNEXT(bhp, vc) && - SH_CHAIN_NEXTP(bhp, vc, __bh)->td_off == bhp->td_off) { - --bhp->ref; - b_incr = 0; - MUTEX_UNLOCK(dbenv, hp->mtx_hash); - bhp = frozen_bhp = NULL; - goto retry; - } else if (dirty && SH_CHAIN_HASNEXT(bhp, vc)) { - ret = DB_LOCK_DEADLOCK; - goto err; +recheck: if (SH_CHAIN_HASNEXT(bhp, vc)) { + next_bhp = SH_CHAIN_NEXTP(bhp, vc, __bh); + /* + * If the buffer we wanted was frozen or thawed while + * we waited, we need to start again. + */ + if (next_bhp->td_off == bhp->td_off || + BH_OWNER(dbenv, next_bhp)->status == TXN_ABORTED) { + --bhp->ref; + b_incr = 0; + MUTEX_UNLOCK(dbenv, hp->mtx_hash); + bhp = frozen_bhp = NULL; + goto retry; + } else if (dirty) { + ret = DB_SNAPSHOT_CONFLICT; + goto err; + } else if (F_ISSET(txn, TXN_SNAPSHOT_SAFE)) { + /* + * There is a newer version, so we have a + * read-write conflict. 
+ */ + for (; next_bhp != NULL; + next_bhp = SH_CHAIN_NEXT(next_bhp, vc, __bh)) { + newer_td = BH_OWNER(dbenv, next_bhp); + if (newer_td->status == TXN_ABORTED) + continue; + + if (newer_td->status == TXN_COMMITTED && + F_ISSET(newer_td, TXN_DTL_RCONF)) { + ret = DB_SNAPSHOT_UNSAFE; + goto err; + } +#if 1 /* optimization */ + if (F_ISSET(td, TXN_DTL_WCONF)) { + ret = DB_SNAPSHOT_UNSAFE; + goto err; + } +#endif + F_SET(newer_td, TXN_DTL_WCONF); + F_SET(td, TXN_DTL_RCONF); + } + } } -#ifdef HAVE_STATISTICS - ++mfp->stat.st_cache_hit; -#endif break; } @@ -653,7 +691,7 @@ if (bhp != NULL) { MUTEX_LOCK(dbenv, hp->mtx_hash); b_locked = 1; - break; + goto recheck; } DB_ASSERT(dbenv, frozen_bhp == NULL); goto retry; @@ -858,6 +896,10 @@ /* Copy-on-write. */ if (makecopy && state != SECOND_MISS) { + if (SH_CHAIN_HASNEXT(bhp, vc)) { + ret = DB_SNAPSHOT_CONFLICT; + goto err; + } DB_ASSERT(dbenv, !SH_CHAIN_HASNEXT(bhp, vc)); DB_ASSERT(dbenv, bhp != NULL); DB_ASSERT(dbenv, alloc_bhp != NULL); @@ -943,20 +985,19 @@ #ifdef DIAGNOSTIC __memp_check_order(dbenv, hp); - { - BH *next_bhp = SH_CHAIN_NEXT(bhp, vc, __bh); + next_bhp = SH_CHAIN_NEXT(bhp, vc, __bh); DB_ASSERT(dbenv, !mfp->multiversion || !F_ISSET(bhp, BH_DIRTY) || next_bhp == NULL); DB_ASSERT(dbenv, !mvcc || edit || read_lsnp == NULL || bhp->td_off == INVALID_ROFF || BH_OWNED_BY(dbenv, bhp, txn) || - (BH_VISIBLE(dbenv, bhp, read_lsnp, vlsn) && + ((BH_VISIBLE(dbenv, bhp, read_lsnp, vlsn) || + BH_OWNER(dbenv, bhp)->status == TXN_ABORTED) && (next_bhp == NULL || F_ISSET(next_bhp, BH_FROZEN) || (next_bhp->td_off != INVALID_ROFF && (BH_OWNER(dbenv, next_bhp)->status != TXN_COMMITTED || !BH_VISIBLE(dbenv, next_bhp, read_lsnp, vlsn)))))); - } #endif MUTEX_UNLOCK(dbenv, hp->mtx_hash); --- db-4.6.21/mp/mp_fset.c 2007-09-28 01:28:25.000000000 +1000 +++ db-4.6.21-safe-si2/mp/mp_fset.c 2007-12-14 23:00:24.254407446 +1100 @@ -67,7 +67,7 @@ (!BH_OWNED_BY(dbenv, bhp, ancestor) || SH_CHAIN_HASNEXT(bhp, vc))) { slow: if ((ret = 
__memp_fget(dbmfp, &pgno, txn, flags, addrp)) != 0) { - if (ret != DB_LOCK_DEADLOCK) + if (ret != DB_SNAPSHOT_CONFLICT && ret != DB_SNAPSHOT_UNSAFE) __db_errx(dbenv, "%s: error getting a page for writing", __memp_fn(dbmfp)); --- db-4.6.21/process-results.sh 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/process-results.sh 2007-12-14 23:01:58.488350332 +1100 @@ -0,0 +1,9 @@ +#!/bin/sh + +for f in run*.log ; do + echo -n "$f" | sed 's/^run-//;s/\.log//;s/txns\/s//;s/-/,/g;s/ /,/' + echo -n ,`tail -1 $f | cut -d' ' -f1 | sed 's/^ //'` + echo -n ,`grep -c DEADLOCK $f` + echo -n ,`grep -c CONFLICT $f` + echo ,`grep -c UNSAFE $f` +done --- db-4.6.21/run_bench 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/run_bench 2007-12-14 23:23:22.620544554 +1100 @@ -0,0 +1,20 @@ +#!/bin/bash + +# case params in +# std) args="" ;; +# nosync) args="-Dlog_flush=nosync" ;; +# lowcontention) args="-Dpagesize=512" ;; +# esac + +# Should be run from the build directory, assumes that ./db_perf exists. + +for run in 1 2 3 4 5 ; do + for isolation in SSI serializable SI ; do + for mpl in 1 2 3 5 10 20 50 ; do + LOG_SUFFIX=$isolation-$mpl-$run.log + ./db_perf -D mpl=$mpl -D isolation=$isolation $@ -I -c ../smallbank.conf + ./db_perf -D mpl=$mpl -D isolation=$isolation $@ > run-$LOG_SUFFIX + ./db_stat -h TESTDIR -e > stat-$LOG_SUFFIX + done + done +done --- db-4.6.21/smallbank.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/smallbank.conf 2007-12-14 23:01:58.504349983 +1100 @@ -0,0 +1,218 @@ +## More or less equivalent to running: dbs -B + +########################################### +## Environment and miscellaneous +########################################### +## Run in TDS mode. 
+appl_type TDS + +## Added to DB_CONFIG file +set_cachesize 0 100000000 0 +set_tx_max 50000 +set_lk_max_locks 300000 +set log_flush fsync +SWITCH $log_flush { + CASE {write_nosync} { + set_flags DB_TXN_WRITE_NOSYNC + } + CASE {nosync} { + set_flags DB_TXN_NOSYNC + } +} +#set_flags DB_LOG_AUTOREMOVE +#set_lg_bsize 4194304 + +set encrypt 0 +set cksum 0 + +## Deadlock thread strategy. +deadlock DB_LOCK_DEFAULT + +## Set to do dirty reads or not. +#SET dirty SELECT {0 0 0 1 1} +SET dirty 0 + +## Prepare transactions or not. If not zero, number of seconds to +## wait between prepare and commit. +SET prepare 0 + +## Number of iterations +set iter 2000 +total_iter $iter +#steady_state_time 60 +#steady_state_max_time 120 + +## Other variables used below. +SET method btree + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG +SET recnum 0 + +## Default page size of 8KB +SET pagesize 8192 + +## Data scaling parameter +set data_scale 1 + +## Default to one operation per transaction (scales the transaction complexity) +set numops 1 + +## Default to serializable isolation +SET isolation serializable + +########################################### +## Database files and access methods +########################################### +file 0 { +# saving table for SmallBank + file_pagesize $pagesize + file_name perfdb0 + file_method $method + + SWITCH $isolation { + CASE {SI SSI} { + file_special multiversion + } + } + + ## Specify the keys, based on the access method. + ## Because we specified DBS mode above, we will automatically + ## get nthreads duplicates per key--or, in the case of record- + ## number-based databases, (nthreads * key_count) records. + ## The one exception is if we're testing secondary indices, + ## in which case this file is a primary and can't have duplicates. 
+ SWITCH $method { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot $data_scale 20 20 12 + } + } + CASE {btree} { + SET sortdups 0 + key_group 0 { + key_size 4 + key_chars_per_slot $data_scale 20 20 12 + } + } + CASE {queue recno} { + SET sortdups 0 + key_group 0 { + key_count $num_rec + } + } + } + + data_length_dists { + ## % type param1 param2 + dist 100 U 100 100 + } +} + +## First secondary, which has no duplicates. +file 1 { +# cheque table for SmallBank + file_pagesize $pagesize + file_name perfdb1 + SET method2 btree + file_method $method2 + + SWITCH $isolation { + CASE {SI SSI} { + file_special multiversion + } + } + + ## Specify the keys, based on the access method. + SWITCH $method2 { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot $data_scale 20 20 12 + } + } + CASE {btree} { + key_group 0 { + key_size 4 + key_chars_per_slot $data_scale 20 20 12 + } + } + CASE {queue recno} { + key_group 0 { + key_count $num_rec + } + } + } + + data_length_dists { + ## % type param1 param2 + dist 100 U 100 100 + } +} + +############################################## +## Transaction types. 
+############################################## +txn_type 0 { +# balance +txn_type_special same_key + read 0 0 0 $numops $numops + read 1 0 0 $numops $numops +} + +txn_type 1 { +# deposit checking + update 1 0 0 $numops $numops +} + +txn_type 2 { +# transact savings +txn_type_special same_key + read 0 0 0 $numops $numops + read 1 0 0 $numops $numops + update 0 0 0 $numops $numops +} + +txn_type 3 { +# amalgamate + update 0 0 0 $numops $numops + update 1 0 0 $numops $numops + update 1 0 0 $numops $numops +} + +txn_type 4 { +# write cheque +txn_type_special same_key + read 0 0 0 $numops $numops + read 1 0 0 $numops $numops + update 1 0 0 $numops $numops +} + +########################################### +## Threads +########################################### +thread_type 0 { + ## Bias towards read-only txns + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + + SWITCH $isolation { + CASE {SI} { + thread_type_special snapshot + } + CASE {SSI} { + thread_type_special snapshot-safe + } + } +} + +SET mpl 1 +thread_counts { + thread_type 0 $mpl +} --- db-4.6.21/test_perf/benchmark.pl 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/benchmark.pl 2007-11-05 15:51:13.655919000 +1100 @@ -0,0 +1,299 @@ +#!/usr/local/bin/perl + +$bigfile = "/export/home/ubell/BIG"; +$db_dir = "../build_unix"; + +$home_dir = "TESTDIR"; +$saved_dir = "SAVED"; +$num_runs = 5; +$main_part = ""; + +## Process the command-line arguments. +use Getopt::Long; +&GetOptions('c|config=s' => \$conf_file, + 'h|home=s' => \$home_dir, + 'n|numruns=i' => \$num_runs, + 'r|results=s' => \$main_part, + 's|saved=s' => \$saved_dir); + +if (! $conf_file) { + print STDERR "usage: ./benchmark -c [-h ] [-s ] [-n ]\n"; + exit(0); +} + + +## Determine the names of the database files. +open(CONFIG, "<$conf_file") || die "Can't open $conf_file\n"; + +$num_db = 0; +while () { + if (/file\s+[0-9]+\s*\{/) { + ## default name + $dbname[$num_db] = "perfdb" . 
$num_db; + $num_db++; + } + + if (/file_name/) { + ($dbname[$num_db - 1]) = /file_name\s+(\S+)/; + } +} + +close(CONFIG); + + +## Make sure that temp files from aborted benchmark runs are removed. +`rm -f bench.stat`; +`rm -f bench.out`; + + +## Initialize the database(s) if there aren't any previously created copies. +`test ! -d $home_dir && mkdir $home_dir`; +`rm -f $home_dir/*`; + +`test ! -d $saved_dir && mkdir $saved_dir`; + +for ($i = 0; $i < $num_db; $i++) { + if (! -f "$saved_dir/$dbname[$i]") { + print "Initializing databases...\n"; + eval { `db_perf -h $home_dir -c $conf_file -I`; }; + if ($?) { + warn "Error: $?"; + exit(1); + } + `db_checkpoint -1 -h $home_dir`; + `rm -f \`db_archive -a -h $home_dir \``; + `rm -f $home_dir/OUTPUT $home_dir/__db.* $home_dir/DB_CONFIG`; + `mv $home_dir/* $saved_dir`; + last; + } +} + + +## Perform the benchmark runs. +for ($i = 0; $i < $num_runs; $i++) { + printf("run %2d of %2d\n", $i+1, $num_runs); + + ## remove everything -- including the database(s) + `rm -f $home_dir/*`; + + ## bring in copies of the database(s) and the logs created + ## during initialization + `cp $saved_dir/* $home_dir`; + + ## clear out the OS buffer cache by reading a large file + `cp $bigfile $bigfile.bak`; + `rm $bigfile.bak`; + + ## run the benchmark + eval { `db_perf -h $home_dir -r > $home_dir/OUTPUT`; }; + if ($?) { + warn "Error: $?"; + exit(1); + } + + ## save database stats + `echo "" >> bench.stat`; + `echo "---------------------------------------------------" >> bench.stat`; + $k = $i + 1; + `echo "run $k of $num_runs" >> bench.stat`; + `echo "" >> bench.stat`; + for ($j = 0; $j < $num_db; $j++) { + `echo "database stats for $dbname[$j]:" >> bench.stat`; + eval { `$db_dir/db_stat -h$home_dir -d $dbname[$j] >> bench.stat`; }; + if ($?) 
{ + warn "Error: $?"; + exit(1); + } + `echo "" >> bench.stat`; + } + + ## the remaining stats are at the end of the OUTPUT file + $_ = `grep -n "total transactions" $home_dir/OUTPUT`; + ($start_line) = /^([0-9]+)\:/; + $end_line = `wc -l $home_dir/OUTPUT`; + $num_lines = $end_line - $start_line + 1; + `tail -$num_lines $home_dir/OUTPUT >> bench.stat`; + + ## save the contents of the OUTPUT file + `cat $home_dir/OUTPUT >> bench.out`; +} + + +## Process the results: + +## (1) find and total the relevant values +open(OUTPUT, ") { + if (/^total transactions/) { + &record_value("total txns"); + $_ = ; + $_ = ; + while (/[a-z_]+:.*[0-9]+/) { + ($name) = /([a-z_]+)/; + &record_value($name); + $_ = ; + $_ = ; + } + + $_ = ; + $_ = ; + &record_value("total time"); + $_ = ; + &record_value("total tput"); + + $_ = ; + if (/^before measurement/) { + $_ = ; + &record_value("pre-window time"); + $_ = ; + &record_value("pre-window tput"); + ; + $_ = ; + &record_value("window time"); + $_ = ; + &record_value("window tput"); + } + + $runnum++; + } +} +close(OUTPUT); + + +## (2) compute the means and standard deviations +foreach $type (sort keys %stat_total) { + $mean{$type} = $stat_total{$type} / $num_vals{$type}; + + for ($i = 0; $i < $num_vals{$type}; $i++) { + $sum_sq_diff{$type} += ($stat{$type}[$i] - $mean{$type}) * + ($stat{$type}[$i] - $mean{$type}); + } + + $std_dev{$type} = sqrt($sum_sq_diff{$type} / $num_vals{$type}); +} + + +## (3) output the stats +$_ = ">" . $main_part . 
"<"; +if (/>$results_file") || die "Can't open $results_file\n"; + +print RESULTS "configuration file: $conf_file\n"; +print RESULTS "number of runs: $num_runs\n\n"; + +## mean values for time and throughput +print RESULTS "mean values (std dev as percent of the mean):\n"; +print RESULTS "---------------------------------------------\n"; +foreach $type (sort keys %stat_total) { + $_ = $type; + if (/time/ || /tput/) { + printf(RESULTS "\t$type:\t"); + if (length($type) < 12) { + printf(RESULTS "\t"); + } + printf(RESULTS "%f (%.2f \%)\n", $mean{$type}, + ($std_dev{$type} / $mean{$type} * 100)); + } +} + +## mean values for transaction and operation counts +printf(RESULTS "\nnum transactions:\t%f (%.2f \%)\n", $mean{"total txns"}, + ($std_dev{"total txns"} / $mean{"total txns"} * 100)); +print RESULTS "ops by type:\n"; +foreach $type (sort keys %stat_total) { + $_ = $type; + if (! (/time/ || /tput/ || /txns/)) { + printf(RESULTS "\t$type:\t"); + if (length($type) < 8) { + printf(RESULTS "\t"); + } + printf(RESULTS "%f (%.2f \%) (%.2f x)\n", $mean{$type}, + ($std_dev{$type} / $mean{$type} * 100), + ($mean{$type} / $mean{"total txns"})); + } +} + +## actual sets of values for time and throughput +print RESULTS "\ntime and throughput values:"; +print RESULTS "\n---------------------------\n"; +print RESULTS "run\tpre-window\t\twindow\t\ttotal\n"; +for ($i = 0; $i < $num_vals{"total time"}; $i++) { + printf(RESULTS "%d\t", $i+1); + + if ($stat{"pre-window time"}[$i]) { + printf(RESULTS "%.2f\t%.2f\t\t", $stat{"pre-window time"}[$i], + $stat{"pre-window tput"}[$i]); + printf(RESULTS "%.2f\t%.2f\t\t", $stat{"window time"}[$i], + $stat{"window tput"}[$i]); + } + else { + print RESULTS "\t\t\t\t\t\t"; + } + + printf(RESULTS "%.2f\t%.2f\n", $stat{"total time"}[$i], + $stat{"total tput"}[$i]); +} + +## actual sets of values for transaction and operation counts +print RESULTS "\ntxn and op values:"; +print RESULTS "\n------------------\n"; +print RESULTS "run\ttxns"; +foreach 
$type (sort keys %stat_total) { + $_ = $type; + if (! (/time/ || /tput/ || /txns/)) { + printf(RESULTS "\t%s", substr($type, 0, 7)); + } +} +print RESULTS "\n"; +for ($i = 0; $i < $num_vals{"total txns"}; $i++) { + printf(RESULTS "%d\t%d", $i+1, $stat{"total txns"}[$i]); + foreach $type (sort keys %stat_total) { + $_ = $type; + if (! (/time/ || /tput/ || /txns/)) { + printf(RESULTS "\t%d", $stat{$type}[$i]); + } + } + print RESULTS "\n"; +} + +close(RESULTS); + + +## Add on the results from db_stat for each run. +`cat bench.stat >> $results_file`; +`rm bench.stat`; +`rm bench.out`; +exit(0); + + + +## Store a stat value and increment the total and value count of that stat. +sub record_value { + local ($name) = @_; + + ($stat{$name}[$runnum]) = /([0-9.]+)/; + $stat_total{$name} += $stat{$name}[$runnum]; + $num_vals{$name}++; +} --- db-4.6.21/test_perf/configs/archive_btree.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/archive_btree.conf 2007-11-05 15:51:12.227956000 +1100 @@ -0,0 +1,142 @@ +## More or less equivalent to running: dbs -a TESTDIR.A -t btree + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Archive the database and perform recovery and verification +## on the archived DB. +archive TESTDIR.A + +## Output information about the run so that we know what happened +## during post mortem. 
+output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method btree + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This type of thread adds new dupsets to the + ## second database instead of the first. 
+ txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 3 + thread_type 1 2 +} --- db-4.6.21/test_perf/configs/archive_fatal_btree.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/archive_fatal_btree.conf 2007-11-05 15:51:12.255955000 +1100 @@ -0,0 +1,142 @@ +## More or less equivalent to running: dbs -A TESTDIR.A -t btree + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Archive the database and perform fatal recovery and verification +## on the archived DB. +archive_fatal TESTDIR.A + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method btree + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. 
+## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 3 + thread_type 1 2 +} --- db-4.6.21/test_perf/configs/archive_fatal_hash.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/archive_fatal_hash.conf 2007-11-05 15:51:12.219956000 +1100 @@ -0,0 +1,142 @@ +## More or less equivalent to running: dbs -A TESTDIR.A + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. 
+set_flags DB_YIELDCPU + +## Archive the database and perform fatal recovery and verification +## on the archived DB. +archive_fatal TESTDIR.A + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method hash + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. 
+ txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 3 + thread_type 1 2 +} --- db-4.6.21/test_perf/configs/cdb_q.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/cdb_q.conf 2007-11-05 15:51:12.231956000 +1100 @@ -0,0 +1,76 @@ +## More or less equivalent to running: qtest + +########################################### +## Environment and miscellaneous +########################################### +appl_type CDS + +## Number of iterations +total_iter 1000000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name access.db + file_method queue + q_extentsize 256 + + key_group 0 { + key_count 0 + key_start_recno 1 + } + + data_length_dists { + ## % type param1 param2 + dist 100 U 100 100 + } +} + + +############################################## +## Transaction types. 
+############################################## +## Reader +txn_type 0 { + read 0 { + flag DB_CONSUME_WAIT + } +} + +## Writer +txn_type 1 { + add 0 +} + +########################################### +## Threads +########################################### +## Reader +thread_type 0 { + ## weight txn_type + txn_type 100 0 +} + +## Writer +thread_type 1 { + ## weight txn_type + txn_type 100 1 +} + +thread_counts { + ## type num_threads + thread_type 0 1 + thread_type 1 1 +} --- db-4.6.21/test_perf/configs/crash_test_1.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/crash_test_1.conf 2007-11-05 15:51:12.275955000 +1100 @@ -0,0 +1,146 @@ +## More or less equivalent to running: dbs -K30 -a TESTDIR.A -tbtree -c 10 -k 4 + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Archive the database and perform recovery and verification +## on the archived DB. +archive TESTDIR.A + +## Run a killtest 4 times, waiting 30 seconds before killing the child. +killtest_iter 4 +killtest_interval 30 + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method btree + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. 
+ data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. 
+ txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 5 + thread_type 1 5 +} --- db-4.6.21/test_perf/configs/crash_test_2.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/crash_test_2.conf 2007-11-05 15:51:12.299954000 +1100 @@ -0,0 +1,146 @@ +## More or less equivalent to running: dbs -K60 -A TESTDIR.A -tbtree -c 10 -k 4 + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Archive the database and perform fatal recovery and verification +## on the archived DB. +archive_fatal TESTDIR.A + +## Run a killtest 4 times, waiting 60 seconds before killing the child. +killtest_iter 4 +killtest_interval 60 + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method btree + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. 
+ file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 5 + thread_type 1 5 +} --- db-4.6.21/test_perf/configs/crash_test_3.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/crash_test_3.conf 2007-11-05 15:51:12.323954000 +1100 @@ -0,0 +1,146 @@ +## More or less equivalent to running: dbs -K180 -aTESTDIR.A -tbtree -c10 -k4 + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. 
+appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Archive the database and perform recovery and verification +## on the archived DB. +archive TESTDIR.A + +## Run a killtest 4 times, waiting 3 minutes before killing the child. +killtest_iter 4 +killtest_interval 180 + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method btree + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. 
+ txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 5 + thread_type 1 5 +} --- db-4.6.21/test_perf/configs/crash_test_4.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/crash_test_4.conf 2007-11-05 15:51:12.335953000 +1100 @@ -0,0 +1,147 @@ +## More or less equivalent to running: +## dbs -K60 -aTESTDIR.A -tbtree -c10 -k4 + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Archive the database and perform recovery and verification +## on the archived DB. +archive TESTDIR.A + +## Run a killtest 4 times, waiting 60 seconds before killing the child. +killtest_iter 4 +killtest_interval 60 + +## Output information about the run so that we know what happened +## during post mortem. 
+output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method btree + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. 
+ txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 5 + thread_type 1 5 +} --- db-4.6.21/test_perf/configs/inmemory.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/inmemory.conf 2007-11-05 15:51:12.227956000 +1100 @@ -0,0 +1,651 @@ +## More or less equivalent to running: dbs -B + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_flags DB_LOG_INMEMORY +set_flags DB_LOG_AUTOREMOVE +set_lg_bsize 4194304 +set_lg_max 262144 + +## Do we want to do encryption or checksumming? +set encrypt 0 +set cksum 0 +SWITCH RANGE {1 20} { + CASE {1} { + set encrypt 1 + set_encrypt keith + } + CASE {2} { + set encrypt 2 + set_encrypt michael + } + CASE {3} { + set cksum 1 + } + CASE {4} { + set cksum 2 + } +} + +## Set lock timeouts or not. +set timeout 0 +switch RANGE {1 5} { + CASE {1} { + set timeout SELECT {400 500 800 1000} + set_txn_timeout $timeout + } + CASE {2} { + set timeout SELECT {10 50 80 100} + set_lock_timeout $timeout + } +} + +set expire DB_LOCK_EXPIRE +switch {$timeout} { + CASE {0} { + set expire DB_LOCK_NORUN + } +} + +## Deadlock thread strategy. NORUN actually means don't run +## a separate thread. +# deadlock SELECT { $expire $expire DB_LOCK_NORUN DB_LOCK_DEFAULT DB_LOCK_MAXLOCKS DB_LOCK_MAXWRITE DB_LOCK_MINLOCKS DB_LOCK_MINWRITE DB_LOCK_OLDEST DB_LOCK_RANDOM DB_LOCK_YOUNGEST } +deadlock DB_LOCK_NORUN + +## Set to do dirty reads or not. +#SET dirty SELECT {0 0 0 1 1} +SET dirty 0 + +## Prepare transactions or not. If not zero, number of seconds to +## wait between prepare and commit. 
+SET prepare SELECT {0 0 0 1 1 2} + +## Number of iterations +total_iter SELECT {1000 2000 3000 4000 5000 6000 7000 8000 9000 10000} + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## On one-fifth of the runs, we simulate running out of disk space by +## returning an error if we try to extend a file. See the function +## write_err() in perf_misc.c. +SWITCH RANGE {1 5} { + CASE {1} { + write_err_start RANGE {1 10000} + write_err_count RANGE {10 500} + write_rand RANGE {1 500} + } +} + +## On half of the runs, run with a private environment. +#SWITCH RANGE {1 2} { +# CASE {1} { +# env_flag DB_PRIVATE +# } +#} + +## On one-fifth of the runs, test secondary indices. +SET test_secon SELECT {0 0 0 0 1} +SWITCH $test_secon { + CASE {0} { + # Move data more often than not. + SET move_data SELECT {0 1 1 1 1 1 1} + SET sort_dup 1 + SET key_mult 1 + SET num_rec 676 + } + CASE {1} { + # Don't move data when testing secondaries. + SET move_data 0 + SET sort_dup 0 + SET key_mult RANGE {3 17} + SET num_rec (* $key_mult 676) + } +} + +## Other variables used below. +SET subdb_type SELECT {0 1 2} +SET method SELECT {hash btree btree queue recno hash btree btree} + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + +set recnum 0 + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + file_name perfdb0 + file_method $method + + ## If we're not using a queue and $subdb_type != 0, + ## use a subdatabase for this database. 
+ SWITCH $method { + CASE {hash btree recno} { + SWITCH $subdb_type { + CASE {1 2} { + subdb_name sub1 + } + } + } + } + + SWITCH $dirty { + CASE {1} { + file_special dirty_read + } + } + + ## Specify the keys, based on the access method. + ## Because we specified DBS mode above, we will automatically + ## get nthreads duplicates per key--or, in the case of record- + ## number-based databases, (nthreads * key_count) records. + ## The one exception is if we're testing secondary indices, + ## in which case this file is a primary and can't have duplicates. + SWITCH $method { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + # Sort duplicates one-half of the time. + SET sortdups SELECT {0 $sort_dup} + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + } + CASE {btree} { + ## In one-fifth of the btree cases, use + ## record numbers. In one-half of the + ## remaining cases, sort duplicates. + SWITCH RANGE {1 5} { + CASE {1} { + SET sortdups 0 + SET recnum 1 + set_flags DB_RECNUM + bt_compare dbs_int_compare + key_group 0 { + key_count $num_rec + } + } + CASE {2 3} { + SET sortdups $sort_dup + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + CASE {4 5} { + SET sortdups 0 + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + } + } + CASE {queue recno} { + SET sortdups 0 + key_group 0 { + key_count $num_rec + } + } + } + SWITCH {$encrypt} { + CASE {1 2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {1 2} { + set_flags DB_CHKSUM + } + } + + data_length_dists { + SWITCH $test_secon { + CASE {0} { + ## Uniform distribution from 9 to 400 + ## % type param1 param2 + dist 100 U 9 400 + } + CASE {1} { + ## We need a larger minimum size so + ## that we can fit the secondary keys. 
+ ## % type param1 param2 + dist 100 U 40 400 + } + } + } +} + +## If we're moving data between databases, we need a second database. +## Note that move_data is always false when we're testing secondaries. +SWITCH $move_data { + CASE {1} { + file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + SWITCH $recnum { + CASE {0} { + SWITCH $method { + case {hash btree} { + SET method2 SELECT {hash btree} + file_method $method2 + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + } + } + } + } + + ## This database starts out empty. + file_special no_init + SWITCH $dirty { + CASE {1} { + file_special dirty_read + } + } + + + ## Handle the use of subdatabases, depending + ## on the access method and the value of + ## $subdb_type. + set newfile 1 + SWITCH $method { + CASE {queue} { + file_name perfdb1 + } + + CASE {hash btree recno} { + SWITCH $subdb_type { + CASE {0} { + ## Don't use a subdb. + file_name perfdb1 + } + CASE {1} { + ## Use a subdb in the + ## same file used for + ## file 0. + file_name perfdb0 + subdb_name sub2 + set newfile 0 + } + CASE {2} { + ## Use a subdb in a + ## new file. + file_name perfdb1 + subdb_name sub2 + } + } + } + } + + # If we have two files select a page size + SWITCH $newfile { + CASE {1} { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + } + } + SWITCH {$encrypt} { + CASE {2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {2} { + set_flags DB_CHKSUM + } + } + } + } +} + +## If we're testing secondary indices, we need two other databases. +SWITCH $test_secon { + CASE {1} { + ## First secondary, which has no duplicates. + file 1 { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + file_name secon1 + SET method3 SELECT {hash btree btree queue recno hash btree btree} + file_method $method3 + file_type secondary 0 + + ## Specify the keys, based on the access method. 
+ SWITCH $method3 { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + CASE {btree} { + ## In one-fifth of the btree cases, use + ## record numbers. + SWITCH RANGE {1 5} { + CASE {1} { + SET recnum 1 + set_flags DB_RECNUM + bt_compare dbs_int_compare + key_group 0 { + key_count $num_rec + } + } + CASE {2 3 4 5} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + } + } + CASE {queue recno} { + key_group 0 { + key_count $num_rec + } + } + } + + SWITCH $dirty { + CASE {1} { + file_special dirty_read + } + } + SWITCH {$encrypt} { + CASE {2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {2} { + set_flags DB_CHKSUM + } + } + } + + ## Second secondary, which has $key_mult duplicates -- + ## unless it is a record-based access method, in which case + ## it has no duplicates. + file 2 { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + file_name secon2 + SET method4 SELECT {hash btree btree queue recno hash btree btree} + file_method $method4 + file_type secondary 0 + + ## Specify the keys, based on the access method. + SWITCH $method4 { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + # Sort duplicates one-half of the time. + SET sortdups SELECT {0 1} + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + } + CASE {btree} { + ## In one-fifth of the btree cases, use + ## record numbers. In one-half of the + ## remaining cases, sort duplicates. 
+ SWITCH RANGE {1 5} { + CASE {1} { + SET recnum 1 + set_flags DB_RECNUM + bt_compare dbs_int_compare + key_group 0 { + key_count $num_rec + } + } + CASE {2 3} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + } + CASE {4 5} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + } + } + } + CASE {queue recno} { + key_group 0 { + key_count $num_rec + } + } + } + + SWITCH $dirty { + CASE {1} { + file_special dirty_read + } + } + SWITCH {$encrypt} { + CASE {2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {2} { + set_flags DB_CHKSUM + } + } + } + } +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases if $move_data == 1. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + SWITCH $move_data { + CASE {1} { + dbs_add_dupset 1 + } + } +} + +txn_type 7 { + txn_type_special retry + dbs_read 0 +} + +SWITCH $test_secon { + CASE {1} { + txn_type 8 { + dbs_swap_data 0 + } + + txn_type 9 { + dbs_read 1 + dbs_read 2 + } + + txn_type 10 { + dbs_read_dupset 1 + dbs_read_dupset 2 + } + + txn_type 11 { + dbs_del_readd_secon 1 + } + + txn_type 12 { + dbs_del_readd_secon 2 + } + + txn_type 13 { + txn_type_special retry + dbs_read 1 + dbs_read 2 + } + } +} + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. 
+ ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + + ## We can't use txn_type 5 if we're testing secondaries + ## because it will permanently modify the secondaries. + SWITCH $test_secon { + CASE {0} { + txn_type 10 5 + } + CASE {1} { + txn_type 10 8 + txn_type 10 9 + txn_type 10 10 + txn_type 10 11 + txn_type 10 12 + } + } + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + + ## We can't use txn_types 5 or 6 if we're testing secondaries + ## because they will permanently modify the secondaries. + SWITCH $test_secon { + CASE {0} { + ## If we're moving data, then this group of threads adds + ## new dupsets to the second database instead of the first. + SWITCH $move_data { + CASE {0} { + txn_type 10 5 + } + CASE {1} { + txn_type 10 6 + } + } + } + CASE {1} { + txn_type 10 8 + txn_type 10 9 + txn_type 10 10 + txn_type 10 11 + txn_type 10 12 + } + } + + ## Perform up to 6 transaction types per actual transaction. 
+ txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 2 { + SWITCH $dirty { + CASE {0} { + thread_type_special no_txn + } + case {1} { + thread_type_special dirty_read + } + } + txn_type 100 7 + SWITCH $test_secon { + CASE {1} { + txn_type 10 13 + } + } +} + +SET threads_per_group RANGE {1 7} +SET read_threads RANGE {1 3} +thread_counts { + thread_type 0 $threads_per_group + thread_type 1 $threads_per_group + thread_type 2 $read_threads +} --- db-4.6.21/test_perf/configs/qtest.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/qtest.conf 2007-11-05 15:51:12.227956000 +1100 @@ -0,0 +1,90 @@ +## More or less equivalent to running: qtest + +########################################### +## Environment and miscellaneous +########################################### + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method queue + + key_group 0 { + key_count 0 + key_start_recno 1 + } + + data_length_dists { + ## % type param1 param2 + dist 100 U 10 10 + } +} + + +############################################## +## Transaction types. 
+############################################## +## Reader +txn_type 0 { + read 0 { + flag DB_CONSUME_WAIT + } +} + +## Writer +txn_type 1 { + add 0 +} + +## Scanner +txn_type 2 { + qtest_scan 0 +} + + +########################################### +## Threads +########################################### +## Reader +thread_type 0 { + ## weight txn_type + txn_type 100 0 +} + +## Writer +thread_type 1 { + ## weight txn_type + txn_type 100 1 +} + +## Scanner +thread_type 2 { + ## weight txn_type + txn_type 100 2 +} + +thread_counts { + ## type num_threads + thread_type 0 2 + thread_type 1 2 + thread_type 2 1 +} --- db-4.6.21/test_perf/configs/random.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/random.conf 2007-11-05 15:51:12.283955000 +1100 @@ -0,0 +1,817 @@ +## More or less equivalent to running: dbs -B + +## If FAILCHECK is 1 then we always do a kill test. +SET FAILCHECK 0 + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 +SWITCH RANGE {1 3} { + case {1} { + set_flags DB_DSYNC_LOG + } +} + +set inmem 0 +switch $FAILCHECK { + case {0} { + set top 8 + } + case {1} { + set top 7 + } +} +## How do we want to commit transactions? +## Since we don't power fail, default to DB_TXN_NOSYNC. +SWITCH RANGE {1 $top} { + CASE {1} { + set_flags DB_TXN_WRITE_NOSYNC + } + case {3 4 5 6 7} { + set_flags DB_TXN_NOSYNC + } + case {8} { + set inmem 1 + set_flags DB_LOG_INMEMORY + set_lg_bsize 16777216 + } +} + +## Do we want to do encryption or checksumming? +set encrypt 0 +set cksum 0 +SWITCH RANGE {1 20} { + CASE {1} { + set encrypt 1 + set_encrypt keith + } + CASE {2} { + set encrypt 2 + set_encrypt michael + } + CASE {3} { + set cksum 1 + } + CASE {4} { + set cksum 2 + } +} + +## Set lock timeouts or not. 
+set timeout 0 +switch RANGE {1 5} { + CASE {1} { + set timeout SELECT {400 500 800 1000} + set_txn_timeout $timeout + } + CASE {2} { + set timeout SELECT {10 50 80 100} + set_lock_timeout $timeout + } +} + +set expire DB_LOCK_EXPIRE +switch {$timeout} { + CASE {0} { + set expire DB_LOCK_NORUN + } +} + +## Deadlock thread strategy. NORUN actually means don't run +## a separate thread. +deadlock SELECT { $expire $expire DB_LOCK_NORUN DB_LOCK_DEFAULT DB_LOCK_MAXLOCKS DB_LOCK_MAXWRITE DB_LOCK_MINLOCKS DB_LOCK_MINWRITE DB_LOCK_OLDEST DB_LOCK_RANDOM DB_LOCK_YOUNGEST } + +## Set the isolation level +SET isolation SELECT {serializable read-committed snapshot dirty} + +## Prepare transactions or not. If not zero, number of seconds to +## wait between prepare and commit. +SET prepare SELECT {0 0 0 1 1 2} + +## Number of iterations +total_iter SELECT {1000 2000 3000 4000 5000 6000 7000 8000 9000 10000} + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## If we want to test fail check, no private envs. +switch $FAILCHECK { + case {0} { + SET top 5 + } + case {1} { + set top 4 + } +} + +## Do archive/recovery one-half of the time--1/8 each for archive, archive_fatal, hotbackup, and hotupdate. +SWITCH $inmem { + case {0} { + SWITCH RANGE {1 8} { + CASE {1} { + archive TESTDIR.A + } + CASE {2} { + archive_fatal TESTDIR.A + } + CASE {3} { + hotbackup TESTDIR.A + SET top 4 + } + CASE {4} { + hotupdate TESTDIR.A + SET top 4 + } + } + } + case {1} { + SET top 4 + } +} +## On one-fifth of the runs, run with a private environment +## unless we are running db_hotbackup or in memory. +SWITCH RANGE {1 $top} { + CASE {5} { + env_flag DB_PRIVATE + } +} + + +## On one-fifth of the runs, we simulate running out of disk space by +## returning an error if we try to extend a file. See the function +## write_err() in perf_misc.c.
+SWITCH RANGE {1 5} { + CASE {1} { + write_err_start RANGE {1 10000} + write_err_count RANGE {10 500} + write_rand RANGE {1 500} + } +} + +## On one-eighth of the runs, perform a killtest. +## For between 1 and 7 iterations, fork a child, wait for between +## 30 seconds and 5 minutes before killing it, and then perform recovery +## and verify the databases. Pause so that fail_chk might +## find something to do. +## If we are preparing, then always kill test. +SWITCH $prepare { + CASE {0} { + SET top 8 + } + CASE {1 2} { + prepare $prepare + set top 1 + } +} +## IF we want to test fail check, we always want to do a kill test. +SWITCH $FAILCHECK { + case {1} { + set top 1 + } +} +## No kill test if we are not durable +set bottom 1 +SWITCH $inmem { + case {1} { + set bottom 0 + set top 0 + } +} +SWITCH RANGE {$bottom $top} { + CASE {1} { + killtest_iter RANGE {1 7} + killtest_interval SELECT {30 45 60 75 90 105 120 135 150 165 180 195 210 225 240 255 270 285 300} + pause SELECT {0 0 1 3 5} + } +} + +## On one-fifth of the runs, test secondary indices. +SET test_secon SELECT {0 0 0 0 1} +SWITCH $test_secon { + CASE {0} { + # Move data more often than not. + SET move_data SELECT {0 1 1 1 1 1 1} + SET sort_dup 1 + SET key_mult 1 + SET num_rec 676 + } + CASE {1} { + # Don't move data when testing secondaries. + SET move_data 0 + SET sort_dup 0 + SET key_mult RANGE {3 17} + SET num_rec (* $key_mult 676) + } +} + +## On one-fifth of the runs, give each thread its own DB handle +## for each file. +SWITCH RANGE {1 5} { + CASE {1} { + set_multiple_handles 1 + } +} + +## Other variables used below. +SET subdb_type SELECT {0 1 2} +SET method SELECT {hash btree btree queue recno hash btree btree} + +## Output information about the run so that we know what happened +## during post mortem.
+output_info dbs_output_info RUN_LOG + +set recnum 0 + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + file_name perfdb0 + file_method $method + + ## If we're not using a queue and $subdb_type != 0, + ## use a subdatabase for this database. + SWITCH $method { + CASE {hash btree recno} { + SWITCH $subdb_type { + CASE {1 2} { + subdb_name sub1 + } + } + } + } + + SWITCH $isolation { + CASE {dirty} { + file_special dirty_read + } + } + + # Turn on MVCC one quarter of the time. + SWITCH $method { + CASE {btree hash recno} { + SWITCH RANGE {1 4} { + CASE {1} { + file_special multiversion + } + } + } + } + + ## Specify the keys, based on the access method. + ## Because we specified DBS mode above, we will automatically + ## get nthreads duplicates per key--or, in the case of record- + ## number-based databases, (nthreads * key_count) records. + ## The one exception is if we're testing secondary indices, + ## in which case this file is a primary and can't have duplicates. + SWITCH $method { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + # Sort duplicates one-half of the time. + SET sortdups SELECT {0 $sort_dup} + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + } + CASE {btree} { + ## In one-fifth of the btree cases, use + ## record numbers. In one-half of the + ## remaining cases, sort duplicates. 
+ SWITCH RANGE {1 5} { + CASE {1} { + SET sortdups 0 + SET recnum 1 + set_flags DB_RECNUM + bt_compare dbs_int_compare + key_group 0 { + key_count $num_rec + } + } + CASE {2 3} { + SET sortdups $sort_dup + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + CASE {4 5} { + SET sortdups 0 + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + } + } + CASE {queue recno} { + SET sortdups 0 + key_group 0 { + key_count $num_rec + } + } + } + SWITCH {$encrypt} { + CASE {1 2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {1 2} { + set_flags DB_CHKSUM + } + } + + data_length_dists { + SWITCH $test_secon { + CASE {0} { + ## Uniform distribution from 9 to 400 + ## % type param1 param2 + dist 100 U 9 400 + } + CASE {1} { + ## We need a larger minimum size so + ## that we can fit the secondary keys. + ## % type param1 param2 + dist 100 U 40 400 + } + } + } +} + +SET method2 none +## If we're moving data between databases, we need a second database. +## Note that move_data is always false when we're testing secondaries. +SWITCH $move_data { + CASE {1} { + SET method2 $method + file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + SWITCH $recnum { + CASE {0} { + SWITCH $method { + case {hash btree} { + SET method2 SELECT {hash btree} + file_method $method2 + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + } + } + } + } + + ## This database starts out empty. + file_special no_init + SWITCH $isolation { + CASE {dirty} { + file_special dirty_read + } + } + + ## Handle the use of subdatabases, depending + ## on the access method and the value of + ## $subdb_type. + set newfile 1 + SWITCH $method { + CASE {queue} { + file_name perfdb1 + } + + CASE {hash btree recno} { + SWITCH $subdb_type { + CASE {0} { + ## Don't use a subdb. 
+ file_name perfdb1 + } + CASE {1} { + ## Use a subdb in the + ## same file used for + ## file 0. + file_name perfdb0 + subdb_name sub2 + set newfile 0 + } + CASE {2} { + ## Use a subdb in a + ## new file. + file_name perfdb1 + subdb_name sub2 + } + } + } + } + + # If we have two files select a page size + SWITCH $newfile { + CASE {1} { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + } + } + SWITCH {$encrypt} { + CASE {2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {2} { + set_flags DB_CHKSUM + } + } + } + } +} + +## If we're testing secondary indices, we need two other databases. +SWITCH $test_secon { + CASE {1} { + ## First secondary, which has no duplicates. + file 1 { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + file_name secon1 + SET method3 SELECT {hash btree btree queue recno hash btree btree} + file_method $method3 + file_type secondary 0 + + ## Specify the keys, based on the access method. + SWITCH $method3 { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + CASE {btree} { + ## In one-fifth of the btree cases, use + ## record numbers. + SWITCH RANGE {1 5} { + CASE {1} { + SET recnum 1 + set_flags DB_RECNUM + bt_compare dbs_int_compare + key_group 0 { + key_count $num_rec + } + } + CASE {2 3 4 5} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 $key_mult + } + } + } + } + CASE {queue recno} { + key_group 0 { + key_count $num_rec + } + } + } + + SWITCH $isolation { + CASE {dirty} { + file_special dirty_read + } + } + SWITCH {$encrypt} { + CASE {2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {2} { + set_flags DB_CHKSUM + } + } + } + + ## Second secondary, which has $key_mult duplicates -- + ## unless it is a record-based access method, in which case + ## it has no duplicates. 
+ file 2 { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + file_name secon2 + SET method4 SELECT {hash btree btree queue recno hash btree btree} + file_method $method4 + file_type secondary 0 + + ## Specify the keys, based on the access method. + SWITCH $method4 { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + # Sort duplicates one-half of the time. + SET sortdups SELECT {0 1} + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + } + CASE {btree} { + ## In one-fifth of the btree cases, use + ## record numbers. In one-half of the + ## remaining cases, sort duplicates. + SWITCH RANGE {1 5} { + CASE {1} { + SET recnum 1 + set_flags DB_RECNUM + bt_compare dbs_int_compare + key_group 0 { + key_count $num_rec + } + } + CASE {2 3} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + } + CASE {4 5} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + } + } + } + CASE {queue recno} { + key_group 0 { + key_count $num_rec + } + } + } + + SWITCH $isolation { + CASE {dirty} { + file_special dirty_read + } + } + SWITCH {$encrypt} { + CASE {2} { + set_flags DB_ENCRYPT + } + } + SWITCH {$cksum} { + case {2} { + set_flags DB_CHKSUM + } + } + } + } +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases if $move_data == 1. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + ## Use the same key for both operations. 
+ txn_type_special same_key + dbs_del 0 + SWITCH $move_data { + CASE {1} { + dbs_add_dupset 1 + } + } +} + +txn_type 7 { + txn_type_special retry + dbs_read 0 +} + +SET next 8 + +SWITCH $test_secon { + CASE {1} { + txn_type 8 { + dbs_swap_data 0 + } + + txn_type 9 { + dbs_read 1 + dbs_read 2 + } + + txn_type 10 { + dbs_read_dupset 1 + dbs_read_dupset 2 + } + + txn_type 11 { + dbs_del_readd_secon 1 + } + + txn_type 12 { + dbs_del_readd_secon 2 + } + + txn_type 13 { + txn_type_special retry + dbs_read 1 + dbs_read 2 + } + set next 14 + } +} +SET test_reorg 0 +SWITCH SELECT {$method NONE} { + CASE {BTREE RECNO} { + SET test_reorg 1 + SET reorg_type $next + txn_type $next { + reorg 0 + SWITCH $method2 { + CASE {BTREE RECNO} { + reorg 1 + } + } + } + } +} + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + + ## We can't use txn_type 5 if we're testing secondaries + ## because it will permanently modify the secondaries. + SWITCH $test_secon { + CASE {0} { + txn_type 10 5 + } + CASE {1} { + txn_type 10 8 + txn_type 10 9 + txn_type 10 10 + txn_type 10 11 + txn_type 10 12 + } + } + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + + ## We can't use txn_types 5 or 6 if we're testing secondaries + ## because they will permanently modify the secondaries. + SWITCH $test_secon { + CASE {0} { + ## If we're moving data, then this group of threads adds + ## new dupsets to the second database instead of the first. 
+ SWITCH $move_data { + CASE {0} { + txn_type 10 5 + } + CASE {1} { + txn_type 10 6 + } + } + } + CASE {1} { + txn_type 10 8 + txn_type 10 9 + txn_type 10 10 + txn_type 10 11 + txn_type 10 12 + } + } + + SWITCH $isolation { + CASE {snapshot} { + thread_type_special snapshot + } + } + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +## read thread +thread_type 2 { + SWITCH $isolation { + CASE {dirty} { + thread_type_special dirty_read + } + CASE {read-committed} { + thread_type_special no_txn + } + CASE {snapshot} { + thread_type_special snapshot + } + } + txn_type 100 7 + SWITCH $test_secon { + CASE {1} { + txn_type 10 13 + } + } +} + +SWITCH $test_reorg { + CASE {1} { + ## reorg thread + thread_type 3 { + thread_type_special no_txn + txn_type 100 $reorg_type + } + } +} + +SET threads_per_group RANGE {1 7} +SET read_threads RANGE {1 3} +SET reorg_threads SELECT {1 1 2} +thread_counts { + thread_type 0 $threads_per_group + thread_type 1 $threads_per_group + thread_type 2 $read_threads + + SWITCH $test_reorg { + CASE {1} { + thread_type 3 $reorg_threads + } + } +} --- db-4.6.21/test_perf/configs/sample2.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/sample2.conf 2007-11-05 15:51:12.231956000 +1100 @@ -0,0 +1,126 @@ +set_cachesize 0 200000000 0 + +# env_flag DB_LOCKDOWN + +file 0 { + file_name database + file_method btree + file_pagesize 8192 + + ## key groups 0-2 represent the first 30% of the database-- + ## i.e., keys that begin with the letters a, b and c + key_group 0 { + key_size 5 + key_chars_per_slot 3 10 10 10 10 + key_start_key aaaaa + + key_numdup_dists { + dist_count 1 + + ## % type mean std_dev + dist 100 N 5 1 + } + } + + key_group 1 { + key_size 7 + key_chars_per_slot 3 5 2 5 2 5 2 + key_start_key aaaaaaa + # assume that this and the rest of the key groups use + # the same numdup_dists as key_group 0 + } + + key_group 2 { + key_size 9 + key_chars_per_slot 3 5 
2 5 2 5 2 5 2 + key_start_key aaaaaaaaa + } + + ## key groups 3-5 represent the second 70% of the database-- + ## i.e., keys that begin with the letters d-j + key_group 3 { + key_size 5 + key_chars_per_slot 7 10 10 10 10 + key_start_key daaaa + } + + key_group 4 { + key_size 7 + key_chars_per_slot 7 5 2 5 2 5 2 + key_start_key daaaaaa + } + + key_group 5 { + key_size 9 + key_chars_per_slot 7 5 2 5 2 5 2 5 2 + key_start_key daaaaaaaa + } + + data_length_dists { + ## Normal distribution with mean 50 and std_dev 5 + ## % type param1 param2 + dist 100 N 50 5 + } +} + + +txn_type 0 { + read 0 { + keygrp_dists { + dist 100 U 0 2 + } + iter_dists { + dist 100 U 1 10 + } + } +} + +txn_type 1 { + read 0 { + keygrp_dists { + dist 100 U 3 5 + } + iter_dists { + dist 100 U 1 10 + } + } +} + +txn_type 2 { + update 0 { + keygrp_dists { + dist 100 U 0 2 + } + iter_dists { + dist 100 U 1 5 + } + } +} + +txn_type 3 { + update 0 { + keygrp_dists { + dist 100 U 3 5 + } + iter_dists { + dist 100 U 1 5 + } + } +} + + +thread_type 0 { + ## 80% of the accesses are to 30% of the database (key groups 0-2) + ## % txn_type + txn_type 60 0 + txn_type 20 2 + + ## the rest of the accesses are to the rest of the database (kg's 3-5) + txn_type 15 1 + txn_type 5 3 +} + +thread_counts { + ## type num_threads + thread_type 0 80 +} --- db-4.6.21/test_perf/configs/sample.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/sample.conf 2007-11-05 15:51:12.307954000 +1100 @@ -0,0 +1,141 @@ +set_cachesize 0 53687091 0 + +file 0 { + file_name primary + file_method btree + file_pagesize 8192 + + ## 100,000 keys + key_group 0 { + key_size 9 + key_incr_order r_to_l + key_chars_per_slot 10 10 5 2 5 2 5 2 1 + } + + data_length_dists { + ## Normal distribution with mean 100 and std_dev 10 + ## % type param1 param2 + dist 100 N 100 10 + } +} + +file 1 { + file_name secon1 + file_type secondary 0 + file_method btree + file_pagesize 8192 + + ## A total of 100,000 keys.
Because there are 100,000 + ## keys in file 0 as well, there will be no duplicates. + key_group 0 { + key_size 6 + key_count 10000 + } + + key_group 1 { + key_size 6 + key_count 15000 + } + + key_group 2 { + key_size 6 + key_count 20000 + } + + key_group 3 { + key_size 6 + key_count 30000 + } + + key_group 4 { + key_size 6 + key_count 15000 + } + + key_group 5 { + key_size 6 + key_count 10000 + } +} + +file 2 { + file_name secon2 + file_type secondary 0 + file_method btree + file_pagesize 8192 + + ## A total of 25,000 keys. Because there are 100,000 + ## keys in file 0 as well, there will be 4 duplicates per key. + key_group 0 { + key_size 5 + key_count 10000 + } + + key_group 1 { + key_size 5 + key_count 15000 + } +} + + +txn_type 0 { + read 0 { + iter_dists { + dist 100 U 1 10 + } + } +} + +txn_type 1 { + ## This will get the primary data items. + read 1 { + iter_dists { + dist 100 U 1 10 + } + } +} + +txn_type 2 { + ## This always updates all of the secondary keys associated + ## with an item in the primary. + update 0 { + iter_dists { + dist 100 U 1 5 + } + + ## Specifies that an item should be modified 80% + ## of the time. + update_pct 80 + } +} + +txn_type 3 { + ## This allows us to specify which (if any) of the secondary + ## keys should be modified. 
+ update_prim 0 { + iter_dists { + dist 100 U 1 5 + } + + ## file # probability of updating + secon_to_update 2 50 + } +} + + +thread_type 0 { + ## % txn_type + txn_type 30 0 + txn_type 40 2 + txn_type 30 3 +} + +thread_type 1 { + txn_type 100 1 +} + +thread_counts { + ## type num_threads + thread_type 0 7 + thread_type 1 3 +} --- db-4.6.21/test_perf/configs/sanity_btree.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/sanity_btree.conf 2007-11-05 15:51:12.351953000 +1100 @@ -0,0 +1,138 @@ +## More or less equivalent to running: dbs -t btree + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method btree + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. 
+## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 3 + thread_type 1 2 +} --- db-4.6.21/test_perf/configs/sanity_hash.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/sanity_hash.conf 2007-11-05 15:51:12.267955000 +1100 @@ -0,0 +1,138 @@ +## More or less equivalent to running: dbs + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. 
+set_flags DB_YIELDCPU + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize 4096 + file_name perfdb0 + file_method hash + + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 511 + ## % type param1 param2 + dist 100 U 9 511 + } +} + +file 1 { + ## Start with a copy of file 0, so + ## that we share its settings. + file_copy 0 + + ## This database starts out empty. + file_special no_init + + file_name perfdb1 +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + +txn_type 6 { + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 1 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. + ## weight txn_type + txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 5 + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_type 1 { + ## This group of threads adds new dupsets to the + ## second database instead of the first. 
+ txn_type 10 0 + txn_type 10 1 + txn_type 10 2 + txn_type 10 3 + txn_type 10 4 + txn_type 10 6 + + txn_size_dists { + dist 100 U 1 6 + } +} + +thread_counts { + ## type num_threads + thread_type 0 3 + thread_type 1 2 +} --- db-4.6.21/test_perf/configs/vx_random.conf 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/configs/vx_random.conf 2007-11-05 15:51:12.223956000 +1100 @@ -0,0 +1,191 @@ +## More or less equivalent to running dbs on VxWorks. + +########################################### +## Environment and miscellaneous +########################################### +## Run in DBS mode. +appl_type DBS + +## Added to DB_CONFIG file +set_lg_max 131072 + +## Number of iterations +total_iter 1000 + +## In addition to setting this flag, a function is set that causes +## threads to yield the processor on every page request to maximize +## concurrency. +set_flags DB_YIELDCPU + +## Use SET to create variables for use in later sections of the file. +SET subdb_type SELECT {0 1 2} +SET method SELECT {hash btree queue recno} + +## Output information about the run so that we know what happened +## during post mortem. +output_info dbs_output_info RUN_LOG + + +########################################### +## Database files and access methods +########################################### +file 0 { + file_pagesize SELECT {512 1024 2048 4096 8192 16384 32768 65536} + file_name perfdb0 + file_method $method + + ## If we're not using a queue and $subdb_type != 0, + ## use a subdatabase for this database. + SWITCH $method { + CASE {hash btree recno} { + SWITCH $subdb_type { + CASE {1 2} { + subdb_name sub1 + } + } + } + } + + ## Specify the keys, based on the access method. + ## Because we specified DBS mode above, we will automatically + ## get nthreads duplicates per key--or, in the case of record- + ## number-based databases, (nthreads * key_count) records. 
+ SWITCH $method { + CASE {hash} { + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + + # Sort duplicates one-half of the time. + SET sortdups SELECT {0 1} + SWITCH $sortdups { + CASE {1} { + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + } + } + } + CASE {btree} { + ## In one-fifth of the btree cases, use + ## record numbers. In one-half of the + ## remaining cases, sort duplicates. + SWITCH RANGE {1 5} { + CASE {1} { + SET sortdups 0 + set_flags DB_RECNUM + bt_compare dbs_int_compare + key_group 0 { + key_count 676 + } + } + CASE {2 3} { + SET sortdups 1 + set_flags DB_DUPSORT + dup_compare dbs_dup_compare + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + } + CASE {4 5} { + SET sortdups 0 + key_group 0 { + key_size 4 + key_chars_per_slot 1 26 26 1 + } + } + } + } + CASE {queue recno} { + SET sortdups 0 + key_group 0 { + key_count 676 + } + } + } + + ## Because we specified DBS mode above, an ID and checksum + ## will be included, following the format of the DBS + ## data struct. + data_length_dists { + ## Uniform distribution from 9 to 400 + ## % type param1 param2 + dist 100 U 9 400 + } +} + + +############################################## +## Transaction types. +## Note that because we're running in DBS +## mode, get(), c_get(), and del() calls will be +## tried on both databases if $move_data == 1. +############################################## +txn_type 0 { + dbs_read 0 +} + +txn_type 1 { + dbs_update 0 +} + +txn_type 2 { + dbs_del_readd 0 +} + +txn_type 3 { + dbs_read_dupset 0 +} + +txn_type 4 { + dbs_partial_write 0 +} + +txn_type 5 { + ## Use the same key for both operations. + txn_type_special same_key + dbs_del 0 + dbs_add_dupset 0 +} + + +########################################### +## Threads +########################################### +thread_type 0 { + ## Each transaction type is chosen with equal frequency. 
+ ## weight txn_type + txn_type 10 0 + txn_type 10 2 + txn_type 10 3 + txn_type 10 5 + + ## We can only do updates and partial writes if we're not + ## sorting duplicates. + SWITCH $sortdups { + CASE {0} { + txn_type 10 1 + txn_type 10 4 + } + } + + ## Perform up to 6 transaction types per actual transaction. + txn_size_dists { + dist 100 U 1 6 + } +} + +SWITCH $method { + CASE {hash} { + SET thr_count RANGE {2 10} + } + CASE {btree queue recno} { + SET thr_count RANGE {2 13} + } +} + +thread_counts { + ## type num_threads + thread_type 0 $thr_count +} --- db-4.6.21/test_perf/db_perf.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/db_perf.c 2007-11-07 22:34:37.810563000 +1100 @@ -0,0 +1,1654 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996,2007 Oracle. All rights reserved. + * + * $Id: db_perf.c,v 12.52 2007/06/22 21:18:26 ubell Exp $ + */ + +#include "perf_extern.h" + +#ifndef STDERR_FILENO +#define STDERR_FILENO 2 +#endif + +struct __perf_globals g; + +/* Access method threads. */ +static os_thread_t *am_threads; + +/* + * Access method procs -- used if we're forking separate procs instead + * of using threads. + */ +os_pid_t children[1024]; + +static int configure_cache __P((DB_ENV *)); +static void do_cleanup __P((void)); +static int do_killtest __P((int, int)); +static int fork_procs __P((void)); +static int init_databases __P((void)); +static void my_printf __P((int, const char *, ...)); +static int report_stats __P((int)); +static int run_threads __P((int, int, int)); +static int run_verify __P((int)); +static int setup_environment __P((int, int)); +static int txn_recover __P((DB_ENV *)); +#ifndef HAVE_VXWORKS +static void usage __P((void)); +#endif + +int archive; + +/* + * A configurable performance tool that allows DB's performance to be + * measured on a variety of different workloads. 
The VxWorks "main" + * function is in perf_vx.c + */ +#ifndef HAVE_VXWORKS +int +main(argc, argv) + int argc; + char *argv[]; +{ + extern char *optarg; + extern int optind; + struct stat sb; + pid_t pid; + int batch, ch, init_only, region_stats, ret; + int thread_id, use_procs, verify; + char buf[DB_MAXPATHLEN], *valuestr; + + /* Misc. initializations */ + archive = batch = init_only = region_stats = use_procs = verify = 0; + thread_id = -1; + valuestr = NULL; + setvbuf(stdout, NULL, _IOLBF, BUFSIZ); + /* + * The globals below are initialized differently under + * VxWorks, so we do them here. + */ + g.home = DATAHOME; + if ((g.progname = __db_rpath(argv[0])) == NULL) + g.progname = argv[0]; + else + g.progname++; + g.progpath = argv[0]; + + __os_id(NULL, &pid, NULL); + g.seed = time(NULL) ^ pid << 7; + g.verbose = 0; + + if ((ret = open_var_db()) != 0) { + fprintf(stderr, "Failed to open variable db\n"); + return (EXIT_FAILURE); + } + + /* Process the command-line arguments. */ + while ((ch = getopt(argc, argv, "ABc:CD:Fh:i:ILo:prR:sS:T:vVxXyZ:")) + != EOF) + switch (ch) { + case 'A': /* Called from archive */ + archive = 1; + break; + case 'B': /* Batch mode */ + batch = 1; + break; + case 'c': /* Config file */ + g.config = optarg; + break; + case 'C': /* Checkpoint thread */ + g.checkpoint = 1; + break; + case 'D': /* "#define": override a SET */ + valuestr = strchr(optarg, '='); + if (valuestr == NULL) + valuestr = "1"; + else /* overwrite the '=' */ + *valuestr++ = '\0'; + set_variable(optarg, valuestr, 1); + break; + case 'F': /* Fork procs to access DBs */ + use_procs = 1; + break; + case 'h': /* DBHOME */ + g.home = optarg; + break; + case 'i': /* Iterations */ + if (__db_getlong(NULL, g.progname, + optarg, 1, LONG_MAX, &g.iterations)) + return (EXIT_FAILURE); + break; + case 'I': /* Init the databases */ + init_only = 1; + break; + case 'L': /* Log clean thread */ + g.logclean = CLEAN_ARCHIVE; + break; + case 'o': /* Use an output file */ + g.outfile_name 
= optarg; + break; + case 'p': /* Use DB_PRIVATE */ + g.private = 1; + break; + case 'r': /* Output region stats */ + region_stats = 1; + break; + case 'R': /* Use an RPC server */ + g.rpc_server = optarg; + break; + case 's': /* Trace stats over time */ + g.stats_trace = 1; + break; + case 'S': /* Random seed */ + g.seed = atoi(optarg); + break; + case 'T': /* Trickle thread */ + if (__db_getlong(NULL, g.progname, + optarg, 1, 100, &g.trickle)) + return (EXIT_FAILURE); + break; + case 'v': /* Verbose output */ + g.verbose = 1; + break; + case 'V': /* Verify db. */ + verify = 1; + break; + case 'x': /* Recover db. */ + g.recover = 1; + break; + case 'X': /* FatalRecover db. */ + g.recover += 2; + break; + case 'y': /* Sync thread */ + g.sync = 1; + break; + case 'Z': + /* + * When we're using a separate process for + * each access-method "thread," this argument + * gives the thread ID of this process. + */ + g.child = atoi(optarg); + break; + default: + fprintf(stderr, "Unknown flag: '%c'.\n", ch); /* FALLTHROUGH */ + case '?': + usage(); + /* NOTREACHED */ + } + + /* + * Some basic argument checking. Fail if: + * 1) there are extra arguments, or + * 2) no config file is specified for init/batch + * 3) batch and stats_trace are specified together + */ + if (argc != optind) { + fprintf(stderr, "Unexpected arguments\n"); + usage(); + /* NOTREACHED */ + } + if ((init_only || batch) && !g.config) { + fprintf(stderr, "With -I or -B, must specify -c\n"); + usage(); + /* NOTREACHED */ + } + if (batch && g.stats_trace) { + fprintf(stderr, "-s and -B are incompatible\n"); /* newline keeps the usage text on its own line */ + usage(); + } + + /* + * Seed the random-number generator here, in case the config + * file requires us to randomly select a configuration value. + */ + db_init_random(&g.rand_state, g.seed); + + if (g.config != NULL && stat(g.config, &sb) != 0) { + fprintf(stderr, "Config file \"%s\" does not exist\n", + g.config); + return (EXIT_FAILURE); + } + + /* Create or clean the DB home directory.
*/ + if (stat(g.home, &sb) != 0 || !S_ISDIR(sb.st_mode)) { + if ((ret = mkdir(g.home, 0777)) != 0) { + fprintf(stderr, "Failed to create directory %s: %s\n", + g.home, strerror(errno)); /* mkdir returns -1; the cause is in errno */ + return (EXIT_FAILURE); + } + } else if (init_only && (ret = clear_dir(g.home)) != 0) { + fprintf(stderr, "Could not clean home directory %s\n", g.home); + return (EXIT_FAILURE); + } + + if (batch) + ret = run_batch(0); + else { + if (g.stats_trace || g.outfile_name != NULL) { + if (g.outfile_name == NULL) + g.outfile_name = OUTFILE; + (void)snprintf(buf, sizeof(buf), "%s/%s", g.home, g.outfile_name); /* bounded: -h and -o come from the command line */ + if ((g.outfp = fopen(buf, "a")) == NULL) { + fprintf(stderr, + "Could not open output file\n"); + return (EXIT_FAILURE); + } + } else + g.outfp = stdout; + setvbuf(g.outfp, NULL, _IOLBF, BUFSIZ); + + ret = driver(init_only, init_only, verify, thread_id, + use_procs, region_stats); + } + + (void)close_var_db(); + return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); +} +#endif + +/* + * driver - perform any setup that is common to both VxWorks and + * non-VxWorks environments, and then call the appropriate function + * to initialize or verify the database(s), or to run a workload. + * This is called by both main() and run_batch(). + */ +int +driver(init_only, user_config, verify, thread_id, use_procs, region_stats) + int init_only, user_config, verify, thread_id; + int use_procs, region_stats; +{ + int ret; + struct stat sb; + + ret = 0; + + /* Apply the configuration file. */ + if ((ret = apply_configuration(user_config)) != 0) + return (ret); + + if (g.rpc_server && !g.iterations) { + fprintf(stderr, "With RPC, you must specify a " \ + "fixed number of iterations\n"); + ret = EINVAL; + goto err; + } + + /* Create or clean the archive directory.
*/ + if (config.archive != NULL && + (stat(config.archive, &sb) != 0 || !S_ISDIR(sb.st_mode))) { + if ((ret = mkdir(config.archive, 0777)) != 0) { + fprintf(stderr, "Failed to create directory %s: %s\n", + config.archive, strerror(ret)); + return (EXIT_FAILURE); + } + } else if (init_only && config.archive != NULL && + (ret = clear_dir(config.archive)) != 0) { + fprintf(stderr, "Could not clean archive directory %s\n", + config.archive); + goto err; + } + + g.shutdown = 0; + + /* If we're using a run log, add an entry to it. */ + if (config.output_info_fn != NULL) + config.output_info_fn(init_only, verify, thread_id, use_procs); + + if (init_only) + ret = init_databases(); + else if (verify) { + ret = run_verify(g.recover); + if (config.dbs && ret == 0) + ret = dbs_check_databases(); + } else if (config.killtest_iter > 0) + ret = do_killtest(thread_id, use_procs); + else + ret = run_threads(thread_id, use_procs, region_stats); + +err: do_cleanup(); + return (ret); +} + +/* + * init_databases - initialize the database files, populating them + * with their initial key, value pairs. + */ +static int +init_databases() +{ + int checkpoint_save, i, ret, t_ret; + perf_clean_t logclean_save; + /* Don't try to perform recovery. */ + if ((ret = open_handles(1, 0)) != 0) + return (ret); + + /* + * Archiving during initialization can be problematic (e.g., a + * database may not be created when we try to archive it). + * Therefore, if the workload includes archiving, we don't + * allow the checkpoint and logclean threads to run during + * initialization. + */ + checkpoint_save = g.checkpoint; + logclean_save = g.logclean; + if (config.archive != NULL) { + if (g.logclean != CLEAN_NONE) + g.dbenv->errx(g.dbenv, + "checkpoint and logclean threads will not be run " + "during database init"); + g.checkpoint = 0; + g.logclean = CLEAN_NONE; + } + + /* Start supporting threads. 
*/ + if (g.trickle && trickle_init(g.trickle) != 0) + return (1); + if (g.checkpoint && checkpoint_init() != 0) + return (1); + if (g.logclean && log_init() != 0) + return (1); + if (g.sync && sync_init() != 0) + return (1); + + /* Initialize the files. */ + for (i = 0; i < config.file_count; i++) { + if ((ret = config.file[i]->init(config.file[i])) != 0) { + perror("file init"); + goto err; + } + } + + /* Tell remaining threads to exit; wait for them. */ + g.shutdown = 1; + g.dbenv->errx(g.dbenv, "Waiting for support threads to wake up..."); + + if (g.logclean && log_shutdown() != 0) + return (1); + if (g.trickle != 0 && trickle_shutdown() != 0) + return (1); + if (g.sync && sync_shutdown() != 0) + return (1); + if (g.checkpoint && checkpoint_shutdown() != 0) + return (1); + + g.checkpoint = checkpoint_save; + g.logclean = logclean_save; + + /* If we are private, we must checkpoint for a clean shutdown. */ + if (((config.env_flags & DB_PRIVATE) || g.private) && + (ret = g.dbenv->txn_checkpoint(g.dbenv, 0, 0, 0)) != 0) { + fprintf(g.outfp, "%s: g.dbenv->checkpoint: %s\n", + g.progname, db_strerror(ret)); + } + +err: if ((t_ret = close_handles()) != 0 && ret == 0) + ret = t_ret; + + return (ret); +} + +/* + * run_threads - run the benchmark. If thread_id != -1, then we are + * using processes instead of threads to access the databases and + * this function is being called by one of the access-method processes. + * Otherwise, we fork off all of the necessary threads or processes, + * based on the value of use_procs. + */ +static int +run_threads(thread_id, use_procs, show_region_stats) + int thread_id, use_procs, show_region_stats; +{ + int i, stat_check; + + /* Open the DB_ENV and DB handles. */ + if (open_handles(0, g.recover) != 0) + return (1); + + /* + * If we're using multiple processes, this code should only be + * executed once. 
+ */ + if (thread_id == -1) { + g.threads_started = 0; + g.dbenv->errx(g.dbenv, "global seed: %d", g.seed); + + /* Perform the specified initial scans (if any). */ + for (i = 0; i < config.num_initial_scans; i++) { + assert(config.scan[i] != NULL); + if (config.scan[i]->execute(config.scan[i]) != 0) { + g.dbenv->errx(g.dbenv, "file_scan"); + return (1); + } + } + } + + /* Start supporting threads. */ + if (g.trickle && trickle_init(g.trickle) != 0) + return (1); + if (g.checkpoint && checkpoint_init() != 0) + return (1); + if (g.logclean && log_init() != 0) + return (1); + if (g.sync && sync_init() != 0) + return (1); + if (config.deadlock != DB_LOCK_NORUN && dead_init() != 0) + return (1); + /* + * There are two cases in which we don't run the stat-check + * thread: + * - the user has specified the "special" application type and has + * not specified DB_THREAD as one of the environment flags. + * - the user has specified a fixed number of iterations for + * each thread to perform and has not chosen to trace the stats + */ + stat_check = (config.env_flags & DB_THREAD) && + (g.stats_trace || !g.iterations); + if (stat_check && stat_init() != 0) + return (1); + + /* Start access method threads and wait for them. */ + if (use_procs) { + /* Run each "thread" as a separate process. */ + if (fork_procs() != 0 || + wait_procs("am_threads", children) != 0) + return (1); + } else if (thread_id != -1) { + /* + * This process is serving as a single "thread" + * accessing the database. + */ + thread_run((void *)(uintptr_t)thread_id); + return (0); + } else { + /* Actually use threads. */ + am_threads = + spawn_kids("am_threads", config.nthreads, thread_run); + if (am_threads == NULL) + return (1); + if (wait_kids("am_threads", am_threads) != 0) + return (1); + } + assert(thread_id == -1); + + /* Tell remaining threads to exit; wait for them. 
*/ + g.shutdown = 1; + g.dbenv->errx(g.dbenv, "Waiting for support threads to wake up..."); + + if (g.logclean && log_shutdown() != 0) + return (1); + if (g.trickle != 0 && trickle_shutdown() != 0) + return (1); + if (g.sync && sync_shutdown() != 0) + return (1); + if (g.checkpoint && checkpoint_shutdown() != 0) + return (1); + if (stat_check && stat_shutdown() != 0) + return (1); + if (config.deadlock && dead_shutdown() != 0) + return (1); + + /* Report the statistics. */ + if (report_stats(show_region_stats) != 0) + return (1); + + /* Since verify cannot run in the environment, sync it. */ + (void)g.dbenv->txn_checkpoint(g.dbenv, 0, 0, DB_FORCE); + + /* Close the handles */ + if (close_handles() != 0) + return (1); + + return (0); +} + +#define MAX_PREP 64 +static int +txn_recover(dbenv) + DB_ENV *dbenv; +{ + DB_PREPLIST *p, prep[MAX_PREP]; + u_int32_t mode; + int ret; + long count, i; + + mode = DB_FIRST; + do { + if ((ret = dbenv->txn_recover(dbenv, + prep, MAX_PREP, &count, mode)) != 0) { + dbenv->err(dbenv, + ret, "DB_ENV->txn_recover"); + goto err; + } + p = prep; + for (i = 0; i < count; i++, p++) { + /* Abort is harder. */ + if (random_int(&g.rand_state, + 0, 3) == 0) { + dbenv->errx(dbenv, + "Committing: %x", + p->txn->id(p->txn)); + p->txn->commit(p->txn, 0); + } else { + dbenv->errx(dbenv, + "Aborting: %x", + p->txn->id(p->txn)); + p->txn->abort(p->txn); + } + } + mode = DB_NEXT; + } while (count == MAX_PREP); +err: + return (ret); +} + +/* + * run_verify - run DB->verify() on the databases, including any + * archived databases. + */ +static int +run_verify(recover) + int recover; +{ + DB *dbp; + DB_ENV *dbenv; + FILE_INFO *file; + const char *env, *subdb_name; + int flags, i, subdb_checked, ret, ver_flag; + + dbenv = NULL; + dbp = NULL; + ret = 0; + +#ifndef DB_WIN32 + /* Just to be sure - turn off write errors. 
*/ + db_env_set_func_write(NULL); +#endif + + /* + * Verify the current database and, if we're archiving, verify the + * archived database, too. + */ + for (env = g.home; env != NULL; env = config.archive, recover = 1) { + fprintf(g.outfp, "%s: db_verify\n", env); + + /* Create an environment handle. */ +start: if ((ret = db_env_create( + &dbenv, g.rpc_server ? DB_RPCCLIENT : 0)) != 0) { + fprintf(g.outfp, "%s: db_env_create: %s\n", + g.progname, db_strerror(ret)); + return (1); + } + + if (g.rpc_server) { + if ((ret = dbenv->set_rpc_server(dbenv, + NULL, g.rpc_server, 0, 0, 0)) != 0) + goto err; + } + +#ifdef HAVE_VXWORKS + (void)dbenv->set_shm_key(dbenv, SHMKEY_DBS); + (void)dbenv->set_tmp_dir(dbenv, g.tmp); +#endif + (void)dbenv->set_errfile(dbenv, stderr); + dbenv->set_thread_count(dbenv, 100); + if ((ret = configure_cache(dbenv)) != 0) + goto err; + + /* Try to ensure that we have enough locks. */ + if ((ret = dbenv->set_lk_max_lockers(dbenv, 100000)) != 0) { + dbenv->err(dbenv, ret, "set_lk_max_lockers: 100000"); + goto err; + } + if ((ret = dbenv->set_lk_max_locks(dbenv, 100000)) != 0) { + dbenv->err(dbenv, ret, "set_lk_max_locks: 100000"); + goto err; + } + if ((ret = dbenv->set_lk_max_objects(dbenv, 100000)) != 0) { + dbenv->err(dbenv, ret, "set_lk_max_objects: 100000"); + goto err; + } + + /* Recover if requested */ + if (recover) { + if ((ret = dbenv->set_verbose(dbenv, DB_VERB_RECOVERY, + 1)) != 0) { + dbenv->err(dbenv, ret, + "set_verbose: DB_VERB_RECOVERY, 1"); + goto err; + } + flags = DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG | + DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | + ((recover == 1) ? 
DB_RECOVER : DB_RECOVER_FATAL); + if ((config.env_flags & DB_PRIVATE) || g.private) + flags |= DB_PRIVATE; + if (config.passwd != NULL) { + ret = dbenv->set_encrypt( + dbenv, config.passwd, DB_ENCRYPT_AES); + if (ret != 0) { + dbenv->err(dbenv, ret, + "%s: set_encrypt", config.passwd); + goto err; + } + } + if ((ret = dbenv->open(dbenv, env, flags, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + /* + * Recover transactions only if this is not an + * archive operation. We do not want to abort + * something that may actually get committed + * in the primary database. In particular this + * does not work for Queue. + */ + if (!archive && (ret = txn_recover(dbenv)) != 0) + goto err; + + (void)dbenv->close(dbenv, 0); + recover = 0; + goto start; + } + + /* Open it. */ + flags = DB_INIT_MPOOL | DB_PRIVATE | DB_CREATE; + if (config.passwd != NULL) { + ret = dbenv->set_encrypt( + dbenv, config.passwd, DB_ENCRYPT_AES); + if (ret != 0) { + dbenv->err(dbenv, + ret, "%s: set_encrypt", config.passwd); + goto err; + } + } + if ((ret = dbenv->open(dbenv, env, flags, 0)) != 0) { + dbenv->err(dbenv, ret, "DB_ENV->open"); + goto err; + } + + /* Iterate through the databases. */ + for (i = 0; i < config.file_count; i++) { + file = config.file[i]; + + /* + * On the first (and possibly only) call to + * db_verify, we check the file as a whole. + * If there are subdatabases in the file, we + * use the DB_NOORDERCHK flag now and later + * check the ordering of the individual + * subdatabases. + */ + subdb_name = NULL; + subdb_checked = 0; + ver_flag = (file->subdb_name != NULL ? + DB_NOORDERCHK : 0); + + /* Create a DB handle for the file. */ +file_check: if ((ret = db_create(&dbp, dbenv, 0)) != 0) { + fprintf(stderr, + "%s: db_create: %s\n", g.progname, + db_strerror(ret)); + goto err; + } + + /* + * Only activate the settings that matter for + * verification purposes. 
+ */ + if (file->flags & DB_RECNUM) { + if ((ret = dbp->set_flags(dbp, + DB_RECNUM)) != 0) { + dbp->err(dbp, ret, "set_flags"); + goto err; + } + if (file->method_params != NULL && + file->method_params[1] != NULL) { + ret = dbp->set_bt_compare(dbp, + (int (*)(DB *, const DBT *, + const DBT *)) + file->method_params[1]); + if (ret != 0) { + dbp->err(dbp, ret, + "set_bt_compare"); + goto err; + } + } + + } + if (file->flags & DB_DUPSORT) { + if ((ret = dbp->set_flags(dbp, + DB_DUPSORT)) != 0) { + dbp->err(dbp, ret, "set_flags"); + goto err; + } + if (file->method_params != NULL && + file->method_params[2] != NULL) { + ret = dbp->set_dup_compare(dbp, + (int (*)(DB *, const DBT *, + const DBT *)) + file->method_params[2]); + if (ret != 0) { + dbp->err(dbp, ret, + "set_dup_compare"); + goto err; + } + } + } + + /* + * Perform the verification -- the verify method is + * a destructor. + */ + if ((ret = dbp->verify(dbp, file->name, + subdb_name, NULL, ver_flag)) != 0) { + dbenv->err(dbenv, ret, "DB->verify: %s %s", + file->name, (subdb_name != NULL) ? + subdb_name : "(nil)"); + dbp = NULL; + goto err; + } + dbp = NULL; + + /* + * If this database is a subdatabase, + * we perform a second verification on just + * the subdatabase so that we can check its + * ordering. + */ + if (!subdb_checked && file->subdb_name != NULL) { + subdb_name = file->subdb_name; + ver_flag = DB_ORDERCHKONLY; + subdb_checked = 1; + goto file_check; + } + } + + /* Close DB_ENV handle. */ + if ((ret = dbenv->close(dbenv, 0)) != 0) { + fprintf(stderr, + "DB_ENV->close: %s: %s", env, db_strerror(ret)); + dbenv = NULL; + goto err; + } + dbenv = NULL; + + /* + * We use strcmp because it is possible for + * g.home to be identical to config.archive. 
+ */ + if (config.archive != NULL && !strcmp(env, config.archive)) + break; + } + +err: if (dbp != NULL) + (void)dbp->close(dbp, 0); + if (dbenv != NULL) + (void)dbenv->close(dbenv, 0); + + return (ret); +} + +int +say_dead(dbenv, pid, tid, flags) + DB_ENV *dbenv; + pid_t pid; + db_threadid_t tid; + u_int32_t flags; +{ + COMPQUIET(dbenv, NULL); + COMPQUIET(*(u_int8_t *)&tid, 0); + COMPQUIET(flags, 0); + + return (pid == getpid() ? 1 : 0); +} + +/* + * do_killtest - repeatedly fork a child to run the benchmark, and + * kill it after a specified interval, at which point we run recovery + * and verify the databases. + */ +static int +do_killtest(thread_id, use_procs) + int thread_id, use_procs; +{ +#if (!defined(HAVE_VXWORKS) && !defined(HAVE_QNX) && !defined(DB_WIN32)) + FILE *file; + int i, pid, ret, retry_cnt, status; + char buf[1024], path[1024]; + + retry_cnt = 0; + + for (; config.killtest_iter > 0; config.killtest_iter--) { + /* Perform the fork. */ + g.child = fork(); + if (g.child == -1) { + perror("Fork failed"); + return (-1); + } else if (g.child != 0) { + /* Parent */ + + /* Open the env so we can run failcheck. */ + if (!((config.env_flags & DB_PRIVATE) || g.private) && + setup_environment(0, 0) != 0) { + fprintf(g.outfp, "[Parent] Open failed\n"); + return (1); + } + /* Let child run for the specified interval. */ + fprintf(g.outfp, "[Parent] sleeping\n"); + __os_sleep(g.dbenv, config.killtest_interval, 0); + + /* Wait for archive recovery to finish, if any. */ + snprintf(buf, sizeof(buf), "%s/RECOVER", g.home); + while ((file = fopen(buf, "r")) != NULL) { + (void)fclose(file); + if ((pid = + waitpid(g.child, &status, WNOHANG)) != 0) + goto died; + fprintf(g.outfp, "[Parent] wait RECOVER\n"); + __os_sleep(g.dbenv, 5, 0); + } + /* Kill child and get the result. 
*/ + fprintf(g.outfp, "[Parent] kill child\n"); + kill(g.child, SIGTERM); + fprintf(g.outfp, "[Parent] wait child\n"); + +retry: pid = wait(&status); +died: if (pid == -1) { + perror("Wait failed"); + if (retry_cnt++ > 10) + return (1); + goto retry; + } + if (pid != g.child) { + fprintf(g.outfp, "Wrong child %d != %d\n", + g.child, pid); + return (1); + } + if (WIFSIGNALED(status) ? + (status & 0x7f) != SIGTERM + : WEXITSTATUS(status) != 0) { + fprintf(g.outfp, "Child exit %d\n", status); + return (1); + } + +#ifndef HAVE_MUTEX_SOLARIS_LWP + /* + * This is horrible, but it's possible on some systems + * (e.g. Linux) that the child process has exited, but + * that some of its threads are still active. Wait for + * a while and hope that the signal has propagated to + * all threads. + */ + __os_sleep(g.dbenv, 5, 0); +#endif + + /* Take a copy of the databases before recovery. */ + for (i = 0; i < config.file_count; i++) { + sprintf(buf, "cp %s/%s %s/%s.save", + g.home, config.file[i]->name, + g.home, config.file[i]->name); + if (my_system(buf, 1)) + return (1); + } + + if ((config.env_flags & DB_PRIVATE) || g.private) + goto no_fc; + /* Try to see if we can continue. */ + fprintf(g.outfp, "[Parent] Fail check... "); + /* For the record, see whats there. 
*/ + strcpy(path, g.progpath); + path[strlen(path) - strlen(g.progname)] = '\0'; + if (config.passwd != NULL) + sprintf(buf, + "%sdb_stat -Ne -P %s -h %s | grep thread", + path, config.passwd, g.home); + else + sprintf(buf, + "%sdb_stat -Ne -h %s | grep thread", + path, g.home); + if (my_system(buf, 1)) + return (1); + g.dbenv->set_isalive(g.dbenv, say_dead); + if (g.dbenv->failchk(g.dbenv, 0) == 0) { + fprintf(g.outfp, "passed!\n"); + if (txn_recover(g.dbenv)) { + fprintf(g.outfp, + "[Parent] txn_recover failed\n"); + return (1); + } + if (g.dbenv->close(g.dbenv, 0) != 0) { + fprintf(g.outfp, + "[Parent] close failed\n"); + return (1); + } + continue; + } + fprintf(g.outfp, "failed!\n"); + g.dbenv->set_flags(g.dbenv, DB_NOLOCKING, 1); + if (g.dbenv->close(g.dbenv, 0) != 0) { + fprintf(g.outfp, + "[Parent] close failed\n"); + return (1); + } +no_fc: + /* Run recovery and verify the databases. */ + fprintf(g.outfp, "[Parent] recover/verify\n"); + if (run_verify(1) != 0) + return (1); + + if (config.dbs && dbs_check_databases() != 0) + return (1); + + for (i = 0; i < config.file_count; i++) { + sprintf(buf, "cp %s/%s %s/%s.prev", + g.home, config.file[i]->name, + g.home, config.file[i]->name); + if (my_system(buf, 1)) + return (1); + } + } else { + /* Child */ + exit(run_threads(thread_id, use_procs, 0)); + } + } + /* Open the env so we can checkpoint for verify. */ + if (setup_environment(0, 0) != 0) { + fprintf(g.outfp, "[Parent] Open failed\n"); + return (1); + } + do { + ret = g.dbenv->txn_checkpoint(g.dbenv, 0, 0, DB_FORCE); + if (ret != 0 && ret != EIO) { + fprintf(g.outfp, "[Parent] checkpoint failed\""); + return (1); + } + } while (ret != 0); + if (g.dbenv->close(g.dbenv, 0) != 0) { + fprintf(g.outfp, + "[Parent] close failed\""); + return (1); + } + +#endif + + return (0); +} + +/* + * my_printf - used for some error and status messages in run_batch(). 
+ * This allows us to get messages to the original stderr, even after
+ * we have redirected stderr to an output file.
+ *
+ * The message is formatted into a fixed 1024-byte buffer; anything
+ * longer is truncated rather than written past the end of the buffer.
+ */
+static void
+#ifdef STDC_HEADERS
+my_printf(int fd, const char *format, ...)
+#else
+my_printf(fd, format, va_alist)
+	int fd;
+	const char *format;
+	va_dcl
+#endif
+{
+	char buf[1024];
+	va_list ap;
+
+#ifdef STDC_HEADERS
+	va_start(ap, format);
+#else
+	va_start(ap);
+#endif
+	/* Bounded formatting: vsnprintf always NUL-terminates buf. */
+	(void)vsnprintf(buf, sizeof(buf), format, ap);
+	va_end(ap);
+
+	/* Best effort: there is nowhere left to report a failed write. */
+	(void)write(fd, buf, strlen(buf));
+}
+
+/*
+ * run_batch - perform the specified number of runs using the same
+ * config file.
+ *
+ * Each run removes and recreates the environment in g.home, points
+ * stdout/stderr at <home>/OUTPUT, and then calls driver() to initialize,
+ * exercise and (except on VxWorks) verify the databases.  A batch_loops
+ * value of 0 means "run essentially forever".
+ *
+ * Returns 0 if every run succeeded; otherwise the batch stops at the
+ * first failure and a non-zero value is returned.
+ */
+int
+run_batch(batch_loops)
+	int batch_loops;
+{
+	DB_ENV *rmdbenv;
+	FILE *proc;
+	perf_clean_t logclean;
+	time_t now;
+	int err_fd, i, ret;
+	const char *user_config;
+	char buf[256], time_buf[CTIME_BUFLEN];
+
+	/* Record our process ID so that the batch can be found later. */
+	if ((proc = fopen("db_perf.pid", "w")) != NULL) {
+		fprintf(proc, "%d\n", getpid());
+		fclose(proc);
+	}
+	/* If batch_loops is 0, make it essentially infinite. */
+	if (batch_loops == 0)
+		batch_loops = 1*1024*1024*1024;
+
+	/* Keep a descriptor for the original stderr; see my_printf(). */
+#ifdef HAVE_VXWORKS
+	err_fd = fileno(stderr);
+#else
+	err_fd = dup(STDERR_FILENO);
+#endif
+
+	ret = 0;
+	user_config = g.config;
+	g.checkpoint = 1;
+	logclean = g.logclean;
+
+	/* Run the batch. */
+	for (i = 0; i < batch_loops; i++) {
+		/* Remove the current environment. */
+		if ((ret = db_env_create(&rmdbenv,
+		    g.rpc_server ? DB_RPCCLIENT : 0)) != 0) {
+			my_printf(err_fd, "%s: db_env_create: %s\n",
+			    g.progname, db_strerror(ret));
+			break;
+		}
+
+		if (g.rpc_server) {
+			if ((ret = rmdbenv->set_rpc_server(rmdbenv,
+			    NULL, g.rpc_server, 0, 0, 0)) != 0) {
+				/* Don't leak the handle we just created. */
+				(void)rmdbenv->close(rmdbenv, 0);
+				break;
+			}
+		}
+
+#ifdef HAVE_VXWORKS
+		rmdbenv->set_shm_key(rmdbenv, SHMKEY_DBS);
+		rmdbenv->set_tmp_dir(rmdbenv, g.tmp);
+#endif
+		if ((ret = rmdbenv->remove(rmdbenv, g.home, DB_FORCE)) != 0) {
+			my_printf(err_fd, "%s: envremove: %s\n",
+			    g.progname, db_strerror(ret));
+			break;
+		}
+
+		/* Remove and recreate the home directory */
+		if (clear_dir(g.home) != 0) {
+			my_printf(err_fd,
+			    "Could not clean home directory %s\n", g.home);
+			/* Make sure the caller sees this as a failure. */
+			ret = 1;
+			break;
+		}
+
+		/*
+		 * Clean the directory that preserves the contents of the
+		 * previous archive.
+		 */
+		if (access("PRESERVE", W_OK|X_OK) == 0) {
+			snprintf(buf, sizeof(buf), "rm -f PRESERVE/*");
+			if ((ret = my_system(buf, 1)) != 0) {
+				fprintf(stderr,
+				    "logclean: command failed(%d): %s", errno, buf);
+				break;
+			}
+		}
+
+		/* Prepare the output file. */
+		snprintf(buf, sizeof(buf), "%s/%s", g.home, OUTFILE);
+		if ((g.outfp = fopen(buf, "a")) == NULL) {
+			fprintf(stderr, "Could not open output file\n");
+			/* Make sure the caller sees this as a failure. */
+			ret = 1;
+			break;
+		}
+		setvbuf(g.outfp, NULL, _IOLBF, BUFSIZ);
+
+		/* Print a status message. */
+		time(&now);
+		g.seed = now;
+		my_printf(err_fd,
+		    "Starting iteration %d: %s", i, __db_ctime(&now, time_buf));
+		fprintf(g.outfp,
+		    "Starting iteration %d: %s", i, __db_ctime(&now, time_buf));
+
+#ifndef HAVE_VXWORKS
+		/*
+		 * Put stderr and stdout into our output file.  freopen()
+		 * closes the old stream itself, so we must not fclose()
+		 * the streams first: using a closed FILE is undefined.
+		 */
+		snprintf(buf, sizeof(buf), "%s/OUTPUT", g.home);
+		if (freopen(buf, "a", stderr) == NULL)
+			abort();
+		if (freopen(buf, "a", stdout) == NULL)
+			abort();
+		setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
+		setvbuf(stderr, NULL, _IOLBF, BUFSIZ);
+		fprintf(stdout, "%s: %s\n", g.progpath, g.config);
+#endif
+
+		/* Restore the settings that a previous run may have changed. */
+		g.config = user_config;
+		g.logclean = logclean;
+
+		/* Initialize the database. */
+		if ((ret = driver(1, 1, 0, 0, 0, 0)) != 0)
+			break;
+
+		/* Run the workload. */
+		if ((ret = driver(0, 0, 0, -1, 0, 0)) != 0)
+			break;
+
+#ifndef HAVE_VXWORKS
+		/* Verify the database. */
+		if ((ret = driver(0, 0, 1, 0, 0, 0)) != 0)
+			break;
+#endif
+
+		fclose(g.outfp);
+		g.outfp = NULL;
+		my_printf(err_fd, " succeeded.\n");
+	}
+
+	if (g.outfp != NULL) {
+		/*
+		 * If g.outfp is non-NULL, then any error return
+		 * value indicates a failure in the last run.
+		 */
+		if (ret != 0)
+			my_printf(err_fd, " FAILED; error code: %d\n", ret);
+		fclose(g.outfp);
+	}
+	return (ret);
+}
+
+/*
+ * open_handles - open the DB_ENV and DB handles. init indicates
+ * whether we are about to initialize the databases.
+ *
+ * recover is passed through to setup_environment().  Opens that fail
+ * with DB_LOCK_DEADLOCK or DB_LOCK_NOTGRANTED are retried.  Returns 0
+ * on success and 1 on failure.
+ */
+int
+open_handles(init, recover)
+	int init, recover;
+{
+	int i, ret;
+
+	/* Setup the environment. */
+	if ((ret = setup_environment(init, recover)) != 0)
+		return (1);
+
+	/* Create and open the DB handles for the files. */
+	for (i = 0; i < config.file_count; i++) {
+		do
+			ret = config.file[i]->open_handles(config.file[i],
+			    (init ? DB_CREATE : 0) | DB_THREAD);
+		while (ret == DB_LOCK_DEADLOCK || ret == DB_LOCK_NOTGRANTED);
+
+		if (ret != 0) {
+			/*
+			 * Report the code we were actually handed; errno
+			 * need not have anything to do with it.
+			 */
+			g.dbenv->err(g.dbenv,
+			    ret, "create/open of DB handle");
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * close_handles - close the DB_ENV and DB handles.
+ *
+ * Every handle is closed even if an earlier close fails.  Returns 0 if
+ * all closes succeeded, otherwise one of the error codes encountered.
+ * g.dbenv is always NULL on return.
+ */
+int
+close_handles()
+{
+	int i, ret, t_ret;
+
+	ret = 0;
+
+#ifndef DB_WIN32
+	/* Turn off write errors. */
+	db_env_set_func_write(NULL);
+#endif
+
+	/* Close the DB handles. */
+	for (i = 0; i < config.file_count; i++) {
+		t_ret = config.file[i]->close_handles(config.file[i]);
+		if (t_ret != 0)
+			ret = t_ret;
+	}
+
+	/* Close the DB_ENV handle. */
+	if (g.dbenv != NULL && (t_ret = g.dbenv->close(g.dbenv, 0)) != 0) {
+		if (ret == 0)
+			ret = t_ret;
+		/* Report the close failure itself, not an earlier error. */
+		fprintf(g.outfp, "%s: g.dbenv->close: %s\n",
+		    g.progname, db_strerror(t_ret));
+	}
+	g.dbenv = NULL;
+
+	return (ret);
+}
+
+/*
+ * setup_environment - create the DB environment handle and initialize
+ * it appropriately.
+ *
+ * init:    non-zero when the databases are about to be initialized.
+ * recover: 1 requests DB_RECOVER, 2 requests DB_RECOVER_FATAL; any
+ *          other value opens the environment without recovery.
+ *
+ * DB_CREATE is added to the open flags when either argument is
+ * non-zero.  On success the open handle is left in g.dbenv and 0 is
+ * returned.  If the handle cannot be created, 1 is returned; if any
+ * later step fails, the handle is closed, g.dbenv is set to NULL and
+ * the failing call's error code is returned.
+ */
+static int
+setup_environment(init, recover)
+	int init, recover;
+{
+	DB_ENV *dbenv;
+	char **dir, *lg, buf[1024];
+	int ret;
+
+	/* Stay well-defined even if the getters below fail. */
+	dir = NULL;
+	lg = NULL;
+
+	/* Create the environment handle. */
+	ret = db_env_create(&g.dbenv, g.rpc_server ? DB_RPCCLIENT : 0);
+	if (ret != 0) {
+		fprintf(g.outfp, "%s: db_env_create: %s\n",
+		    g.progname, db_strerror(ret));
+		return (1);
+	}
+	dbenv = g.dbenv;
+
+	/*
+	 * Connect to the RPC server, if one was requested.  A single,
+	 * checked call is all that is needed.
+	 */
+	if (g.rpc_server && (ret = dbenv->set_rpc_server(dbenv,
+	    NULL, g.rpc_server, 0, 0, 0)) != 0) {
+		fprintf(g.outfp, "%s: set_rpc_server: %s\n",
+		    g.progname, db_strerror(ret));
+		goto err;
+	}
+
+	/* Miscellaneous items */
+#ifdef HAVE_VXWORKS
+	dbenv->set_shm_key(dbenv, SHMKEY_DBS);
+	dbenv->set_tmp_dir(dbenv, g.tmp);
+#endif
+	dbenv->set_errfile(dbenv, g.outfp);
+	dbenv->set_errpfx(dbenv, g.home);
+	dbenv->set_thread_count(dbenv, 100);
+
+	/* The lock and verbosity settings are skipped for RPC clients. */
+	if (!g.rpc_server) {
+		/* Try to ensure that we have enough locks. */
+		if ((ret = dbenv->set_lk_max_lockers(dbenv, 100000)) != 0) {
+			dbenv->err(dbenv, ret, "set_lk_max_lockers: 100000");
+			goto err;
+		}
+		if ((ret = dbenv->set_lk_max_locks(dbenv, 100000)) != 0) {
+			dbenv->err(dbenv, ret, "set_lk_max_locks: 100000");
+			goto err;
+		}
+		if ((ret = dbenv->set_lk_max_objects(dbenv, 100000)) != 0) {
+			dbenv->err(dbenv, ret, "set_lk_max_objects: 100000");
+			goto err;
+		}
+
+		/* Run the deadlock detector on every lock conflict. */
+		if (config.deadlock == DB_LOCK_NORUN &&
+		    (ret = dbenv->set_lk_detect(dbenv, DB_LOCK_DEFAULT))
+		    != 0) {
+			dbenv->err(dbenv, ret,
+			    "set_lk_detect: DB_LOCK_DEFAULT");
+			goto err;
+		}
+
+		/*
+		 * Tell DB to display additional information when performing
+		 * recovery.
+		 */
+		if ((ret =
+		    dbenv->set_verbose(dbenv, DB_VERB_RECOVERY, 1)) != 0) {
+			dbenv->err(dbenv, ret,
+			    "set_verbose: DB_VERB_RECOVERY, 1");
+			goto err;
+		}
+	}
+
+	/*
+	 * Default flags are set in main() and may be modified by
+	 * statements in the config file. However, the need for the
+	 * following flags can be indicated by command-line
+	 * arguments, so we may need to add them here.
+	 */
+	if (g.private)
+		config.env_flags |= DB_PRIVATE;
+	config.env_flags &= ~(DB_RECOVER | DB_RECOVER_FATAL);
+	if (recover == 1)
+		config.env_flags |= DB_RECOVER;
+	else if (recover == 2)
+		config.env_flags |= DB_RECOVER_FATAL;
+	if (init || recover)
+		config.env_flags |= DB_CREATE;
+
+	/*
+	 * Deal with cases in which we want to deliberately yield the
+	 * processor on every page request to maximize concurrency.
+	 */
+	if (config.yield_on_request)
+		dbenv->set_flags(dbenv, DB_YIELDCPU, 1);
+
+	/* Configure the cachesize as needed. */
+	if ((ret = configure_cache(dbenv)) != 0)
+		goto err;
+
+	/* Open/create the environment. */
+	if (config.passwd != NULL) {
+		ret = dbenv->set_encrypt(dbenv, config.passwd, DB_ENCRYPT_AES);
+		if (ret != 0) {
+			dbenv->err(dbenv,
+			    ret, "%s: set_encrypt", config.passwd);
+			goto err;
+		}
+	}
+	ret = dbenv->open(dbenv, g.home, config.env_flags, 0);
+	if (ret != 0) {
+		dbenv->err(dbenv, ret, "%s: open", g.home);
+		goto err;
+	}
+
+	/*
+	 * Create the first configured data directory and the log
+	 * directory beneath the home directory.  Failures are ignored.
+	 */
+	(void)dbenv->get_data_dirs(dbenv, (const char ***)&dir);
+	if (dir != NULL && *dir != NULL) {
+		snprintf(buf, sizeof(buf), "%s/%s", g.home, *dir);
+		(void)__os_mkdir(dbenv, buf, __db_omode("rwx------"));
+	}
+	(void)dbenv->get_lg_dir(dbenv, (const char **)&lg);
+	if (lg != NULL) {
+		snprintf(buf, sizeof(buf), "%s/%s", g.home, lg);
+		(void)__os_mkdir(dbenv, buf, __db_omode("rwx------"));
+	}
+
+	/*
+	 * If we're using a write-error function to test recovering
+	 * from not enough space, enable it here.
+	 */
+	if (!init && config.write_err_start != -1)
+		write_err_init(dbenv);
+
+	return (0);
+
+err:	if (dbenv != NULL)
+		dbenv->close(dbenv, 0);
+	g.dbenv = NULL;
+	return (ret);
+}
+
+/*
+ * configure_cache - configure the cache size as needed.
+ *
+ * Also sets the maximum number of transactions, which is derived from
+ * the cache size.  Returns 0 on success, EINVAL if any configured file
+ * reports a size of 0, or the error returned by set_cachesize or
+ * set_tx_max.
+ */
+static int
+configure_cache(dbenv)
+	DB_ENV *dbenv;
+{
+	long fsize, size;
+	int bytes, gbytes, i, ncache, ntxns, pgmax, pgmin, pgsize, ret;
+
+	bytes = gbytes = ncache = 0;
+
+	/* Get the total size of the databases and the max / min page sizes. */
+	pgmax = 0;
+	pgmin = 64 * 1024;
+	for (i = 0, size = 0; i < config.file_count; i++) {
+		fsize = config.file[i]->get_size(config.file[i]);
+		if (fsize == 0) {
+			dbenv->errx(dbenv,
+			    "Configuration error - empty database");
+			return (EINVAL);
+		} else
+			size += fsize;
+		pgsize = config.file[i]->pagesize;
+		if (pgsize > pgmax)
+			pgmax = pgsize;
+		if (pgsize < pgmin)
+			pgmin = pgsize;
+	}
+
+	/*
+	 * Ordinarily, the cache will be sized by adding an
+	 * appropriate line to the DB_CONFIG file. However, there
+	 * are two special cases that we handle here.
+	 */
+
+	/* (1) We're running in DBS mode. */
+	if (config.dbs) {
+		/* Determine the number of mpool-cache files. */
+		ncache = random_int(&g.rand_state, 0, 3);
+		dbenv->errx(dbenv, "Using %d mpool caches", ncache);
+
+		/* 20 pages per thread */
+		size = pgmax * config.nthreads * 20;
+		gbytes = size / GIGABYTE;
+		bytes = size % GIGABYTE;
+	}
+
+	/*
+	 * (2) The cache size has been specified as a percentage
+	 * of the database size.
+	 */
+	else if (config.cache_size_percent > 0) {
+		/* Scale by the specified percentage. */
+		size = size * config.cache_size_percent / 100;
+		gbytes = size / GIGABYTE;
+		bytes = size % GIGABYTE;
+
+		/*
+		 * DB increases cache sizes under 500 MB by 25%,
+		 * so we start with a size that is 20% smaller to
+		 * cancel out the increase.
+		 */
+		if (gbytes == 0 && bytes < 500 * MEGABYTE)
+			bytes = bytes * 4 / 5;
+	}
+
+	if (size < gbytes * GIGABYTE + bytes)	/* XXX: cache < 4GB! */
+		size = gbytes * GIGABYTE + bytes;
+
+	/* Only touch the cache settings if one of the cases above applied. */
+	if ((gbytes != 0 || bytes != 0 || ncache != 0) &&
+	    (ret = dbenv->set_cachesize(dbenv, gbytes, bytes, ncache)) != 0) {
+		dbenv->err(dbenv, ret, "set_cachesize: %d %d %d",
+		    gbytes, bytes, ncache);
+		return (ret);
+	}
+
+	/*
+	 * A page size of 0 would make the estimate below divide by zero.
+	 * Fall back to 512, Berkeley DB's minimum page size, which gives
+	 * the most generous (worst-case) transaction count.
+	 */
+	if (pgmin <= 0)
+		pgmin = 512;
+
+	/*
+	 * Set the maximum number of transactions here, since it is related to
+	 * the cache size (via MVCC). Make sure that there is enough room in
+	 * the transaction region for (worst case) one transaction per page in
+	 * the cache plus 3 transactions per thread (to cover sub-transactions).
+	 */
+	ntxns = (int)(5 * size / (4 * pgmin)) + 3 * config.nthreads;
+	printf("size = %ld, pgmin = %d, ntxns = %d\n", size, pgmin, ntxns);
+	if ((ret =
+	    dbenv->set_tx_max(dbenv, ntxns)) != 0) {
+		dbenv->err(dbenv, ret, "set_tx_max: %d", ntxns);
+		return (ret);
+	}
+
+	return (0);
+}
+
+/*
+ * fork_procs - fork separate processes for each access-method
+ * "thread."
+ *
+ * Each child is started through my_system() with this program's path
+ * and the -h, -i, -S and -Z options; the resulting process IDs are
+ * stored in children[].  Returns 0 on success and 1 on failure.
+ */
+static int
+fork_procs()
+{
+	int i, len;
+	char prefix[1024], command[1024];
+
+	/* Make sure that the children array is large enough. */
+	if ((u_int)config.nthreads >= sizeof(children) / sizeof(children[0])) {
+		g.dbenv->errx(g.dbenv, "children array too small, recompile");
+		return (1);
+	}
+
+	/*
+	 * Prepare the portion of the command line that is the same
+	 * for each process.
+	 */
+	len = snprintf(prefix, sizeof(prefix), "%s -h %s -i %ld -S %d",
+	    g.progpath, g.home, g.iterations, g.seed);
+	if (len < 0 || (size_t)len >= sizeof(prefix))
+		goto toolong;
+
+	/* Fork and exec the processes. */
+	for (i = 0; i < config.nthreads; i++) {
+		len = snprintf(command,
+		    sizeof(command), "%s -Z %d", prefix, i);
+		if (len < 0 || (size_t)len >= sizeof(command))
+			goto toolong;
+		fprintf(g.outfp, "%s\n", command);
+		if ((children[i] = my_system(command, 0)) == OS_BAD_PID)
+			return (1);
+	}
+
+	return (0);
+
+toolong:
+	/* Refuse to run a truncated command line. */
+	g.dbenv->errx(g.dbenv, "child command line too long");
+	return (1);
+}
+
+/*
+ * report_stats - compute and report the statistics for the run
+ *
+ * Everything is written to g.outfp.  When no fixed iteration count was
+ * given (g.iterations is 0), the totals come from the steady-state
+ * measurements recorded in g, and the periods before and during the
+ * measurement window are reported separately.  region_stats asks for
+ * output_stats() to be called for the measurement window as well.
+ * Always returns 0.
+ */
+static int
+report_stats(region_stats)
+	int region_stats;
+{
+	db_timespec v;
+	double end_time, start_time, time_before, time_during;
+	int i, j, total_ops, total_iter, total_records;
+	int txns_during;
+
+	/*
+	 * Determine the first start time and last end time,
+	 * and the total number of iterations.
+	 */
+	__os_gettime(g.dbenv, &v);
+	start_time = MKTIME(v);
+	end_time = 0;
+	total_iter = 0;
+	for (i = 0; i < config.nthreads; i++) {
+		if (config.thread[i]->start_time < start_time)
+			start_time = config.thread[i]->start_time;
+		if (config.thread[i]->end_time > end_time)
+			end_time = config.thread[i]->end_time;
+		total_iter += config.thread[i]->txn_count;
+	}
+
+	/* Report numbers of transactions and operations */
+	fprintf(g.outfp, "\n%s\n", g.tag);
+	fprintf(g.outfp, "total transactions:\t%d\n", total_iter);
+	fprintf(g.outfp, "total ops by type\n");
+	for (i = 0; i < NUM_OP_TYPES; i++) {
+		/* Sum this operation type over all of the threads. */
+		total_ops = 0;
+		total_records = 0;
+		for (j = 0; j < config.nthreads; j++) {
+			total_ops += config.thread[j]->op_count[i];
+			total_records += config.thread[j]->rec_count[i];
+		}
+		/* The figure in parentheses is operations per transaction. */
+		if (total_ops > 0)
+			fprintf(g.outfp, "\t%s:\t%d (%f)\n",
+			    op_names[i], total_ops,
+			    (double)total_ops / (double)total_iter);
+		/* The figure in parentheses is records per operation. */
+		if (total_records > 0)
+			fprintf(g.outfp, "\t\t%d (%f)\n",
+			    total_records,
+			    (double)total_records / (double)total_ops);
+	}
+
+	/* Report elapsed time and throughput. */
+	if (!g.iterations) {
+		end_time = g.steady_state_end_time;
+		total_iter = g.steady_state_end_txns;
+	}
+	fprintf(g.outfp, "\ntotal run:\n\t%f sec clock\n\t%f txns/s\n",
+	    end_time - start_time,
+	    total_iter / (end_time - start_time));
+	if (!g.iterations) {
+		time_before = g.steady_state_start_time - start_time;
+		time_during =
+		    g.steady_state_end_time - g.steady_state_start_time;
+		txns_during =
+		    g.steady_state_end_txns - g.steady_state_start_txns;
+		fprintf(g.outfp, "before measurement window:\n"
+		    "\t%f sec clock\n\t%f txns/s\n",
+		    time_before, (g.steady_state_start_txns / time_before));
+		fprintf(g.outfp, "during measurement window (%s):\n",
+		    (g.steady_state_reached ? "steady state" :
+		    "** NOT a steady state -- max time exceeded **"));
+		fprintf(g.outfp, "\t%f sec clock\n\t%f txns/s\n\n",
+		    time_during, (txns_during / time_during));
+		if (region_stats) {
+			fprintf(g.outfp,
+			    "stats during measurement window:\n");
+			output_stats(txns_during);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * do_cleanup - free memory allocated in processing the config file.
+ */ +static void +do_cleanup() +{ + SET_NODE *nnode, *snode; + int i; + + for (i = 0; i < config.file_count; i++) + file_info_destroy(config.file[i]); + __os_free(g.dbenv, config.file); + config.file = NULL; + + for (i = 0; i < config.num_initial_scans; i++) + __os_free(g.dbenv, config.scan[i]); + __os_free(g.dbenv, config.scan); + config.scan = NULL; + + for (i = 0; i < config.txn_type_count; i++) + txn_type_destroy(config.txn_type[i]); + __os_free(g.dbenv, config.txn_type); + config.txn_type = NULL; + + for (i = 0; i < config.thr_type_count; i++) + thr_type_destroy(config.thr_type[i]); + __os_free(g.dbenv, config.thr_type); + config.thr_type = NULL; + + for (i = 0; i < config.nthreads; i++) + thr_info_destroy(config.thread[i]); + __os_free(g.dbenv, config.thread); + config.thread = NULL; + + snode = config.shared_sets; + while (snode != NULL) { + set_destroy(snode->set, 1); + nnode = snode->next; + __os_free(g.dbenv, snode); + snode = nnode; + } + + if (config.archive != NULL) { + __os_free(g.dbenv, config.archive); + config.archive = NULL; + } + + if (g.mpstat != NULL) + __os_ufree(g.dbenv, g.mpstat); + g.mpstat = NULL; + + if (g.lkstat != NULL) + __os_ufree(g.dbenv, g.lkstat); + g.lkstat = NULL; + + if (g.lgstat != NULL) + __os_ufree(g.dbenv, g.lgstat); + g.lgstat = NULL; + + if (g.txstat != NULL) + __os_ufree(g.dbenv, g.txstat); + g.txstat = NULL; + + if (g.rusage != NULL) + __os_free(g.dbenv, g.rusage); + g.rusage = NULL; +} + +#ifndef HAVE_VXWORKS +static void +usage() +{ + fprintf(stderr, "Usage: %s -I -c config [-pv]\n\t\t%s\n", + g.progname, + "[-h home] [-S seed] [-D key=value]"); + fprintf(stderr, " or: %s [-CFLprsvVxX]\n\t\t%s\n", + g.progname, + "[-h home] [-i iterations] [-S seed] [-T trickle] [-D key=value]"); + fprintf(stderr, " or: %s -B -c config [-CLpsvxX]\n\t\t%s\n", + g.progname, + "[-h home] [-i iterations] [-S seed] [-T trickle] [-D key=value]"); + exit(EXIT_FAILURE); +} +#endif --- db-4.6.21/test_perf/perf_checkpoint.c 1970-01-01 
10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_checkpoint.c 2007-11-05 15:51:13.631920000 +1100
@@ -0,0 +1,68 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_checkpoint.c,v 12.6 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+static os_thread_t *checkpoint_threads; /* Checkpoint threads. */
+/* checkpoint_init - spawn one thread; 1 if spawn_kids() gives NULL, else 0. */
+int
+checkpoint_init()
+{
+	return ((checkpoint_threads = spawn_kids(
+	    "checkpoint threads", 1, checkpoint_thread)) == NULL ? 1 : 0);
+}
+/* checkpoint_shutdown - hand the threads to wait_kids(); return its result. */
+int
+checkpoint_shutdown()
+{
+	return (wait_kids("checkpoint_threads", checkpoint_threads));
+}
+/* checkpoint_thread - thread body: checkpoint, pause, repeat; returns NULL. */
+void *
+checkpoint_thread(arg)
+	void *arg;
+{
+	int ret;
+
+	arg = 0;				/* UNUSED. */
+#ifdef HAVE_VXWORKS
+	ioTaskStdSet(0, 1, g.stdfd);
+	ioTaskStdSet(0, 2, g.stdfd);
+#endif
+
+	g.dbenv->errx(g.dbenv, "Checkpoint thread: %lu",
+	    (u_long)os_thread_id());
+	/* Loop until g.shutdown is set or a non-EIO checkpoint error occurs. */
+	for (;;) {
+		g.dbenv->errx(g.dbenv, "Starting checkpoint");
+		/* Any error other than EIO: report, flag shutdown, exit. */
+		if ((ret =
+		    g.dbenv->txn_checkpoint(g.dbenv,
+		    config.checkpoint_size, config.checkpoint_time, 0)) != 0 &&
+		    ret != EIO) {
+			g.dbenv->err(g.dbenv, ret,
+			    "checkpoint thread: %s", db_strerror(ret));
+			g.shutdown = 1;
+			return (NULL);
+		}
+		/* Re-check the shutdown flag after the checkpoint call. */
+		if (g.shutdown)
+			return (NULL);
+
+		if (ret == EIO)
+			g.dbenv->errx(g.dbenv, "checkpoint thread: EIO");
+		else
+			g.dbenv->errx(g.dbenv, "checkpoint thread: complete");
+
+		/* XXX: Don't convoy. */
+		snooze(g.dbenv, 1000 * random_int(&g.rand_state, 5, 60));
+		if (g.shutdown)
+			return (NULL);
+	}
+	/* NOTREACHED */
+}
--- db-4.6.21/test_perf/perf_config.c 1970-01-01 10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_config.c 2007-11-05 15:51:13.651919000 +1100
@@ -0,0 +1,2406 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2000,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_config.c,v 12.17 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+#define	DEFAULT_MAX_FILES	10
+#define	DEFAULT_MAX_KEYGRPS	10
+#define	DEFAULT_MAX_DISTS	10
+#define	DEFAULT_MAX_SCANS	100
+#define	DEFAULT_MAX_TXN_TYPES	10
+#define	DEFAULT_MAX_THR_TYPES	10
+#define	DEFAULT_MAX_THREADS	10
+#define	DEFAULT_MAX_OPS		10
+#define	DEFAULT_CHARSET_SIZE	10
+
+#define	SYNTAX(expected, ret) {						\
+	fprintf(g.outfp, "%s:%d: incorrect syntax: expected \"%s\"\n",	\
+	    g.config, g.linenum, expected);				\
+	fflush(g.outfp);						\
+	return (ret);							\
+}
+
+#define	ERR_RET(msg, ret) {						\
+	fprintf(g.outfp, "%s:%d: %s\n",					\
+	    g.config, g.linenum, msg);					\
+	fflush(g.outfp);						\
+	return (ret);							\
+}
+
+#define	INCOMPLETE(ret) {						\
+	fprintf(g.outfp, "incomplete config file: %s\n",		\
+	    g.config);							\
+	fflush(g.outfp);						\
+	return (ret);							\
+}
+
+#define	VAL_DEFINED(var, ret) {						\
+	fprintf(g.outfp, "%s:%d: value already defined: %s\n",		\
+	    g.config, g.linenum, var);					\
+	fflush(g.outfp);						\
+	return (ret);							\
+}
+
+struct __perf_conf config;
+
+static int add_shared_set __P((SET *));
+static int apply_config_env __P((char *));
+static int apply_config_files __P((char *));
+static int apply_config_threads __P((char *));
+static int apply_config_thr_types __P((char *));
+static int apply_config_txns __P((char *));
+static void init_config __P((void));
+static int open_files __P((int));
+static SET *process_dists __P((void));
+static OP *process_op __P((op_type, int));
+static int process_thread_counts __P((int, int));
+static THR_TYPE *process_thread_type __P((void));
+static TXN_TYPE *process_txn_type __P((int));
+static FILE_INFO *process_file_info __P((int));
+static KEY_GROUP *process_key_group __P((FILE_INFO *));
+static int make_secondary __P((FILE_INFO *, int));
+static int finalize_file_info __P((FILE_INFO *, SET *));
+
+/*
+ * init_config - initialize the configuration structure
+ */
+static void
+init_config()
+{
+	if (config.passwd != NULL)
+		free(config.passwd);
+
+	memset(&config, 0, sizeof(config));
+
+	config.env_flags = (DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG |
+	    DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE);
+	config.logdir = g.home;
+	config.stat_check_interval = 5;
+	config.steady_state_time = 300;
+	config.steady_state_num_samples = 5;
+	config.steady_state_max_time = 900;
+	config.steady_state_variance = 0.0001;
+	config.steady_state_variance_pct = 0.0025;
+	config.write_err = config.write_err_count = -1;
+	config.write_err_start = config.write_rand = -1;
+}
+
+/*
+ * open_files - open the files used in processing the configuration.
+ */
+static int
+open_files(user_config)
+	int user_config;
+{
+	FILE *ret;
+
+	g.conf_fp = g.current_fp = NULL;
+
+	/*
+	 * Form the filename for a version of the config file that
+	 * includes the results of any random selections made when the
+	 * database is/was initialized.
+	 */
+	snprintf(config.conf_current, sizeof(config.conf_current),
+	    "%s/current.conf", g.home);
+
+	/*
+	 * If a config file has been specified, open current.conf for
+	 * writing. Otherwise, open it for reading and use it as
+	 * the config file.
+	 */
+	if (user_config)
+		ret = g.current_fp = fopen(config.conf_current, "w");
+	else
+		ret = g.conf_fp = fopen(config.conf_current, "r");
+	if (ret == NULL) {
+		fprintf(stderr, "error opening config file %s\n",
+		    config.conf_current);
+		return (EINVAL);
+	}
+
+	if (!user_config) {
+		g.config = config.conf_current;
+		return (0);
+	}
+
+	fprintf(g.current_fp, "# Generated from %s\n", g.config);
+
+	/*
+	 * If a config file has been specified, we also need to open
+	 * this file.
+	 */
+	if ((g.conf_fp = fopen(g.config, "r")) == NULL) {
+		perror("error opening config file");
+		fprintf(g.outfp, "error opening file: %s\n", g.config);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * add_shared_set - add a set to the list of possibly shared sets
+ */
+static int
+add_shared_set(set)
+	SET *set;
+{
+	SET_NODE *node;
+	int ret;
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(SET_NODE), &node)) != 0)
+		ERR_RET("malloc failed", ret);
+	node->set = set;
+
+	/* Insert the node at the head of the list. */
+	node->next = config.shared_sets;
+	config.shared_sets = node;
+
+	return (0);
+}
+
+/*
+ * apply_configuration - process the configuration file and initialize
+ * the variables that hold the config. info.
+ */
+int
+apply_configuration(user_config)
+	int user_config;
+{
+	FILE_INFO *file, *secon;
+	KEY_GROUP *kg;
+	PROB_DIST *dist;
+	SET *dup_dists;
+	int dsize_needed, i, j, ret;
+	char buf[DB_MAXPATHLEN];
+
+	init_config();
+	init_parser();
+	if ((ret = open_files(user_config)) != 0)
+		goto exit;	/* open_files may have left current.conf open. */
+
+	/* Process the various sections of the config. file. */
+
+	/*
+	 * The last line read by one of the apply_config_*
+	 * functions is the first line needed by the next function.
+	 * To maintain consistency, we get the first line needed
+	 * by apply_config_env and pass it in.
+	 */
+	if ((ret = get_line(buf, g.conf_fp)) != 0)
+		goto exit;
+	if ((ret = apply_config_env(buf)) != 0)
+		goto exit;
+	if ((ret = apply_config_files(buf)) != 0)
+		goto exit;
+	if ((ret = apply_config_txns(buf)) != 0)
+		goto exit;
+	if ((ret = apply_config_thr_types(buf)) != 0)
+		goto exit;
+	if ((ret = apply_config_threads(buf)) != 0)
+		goto exit;
+
+	dup_dists = NULL;
+	for (i = 0; i < config.file_count; i++) {
+		file = config.file[i];
+		if (HAS_SECONDARIES(file)) {
+			/*
+			 * Make sure that the primary data items are
+			 * large enough to store all of the associated
+			 * secondary keys.
+			 */
+			dsize_needed = 0;
+
+			/* Need to store a key for each secondary... */
+			for (j = 0; j < file->secondaries->item_count; j++) {
+				secon = (FILE_INFO *)file->
+				    secondaries->item[j];
+				dsize_needed += secon->max_key_size(secon);
+			}
+			/* ... and a header of offsets. */
+			dsize_needed += (file->secondaries->item_count + 1) *
+			    sizeof(u_int);
+
+			if (dsize_needed > file->min_data_size(file)) {
+				fprintf(g.outfp, "data items too small for "
+				    "primary file: file %d\n", i);
+				ret = EINVAL;
+				goto exit;
+			}
+		}
+
+		/*
+		 * In DBS mode, key groups should have one duplicate
+		 * per thread--or, in the case of record-based access
+		 * methods, nthreads times the specified number of
+		 * records. There are two exceptions: files that have
+		 * one or more secondary indices, and the secondary
+		 * indices themselves.
+		 */
+		if (config.dbs && !HAS_SECONDARIES(file) &&
+		    !IS_SECONDARY(file)) {
+			if (!IS_RECORD_BASED(file)) {
+				file->flags |= DB_DUP;
+				dup_dists = set_create(1);
+				dist = prob_dist_create(DIST_UNIFORM,
+				    config.nthreads, config.nthreads);
+				dup_dists->add_item(dup_dists, dist, 100);
+				if ((ret = add_shared_set(dup_dists)) != 0)
+					goto exit;
+			}
+			for (j = 0; j < file->key_groups->item_count; j++) {
+				kg = file->key_groups->item[j];
+				if (IS_RECORD_BASED(file)) {
+					assert(kg->type == KEYGRP_REC);
+					kg->key_count *= config.nthreads;
+				} else
+					/*
+					 * If a numdup_dists was
+					 * already specified, it will
+					 * be on the list of shared
+					 * sets and will be destroyed
+					 * later.
+					 */
+					kg->numdup_dists = dup_dists;
+			}
+		}
+	}
+
+	if (config.dbs && g.logclean == CLEAN_NONE)
+		g.logclean = CLEAN_ARCHIVE;
+
+exit:	if (g.conf_fp != NULL) {
+		(void)fclose(g.conf_fp);
+		g.conf_fp = NULL;
+	}
+	if (g.current_fp != NULL) {
+		(void)fclose(g.current_fp);
+		g.current_fp = NULL;
+	}
+	return (ret);
+}
+
+/*
+ * apply_config_env - parse and apply the portion of the config file
+ * that deals with the environment. line is the buffer into which
+ * the lines of the file will be read.
When the function is invoked,
+ * line should contain the first line of the environment section.
+ */
+static int
+apply_config_env(line)
+	char *line;
+{
+	FILE *dbconf_fp, *werr_fp;
+	SCAN *scan, **tmp;
+	double v6;
+	int appl_type_specified, flags_specified, i, nargs;
+	int ret, smax, steady_state_info_specified;
+	u_int v1, v2, v3, v4, v5;
+	char *name, *value, v7;
+	char buf[DB_MAXPATHLEN + 1], format[40];
+
+	/*
+	 * !!!
+	 * A width of 40 is hard-coded into the sscanf formats below; arg
+	 * holds those 40 characters plus the NUL that sscanf appends.
+	 */
+	char arg[41];
+
+	appl_type_specified = flags_specified = 0;
+	steady_state_info_specified = 0;
+	smax = DEFAULT_MAX_SCANS;
+
+	/* Form the filename home/DB_CONFIG. */
+	snprintf(buf, sizeof(buf), "%s/DB_CONFIG", g.home);
+
+	/* Open/create the DB_CONFIG file. */
+	if ((dbconf_fp = fopen(buf, "w")) == NULL)
+		ERR_RET("can't open DB_CONFIG file", EINVAL);
+	/* NOTE(review): SYNTAX/ERR_RET/VAL_DEFINED below return with it open. */
+	/* Clear the tag */
+	strcpy(g.tag, "");
+	/* One name/value line per pass; ends at the first "file..." line. */
+	do {
+		if ((ret = get_name_value(line, &name, &value)) != 0) {
+			(void)fclose(dbconf_fp);
+			return (ret);
+		}
+
+		if (!strncmp(name, "file", 4)) {
+			/*
+			 * This is the first line of the file-info
+			 * section, so we put the space back at the
+			 * end of the name so that the line will be parsed
+			 * correctly by apply_config_files.
+			 */
+			for (++name; *name != '\0'; ++name)
+				;
+			*name = ' ';
+
+			/*
+			 * Indicate that this line has already been
+			 * read once, so get_name_value() won't
+			 * output it twice to current.conf.
+			 */
+			g.line_preread = 1;
+
+			/*
+			 * If any of the write-error parameters
+			 * has been specified (as indicated by
+			 * config.write_err having a value of 0),
+			 * all of them must have been specified.
+			 */
+			(void)fclose(dbconf_fp);
+			if (config.write_err == 0 &&
+			    (config.write_err_count == -1 ||
+			    config.write_err_start == -1 ||
+			    config.write_rand == -1))
+				ERR_RET("incomplete write_err parameter set",
+				    EINVAL);
+
+			return (0);
+		} else if (!strcasecmp(name, "appl_type")) {
+			if (appl_type_specified)
+				VAL_DEFINED("appl_type", EINVAL);
+			if (flags_specified)
+				ERR_RET("appl_type must come before "
+				    "all env_flag entries", EINVAL);
+			if (sscanf(value, "%40s %c", arg, &v7) != 1)
+				SYNTAX("appl_type ", EINVAL);
+			if (!strcasecmp(arg, "TDS"))
+				config.env_flags = (DB_THREAD | DB_INIT_LOCK |
+				    DB_INIT_LOG | DB_INIT_MPOOL |
+				    DB_INIT_TXN | DB_CREATE);
+			else if (!strcasecmp(arg, "CDS"))
+				config.env_flags = (DB_THREAD | DB_INIT_CDB |
+				    DB_INIT_MPOOL | DB_CREATE);
+			else if (!strcasecmp(arg, "DS"))
+				config.env_flags = (DB_THREAD |
+				    DB_INIT_MPOOL | DB_CREATE);
+			else if (!strcasecmp(arg, "special"))
+				config.env_flags = 0;
+			else if (!strcasecmp(arg, "DBS")) {
+				config.env_flags = (DB_THREAD | DB_INIT_LOCK |
+				    DB_INIT_LOG | DB_INIT_MPOOL |
+				    DB_INIT_TXN | DB_CREATE);
+				config.dbs = 1;
+			} else
+				ERR_RET("unrecognized appl_type value",
+				    EINVAL);
+			appl_type_specified = 1;
+			strcat(g.tag, value);
+		} else if (!strcasecmp(name, "archive")) {
+			sprintf(format, "%%%ds %%c", DB_MAXPATHLEN);
+			if (sscanf(value, format, buf, &v7) != 1)
+				SYNTAX("archive ", EINVAL);
+			if ((ret = __os_malloc(g.dbenv, DB_MAXPATHLEN + 1,
+			    &config.archive)) != 0)
+				ERR_RET("malloc failed", EINVAL);
+			snprintf(config.archive, DB_MAXPATHLEN + 1, "%s", buf);
+			/* Don't override command line */
+			if (g.logclean == CLEAN_NONE)
+				g.logclean = CLEAN_ARCHIVE;
+		} else if (!strcasecmp(name, "archive_fatal")) {
+			sprintf(format, "%%%ds %%c", DB_MAXPATHLEN);
+			if (sscanf(value, format, buf, &v7) != 1)
+				SYNTAX("archive_fatal ", EINVAL);
+			if ((ret = __os_malloc(g.dbenv, DB_MAXPATHLEN + 1,
+			    &config.archive)) != 0)
+				ERR_RET("malloc failed", EINVAL);
+			snprintf(config.archive, DB_MAXPATHLEN + 1, "%s", buf);
+			/* Don't override command line */
+			if (g.logclean == CLEAN_NONE)
+				g.logclean = CLEAN_UPDATE;
+		} else if (!strcasecmp(name, "cache_size_percent")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("cache_size_percent ", EINVAL);
+			config.cache_size_percent = v1;
+		} else if (!strcasecmp(name, "checkpoint_size")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("checkpoint_size ", EINVAL);
+			config.checkpoint_size = v1;
+		} else if (!strcasecmp(name, "checkpoint_time")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("checkpoint_time ", EINVAL);
+			config.checkpoint_time = v1;
+		} else if (!strcasecmp(name, "env_flag")) {
+			if (sscanf(value, "%40s %c", arg, &v7) != 1)
+				SYNTAX("env_flag ", EINVAL);
+			if (!strcasecmp(arg, "DB_INIT_CDB"))
+				config.env_flags |= DB_INIT_CDB;
+			else if (!strcasecmp(arg, "DB_INIT_LOCK"))
+				config.env_flags |= DB_INIT_LOCK;
+			else if (!strcasecmp(arg, "DB_INIT_LOG"))
+				config.env_flags |= DB_INIT_LOG;
+			else if (!strcasecmp(arg, "DB_INIT_MPOOL"))
+				config.env_flags |= DB_INIT_MPOOL;
+			else if (!strcasecmp(arg, "DB_INIT_TXN"))
+				config.env_flags |= DB_INIT_TXN;
+			else if (!strcasecmp(arg, "DB_RECOVER")) {
+				/* Don't override command line */
+				if (g.recover == 0)
+					g.recover = 1;
+			} else if (!strcasecmp(arg, "DB_RECOVER_FATAL")) {
+				/* Don't override command line */
+				if (g.recover == 0)
+					g.recover = 2;
+			} else if (!strcasecmp(arg, "DB_USE_ENVIRON"))
+				config.env_flags |= DB_USE_ENVIRON;
+			else if (!strcasecmp(arg, "DB_USE_ENVIRON_ROOT"))
+				config.env_flags |= DB_USE_ENVIRON_ROOT;
+			else if (!strcasecmp(arg, "DB_CREATE"))
+				config.env_flags |= DB_CREATE;
+			else if (!strcasecmp(arg, "DB_LOCKDOWN"))
+				config.env_flags |= DB_LOCKDOWN;
+			else if (!strcasecmp(arg, "DB_NOMMAP"))
+				config.env_flags |= DB_NOMMAP;
+			else if (!strcasecmp(arg, "DB_PRIVATE"))
+				config.env_flags |= DB_PRIVATE;
+			else if (!strcasecmp(arg, "DB_SYSTEM_MEM"))
+				config.env_flags |= DB_SYSTEM_MEM;
+			else if (!strcasecmp(arg, "DB_THREAD"))
+				config.env_flags |= DB_THREAD;
+			else
+				ERR_RET("unknown env_flag flag", EINVAL);
+			flags_specified = 1;
+		} else if (!strcasecmp(name, "hotbackup")) {
+			sprintf(format, "%%%ds %%c", DB_MAXPATHLEN);
+			if (sscanf(value, format, buf, &v7) != 1)
+				SYNTAX("hotbackup ", EINVAL);
+			if ((ret = __os_malloc(g.dbenv, DB_MAXPATHLEN + 1,
+			    &config.archive)) != 0)
+				ERR_RET("malloc failed", EINVAL);
+			snprintf(config.archive, DB_MAXPATHLEN + 1, "%s", buf);
+			if (g.logclean == CLEAN_NONE)
+				g.logclean = CLEAN_HOTBACKUP;
+		} else if (!strcasecmp(name, "hotupdate")) {
+			sprintf(format, "%%%ds %%c", DB_MAXPATHLEN);
+			if (sscanf(value, format, buf, &v7) != 1)
+				SYNTAX("hotupdate ", EINVAL);
+			if ((ret = __os_malloc(g.dbenv, DB_MAXPATHLEN + 1,
+			    &config.archive)) != 0)
+				ERR_RET("malloc failed", EINVAL);
+			snprintf(config.archive, DB_MAXPATHLEN + 1, "%s", buf);
+			if (g.logclean == CLEAN_NONE)
+				g.logclean = CLEAN_HOTUPDATE;
+		} else if (!strcasecmp(name, "iter_per_thread") ||
+		    !strcasecmp(name, "total_iter")) {
+			if (steady_state_info_specified)
+				ERR_RET("can't execute fixed # of "
+				    "iterations when searching for a "
+				    "steady state", EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("iter_per_thread ", EINVAL);
+			/* Don't override command-line value */
+			if (g.iterations == 0)
+				g.iterations = v1;
+		} else if (!strcasecmp(name, "killtest_iter")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("killtest_iter ", EINVAL);
+			config.killtest_iter = v1;
+			if (config.killtest_interval == 0)
+				config.killtest_interval =
+				    random_int(&g.rand_state, 2, 20) * 15;
+		} else if (!strcasecmp(name, "killtest_interval")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("killtest_interval ", EINVAL);
+			config.killtest_interval = v1;
+			if (config.killtest_iter == 0)
+				config.killtest_iter = INT_MAX;
+		} else if (!strcasecmp(name, "output_info")) {
+			sprintf(format, "%%40s %%%ds %%c", DB_MAXPATHLEN);
+			if (sscanf(value, format, arg, buf, &v7) != 2)
+				SYNTAX("output_info ",
+				    EINVAL);
+			if (!strcasecmp(arg, "dbs_output_info"))
+				config.output_info_fn = dbs_output_info;
+			else
+				ERR_RET("unknown output-info func", EINVAL);
+			strcpy(config.output_info_fname, buf);
+		} else if (!strcasecmp(name, "pause")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("pause ", EINVAL);
+			config.pause = v1;
+		} else if (!strcasecmp(name, "scan_sequential") ||
+		    !strcasecmp(name, "scan_keygroup")) {
+			if (config.scan == NULL) {
+				if ((ret = __os_malloc(g.dbenv,
+				    smax * sizeof(SCAN *), &config.scan)) != 0)
+					ERR_RET("malloc failed", EINVAL);
+			}
+			if (config.num_initial_scans == smax) {
+				/* Grow the array. */
+				smax *= 2;
+				if ((ret = __os_malloc(g.dbenv,
+				    smax * sizeof(SCAN *), &tmp)) != 0)
+					ERR_RET("malloc failed", EINVAL);
+				/* Copy every slot before freeing the old array. */
+				for (i = 0;
+				    i < config.num_initial_scans; i++)
+					tmp[i] = config.scan[i];
+				__os_free(g.dbenv, config.scan);
+				config.scan = tmp;
+			}
+
+			nargs = sscanf(value, "%u %u %u %u %u %c",
+			    &v1, &v2, &v3, &v4, &v5, &v7);
+
+			if (!strcasecmp(name, "scan_sequential")) {
+				if (nargs < 2 || nargs > 3)
+					SYNTAX("scan_sequential "
+					    " []", EINVAL);
+				scan = scan_create_sequential(v1, v2,
+				    (nargs > 2 ? v3 : 1));
+			} else {
+				/* scan_keygroup */
+				if (nargs < 3 || nargs > 5)
+					SYNTAX("scan_keygroup "
+					    " [ ]",
+					    EINVAL);
+				scan = scan_create_keygroup(v1, v2, v3,
+				    (nargs > 3 ? v4 : 0),
+				    (nargs > 4 ? v5 : 1));
+			}
+
+			if (scan == NULL)
+				ERR_RET("malloc failed", EINVAL);
+			config.scan[config.num_initial_scans] = scan;
+			config.num_initial_scans++;
+		} else if (!strcasecmp(name, "set_encrypt")) {
+			if (sscanf(value, "%40s %c", arg, &v7) != 1)
+				SYNTAX("set_encrypt ", EINVAL);
+			config.passwd = strdup(arg);
+		} else if (!strcasecmp(name, "set_lg_dir")) {
+			sprintf(format, "%%%ds %%c", DB_MAXPATHLEN);
+			if (sscanf(value, format, buf, &v7) != 1)
+				SYNTAX("set_lg_dir ", EINVAL);
+			config.logdir = strdup(buf);
+			fprintf(dbconf_fp, "%s %s\n", name, value);
+		} else if (!strcasecmp(name, "set_flags")) {
+			if (sscanf(value, "%40s %c", arg, &v7) != 1)
+				SYNTAX("set_flags ", EINVAL);
+			if (!strcasecmp(arg, "DB_YIELDCPU"))
+				config.yield_on_request = 1;
+			else
+				/*
+				 * Other instances of set_flags are
+				 * copied to the DB_CONFIG file.
+				 */
+				fprintf(dbconf_fp, "%s %s\n", name, arg);
+		} else if (!strcasecmp(name, "set_multiple_handles")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("set_multiple_handles ", EINVAL);
+			config.use_multiple_handles = v1;
+		} else if (!strcasecmp(name, "stat_check_interval")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("stat_check_interval ", EINVAL);
+			config.stat_check_interval = v1;
+		} else if (!strcasecmp(name, "steady_state_max_time")) {
+			if (g.iterations > 0)
+				ERR_RET("can't look for steady state "
+				    "when executing fixed # of iterations",
+				    EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("steady_state_max_time ",
+				    EINVAL);
+			config.steady_state_max_time = v1;
+			steady_state_info_specified = 1;
+		} else if (!strcasecmp(name, "steady_state_num_samples")) {
+			if (g.iterations > 0)
+				ERR_RET("can't look for steady state "
+				    "when executing fixed # of iterations",
+				    EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("steady_state_num_samples ",
+				    EINVAL);
+			config.steady_state_num_samples = v1;
+			steady_state_info_specified = 1;
+		} else if (!strcasecmp(name, "steady_state_time")) {
+			if (g.iterations > 0)
+				ERR_RET("can't look for steady state "
+				    "when executing fixed # of iterations",
+				    EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("steady_state_time ", EINVAL);
+			config.steady_state_time = v1;
+			steady_state_info_specified = 1;
+		} else if (!strcasecmp(name, "steady_state_variance")) {
+			if (g.iterations > 0)
+				ERR_RET("can't look for steady state "
+				    "when executing fixed # of iterations",
+				    EINVAL);
+			if (sscanf(value, "%lf %c", &v6, &v7) != 1)
+				SYNTAX("steady_state_variance ",
+				    EINVAL);
+			config.steady_state_variance = v6;
+			steady_state_info_specified = 1;
+		} else if (!strcasecmp(name, "steady_state_stddev_pct")) {
+			if (g.iterations > 0)
+				ERR_RET("can't look for steady state "
+				    "when executing fixed # of iterations",
+				    EINVAL);
+			if (sscanf(value, "%lf %c", &v6, &v7) != 1)
+				SYNTAX("steady_state_stddev_pct ",
+				    EINVAL);
+			config.steady_state_variance_pct = v6/100;
+			config.steady_state_variance_pct *=
+			    config.steady_state_variance_pct;
+			steady_state_info_specified = 1;
+		} else if (!strcasecmp(name, "write_error")) {
+			if (sscanf(value, "%u %u %u %c",
+			    &v1, &v2, &v3, &v7) != 3)
+				SYNTAX("write_error ",
+				    EINVAL);
+			config.write_err_start = v1;
+			config.write_err_count = v2;
+			config.write_rand = v3;
+			config.write_err = 0;
+		} else if (!strcasecmp(name, "write_error_file")) {
+			sprintf(format, "%%%ds %%c", DB_MAXPATHLEN);
+			if (sscanf(value, format, buf, &v7) != 1)
+				SYNTAX("write_error_file ", EINVAL);
+			if ((werr_fp = fopen(buf, "r")) == NULL)
+				ERR_RET("file open failed", EINVAL);
+			nargs = fscanf(werr_fp, "%u %u %u %c", &v1, &v2, &v3, &v7);
+			(void)fclose(werr_fp);	/* Close before any error return. */
+			if (nargs != 3)
+				ERR_RET("invalid write-error file", EINVAL);
+			config.write_err_start = v1;
+			config.write_err_count = v2;
+			config.write_rand = v3;
+			config.write_err = 0;
+		} else if (!strcasecmp(name, "write_err_start")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("write_err_start ", EINVAL);
+			config.write_err_start = v1;
+			config.write_err = 0;
+		} else if (!strcasecmp(name, "write_err_count")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("write_err_count ", EINVAL);
+			config.write_err_count = v1;
+			config.write_err = 0;
+		} else if (!strcasecmp(name, "write_rand")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("write_rand ", EINVAL);
+			config.write_rand = v1;
+			config.write_err = 0;
+		} else if (!strcasecmp(name, "prepare")) {
+			if (sscanf(value, "%u %c", &v1, &v7) != 1)
+				SYNTAX("prepare ", EINVAL);
+			config.prepare = v1;
+		} else if (!strcasecmp(name, "deadlock")) {
+			if (sscanf(value, "%40s %c", arg, &v7) != 1)
+				SYNTAX("deadlock ", EINVAL);
+			if (!strcasecmp(arg, "DB_LOCK_NORUN"))
+				config.deadlock = DB_LOCK_NORUN;
+			else if (!strcasecmp(arg, "DB_LOCK_DEFAULT"))
+				config.deadlock = DB_LOCK_DEFAULT;
+			else if (!strcasecmp(arg, "DB_LOCK_EXPIRE"))
+				config.deadlock = DB_LOCK_EXPIRE;
+			else if (!strcasecmp(arg, "DB_LOCK_MAXLOCKS"))
+				config.deadlock = DB_LOCK_MAXLOCKS;
+			else if (!strcasecmp(arg, "DB_LOCK_MAXWRITE"))
+				config.deadlock = DB_LOCK_MAXWRITE;
+			else if (!strcasecmp(arg, "DB_LOCK_MINLOCKS"))
+				config.deadlock = DB_LOCK_MINLOCKS;
+			else if (!strcasecmp(arg, "DB_LOCK_MINWRITE"))
+				config.deadlock = DB_LOCK_MINWRITE;
+			else if (!strcasecmp(arg, "DB_LOCK_OLDEST"))
+				config.deadlock = DB_LOCK_OLDEST;
+			else if (!strcasecmp(arg, "DB_LOCK_RANDOM"))
+				config.deadlock = DB_LOCK_RANDOM;
+			else if (!strcasecmp(arg, "DB_LOCK_YOUNGEST"))
+				config.deadlock = DB_LOCK_YOUNGEST;
+			else
+				ERR_RET("unrecognized deadlock flag", EINVAL);
+		} else {
+			/*
+			 * We assume that any other line should
+			 * be copied to the DB_CONFIG file.
+			 */
+			fprintf(dbconf_fp, "%s %s\n", name, value);
+		}
+	} while (get_line(line, g.conf_fp) == 0);
+	(void)fclose(dbconf_fp);	/* EOF before any file section. */
+	INCOMPLETE(EINVAL);
+}
+
+/*
+ * apply_config_files - parse and apply the portion of the config file
+ * that deals with the database files. line is the buffer into which
+ * the lines of the file will be read.
When the function is invoked,
+ * line should contain the first line of the db section.
+ */
+static int
+apply_config_files(line)
+	char *line;
+{
+	FILE_INFO *file;
+	FILE_INFO **tmp;
+	int fcount_def_by_user, fmax, i, ret;
+	u_int v1;
+	char *name, *value, v4;
+	/* fmax is the capacity of config.file; file_count makes it exact. */
+	fmax = DEFAULT_MAX_FILES;
+	fcount_def_by_user = 0;
+	config.file = NULL;
+	config.file_count = 0;
+	/* Each pass handles the name/value line currently held in line. */
+	do {
+		if ((ret = get_name_value(line, &name, &value)) != 0)
+			return (ret);
+
+		if (!strcasecmp(name, "file_count")) {
+			if (fcount_def_by_user)
+				VAL_DEFINED("file_count", EINVAL);
+			if (config.file != NULL)
+				ERR_RET("file_count must come before "
+				    "the first file block", EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v4) != 1)
+				SYNTAX("file_count ", EINVAL);
+			if (v1 == 0)
+				ERR_RET("file_count must be greater than 0",
+				    EINVAL);
+			fmax = v1;
+			fcount_def_by_user = 1;
+		} else if (!strcasecmp(name, "file")) {
+			if (config.file == NULL) {
+				if ((ret = __os_malloc(g.dbenv,
+				    fmax * sizeof(FILE_INFO *),
+				    &config.file)) != 0)
+					ERR_RET("malloc failed", EINVAL);
+			}
+			if (config.file_count == fmax) {
+				if (fcount_def_by_user) {
+					ERR_RET("more files than specified",
+					    EINVAL);
+				} else {
+					/* Grow the array. */
+					fmax *= 2;
+					if ((ret = __os_malloc(g.dbenv,
+					    fmax * sizeof(FILE_INFO *),
+					    &tmp)) != 0)
+						ERR_RET("malloc failed",
+						    EINVAL);
+					for (i = 0; i < config.file_count; i++)
+						tmp[i] = config.file[i];
+					__os_free(g.dbenv, config.file);
+					config.file = tmp;
+				}
+			}
+			if (sscanf(value, "%u { %c", &v1, &v4) != 1)
+				SYNTAX("file {", EINVAL);
+			if ((int)v1 != config.file_count)
+				ERR_RET("file blocks must be defined "
+				    "in order: file 0, file 1, etc.", EINVAL);
+			file = process_file_info(config.file_count);
+			if (file == NULL)
+				ERR_RET("failed to process file info", EINVAL);
+			config.file[config.file_count] = file;
+			config.file_count++;
+		} else {
+			/* This must be the first line of the next section. */
+			/* A user-supplied file_count must be matched exactly. */
+			if (fcount_def_by_user && config.file_count != fmax)
+				ERR_RET("fewer files than specified", EINVAL);
+
+			/* Put the space back at the end of the name. */
+			for (++name; *name != '\0'; ++name)
+				;
+			*name = ' ';
+
+			/*
+			 * Indicate that this line has already been
+			 * read once, so get_name_value() won't
+			 * output it twice to current.conf.
+			 */
+			g.line_preread = 1;
+
+			return (0);
+		}
+	} while (get_line(line, g.conf_fp) == 0);
+	/* get_line() stopped returning 0 before a following section began. */
+	INCOMPLETE(EINVAL);
+}
+
+/*
+ * process_file_info - process and apply the info about a single database
+ * file, and return a pointer to the FILE_INFO object that is created
+ * to hold the information.
+ */
+static FILE_INFO *
+process_file_info(fnum)
+	int fnum;
+{
+	FILE_INFO *file;
+	KEY_GROUP *kg, *kg_old;
+	PROB_DIST *dist;
+	SET *dsize_file, *numdup_dists;
+	int i, kgcount_def_by_user, kgmax, nargs, ret;
+	u_int v1;
+	char *name, *value, v4;
+	char buf[DB_MAXPATHLEN], line[DB_MAXPATHLEN];
+	char format[40];
+
+	/*
+	 * !!!
+	 * The value of 40 is hard-coded into format arguments to sscanf
+	 * below. It can't be changed here without changing it there, too.
+	 */
+	char arg[40];
+	/* NOTE(review): "%40s" can store 41 bytes (40 + NUL) in arg -- confirm. */
+	file = file_info_create();
+	kgmax = DEFAULT_MAX_KEYGRPS;
+	kgcount_def_by_user = 0;
+	kg = NULL;
+	dsize_file = numdup_dists = NULL;
+
+	while (get_line(line, g.conf_fp) == 0 &&
+	    get_name_value(line, &name, &value) == 0) {
+		/* Check for closing brace. */
+		if (value == NULL) {
+			if (file->key_groups == NULL)
+				ERR_RET("no key groups defined", NULL);
+			if (kgcount_def_by_user &&
+			    file->key_groups->item_count != kgmax)
+				ERR_RET("fewer key groups than specified",
+				    NULL);
+			return (finalize_file_info(file, dsize_file) == 0 ?
+			    file : NULL);
+		}
+
+		if (!strcasecmp(name, "file"))
+			ERR_RET("no closing brace for prec.
file info", NULL) + else if (!strcasecmp(name, "file_copy")) { + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("file_copy ", NULL); + if ((int)v1 >= fnum) + ERR_RET("specified file doesn't exist", NULL); + if (IS_SECONDARY(config.file[v1])) + ERR_RET("cannot copy a secondary", NULL); + file_info_destroy(file); + file = file_info_copy(config.file[v1]); + } else if (!strcasecmp(name, "file_method")) { + if (kg != NULL) + ERR_RET("file_method must come before the " + "first key_group", NULL); + if (file->method_params != NULL) { + __os_free(g.dbenv, file->method_params); + file->method_params = NULL; + } + if (sscanf(value, "%40s %c", arg, &v4) != 1) + SYNTAX("file_method ", NULL); + if (!strcasecmp(arg, "btree")) { + file->method = DB_BTREE; + file->set_method_params = + __file_info_set_method_params_btree; + } else if (!strcasecmp(arg, "hash")) { + file->method = DB_HASH; + file->set_method_params = + __file_info_set_method_params_hash; + } else if (!strcasecmp(arg, "queue")) { + file->method = DB_QUEUE; + file->set_method_params = + __file_info_set_method_params_queue; + } else if (!strcasecmp(arg, "recno")) { + file->method = DB_RECNO; + file->set_method_params = + __file_info_set_method_params_recno; + } else + ERR_RET("unrecognized file_method value", + NULL); + } else if (!strcasecmp(name, "file_name")) { + sprintf(format, "%%%ds %%c", DBNAME_MAXSIZE); + if (sscanf(value, format, buf, &v4) != 1) + SYNTAX("file_name ", NULL); + strncpy(file->name, value, DBNAME_MAXSIZE); + } else if (!strcasecmp(name, "file_pagesize")) { + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("file_pagesize ", NULL); + file->pagesize = v1; + } else if (!strcasecmp(name, "file_special")) { + if (sscanf(value, "%40s %c", arg, &v4) != 1) + SYNTAX("file_special ", NULL); + if (!strcasecmp(arg, "no_init")) + file->special |= FILE_INFO_NO_INIT; + else if (!strcasecmp(arg, "multiversion")) + file->special |= FILE_INFO_MULTIVERSION; + else if (!strcasecmp(arg, "dirty_read")) + 
file->special |= FILE_INFO_DIRTY_READ; + else + ERR_RET("unrecognized file_special value", + NULL); + } else if (!strcasecmp(name, "file_type")) { + if (file->key_groups != NULL) + ERR_RET("file_type must come before the " + "first key_group", NULL); + nargs = sscanf(value, "%40s %u %c", arg, &v1, &v4); + if (nargs < 1 || nargs > 2) + SYNTAX("file_type []", NULL); + if (!strcasecmp(arg, "secondary")) { + if (nargs != 2) + ERR_RET("\"file_type secondary\" must " + "be followed by the number of the " + "corresponding primary", NULL); + if (make_secondary(file, v1) != 0) + return (NULL); + } else + ERR_RET("unrecognized file_type value", NULL); + } else if (!strcasecmp(name, "subdb_name")) { + sprintf(format, "%%%ds %%c", DBNAME_MAXSIZE); + if (sscanf(value, format, buf, &v4) != 1) + SYNTAX("subdb_name ", NULL); + if (file->subdb_name != NULL) + __os_free(g.dbenv, file->subdb_name); + if ((ret = __os_malloc(g.dbenv, DBNAME_MAXSIZE, + &file->subdb_name)) != 0) + ERR_RET("malloc failed", NULL); + strncpy(file->subdb_name, value, DBNAME_MAXSIZE); + } else if (!strcasecmp(name, "set_flags")) { + if (sscanf(value, "%40s %c", arg, &v4) != 1) + SYNTAX("set_flags ", NULL); + if (!strcasecmp(arg, "0")) + file->flags = 0; + else if (!strcasecmp(arg, "DB_RECNUM")) { + if (file->method != DB_BTREE) + ERR_RET("bad flag for access method", + NULL); + if (file->flags & DB_DUP) + ERR_RET("DB_RECNUM and DB_DUP", NULL); + if (file->flags & DB_DUPSORT) + ERR_RET("DB_RECNUM and DB_DUPSORT", + NULL); + file->flags |= DB_RECNUM; + } else if (!strcasecmp(arg, "DB_DUPSORT")) { + if (file->method != DB_BTREE && + file->method != DB_HASH) + ERR_RET("bad flag for access method", + NULL); + if (file->flags & DB_RECNUM) + ERR_RET("DB_RECNUM and DB_DUPSORT", + NULL); + file->flags |= (DB_DUP | DB_DUPSORT); + } else if (!strcasecmp(value, "DB_CHKSUM")) { + file->flags |= DB_CHKSUM; + } else if (!strcasecmp(value, "DB_ENCRYPT")) { + file->flags |= DB_ENCRYPT; + } else + ERR_RET("unrecognized 
set_flags value", NULL); + } else if (!strcasecmp(name, "dup_compare")) { + if (file->method != DB_BTREE && + file->method != DB_HASH) + ERR_RET("invalid setting for access method", + NULL); + if (sscanf(value, "%40s %c", arg, &v4) != 1) + SYNTAX("dup_compare ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + if (!strcasecmp(arg, "dbs_dup_compare")) + file->method_params[2] = + (void *)dbs_dup_compare; + else + ERR_RET("unrecognized dup_compare func", NULL); + } else if (!strcasecmp(name, "file_priority")) { + if (file->method != DB_BTREE) + ERR_RET("btree setting for non-btree file", + NULL); + if (sscanf(value, "%s %c", arg, &v4) != 1) + SYNTAX("file_priority ", NULL); + if (strcasecmp(arg, "DB_PRIORITY_VERY_LOW") == 0) + file->priority = DB_PRIORITY_VERY_LOW; + else if (strcasecmp(arg, "DB_PRIORITY_LOW") == 0) + file->priority = DB_PRIORITY_LOW; + else if (strcasecmp(arg, "DB_PRIORITY_DEFAULT") == 0) + file->priority = DB_PRIORITY_DEFAULT; + else if (strcasecmp(arg, "DB_PRIORITY_HIGH") == 0) + file->priority = DB_PRIORITY_HIGH; + else if (strcasecmp(arg, "DB_PRIORITY_VERY_HIGH") == 0) + file->priority = DB_PRIORITY_VERY_HIGH; + else + ERR_RET("unrecognized file_priority flag", + NULL); + } else if (!strcasecmp(name, "bt_minkey")) { + if (file->method != DB_BTREE) + ERR_RET("btree setting for non-btree file", + NULL); + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("bt_minkey ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + file->method_params[0] = (void *)(uintptr_t)v1; + } else if (!strcasecmp(name, "bt_compare")) { + if (file->method != DB_BTREE) + ERR_RET("btree setting for non-btree file", + NULL); + if (sscanf(value, "%40s %c", arg, &v4) != 1) + SYNTAX("bt_compare ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + if (!strcasecmp(arg, "dbs_int_compare")) + file->method_params[1] = + (void *)dbs_int_compare; + else + ERR_RET("unrecognized bt_compare func", 
NULL); + } else if (!strcasecmp(name, "h_ffactor")) { + if (file->method != DB_HASH) + ERR_RET("hash setting for non-hash file", + NULL); + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("h_ffactor ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + file->method_params[0] = (void *)(uintptr_t)v1; + } else if (!strcasecmp(name, "h_nelem")) { + if (file->method != DB_HASH) + ERR_RET("hash setting for non-hash file", + NULL); + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("h_nelem ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + file->method_params[1] = (void *)(uintptr_t)v1; + } else if (!strcasecmp(name, "q_extentsize")) { + if (file->method != DB_QUEUE) + ERR_RET("queue setting for non-queue file", + NULL); + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("q_extentsize ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + file->method_params[1] = (void *)(uintptr_t)v1; + } else if (!strcasecmp(name, "re_len")) { + if (file->method != DB_QUEUE && + file->method != DB_RECNO) + ERR_RET("re_len setting for non-rec file", + NULL); + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("re_len ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + file->method_params[0] = (void *)(uintptr_t)v1; + } else if (!strcasecmp(name, "re_source")) { + if (file->method != DB_RECNO) + ERR_RET("re_source setting for non-recno file", + NULL); + sprintf(format, "%%%ds %%c", DBNAME_MAXSIZE); + if (sscanf(value, format, buf, &v4) != 1) + SYNTAX("re_source ", NULL); + if (file->method_params == NULL) + file->init_method_params(file); + if ((ret = __os_malloc(g.dbenv, + DBNAME_MAXSIZE, &file->method_params[1])) != 0) + ERR_RET("malloc failed", NULL); + strncpy((char *)file->method_params[1], + value, DBNAME_MAXSIZE); + } else if (!strcasecmp(name, "key_group_count")) { + if (kgcount_def_by_user) + VAL_DEFINED("key_group_count", NULL); + if (file->key_groups != 
NULL) + ERR_RET("key_group_count must come " + "before the first key_group", NULL); + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("key_group_count ", NULL); + if (v1 == 0) + ERR_RET("key_group_count must be > 0", NULL); + kgmax = v1; + kgcount_def_by_user = 1; + } else if (!strcasecmp(name, "key_group")) { + if (file->key_groups == NULL) + file->key_groups = set_create(kgmax); + if (sscanf(value, "%u { %c", &v1, &v4) != 1) + SYNTAX("key_group {", NULL); + if ((int)v1 > file->key_groups->item_count) + ERR_RET("key_groups must be defined in order: " + "key_group 0, key_group 1, etc.", NULL); + if (file->key_groups->item_count == kgmax) { + if (kgcount_def_by_user) { + ERR_RET("too many keygroups", NULL); + } else { + /* Create a larger set */ + kgmax *= 2; + file->key_groups->enlarge( + file->key_groups); + } + } + if ((kg = process_key_group(file)) == NULL) + return (NULL); + + if (kg->numdup_dists == NULL && !config.dbs && + !IS_SECONDARY(file)) + /* Reuse the previous dist. (if any) */ + kg->numdup_dists = numdup_dists; + else if (kg->numdup_dists != NULL) { + file->flags |= DB_DUP; + numdup_dists = kg->numdup_dists; + } + + if ((int)v1 == file->key_groups->item_count) + file->key_groups->add_item(file->key_groups, + kg, kg->key_count); + else { + /* Replace an existing key group. 
*/
+				kg_old = file->key_groups->item[(int)v1];
+				if (kg->dsize_dists == NULL)
+					kg->dsize_dists = kg_old->dsize_dists;
+				file->key_groups->freq_total +=
+				    (kg->key_count - kg_old->key_count);
+				key_group_destroy(kg_old);
+				file->key_groups->freq[(int)v1] =
+				    kg->key_count;
+				file->key_groups->item[(int)v1] = kg;
+			}
+		} else if (!strcasecmp(name, "data_length_dists")) {
+			if (dsize_file != NULL)
+				VAL_DEFINED("data_length_dists", NULL);
+			if ((dsize_file = process_dists()) == NULL)
+				return (NULL);
+			if (add_shared_set(dsize_file) != 0)
+				return (NULL);
+			for (i = 0; i < dsize_file->item_count; i++) {
+				dist = dsize_file->item[i];
+				if (dist->get_maxval(dist) > MAX_DATA_SIZE)
+					ERR_RET("data size too big", NULL);
+				if (dist->get_minval(dist) < 1)
+					ERR_RET("data size too small", NULL);
+			}
+		} else
+			ERR_RET("invalid file-info entry", NULL);
+	}
+
+	INCOMPLETE(NULL);
+}
+
+/*
+ * process_key_group - process and apply the info about a single key
+ * group, and return a pointer to the KEY_GROUP object that is created.
+ *
+ * Entries are read from g.conf_fp until the line that closes the block,
+ * which get_name_value() reports by leaving value set to NULL.  The
+ * group itself is created by a key_count or key_chars_per_slot entry:
+ * key_size, key_charset_size and key_incr_order must come before that
+ * entry, while key_start_key, key_start_recno, key_numdup_dists and
+ * data_length_dists configure the group and must come after it.  A
+ * block that never creates a group is rejected.
+ *
+ * file->key_groups is dereferenced when the block is closed, so the
+ * caller must have created it before calling this function.
+ *
+ * Returns NULL if the block is malformed or the input ends before the
+ * block is closed.  (ERR_RET, SYNTAX, VAL_DEFINED and INCOMPLETE are
+ * macros defined elsewhere; they are presumed to report the problem
+ * and return their last argument.)
+ */
+static KEY_GROUP *
+process_key_group(file)
+	FILE_INFO *file;
+{
+	KEY_GROUP *kg, *other;
+	PROB_DIST *dist;
+	int ch, charset_size, i, keysize;
+	int numkeys, num_prev_kg, r_to_l, slot;
+	u_int v1;
+	char *name, *value, v4;
+	/*
+	 * NOTE(review): arg is the target of "%40s" and "%1000s"
+	 * conversions below, so MAX_KEY_SIZE has to be at least 1001 for
+	 * them to stay in bounds -- confirm against its definition.
+	 */
+	char arg[MAX_KEY_SIZE], line[DB_MAXPATHLEN];
+
+	kg = NULL;
+	keysize = 0;
+	charset_size = DEFAULT_CHARSET_SIZE;
+	r_to_l = 0;
+
+	while (get_line(line, g.conf_fp) == 0 &&
+	    get_name_value(line, &name, &value) == 0) {
+		/* Check for closing brace. */
+		if (value == NULL) {
+			/*
+			 * Nothing created the group, so there is nothing
+			 * to adjust or return.
+			 */
+			if (kg == NULL)
+				ERR_RET("key group must contain a key_count "
+				    "or key_chars_per_slot entry", NULL);
+
+			/*
+			 * Try to ensure that this keygroup doesn't
+			 * overlap previous ones.
+			 */
+			num_prev_kg = file->key_groups->item_count;
+			for (i = 0; i < num_prev_kg; i++) {
+				other = file->key_groups->item[i];
+
+				if (kg->type == KEYGRP_REC) {
+					if (kg->start_recno < other->key_count)
+						kg->start_recno +=
+						    other->key_count;
+				} else if (kg->key_size == other->key_size &&
+				    !strcmp(kg->start_key, other->start_key)) {
+					ch = i % kg->key_size;
+					kg->start_key[ch] += kg->char_set_size;
+				}
+			}
+
+			return (kg);
+		}
+
+		if (!strcasecmp(name, "key_group"))
+			ERR_RET("no closing brace for last keygroup", NULL)
+		else if (!strcasecmp(name, "key_size")) {
+			if (keysize != 0)
+				VAL_DEFINED("key_size", NULL);
+			if (kg != NULL)
+				ERR_RET("key_size must come before "
+				    "key_count or key_chars_per_slot", NULL);
+			if (sscanf(value, "%u %c", &v1, &v4) != 1)
+				SYNTAX("key_size ", NULL);
+			if (v1 > MAX_KEY_SIZE)
+				ERR_RET("key size too large", NULL);
+			keysize = v1;
+		} else if (!strcasecmp(name, "key_charset_size")) {
+			if (kg != NULL)
+				ERR_RET("key_charset_size must come before "
+				    "key_count", NULL);
+			if (IS_RECORD_BASED(file))
+				ERR_RET("bad arg for rec-based access method",
+				    NULL);
+			if (sscanf(value, "%u %c", &v1, &v4) != 1)
+				SYNTAX("key_charset_size ", NULL);
+			charset_size = v1;
+		} else if (!strcasecmp(name, "key_chars_per_slot")) {
+			if (kg != NULL)
+				ERR_RET("key_chars_per_slot cannot be used "
+				    "with a key_count entry or another "
+				    "key_chars_per_slot entry", NULL);
+			if (IS_RECORD_BASED(file))
+				ERR_RET("bad arg for rec-based access method",
+				    NULL);
+			if (keysize == 0)
+				ERR_RET("key_chars_per_slot must be preceded "
+				    "by a key_size entry", NULL);
+			if ((kg = key_group_create_chars_per_slot(file,
+			    keysize, r_to_l)) == NULL)
+				return (NULL);
+
+			/* Get the number of characters for each slot. */
+			slot = 0;
+			numkeys = 1;
+			while (sscanf(value, "%u", &v1) == 1) {
+				if (slot == keysize)
+					ERR_RET("too many values", NULL);
+				kg->chars_per_slot[slot] = v1;
+				if ((int)v1 > kg->char_set_size)
+					kg->char_set_size = v1;
+				numkeys *= v1;
+				slot++;
+
+				/*
+				 * Advance to the next argument: step over
+				 * the white space sscanf skipped, then over
+				 * the number it converted.  Skipping the
+				 * white space first keeps a run of several
+				 * blanks from making the same number be
+				 * read twice.
+				 */
+				while (isspace((unsigned char)*value))
+					++value;
+				while (*value != '\0' &&
+				    !isspace((unsigned char)*value))
+					++value;
+			}
+
+			/*
+			 * Without at least one value there is nothing to
+			 * replicate into the remaining slots.
+			 */
+			if (slot == 0)
+				ERR_RET("key_chars_per_slot requires at "
+				    "least one value", NULL);
+
+			/*
+			 * If not enough numbers have been specified,
+			 * reuse the last value for the remaining
+			 * slots.
+			 */
+			for ( ; slot < keysize; slot++) {
+				kg->chars_per_slot[slot] =
+				    kg->chars_per_slot[slot-1];
+				numkeys *= kg->chars_per_slot[slot];
+			}
+
+			kg->key_count = numkeys;
+		} else if (!strcasecmp(name, "key_count")) {
+			if (kg != NULL)
+				ERR_RET("key_count cannot be used with a "
+				    "key_chars_per_slot entry or another "
+				    "key_count entry", NULL);
+			if (!IS_RECORD_BASED(file) && keysize == 0)
+				ERR_RET("for a non-record-based file, "
+				    "key_count must be preceded by a "
+				    "key_size entry", NULL);
+			if (sscanf(value, "%u %c", &v1, &v4) != 1)
+				SYNTAX("key_count ", NULL);
+			if (IS_RECORD_BASED(file))
+				kg = key_group_create_rec(file, v1);
+			else
+				kg = key_group_create_key_count(file,
+				    keysize, v1, charset_size, r_to_l);
+		} else if (!strcasecmp(name, "key_incr_order")) {
+			if (kg != NULL)
+				ERR_RET("key_incr_order must come before "
+				    "key_count or key_chars_per_slot", NULL);
+			if (sscanf(value, "%40s %c", arg, &v4) != 1)
+				SYNTAX("key_incr_order ", NULL);
+			/*
+			 * Compare the token sscanf extracted rather than
+			 * the raw value, which may carry trailing blanks.
+			 */
+			if (!strcasecmp(arg, "l_to_r"))
+				r_to_l = 0;
+			else if (!strcasecmp(arg, "r_to_l"))
+				r_to_l = 1;
+			else
+				ERR_RET("unrecognized key_incr_order value",
+				    NULL);
+		} else if (!strcasecmp(name, "key_start_key")) {
+			if (IS_RECORD_BASED(file))
+				ERR_RET("bad arg for rec-based access method",
+				    NULL);
+			if (kg == NULL)
+				ERR_RET("key_start_key must come after "
+				    "key_count or key_chars_per_slot", NULL);
+			if (sscanf(value, "%1000s %c", arg, &v4) != 1)
+				SYNTAX("key_start_key ", NULL);
+			strncpy(kg->start_key, arg, kg->key_size);
+		} else if (!strcasecmp(name, "key_start_recno")) {
+			if (!IS_RECORD_BASED(file))
+				ERR_RET("bad arg for non-rec-based method",
+				    NULL);
+			if (kg == NULL)
+				ERR_RET("key_start_recno must come after "
+				    "key_count", NULL);
+			if (sscanf(value, "%u %c", &v1, &v4) != 1)
+				SYNTAX("key_start_recno ", NULL);
+			kg->start_recno = v1;
+		} else if (!strcasecmp(name, "key_numdup_dists")) {
+			if (IS_RECORD_BASED(file))
+				ERR_RET("bad arg for rec-based access method",
+				    NULL);
+			if (IS_SECONDARY(file))
+				ERR_RET("duplicates for secondary indices "
+				    "are determined automatically", NULL);
+			if (kg == NULL)
+				ERR_RET("key_numdup_dists must come after "
+				    "key_count or key_chars_per_slot", NULL);
+			if (kg->numdup_dists != NULL)
+				VAL_DEFINED("key_numdup_dists", NULL);
+			if ((kg->numdup_dists = process_dists()) == NULL)
+				return (NULL);
+			if (add_shared_set(kg->numdup_dists) != 0)
+				return (NULL);
+		} else if (!strcasecmp(name, "data_length_dists")) {
+			if (kg == NULL)
+				ERR_RET("data_length_dists must come after "
+				    "key_count or key_chars_per_slot", NULL);
+			if (kg->dsize_dists != NULL)
+				VAL_DEFINED("data_length_dists", NULL);
+			if ((kg->dsize_dists = process_dists()) == NULL)
+				return (NULL);
+			if (add_shared_set(kg->dsize_dists) != 0)
+				return (NULL);
+			for (i = 0; i < kg->dsize_dists->item_count; i++) {
+				dist = kg->dsize_dists->item[i];
+				if (dist->get_maxval(dist) > MAX_DATA_SIZE)
+					ERR_RET("data size too big", NULL);
+				if (dist->get_minval(dist) < 1)
+					ERR_RET("data size too small", NULL);
+			}
+		} else
+			ERR_RET("invalid keygroup-info entry", NULL);
+	}
+
+	INCOMPLETE(NULL);
+}
+
+/*
+ * make_secondary - make the specified file a secondary index for
+ * the file with the specified number.
+ */
+static int
+make_secondary(secondary, prim_filenum)
+	FILE_INFO *secondary;
+	int prim_filenum;
+{
+	FILE_INFO *primary;
+	KEY_GROUP *kg;
+	int i;
+
+	if (prim_filenum >= config.file_count)
+		ERR_RET("invalid primary filenum", EINVAL);
+	primary = config.file[prim_filenum];
+	if (IS_SECONDARY(primary))
+		ERR_RET("specified primary is a secondary", EINVAL);
+	if (primary->secondaries == NULL)
+		primary->secondaries = set_create(MAX_SECONDARIES);
+
+	/* Update the secondary. */
+	secondary->primary = primary;
+	secondary->callback = __callback_fn;
+	secondary->secon_num = primary->secondaries->item_count;
+
+	/* Update the primary.
*/
+	primary->secondaries->add_item(primary->secondaries,
+	    secondary, 1);
+	for (i = 0; i < primary->key_groups->item_count; i++) {
+		kg = primary->key_groups->item[i];
+		if (kg->numdup_dists != NULL)
+			ERR_RET("primary index with dups", EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * finalize_file_info - take the specified file and perform the steps
+ * needed to complete its processing. These steps can only be
+ * performed after parsing the entire file block in the config file.
+ *
+ * dsize_file is the set of data-size distributions given for the file
+ * as a whole, or NULL if there was none.  It is used for every key
+ * group of a non-secondary file that has no distributions of its own;
+ * failing that, the distributions of the closest preceding key group
+ * that has some are used.
+ *
+ * If a file named "<g.home>/<file->name>.info" can be opened, one line
+ * per key group is read from it and stored as that group's starting
+ * record number or starting key.  The absence of such a file is not an
+ * error.
+ *
+ * Returns 0 on success.  Errors are reported through ERR_RET (a macro
+ * defined elsewhere, presumed to report the problem and return its
+ * last argument, EINVAL here).
+ */
+static int
+finalize_file_info(file, dsize_file)
+	FILE_INFO *file;
+	SET *dsize_file;
+{
+	FILE *info_fp;
+	KEY_GROUP *kg;
+	SET *dsize_prev;
+	int i, keycount_prim, keycount_sec;
+	int ret, secon_key_size;
+	u_int v1;
+	char v4;
+	char buf[DB_MAXPATHLEN], line[DB_MAXPATHLEN];
+
+	dsize_prev = NULL;
+
+	/*
+	 * Check the key groups and modify them as
+	 * needed.
+	 */
+	secon_key_size = 0;
+	for (i = 0; i < file->key_groups->item_count; i++) {
+		kg = file->key_groups->item[i];
+
+		if (IS_SECONDARY(file)) {
+			kg->fill_data = __key_group_fill_data_secon;
+			if (secon_key_size == 0)
+				secon_key_size = kg->key_size;
+			else if (secon_key_size != kg->key_size)
+				ERR_RET("the keys of a secondary index "
+				    "must all have the same size", EINVAL);
+		} else {
+			/*
+			 * If a data-size dist hasn't been specified,
+			 * use the one specified for the file or, if
+			 * there isn't one, the previous one specified
+			 * for a key group.
+			 */
+			if (kg->dsize_dists == NULL) {
+				if (dsize_file != NULL)
+					kg->dsize_dists = dsize_file;
+				else if (dsize_prev != NULL)
+					kg->dsize_dists = dsize_prev;
+				else
+					ERR_RET("no dsize dists", EINVAL);
+			} else
+				dsize_prev = kg->dsize_dists;
+		}
+	}
+
+	if (IS_SECONDARY(file)) {
+		/*
+		 * Make sure that a secondary index doesn't have more
+		 * keys than the corresponding primary index.
+		 */
+		keycount_sec = file->get_num_items(file);
+		keycount_prim = file->primary->get_num_items(file->primary);
+		if (keycount_sec > keycount_prim)
+			ERR_RET("sec keys > prim keys", EINVAL);
+
+		if (keycount_sec < keycount_prim)
+			file->flags |= DB_DUP;
+	}
+
+	/*
+	 * Does this database already have an associated info file?
+	 *
+	 * snprintf returns the length the full name would have had, so
+	 * any result of sizeof(buf) or more means it was truncated.
+	 */
+	ret = snprintf(buf, sizeof(buf), "%s/%s.info", g.home, file->name);
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		ERR_RET("name of database info file too long", EINVAL);
+	if ((info_fp = fopen(buf, "r")) == NULL)
+		return (0);
+
+	/*
+	 * If so, read in the start keys of the key groups.  The info
+	 * file is closed before every error return below so that the
+	 * stream is not leaked.
+	 */
+	for (i = 0; i < file->key_groups->item_count; i++) {
+		ret = get_line(line, info_fp);
+		g.linenum--;
+		if (ret != 0) {
+			(void)fclose(info_fp);
+			ERR_RET("info file missing start key", EINVAL);
+		}
+		kg = file->key_groups->item[i];
+		if (IS_RECORD_BASED(file)) {
+			if (sscanf(line, "%u %c", &v1, &v4) != 1) {
+				(void)fclose(info_fp);
+				ERR_RET("invalid info file", EINVAL);
+			}
+			kg->start_recno = v1;
+		} else {
+			if (sscanf(line, "%1000s %c", buf, &v4) != 1) {
+				(void)fclose(info_fp);
+				ERR_RET("invalid info file", EINVAL);
+			}
+			strncpy(kg->start_key, buf, kg->key_size);
+		}
+	}
+
+	(void)fclose(info_fp);
+	return (0);
+}
+
+/*
+ * process_dists - process and apply the info about a set of probability
+ * distributions and return a pointer to the SET object that is created.
+ */
+static SET *
+process_dists()
+{
+	SET *dists;
+	PROB_DIST *dist;
+	dist_type type;
+	double v2, v3;
+	int dist_count_def_by_user, dist_max, dist_num;
+	u_int v1;
+	char *name, *value, v4, v5;
+	char line[DB_MAXPATHLEN];
+
+	dists = NULL;
+	dist_max = DEFAULT_MAX_DISTS;
+	dist_count_def_by_user = 0;
+	dist_num = 0;
+
+	while (get_line(line, g.conf_fp) == 0 &&
+	    get_name_value(line, &name, &value) == 0) {
+		/* Check for closing brace.
*/ + if (value == NULL) { + if (dist_count_def_by_user && dist_num != dist_max) + ERR_RET("fewer distributions than specified", + NULL); + return (dists); + } + + if (!strcasecmp(name, "dist_count")) { + if (dist_count_def_by_user) + VAL_DEFINED("dist_count", NULL); + if (dists != NULL) + ERR_RET("dist_count must come before " + "the first dist entry", NULL); + if (sscanf(value, "%u %c", &v1, &v4) != 1) + SYNTAX("dist_count ", NULL); + if (v1 == 0) + ERR_RET("dist_count must be > 0", NULL); + dist_max = v1; + dist_count_def_by_user = 1; + } else if (!strcasecmp(name, "dist")) { + if (dists == NULL) + dists = set_create(dist_max); + if (dist_num == dist_max) { + if (dist_count_def_by_user) { + ERR_RET("too many dists", NULL); + } else { + /* Create a larger set. */ + dist_max *= 2; + dists->enlarge(dists); + } + } + if (sscanf(value, "%u %c %lf %lf %c", + &v1, &v4, &v2, &v3, &v5) != 4) + SYNTAX("dist " + " ", NULL); + + if (v4 == 'U' || v4 == 'u') + type = DIST_UNIFORM; + else if (v4 == 'N' || v4 == 'n') + type = DIST_NORMAL; + else + ERR_RET("unrecognized dist type", NULL); + if (v2 < 0 || v3 < 0) + ERR_RET("dist parameters must be " + "non-negative", NULL); + + if ((dist = prob_dist_create(type, v2, v3)) == NULL) + return NULL; + dists->add_item(dists, dist, v1); + dist_num++; + } else + ERR_RET("invalid distribution entry", NULL); + } + + INCOMPLETE(NULL); +} + +/* + * apply_config_txns - process and apply the portion of the config file + * that deals with the transaction types. line is the buffer into which + * the lines of the file will be read. When the function is invoked, + * line should contain the first line of the transactions section. 
+ *
+ * config.txn_type is reset to NULL on entry.  Each txn_type block is
+ * parsed by process_txn_type() and stored in config.txn_type[], which
+ * is allocated when the first block is seen and doubled whenever it
+ * fills up, unless a txn_type_count entry fixed its size beforehand.
+ *
+ * Parsing stops at the first line whose name is neither txn_type_count
+ * nor txn_type.  That line is taken to start the next section: the
+ * number of blocks seen is stored in config.txn_type_count, the space
+ * after the name is put back in line, g.line_preread is set, and 0 is
+ * returned.  A nonzero result from get_name_value() is returned as is.
+ * (ERR_RET, SYNTAX, VAL_DEFINED and INCOMPLETE are macros defined
+ * elsewhere; they are presumed to report the problem and return their
+ * last argument.)
+ */
+static int
+apply_config_txns(line)
+	char *line;
+{
+	TXN_TYPE **tmp;
+	TXN_TYPE *ttype;
+	int i, ret, tcount_def_by_user, tmax, tnum;
+	u_int v1;
+	char *name, *value, v4;
+
+	config.txn_type = NULL;
+	tmax = DEFAULT_MAX_TXN_TYPES;
+	tcount_def_by_user = 0;
+	tnum = 0;
+
+	do {
+		if ((ret = get_name_value(line, &name, &value)) != 0)
+			return (ret);
+
+		if (!strcasecmp(name, "txn_type_count")) {
+			if (tcount_def_by_user)
+				VAL_DEFINED("txn_type_count", EINVAL);
+			if (config.txn_type != NULL)
+				ERR_RET("txn_type_count must come before "
+				    "the first txn_type block", EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v4) != 1)
+				SYNTAX("txn_type_count ", EINVAL);
+			if (v1 == 0)
+				ERR_RET("txn_type_count must be > 0", EINVAL);
+			tmax = v1;
+			tcount_def_by_user = 1;
+		} else if (!strcasecmp(name, "txn_type")) {
+			if (config.txn_type == NULL) {
+				if ((ret = __os_malloc(g.dbenv,
+				    tmax * sizeof(TXN_TYPE *),
+				    &config.txn_type)) != 0)
+					ERR_RET("malloc failed", EINVAL);
+			}
+			if (tnum == tmax) {
+				if (tcount_def_by_user) {
+					ERR_RET("too many txn types",
+					    EINVAL);
+				} else {
+					/*
+					 * Grow the array: allocate one of
+					 * twice the size, copy the existing
+					 * pointers over and free the old
+					 * array.
+					 */
+					tmax *= 2;
+					if ((ret = __os_malloc(g.dbenv,
+					    tmax * sizeof(TXN_TYPE *),
+					    &tmp)) != 0)
+						ERR_RET("malloc failed",
+						    EINVAL);
+					for (i = 0; i < tnum; i++)
+						tmp[i] = config.txn_type[i];
+					__os_free(g.dbenv, config.txn_type);
+					config.txn_type = tmp;
+				}
+			}
+			if (sscanf(value, "%u { %c", &v1, &v4) != 1)
+				SYNTAX("txn_type {", EINVAL);
+			if ((int)v1 != tnum)
+				ERR_RET("txn_types must be defined in order: "
+				    "txn_type 0, txn_type 1, etc.", EINVAL);
+			if ((ttype = process_txn_type(tnum)) == NULL)
+				ERR_RET("failed to parse txn type", EINVAL);
+			config.txn_type[tnum] = ttype;
+			tnum++;
+		} else {
+			/* This must be the first line of the next section. */
+
+			if (tcount_def_by_user && tnum != tmax)
+				ERR_RET("too few txn types", EINVAL);
+			config.txn_type_count = tnum;
+
+			/*
+			 * Put the space back at the end of the name.
+			 * NOTE(review): this presumes get_name_value()
+			 * ended the name by overwriting a separator in
+			 * line with a NUL -- confirm for lines that have
+			 * no value.
+			 */
+			for (++name; *name != '\0'; ++name)
+				;
+			*name = ' ';
+
+			/*
+			 * Indicate that this line has already been
+			 * read once, so get_name_value() won't
+			 * output it twice to select.conf.
+			 */
+			g.line_preread = 1;
+
+			return (0);
+		}
+	} while (get_line(line, g.conf_fp) == 0);
+
+	INCOMPLETE(EINVAL);
+}
+
+/*
+ * process_txn_type - process and apply the info about a single
+ * transaction type and return a pointer to the TXN_TYPE object
+ * that is created.
+ */
+static TXN_TYPE *
+process_txn_type(id)
+	int id;
+{
+	TXN_TYPE *ttype, *tmp;
+	OP *op;
+	op_type type;
+	int i, numargs, opcount_def_by_user, opmax, opnum;
+	u_int v1, v2, v3, v4, v5, v6, v7;
+	char *name, *value, v8;
+	char line[DB_MAXPATHLEN];
+
+	/*
+	 * !!!
+	 * The value of 40 is hard-coded into format arguments to sscanf
+	 * below. It can't be changed here without changing it there, too.
+	 * The array is one element longer than that width because "%40s"
+	 * stores up to 40 characters followed by a terminating NUL.
+	 */
+	char arg[40 + 1];
+
+	type = OP_READ_NODUP;
+	ttype = NULL;
+	opmax = DEFAULT_MAX_OPS;
+	opcount_def_by_user = 0;
+	opnum = 0;
+
+	while (get_line(line, g.conf_fp) == 0 &&
+	    get_name_value(line, &name, &value) == 0) {
+		/* Check for closing brace.
*/ + if (value == NULL) { + if (opcount_def_by_user && opnum != opmax) + ERR_RET("fewer ops than specified", NULL); + return (ttype); + } + + if (!strcasecmp(name, "op_count")) { + if (opcount_def_by_user) + VAL_DEFINED("op_count", NULL); + if (ttype != NULL) + ERR_RET("op_count must come before the " + "first op in the txn_type", NULL); + if (sscanf(value, "%u %c", &v1, &v8) != 1) + SYNTAX("op_count ", NULL); + if (v1 == 0) + ERR_RET("op_count must be > 0", NULL); + opmax = v1; + opcount_def_by_user = 1; + } else if (!strcasecmp(name, "txn_type_special")) { + if (sscanf(value, "%40s %c", arg, &v8) != 1) + SYNTAX("txn_type_special ", NULL); + if (ttype == NULL) + ttype = txn_type_create(id, opmax); + if (!strcasecmp(value, "same_key")) + ttype->special |= TXN_TYPE_SAME_KEY; + else if (!strcasecmp(value, "must_exist")) { + ttype->special &= ~TXN_TYPE_RETRY_NOTFOUND; + ttype->special |= TXN_TYPE_NO_NOTFOUND; + } else if (!strcasecmp(value, "retry")) { + ttype->special &= ~TXN_TYPE_NO_NOTFOUND; + ttype->special |= TXN_TYPE_RETRY_NOTFOUND; + } else + ERR_RET("unrecognized txn_type_special value", + NULL); + } else { + if (!strcasecmp(name, "read")) + type = OP_READ_NODUP; + else if (!strcasecmp(name, "update")) + type = OP_UPDATE_NODUP; + else if (!strcasecmp(name, "add")) + type = OP_ADD_NODUP; + else { + for (i = 0; i < NUM_OP_TYPES; i++) { + if (!strcasecmp(name, op_names[i])) { + type = op_types[i]; + break; + } + } + if (i == NUM_OP_TYPES) + ERR_RET("unrecognized op type", NULL); + } + + if (ttype == NULL) + ttype = txn_type_create(id, opmax); + if (opnum == opmax) { + if (opcount_def_by_user) { + ERR_RET("more ops than specified", + NULL); + } else { + /* Make room for more ops. 
*/ + opmax *= 2; + tmp = txn_type_create(id, opmax); + for (i = 0; i < opnum; i++) + tmp->add_op(tmp, ttype->op[i]); + txn_type_destroy(ttype); + ttype = tmp; + } + } + + /* First check for a block-style definition */ + if (strchr(value, '{') != NULL) { + numargs = sscanf(value, "%u { %c", &v1, &v8); + if (numargs == 1) + op = process_op(type, v1); + else + ERR_RET("missing file number between " + "op name and {", NULL); + } else { + /* + * Next check for the style of definition in + * which everything is specified on one line. + */ + numargs = sscanf(value, + "%u %u %u %u %u %u %u %c", + &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8); + if (numargs == 7) { + if (type != OP_CURS_READ && + type != OP_CURS_UPDATE && + type != OP_CURS_UPDATE_PRIM && + type != OP_CURS_DEL) + ERR_RET("not a cursor op", + NULL); + op = op_create(type, v1, v2, v3, + v4, v5, v6, v7); + } else if (numargs == 5) + op = op_create(type, v1, v2, v3, + v4, v5, 1, 1); + else if (numargs == 3) + op = op_create(type, v1, v2, v3, + 1, 1, 1, 1); + else if (numargs == 1) + op = op_create_defaults(type, v1); + else + ERR_RET("op name should be followed " + "by 1, 3, 5, or 7 " + "parameters", NULL); + } + + if (op == NULL) + return (NULL); + if (type == OP_UPDATE_PRIM && + !HAS_SECONDARIES(op->file)) + ERR_RET("update_prim is only for primaries", + NULL); + if (type == OP_DBS_DEL_READD_SECON && + !IS_SECONDARY(op->file)) + ERR_RET("dbs_del_readd_secon is only for " + "secondaries", NULL); + if (IS_SECONDARY(op->file) && + (IS_UPDATE_OP(op) || + op->type == OP_OVERWRITE || + op->type == OP_ADD_NODUP || + op->type == OP_ADD_DUP || + op->type == OP_DBS_UPDATE || + op->type == OP_DBS_PARTIAL_WRITE || + op->type == OP_DBS_DEL || + op->type == OP_DBS_DEL_READD || + op->type == OP_QTEST_SCAN)) + ERR_RET("cannot directly update a secondary", + NULL); + if ((type == OP_READ_DUP || type == OP_UPDATE_DUP || + type == OP_ADD_DUP) && + !(op->file->flags & DB_DUP)) + ERR_RET("dup op for file w/o dups", NULL); + if 
(IS_DBS_OP(op) && !config.dbs)
+				ERR_RET("DBS ops are only valid in DBS mode",
+				    NULL);
+			if (!IS_DBS_OP(op) && config.dbs)
+				ERR_RET("non-DBS ops are not valid "
+				    "in DBS mode", NULL);
+
+			ttype->add_op(ttype, op);
+			opnum++;
+		}
+	}
+
+	INCOMPLETE(NULL);
+}
+
+/*
+ * process_op - process and apply the info about a single database
+ * operation and return a pointer to the OP object that is created.
+ *
+ * The operation starts out as op_create_defaults(type, fnum) and is
+ * then adjusted by the entries of the block-style definition, which
+ * are read from g.conf_fp until the line that closes the block (the
+ * one for which get_name_value() leaves value set to NULL).
+ *
+ * Returns NULL if op_create_defaults() fails, if the block is
+ * malformed, or if the input ends before the block is closed.
+ * (ERR_RET, SYNTAX, VAL_DEFINED and INCOMPLETE are macros defined
+ * elsewhere; they are presumed to report the problem and return their
+ * last argument.)
+ */
+static OP *
+process_op(type, fnum)
+	op_type type;
+	int fnum;
+{
+	OP *op;
+	FILE_INFO *secon;
+	PROB_DIST *dist;
+	int i, item_def_by_user, iter_def_by_user, keygrp_def_by_user;
+	int num_keygrps;
+	u_int v1, v2;
+	char *name, *value, v4;
+	char line[DB_MAXPATHLEN];
+
+	/*
+	 * !!!
+	 * The value of 40 is hard-coded into format arguments to sscanf
+	 * below. It can't be changed here without changing it there, too.
+	 * The array is one element longer than that width because "%40s"
+	 * stores up to 40 characters followed by a terminating NUL.
+	 */
+	char arg[40 + 1];
+
+	item_def_by_user = iter_def_by_user = keygrp_def_by_user = 0;
+
+	/*
+	 * A NULL operation is handed back to the caller, which already
+	 * treats NULL as a failure, rather than tripping the assertion.
+	 */
+	if ((op = op_create_defaults(type, fnum)) == NULL)
+		return (NULL);
+	assert(op->file != NULL && op->file->key_groups != NULL);
+	num_keygrps = op->file->key_groups->item_count;
+
+	while (get_line(line, g.conf_fp) == 0 &&
+	    get_name_value(line, &name, &value) == 0) {
+		/* Check for closing brace. */
+		if (value == NULL)
+			return (op);
+
+		if (strchr(value, '{') != NULL &&
+		    sscanf(value, "%u { %c", &v1, &v4) == 1) {
+			ERR_RET("no closing brace for prec. operation", NULL)
+		} else if (!strcasecmp(name, "flag")) {
+			if (sscanf(value, "%40s %c", arg, &v4) != 1)
+				SYNTAX("flag ", NULL);
+			if (!strcasecmp(arg, "NULL"))
+				continue;
+			else if (!strcasecmp(arg, "DB_RMW")) {
+				if (!IS_UPDATE_OP(op))
+					ERR_RET("DB_RMW can only be used "
+					    "with update ops", NULL);
+				op->flags |= DB_RMW;
+			} else if (IS_CURSOR_OP(op)) {
+				if (!strcasecmp(arg, "DB_NEXT"))
+					op->flags = DB_NEXT;
+				else if (!strcasecmp(arg, "DB_PREV"))
+					op->flags = DB_PREV;
+				else if (!strcasecmp(arg, "DB_NEXT_DUP"))
+					op->flags = DB_NEXT_DUP;
+				else if (!strcasecmp(arg, "DB_NEXT_NODUP"))
+					op->flags = DB_NEXT_NODUP;
+				else if (!strcasecmp(arg, "DB_PREV_DUP"))
+					op->flags = DB_PREV_DUP;
+				else if (!strcasecmp(arg, "DB_PREV_NODUP"))
+					op->flags = DB_PREV_NODUP;
+				else
+					ERR_RET("invalid flag for "
+					    "cursor op", NULL);
+			} else if (op->type == OP_READ_NODUP ||
+			    op->type == OP_READ_MULTI) {
+				if (!strcasecmp(arg, "DB_CONSUME"))
+					op->flags = DB_CONSUME;
+				else if (!strcasecmp(arg, "DB_CONSUME_WAIT"))
+					op->flags = DB_CONSUME_WAIT;
+				else if (!strcasecmp(arg, "DB_GET_BOTH"))
+					op->flags = DB_GET_BOTH;
+				else if (!strcasecmp(arg, "DB_SET_RECNO")) {
+					if (op->file->method != DB_BTREE ||
+					    !(op->file->flags & DB_RECNUM))
+						ERR_RET("invalid flag", NULL);
+					op->flags = DB_SET_RECNO;
+				} else
+					ERR_RET("invalid flag for "
+					    "read_nodup or read_multi", NULL);
+				if ((op->flags == DB_CONSUME ||
+				    op->flags == DB_CONSUME_WAIT) &&
+				    op->file->method != DB_QUEUE)
+					ERR_RET("queue flag for non-queue DB",
+					    NULL);
+			} else
+				ERR_RET("unrecognized op flag", NULL);
+		} else if (!strcasecmp(name, "keygrp_dists")) {
+			if (keygrp_def_by_user)
+				VAL_DEFINED("keygrp_dists", NULL);
+			/*
+			 * Destroy the default dist and replace it
+			 * with the user-defined one.
+			 */
+			set_destroy(op->keygrp_dists, 1);
+			if ((op->keygrp_dists = process_dists()) == NULL)
+				return (NULL);
+			keygrp_def_by_user = 1;
+
+			for (i = 0; i < op->keygrp_dists->item_count; i++) {
+				dist = op->keygrp_dists->item[i];
+				if (dist->type != DIST_UNIFORM)
+					ERR_RET("keygrp dists must be uniform",
+					    NULL);
+				if (dist->get_maxval(dist) >= num_keygrps ||
+				    dist->get_minval(dist) < 0)
+					ERR_RET("invalid keygrp range", NULL);
+			}
+		} else if (!strcasecmp(name, "iter_dists")) {
+			if (iter_def_by_user)
+				VAL_DEFINED("iter_dists", NULL);
+			/*
+			 * Destroy the default dist and replace it
+			 * with the user-defined one.
+			 */
+			set_destroy(op->iter_dists, 1);
+			if ((op->iter_dists = process_dists()) == NULL)
+				return (NULL);
+			iter_def_by_user = 1;
+
+			for (i = 0; i < op->iter_dists->item_count; i++) {
+				dist = op->iter_dists->item[i];
+				if (dist->get_minval(dist) < 0)
+					ERR_RET("invalid iter range", NULL);
+			}
+		} else if (!strcasecmp(name, "item_dists")) {
+			if (item_def_by_user)
+				VAL_DEFINED("item_dists", NULL);
+			if (!IS_CURSOR_OP(op))
+				ERR_RET("item dist for non-cursor operation",
+				    NULL);
+
+			/*
+			 * Destroy the default dist and replace it
+			 * with the user-defined one.
+			 */
+			set_destroy(op->item_dists, 1);
+			if ((op->item_dists = process_dists()) == NULL)
+				return (NULL);
+			item_def_by_user = 1;
+
+			for (i = 0; i < op->item_dists->item_count; i++) {
+				dist = op->item_dists->item[i];
+				if (dist->get_minval(dist) < 0)
+					ERR_RET("invalid item-count range",
+					    NULL);
+			}
+		} else if (!strcasecmp(name, "secon_to_update")) {
+			if (op->type != OP_UPDATE_PRIM &&
+			    op->type != OP_CURS_UPDATE_PRIM)
+				ERR_RET("secon_to_update can only be used "
+				    "with update_prim and curs_update_prim",
+				    NULL);
+			if (op->other == NULL)
+				op->other = set_create(MAX_SECONDARIES);
+			if (sscanf(value, "%u %u %c", &v1, &v2, &v4) != 2)
+				SYNTAX("secon_to_update ",
+				    NULL);
+			/*
+			 * Compare as unsigned: a value too large for an
+			 * int must not slip past the range check and be
+			 * used to index config.file[].
+			 */
+			if (v1 >= (u_int)config.file_count)
+				ERR_RET("invalid file index", NULL);
+			if (!IS_SECONDARY(config.file[v1]))
+				ERR_RET("not a secondary index", NULL);
+			if (v2 == 0 || v2 > 100)
+				ERR_RET("invalid secon update pct", NULL);
+			/*
+			 * The set of secondaries is only created once a
+			 * secondary is attached to a file, so the file of
+			 * this operation may not have one at all.
+			 */
+			if (op->file->secondaries == NULL)
+				ERR_RET("wrong primary for this secondary",
+				    NULL);
+			for (i = 0;
+			    i < op->file->secondaries->item_count; i++) {
+				secon = op->file->secondaries->item[i];
+				if (secon == config.file[v1]) {
+					op->other->add_item(op->other,
+					    (void *)(uintptr_t)i, v2);
+					break;
+				}
+			}
+			if (i == op->file->secondaries->item_count)
+				ERR_RET("wrong primary for this secondary",
+				    NULL);
+		} else if (!strcasecmp(name, "update_pct")) {
+			if (!IS_UPDATE_OP(op))
+				ERR_RET("update_pct for non-update operation",
+				    NULL);
+			if (sscanf(value, "%u %c", &v1, &v4) != 1)
+				SYNTAX("update_pct ", NULL);
+			if (v1 == 0 || v1 > 100)
+				ERR_RET("invalid update_pct value", NULL);
+			op->update_pct = v1;
+		} else
+			ERR_RET("invalid operation-definition entry", NULL);
+	}
+
+	INCOMPLETE(NULL);
+}
+
+/*
+ * apply_config_thr_types - process and apply the portion of the config
+ * file that deals with the thread types. line is the buffer into which
+ * the lines of the file will be read. When the function is invoked,
+ * line should contain the first line of the thread-types section.
+ *
+ * config.thr_type is reset to NULL on entry.  Each thread_type block
+ * is parsed by process_thread_type() and stored in config.thr_type[],
+ * which is allocated when the first block is seen and doubled whenever
+ * it fills up, unless a thread_type_count entry fixed its size
+ * beforehand.
+ *
+ * Parsing stops at the first line whose name is neither
+ * thread_type_count nor thread_type.  That line is taken to start the
+ * next section: the number of blocks seen is stored in
+ * config.thr_type_count, the space after the name is put back in line,
+ * g.line_preread is set, and 0 is returned.
+ *
+ * If get_name_value() returns EOF the loop is left and control reaches
+ * INCOMPLETE(EINVAL), the same path that is taken when get_line()
+ * stops returning 0; any other nonzero result is returned as is.
+ * (ERR_RET, SYNTAX, VAL_DEFINED and INCOMPLETE are macros defined
+ * elsewhere; they are presumed to report the problem and return their
+ * last argument.)
+ */
+static int
+apply_config_thr_types(line)
+	char *line;
+{
+	THR_TYPE **tmp;
+	THR_TYPE *ttype;
+	int i, ret, ttcount_def_by_user, ttmax, ttnum;
+	u_int v1;
+	char *name, *value, v2;
+
+	config.thr_type = NULL;
+	ttmax = DEFAULT_MAX_THR_TYPES;
+	ttcount_def_by_user = 0;
+	ttnum = 0;
+
+	do {
+		if ((ret = get_name_value(line, &name, &value)) != 0) {
+			if (ret == EOF)
+				break;
+			return (ret);
+		}
+
+		if (!strcasecmp(name, "thread_type_count")) {
+			if (ttcount_def_by_user)
+				VAL_DEFINED("thread_type_count", EINVAL);
+			if (config.thr_type != NULL)
+				ERR_RET("thread_type_count must come before "
+				    "the first thread_type block", EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v2) != 1)
+				SYNTAX("thread_type_count ", EINVAL);
+			if (v1 == 0)
+				ERR_RET("thread_type_count must be > 0",
+				    EINVAL);
+			ttmax = v1;
+			ttcount_def_by_user = 1;
+		} else if (!strcasecmp(name, "thread_type")) {
+			if (config.thr_type == NULL) {
+				if ((ret = __os_malloc(g.dbenv,
+				    ttmax * sizeof(THR_TYPE *),
+				    &config.thr_type)) != 0)
+					ERR_RET("malloc failed", EINVAL);
+			}
+			if (ttnum == ttmax) {
+				if (ttcount_def_by_user) {
+					ERR_RET("too many thr types",
+					    EINVAL);
+				} else {
+					/*
+					 * Grow the array: allocate one of
+					 * twice the size, copy the existing
+					 * pointers over and free the old
+					 * array.
+					 */
+					ttmax *= 2;
+					if ((ret = __os_malloc(g.dbenv,
+					    ttmax * sizeof(THR_TYPE *),
+					    &tmp)) != 0)
+						ERR_RET("malloc failed",
+						    EINVAL);
+					for (i = 0; i < ttnum; i++)
+						tmp[i] = config.thr_type[i];
+					__os_free(g.dbenv, config.thr_type);
+					config.thr_type = tmp;
+				}
+			}
+			if (sscanf(value, "%u { %c", &v1, &v2) != 1)
+				SYNTAX("thread_type {", EINVAL);
+			if ((int)v1 != ttnum)
+				ERR_RET("thread_types must be defined "
+				    "in order: thread_type 0, thread_type 1, "
+				    "etc.", EINVAL);
+			if ((ttype = process_thread_type()) == NULL)
+				ERR_RET("failed to parse thread type", EINVAL);
+			config.thr_type[ttnum] = ttype;
+			ttnum++;
+		} else {
+			/* This must be the first line of the next section. */
+
+			if (ttcount_def_by_user && ttnum != ttmax)
+				ERR_RET("too few thr types", EINVAL);
+			config.thr_type_count = ttnum;
+
+			/*
+			 * Put the space back at the end of the name.
+			 * NOTE(review): this presumes get_name_value()
+			 * ended the name by overwriting a separator in
+			 * line with a NUL -- confirm for lines that have
+			 * no value.
+			 */
+			for (++name; *name != '\0'; ++name)
+				;
+			*name = ' ';
+
+			/*
+			 * Indicate that this line has already been
+			 * read once, so get_name_value() won't
+			 * output it twice to select.conf.
+			 */
+			g.line_preread = 1;
+
+			return (0);
+		}
+	} while (get_line(line, g.conf_fp) == 0);
+
+	INCOMPLETE(EINVAL);
+}
+
+/*
+ * process_thread_type - process and apply the info about a single thread
+ * type and return a pointer to the THR_TYPE object that is created.
+ */
+static THR_TYPE *
+process_thread_type()
+{
+	THR_TYPE *thr_type;
+	int ttcount_def_by_user, ttmax, ttnum;
+	u_int v1, v2;
+	char *name, *value, v3;
+	char line[DB_MAXPATHLEN];
+	char arg[40];
+
+	thr_type = thr_type_create();
+	ttmax = DEFAULT_MAX_TXN_TYPES;
+	ttcount_def_by_user = 0;
+	ttnum = 0;
+
+	while (get_line(line, g.conf_fp) == 0 &&
+	    get_name_value(line, &name, &value) == 0) {
+		/* Check for closing brace.
*/ + if (value == NULL) { + if (ttcount_def_by_user && ttnum != ttmax) + ERR_RET("too few txn types", NULL); + return (thr_type); + } + + if (!strcasecmp(name, "txn_size_dists")) { + if (thr_type->txn_size_dists != NULL) + VAL_DEFINED("txn_size_dists", NULL); + thr_type->txn_size_dists = process_dists(); + if (thr_type->txn_size_dists == NULL) + return (NULL); + } else if (!strcasecmp(name, "txn_type_count")) { + if (ttcount_def_by_user) + VAL_DEFINED("txn_type_count", NULL); + if (thr_type->txn_types != NULL) + ERR_RET("txn_type_count must come before " + "the first txn_type entry for this " + "thr_type block", NULL); + if (sscanf(value, "%u %c", &v1, &v3) != 1) + SYNTAX("txn_type_count ", NULL); + if (v1 == 0) + ERR_RET("txn_type_count must be > 0", NULL); + ttmax = v1; + ttcount_def_by_user = 1; + } else if (!strcasecmp(name, "txn_type")) { + if (thr_type->txn_types == NULL) + thr_type->txn_types = set_create(ttmax); + if (ttnum == ttmax) { + if (ttcount_def_by_user) { + ERR_RET("too many txn types", NULL); + } else { + /* Enlarge the set. 
*/ + ttmax *= 2; + thr_type->txn_types-> + enlarge(thr_type->txn_types); + } + } + if (sscanf(value, "%u %u %c", &v1, &v2, &v3) != 2) + SYNTAX("txn_type ", NULL); + if ((int)v2 > config.txn_type_count) + ERR_RET("specified txn_type " + "has not been defined", NULL); + thr_type->txn_types->add_item(thr_type->txn_types, + config.txn_type[v2], v1); + ttnum++; + } else if (!strcasecmp(name, "thread_type_special")) { + if (sscanf(value, "%40s %c", arg, &v3) != 1) + SYNTAX("thread_type_special ", NULL); + if (!strcasecmp(arg, "sub_txn")) + thr_type->special |= THREAD_TYPE_SUB_TXN; + else if (!strcasecmp(arg, "no_txn")) + thr_type->special |= THREAD_TYPE_NO_TXN; + else if (!strcasecmp(arg, "dirty_read")) + thr_type->special |= THREAD_TYPE_DIRTY_READ; + else if (!strcasecmp(arg, "snapshot")) + thr_type->special |= THREAD_TYPE_SNAPSHOT; + else if (!strcasecmp(arg, "snapshot-safe")) { + thr_type->special |= THREAD_TYPE_SNAPSHOT_SAFE; + } else + ERR_RET("unrecognized thread_type_special " + "value", NULL); + } else + ERR_RET("invalid thread_type entry", NULL); + } + + INCOMPLETE(NULL); +} + +/* + * apply_config_threads - process and apply the portion of the config + * file that specifies the number of threads of each type. line is + * the buffer into which the lines of the file will be read. When the + * function is invoked, line should contain the first line of the + * threads section of the config file. 
+ */
+static int
+apply_config_threads(line)
+	char *line;
+{
+	int nthreads_def_by_user, ret, thr_total, thr_max;
+	u_int v1;
+	char *name, *value, v2;
+
+	config.thread = NULL;
+	thr_total = 0;
+	thr_max = DEFAULT_MAX_THREADS;
+	nthreads_def_by_user = 0;
+
+	do {
+		if ((ret = get_name_value(line, &name, &value)) != 0) {
+			if (ret == EOF)
+				break;
+			return (ret);
+		}
+
+		if (!strcasecmp(name, "thread_count_total")) {
+			if (nthreads_def_by_user)
+				VAL_DEFINED("thread_count_total", EINVAL);
+			if (config.thread != NULL)
+				ERR_RET("thread_count_total must come before "
+				    "the thread_counts block", EINVAL);
+			if (sscanf(value, "%u %c", &v1, &v2) != 1)
+				SYNTAX("thread_count_total ", EINVAL);
+			if (v1 == 0)
+				ERR_RET("thread_count_total must be > 0",
+				    EINVAL);
+			if (v1 > 1 && !(config.env_flags & DB_THREAD))	/* test the bit */
+				ERR_RET("DB_THREAD not specified", EINVAL);
+			thr_max = v1;
+			nthreads_def_by_user = 1;
+			strcat(g.tag, value);
+		} else if (!strcasecmp(name, "thread_counts")) {
+			if (config.thread != NULL)
+				ERR_RET("cannot have more than one "
+				    "thread_counts block", EINVAL);
+			if ((ret = __os_malloc(g.dbenv,
+			    thr_max * sizeof(THR_INFO *),
+			    &config.thread)) != 0)
+				ERR_RET("malloc failed", EINVAL);
+			thr_total = process_thread_counts(thr_max,
+			    nthreads_def_by_user);
+			if (thr_total <= 0)
+				ERR_RET("failed to parse thread counts",
+				    EINVAL);
+			config.nthreads = thr_total;
+		} else
+			ERR_RET("invalid entry for threads section", EINVAL);
+	} while (get_line(line, g.conf_fp) == 0);
+
+	if (config.thread == NULL)
+		ERR_RET("missing thread_counts block", EINVAL);
+	return (0);
+}
+
+/*
+ * process_thread_counts - process and apply the info about the number of
+ * threads of each type and return the total number of threads, or -1 if
+ * the thread-count block is invalid
+ */
+static int
+process_thread_counts(thr_max, nthreads_def_by_user)
+	int thr_max, nthreads_def_by_user;
+{
+	THR_INFO **tmp;
+	THR_INFO *thr;
+	int count, i, thr_total, type, ret;
+	u_int v1, v2;
+	char *name, *value, v3;
+	char line[DB_MAXPATHLEN];
+
+	thr = NULL;
+	thr_total = 0;
+
+	while (get_line(line, g.conf_fp) == 0 &&
+	    get_name_value(line, &name, &value) == 0) {
+		/* Check for closing brace. */
+		if (value == NULL) {
+			if (nthreads_def_by_user && thr_total != thr_max)
+				ERR_RET("fewer threads than specified", -1);
+			return (thr_total);
+		}
+
+		if (!strcasecmp(name, "thread_type")) {
+			if (sscanf(value, "%u %u %c", &v1, &v2, &v3) != 2)
+				SYNTAX("thread_type ", -1);
+			type = v1;
+			count = v2;
+			if (type < 0 || type >= config.thr_type_count)	/* index must be < count */
+				ERR_RET("specified thr_type "
+				    "has not been defined", -1);
+			if (count == 0)
+				ERR_RET("thread counts must be > 0", -1);
+			if (thr_total + count > thr_max) {
+				if (nthreads_def_by_user) {
+					ERR_RET("more threads than specified",
+					    -1);
+				} else {
+					/* Grow the array */
+					do {
+						thr_max *= 2;
+					} while (thr_total + count > thr_max);
+					if ((ret = __os_malloc(g.dbenv,
+					    thr_max * sizeof(THR_INFO *),
+					    &tmp)) != 0)
+						ERR_RET("malloc failed", -1);
+					for (i = 0; i < thr_total; i++)
+						tmp[i] = config.thread[i];
+					__os_free(g.dbenv, config.thread);
+					config.thread = tmp;
+				}
+			}
+
+			/* Create the THR_INFO objects of this type. */
+			for (i = 0; i < count; i++) {
+				thr = thr_info_create(thr_total + i,
+				    config.thr_type[type]);
+				if (thr == NULL)
+					ERR_RET("couldn't create thr_info",
+					    -1);
+				config.thread[thr_total + i] = thr;
+			}
+			thr_total += count;
+		} else
+			ERR_RET("invalid thread_counts entry", -1);
+	}
+
+	INCOMPLETE(-1);
+}
--- db-4.6.21/test_perf/perf_dbs.c	1970-01-01 10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_dbs.c	2007-11-05 15:51:13.663919000 +1100
@@ -0,0 +1,1488 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_dbs.c,v 12.21 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+/*
+ * Functions used when db_perf is run in DBS mode.
Databases created in
+ * this mode include an id number and checksum in each data item, and
+ * this information is verified by most of the DBS-specific operations.
+ * In addition, DBS databases have one duplicate per thread for each key
+ * in the database.
+ */
+
+static int check_data __P((u_int32_t, u_int32_t, DBT *, DBT *));
+static int check_dupset
+    __P((int, DB_TXN *, DBT *, DBT *, int, int, DBC **, DB **));
+static int check_sec_key __P((DB *, DB_TXN *, DBT *, DBT *));
+static int compact_stat_print __P((DB_ENV *, DB_COMPACT *sp, u_int32_t));
+int32_t compute_checksum __P((char *, int));
+
+/*
+ * compute_checksum -- sum the values in a string.
+ *
+ * This routine MUST have the property that if the string = sub1 || sub2 that
+ * compute_checksum(string) = compute_checksum(sub1) + compute_checksum(sub2).
+ */
+int32_t
+compute_checksum(start, len)
+	char *start;
+	int len;
+{
+	int32_t sum;
+
+	sum = 0;
+	while (len--)
+		sum += *start++;
+
+	return (sum);
+}
+
+/*
+ * check_data - verify the id and check sum of a single data item that
+ * has already been retrieved into datap by the thread with ID thr_id.
+ * The expected id of the data item is given in data_id.
+ */
+static int
+check_data(thr_id, data_id, keyp, datap)
+	u_int32_t thr_id, data_id;
+	DBT *keyp, *datap;
+{
+	struct data *data_str;
+	int sum;
+
+	data_str = (struct data *)datap->data;
+
+	/* Verify the id in the data item. */
+	if (data_str->id != data_id) {
+		g.dbenv->errx(g.dbenv,
+		    "[%ld] data mismatch for key %s: expected %ld got %ld",
+		    (long)thr_id, (char *)keyp->data, (long)data_id,
+		    (long)data_str->id);
+		return (EINVAL);
+	}
+
+	/* Verify the checksum. */
+	sum = compute_checksum(data_str->str, datap->size - DBS_STR_OFFSET);
+	if (data_str->sum != sum) {
+		g.dbenv->errx(g.dbenv,
+		    "[%ld] chksum %d != %d for key %s id %d",
+		    (long)thr_id, data_str->sum, sum, (char *)keyp->data,
+		    data_id);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * check_dupset - verify the ids and checksums of a set of duplicate
+ * items with the key specified in keyp. The first duplicate should
+ * have the id specified by first, and we continue checking through
+ * the duplicate with the id specified by last. If the check is
+ * successful, cursp is used to return a pointer to the cursor used to
+ * access the dupset, and dbp returns a pointer to the corresponding
+ * DB handle.
+ */
+static int
+check_dupset(thr_id, txn, keyp, datap, first, last, cursp, dbp)
+	int thr_id;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+	int first, last;
+	DBC **cursp;
+	DB **dbp;
+{
+	DBC *curs;
+	int curs_ret, i, id, ret, retry;
+
+	/*
+	 * Because we may be moving items from one file to
+	 * another, we check for the item in each of the databases.
+	 */
+	curs = *cursp = NULL;
+	retry = 0;
+
+again:	ret = 0;
+	for (i = 0; i < config.file_count; i++) {
+		*dbp = config.file[i]->dbp[thr_id];
+		if ((ret = (*dbp)->cursor(*dbp, txn, &curs, 0)) != 0)
+			return (ret);
+
+		ret = curs->get(curs, keyp, datap, DB_SET);
+		if (ret == 0)
+			break;
+		else {
+			if ((curs_ret = curs->close(curs)) != 0) {
+				g.dbenv->err(g.dbenv, curs_ret,
+				    "[%ld]: c_close failed",
+				    (long)thr_id);	/* %ld needs a long */
+				return (ret);
+			}
+		}
+
+		if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY)
+			return (ret);
+	}
+	if ((ret == DB_NOTFOUND || ret == DB_KEYEMPTY) &&
+	    config.file[0]->method == DB_QUEUE &&
+	    *(db_recno_t*)keyp->data == 1 && retry++ < 10) {
+		__os_sleep(NULL, 1, 0);
+		goto again;
+	}
+	if (ret != 0)
+		return (ret);
+	assert(curs != NULL);
+	*cursp = curs;
+
+	/* Check the set of duplicates. */
+	id = first;
+	do {
+		if ((ret = check_data(thr_id, id, keyp, datap)) != 0)
+			return (ret);
+		id++;
+	} while (id <= last &&
+	    (ret = curs->get(curs, keyp, datap, DB_NEXT)) == 0);
+
+	return (ret);
+}
+
+/*
+ * check_sec_key - verify the duplicates associated with the specified
+ * secondary key.
+ */
+static int
+check_sec_key(dbp, txn, keyp, datap)
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *curs;
+	int ret;
+
+	/* Open a cursor. */
+	if ((ret = dbp->cursor(dbp, txn, &curs, 0)) != 0)
+		return (ret);
+
+	/* Get all of the duplicates associated with the specified key. */
+	ret = curs->get(curs, keyp, datap, DB_SET);
+	while (ret == 0)
+		ret = curs->get(curs, keyp, datap, DB_NEXT_DUP);
+	if (ret == DB_NOTFOUND)
+		ret = 0;
+
+	/* Close the cursor. */
+	curs->close(curs);
+
+	return (ret);
+}
+
+/*
+ * __key_group_fill_data_dbs - implementation of KEY_GROUP->fill_data()
+ * for databases created in DBS mode. The data item is filled
+ * according to the "data" struct defined in perf_extern.h: a random
+ * string, preceded by an id number and the checksum of the string.
+ * The id parameter specifies the id to be used. If id == -1, we
+ * reuse the data item's current id.
+ */
+void
+__key_group_fill_data_dbs(keygrp, rs, datap, id)
+	KEY_GROUP *keygrp;
+	db_rand_state *rs;
+	DBT *datap;
+	int id;
+{
+	struct data *data_str;
+
+	assert(keygrp != NULL && keygrp->dsize_dists != NULL);
+	assert(datap != NULL);
+	assert(!IS_SECONDARY(keygrp->file));
+	assert(config.dbs != 0);
+
+	data_str = (struct data *)datap->data;
+
+	/*
+	 * If the function is called with an id parameter of -1,
+	 * we reuse the data item's current id.
+	 */
+	if (id == -1)
+		id = data_str->id;
+
+	/*
+	 * Select one of the data-size subdistributions,
+	 * and then select one of the integers from that
+	 * subdistribution as the size.
+	 */
+	datap->size = keygrp->dsize_dists->
+	    select_from_dists(keygrp->dsize_dists, rs, 1);
+	if (datap->size > MAX_DATA_SIZE)
+		datap->size = MAX_DATA_SIZE;
+
+	/*
+	 * Make sure that we have room for the id, the checksum, and
+	 * at least one character in the string.
+	 */
+	if (datap->size <= DBS_STR_OFFSET)
+		datap->size = DBS_STR_OFFSET + 1;
+
+	/* Fill the string portion of the data item. */
+	random_data(rs, data_str->str, datap->size - DBS_STR_OFFSET);
+	if (HAS_SECONDARIES(keygrp->file))
+		fill_data_primary(keygrp->file, data_str->str,
+		    datap->ulen - DBS_STR_OFFSET);
+
+	/* Write the id and the checksum. */
+	data_str->id = id;
+	data_str->sum = compute_checksum(data_str->str,
+	    datap->size - DBS_STR_OFFSET);
+}
+
+/*
+ * __op_dbs_read - read an item from a DBS-style database and verify
+ * its contents using the checksum stored in the item.
+ */
+int
+__op_dbs_read(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	int data_id, i, ret, retry;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.dbs != 0);
+	retry = 0;
+
+	/*
+	 * When reading from a secondary, we don't verify the contents
+	 * of the data item because there are times when we would need
+	 * the primary key to do so.
+	 */
+	if (IS_SECONDARY(op->file)) {
+sagain:		ret = dbp->get(dbp, txn, keyp, datap, op->flags);
+		if ((ret == DB_NOTFOUND || ret == DB_KEYEMPTY) &&
+		    config.file[0]->method == DB_QUEUE &&
+		    *(db_recno_t*)keyp->data == 1 && retry++ < 10) {
+			__os_sleep(NULL, 1, 0);
+			goto sagain;
+		}
+		return (ret);
+	}
+
+	/*
+	 * Because we may be moving items from one file to
+	 * another, we check for the item in each of the databases.
+	 */
+again:	ret = 0;
+	for (i = 0; i < config.file_count; i++) {
+		dbp = config.file[i]->dbp[thr_id];
+		ret = dbp->get(dbp, txn, keyp, datap, op->flags);
+		if (ret == 0 || (ret != DB_NOTFOUND && ret != DB_KEYEMPTY))
+			break;
+	}
+	if ((ret == DB_NOTFOUND || ret == DB_KEYEMPTY) &&
+	    config.file[0]->method == DB_QUEUE &&
+	    *(db_recno_t*)keyp->data == 1 && retry++ < 10) {
+		__os_sleep(NULL, 1, 0);
+		goto again;
+	}
+	if (ret != 0)
+		return (ret);
+
+	data_id = (IS_RECORD_BASED(op->file) ?
+	    (*((db_recno_t *)keyp->data) - 1) % config.nthreads : 0);
+	return (check_data(thr_id, data_id, keyp, datap));
+}
+
+/*
+ * __op_dbs_read_dupset - read an entire duplicate set from a
+ * DBS-style database and verify the contents of the items.
+ */
+int
+__op_dbs_read_dupset(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *curs;
+	int curs_ret, first, last, ret;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.dbs != 0);
+
+	/*
+	 * When reading from a secondary, we don't verify the contents
+	 * of the data items because there are times when we would need
+	 * the primary key to do so.
+	 */
+	if (IS_SECONDARY(op->file))
+		return (check_sec_key(op->file->dbp[thr_id], txn,
+		    keyp, datap));
+
+	/* Determine the ids of the first and last duplicates in the set. */
+	if (IS_RECORD_BASED(op->file)) {
+		first = (*((db_recno_t *)keyp->data) - 1) % config.nthreads;
+		last = first;
+	} else if (HAS_SECONDARIES(op->file))
+		first = last = 0;
+	else {
+		first = 0;
+		last = config.nthreads - 1;
+	}
+
+	/* Check the entire set. */
+	ret = check_dupset(thr_id, txn, keyp, datap, first, last, &curs, &dbp);
+
+	/* Close the cursor and return. */
+	if (curs != NULL) {
+		if ((curs_ret = curs->close(curs)) != 0) {
+			g.dbenv->err(g.dbenv, curs_ret,
+			    "[%ld]: c_close failed", (long)thr_id);
+			return (curs_ret);
+		}
+	}
+	return (ret);
+}
+
+/*
+ * __op_dbs_update - find the duplicate in a DBS-style database
+ * that belongs to this thread and change it.
+ */
+int
+__op_dbs_update(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *curs;
+	struct data *data_str;
+	int curs_ret, first, last, ret;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.thread[thr_id]->keygrp != NULL);
+	assert(config.dbs != 0);
+
+	data_str = (struct data *)datap->data;
+
+	/*
+	 * Obtain a cursor pointing to the data item that "belongs" to
+	 * this thread, verifying both the data item itself and the
+	 * duplicates (if any) that are encountered while retrieving
+	 * the data item.
+	 */
+	if (IS_RECORD_BASED(op->file)) {
+		first = (*((db_recno_t *)keyp->data) - 1) % config.nthreads;
+		last = first;
+	} else if (HAS_SECONDARIES(op->file))
+		first = last = 0;
+	else {
+		first = 0;
+		last = thr_id;
+	}
+	ret = check_dupset(thr_id, txn, keyp, datap, first, last, &curs, &dbp);
+
+	/* Change the data item pointed to by the cursor. */
+	if (ret == 0) {
+		/*
+		 * Fill with random data and update the checksum.
+		 * We don't do this if a file has secondaries,
+		 * because changing the data item will change
+		 * entries in the secondaries.
+		 */
+		if (!HAS_SECONDARIES(op->file))
+			config.thread[thr_id]->keygrp->fill_data(
+			    config.thread[thr_id]->keygrp, THR_RS, datap,
+			    data_str->id);
+
+		/* Replace it. */
+		ret = curs->put(curs, keyp, datap, DB_CURRENT);
+	}
+
+	/* Close the cursor and return. */
+	if (curs != NULL) {
+		if ((curs_ret = curs->close(curs)) != 0) {
+			g.dbenv->err(g.dbenv, curs_ret,
+			    "[%ld]: c_close failed", (long)thr_id);
+			return (curs_ret);
+		}
+	}
+	return (ret);
+}
+
+/*
+ * __op_dbs_partial_write - find the duplicate in a DBS-style database
+ * that belongs to this thread and overwrite part of it.
+ */
+int
+__op_dbs_partial_write(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *curs;
+	SET *dsize_dists;
+	struct data old_data, *data_str;
+	int32_t sum;
+	int curs_ret, first, inlen, last;
+	int offset, old_size, outlen, ret;
+	char *new_chars, *replaced_chars;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.thread[thr_id]->keygrp != NULL);
+	assert(config.dbs != 0);
+
+	new_chars = config.thread[thr_id]->tmp_data;
+	replaced_chars = config.thread[thr_id]->tmp_data2;
+
+	/*
+	 * Obtain a cursor pointing to the data item that "belongs" to
+	 * this thread, verifying both the data item itself and the
+	 * duplicates (if any) that are encountered while retrieving
+	 * the data item.
+	 */
+	if (IS_RECORD_BASED(op->file)) {
+		first = (*((db_recno_t *)keyp->data) - 1) % config.nthreads;
+		last = first;
+	} else if (HAS_SECONDARIES(op->file))
+		first = last = 0;
+	else {
+		first = 0;
+		last = thr_id;
+	}
+	ret = check_dupset(thr_id, txn, keyp, datap, first, last, &curs, &dbp);
+	if (ret != 0)
+		goto exit;
+
+	/* Partially overwrite the data item in this thread's duplicate. */
+	datap->flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+	/* Save a copy of the current data item. */
+	data_str = (struct data *)datap->data;
+	memcpy(&old_data, data_str, sizeof(struct data));
+
+	/*
+	 * Choose a random offset and length for the
+	 * characters to be overwritten, and save a copy of
+	 * the overwritten characters.
+	 */
+	datap->doff = random_int(THR_RS, DBS_STR_OFFSET, datap->size - 1);
+	datap->dlen = random_int(THR_RS, 1, datap->size - datap->doff);
+	memcpy(replaced_chars, &(data_str->str[datap->doff - DBS_STR_OFFSET]),
+	    datap->dlen);
+	offset = datap->doff;
+	outlen = datap->dlen;
+
+	/* Generate the random data for the partial write. */
+	if (HAS_SECONDARIES(op->file) || op->file->method == DB_QUEUE ||
+	    (op->file->method == DB_RECNO &&
+	    op->file->method_params != NULL &&
+	    op->file->method_params[0] != NULL))
+		datap->size = datap->dlen;
+	else {
+		/*
+		 * Choose a size for the partial write.
+		 * If size > dlen, the record will grow.
+		 * If size < dlen, the record will shrink.
+		 */
+		old_size = datap->size;
+		dsize_dists = config.thread[thr_id]->keygrp->dsize_dists;
+		datap->size = dsize_dists->select_from_dists(dsize_dists,
+		    THR_RS, 1);
+		/* Make sure we don't exceed the max record length. */
+		if (datap->size > old_size - datap->dlen)
+			datap->size -= (old_size - datap->dlen);
+	}
+
+	/*
+	 * We reuse the old data if the file has secondaries,
+	 * because changing the data item will change entries in the
+	 * secondaries.
+	 */
+	if (HAS_SECONDARIES(op->file))
+		datap->data = (char *)datap->data + datap->doff;
+	else
+		random_data(THR_RS, (char *)datap->data, datap->size);
+	memcpy(new_chars, (char*)datap->data, datap->size);
+	inlen = datap->size;
+
+	/* Compute the new checksum. */
+	sum = old_data.sum;
+	sum -= compute_checksum(&(((char *)&old_data)[datap->doff]),
+	    datap->dlen);
+	sum += compute_checksum((char *)datap->data, datap->size);
+
+	/* Do the partial write. */
+	if ((ret = curs->put(curs, keyp, datap, DB_CURRENT)) != 0)
+		goto exit;
+
+	/* Do another partial write to update the checksum. */
+	datap->size = datap->dlen = sizeof(int32_t);
+	datap->doff = sizeof(int32_t);	/* skip over the id */
+	datap->data = &sum;
+	if ((ret = curs->put(curs, keyp, datap, DB_CURRENT)) != 0)
+		goto exit;
+
+	/* Restore the contents of the DBT pointed to by datap. */
+	datap->data = data_str;
+	datap->flags = DB_DBT_USERMEM;
+	datap->dlen = datap->doff = 0;
+
+#define CHECK
+#ifdef CHECK
+	/* If checking, read back and verify the data. */
+	if ((ret = curs->get(curs, keyp, datap, DB_CURRENT)) != 0)
+		goto exit;
+	sum = compute_checksum(data_str->str, datap->size - DBS_STR_OFFSET);
+	if (sum != data_str->sum)
+		g.dbenv->errx(g.dbenv,
+		    "[%ld] %d != %d for key %s: inlen %d outlen %d "
+		    "off %d,\n old: %d %s\n out: %s\nin:%s",
+		    (long)thr_id, data_str->sum, sum,
+		    (char *)keyp->data, inlen, outlen, offset,
+		    old_data.sum, old_data.str, replaced_chars, new_chars);
+#endif
+
+exit:	/* Close the cursor and return. */
+	if (curs != NULL) {
+		if ((curs_ret = curs->close(curs)) != 0) {
+			g.dbenv->err(g.dbenv, curs_ret,
+			    "[%ld]: c_close failed", (long)thr_id);
+			return (curs_ret);
+		}
+	}
+	return (ret);
+}
+
+/*
+ * __op_dbs_del - delete an item from a DBS-style database, potentially
+ * checking in multiple databases to find it.
+ */
+int
+__op_dbs_del(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	int i, ret, retry;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.dbs != 0);
+	/*
+	 * We can't use this if the file has secondaries or is a
+	 * secondary, because we will lose entries in one or more files.
+	 * Use __op_dbs_del_readd or __op_dbs_del_readd_secon instead.
+	 */
+	assert(!HAS_SECONDARIES(op->file) && !IS_SECONDARY(op->file));
+
+	/*
+	 * Because we may be moving items from one file to
+	 * another, we check for the item in each of the databases.
+	 */
+	retry = 0;
+again:	ret = 0;
+	for (i = 0; i < config.file_count; i++) {
+		dbp = config.file[i]->dbp[thr_id];
+		ret = dbp->del(dbp, txn, keyp, 0);
+		if (ret == 0 || (ret != DB_NOTFOUND && ret != DB_KEYEMPTY))
+			break;
+	}
+	if ((ret == DB_NOTFOUND || ret == DB_KEYEMPTY) &&
+	    config.file[0]->method == DB_QUEUE &&
+	    *(db_recno_t*)keyp->data == 1 && retry++ < 10) {
+		__os_sleep(NULL, 1, 0);
+		goto again;
+	}
+	return (ret);
+}
+
+/*
+ * __op_dbs_add_dupset - add an entire duplicate set to a
+ * DBS-style database. This should be used after __op_del has been
+ * called to delete the duplicate set.
+ */
+int
+__op_dbs_add_dupset(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	int first, data_id, last, ret;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.thread[thr_id]->keygrp != NULL);
+	assert(config.dbs != 0);
+	/*
+	 * We can't use this to add items to a file with secondaries,
+	 * because it may lead to duplicates in a secondary that
+	 * doesn't support them.
+	 */
+	assert(!HAS_SECONDARIES(op->file));
+
+	ret = 0;
+
+	/* Determine the ids of the first and last duplicates in the set. */
+	if (IS_RECORD_BASED(op->file)) {
+		first = (*((db_recno_t *)keyp->data) - 1) % config.nthreads;
+		last = first;
+	} else {
+		first = 0;
+		last = config.nthreads - 1;
+	}
+
+	/* Add the set of duplicates. */
+	for (data_id = first; data_id <= last; data_id++) {
+		/* Fill with random data and update the checksum. */
+		config.thread[thr_id]->keygrp->fill_data(
+		    config.thread[thr_id]->keygrp, THR_RS, datap, data_id);
+
+		/* Add the item. */
+		if ((ret = dbp->put(dbp, txn, keyp, datap, 0)) != 0)
+			break;
+	}
+	return (ret);
+}
+
+/*
+ * __op_dbs_del_readd - find the duplicate in a DBS-style database
+ * that belongs to this thread, delete it, and readd it.
+ */
+int
+__op_dbs_del_readd(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *curs;
+	struct data *data_str;
+	int curs_ret, first, last, move_flag, put_flag, ret, sortdups;
+	int retry;
+	char *tmpkey;
+	void *keyptr;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.thread[thr_id]->keygrp != NULL);
+	assert(config.dbs != 0);
+	/* We use __op_del_readd_secon for secondaries. */
+	assert(!IS_SECONDARY(op->file));
+
+	keyptr = keyp->data;
+	data_str = (struct data *)datap->data;
+	tmpkey = config.thread[thr_id]->tmp_key;
+	retry = 0;
+	put_flag = 0;
+
+	/*
+	 * Obtain a cursor pointing to the data item that "belongs" to
+	 * this thread, verifying both the data item itself and the
+	 * duplicates (if any) that are encountered while retrieving
+	 * the data item.
+	 */
+	if (IS_RECORD_BASED(op->file)) {
+		first = (*((db_recno_t *)keyp->data) - 1) % config.nthreads;
+		last = first;
+	} else if (HAS_SECONDARIES(op->file))
+		first = last = 0;
+	else {
+		first = 0;
+		last = thr_id;
+	}
+again:
+	ret = check_dupset(thr_id, txn, keyp, datap, first, last, &curs, &dbp);
+	if (ret != 0)
+		goto exit;
+
+	/* Delete the duplicate. */
+	ret = curs->del(curs, 0);
+	if ((ret == DB_NOTFOUND || ret == DB_KEYEMPTY) &&
+	    config.file[0]->method == DB_QUEUE &&
+	    *(db_recno_t*)keyp->data == 1 && retry++ < 10) {
+		__os_sleep(NULL, 1, 0);
+		goto again;
+	}
+	if (ret != 0)
+		goto exit;
+
+	if (!IS_RECORD_BASED(op->file) && !HAS_SECONDARIES(op->file)) {
+		sortdups = op->file->flags & DB_DUPSORT;
+		if (thr_id != 0) {
+			move_flag = DB_PREV;
+			put_flag = sortdups ? DB_KEYFIRST : DB_AFTER;
+		} else {
+			move_flag = DB_NEXT;
+			put_flag = sortdups ? DB_KEYFIRST : DB_BEFORE;
+		}
+
+		/*
+		 * Reposition the cursor by doing a get that returns
+		 * 0 bytes.
+		 */
+		datap->flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+		datap->dlen = 0;
+		keyp->data = tmpkey;
+		ret = curs->get(curs, keyp, datap, move_flag);
+		if (ret != 0 && ret != DB_NOTFOUND && ret != DB_KEYEMPTY)
+			goto exit;
+		data_str->id = thr_id;	/* just to be safe */
+		datap->flags = DB_DBT_USERMEM;
+	}
+
+	/*
+	 * Fill with random data and update the checksum -- unless the
+	 * file has secondaries, in which case we reuse the prior
+	 * value of the data item so that the deleted secondary items
+	 * will also be restored.
+	 */
+	if (!HAS_SECONDARIES(op->file))
+		config.thread[thr_id]->keygrp->fill_data(
+		    config.thread[thr_id]->keygrp, THR_RS, datap, data_str->id);
+
+	/*
+	 * If we are running with one thread, then there is the
+	 * possibility that we're now on a new key or that we have run
+	 * off the end of the file. If that is the case, or if this
+	 * is a record-based access method or a database with
+	 * secondaries, we need to do a regular put. Otherwise, we do
+	 * a cursor put.
+	 */
+	keyp->data = keyptr;
+	if (IS_RECORD_BASED(op->file) || HAS_SECONDARIES(op->file) ||
+	    (config.nthreads == 1 &&
+	    (ret == DB_NOTFOUND || memcmp(tmpkey, keyptr, keyp->size) != 0)))
+		ret = dbp->put(dbp, txn, keyp, datap, 0);
+	else
+		ret = curs->put(curs, keyp, datap, put_flag);
+
+	/* Close the cursor and return. */
+exit:	if (curs != NULL) {
+		if ((curs_ret = curs->close(curs)) != 0) {
+			g.dbenv->err(g.dbenv, curs_ret,
+			    "[%ld]: c_close failed", (long)thr_id);
+			return (curs_ret);
+		}
+	}
+	return (ret);
+}
+
+/*
+ * __op_dbs_del_readd_secon - delete a duplicate in a secondary
+ * and then readd it (along with any other related secondary items) by
+ * adding the appropriate data item to the primary.
+ */
+int
+__op_dbs_del_readd_secon(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBT pkey;
+	DBC *curs;
+	db_recno_t num_dups;
+	int curs_ret, ret, which_dup;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.thread[thr_id]->keygrp != NULL);
+	assert(config.dbs != 0);
+	assert(IS_SECONDARY(op->file));
+
+	memset(&pkey, 0, sizeof(pkey));
+	pkey.flags = DB_DBT_USERMEM;
+	pkey.data = config.thread[thr_id]->tmp_key;
+	pkey.ulen = MAX_KEY_SIZE;
+
+	/* Open a cursor. */
+	ret = dbp->cursor(dbp, txn, &curs,
+	    (config.env_flags&DB_INIT_CDB ? DB_WRITECURSOR : 0));
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Move the cursor to the first item with the specified key.
+	 * We use DBC->pget() because we need the primary key to
+	 * readd the deleted items.
+	 */
+	ret = curs->pget(curs, keyp, &pkey, datap, DB_SET|op->flags);
+	if (ret != 0)
+		goto exit;
+
+	/* Choose a duplicate at random. */
+	num_dups = 0;
+	ret = curs->count(curs, &num_dups, 0);
+	if (ret != 0)
+		goto exit;
+	which_dup = random_int(THR_RS, 1, (int)num_dups);
+
+	/* Advance the cursor to the right duplicate. */
+	do {
+		which_dup--;
+	} while (which_dup > 0 &&
+	    (ret = curs->pget(curs, keyp, &pkey,
+	    datap, DB_NEXT_DUP|op->flags)) == 0);
+	if (ret == DB_NOTFOUND)
+		ret = 0;
+	if (ret != 0)
+		goto exit;
+
+	if (g.verbose) {
+		if (IS_RECORD_BASED(op->file->primary))
+			fprintf(g.outfp, "[%d: %d] (%x) primary key = %lu\n",
+			    thr_id, config.thread[thr_id]->txn_count,
+			    (txn != NULL ? txn->id(txn) : 0),
+			    (u_long)*(db_recno_t *)pkey.data);
+		else {
+			((char *)pkey.data)[pkey.size] = '\0';
+			fprintf(g.outfp, "[%d: %d] (%x) primary key = %s\n",
+			    thr_id, config.thread[thr_id]->txn_count,
+			    (txn != NULL ? txn->id(txn) : 0),
+			    (char *)pkey.data);
+		}
+	}
+
+	/*
+	 * Delete the duplicate in the secondary -- which should also
+	 * delete the related items in the primary and other
+	 * secondaries.
+	 */
+	if ((ret = curs->del(curs, 0)) != 0)
+		goto exit;
+
+	/* Readd the deleted items via the primary. */
+	ret = op->file->primary->dbp[thr_id]->put(
+	    op->file->primary->dbp[thr_id], txn, &pkey, datap, 0);
+
+	/* Close the cursor and return. */
+exit:	if (curs != NULL) {
+		if ((curs_ret = curs->close(curs)) != 0) {
+			g.dbenv->err(g.dbenv, curs_ret,
+			    "[%ld]: c_close failed", (long)thr_id);
+			return (curs_ret);
+		}
+	}
+	return (ret);
+}
+
+/*
+ * __op_dbs_swap_data - find the duplicate in a DBS-style database
+ * that belongs to this thread and swap its data value with the data
+ * value of another randomly chosen key. This allows us to modify the
+ * data items in a primary index (and thus the associations between
+ * the primary and its secondaries) without losing any of the
+ * secondary keys. XXX At present, this operation only works for
+ * databases without duplicates.
+ */
+int
+__op_dbs_swap_data(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBT other_key, other_data;
+	DBC *curs, *other_curs;
+	struct data *data_str, *other_data_str;
+	u_int32_t tmp;
+	int curs_ret, first, last, ret;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.thread[thr_id]->keygrp != NULL);
+	assert(config.dbs != 0);
+	assert(!(op->file->flags&DB_DUP));
+
+	/* These will be used to store the other item. */
+	memset(&other_key, 0, sizeof(other_key));
+	other_key.flags = DB_DBT_USERMEM;
+	other_key.data = config.thread[thr_id]->tmp_key;
+	other_key.ulen = MAX_KEY_SIZE;
+	memset(&other_data, 0, sizeof(other_data));
+	other_data.flags = DB_DBT_USERMEM;
+	other_data.data = config.thread[thr_id]->tmp_data;
+	other_data.ulen = MAX_DATA_SIZE;
+
+	/* Open a second cursor for the other item. */
+	other_curs = NULL;
+	ret = dbp->cursor(dbp, txn, &other_curs,
+	    (config.env_flags&DB_INIT_CDB ? DB_WRITECURSOR : 0));
+	if (ret != 0)
+		return (ret);
+
+	/*
+	 * Obtain a cursor pointing to the data item that "belongs" to
+	 * this thread, verifying both the data item itself and the
+	 * duplicates (if any) that are encountered while retrieving
+	 * the data item.
+	 */
+	if (IS_RECORD_BASED(op->file)) {
+		first = (*((db_recno_t *)keyp->data) - 1) % config.nthreads;
+		last = first;
+	} else if (HAS_SECONDARIES(op->file))
+		first = last = 0;
+	else {
+		first = 0;
+		last = thr_id;
+	}
+	ret = check_dupset(thr_id, txn, keyp, datap, first, last, &curs, &dbp);
+
+	/* Perform the swap. */
+	if (ret == 0) {
+		/* Select and retrieve the other data item. */
+		op->file->select_key(op->file, THR_RS, &other_key);
+		if (g.verbose) {
+			if (IS_RECORD_BASED(op->file))
+				fprintf(g.outfp, "[%d: %d] (%x) "
+				    "other key = %lu\n",
+				    thr_id, config.thread[thr_id]->txn_count,
+				    (txn != NULL ? txn->id(txn) : 0),
+				    (u_long)*(db_recno_t *)other_key.data);
+			else
+				fprintf(g.outfp, "[%d: %d] (%x) "
+				    "other key = %s\n",
+				    thr_id, config.thread[thr_id]->txn_count,
+				    (txn != NULL ? txn->id(txn) : 0),
+				    (char *)other_key.data);
+		}
+		ret = other_curs->get(other_curs, &other_key,
+		    &other_data, DB_SET);
+		if (ret != 0)
+			goto end;
+
+		/*
+		 * We need to delete the other item first in case the
+		 * file has one or more secondaries that do not
+		 * support duplicates.
+		 */
+		if ((ret = other_curs->del(other_curs, 0)) != 0)
+			goto end;
+
+		/* Swap the ids. */
+		data_str = (struct data *)datap->data;
+		other_data_str = (struct data *)other_data.data;
+		tmp = data_str->id;
+		data_str->id = other_data_str->id;
+		other_data_str->id = tmp;
+
+		/* Put the swapped data values. */
+		if ((ret = dbp->put(dbp, txn, keyp, &other_data, 0)) != 0)
+			goto end;
+		/*
+		 * We can't use c_put() with DB_CURRENT for the other
+		 * item, because we deleted that item above.
+		 */
+		ret = dbp->put(dbp, txn, &other_key, datap, 0);
+	}
+
+	/* Close the cursors and return. */
+end:	if (curs != NULL) {
+		if ((curs_ret = curs->close(curs)) != 0) {
+			g.dbenv->err(g.dbenv, curs_ret,
+			    "[%ld]: c_close failed", (long)thr_id);
+			return (curs_ret);
+		}
+	}
+	if (other_curs != NULL) {
+		if ((curs_ret = other_curs->close(other_curs)) != 0) {
+			g.dbenv->err(g.dbenv, curs_ret,
+			    "[%ld]: c_close failed", (long)thr_id);
+			return (curs_ret);
+		}
+	}
+	return (ret);
+}
+
+/*
+ * __op_qtest_scan - Scan the contents of a queue database.
+ */
+int
+__op_qtest_scan(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *curs;
+	int curs_ret, found, move_flag, ret;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(op->file->method == DB_QUEUE);
+
+	/*
+	 * Switch directions on each call. This works because of
+	 * the way that the transaction-count is incremented below.
+	 */
+	if ((config.thread[thr_id]->txn_count / 10) % 2 == 1)
+		move_flag = DB_PREV;
+	else
+		move_flag = DB_NEXT;
+
+	/* Open a cursor. */
+	ret = dbp->cursor(dbp, txn, &curs, 0);
+	if (ret != 0)
+		return (ret);
+
+	/* Wait for at least one record to be on the queue. */
+	while ((ret = curs->get(curs, keyp, datap, move_flag)) != 0 &&
+	    ret == DB_NOTFOUND)
+		__os_sleep(NULL, 1, 0);
+	if (ret != 0)
+		goto exit;
+
+	/* Count the records that we find on the queue.
*/ + found = 0; + while (ret != DB_NOTFOUND) { + found++; + if ((ret = curs->get(curs, keyp, datap, move_flag)) != 0 && + ret != DB_NOTFOUND) + goto exit; + } + ret = 0; + fprintf(g.outfp, "[%ld: %d] found %d\n", (long)thr_id, + config.thread[thr_id]->txn_count, found); + + /* + * This will cause us to scan in the opposite direction + * on the next call. + */ + config.thread[thr_id]->txn_count += 49; + +exit: if ((curs_ret = curs->close(curs)) != 0) { + g.dbenv->err(g.dbenv, curs_ret, + "[%ld]: c_close failed", thr_id); + return (curs_ret); + } + return (ret); +} + +/* + * compact_stat_print -- + * Display btree/recno compact statistics. + */ +int +compact_stat_print(dbenv, sp, flags) + DB_ENV *dbenv; + DB_COMPACT *sp; + u_int32_t flags; +{ + if (LF_ISSET(DB_STAT_ALL)) { + __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); + __db_msg(dbenv, "Default Btree/Recno compact information:"); + } + +#ifdef HAVE_STATISTICS + __db_dl(dbenv, "Pages freed", (u_long)sp->compact_pages_free); + __db_dl(dbenv, "Pages examined", (u_long)sp->compact_pages_examine); + __db_dl(dbenv, "Levels removed", (u_long)sp->compact_levels); + __db_dl(dbenv, "Deadlocks encountered", (u_long)sp->compact_deadlock); +#else + COMPQUIET(sp, NULL); +#endif + + return (0); +} + +int +__op_reorg(op, thr_id, countp, dbp, txn, keyp, datap) + OP *op; + int thr_id, *countp; + DB *dbp; + DB_TXN *txn; + DBT *keyp, *datap; +{ + int ret; + DB_COMPACT c_data; + + COMPQUIET(countp, NULL); + COMPQUIET(txn, NULL); + assert(dbp != NULL && keyp != NULL && datap != NULL); + assert(op->file->method == DB_BTREE || op->file->method == DB_RECNO); + memset(&c_data, 0, sizeof(c_data)); + + c_data.compact_fillpercent = random_int(THR_RS, 66, 100); + ret = dbp->compact(dbp, NULL, NULL, NULL, &c_data, + random_int(THR_RS, 0, 3) ? DB_FREE_SPACE : 0, NULL); + /* This is very expensive, so only do 1/20 of the operations. 
*/ + config.thread[thr_id]->txn_count += 19; + + if (ret == 0) + ret = compact_stat_print(dbp->dbenv, &c_data, 0); + + return (ret); +} + +/* + * dbs_dup_compare - compare DBS duplicates by id number + */ +int +dbs_dup_compare(dbp, dbt1, dbt2) + DB *dbp; + const DBT *dbt1, *dbt2; +{ + struct data *data1, *data2; + + dbp = NULL; /* Quiet the compiler. */ + + data1 = dbt1->data; + data2 = dbt2->data; + + return (memcmp(&data1->id, &data2->id, sizeof(data1->id))); +} + +/* + * dbs_int_compare - a btree-comparison function used when the + * DB_RECNUM flag is specified. + */ +int +dbs_int_compare(dbp, dbt1, dbt2) + DB *dbp; + const DBT *dbt1, *dbt2; +{ + int data1, data2; + + dbp = NULL; /* Quiet the compiler. */ + + memcpy(&data1, dbt1->data, sizeof(data1)); + memcpy(&data2, dbt2->data, sizeof(data2)); + + return (data1 - data2); +} + +/* + * dbs_check_databases - confirm the contents of all the data items + * in databases created in DBS mode. + */ +int +dbs_check_databases() +{ + DB *dbp; + DBC *curs; + DBT data, key, skey; + FILE_INFO *file, *secon; + KEY_GROUP *keygrp; + struct data datastr; + db_recno_t recno; + int env_flags, first, i, j, k, last, ret, s, size_save; + + assert(config.nthreads > 0); + + /* + * Open the DB_ENV and DB handles. + * Don't recover - that's done separately. + */ + env_flags = config.env_flags; + config.env_flags = DB_INIT_MPOOL | DB_THREAD | DB_PRIVATE | DB_CREATE; + ret = open_handles(0, 0); + config.env_flags = env_flags; + + if (ret != 0) + return (1); + + /* Initialize the DBTs. */ + memset(&key, 0, sizeof(key)); + key.flags = DB_DBT_USERMEM; + memset(&skey, 0, sizeof(skey)); + skey.flags = DB_DBT_USERMEM; + memset(&data, 0, sizeof(data)); + data.data = &datastr; + data.ulen = MAX_DATA_SIZE; + data.flags = DB_DBT_USERMEM; + + /* Iterate through the files. 
*/ + for (i = 0; i < config.file_count; i++) { + file = config.file[i]; + g.dbenv->errx(g.dbenv, "Checking file: %s", file->name); + + if (IS_RECORD_BASED(file)) { + key.data = &recno; + key.ulen = key.size = sizeof(recno); + } else { + key.data = config.thread[0]->keystr; + key.ulen = MAX_KEY_SIZE; + } + + /* Iterate through the key groups for this file. */ + for (j = 0; j < file->key_groups->item_count; j++) { + keygrp = file->key_groups->item[j]; + + /* Check the keys for this key group. */ + keygrp->first_key(keygrp, &key); + for (k = 0; k < keygrp->key_count; k++) { + if (IS_RECORD_BASED(file)) { + first = + (*((db_recno_t *)key.data) - 1) % + config.nthreads; + last = first; + } else if (HAS_SECONDARIES(file)) + first = last = 0; + else { + first = 0; + last = config.nthreads - 1; + } + + if (IS_SECONDARY(file)) + ret = check_sec_key(file->dbp[0], NULL, + &key, &data); + else { + ret = check_dupset(0, NULL, &key, + &data, first, last, &curs, &dbp); + if (curs != NULL) + (void)curs->close(curs); + } + + if (HAS_SECONDARIES(file)) { + data.flags |= DB_DBT_PARTIAL; + data.dlen = 0; + size_save = data.size; + for (s = 0; + s < file->secondaries->item_count; + s++) { + secon = file->secondaries-> + item[s]; + (void)secon->callback( + secon->dbp[0], &key, + &data, &skey); + ret = secon->dbp[0]->get( + secon->dbp[0], NULL, &skey, + &data, 0); + if (ret != 0) { + g.dbenv->errx(g.dbenv, + "secon key for %s" + " not found", + secon->name); + break; + } + data.size = size_save; + } + data.flags = DB_DBT_USERMEM; + } + + if (ret != 0) { + if (IS_RECORD_BASED(file)) + g.dbenv->errx(g.dbenv, + "check failed for %d: %s", + recno, + db_strerror(ret)); + else + g.dbenv->errx(g.dbenv, + "check failed for %s: %s", + config.thread[0]->keystr, + db_strerror(ret)); + return (ret); + } + + if (keygrp->next_key(keygrp, &key) != 0) + break; + } + } + } + + /* Close the DB_ENV and DB handles. 
*/ + if (close_handles() != 0) + return (1); + + return (0); +} + +/* + * dbs_output_info - output information to a run-log file about the + * current run so that we will know what happened during a post + * mortem. + */ +void +dbs_output_info(do_init, verify, thread_id, use_procs) + int do_init, verify, thread_id, use_procs; +{ + FILE *fp; + FILE_INFO *file; + time_t now; + int i; + const char *access_method; + char time_buf[CTIME_BUFLEN]; + + file = NULL; + fp = fopen(config.output_info_fname, "a"); + if (fp == NULL) + return; + + (void)time(&now); + fprintf(fp, "=-=-=-=-=-=-=-=\n%s", __db_ctime(&now, time_buf)); + + if (do_init) { + fprintf(fp, "\t%s -I -h %s -c %s\tnthreads: %ld\n", + g.progname, g.home, g.config, config.nthreads); + + fprintf(fp, "\tdatabases: "); + for (i = 0; i < config.file_count; i++) { + file = config.file[i]; + switch (file->method) { + case DB_BTREE: + access_method = "btree"; + break; + case DB_HASH: + access_method = "hash"; + break; + case DB_QUEUE: + access_method = "queue"; + break; + case DB_RECNO: + access_method = "recno"; + break; + default: + access_method = "unknown"; + } + if (i > 0) + fprintf(fp, ", "); + if (file->subdb_name != NULL) + fprintf(fp, "%s in ", file->subdb_name); + fprintf(fp, "%s: %s - %d", + file->name, access_method, file->pagesize); + if (IS_SECONDARY(file)) + fprintf(fp, " (secondary)"); + if (file->flags & DB_ENCRYPT) + fprintf(fp, " (encrypt)"); + if (file->flags & DB_CHKSUM) + fprintf(fp, " (checksum)"); + } + fprintf(fp, "\n"); + + if (file->flags & DB_DUPSORT) + fprintf(fp, "\tsorted duplicates\n"); + else if (file->flags & DB_RECNUM) + fprintf(fp, "\taccess by record numbers\n"); + } else if (verify) { + if (g.recover) + fprintf(fp, "\t%s %s -h %s\n", + g.progname, (g.recover == 1 ? 
"-Vx" : "-VX"), + g.home); + else + fprintf(fp, "\t%s -V -h %s\n", + g.progname, g.home); + } else { + fprintf(fp, "\t%s -h %s -S %d", g.progname, + g.home, g.seed); + if (thread_id != -1) + fprintf(fp, " (id = %d)\n", thread_id); + else { + if (use_procs != 0) + fprintf(fp, " -F"); + if (g.checkpoint) + fprintf(fp, " -C"); + if (g.logclean && config.archive == NULL) + fprintf(fp, " -L"); + if (config.env_flags&DB_PRIVATE) + fprintf(fp, " -p"); + fprintf(fp, "\n"); + fprintf(fp, "\t%ld iterations per thread\n", + g.iterations); + switch (config.deadlock) { + case DB_LOCK_NORUN: + break; + case DB_LOCK_DEFAULT: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_DEFAULT\n"); + break; + case DB_LOCK_EXPIRE: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_EXPIRE\n"); + break; + case DB_LOCK_MAXLOCKS: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_MAXLOCKS\n"); + break; + case DB_LOCK_MAXWRITE: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_MAXWRITE\n"); + break; + case DB_LOCK_MINLOCKS: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_MINLOCKS\n"); + break; + case DB_LOCK_MINWRITE: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_MINWRITE\n"); + break; + case DB_LOCK_OLDEST: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_OLDEST\n"); + break; + case DB_LOCK_RANDOM: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_RANDOM\n"); + break; + case DB_LOCK_YOUNGEST: + fprintf(fp, + "\tdeadlock thread: DB_LOCK_YOUNGEST\n"); + break; + } + if (config.archive != NULL) + switch (g.logclean) { + case CLEAN_NONE: + break; + case CLEAN_ARCHIVE: + fprintf(fp, + "\tarchive: %s\n", config.archive); + break; + case CLEAN_UPDATE: + fprintf(fp, "\tfatal archive: %s\n", + config.archive); + break; + case CLEAN_HOTBACKUP: + fprintf(fp, "\thotbackup: %s\n", + config.archive); + break; + case CLEAN_HOTUPDATE: + fprintf(fp, "\thotupdate: %s\n", + config.archive); + break; + + } + if (config.killtest_iter != 0) + fprintf(fp, "\tkill test\n"); + if (config.use_multiple_handles) + fprintf(fp, "\tuse multiple DB handles\n"); + if 
(config.write_err_start > 0) + fprintf(fp, "\twrite errors\n"); + } + } + + (void)fclose(fp); +} --- db-4.6.21/test_perf/perf_dead.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_dead.c 2007-11-09 07:40:28.629197000 +1100 @@ -0,0 +1,55 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999,2007 Oracle. All rights reserved. + * + * $Id: perf_dead.c,v 12.6 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +static os_thread_t *dead_threads; /* Deadlock threads. */ + +int +dead_init() +{ + return ((dead_threads = + spawn_kids("dead threads", 1, dead_thread)) == NULL ? 1 : 0); +} + +int +dead_shutdown() +{ + return (wait_kids("dead threads", dead_threads)); +} + +void * +dead_thread(arg) + void *arg; +{ + int abort, ret; + + arg = 0; + g.dbenv->errx(g.dbenv, "Deadlock thread: %lu", (u_long)os_thread_id()); + + for (;;) { + if ((ret = + g.dbenv->lock_detect(g.dbenv, + 0, config.deadlock, &abort)) != 0) { + g.shutdown = 1; + g.dbenv->err(g.dbenv, ret, + "Deadlock thread: failed"); + return (NULL); + } + + if (g.verbose && abort != 0) + g.dbenv->errx(g.dbenv, + "Deadlock thread aborted: %d.", abort); + + if (g.shutdown) + return (NULL); + + __os_sleep(g.dbenv, 0, 100000); + } + /* NOTREACHED */ +} --- db-4.6.21/test_perf/perf_debug.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_debug.c 2007-11-05 15:51:13.631920000 +1100 @@ -0,0 +1,35 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999,2007 Oracle. All rights reserved. + * + * $Id: perf_debug.c,v 12.6 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +int debug_on; /* Enable checking. */ +int debug_print; /* Display current counter. */ +int debug_stop; /* Stop on each iteration. */ +int debug_test; /* Stop on iteration N. */ + +/* + * debug_check -- + * Convenient way to set breakpoints. 
+ */ +void +debug_check() +{ + extern void __db_loadme(); + + if (debug_on == 0) + return; + + if (debug_print != 0) { + (void)fprintf(g.outfp, "\r%6d:", debug_on); + fflush(g.outfp); + } + + if (debug_on++ == debug_test || debug_stop) + __db_loadme(); +} --- db-4.6.21/test_perf/perf_extern.h 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_extern.h 2007-11-07 22:37:28.598697000 +1100 @@ -0,0 +1,805 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1998,2007 Oracle. All rights reserved. + * + * $Id: perf_extern.h,v 12.19 2007/05/17 15:15:58 bostic Exp $ + */ + +#ifndef __PERF_EXTERN +#define __PERF_EXTERN + +#include "db_config.h" + +#include "db_int.h" + +#ifdef HAVE_SYSTEM_INCLUDE_FILES +#include +#include + +#include +#include + +#ifdef HAVE_VXWORKS +#include "ioLib.h" +#else +#include +#endif +#endif + +#include "dbinc/mp.h" +#include "dbinc/xa.h" + +#define DATAHOME "TESTDIR" /* Environment home. */ +#define OUTFILE "OUTPUT" /* Output file */ +#define RUNLOG "RUN_LOG" /* Log of execution */ +#define MAX_KEY_SIZE 1000 +#define MAX_DATA_SIZE 16384 +#define MAX_SECONDARIES 10 +#define RECLEN_DEFAULT 256 +#define SHMKEY_DBS 30 +#define DBNAME_MAXSIZE 80 + +/* predicates for FILE_INFO objects */ +#define IS_RECORD_BASED(F) ((F)->flags & DB_RECNUM || \ + (F)->method == DB_QUEUE || (F)->method == DB_RECNO) +#define IS_SECONDARY(F) ((F)->primary != NULL) +#define HAS_SECONDARIES(F) ((F)->secondaries != NULL) + +/* ops that make use of DBC->get() */ +#define IS_CURSOR_OP(O) ((O)->type == OP_CURS_READ || \ + (O)->type == OP_CURS_UPDATE || (O)->type == OP_CURS_DEL || \ + (O)->type == OP_CURS_UPDATE_PRIM) + +/* non-DBS ops that read and (possibly) write items */ +#define IS_UPDATE_OP(O) ((O)->type == OP_UPDATE_NODUP || \ + (O)->type == OP_UPDATE_DUP || (O)->type == OP_UPDATE_PRIM || \ + (O)->type == OP_CURS_UPDATE || (O)->type == OP_CURS_UPDATE_PRIM) + +/* ops that are only used in DBS mode */ +#define 
IS_DBS_OP(O) ((O)->type == OP_DBS_READ || \ + (O)->type == OP_DBS_READ_DUPSET || (O)->type == OP_DBS_UPDATE || \ + (O)->type == OP_DBS_PARTIAL_WRITE || (O)->type == OP_DBS_DEL || \ + (O)->type == OP_DBS_ADD_DUPSET || (O)->type == OP_DBS_DEL_READD || \ + (O)->type == OP_DBS_DEL_READD_SECON || \ + (O)->type == OP_DBS_SWAP_DATA || (O)->type == OP_REORG) + +/* thread-model definitions */ +#if HAVE_VXWORKS + #define STACK_SIZE 65536*2 + typedef int os_thread_t; + #define os_thread_id() taskIdSelf() + #define os_thread_create(name, pid, func, arg) \ + ((*pid = taskSpawn(name, 200, VX_FP_TASK, STACK_SIZE, \ + (FUNCPTR) func, i, 0, 0, 0, 0, 0, 0, 0, 0, 0)) == ERROR) +#elif DB_WIN32 + typedef HANDLE os_thread_t; + #define os_thread_id() GetCurrentThreadId() + #define os_thread_create(name, pid, func, arg) \ + ((*pid = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, \ + arg, 0, NULL)) == NULL) + #define S_ISDIR(m) ((m) & _S_IFDIR) +#else + typedef pthread_t os_thread_t; + #define os_thread_id() pthread_self() + #define os_thread_create(name, pid, func, arg) \ + (errno = pthread_create(pid, NULL, func, arg)) +#endif + +#if DB_WIN32 + #undef strcasecmp + #define strcasecmp _stricmp + #undef strncasecmp + #define strncasecmp _strnicmp + #undef lseek + #define lseek _lseeki64 + #define mkdir(d, perm) _mkdir(d) + + typedef uintptr_t os_pid_t; + #define OS_BAD_PID ((os_pid_t)NULL) + + #define os_access _access + #define W_OK 02 + #define R_OK 04 + #define X_OK (W_OK | R_OK) +#else + typedef int os_pid_t; + #define OS_BAD_PID -1 + + #define os_access access +#endif + +#define MKTIME(ts) ((ts).tv_sec + (double)(ts).tv_nsec / NS_PER_SEC) + +#include "perf_rand.h" + +/* Forward structure declarations */ +struct __file_info; typedef struct __file_info FILE_INFO; +struct __prob_dist; typedef struct __prob_dist PROB_DIST; +struct __set; typedef struct __set SET; +struct __key_group; typedef struct __key_group KEY_GROUP; +struct __op; typedef struct __op OP; +struct __txn_type; 
typedef struct __txn_type TXN_TYPE; +struct __thr_type; typedef struct __thr_type THR_TYPE; +struct __thr_info; typedef struct __thr_info THR_INFO; +struct __set_node; typedef struct __set_node SET_NODE; +struct __ttype_node; typedef struct __ttype_node TTYPE_NODE; +struct __scan; typedef struct __scan SCAN; +struct __stat; typedef struct __stat STAT; + +/* Format for data items in DBS mode */ +#define DBS_STR_OFFSET (2 * sizeof(int32_t)) +struct data { + u_int32_t id; + int32_t sum; + char str[MAX_DATA_SIZE - DBS_STR_OFFSET]; +}; + +/* Types of probability distributions */ +typedef enum { + DIST_UNIFORM, + DIST_NORMAL +} dist_type; + +/* A probability distribution over integers */ +struct __prob_dist { + dist_type type; + union { + int lower_bound; + double mean; + } param1; + union { + int upper_bound; + double std_dev; + } param2; + + int (*select_int) __P((PROB_DIST *, db_rand_state *)); + int (*get_maxval) __P((PROB_DIST *)); + int (*get_minval) __P((PROB_DIST *)); + int (*get_meanval) __P((PROB_DIST *)); +}; + +/* + * A set of items and associated frequencies. Calling select_item() + * returns items according to the specified frequencies. 
+ */ +struct __set { + void **item; /* array of items */ + int *freq; /* array of frequencies */ + int max_items; + int item_count; + int freq_total; + + void (*add_item) __P((SET *, void *, int)); + void (*enlarge) __P((SET *)); + void* (*select_item) __P((SET *, db_rand_state *)); + int (*select_from_dists) __P((SET *, db_rand_state *, int)); + int (*mean_val_dists) __P((SET *)); + int (*min_val_dists) __P((SET *)); + int (*max_val_dists) __P((SET *)); +}; + +/* Information about a database */ +#define FILE_INFO_NO_INIT 0x001 +#define FILE_INFO_DIRTY_READ 0x002 +#define FILE_INFO_MULTIVERSION 0x004 +struct __file_info { + DB **dbp; /* pointers to one or more DB handles */ + char *name; + char *subdb_name; + DBTYPE method; + void **method_params; + int max_params; + int flags; + int pagesize; + int special; /* special flags */ + DB_CACHE_PRIORITY priority; + + /* info. about keys */ + SET *key_groups; + int num_key_chars; + + /* associated primary index and callback, if any */ + FILE_INFO *primary; + int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); + /* + * the index of this file in the list of secondary indices + * associated with the primary. + */ + int secon_num; + + /* associated secondary indices, if any */ + SET *secondaries; + + /* + * number of items currently in the database. This is only + * used during initialization, and it is not maintained after + * the database has been initialized. 
+ */ + int nitems; + + int (*open_handles) __P((FILE_INFO *, int)); + int (*close_handles) __P((FILE_INFO *)); + void (*init_method_params) __P((FILE_INFO *)); + int (*set_method_params) __P((FILE_INFO *, int)); + int (*init) __P((FILE_INFO *)); + int (*add_items) __P((FILE_INFO *, db_rand_state *, + KEY_GROUP *, DB_TXN *, DBT *, DBT *, int *)); + void (*select_key) __P((FILE_INFO *, db_rand_state *, DBT *)); + void (*select_key_from_keygrp) __P((FILE_INFO *, + db_rand_state *, int, DBT *)); + int (*num_to_key) __P((FILE_INFO *, int, DBT *)); + long (*get_size) __P((FILE_INFO *)); + int (*get_num_items) __P((FILE_INFO *)); + int (*avg_key_size) __P((FILE_INFO *)); + int (*max_key_size) __P((FILE_INFO *)); + int (*avg_data_size) __P((FILE_INFO *)); + int (*min_data_size) __P((FILE_INFO *)); +}; + +/* Types of key groups */ +typedef enum { + KEYGRP_CHARS_PER_SLOT, + KEYGRP_KEY_COUNT, + KEYGRP_REC +} keygrp_type; + +/* + * Information about a group of keys. Each key group consists of keys + * of the same size. + */ +struct __key_group { + keygrp_type type; + FILE_INFO *file; + int key_size; + int key_count; + + /* + * array containing the number of possible char's + * for each element of the key (used for non-record-based + * keygroups only). 
+ */ + int *chars_per_slot; + + /* + * the size of the set of characters from which the + * keys should be composed (used for KEYGRP_NUM_KEYS-type + * key groups only) + */ + int char_set_size; + + /* the first key of this keygroup */ + char *start_key; + + /* for record-based databases, we use this instead of start key */ + int start_recno; + + /* distribution over the number of duplicate items */ + SET *numdup_dists; + + /* distribution over the sizes of the data items */ + SET *dsize_dists; + + void (*select_key) __P((KEY_GROUP *, db_rand_state *, DBT *)); + void (*first_key) __P((KEY_GROUP *, DBT *)); + int (*next_key) __P((KEY_GROUP *, DBT *)); + int (*select_numdups) __P((KEY_GROUP *, db_rand_state *)); + void (*fill_data) __P((KEY_GROUP *, db_rand_state *, DBT *, int)); + int (*num_to_key) __P((KEY_GROUP *, int, DBT *)); +}; + +#define NUM_OP_TYPES 25 +typedef enum { + OP_READ_NODUP, OP_READ_DUP, OP_READ_MULTI, OP_UPDATE_NODUP, + OP_UPDATE_DUP, OP_UPDATE_PRIM, OP_OVERWRITE, OP_ADD_NODUP, + OP_ADD_DUP, OP_DEL, OP_CURS_READ, OP_CURS_UPDATE, + OP_CURS_UPDATE_PRIM, OP_CURS_DEL, OP_DBS_READ, OP_DBS_READ_DUPSET, + OP_DBS_UPDATE, OP_DBS_PARTIAL_WRITE, OP_DBS_DEL, OP_DBS_ADD_DUPSET, + OP_DBS_DEL_READD, OP_DBS_DEL_READD_SECON, OP_DBS_SWAP_DATA, + OP_QTEST_SCAN, OP_REORG +} op_type; + +extern const char *op_names[NUM_OP_TYPES]; +extern op_type op_types[NUM_OP_TYPES]; + +/* Information about one of the operations in a transaction. */ +struct __op { + op_type type; + const char *name; + FILE_INFO *file; + SET *iter_dists; + SET *keygrp_dists; + SET *item_dists; + SET *other; + int flags; + int update_pct; + + int (*execute) __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +}; + +/* Information about a type of transaction. 
*/ +#define TXN_TYPE_SAME_KEY 0x001 +#define TXN_TYPE_NO_NOTFOUND 0x002 +#define TXN_TYPE_RETRY_NOTFOUND 0x004 +struct __txn_type { + int id; + int max_ops; + int op_count; + OP **op; /* array of ops to be performed */ + int special; /* special flags */ + + void (*add_op) __P((TXN_TYPE *, OP *)); +}; + +/* Information about a type of thread. */ +#define THREAD_TYPE_SUB_TXN 0x001 +#define THREAD_TYPE_NO_TXN 0x002 +#define THREAD_TYPE_DIRTY_READ 0x004 +#define THREAD_TYPE_SNAPSHOT 0x008 +#define THREAD_TYPE_SNAPSHOT_SAFE 0x010 +struct __thr_type { + SET *txn_types; + SET *txn_size_dists; + int special; +}; + +/* Per-thread information. */ +struct __thr_info { + int id; + THR_TYPE *type; + + /* + * Each thread has its own char arrays for keys and data items. + * This allows us to allocate these arrays just once, and to + * avoid running out of stack space. + */ + char *keystr; + char *prev_key; + char *tmp_key; + char *datastr; + char *tmp_data; + char *tmp_data2; + + /* What key group is currently being accessed by this thread? */ + KEY_GROUP *keygrp; + + /* + * A count of the number of txns completed by this thread, + * along with per-op-type counts of the number of ops + * performed and the number of records accessed. + */ + int txn_count; + int *op_count; + int *rec_count; + + double start_time; + double end_time; + + db_rand_state rand_state; +}; + +/* Node in a linked list of sets. */ +struct __set_node { + SET *set; + SET_NODE *next; +}; + +/* Types of initial scans. */ +typedef enum { + SCAN_SEQUENTIAL, + SCAN_KEYGROUP +} scan_type; + +/* An initial scan of one of the files. */ +struct __scan { + scan_type type; + int file_num; /* number of file to scan */ + int kgnum; /* number of keygroup to scan */ + int update_freq; /* with what prob. should we update? */ + int start_keynum; /* number of the first key in the scan */ + int incr; /* incr the cursor this much each time */ + + int (*execute) __P((SCAN *)); +}; + +/* A stat that is traced by the stat thread. 
*/ +struct __stat { + /* + * The two variables below allow us to access fields in DB stat + * structs that will be allocated in the future (because these + * structs are allocated when the stat functions are called). + * base_addr_ptr is a pointer to the variable that will contain + * the address of the stat structure. offset is the index of + * the field when the struct is treated as an array of ints. + */ + u_int8_t **base_addr_ptr; + int offset; + + int prev_val; /* value when we last checked stats */ + int prev_sample_val; /* value when we last stored a + per-txn or per-time sample */ + + double *sample; /* queue of samples */ + int qhead; + double sum_samples; + double sum_squares; + int using_per_time; /* are the samples per-time? */ + + double *mean; /* queue of sample means */ + int mean_qhead; + double mean_sum_samples; + double mean_sum_squares; + + u_int32_t (*current_val) __P((STAT *)); + int (*delta) __P((STAT *)); + double (*per_txn) __P((STAT *, int)); + double (*per_time) __P((STAT *, double)); + + void (*update_prev) __P((STAT *)); + void (*update_samples) __P((STAT *, double)); + int (*steady) __P((STAT *)); +}; + +#if defined(HAVE_GETRUSAGE) +#define NUM_INDIV_STATS 13 +#define RUSAGE struct rusage +#else +#define NUM_INDIV_STATS 12 +#define RUSAGE int +#endif + +typedef enum { + CLEAN_NONE = 0, + CLEAN_ARCHIVE, + CLEAN_UPDATE, + CLEAN_HOTBACKUP, + CLEAN_HOTUPDATE +} perf_clean_t; + +/* + * Globals. Because VxWorks shares all globals with every task there + * is, we use a global structure to avoid naming issues. 
+ */ +struct __perf_globals { + DB_ENV *dbenv; + + const char *config; /* -c: config filename */ + int checkpoint; /* -C: checkpoint thread */ + int debug; /* -d: debug */ + const char *home; /* -h: environment home */ + long iterations; /* -i: iterations */ + perf_clean_t logclean; /* -L: log clean thread */ + char *outfile_name; /* -o: name of output file */ + int private; /* -p: use DB_PRIVATE flag */ + u_long queuestart; /* -q: queue start point */ + int recover; /* -x,X: perform recovery */ + char *rpc_server; /* -P: server hostname */ + u_int seed; /* -S: seed */ + int stats_trace; /* -s: trace stats over time */ + long trickle; /* -T: trickle thread pct. */ + int verbose; /* -v: verbose messages */ + int sync; /* -y: sync thread */ + + char conf_current[DB_MAXPATHLEN]; + int child; + int linenum; + int line_preread; + const char *progname; /* program name */ + const char *progpath; /* full program name */ + db_rand_state rand_state; + int shutdown; /* we're done */ + int stdfd; /* stdout of parent */ + double steady_state_start_time; /* Used by stat thread */ + double steady_state_end_time; + int steady_state_start_txns; + int steady_state_end_txns; + int steady_state_reached; + int threads_started; + char tag[80]; + const char *tmp; /* tmp dir */ + + /* Statistics maintained by the stat thread */ + int *txns_ptr; + DB_MPOOL_STAT *mpstat; + DB_LOCK_STAT *lkstat; + DB_LOG_STAT *lgstat; + DB_TXN_STAT *txstat; + RUSAGE *rusage; + + FILE *outfp; /* output file FP */ + FILE *conf_fp; /* The config file handle */ + FILE *current_fp; /* + * A version of the config + * file that includes all of + * the random selections that + * were made at initialization. + */ + db_mutex_t write_mutex; /* Protect the write error flag */ +}; + +/* + * Configuration - determined by reading a config file. 
+ */ +struct __perf_conf { + int env_flags; + + FILE_INFO **file; /* array of file-info objects */ + int file_count; + + TXN_TYPE **txn_type; /* array of txn-type objects */ + int txn_type_count; + + THR_TYPE **thr_type; /* array of thread-type objects */ + int thr_type_count; + + SET_NODE *shared_sets; /* + * sets that may be shared; + * used to delete them cleanly. + */ + + THR_INFO **thread; /* info on the am threads */ + long nthreads; /* total # of am threads */ + + int num_initial_scans; /* scan the files at the start? */ + SCAN **scan; /* info about the scan(s) */ + + /* Params for the stat-check thread. */ + int stat_check_interval; + int steady_state_time; + int steady_state_num_samples; + int steady_state_max_time; + double steady_state_variance; + double steady_state_variance_pct; + + char *archive; /* archive directory */ + int cache_size_percent; + u_int32_t checkpoint_size; + u_int32_t checkpoint_time; + char conf_current[DB_MAXPATHLEN]; + int dbs; /* running in DBS mode */ + int deadlock; /* Deadlock policy. */ + int killtest_iter; + int killtest_interval; + const char *logdir; /* Directory for log files. */ + void (*output_info_fn) __P((int, int, int, int)); + char output_info_fname[DB_MAXPATHLEN]; + char *passwd; /* Password for encryption. */ + u_int32_t pause; /* Pause between ops. */ + int prepare; /* prepare transactions */ + int use_multiple_handles; /* use one DB handle per thread? 
*/ + int write_err; /* */ + int write_err_count; /* variables used in the */ + int write_err_start; /* write-error test */ + int write_rand; /* */ + int no_write_errors; /* turn em off */ + int yield_on_request; /* yield on every page request */ +}; + +extern struct __perf_globals g; +extern struct __perf_conf config; + +/* db_perf.c */ +int driver __P((int, int, int, int, int, int)); +int open_handles __P((int, int)); +int close_handles __P((void)); +int say_dead __P((DB_ENV *, pid_t, db_threadid_t, u_int32_t)); +int run_batch __P((int)); + +/* perf_checkpoint.c */ +int checkpoint_init __P((void)); +int checkpoint_shutdown __P((void)); +void *checkpoint_thread __P((void *)); + +/* perf_config.c */ +int apply_configuration __P((int)); + +/* perf_dbs.c */ +void __key_group_fill_data_dbs + __P((KEY_GROUP *, db_rand_state *, DBT *, int)); +int __op_dbs_read __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_dbs_read_dupset __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_dbs_update __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_dbs_partial_write __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_dbs_del __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_dbs_add_dupset __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_dbs_del_readd __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_dbs_del_readd_secon __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_dbs_swap_data __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_qtest_scan __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int __op_reorg __P((OP *, int, int *, DB *, DB_TXN *, + DBT *, DBT *)); +int dbs_dup_compare __P((DB *, const DBT *, const DBT *)); +int dbs_int_compare __P((DB *, const DBT *, const DBT *)); +void * dbs_check_thread __P((void *)); +int dbs_check_databases __P((void)); +void dbs_output_info __P((int, int, int, int)); + +/* perf_debug.c */ +void 
debug_check __P((void)); + +/* perf_file.c */ +FILE_INFO *file_info_create __P((void)); +FILE_INFO *file_info_copy __P((FILE_INFO *)); +void file_info_destroy __P((FILE_INFO *)); +int __file_info_open_handles __P((FILE_INFO *, int)); +int __file_info_close_handles __P((FILE_INFO *)); +void __file_info_init_method_params __P((FILE_INFO *)); +int __file_info_set_method_params_btree __P((FILE_INFO *, int)); +int __file_info_set_method_params_hash __P((FILE_INFO *, int)); +int __file_info_set_method_params_queue __P((FILE_INFO *, int)); +int __file_info_set_method_params_recno __P((FILE_INFO *, int)); +int __file_info_init __P((FILE_INFO *)); +int __file_info_add_items __P((FILE_INFO *, db_rand_state *, + KEY_GROUP *, DB_TXN *, DBT *, DBT *, int *)); +void __file_info_select_key __P((FILE_INFO *, db_rand_state *, DBT *)); +void __file_info_select_key_from_keygrp + __P((FILE_INFO *, db_rand_state *, int, DBT *)); +int __file_info_num_to_key_non_rec __P((FILE_INFO *, int, DBT *)); +int __file_info_num_to_key_rec __P((FILE_INFO *, int, DBT *)); +long __file_info_get_size __P((FILE_INFO *)); +int __file_info_get_num_items __P((FILE_INFO *)); +int __file_info_avg_key_size __P((FILE_INFO *)); +int __file_info_max_key_size __P((FILE_INFO *)); +int __file_info_avg_data_size __P((FILE_INFO *)); +int __file_info_min_data_size __P((FILE_INFO *)); +SCAN *scan_create_sequential __P((int, int, int)); +SCAN *scan_create_keygroup __P((int, int, int, int, int)); +int __scan_execute_sequential __P((SCAN *)); +int __scan_execute_keygroup __P((SCAN *)); +int __callback_fn __P((DB *, const DBT *, const DBT *, DBT *skey)); + +/* perf_key.c */ +KEY_GROUP *key_group_create_chars_per_slot __P((FILE_INFO *, int, int)); +KEY_GROUP *key_group_create_key_count __P((FILE_INFO *, int, int, int, int)); +KEY_GROUP *key_group_create_rec __P((FILE_INFO *, int)); +KEY_GROUP *key_group_copy __P((KEY_GROUP *)); +void key_group_destroy __P((KEY_GROUP *)); +void __key_group_select_key_chars_per_slot + 
__P((KEY_GROUP *, db_rand_state *, DBT *)); +void __key_group_select_key_key_count + __P((KEY_GROUP *, db_rand_state *, DBT *)); +void __key_group_select_key_rec + __P((KEY_GROUP *, db_rand_state *, DBT *)); +void __key_group_first_key_non_rec __P((KEY_GROUP *, DBT *)); +void __key_group_first_key_rec __P((KEY_GROUP *, DBT *)); +int __key_group_next_key_non_rec_RtoL __P((KEY_GROUP *, DBT *)); +int __key_group_next_key_non_rec_LtoR __P((KEY_GROUP *, DBT *)); +int __key_group_next_key_rec __P((KEY_GROUP *, DBT *)); +int __key_group_select_numdups __P((KEY_GROUP *, db_rand_state *)); +void __key_group_fill_data_random + __P((KEY_GROUP *, db_rand_state *, DBT *, int)); +void __key_group_fill_data_secon + __P((KEY_GROUP *, db_rand_state *, DBT *, int)); +void fill_data_primary __P((FILE_INFO *, void *, int)); +int __key_group_num_to_key_non_rec __P((KEY_GROUP *, int, DBT *)); +int __key_group_num_to_key_rec __P((KEY_GROUP *, int, DBT *)); + +/* perf_log.c */ +int log_init __P((void)); +int log_shutdown __P((void)); +void *log_thread __P((void *)); +int log_clean __P((int)); + +/* perf_misc.c */ +void init_dbts __P((FILE_INFO *, DBT *, DBT *, char *, char *, + db_recno_t *, int, int)); +SET *set_create __P((int)); +void set_destroy __P((SET *, int)); +void __set_add_item __P((SET *, void *, int)); +void __set_enlarge __P((SET *)); +void *__set_select_item __P((SET *, db_rand_state *)); +int __set_select_from_dists __P((SET *, db_rand_state *, int)); +int __set_mean_val_dists __P((SET *)); +int __set_min_val_dists __P((SET *)); +int __set_max_val_dists __P((SET *)); +PROB_DIST *prob_dist_create __P((dist_type, double, double)); +int __prob_dist_select_int_uniform __P((PROB_DIST *, db_rand_state *)); +int __prob_dist_select_int_normal __P((PROB_DIST *, db_rand_state *)); +int __prob_dist_get_maxval_uniform __P((PROB_DIST *)); +int __prob_dist_get_maxval_normal __P((PROB_DIST *)); +int __prob_dist_get_minval_uniform __P((PROB_DIST *)); +int __prob_dist_get_minval_normal 
__P((PROB_DIST *)); +int __prob_dist_get_meanval_uniform __P((PROB_DIST *)); +int __prob_dist_get_meanval_normal __P((PROB_DIST *)); +void write_err_init __P((DB_ENV *)); +void write_no_errors __P((DB_ENV *, int)); +ssize_t write_err __P((int, const void *, size_t)); + +/* perf_op.c */ +OP *op_create __P((op_type, int, int, int, int, int, int, int)); +OP *op_create_defaults __P((op_type, int)); +void op_destroy __P((OP *)); +int __op_read_nodup __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_read_multi __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_read_dup __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_update_nodup __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_update_dup __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_update_prim __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_rmw __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_add_nodup __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_add_dup __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_del __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_curs_read __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_curs_update __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_curs_rmw __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); +int __op_curs_del __P((OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *)); + +/* perf_parse.c */ +void init_parser __P((void)); +int get_line __P((char *, FILE *)); +int get_name_value __P((char *, char **, char **)); +int open_var_db __P((void)); +int close_var_db __P((void)); +int get_variable __P((char *, char *, int)); +int set_variable __P((char *, char *, int)); + +/* perf_spawn.c */ +os_pid_t my_system __P((const char *, int)); +os_thread_t *spawn_kids __P((const char *, long, void *(*)(void *))); +int wait_kids __P((const char *, os_thread_t *)); +int wait_procs __P((const char *, 
os_pid_t *)); + +/* perf_stat.c */ +STAT *stat_create __P((void *, int)); +void stat_destroy __P((STAT *)); +u_int32_t __stat_current_val __P((STAT *)); +int __stat_delta __P((STAT *)); +double __stat_per_txn __P((STAT *, int)); +double __stat_per_time __P((STAT *, double)); +void __stat_update_prev __P((STAT *)); +void __stat_update_samples __P((STAT *, double)); +int __stat_steady __P((STAT *)); +int init_stats __P((STAT **)); +int get_stats __P((double *)); +void output_stats_line __P((STAT **, double, double, double)); +int steady_state __P((STAT **)); +void output_stats __P((int)); +void store_stat_deltas __P((DB_MPOOL_STAT *, DB_LOCK_STAT *, + DB_LOG_STAT *, DB_TXN_STAT *, RUSAGE *)); +int stat_init __P((void)); +int stat_shutdown __P((void)); +void *stat_thread __P((void *)); + +/* perf_dead.c */ +int dead_init __P((void)); +int dead_shutdown __P((void)); +void *dead_thread __P((void *)); + +/* perf_sync.c */ +int sync_init __P((void)); +int sync_shutdown __P((void)); +void *sync_thread __P((void *)); + +/* perf_thread.c */ +THR_TYPE *thr_type_create __P((void)); +void thr_type_destroy __P((THR_TYPE *)); +THR_INFO *thr_info_create __P((int, THR_TYPE *)); +void thr_info_destroy __P((THR_INFO *)); +void *thread_run __P((void *)); + +/* perf_trickle.c */ +int trickle_init __P((long)); +int trickle_shutdown __P((void)); +void *trickle_thread __P((void *)); + +/* perf_txn.c */ +TXN_TYPE *txn_type_create __P((int, int)); +void txn_type_destroy __P((TXN_TYPE *)); +void __txn_type_add_op __P((TXN_TYPE *, OP *)); +int txn_execute __P((SET *, int, DBT *, DBT *, long, int)); + +/* perf_util.c */ +void random_data __P((db_rand_state *, char *, size_t)); +int random_int __P((db_rand_state *, int, int)); +int clear_dir __P((const char *)); +int snooze __P((DB_ENV *, u_long)); + +#endif --- db-4.6.21/test_perf/perf_file.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_file.c 2007-11-05 15:51:13.775916000 +1100 @@ -0,0 +1,1563 @@ +/*- + * See the 
file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2000,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_file.c,v 12.18 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+/*
+ * Printable access-method names; __file_info_add_items indexes this
+ * table by file->method when it reports a failed put.
+ */
+const char *am_names[6] = {
+	"dummy", "btree", "hash", "recno", "queue", "unknown"
+};
+
+static int init_curs __P((DBC **, DB *, DB_TXN *, DBT *, DBT *, int));
+static int open_handle __P((FILE_INFO *, int, int));
+
+/*
+ * file_info_create - constructor for FILE_INFO objects
+ *
+ * Allocates a FILE_INFO, names it "perfdb<N>" where N is the current
+ * config.file_count, and installs btree defaults together with the
+ * default method table.  Returns NULL if either allocation fails.
+ */
+FILE_INFO *
+file_info_create()
+{
+	FILE_INFO *file;
+	int ret;
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(FILE_INFO), &file)) != 0)
+		return (NULL);
+
+	/*
+	 * Many of these will be modified as more of the configuration
+	 * file is read.
+	 */
+	if ((ret = __os_malloc(g.dbenv, DBNAME_MAXSIZE, &file->name)) != 0) {
+		__os_free(g.dbenv, file);
+		return (NULL);
+	}
+	/* Bounded by the size of the buffer allocated just above. */
+	(void)snprintf(file->name,
+	    DBNAME_MAXSIZE, "perfdb%d", config.file_count);
+	file->subdb_name = NULL;
+	file->method = DB_BTREE;
+	file->max_params = 0;
+	file->method_params = NULL;
+	file->pagesize = 8192;
+	file->flags = 0;
+	file->special = 0;
+	file->priority = DB_PRIORITY_DEFAULT;
+	file->primary = NULL;
+	file->callback = NULL;
+	file->secon_num = -1;
+	file->key_groups = NULL;
+	file->dbp = NULL;
+	file->secondaries = NULL;
+	file->nitems = 0;
+
+	/* Method table; file->method is DB_BTREE at this point. */
+	file->open_handles = __file_info_open_handles;
+	file->close_handles = __file_info_close_handles;
+	file->init_method_params = __file_info_init_method_params;
+	file->set_method_params = __file_info_set_method_params_btree;
+	file->init = __file_info_init;
+	file->add_items = __file_info_add_items;
+	file->select_key = __file_info_select_key;
+	file->select_key_from_keygrp = __file_info_select_key_from_keygrp;
+	file->num_to_key = (IS_RECORD_BASED(file) ?
+	    __file_info_num_to_key_rec : __file_info_num_to_key_non_rec);
+	file->get_size = __file_info_get_size;
+	file->get_num_items = __file_info_get_num_items;
+	file->avg_key_size = __file_info_avg_key_size;
+	file->max_key_size = __file_info_max_key_size;
+	file->avg_data_size = __file_info_avg_data_size;
+	file->min_data_size = __file_info_min_data_size;
+
+	return (file);
+}
+
+/*
+ * file_info_copy - copy constructor for FILE_INFO objects
+ *
+ * Duplicates the names, method parameters, key groups and method table
+ * of source.  The per-instance state (primary, callback, secon_num,
+ * dbp, secondaries, nitems) is reset rather than shared.  Returns NULL
+ * if one of the name allocations fails.
+ */
+FILE_INFO *
+file_info_copy(source)
+	FILE_INFO *source;
+{
+	FILE_INFO *file;
+	KEY_GROUP *kg;
+	int i, ret;
+
+	assert(source != NULL);
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(FILE_INFO), &file)) != 0)
+		return (NULL);
+
+	/* names */
+	assert(source->name != NULL);
+	if ((ret = __os_malloc(g.dbenv,
+	    DBNAME_MAXSIZE, &file->name)) != 0) {
+		__os_free(g.dbenv, file);
+		return (NULL);
+	}
+	(void)snprintf(file->name, DBNAME_MAXSIZE, "%s", source->name);
+	if (source->subdb_name == NULL)
+		file->subdb_name = NULL;
+	else {
+		if ((ret = __os_malloc(g.dbenv,
+		    DBNAME_MAXSIZE, &file->subdb_name)) != 0) {
+			__os_free(g.dbenv, file->name);
+			__os_free(g.dbenv, file);
+			return (NULL);
+		}
+		(void)snprintf(file->subdb_name,
+		    DBNAME_MAXSIZE, "%s", source->subdb_name);
+	}
+
+	/* method and method parameters */
+	file->method = source->method;
+	if (source->method_params == NULL) {
+		file->method_params = NULL;
+		file->max_params = 0;
+	} else {
+		/* Methods in file are not set up yet, so use source. */
+		source->init_method_params(file);
+		assert(file->max_params == source->max_params);
+		for (i = 0; i < file->max_params; i++)
+			file->method_params[i] = source->method_params[i];
+	}
+
+	/* key groups: each one is copied and re-pointed at the new file */
+	assert(source->key_groups != NULL);
+	file->key_groups = set_create(source->key_groups->max_items);
+	for (i = 0; i < source->key_groups->item_count; i++) {
+		kg = key_group_copy(source->key_groups->item[i]);
+		kg->file = file;
+		file->key_groups->add_item(file->key_groups,
+		    kg, kg->key_count);
+	}
+
+	/* misc. */
+	file->pagesize = source->pagesize;
+	file->flags = source->flags;
+	file->priority = source->priority;
+	file->special = source->special;
+
+	/* not shared */
+	file->primary = NULL;
+	file->callback = NULL;
+	file->secon_num = -1;
+	file->dbp = NULL;
+	file->secondaries = NULL;
+	file->nitems = 0;
+
+	/* methods */
+	file->open_handles = source->open_handles;
+	file->close_handles = source->close_handles;
+	file->init_method_params = source->init_method_params;
+	file->set_method_params = source->set_method_params;
+	file->init = source->init;
+	file->add_items = source->add_items;
+	file->select_key = source->select_key;
+	file->select_key_from_keygrp = source->select_key_from_keygrp;
+	file->num_to_key = source->num_to_key;
+	file->get_size = source->get_size;
+	file->get_num_items = source->get_num_items;
+	file->avg_key_size = source->avg_key_size;
+	file->max_key_size = source->max_key_size;
+	file->avg_data_size = source->avg_data_size;
+	file->min_data_size = source->min_data_size;
+
+	return (file);
+}
+
+/*
+ * file_info_destroy - destructor for FILE_INFO objects
+ *
+ * Frees the names, the method_params array, every key group and the
+ * two sets, then the object itself.  The file->dbp array is not
+ * touched here; it is released by FILE_INFO->close_handles().
+ */
+void
+file_info_destroy(file)
+	FILE_INFO *file;
+{
+	int i;
+
+	if (file->name != NULL)
+		__os_free(g.dbenv, file->name);
+	if (file->subdb_name != NULL)
+		__os_free(g.dbenv, file->subdb_name);
+	if (file->method_params != NULL)
+		__os_free(g.dbenv, file->method_params);
+
+	if (file->key_groups != NULL) {
+		for (i = 0; i < file->key_groups->item_count; i++)
+			key_group_destroy(file->key_groups->item[i]);
+		set_destroy(file->key_groups, 0);
+	}
+
+	if (file->secondaries != NULL)
+		set_destroy(file->secondaries, 0);
+
+	__os_free(g.dbenv, file);
+}
+
+/*
+ * __file_info_open_handles - implementation of FILE_INFO->open_handles().
+ * Creates one or more DB handles for this file and opens them using the
+ * specified flags.
+ */
+int
+__file_info_open_handles(file, open_flags)
+	FILE_INFO *file;
+	int open_flags;
+{
+	int i, num_chars, ret;
+
+	/*
+	 * Determine the number of possible key characters for keys in
+	 * this file.
+	 */
+	file->num_key_chars = 0;
+	for (i = 0; i < file->key_groups->item_count; i++) {
+		num_chars =
+		    ((KEY_GROUP *)file->key_groups->item[i])->char_set_size;
+		if (num_chars > file->num_key_chars)
+			file->num_key_chars = num_chars;
+	}
+
+	/*
+	 * Create the array of pointers to DB handles.  It holds one
+	 * DB pointer per thread, and is zeroed so that a slot which was
+	 * never opened can be recognized when the handles are closed.
+	 */
+	if ((ret = __os_malloc(g.dbenv,
+	    config.nthreads * sizeof(DB *), &file->dbp)) != 0) {
+		g.dbenv->err(g.dbenv, ret, "allocating file->dbp array");
+		return (ret);
+	}
+	memset(file->dbp, 0, config.nthreads * sizeof(DB *));
+
+	/*
+	 * Create and initialize the DB handle(s).  If we're not using
+	 * multiple handles, each element of the file->dbp array will
+	 * point to the same handle.  Otherwise, we need to create and
+	 * open a new handle for each thread.
+	 */
+	for (i = 0; i < config.nthreads; i++) {
+		if (!config.use_multiple_handles && i > 0)
+			file->dbp[i] = file->dbp[0];
+		else if ((ret = open_handle(file, open_flags, i)) != 0)
+			break;
+	}
+
+	return (ret);
+}
+
+/*
+ * __file_info_close_handles - implementation of FILE_INFO->close_handles().
+ * Closes the DB handles for this file and releases the file->dbp array.
+ * Every handle is synced and closed even if an earlier one fails; the
+ * first error seen is the one returned.
+ */
+int
+__file_info_close_handles(file)
+	FILE_INFO *file;
+{
+	int i, ret, t_ret;
+
+	if (file->dbp == NULL)
+		return (0);
+
+	ret = 0;
+	for (i = 0; i < config.nthreads; i++) {
+		/*
+		 * If all of the elements of the file->dbp array point
+		 * to the same DB handle, we only need to close it
+		 * once.
+		 */
+		if (!config.use_multiple_handles && i > 0)
+			break;
+
+		/* Skip a slot that has no handle in it. */
+		if (file->dbp[i] == NULL)
+			continue;
+
+		/*
+		 * Sync first so we get the OS started flushing buffers.
+		 */
+		if ((t_ret = file->dbp[i]->sync(file->dbp[i], 0)) != 0) {
+			g.dbenv->err(g.dbenv, t_ret, "DB->sync");
+			if (ret == 0)
+				ret = t_ret;
+		}
+
+		if ((t_ret = file->dbp[i]->close(file->dbp[i], 0)) != 0) {
+			g.dbenv->err(g.dbenv, t_ret, "%s: DB->close",
+			    file->name);
+			if (ret == 0)
+				ret = t_ret;
+		}
+
+		file->dbp[i] = NULL;
+	}
+
+	/* Clear the pointer so the NULL check above guards a second call. */
+	__os_free(g.dbenv, file->dbp);
+	file->dbp = NULL;
+
+	return (ret);
+}
+
+/*
+ * __file_info_init_method_params - implementation of
+ * FILE_INFO->init_method_params(), which initializes the
+ * method_params array for a FILE_INFO object when a user
+ * specifies a non-default value for one of its parameters.
+ * The array gets three slots for btree and hash files and two for
+ * recno and queue files; every slot starts out NULL.
+ */
+void
+__file_info_init_method_params(file)
+	FILE_INFO *file;
+{
+	int i, ret;
+
+	file->max_params = 0;
+	switch (file->method) {
+	case DB_BTREE:
+		file->max_params = 3;
+		break;
+	case DB_HASH:
+		file->max_params = 3;
+		break;
+	case DB_RECNO:
+		file->max_params = 2;
+		break;
+	case DB_QUEUE:
+		file->max_params = 2;
+		break;
+	default:
+		assert(0);
+		/* NOTREACHED */
+	}
+
+	ret = __os_malloc(g.dbenv, file->max_params * sizeof(void *),
+	    &file->method_params);
+	assert(ret == 0);
+	for (i = 0; i < file->max_params; i++)
+		file->method_params[i] = NULL;
+}
+
+/*
+ * __file_info_set_method_params_btree - implementation of
+ * FILE_INFO->set_method_params() for files using the btree access
+ * method.
+ */
+int
+__file_info_set_method_params_btree(file, id)
+	FILE_INFO *file;
+	int id;
+{
+	int ret;
+
+	assert(file->method == DB_BTREE);
+
+	/* Without a parameter array there is nothing to apply. */
+	if (file->method_params == NULL)
+		return (0);
+
+	/* Slot 0: minimum number of keys per page. */
+	if (file->method_params[0] != NULL &&
+	    (ret = file->dbp[id]->set_bt_minkey(file->dbp[id],
+	    (int)(uintptr_t)file->method_params[0])) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_bt_minkey: %d",
+		    (int)(uintptr_t)file->method_params[0]);
+		return (EINVAL);
+	}
+
+	/* Slot 1: key comparison function. */
+	if (file->method_params[1] != NULL &&
+	    (ret = file->dbp[id]->set_bt_compare(file->dbp[id],
+	    (int (*)(DB *, const DBT *, const DBT *))
+	    file->method_params[1])) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_bt_compare");
+		return (EINVAL);
+	}
+
+	/* Slot 2: duplicate comparison function. */
+	if (file->method_params[2] != NULL &&
+	    (ret = file->dbp[id]->set_dup_compare(file->dbp[id],
+	    (int (*)(DB *, const DBT *, const DBT *))
+	    file->method_params[2])) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_dup_compare");
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * __file_info_set_method_params_hash - implementation of
+ * FILE_INFO->set_method_params() for files using the hash access
+ * method.  Any failing DB call is reported on the handle and turned
+ * into EINVAL.
+ */
+int
+__file_info_set_method_params_hash(file, id)
+	FILE_INFO *file;
+	int id;
+{
+	int fill, have_params, nelem, ret;
+
+	assert(file->method == DB_HASH);
+	have_params = file->method_params != NULL;
+
+	/*
+	 * Fill factor: taken from slot 0 when that slot is set,
+	 * otherwise computed from the page size and the average
+	 * key and data sizes.  A resulting value of 0 leaves the
+	 * fill factor unset.
+	 */
+	if (have_params && file->method_params[0] != NULL)
+		fill = (uintptr_t)file->method_params[0];
+	else
+		fill = (file->pagesize - 32) /
+		    (file->avg_key_size(file) + file->avg_data_size(file) + 8);
+	if (fill != 0 &&
+	    (ret = file->dbp[id]->set_h_ffactor(file->dbp[id], fill)) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_h_ffactor: %d", fill);
+		return (EINVAL);
+	}
+
+	/*
+	 * Table size: taken from slot 1 when that slot is set,
+	 * otherwise the estimated number of items in the initial
+	 * database increased by 25%.  A resulting value of 0 leaves
+	 * the table size unset.
+	 */
+	if (have_params && file->method_params[1] != NULL)
+		nelem = (uintptr_t)file->method_params[1];
+	else
+		nelem = file->get_num_items(file) * 5 / 4;
+	if (nelem != 0 &&
+	    (ret = file->dbp[id]->set_h_nelem(file->dbp[id], nelem)) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_h_nelem: %d", nelem);
+		return (EINVAL);
+	}
+
+	/* Slot 2: duplicate comparison function. */
+	if (have_params && file->method_params[2] != NULL &&
+	    (ret = file->dbp[id]->set_dup_compare(file->dbp[id],
+	    (int (*)(DB *, const DBT *, const DBT *))
+	    file->method_params[2])) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_dup_compare");
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * __file_info_set_method_params_queue - implementation of
+ * FILE_INFO->set_method_params() for files using the queue access
+ * method.  Sets the record length, a zero pad byte and, when slot 1
+ * is set, the extent size.
+ */
+int
+__file_info_set_method_params_queue(file, id)
+	FILE_INFO *file;
+	int id;
+{
+	KEY_GROUP *kg;
+	int i, len, reclen, ret;
+
+	assert(file->method == DB_QUEUE);
+	len = reclen = 0;
+
+	/* Work out the record length before touching the handle. */
+	if (IS_SECONDARY(file)) {
+		/*
+		 * The data items are keys of the primary database,
+		 * so the longest primary key decides.
+		 */
+		assert(file->primary->key_groups != NULL);
+
+		for (i = 0; i < file->primary->key_groups->item_count; i++) {
+			kg = file->primary->key_groups->item[i];
+			if (kg->key_size > reclen)
+				reclen = kg->key_size;
+		}
+	} else if (file->method_params != NULL &&
+	    file->method_params[0] != NULL) {
+		/* Random data with a length given in slot 0. */
+		reclen = (uintptr_t)file->method_params[0];
+	} else {
+		/* Random data: use the largest possible data item. */
+		for (i = 0; i < file->key_groups->item_count; i++) {
+			kg = file->key_groups->item[i];
+			len = kg->dsize_dists->max_val_dists(
+			    kg->dsize_dists);
+			if (len > reclen)
+				reclen = len;
+		}
+	}
+	assert(reclen > 0);
+
+	ret = file->dbp[id]->set_re_len(file->dbp[id], reclen);
+	if (ret != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_re_len: %d", reclen);
+		return (EINVAL);
+	}
+
+	/* The pad byte is fixed at zero for now. */
+	ret = file->dbp[id]->set_re_pad(file->dbp[id], 0x0);
+	if (ret != 0) {
+		file->dbp[id]->err(file->dbp[id], ret, "set_re_pad: 0x0");
+		return (EINVAL);
+	}
+
+	/* Slot 1: extent size. */
+	if (file->method_params != NULL && file->method_params[1] != NULL &&
+	    (ret = file->dbp[id]->set_q_extentsize(file->dbp[id],
+	    (int)(uintptr_t)file->method_params[1])) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret,
+		    "set_q_extentsize: %d",
+		    (int)(uintptr_t)file->method_params[1]);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * __file_info_set_method_params_recno - implementation of
+ * FILE_INFO->set_method_params() for files using the recno access
+ * method.
+ */
+int
+__file_info_set_method_params_recno(file, id)
+	FILE_INFO *file;
+	int id;
+{
+	int reclen, ret;
+
+	assert(file->method == DB_RECNO);
+	reclen = 0;
+
+	/* record length (slot 0); left at the DB default when unset */
+	if (file->method_params != NULL && file->method_params[0] != NULL) {
+		reclen = (uintptr_t)file->method_params[0];
+		ret = file->dbp[id]->set_re_len(file->dbp[id], reclen);
+		if (ret != 0) {
+			file->dbp[id]->err(file->dbp[id], ret,
+			    "set_re_len: %d", reclen);
+			return (EINVAL);
+		}
+	}
+
+	/* record padding XXX fixed for now */
+	if ((ret = file->dbp[id]->set_re_pad(file->dbp[id], 0x0)) != 0) {
+		file->dbp[id]->err(file->dbp[id], ret, "set_re_pad: 0x0");
+		return (EINVAL);
+	}
+
+	/* record source (slot 1) */
+	if (file->method_params != NULL && file->method_params[1] != NULL) {
+		ret = file->dbp[id]->set_re_source(file->dbp[id],
+		    (char *)file->method_params[1]);
+		if (ret != 0) {
+			file->dbp[id]->err(file->dbp[id], ret,
+			    "set_re_source: %s",
+			    (char *)file->method_params[1]);
+			return (EINVAL);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * __file_info_init - implementation of FILE_INFO->init().  Initializes
+ * a database file, populating it with its initial key/data pairs, and
+ * writes the start key of every key group to "<home>/<name>.info".
+ * Returns 0 on success, EINVAL or a DB error code otherwise.
+ */
+int
+__file_info_init(file)
+	FILE_INFO *file;
+{
+	DBT data, key;
+	DB_TXN *txn;
+	FILE *info_fp;
+	KEY_GROUP *keygrp, *save_kg;
+	db_recno_t key_recno, save_recno;
+	int first_item, i, items_added, j;
+	int ret, save_i, save_j, save_size, save_nitems;
+	char buf[DB_MAXPATHLEN * 2], *datastr, *keystr, *save_key;
+
+	i = ret = 0;
+	txn = NULL;
+
+	/* Borrow the scratch buffers that belong to thread 0. */
+	assert(config.nthreads > 0);
+	datastr = config.thread[0]->datastr;
+	keystr = config.thread[0]->keystr;
+	save_key = config.thread[0]->tmp_key;
+
+	/*
+	 * snprintf returns the length the full string would have had,
+	 * so any value >= sizeof(buf) means the path was truncated.
+	 */
+	ret = snprintf(buf, sizeof(buf), "%s/%s.info", g.home, file->name);
+	if (ret < 0 || (size_t)ret >= sizeof(buf)) {
+		fprintf(g.outfp, "name of database info file too long\n");
+		return (EINVAL);
+	}
+
+	ret = 0;
+
+	/* Open the database info file. */
+	if ((info_fp = fopen(buf, "w")) == NULL) {
+		fprintf(g.outfp, "can't open database info file\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * Special case: the file is a secondary index (in which case
+	 * it will be initialized in conjunction with the primary) or
+	 * is initially empty, so we just create its .info file and
+	 * return.  Note that we will not be able to notice if the key
+	 * groups overlap, so if this is a possibility, the config
+	 * file should specify appropriate start-key values.
+	 */
+	if (IS_SECONDARY(file) || file->special & FILE_INFO_NO_INIT) {
+		for (i = 0; i < file->key_groups->item_count; i++) {
+			keygrp = file->key_groups->item[i];
+			if (IS_RECORD_BASED(file))
+				fprintf(info_fp, "%d\n", keygrp->start_recno);
+			else
+				fprintf(info_fp, "%s\n", keygrp->start_key);
+		}
+
+		(void)fclose(info_fp);
+		return (0);
+	}
+
+	/* Initialize the DBTs. */
+	memset(&key, 0, sizeof(key));
+	memset(&data, 0, sizeof(data));
+	init_dbts(file, &key, &data, keystr, datastr, &key_recno,
+	    MAX_KEY_SIZE, MAX_DATA_SIZE);
+
+	/* Begin the first transaction. */
+	if ((config.env_flags & DB_INIT_TXN) &&
+	    (ret = g.dbenv->txn_begin(g.dbenv, NULL, &txn, 0)) != 0) {
+		g.dbenv->err(g.dbenv, ret, "txn_begin failed");
+		goto err;
+	}
+
+#ifdef HAVE_VXWORKS
+	fflush(stdout);
+	__os_sleep(g.dbenv, 3, 0);
+#endif
+
+	save_i = save_j = save_nitems = save_recno = save_size = 0;
+	save_kg = file->key_groups->item[0];
+
+	/* Iterate through the key groups. */
+	for (i = 0; i < file->key_groups->item_count; i++) {
+		keygrp = file->key_groups->item[i];
+
+		/*
+		 * Get the first key for this key group.  It's
+		 * possible that the initial first key will overlap
+		 * keys from an existing key group, so we don't allow
+		 * any overwrites until we're certain that this isn't
+		 * the case.
+		 */
+		keygrp->first_key(keygrp, &key);
+		first_item = 1;
+
+		/* Iterate through the keys for this key group. */
+		for (j = 0; j < keygrp->key_count; j++) {
+			/* Start a new txn every 100 items. */
+			if ((file->nitems % 100) == 0) {
+				if ((file->nitems % 1000) == 0)
+					g.dbenv->errx(g.dbenv,
+					    "%d records done", file->nitems);
+				if (config.env_flags & DB_INIT_TXN) {
+					if ((ret = txn->commit(txn, 0)) != 0) {
+						g.dbenv->err(g.dbenv, ret,
+						    "DB_TXN->commit failed");
+
+						/*
+						 * Commit failed, so
+						 * go back to the start
+						 * of the txn.
+						 */
+						i = save_i;
+						j = save_j;
+						file->nitems = save_nitems;
+						keygrp = save_kg;
+						if (i == 0 && j == 0)
+							save_kg->first_key(
+							    save_kg, &key);
+						else if (IS_RECORD_BASED(file))
+							key_recno = save_recno;
+						else
+							strcpy(keystr,
+							    save_key);
+						if (i != 0 || j != 0)
+							key.size = save_size;
+						if (j == 0)
+							first_item = 1;
+					}
+
+					if ((ret = g.dbenv->txn_begin(g.dbenv,
+					    NULL, &txn, 0)) != 0) {
+						g.dbenv->err(g.dbenv, ret,
+						    "txn_begin failed");
+						goto err;
+					}
+
+					/* Store the start of the txn. */
+					save_i = i;
+					save_j = j;
+					save_nitems = file->nitems;
+					save_kg = keygrp;
+					save_size = key.size;
+					if (IS_RECORD_BASED(file))
+						save_recno = key_recno;
+					else
+						strcpy(save_key, keystr);
+				}
+			}
+
+			/* Add some number of items for this key. */
+			items_added = file->add_items(file, &g.rand_state,
+			    keygrp, txn, &key, &data, &first_item);
+			if (items_added == 0) {
+				g.dbenv->errx(g.dbenv,
+				    "file->add_items failed");
+				ret = EINVAL;
+				goto err;
+			}
+			file->nitems += items_added;
+
+			/* Get the next key, if there is one. */
+			if (keygrp->next_key(keygrp, &key) != 0)
+				break;
+		}
+
+		/*
+		 * Store the start key for the key group in the
+		 * database info file so that we can recover it
+		 * for future runs.
+		 */
+		if (IS_RECORD_BASED(file))
+			fprintf(info_fp, "%d\n", keygrp->start_recno);
+		else
+			fprintf(info_fp, "%s\n", keygrp->start_key);
+	}
+
+	g.dbenv->errx(g.dbenv, "%d records done", file->nitems);
+#ifdef HAVE_VXWORKS
+	printf("%d records done\n", file->nitems);
+#endif
+
+	/* Commit the final transaction. */
+	if ((config.env_flags & DB_INIT_TXN) &&
+	    (ret = txn->commit(txn, 0)) != 0) {
+		g.dbenv->err(g.dbenv, ret, "DB_TXN->commit failed\n");
+		goto err;
+	}
+
+	/*
+	 * NOTE(review): when add_items fails, the transaction that is
+	 * open at that point is neither committed nor aborted before
+	 * we return.
+	 */
+err:	(void)fclose(info_fp);
+	return (ret);
+}
+
+int32_t compute_checksum __P((char *, int));
+/*
+ * __file_info_add_items - implementation of FILE_INFO->add_items.
+ * Adds one or more data items to the file using the key stored in
+ * keyp->data.  When adding the first item of the key_group, ensures
+ * that it doesn't overwrite data items from other key groups by
+ * advancing the key as necessary.  Returns the number of items that
+ * are added, or 0 in case of error; a put that fails with anything
+ * other than the expected DB_KEYEXIST overlap is always an error.
+ */
+int
+__file_info_add_items(file, rs, kg, txn, keyp, datap, first_item)
+	FILE_INFO *file;
+	db_rand_state *rs;
+	KEY_GROUP *kg;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+	int *first_item;
+{
+	int i, id, j, ndups, putflag, ret;
+#if 0
+	char buf[8];
+	struct data *data_str;
+#endif
+
+	/* Choose the number of duplicate items to be added. */
+	ndups = kg->select_numdups(kg, rs);
+
+	/*
+	 * Make sure we don't overlap another key group
+	 * when adding the first duplicate.
+	 */
+	if (file->method == DB_QUEUE || file->method == DB_RECNO) {
+		if (!*first_item)
+			putflag = DB_APPEND;
+		else
+			putflag = 0;
+	} else if (*first_item)
+		putflag = DB_NOOVERWRITE;
+	else
+		putflag = 0;
+
+	for (i = 0; i < ndups; i++) {
+		/* Fill the data item. */
+		id = (!IS_RECORD_BASED(file) ? i :
+		    (int)(*((db_recno_t *)keyp->data) - 1) % config.nthreads);
+		kg->fill_data(kg, rs, datap, id);
+
+		/* Add it to the database. */
+retry:		ret = file->dbp[0]->put(file->dbp[0], txn, keyp,
+		    datap, putflag);
+#if 0
+		sprintf(buf, "%4d", *(int *) keyp->data);
+		data_str = datap->data;
+		memcpy(data_str->str, buf, 4);
+		data_str->sum = compute_checksum(data_str->str,
+		    datap->size - DBS_STR_OFFSET);
+		ret = file->dbp[0]->put(file->dbp[0], txn, keyp, datap, 0);
+#endif
+
+		if (*first_item) {
+			if (ret == DB_KEYEXIST) {
+				/*
+				 * We're overlapping another key
+				 * group, so we need to keep
+				 * incrementing the key.
+				 */
+				if (kg->next_key(kg, keyp) != 0) {
+					g.dbenv->errx(g.dbenv,
+					    "no more keys in key group");
+					return (0);
+				}
+				goto retry;
+			} else if (ret != 0) {
+				/*
+				 * Any other failure is a real error;
+				 * don't record a start key for an
+				 * item that was never stored.
+				 */
+				file->dbp[0]->err(file->dbp[0], ret,
+				    "%s: put", am_names[file->method]);
+				return (0);
+			} else {
+				*first_item = 0;
+				if (IS_RECORD_BASED(file))
+					kg->start_recno =
+					    *((db_recno_t *)keyp->data);
+				else {
+					strncpy(kg->start_key,
+					    keyp->data, kg->key_size);
+
+					/*
+					 * Make sure that we won't get
+					 * overflow in the key characters.
+					 */
+					for (j = 0; j < kg->key_size; j++)
+						assert(kg->start_key[j] +
+						    kg->chars_per_slot[j] <
+						    256);
+				}
+
+				/* Allow additional duplicates to be added. */
+				if (!IS_RECORD_BASED(file))
+					putflag = 0;
+			}
+		} else if (ret != 0) {
+			file->dbp[0]->err(file->dbp[0], ret, "%s: put",
+			    am_names[file->method]);
+			return (0);
+		}
+	}
+
+	return (ndups);
+}
+
+/*
+ * __file_info_select_key - implementation of FILE_INFO->select_key.
+ * Randomly selects a key from the specified file: a key group is
+ * picked from file->key_groups first, then a key within that group.
+ */
+void
+__file_info_select_key(file, rs, keyp)
+	FILE_INFO *file;
+	db_rand_state *rs;
+	DBT *keyp;
+{
+	KEY_GROUP *keygrp;
+
+	assert(file != NULL && keyp != NULL);
+	assert(file->key_groups != NULL);
+
+	keygrp = file->key_groups->select_item(file->key_groups, rs);
+	keyp->size = keygrp->key_size;
+	keygrp->select_key(keygrp, rs, keyp);
+	if (!IS_RECORD_BASED(file))
+		((char *)keyp->data)[keyp->size] = '\0';	/* for printing */
+}
+
+/*
+ * __file_info_select_key_from_keygrp - implementation of
+ * FILE_INFO->select_key_from_keygrp().  Randomly selects a key from
+ * the specified key group of the specified file.
+ */
+void
+__file_info_select_key_from_keygrp(file, rs, kgnum, keyp)
+	FILE_INFO *file;
+	db_rand_state *rs;
+	int kgnum;
+	DBT *keyp;
+{
+	KEY_GROUP *group;
+	char *bytes;
+
+	assert(file != NULL && keyp != NULL);
+	assert(file->key_groups != NULL);
+	assert(kgnum < file->key_groups->item_count);
+
+	group = file->key_groups->item[kgnum];
+	keyp->size = group->key_size;
+	group->select_key(group, rs, keyp);
+
+	/* Only keys of non-record-based files get a terminator. */
+	if (IS_RECORD_BASED(file))
+		return;
+	bytes = keyp->data;
+	bytes[keyp->size] = '\0';		/* for printing */
+}
+
+/*
+ * __file_info_num_to_key_non_rec - implementation of
+ * FILE_INFO->num_to_key() for non-record-based access methods.  Maps
+ * the number onto a key group by walking the cumulative group
+ * frequencies, then asks that group for the key at the remaining
+ * offset.  Always returns 0.
+ */
+int
+__file_info_num_to_key_non_rec(file, num, keyp)
+	FILE_INFO *file;
+	int num;
+	DBT *keyp;
+{
+	KEY_GROUP *group;
+	int idx, lower, total, upper;
+
+	/*
+	 * Duplicates can push num past the number of keys in the
+	 * file, so wrap it around first.
+	 */
+	total = file->key_groups->freq_total;
+	num = num % total;
+
+	/*
+	 * [lower, upper) is the range of numbers owned by group idx;
+	 * stop at the first group whose range contains num.
+	 */
+	lower = upper = 0;
+	idx = 0;
+	while (idx < file->key_groups->item_count) {
+		lower = upper;
+		upper += file->key_groups->freq[idx];
+		if (num < upper)
+			break;
+		idx++;
+	}
+	group = file->key_groups->item[idx];
+	keyp->size = group->key_size;
+
+	/* Translate the offset within the group into a key. */
+	group->num_to_key(group, num - lower, keyp);
+
+	return (0);
+}
+
+/*
+ * __file_info_num_to_key_rec - implementation of
+ * FILE_INFO->num_to_key() for record-based key groups.  The number is
+ * stored directly into keyp->data as a db_recno_t.
+ */
+int
+__file_info_num_to_key_rec(file, num, keyp)
+	FILE_INFO *file;
+	int num;
+	DBT *keyp;
+{
+	db_recno_t *recnop;
+
+	COMPQUIET(file, NULL);
+
+	recnop = keyp->data;
+	*recnop = num;
+	return (0);
+}
+
+/*
+ * __file_info_get_size -- implementation of FILE_INFO->get_size().
+ * Determines/estimates the size of the database.
+ */ +long +__file_info_get_size(file) + FILE_INFO *file; +{ + struct stat st; + long ret; + char buf[DBNAME_MAXSIZE * 2]; + + sprintf(buf, "%s/%s", g.home, file->name); + + if (stat(buf, &st) == 0) + ret = (long)st.st_size; + else + ret = 4 * (long)(file->get_num_items(file) * + (file->avg_key_size(file) + file->avg_data_size(file))); + + return (ret); +} + +/* + * __file_info_get_num_items -- implementation of FILE_INFO->get_num_items. + * Estimates the number of items in the database. + */ +int +__file_info_get_num_items(file) + FILE_INFO *file; +{ + KEY_GROUP *keygrp; + int avg_dups, i, total_count; + + total_count = 0; + for (i = 0; i < file->key_groups->item_count; i++) { + keygrp = file->key_groups->item[i]; + if (keygrp->numdup_dists != NULL) + avg_dups = keygrp->numdup_dists->mean_val_dists( + keygrp->numdup_dists); + else + avg_dups = 1; + + total_count += keygrp->key_count * avg_dups; + } + + return (total_count); +} + +/* + * __file_info_avg_key_size -- implementation of FILE_INFO->avg_key_size. + * Estimates the average size of the keys in the database. + */ +int +__file_info_avg_key_size(file) + FILE_INFO *file; +{ + KEY_GROUP *keygrp; + int i, total_count, total_size; + + if (IS_RECORD_BASED(file)) + return (sizeof(db_recno_t)); + + total_count = total_size = 0; + for (i = 0; i < file->key_groups->item_count; i++) { + keygrp = file->key_groups->item[i]; + total_count += keygrp->key_count; + total_size += keygrp->key_count * keygrp->key_size; + } + + assert(total_count > 0); + return (total_size / total_count); +} + +/* + * __file_info_max_key_size -- implementation of FILE_INFO->max_key_size. + * Determines the maximum size of the keys in the database. 
+ */ +int +__file_info_max_key_size(file) + FILE_INFO *file; +{ + KEY_GROUP *keygrp; + int i, max_size; + + if (IS_RECORD_BASED(file)) + return (sizeof(db_recno_t)); + + max_size = 0; + for (i = 0; i < file->key_groups->item_count; i++) { + keygrp = file->key_groups->item[i]; + if (keygrp->key_size > max_size) + max_size = keygrp->key_size; + } + + return (max_size); +} + +/* + * __file_info_avg_data_size -- implementation of FILE_INFO->avg_data_size. + * Estimates the average size of the data items in the database. + */ +int +__file_info_avg_data_size(file) + FILE_INFO *file; +{ + KEY_GROUP *kg; + int i, weighted_sum, weight_total; + + if (IS_SECONDARY(file)) + return (file->primary->avg_key_size(file->primary)); + else { + weighted_sum = weight_total = 0; + for (i = 0; i < file->key_groups->item_count; i++) { + kg = file->key_groups->item[i]; + weighted_sum += + kg->dsize_dists->mean_val_dists(kg->dsize_dists) * + kg->key_count; + weight_total += kg->key_count; + } + + return (weighted_sum / weight_total); + } +} + +/* + * __file_info_min_data_size -- implementation of FILE_INFO->min_data_size. + * Estimates the minimum size of the data items in the database. + */ +int +__file_info_min_data_size(file) + FILE_INFO *file; +{ + KEY_GROUP *kg; + int dsize, i, min_size; + + min_size = MAX_DATA_SIZE; + for (i = 0; i < file->key_groups->item_count; i++) { + kg = file->key_groups->item[i]; + dsize = kg->dsize_dists->min_val_dists(kg->dsize_dists); + if (dsize < min_size) + min_size = dsize; + } + + return (min_size); +} + +/* + * open_handle -- + * Create and open a DB handle for the specified file using the flags + * provided. A pointer to the created handle is stored in the id-th + * element of the file->dbp array. + */ +int +open_handle(file, open_flags, id) + FILE_INFO *file; + int open_flags, id; +{ + DB *dbp, *prim_dbp; + int ret, t_ret; + +retry: /* Create the DB handle. 
*/ + dbp = NULL; + if ((ret = db_create(&file->dbp[id], g.dbenv, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, "db_create"); + goto err; + } + dbp = file->dbp[id]; + + /* Set its flags. */ + if ((ret = dbp->set_flags(dbp, file->flags)) != 0) { + dbp->err(dbp, ret, "set_flags"); + goto err; + } + + /* XXX Should this go somewhere else? */ + if (FLD_ISSET(config.env_flags, DB_INIT_TXN)) + open_flags |= DB_AUTO_COMMIT; + + if (FLD_ISSET(file->special, FILE_INFO_DIRTY_READ)) + FLD_SET(open_flags, DB_READ_UNCOMMITTED); + if (FLD_ISSET(file->special, FILE_INFO_MULTIVERSION) && + FLD_ISSET(open_flags, DB_AUTO_COMMIT)) + FLD_SET(open_flags, DB_MULTIVERSION); + + /* Special steps if we're about to create the file. */ + if ((open_flags & DB_CREATE) != 0) { + if ((ret = dbp->set_pagesize(dbp, file->pagesize)) != 0) { + dbp->err(dbp, ret, "set_pagesize: %d", file->pagesize); + goto err; + } + } + + /* Set the parameters that are access-method specific. */ + if ((ret = file->set_method_params(file, id)) != 0) + goto err; + + /* Open the file. */ + if ((ret = dbp->open(dbp, NULL, file->name, + file->subdb_name, file->method, open_flags, 0644)) == EIO) { + dbp->close(dbp, 0); + goto retry; + } + if (ret != 0) { + g.dbenv->err(g.dbenv, ret, "open: %s", file->name); + goto err; + } + dbp->mpf->set_priority(dbp->mpf, file->priority); + + /* Associate a secondary with its primary. */ + if (IS_SECONDARY(file)) { + /* + * This will be used by the callback function + * to find the offsets for the keys associated with + * this secondary. 
+ */ + dbp->app_private = (void *)(uintptr_t)file->secon_num; + + prim_dbp = file->primary->dbp[id]; + assert(prim_dbp != NULL); + if ((ret = prim_dbp->associate(prim_dbp, NULL, dbp, + file->callback, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, "associate: %s", + file->name); + goto err; + } + } + + return (0); + +err: if (dbp != NULL && (t_ret = dbp->close(dbp, 0)) != 0) + g.dbenv->err(g.dbenv, t_ret, "DB->close: %s", file->name); + + file->dbp[id] = NULL; + return (ret); +} + +/* + * scan_create_sequential - constructor for SCAN objects of type + * SCAN_SEQUENTIAL + */ +SCAN * +scan_create_sequential(file_num, update_freq, incr) + int file_num, update_freq, incr; +{ + SCAN *scan; + int ret; + + if ((ret = __os_malloc(g.dbenv, sizeof(SCAN), &scan)) != 0) + return (NULL); + + scan->type = SCAN_SEQUENTIAL; + scan->file_num = file_num; + scan->update_freq = update_freq; + scan->incr = incr; + + /* not used for sequential scans */ + scan->kgnum = -1; + scan->start_keynum = 0; + + scan->execute = __scan_execute_sequential; + + return (scan); +} + +/* + * scan_create_keygroup - constructor for SCAN objects of type + * SCAN_KEYGROUP + */ +SCAN * +scan_create_keygroup(file_num, kgnum, update_freq, start_keynum, incr) + int file_num, kgnum, update_freq, start_keynum, incr; +{ + SCAN *scan; + int ret; + + if ((ret = __os_malloc(g.dbenv, sizeof(SCAN), &scan)) != 0) + return (NULL); + + scan->type = SCAN_KEYGROUP; + scan->file_num = file_num; + scan->kgnum = kgnum; + scan->update_freq = update_freq; + scan->start_keynum = start_keynum; + scan->incr = incr; + + scan->execute = __scan_execute_keygroup; + + return (scan); +} + +/* + * init_curs - open a cursor and position it according to the + * specified key and flag. Used by __scan_execute_sequential() and + * __scan_execute_keygroup(). 
+ */ +static int +init_curs(cursp, dbp, txn, keyp, datap, flag) + DBC **cursp; + DB *dbp; + DB_TXN *txn; + DBT *keyp, *datap; + int flag; +{ + int ret; + + if ((ret = dbp->cursor(dbp, txn, cursp, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, "DBP->cursor failed"); + return (ret); + } + if ((ret = (*cursp)->get(*cursp, keyp, datap, flag)) != 0) { + g.dbenv->err(g.dbenv, ret, "CURS->get failed"); + return (ret); + } + + return (0); +} + +/* + * __scan_execute_sequential - implementation of SCAN->execute for + * initial scans that are of type SCAN_SEQUENTIAL. Performs a + * sequential scan of a database file. + */ +int +__scan_execute_sequential(scan) + SCAN *scan; +{ + DBT data, key; + DBC *curs; + DB_TXN *txn; + FILE_INFO *file; + KEY_GROUP *keygrp; + db_recno_t keyrec; + int i, j, ret, total_items, total_iter, tret; + char *keystr, *datastr; + + assert(scan->type == SCAN_SEQUENTIAL); + curs = NULL; + keygrp = NULL; + txn = NULL; + ret = 0; + + /* Get the file_info object. */ + if (scan->file_num >= config.file_count) { + g.dbenv->errx(g.dbenv, "invalid file_num for scan: %d", + scan->file_num); + return (EINVAL); + } + file = config.file[scan->file_num]; + + /* Initialize the DBTs. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + keystr = config.thread[0]->keystr; + datastr = config.thread[0]->datastr; + init_dbts(file, &key, &data, keystr, datastr, + &keyrec, MAX_KEY_SIZE, MAX_DATA_SIZE); + + /* Determine the number of iterations. */ + total_items = 0; + for (i = 0; i < file->key_groups->item_count; i++) { + keygrp = file->key_groups->item[i]; + total_items += keygrp->key_count; + } + total_iter = total_items / scan->incr; + + /* Begin the first transaction. */ + if ((config.env_flags & DB_INIT_TXN) && + (ret = g.dbenv->txn_begin(g.dbenv, NULL, &txn, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, "txn_begin failed"); + goto err; + } + + /* Open a cursor and position it on the first item. 
*/ + ret = init_curs(&curs, file->dbp[0], txn, &key, &data, DB_FIRST); + if (ret != 0) + goto err; + + /* Perform the scan. */ + g.dbenv->errx(g.dbenv, + "Scanning file %d (update_freq = %d%%, incr = %d)...", + scan->file_num, scan->update_freq, scan->incr); + + for (i = 0; i < total_iter; i++) { + /* Skip to the next item that we actually get. */ + data.flags = DB_DBT_USERMEM|DB_DBT_PARTIAL; + data.dlen = 0; + for (j = 0; j < scan->incr; j++) { + if (i * scan->incr + j == total_items - 1) + goto err; + if (j == scan->incr - 1) + data.flags = DB_DBT_USERMEM; + + ret = curs->get(curs, &key, &data, DB_NEXT); + if (ret != 0) { + g.dbenv->err(g.dbenv, ret, + "CURS->get failed"); + goto err; + } + } + + /* Start a new txn every 1000 keys or so. */ + if (i != 0 && (i % (1000 / scan->incr)) == 0 && + config.env_flags & DB_INIT_TXN) { + curs->close(curs); + + /* Commit the txn and start a new one. */ + if ((ret = txn->commit(txn, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, + "txn_commit failed"); + goto err; + } + if ((ret = g.dbenv->txn_begin(g.dbenv, NULL, + &txn, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, + "txn_begin failed"); + goto err; + } + + /* Open and position a new cursor. */ + ret = init_curs(&curs, file->dbp[0], txn, + &key, &data, DB_SET); + if (ret != 0) + goto err; + } + + /* + * Perform an update according to the frequency + * specified in the config file. Since we simply + * want to dirty pages, we just re-put the + * existing data item. 
+ */ + if (scan->update_freq != 0 && + random_int(&g.rand_state, 1, 100) <= scan->update_freq) { + ret = curs->put(curs, &key, &data, DB_CURRENT); + if (ret != 0) { + g.dbenv->err(g.dbenv, ret, + "CURS->put failed"); + goto err; + } + } + } + +err: if (curs != NULL) + curs->close(curs); + if (txn != NULL && (tret = txn->commit(txn, 0)) != 0) + g.dbenv->err(g.dbenv, tret, "txn_commit failed\n"); + + return (ret); +} + +/* + * __scan_execute_keygroup - implementation of SCAN->execute for + * initial scans that are of type SCAN_KEYGROUP. Scans the items in a + * particular keygroup of a database file. Note that we do *not* use + * a cursor, because consecutive keys in a keygroup may not be sequential. + */ +int +__scan_execute_keygroup(scan) + SCAN *scan; +{ + DBT data, key; + DB_TXN *txn; + FILE_INFO *file; + KEY_GROUP *keygrp; + db_recno_t keyrec; + int i, keynum, numkeys, ret, tret; + char *keystr, *datastr; + + assert(scan->type == SCAN_KEYGROUP); + txn = NULL; + ret = 0; + + /* Get the file_info object and keygroup. */ + if (scan->file_num >= config.file_count) { + g.dbenv->errx(g.dbenv, "invalid file_num for scan: %d", + scan->file_num); + return (EINVAL); + } + file = config.file[scan->file_num]; + if (IS_RECORD_BASED(file) && scan->start_keynum == 0) + scan->start_keynum = 1; + if (scan->kgnum >= file->key_groups->item_count) { + g.dbenv->errx(g.dbenv, "invalid key group for scan"); + return (EINVAL); + } + keygrp = file->key_groups->item[scan->kgnum]; + + /* Initialize the DBTs. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + keystr = config.thread[0]->keystr; + datastr = config.thread[0]->datastr; + init_dbts(file, &key, &data, keystr, datastr, + &keyrec, MAX_KEY_SIZE, MAX_DATA_SIZE); + + /* Begin the first transaction. 
*/ + if ((config.env_flags & DB_INIT_TXN) && + (ret = g.dbenv->txn_begin(g.dbenv, NULL, &txn, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, "txn_begin failed"); + return (ret); + } + + g.dbenv->errx(g.dbenv, + "Scanning keygrp %d of file %d (upd_freq = %d%%, " + "start = %d, incr = %d)...", scan->kgnum, scan->file_num, + scan->update_freq, scan->start_keynum, scan->incr); + + /* Iterate through the keys for this scan. */ + numkeys = (keygrp->key_count - scan->start_keynum) / scan->incr; + for (i = 0; i < numkeys; i++) { + /* Start a new txn every 1000 gets. */ + if (i != 0 && (i % 1000) == 0) { + if (config.env_flags & DB_INIT_TXN) { + if ((ret = txn->commit(txn, 0)) != 0) { + g.dbenv->err(g.dbenv, + ret, "txn_commit failed"); + goto err; + } + if ((ret = g.dbenv->txn_begin(g.dbenv, + NULL, &txn, 0)) != 0) { + g.dbenv->err(g.dbenv, + ret, "txn_begin failed"); + goto err; + } + } + } + + /* Determine the key. */ + keynum = scan->start_keynum + (i * scan->incr); + if ((ret = keygrp->num_to_key(keygrp, keynum, &key)) != 0) + goto err; + + /* Read the associated item. */ + ret = file->dbp[0]->get(file->dbp[0], txn, &key, &data, 0); + if (ret != 0) { + g.dbenv->err(g.dbenv, ret, "DB->get failed"); + goto err; + } + + /* + * Perform an update according to the frequency + * specified in the config file. Since we simply + * want to dirty pages, we just re-put the + * existing data item. 
+ */ + if (scan->update_freq != 0 && + random_int(&g.rand_state, 1, 100) <= scan->update_freq) { + ret = file->dbp[0]->put(file->dbp[0], txn, + &key, &data, 0); + if (ret != 0) { + g.dbenv->err(g.dbenv, ret, "DB->put failed"); + goto err; + } + } + } + +err: if (txn != NULL && (tret = txn->commit(txn, 0)) != 0) { + g.dbenv->err(g.dbenv, tret, "txn_commit failed\n"); + if (ret == 0) + ret = tret; + } + + return (ret); +} + +/* + * __callback_fn - the callback function for secondary indices + */ +int +__callback_fn(dbp, pkey, pdata, skey) + DB *dbp; + const DBT *pkey, *pdata; + DBT *skey; +{ + char *data; + u_int *offsets; + int secon_num; + + COMPQUIET(pkey, NULL); + + secon_num = (uintptr_t)dbp->app_private; + data = pdata->data; + if (config.dbs) + data += DBS_STR_OFFSET; + + offsets = (u_int *)data; + assert(offsets[secon_num + 1] > offsets[secon_num]); + assert(offsets[secon_num + 1] <= pdata->size); + + memset(skey, 0, sizeof(DBT)); + skey->data = ((char *)data) + offsets[secon_num]; + skey->size = offsets[secon_num + 1] - offsets[secon_num]; + + return (0); +} --- db-4.6.21/test_perf/perf_key.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_key.c 2007-11-05 15:51:13.635920000 +1100 @@ -0,0 +1,641 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000,2007 Oracle. All rights reserved. + * + * $Id: perf_key.c,v 12.7 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +/* + * key_group_create_* - constructors for KEY_GROUP objects. 
+ */
+KEY_GROUP *
+key_group_create_key_count(file, key_size, key_count, char_set_size, RtoL)
+	FILE_INFO *file;
+	int key_size, key_count, char_set_size, RtoL;
+{
+	KEY_GROUP *keygrp;
+	int i, ret;
+
+	/*
+	 * With no key slots, no character-set size can ever provide
+	 * key_count distinct keys, and the sizing loop below would
+	 * never terminate.
+	 */
+	if (key_size < 1) {
+		g.dbenv->errx(g.dbenv,
+		    "key_group_create_key_count: invalid key size: %d",
+		    key_size);
+		return (NULL);
+	}
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(KEY_GROUP), &keygrp)) != 0)
+		return (NULL);
+
+	keygrp->type = KEYGRP_KEY_COUNT;
+	keygrp->file = file;
+	keygrp->key_size = key_size;
+	keygrp->key_count = key_count;
+	keygrp->char_set_size = char_set_size;
+	keygrp->numdup_dists = NULL;
+	keygrp->dsize_dists = NULL;
+
+	if ((ret = __os_malloc(g.dbenv,
+	    key_size * sizeof(int), &keygrp->chars_per_slot)) != 0) {
+		__os_free(g.dbenv, keygrp);
+		return (NULL);
+	}
+	if ((ret = __os_malloc(g.dbenv,
+	    (key_size + 1) * sizeof(char), &keygrp->start_key)) != 0) {
+		__os_free(g.dbenv, keygrp->chars_per_slot);
+		__os_free(g.dbenv, keygrp);
+		return (NULL);
+	}
+
+	/*
+	 * Grow the character set until key_size slots can represent
+	 * key_count keys.  Doubling zero stays zero, so start from at
+	 * least 1 to guarantee progress.
+	 */
+	if (keygrp->char_set_size < 1)
+		keygrp->char_set_size = 1;
+	while (pow(keygrp->char_set_size, key_size) < key_count)
+		keygrp->char_set_size *= 2;
+	for (i = 0; i < key_size; i++) {
+		keygrp->chars_per_slot[i] = keygrp->char_set_size;
+		keygrp->start_key[i] = 'a';
+	}
+	keygrp->start_key[key_size] = '\0';
+
+	/* This doesn't apply. */
+	keygrp->start_recno = -1;
+
+	/* methods */
+	keygrp->select_key = __key_group_select_key_key_count;
+	keygrp->first_key = __key_group_first_key_non_rec;
+	keygrp->next_key = (RtoL ? __key_group_next_key_non_rec_RtoL :
+	    __key_group_next_key_non_rec_LtoR);
+	keygrp->select_numdups = __key_group_select_numdups;
+	keygrp->fill_data = (config.dbs ? __key_group_fill_data_dbs :
+	    __key_group_fill_data_random);
+	keygrp->num_to_key = __key_group_num_to_key_non_rec;
+
+	return (keygrp);
+}
+
+/*
+ * key_group_create_chars_per_slot - constructor for key groups that are
+ * described by the number of possible characters in each key slot.  The
+ * per-slot counts, key count and character-set size all start at zero.
+ * Returns NULL if an allocation fails.
+ */
+KEY_GROUP *
+key_group_create_chars_per_slot(file, key_size, RtoL)
+	FILE_INFO *file;
+	int key_size, RtoL;
+{
+	KEY_GROUP *keygrp;
+	int i, ret;
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(KEY_GROUP), &keygrp)) != 0)
+		return (NULL);
+
+	keygrp->type = KEYGRP_CHARS_PER_SLOT;
+	keygrp->file = file;
+	keygrp->key_size = key_size;
+	keygrp->numdup_dists = NULL;
+	keygrp->dsize_dists = NULL;
+
+	if ((ret = __os_malloc(g.dbenv,
+	    key_size * sizeof(int), &keygrp->chars_per_slot)) != 0) {
+		__os_free(g.dbenv, keygrp);
+		return (NULL);
+	}
+	if ((ret = __os_malloc(g.dbenv,
+	    (key_size + 1) * sizeof(char), &keygrp->start_key)) != 0) {
+		__os_free(g.dbenv, keygrp->chars_per_slot);
+		__os_free(g.dbenv, keygrp);
+		return (NULL);
+	}
+
+	/* Most of these will get filled in or changed later. */
+	keygrp->key_count = 0;
+	keygrp->char_set_size = 0;
+	for (i = 0; i < key_size; i++) {
+		keygrp->chars_per_slot[i] = 0;
+		keygrp->start_key[i] = 'a';
+	}
+	keygrp->start_key[key_size] = '\0';
+
+	/* This doesn't apply. */
+	keygrp->start_recno = -1;
+
+	/* methods */
+	keygrp->select_key = __key_group_select_key_chars_per_slot;
+	keygrp->first_key = __key_group_first_key_non_rec;
+	keygrp->next_key = (RtoL ? __key_group_next_key_non_rec_RtoL :
+	    __key_group_next_key_non_rec_LtoR);
+	keygrp->select_numdups = __key_group_select_numdups;
+	keygrp->fill_data = (config.dbs ? __key_group_fill_data_dbs :
+	    __key_group_fill_data_random);
+	keygrp->num_to_key = __key_group_num_to_key_non_rec;
+
+	return (keygrp);
+}
+
+/*
+ * key_group_create_rec - constructor for record-based key groups.
+ * Record numbers start at 1 and the key size is sizeof(db_recno_t).
+ * Returns NULL if the allocation fails.
+ */
+KEY_GROUP *
+key_group_create_rec(file, key_count)
+	FILE_INFO *file;
+	int key_count;
+{
+	KEY_GROUP *keygrp;
+	int ret;
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(KEY_GROUP), &keygrp)) != 0)
+		return (NULL);
+
+	keygrp->type = KEYGRP_REC;
+	keygrp->file = file;
+	keygrp->key_count = key_count;
+	keygrp->start_recno = 1;
+	keygrp->key_size = sizeof(db_recno_t);
+	keygrp->dsize_dists = NULL;
+
+	/* These don't apply. */
+	keygrp->chars_per_slot = NULL;
+	keygrp->char_set_size = 0;
+	keygrp->start_key = NULL;
+	keygrp->numdup_dists = NULL;
+
+	/* methods */
+	keygrp->select_key = __key_group_select_key_rec;
+	keygrp->first_key = __key_group_first_key_rec;
+	keygrp->next_key = __key_group_next_key_rec;
+	keygrp->select_numdups = __key_group_select_numdups;
+	/* This will be overwritten for primaries and secondaries. */
+	keygrp->fill_data = (config.dbs ?
__key_group_fill_data_dbs : + __key_group_fill_data_random); + keygrp->num_to_key = __key_group_num_to_key_rec; + + return (keygrp); +} + +/* + * key_group_copy - copy constructor for KEY_GROUP objects + */ +KEY_GROUP * +key_group_copy(source) + KEY_GROUP *source; +{ + KEY_GROUP *keygrp; + int i, ret; + + if ((ret = __os_malloc(g.dbenv, sizeof(KEY_GROUP), &keygrp)) != 0) + return (NULL); + + /* data members */ + keygrp->type = source->type; + keygrp->file = source->file; + keygrp->key_size = source->key_size; + keygrp->key_count = source->key_count; + keygrp->char_set_size = source->char_set_size; + keygrp->start_recno = source->start_recno; + keygrp->numdup_dists = source->numdup_dists; + keygrp->dsize_dists = source->dsize_dists; + if (source->chars_per_slot == NULL) { + keygrp->chars_per_slot = NULL; + keygrp->start_key = NULL; + } else { + if ((ret = __os_malloc(g.dbenv, + keygrp->key_size * sizeof(int), + &keygrp->chars_per_slot)) != 0) { + __os_free(g.dbenv, keygrp); + return (NULL); + } + if ((ret = __os_malloc(g.dbenv, + (keygrp->key_size + 1) * sizeof(char), + &keygrp->start_key)) != 0) { + __os_free(g.dbenv, keygrp->chars_per_slot); + __os_free(g.dbenv, keygrp); + return (NULL); + } + + for (i = 0; i < keygrp->key_size; i++) { + keygrp->chars_per_slot[i] = source->chars_per_slot[i]; + keygrp->start_key[i] = source->start_key[i]; + } + keygrp->start_key[keygrp->key_size] = '\0'; + } + + /* methods */ + keygrp->select_key = source->select_key; + keygrp->first_key = source->first_key; + keygrp->next_key = source->next_key; + keygrp->select_numdups = source->select_numdups; + keygrp->fill_data = source->fill_data; + + return (keygrp); +} + +/* + * key_group_destroy - destructor for KEY_GROUP objects + */ +void +key_group_destroy(keygrp) + KEY_GROUP *keygrp; +{ + if (keygrp->chars_per_slot != NULL) + __os_free(g.dbenv, keygrp->chars_per_slot); + if (keygrp->start_key != NULL) + __os_free(g.dbenv, keygrp->start_key); + /* + * Because the dsize_dists and 
numdup_dists can be shared, + * they are freed elsewhere. + */ + __os_free(g.dbenv, keygrp); +} + +/* + * __key_group_select_key_chars_per_slot - implementation of + * KEY_GROUP->select_key() for keygroups that specify the number + * of possible characters for each element ("slot") of the key. + */ +void +__key_group_select_key_chars_per_slot(keygrp, rs, keyp) + KEY_GROUP *keygrp; + db_rand_state *rs; + DBT *keyp; +{ + u_int i; + + assert(keygrp->type == KEYGRP_CHARS_PER_SLOT); + + for (i = 0; i < keyp->size; i++) + ((char *)keyp->data)[i] = keygrp->start_key[i] + + (char)random_int(rs, 0, keygrp->chars_per_slot[i] - 1); +} + +/* + * __key_group_select_key_key_count - implementation of + * KEY_GROUP->select_key() for keygroups that specify the + * total number of keys and the number of characters from + * which they should be composed. + */ +void +__key_group_select_key_key_count(keygrp, rs, keyp) + KEY_GROUP *keygrp; + db_rand_state *rs; + DBT *keyp; +{ + int num; + + assert(keygrp->type == KEYGRP_KEY_COUNT); + + /* Select a number corresponding to one of the keys. */ + num = random_int(rs, 0, keygrp->key_count - 1); + + /* Convert this number into a key. */ + keygrp->num_to_key(keygrp, num, keyp); +} + +/* + * __key_group_select_key_rec - implementation of + * KEY_GROUP->select_key() for record-based keygroups. + */ +void +__key_group_select_key_rec(keygrp, rs, keyp) + KEY_GROUP *keygrp; + db_rand_state *rs; + DBT *keyp; +{ + + assert(keygrp->type == KEYGRP_REC); + + /* Select a record number. */ + *((db_recno_t *)keyp->data) = keygrp->start_recno + + random_int(rs, 0, keygrp->key_count - 1); +} + +/* + * __key_group_first_key_non_rec - implementation of + * KEY_GROUP->first_key() for key groups that are not record-based. 
+ */ +void +__key_group_first_key_non_rec(keygrp, keyp) + KEY_GROUP *keygrp; + DBT *keyp; +{ + + assert(keygrp->type != KEYGRP_REC); + + keyp->size = keygrp->key_size; + strncpy(keyp->data, keygrp->start_key, keyp->size); + ((char *)keyp->data)[keyp->size] = '\0'; /* for printing */ +} + +/* + * __key_group_first_key_rec - implementation of + * KEY_GROUP->first_key() for record-based key groups. + */ +void +__key_group_first_key_rec(keygrp, keyp) + KEY_GROUP *keygrp; + DBT *keyp; +{ + + assert(keygrp->type == KEYGRP_REC); + + *((db_recno_t *)keyp->data) = 1; + return; +} + +/* + * __key_group_next_key_non_rec_LtoR - implementation of + * KEY_GROUP->next_key() for non-record-based access methods + * in which the key is incremented from left to right, giving + * the following type of sequence: aaaaaa, baaaaa, caaaaa, etc. + * Takes the key in keyp->data and increments it to the value + * of the next key in the sequence. Used when populating the + * initial database and when performing initial scans. + * Returns EINVAL if the specified key is already the last key in + * the key group. + */ +int +__key_group_next_key_non_rec_LtoR(keygrp, keyp) + KEY_GROUP *keygrp; + DBT *keyp; +{ + char *keyval; + u_int i; + + assert(keygrp != NULL && keygrp->type != KEYGRP_REC); + + i = 0; + keyval = (char *)keyp->data; + keyval[i]++; + while (keyval[i] == + (char)(keygrp->start_key[i] + keygrp->chars_per_slot[i])) { + /* + * Need to "carry," resetting this slot and + * incrementing the next one. + */ + keyval[i] = keygrp->start_key[i]; + i++; + if (i == keyp->size) + return (EINVAL); + keyval[i]++; + } + + return (0); +} + +/* + * __key_group_next_key_non_rec_RtoL - implementation of + * KEY_GROUP->next_key() for non-record-based access methods + * in which the key is incremented from right to left, giving + * the following type of sequence: aaaaaa, aaaaab, aaaaac, etc. + * Takes the key in keyp->data and increments it to the value + * of the next key in the sequence. 
Used when populating the + * initial database and when performing initial scans. + * Returns EINVAL if the specified key is already the last key in + * the key group. + */ +int +__key_group_next_key_non_rec_RtoL(keygrp, keyp) + KEY_GROUP *keygrp; + DBT *keyp; +{ + char *keyval; + int i; + + assert(keygrp != NULL && keygrp->type != KEYGRP_REC); + + i = keygrp->key_size - 1; + keyval = (char *)keyp->data; + keyval[i]++; + while (keyval[i] == + (char)(keygrp->start_key[i] + keygrp->chars_per_slot[i])) { + /* + * Need to "carry," resetting this slot and + * incrementing the next one. + */ + keyval[i] = keygrp->start_key[i]; + i--; + if (i < 0) + return (EINVAL); + keyval[i]++; + } + + return (0); +} + +/* + * __key_group_next_key_rec - implementation of KEY_GROUP->next_key() + * for record-based access methods. Returns EINVAL if the specified + * key is already the last key in the key group. + */ +int +__key_group_next_key_rec(keygrp, keyp) + KEY_GROUP *keygrp; + DBT *keyp; +{ + + assert(keygrp->type == KEYGRP_REC); + + if (*((db_recno_t *)keyp->data) == (u_int)keygrp->key_count) + return (EINVAL); + + *((db_recno_t *)keyp->data) += 1; + return (0); +} + +/* + * __key_group_select_numdups - implementation of KEY_GROUP->select_numdups() + */ +int +__key_group_select_numdups(keygrp, rs) + KEY_GROUP *keygrp; + db_rand_state *rs; +{ + int numdups; + + assert(keygrp != NULL); + + /* Record-based files can't have duplicates. */ + assert(keygrp->type != KEYGRP_REC || keygrp->numdup_dists == NULL); + + if (keygrp->numdup_dists == NULL) + return (1); + + numdups = keygrp->numdup_dists-> + select_from_dists(keygrp->numdup_dists, rs, 1); + return (numdups); +} + +/* + * __key_group_fill_data_random - implementation of KEY_GROUP->fill_data() + * for databases with random data. Randomly selects the size of a data + * item, based on the data-size subdistributions given for the + * specified file, and fills it with random characters. 
+ */ +void +__key_group_fill_data_random(keygrp, rs, datap, id) + KEY_GROUP *keygrp; + db_rand_state *rs; + DBT *datap; + int id; +{ + + assert(keygrp != NULL && keygrp->dsize_dists != NULL); + assert(datap != NULL); + assert(!IS_SECONDARY(keygrp->file)); + COMPQUIET(id, 0); + + /* + * Select one of the data-size subdistributions, + * and then select one of the integers from that + * subdistribution as the size. + */ + datap->size = keygrp->dsize_dists-> + select_from_dists(keygrp->dsize_dists, rs, 1); + if (datap->size > MAX_DATA_SIZE) + datap->size = MAX_DATA_SIZE; + + random_data(rs, (char *)datap->data, datap->size); + if (HAS_SECONDARIES(keygrp->file)) + fill_data_primary(keygrp->file, datap->data, datap->ulen); +} + +/* + * fill_data_primary - takes a pointer to the value of a data item + * that is about to be added to an index with secondaries and + * overwrites a header consisting of a set of secondary keys and an + * array of offsets for those keys. + */ +void +fill_data_primary(file, data, ulen) + FILE_INFO *file; + void *data; + int ulen; +{ + DBT key; + FILE_INFO *secon; + int i; + u_int curr_offset, *offsets, offsets_size; + + assert(data != NULL); + assert(file != NULL && HAS_SECONDARIES(file)); + assert(ulen > 0); + + memset(&key, 0, sizeof(key)); + key.flags = DB_DBT_USERMEM; + + /* + * The header begins with an array of offsets, one for each + * secondary key. These allow the callback functions to + * find the appropriate key. + */ + offsets = (u_int *)data; + offsets_size = (file->secondaries->item_count + 1) * sizeof(u_int); + curr_offset = offsets_size; + + /* Add the info. for each secondary. */ + for (i = 0; i < file->secondaries->item_count; i++) { + secon = file->secondaries->item[i]; + + /* Store a key for this secondary. */ + key.data = ((char *)data) + curr_offset; + key.ulen = ulen - curr_offset; + secon->num_to_key(secon, file->nitems, &key); + + /* Store the offset for this key. 
*/ + offsets[i] = curr_offset; + curr_offset += key.size; + } + /* + * One last offset to allow us to determine the length of the + * key from the final secondary. + */ + offsets[i] = curr_offset; +} + +/* + * __key_group_fill_data_secon - implementation of + * KEY_GROUP->fill_data() for secondary indices. This should never be + * called, because we can't directly add items to a secondary. + */ +void +__key_group_fill_data_secon(keygrp, rs, datap, id) + KEY_GROUP *keygrp; + db_rand_state *rs; + DBT *datap; + int id; +{ + + COMPQUIET(keygrp, NULL); + COMPQUIET(rs, NULL); + COMPQUIET(datap, NULL); + COMPQUIET(id, 0); + + /* This method should never be called. */ + assert(0); +} + +/* __key_group_fill_data_dbs is found in perf_dbs.c */ + +/* + * __key_group_num_to_key_non_rec - implementation of + * KEY_GROUP->num_to_key() for non-record-based access methods. Takes + * the specified number and converts it to the corresponding key from + * the specified key group. We do this by converting the number (in + * base 10) to an equivalent number in base (keygrp->char_set_size) + * and using a character representation for the resulting number. + */ +int +__key_group_num_to_key_non_rec(keygrp, num, keyp) + KEY_GROUP *keygrp; + int num; + DBT *keyp; +{ + int i, place, place_delta, place_val; + char *keystr; + + assert(keygrp != NULL && keygrp->type != KEYGRP_REC); + if (num >= keygrp->key_count) { + g.dbenv->errx(g.dbenv, "num_to_key: num is too large"); + return (EINVAL); + } + + keystr = (char *)keyp->data; + if (keygrp->next_key == __key_group_next_key_non_rec_LtoR) { + place = 0; + place_delta = 1; + } else { + place = keygrp->key_size - 1; + place_delta = -1; + } + + /* Begin with the start key, which is equivalent to 0. */ + strncpy(keystr, keygrp->start_key, keygrp->key_size + 1); + + /* + * Determine the largest place value with a non-zero value--i.e., + * the largest element of the key that will differ from + * keygrp->start_key. 
+ */ + place_val = 1; + while ((place_val * keygrp->chars_per_slot[place]) <= num) { + place_val *= keygrp->chars_per_slot[place]; + place += place_delta; + } + + /* Convert the number to the equivalent key. */ + for (i = place; i >= 0 && i < keygrp->key_size; i -= place_delta) { + keystr[i] = keygrp->start_key[i] + (char)(num / place_val); + num = num % place_val; + if (i - place_delta >= 0 && + i - place_delta < keygrp->key_size) + place_val /= keygrp->chars_per_slot[i - place_delta]; + } + + return (0); +} + +/* + * __key_group_num_to_key_rec - implementation of + * KEY_GROUP->num_to_key() for record-based key groups. + */ +int +__key_group_num_to_key_rec(keygrp, num, keyp) + KEY_GROUP *keygrp; + int num; + DBT *keyp; +{ + + assert(keygrp->type == KEYGRP_REC); + if (num >= keygrp->key_count) { + g.dbenv->errx(g.dbenv, "num_to_key: num is too large"); + return (EINVAL); + } + + *((db_recno_t *)keyp->data) = keygrp->start_recno + num; + + return (0); +} --- db-4.6.21/test_perf/perf_log.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_log.c 2007-11-05 15:51:13.791916000 +1100 @@ -0,0 +1,321 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999,2007 Oracle. All rights reserved. + * + * $Id: perf_log.c,v 12.18 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +static os_thread_t *log_threads; /* Log threads. */ + +int +log_init() +{ + return ((log_threads = + spawn_kids("log threads", 1, log_thread)) == NULL ? 1 : 0); +} + +int +log_shutdown() +{ + return (wait_kids("log threads", log_threads)); +} + +void * +log_thread(arg) + void *arg; +{ + int first, ret; + + COMPQUIET(arg, NULL); + ret = 0; + +#ifdef HAVE_VXWORKS + ioTaskStdSet(0, 1, g.stdfd); + ioTaskStdSet(0, 2, g.stdfd); +#endif + g.dbenv->errx(g.dbenv, "Log cleaning thread: %lu", + (u_long)os_thread_id()); + +#if !defined(HAVE_VXWORKS) +#endif + + /* + * Loop repeatedly, cleaning log files and possibly archiving. 
+ */ + first = 1; + while (!g.shutdown && ret == 0) { + if ((ret = log_clean(first)) == 0) + snooze( + g.dbenv, 1000 * random_int(&g.rand_state, 5, 30)); + first = 0; + } + + g.shutdown = 1; + return ((void *)(uintptr_t)ret); +} + +int +log_clean(first) + int first; +{ + int i, ret; + char **begin, **list, buf[4096], path[1024]; +#if !defined(HAVE_VXWORKS) && !defined(DB_WIN32) + char tbuf[2048]; +#endif + + ret = 0; + + if (config.archive != NULL) { + (void)snprintf(buf, sizeof(buf), "touch %s/RECOVER", g.home); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + /* Clean the archive directory. */ +#ifndef HAVE_VXWORKS + if (os_access("PRESERVE", W_OK|X_OK) == 0) { + (void)snprintf(buf, sizeof(buf), + "touch %s/x; cp %s/* PRESERVE", + config.archive, config.archive); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + } +#endif + if (g.logclean == CLEAN_ARCHIVE || g.logclean == CLEAN_UPDATE) { + (void)snprintf(buf, sizeof(buf), + "rm -f %s/log.*", config.archive); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, + errno, "logclean: command failed: %s", buf); + goto out; + } + } + + /* + * Copy the current db files, their corresponding + * info. files, and the DB_CONFIG file into the + * archive directory. 
+ */ + for (i = 0; i < config.file_count; ++i) { + if (first) { + (void)snprintf(buf, sizeof(buf), + "cp %s/%s.info %s/DB_CONFIG %s", + g.home, config.file[i]->name, + g.home, config.archive); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + } + if (g.logclean == CLEAN_ARCHIVE) { + (void)snprintf(buf, sizeof(buf), + "dd if=%s/%s of=%s/%s bs=%d;", + g.home, config.file[i]->name, + config.archive, config.file[i]->name, + config.file[i]->pagesize); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + } + if (!first || g.logclean == CLEAN_ARCHIVE) { + (void)snprintf(buf, sizeof(buf), + "cp %s/%s %s/%s.bak", + config.archive, config.file[i]->name, + config.archive, config.file[i]->name); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + } + } + } + + if (g.logclean == CLEAN_HOTBACKUP || g.logclean == CLEAN_HOTUPDATE) { + strcpy(path, g.progpath); + /* Clean out dead thread slots before starting backup. */ + if (config.killtest_iter == 0) { + g.dbenv->set_isalive(g.dbenv, say_dead); + (void)g.dbenv->failchk(g.dbenv, 0); + } + path[strlen(path) - strlen(g.progname)] = '\0'; + (void)snprintf(buf, sizeof(buf), + "%sdb_hotbackup -v -c -h %s -b %s %s %s %s", + path, g.home, config.archive, + config.passwd ? "-P " : "", + config.passwd ? config.passwd : "", + g.logclean == CLEAN_HOTUPDATE ? 
"-u" : ""); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, + errno, "logclean: command failed: %s", buf); + goto out; + } + (void)snprintf(buf, sizeof(buf), + "cp %s %s", config.conf_current, config.archive); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + (void)snprintf(buf, sizeof(buf), + "%s -V -h %s", g.progpath, config.archive); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + + goto done; + } + + /* + * Get the names of the log files that are no longer + * in use. + */ + if ((ret = g.dbenv->log_archive(g.dbenv, &list, 0)) != 0) { + g.dbenv->err(g.dbenv, ret, "log_archive list"); + goto out; + } + + /* + * If archiving (and we're not running on VxWorks), + * move the unused log files to the archive + * directory, else just delete them. + */ + if (list != NULL) { + for (begin = list; *list != NULL; ++list) { + (void)snprintf(buf, + sizeof(buf), "%s/%s", config.logdir, *list); + +#if !defined(HAVE_VXWORKS) && !defined(DB_WIN32) + if (config.archive != NULL) { + (void)snprintf(tbuf, sizeof(tbuf), + "%s/%s", config.archive, *list); + + /* + * If running crash tests, the + * previous archive recover/verify + * phase (see below) might + * still be running. 
+ */ + while (link(buf, tbuf) == -1) { + g.dbenv->err(g.dbenv, errno, + "logclean: link %s", tbuf); + while ((ret = unlink(tbuf)) != 0) + __os_sleep(NULL, 1, 0); + g.dbenv->errx(g.dbenv, + "logclean: remove %s", + tbuf); + } + } +#endif + + if ((ret = unlink(buf)) == 0) + g.dbenv->errx(g.dbenv, + "logclean: remove %s", buf); + else { + g.dbenv->err(g.dbenv, ret, + "logclean: remove %s", buf); + g.dbenv->errx(g.dbenv, + "logclean: Error remove %s", + buf); + goto out; + } + } + + /* Delete the list of unused logfile names. */ + free(begin); + } + + /* + * If archiving, copy the remaining log files to the + * archive directory and verify/run recovery. + */ + if (config.archive != NULL) { + /* Get the names of the remaining log files. */ + if ((ret = g.dbenv->log_archive(g.dbenv, + &list, DB_ARCH_LOG)) != 0) { + g.dbenv->err(g.dbenv, ret, + "DB_ENV->log_archive"); + goto out; + } + + /* Copy them to the archive directory. */ + if (list != NULL) { + for (begin = list; *list != NULL; ++list) { + (void)snprintf(buf, sizeof(buf), + "cp %s/%s %s/%s", + config.logdir, *list, config.archive, + *list); + + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", + buf); + goto out; + } + else + g.dbenv->errx(g.dbenv, + "logclean: %s", buf); + } + + /* Delete the list of logfile names. */ + free(begin); + } + + /* + * Verify and run recovery on the database + * and log files in the archive directory. + * We do this by running an additional copy of + * db_perf using either the -Vx or -VX flags. + * We use the archive directory as the home + * directory, and thus we first copy the + * .current file to that directory. 
+ */ + (void)snprintf(buf, sizeof(buf), + "cp %s %s", config.conf_current, config.archive); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + (void)snprintf(buf, sizeof(buf), + "%s -AV%c -h %s", + g.progpath, + (g.logclean == CLEAN_ARCHIVE && + g.checkpoint == 0) ? 'x' : 'X', config.archive); + g.dbenv->errx(g.dbenv, "logclean: %s", buf); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + goto out; + } + } + +done: (void)snprintf(buf, sizeof(buf), "rm -f %s/RECOVER", g.home); + if ((ret = my_system(buf, 1)) != 0) { + g.dbenv->err(g.dbenv, errno, + "logclean: command failed: %s", buf); + } + +out: return (ret); +} --- db-4.6.21/test_perf/perf_misc.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_misc.c 2007-11-05 15:51:13.643919000 +1100 @@ -0,0 +1,544 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000,2007 Oracle. All rights reserved. + * + * $Id: perf_misc.c,v 12.10 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +static double choose_normal __P((db_rand_state *, double, double)); + +/* + * init_dbts - initialize or reinitialize DBTs for a key and data item + * from the specified file, assuming that the DBTs have previously + * been zeroed. 
+ */
+void
+init_dbts(file, keyp, datap, keystr, datastr, keyrec, key_ulen, data_ulen)
+	FILE_INFO *file;
+	DBT *keyp, *datap;
+	char *keystr, *datastr;
+	db_recno_t *keyrec;
+	int key_ulen, data_ulen;
+{
+	/* Both DBTs refer to memory supplied by the caller. */
+	keyp->flags = DB_DBT_USERMEM;
+	datap->flags = DB_DBT_USERMEM;
+
+	datap->data = datastr;
+	datap->ulen = data_ulen;
+
+	/* Without a record-based file the key is the caller's string. */
+	if (file == NULL || !IS_RECORD_BASED(file)) {
+		keyp->data = keystr;
+		keyp->ulen = key_ulen;
+		return;
+	}
+
+	/* Record-based files are keyed by a record number. */
+	keyp->data = keyrec;
+	keyp->ulen = sizeof(db_recno_t);
+	keyp->size = keyp->ulen;
+}
+
+/*
+ * set_create - constructor for SET objects.  Returns a set with room
+ * for max_items items, or NULL if any allocation fails.
+ */
+SET *
+set_create(max_items)
+	int max_items;
+{
+	SET *new_set;
+
+	if (__os_malloc(g.dbenv, sizeof(SET), &new_set) != 0)
+		return (NULL);
+
+	/* Parallel arrays: item[i] is chosen with weight freq[i]. */
+	if (__os_malloc(g.dbenv,
+	    max_items * sizeof(void *), &new_set->item) != 0)
+		goto err_set;
+	if (__os_malloc(g.dbenv,
+	    max_items * sizeof(int), &new_set->freq) != 0)
+		goto err_item;
+
+	/* data members */
+	new_set->max_items = max_items;
+	new_set->item_count = 0;
+	new_set->freq_total = 0;
+
+	/* methods */
+	new_set->add_item = __set_add_item;
+	new_set->enlarge = __set_enlarge;
+	new_set->select_item = __set_select_item;
+	new_set->select_from_dists = __set_select_from_dists;
+	new_set->mean_val_dists = __set_mean_val_dists;
+	new_set->min_val_dists = __set_min_val_dists;
+	new_set->max_val_dists = __set_max_val_dists;
+
+	return (new_set);
+
+err_item:
+	__os_free(g.dbenv, new_set->item);
+err_set:
+	__os_free(g.dbenv, new_set);
+	return (NULL);
+}
+
+/*
+ * set_destroy - destructor for SET objects.  If free_items is non-zero
+ * the items stored in the set are freed as well.
+ */
+void
+set_destroy(set, free_items)
+	SET *set;
+	int free_items;
+{
+	int n;
+
+	if (set == NULL)
+		return;
+
+	for (n = 0; free_items && n < set->item_count; n++)
+		__os_free(g.dbenv, set->item[n]);
+
+	__os_free(g.dbenv, set->item);
+	__os_free(g.dbenv, set->freq);
+	__os_free(g.dbenv, set);
+}
+
+/*
+ * __set_add_item - implementation of SET->add_item.  Adds an item and
+ * its associated frequency to the specified set.
+ */
+void
+__set_add_item(set, item, freq)
+	SET *set;
+	void *item;
+	int freq;
+{
+
+	/* The caller must call SET->enlarge() before the set fills up. */
+	assert(set->item_count < set->max_items);
+
+	set->item[set->item_count] = item;
+	set->freq[set->item_count] = freq;
+	set->item_count++;
+	set->freq_total += freq;
+}
+
+/*
+ * __set_enlarge - implementation of SET->enlarge().  Makes room for
+ * twice as many items (or for one item if the set had no room at all).
+ */
+void
+__set_enlarge(set)
+	SET *set;
+{
+	void **tmp_item;
+	int i, new_max, ret, *tmp_freq;
+
+	/* Doubling a capacity of zero would never make any room. */
+	new_max = set->max_items > 0 ? set->max_items * 2 : 1;
+
+	ret = __os_malloc(g.dbenv, new_max * sizeof(void *), &tmp_item);
+	assert(ret == 0 && tmp_item != NULL);
+	ret = __os_malloc(g.dbenv, new_max * sizeof(int), &tmp_freq);
+	assert(ret == 0 && tmp_freq != NULL);
+
+	/*
+	 * Copy only the slots that are in use: the ones past item_count
+	 * were never written and hold indeterminate values.
+	 */
+	for (i = 0; i < set->item_count; i++) {
+		tmp_item[i] = set->item[i];
+		tmp_freq[i] = set->freq[i];
+	}
+
+	__os_free(g.dbenv, set->item);
+	__os_free(g.dbenv, set->freq);
+	set->item = tmp_item;
+	set->freq = tmp_freq;
+	set->max_items = new_max;
+}
+
+/*
+ * __set_select_item - implementation of SET->select_item().  Picks an
+ * item at random, weighting each item by its frequency.
+ */
+void *
+__set_select_item(set, rs)
+	SET *set;
+	db_rand_state *rs;
+{
+	int item_num, pick, running_sum;
+
+	assert(set != NULL);
+
+	/* Walk the cumulative frequencies until they reach the pick. */
+	pick = random_int(rs, 1, set->freq_total);
+	item_num = 0;
+	running_sum = set->freq[item_num];
+	while (running_sum < pick) {
+		item_num++;
+		assert(item_num < set->item_count);
+		running_sum += set->freq[item_num];
+	}
+
+	return (set->item[item_num]);
+}
+
+/*
+ * __set_select_from_dists - implementation of SET->select_from_dists().
+ * Select a value according to the probability distribution given
+ * by the specified set of subdistributions.  Ensure that the returned
+ * value is no less than minval.
+ */
+int
+__set_select_from_dists(dists, rs, minval)
+	SET *dists;
+	db_rand_state *rs;
+	int minval;
+{
+	PROB_DIST *chosen;
+	int picked;
+
+	/* First pick a subdistribution, then draw a value from it. */
+	chosen = dists->select_item(dists, rs);
+	assert(chosen != NULL);
+	picked = chosen->select_int(chosen, rs);
+
+	return (picked < minval ? minval : picked);
+}
+
+/*
+ * __set_mean_val_dists - implementation of SET->mean_val_dists().
+ * Estimate the mean value of the probability distribution given
+ * by the specified set of subdistributions: the frequency-weighted
+ * average of the subdistribution means, using integer arithmetic.
+ */
+int
+__set_mean_val_dists(dists)
+	SET *dists;
+{
+	PROB_DIST *dist;
+	int n, weighted_sum;
+
+	weighted_sum = 0;
+	for (n = 0; n < dists->item_count; n++) {
+		dist = dists->item[n];
+		weighted_sum += dists->freq[n] * dist->get_meanval(dist);
+	}
+
+	assert(dists->freq_total > 0);
+	return (weighted_sum / dists->freq_total);
+}
+
+/*
+ * __set_min_val_dists - implementation of SET->min_val_dists().
+ * Estimate the min value of the probability distribution given
+ * by the specified set of subdistributions.  The result never
+ * exceeds UINT16_MAX, which is the starting value of the search.
+ */
+int
+__set_min_val_dists(dists)
+	SET *dists;
+{
+	PROB_DIST *dist;
+	int candidate, lowest, n;
+
+	lowest = UINT16_MAX;
+	for (n = 0; n < dists->item_count; n++) {
+		dist = dists->item[n];
+		candidate = dist->get_minval(dist);
+		if (candidate < lowest)
+			lowest = candidate;
+	}
+
+	return (lowest);
+}
+
+/*
+ * __set_max_val_dists - implementation of SET->max_val_dists().
+ * Estimate the max value of the probability distribution given
+ * by the specified set of subdistributions.
+ */
+int
+__set_max_val_dists(dists)
+	SET *dists;
+{
+	PROB_DIST *dist;
+	int i, max_val, this_max;
+
+	/* An empty set, or one with only negative maxima, yields 0. */
+	max_val = 0;
+	for (i = 0; i < dists->item_count; i++) {
+		dist = dists->item[i];
+		this_max = dist->get_maxval(dist);
+		if (this_max > max_val)
+			max_val = this_max;
+	}
+
+	return (max_val);
+}
+
+/*
+ * prob_dist_create - constructor for PROB_DIST objects.  For
+ * DIST_UNIFORM, param1 and param2 are the lower and upper bounds
+ * (truncated to int); for DIST_NORMAL they are the mean and the
+ * standard deviation.  Returns NULL if the allocation fails or the
+ * type is unknown.
+ */
+PROB_DIST *
+prob_dist_create(type, param1, param2)
+	dist_type type;
+	double param1, param2;
+{
+	PROB_DIST *dist;
+	int ret;
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(PROB_DIST), &dist)) != 0)
+		return (NULL);
+
+	dist->type = type;
+	switch (type) {
+	case DIST_UNIFORM:
+		dist->param1.lower_bound = (int)param1;
+		dist->param2.upper_bound = (int)param2;
+		dist->select_int = __prob_dist_select_int_uniform;
+		dist->get_maxval = __prob_dist_get_maxval_uniform;
+		dist->get_minval = __prob_dist_get_minval_uniform;
+		dist->get_meanval = __prob_dist_get_meanval_uniform;
+		break;
+	case DIST_NORMAL:
+		dist->param1.mean = param1;
+		dist->param2.std_dev = param2;
+		dist->select_int = __prob_dist_select_int_normal;
+		dist->get_maxval = __prob_dist_get_maxval_normal;
+		dist->get_minval = __prob_dist_get_minval_normal;
+		dist->get_meanval = __prob_dist_get_meanval_normal;
+		break;
+	default:
+		assert(0);
+		/*
+		 * With assertions compiled out, don't hand back an object
+		 * whose method pointers were never set.
+		 */
+		__os_free(g.dbenv, dist);
+		return (NULL);
+	}
+
+	return (dist);
+}
+
+/*
+ * __prob_dist_select_int_uniform - implementation of PROB_DIST->select_int()
+ * for uniform distributions.
+ */
+int
+__prob_dist_select_int_uniform(dist, rs)
+	PROB_DIST *dist;
+	db_rand_state *rs;
+{
+	assert(dist != NULL);
+	assert(dist->type == DIST_UNIFORM);
+
+	return (random_int(rs, dist->param1.lower_bound,
+	    dist->param2.upper_bound));
+}
+
+/*
+ * __prob_dist_select_int_normal - implementation of PROB_DIST->select_int()
+ * for normal distributions.
+ */
+int
+__prob_dist_select_int_normal(dist, rs)
+	PROB_DIST *dist;
+	db_rand_state *rs;
+{
+	double drawn;
+
+	assert(dist != NULL);
+	assert(dist->type == DIST_NORMAL);
+
+	/* Round the real-valued sample up to the next integer. */
+	drawn = choose_normal(rs, dist->param1.mean, dist->param2.std_dev);
+	return (int)ceil(drawn);
+}
+
+/*
+ * Implementations of PROB_DIST methods to compute the max, min, and
+ * mean values of a probability distribution.
+ */
+int
+__prob_dist_get_maxval_uniform(dist)
+	PROB_DIST *dist;
+{
+	int upper;
+
+	assert(dist != NULL);
+	assert(dist->type == DIST_UNIFORM);
+
+	upper = dist->param2.upper_bound;
+	return (upper);
+}
+
+int
+__prob_dist_get_maxval_normal(dist)
+	PROB_DIST *dist;
+{
+	double upper;
+
+	assert(dist != NULL);
+	assert(dist->type == DIST_NORMAL);
+
+	/*
+	 * The upper estimate is six standard deviations above the mean.
+	 * NOTE(review): the minimum below uses three; confirm that the
+	 * asymmetry is intended.
+	 */
+	upper = dist->param1.mean + (6 * dist->param2.std_dev);
+	return (int)ceil(upper);
+}
+
+int
+__prob_dist_get_minval_uniform(dist)
+	PROB_DIST *dist;
+{
+	int lower;
+
+	assert(dist != NULL);
+	assert(dist->type == DIST_UNIFORM);
+
+	lower = dist->param1.lower_bound;
+	return (lower);
+}
+
+int
+__prob_dist_get_minval_normal(dist)
+	PROB_DIST *dist;
+{
+	double lower;
+
+	assert(dist != NULL);
+	assert(dist->type == DIST_NORMAL);
+
+	/* most values fall within three standard deviations */
+	lower = dist->param1.mean - (3 * dist->param2.std_dev);
+	return (int)ceil(lower);
+}
+
+int
+__prob_dist_get_meanval_uniform(dist)
+	PROB_DIST *dist;
+{
+	int bounds_sum;
+
+	assert(dist != NULL);
+	assert(dist->type == DIST_UNIFORM);
+
+	/* Midpoint of the range, using integer division. */
+	bounds_sum = dist->param1.lower_bound + dist->param2.upper_bound;
+	return (bounds_sum / 2);
+}
+
+int
+__prob_dist_get_meanval_normal(dist)
+	PROB_DIST *dist;
+{
+	double mean;
+
+	assert(dist != NULL);
+	assert(dist->type == DIST_NORMAL);
+
+	mean = dist->param1.mean;
+	return (int)ceil(mean);
+}
+
+/*
+ * choose_normal - choose a value from a normal distribution with the
+ * specified mean and standard deviation.  We make use of the central
+ * limit theorem, which says that the sum of values from a large enough
+ * set of independent, identically distributed random variables will be
+ * normally distributed.
This isn't the best way to produce values
+ * from a normal distribution, but it's computationally simple and good
+ * enough for our purposes.
+ */
+static double
+choose_normal(rs, mean, std_dev)
+	db_rand_state *rs;
+	double mean, std_dev;
+{
+	double sum, z;
+	int i;
+
+#define	N	50
+
+	/*
+	 * First, sum n values chosen from uniform distributions over
+	 * [0,1].
+	 */
+	sum = 0.0;
+	for (i = 0; i < N; i++)
+		sum = sum + (double)db_random(rs) / (double)DB_RAND_MAX;
+
+	/*
+	 * Next, scale to N(0,1) by doing:
+	 *	z = (sum - n/2) / sqrt(n/12)
+	 * To understand this formula, note the following:
+	 * 1) For each uniform random variable x_i, E(x_i) = 1/2,
+	 *    where E(x_i) represents the expected value (mean) of x_i.
+	 *    Thus, because the variables are independent,
+	 *    E(sum(x_i)) = sum(E(x_i)) = n/2.
+	 * 2) By the definition of variance, Var(x_i) = E(x_i^2) - E(x_i)^2.
+	 *    The prob. density func. for each x_i = 1 in [0,1], 0 elsewhere.
+	 *    Thus: E(x_i^2) = integral from 0 to 1 (x_i^2 * 1dx) = 1/3
+	 *          Var(x_i) = 1/3 - 1/4 = 1/12
+	 *          Var(sum(x_i)) = sum(Var(x_i)) = n/12, and
+	 *          Std_Dev(sum(x_i)) = sqrt(n/12).
+	 *
+	 * The divisions must be done in floating point: with integer
+	 * operands N/12 truncates to 4, which makes the divisor 2 rather
+	 * than sqrt(50/12) and inflates the resulting spread.
+	 */
+	z = (sum - N / 2.0) / sqrt(N / 12.0);
+
+	/* Finally, scale to N(mean, std_dev). */
+	z = mean + std_dev * z;
+	return (z);
+}
+
+/*
+ * write_err_init - reset the write-error call counter, install
+ * write_err as the write function (except on Windows) and allocate
+ * the mutex used by write_no_errors.
+ */
+void
+write_err_init(dbenv)
+	DB_ENV *dbenv;
+{
+	config.write_err = 0;
+
+#ifndef DB_WIN32
+	db_env_set_func_write(write_err);
+#endif
+
+	/*
+	 * We need a mutex to protect no_write_errors, for portability
+	 * we use the DB routines.
+	 */
+	if (dbenv->mutex_alloc(dbenv, 0, &g.write_mutex) != 0)
+		abort();
+}
+
+/*
+ * write_no_errors - adjust the count of requests to suppress simulated
+ * write errors: a non-zero "on" adds one request, zero removes one.
+ */
+void
+write_no_errors(dbenv, on)
+	DB_ENV *dbenv;
+	int on;
+{
+	if (dbenv->mutex_lock(dbenv, g.write_mutex) != 0)
+		abort();
+
+	config.no_write_errors += on ? 1 : -1;
+
+	if (dbenv->mutex_unlock(dbenv, g.write_mutex) != 0)
+		abort();
+}
+
+/*
+ * write_err - simulate running out of disk space by returning an
+ * error if we try to extend a file.
+ * The behavior of this function is governed by three of the GLOBAL
+ * parameters:
+ *	write_err_start - the number of times this function needs to be
+ *	    called before an error can be returned.
+ *	write_err_count - the number of errors that will be returned as
+ *	    part of the current set of error-returns.
+ *	write_rand - used to randomly select the write_err_count value
+ *	    for subsequent sets of error-returns.
+ *
+ * Returns -1 with errno set to EIO for a simulated failure, otherwise
+ * whatever write(2) returns.
+ */
+ssize_t
+write_err(fd, buffer, nbytes)
+	int fd;
+	const void *buffer;
+	size_t nbytes;
+{
+	off_t end, pos;
+
+	/* Don't do this if testing for write errors has been disallowed. */
+	if (!config.no_write_errors) {
+		/*
+		 * If this function has been called more than
+		 * config.write_err_start times and we're seeking
+		 * to extend the file, return an error.
+		 */
+		if (config.write_err++ >= config.write_err_start &&
+		    config.write_err_count > 0) {
+			/*
+			 * A descriptor that can't seek makes both lseek
+			 * calls return -1; those must not compare equal
+			 * and be mistaken for "positioned at end of file".
+			 */
+			pos = lseek(fd, 0, SEEK_CUR);
+			if (pos != (off_t)-1) {
+				end = lseek(fd, 0, SEEK_END);
+				if (end != (off_t)-1 && pos == end) {
+					config.write_err_count--;
+					errno = EIO;
+					return (-1);
+				}
+				/* Undo the seek to the end of the file. */
+				(void)lseek(fd, pos, SEEK_SET);
+			}
+		}
+
+		/*
+		 * If we're finished with the current set of error-
+		 * returns, randomly select the parameters for the
+		 * next set.
+		 */
+		if (config.write_err_count == 0 && config.write_rand != 0) {
+			config.write_err_count =
+			    random_int(&g.rand_state, 1, config.write_rand);
+			config.write_err_start =
+			    random_int(&g.rand_state, config.write_err_start,
+			    2 * config.write_err_start);
+		}
+	}
+
+	return (write(fd, (void *)buffer, nbytes));
+}
--- db-4.6.21/test_perf/perf_op.c	1970-01-01 10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_op.c	2007-11-05 15:51:13.759916000 +1100
@@ -0,0 +1,768 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2000,2007 Oracle.  All rights reserved.
+ *
+ * $Id: perf_op.c,v 12.9 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+/*
+ * Functions to carry out the basic operations that can be performed in the
+ * course of a transaction.
+ *
+ * Each op_* function has a parameter called datap (a DBT*) that will be
+ * used to store data items.  Because any new data will be generated from
+ * within the update functions, this parameter isn't strictly necessary.
+ * However, we take this approach so that the same DBT can be used
+ * throughout the course of a transaction.  This saves us from repeatedly
+ * zeroing local DBTs in each function that is called.  The DBT passed to
+ * these functions should be properly zeroed by the caller before its
+ * first use.
+ */
+
+const char *op_names[NUM_OP_TYPES] = {
+	"read_nodup", "read_dup", "read_multi", "update_nodup",
+	"update_dup", "update_prim", "overwrite", "add_nodup", "add_dup",
+	"del", "curs_read", "curs_update", "curs_update_prim",
+	"curs_del", "dbs_read", "dbs_read_dupset", "dbs_update",
+	"dbs_partial_write", "dbs_del", "dbs_add_dupset", "dbs_del_readd",
+	"dbs_del_readd_secon", "dbs_swap_data", "qtest_scan",
+	"reorg"
+};
+
+op_type op_types[NUM_OP_TYPES] = {
+	OP_READ_NODUP, OP_READ_DUP, OP_READ_MULTI, OP_UPDATE_NODUP,
+	OP_UPDATE_DUP, OP_UPDATE_PRIM, OP_OVERWRITE, OP_ADD_NODUP, OP_ADD_DUP,
+	OP_DEL, OP_CURS_READ, OP_CURS_UPDATE, OP_CURS_UPDATE_PRIM,
+	OP_CURS_DEL, OP_DBS_READ, OP_DBS_READ_DUPSET, OP_DBS_UPDATE,
+	OP_DBS_PARTIAL_WRITE, OP_DBS_DEL, OP_DBS_ADD_DUPSET,
+	OP_DBS_DEL_READD, OP_DBS_DEL_READD_SECON, OP_DBS_SWAP_DATA,
+	OP_QTEST_SCAN, OP_REORG
+};
+
+/*
+ * Implementation of each operation, in the same order as op_names and
+ * op_types.  Overwrite shares __op_update_nodup and curs_update_prim
+ * shares __op_curs_update.
+ */
+typedef int (*op_func) (OP *, int, int *, DB *, DB_TXN *, DBT *, DBT *);
+op_func op_funcs[NUM_OP_TYPES] = { __op_read_nodup, __op_read_dup,
+	__op_read_multi, __op_update_nodup, __op_update_dup, __op_update_prim,
+	__op_update_nodup, __op_add_nodup, __op_add_dup, __op_del,
+	__op_curs_read, __op_curs_update, __op_curs_update, __op_curs_del,
+	__op_dbs_read, __op_dbs_read_dupset, __op_dbs_update,
+	__op_dbs_partial_write, __op_dbs_del, __op_dbs_add_dupset,
+	__op_dbs_del_readd, __op_dbs_del_readd_secon, __op_dbs_swap_data,
+	__op_qtest_scan, __op_reorg
+};
+
+static int update_using_cursor
+    __P((OP*, int, DB *, DB_TXN *, DBT *, DBT *, int));
+static void modify_key __P((db_rand_state *, DBT *, FILE_INFO *));
+
+/*
+ * op_uniform_dists - build a one-item SET holding a uniform
+ * distribution over [lo, hi].  Returns NULL if an allocation fails.
+ */
+static SET *
+op_uniform_dists(lo, hi)
+	int lo, hi;
+{
+	PROB_DIST *dist;
+	SET *dists;
+
+	if ((dists = set_create(1)) == NULL)
+		return (NULL);
+	if ((dist = prob_dist_create(DIST_UNIFORM, lo, hi)) == NULL) {
+		set_destroy(dists, 0);
+		return (NULL);
+	}
+	dists->add_item(dists, dist, 100);
+
+	return (dists);
+}
+
+/*
+ * op_create - constructor for OP objects.  The key group, iteration
+ * count and item count of the operation are each drawn uniformly from
+ * the given [min, max] range.  Returns NULL if an argument is out of
+ * range or an allocation fails.
+ */
+OP *
+op_create(type, filenum, kg_min, kg_max, iter_min, iter_max,
+    items_min, items_max)
+	op_type type;
+	int filenum, kg_min, kg_max, iter_min, iter_max;
+	int items_min, items_max;
+{
+	OP *op;
+	int ret;
+
+	/* Sanity checks */
+	if ((int)type < 0 || (int)type >= NUM_OP_TYPES)
+		return (NULL);
+	if (iter_min > iter_max || items_min > items_max || kg_min > kg_max)
+		return (NULL);
+	if (filenum < 0 || filenum >= config.file_count || kg_min < 0 ||
+	    kg_max >= config.file[filenum]->key_groups->item_count)
+		return (NULL);
+	if (iter_min < 0 || items_min < 0)
+		return (NULL);
+
+	/*
+	 * If we're updating a file that supports duplicates, make
+	 * sure that we use the duplicate-aware version of the operation.
+	 */
+	if (type == OP_UPDATE_NODUP &&
+	    config.file[filenum]->flags & DB_DUP)
+		type = OP_UPDATE_DUP;
+
+	if ((ret = __os_malloc(g.dbenv, sizeof(OP), &op)) != 0)
+		return (NULL);
+
+	op->type = type;
+	op->name = op_names[type];
+	op->file = config.file[filenum];
+
+	/*
+	 * Build all three distribution sets before checking any of them,
+	 * so that op_destroy never sees an uninitialized pointer.
+	 */
+	op->keygrp_dists = op_uniform_dists(kg_min, kg_max);
+	op->iter_dists = op_uniform_dists(iter_min, iter_max);
+	op->item_dists = op_uniform_dists(items_min, items_max);
+	if (op->keygrp_dists == NULL ||
+	    op->iter_dists == NULL || op->item_dists == NULL) {
+		op_destroy(op);
+		return (NULL);
+	}
+
+	op->other = NULL;
+
+	if (IS_CURSOR_OP(op)) {
+		/*
+		 * Cursor operations use DB_NEXT by default in their
+		 * calls to DBC->get().  The user can override this
+		 * by specifying a flag value of DB_PREV, DB_NEXT_DUP,
+		 * DB_NEXT_NODUP, or DB_PREV_NODUP in the config file
+		 * specs for a particular op.
+		 */
+		op->flags = DB_NEXT;
+	} else if (type == OP_ADD_NODUP) {
+		if (op->file->method == DB_QUEUE ||
+		    op->file->method == DB_RECNO)
+			op->flags = DB_APPEND;
+		else
+			op->flags = DB_NOOVERWRITE;
+	} else
+		op->flags = 0;
+
+	/* For update ops, this can be changed in the config file. */
+	op->update_pct = (IS_UPDATE_OP(op) ? 100 : 0);
+
+	op->execute = op_funcs[type];
+
+	return (op);
+}
+
+/*
+ * op_create_defaults - create an operation using all of the default
+ * parameters: any key group of the file, one iteration, one item.
+ */
+OP *
+op_create_defaults(type, filenum)
+	op_type type;
+	int filenum;
+{
+	OP *op;
+	int num_keygrps;
+
+	assert(filenum >= 0 && filenum < config.file_count);
+
+	num_keygrps = config.file[filenum]->key_groups->item_count;
+	op = op_create(type, filenum, 0, num_keygrps - 1, 1, 1, 1, 1);
+
+	return (op);
+}
+
+/*
+ * op_destroy - destructor for OP objects.  Frees the three
+ * distribution sets together with the distributions they hold.
+ */
+void
+op_destroy(op)
+	OP *op;
+{
+
+	if (op == NULL)
+		return;
+
+	set_destroy(op->keygrp_dists, 1);
+	set_destroy(op->iter_dists, 1);
+	set_destroy(op->item_dists, 1);
+	__os_free(g.dbenv, op);
+}
+
+/*
+ * __op_read_nodup - read an item from a database.  If the database
+ * supports duplicates, then this function will read the first data
+ * item for the specified key.
+ */
+int
+__op_read_nodup(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+
+	COMPQUIET(thr_id, 0);
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+
+	return (dbp->get(dbp, txn, keyp, datap, op->flags));
+}
+
+/*
+ * __op_read_multi - read all the data for a key with a bulk get and
+ * store the number of data items found in *countp.  If the buffer is
+ * too small it is grown with realloc and the get is retried once.
+ */
+int
+__op_read_multi(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	int ret, len;
+	void *pointer, *data, *bigger;
+
+	COMPQUIET(thr_id, 0);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+
+	ret = dbp->get(dbp, txn, keyp, datap, op->flags | DB_MULTIPLE);
+	if (ret == DB_BUFFER_SMALL) {
+		/*
+		 * Grow through a temporary so the old buffer isn't lost
+		 * if realloc fails; in that case report the original
+		 * error with the DBT untouched.
+		 */
+		if ((bigger = realloc(datap->data, datap->size)) == NULL)
+			return (ret);
+		datap->data = bigger;
+		datap->ulen = datap->size;
+
+		/*
+		 * The failed get filled in nothing, so the items can
+		 * only be walked after a successful retry.
+		 */
+		ret = dbp->get(dbp,
+		    txn, keyp, datap, op->flags | DB_MULTIPLE);
+	}
+	if (ret != 0)
+		return (ret);
+
+	DB_MULTIPLE_INIT(pointer, datap);
+	*countp = 0;
+	while (pointer != NULL) {
+		DB_MULTIPLE_NEXT(pointer, datap, data, len);
+		if (data != NULL)
+			(*countp)++;
+	}
+	return (0);
+}
+
+/*
+ * __op_read_dup - read a duplicate item from a database using a
+ * cursor.  The specific duplicate is chosen at random from the
+ * duplicates with the specified key.
+ */
+int
+__op_read_dup(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *curs;
+	db_recno_t num_dups;
+	int ret, which_dup;
+
+	COMPQUIET(op, NULL);
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+
+	/* Open a cursor. */
+	ret = dbp->cursor(dbp, txn, &curs, 0);
+	if (ret != 0)
+		return (ret);
+
+	/* Move the cursor to the first item with the specified key. */
+	ret = curs->get(curs, keyp, datap, DB_SET);
+	if (ret != 0)
+		goto err;
+
+	/* Choose a duplicate at random. */
+	num_dups = 0;
+	ret = curs->count(curs, &num_dups, 0);
+	if (ret != 0)
+		goto err;
+	which_dup = random_int(THR_RS, 1, (int)num_dups);
+
+	/* Advance the cursor to the right duplicate. */
+	do {
+		which_dup--;
+	}
+	while (which_dup > 0 &&
+	    (ret = curs->get(curs, keyp, datap, DB_NEXT_DUP)) == 0);
+
+err:	/* Close the cursor. */
+	curs->close(curs);
+
+	return (ret);
+}
+
+/*
+ * __op_update_nodup - update an item in a database that doesn't support
+ * duplicates.
+ *
+ * For operations of type OP_UPDATE_NODUP, we first read the item, and
+ * then we decide if it should be modified based on the value of the
+ * update_pct field.
+ *
+ * For operations of type OP_OVERWRITE, we don't read the item first
+ * and we always update it using DB->put().
+ */
+int
+__op_update_nodup(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	int ret;
+
+	COMPQUIET(countp, NULL);
+	assert(op != NULL && dbp != NULL && keyp != NULL && datap != NULL);
+	assert(config.thread[thr_id]->keygrp != NULL);
+
+	if (op->type != OP_OVERWRITE) {
+		if ((ret = dbp->get(dbp, txn, keyp, datap, op->flags)) != 0)
+			return (ret);
+		/* Should we modify this item?  If not, return. */
+		if (op->update_pct < 100 &&
+		    random_int(THR_RS, 1, 100) > op->update_pct)
+			return (0);
+	}
+
+	/* Replace the item with freshly generated data. */
+	config.thread[thr_id]->keygrp->fill_data(
+	    config.thread[thr_id]->keygrp, THR_RS, datap, 0);
+	return (dbp->put(dbp, txn, keyp, datap, 0));
+}
+
+/*
+ * __op_update_dup - update an item in a database that supports
+ * duplicates.  We can't just use DB->put, because it would create a
+ * new duplicate.  Instead, we use a cursor to read one of the
+ * existing duplicates, and then we decide if it should be modified
+ * based on the value of the update_pct field.
+ */
+int
+__op_update_dup(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *dbc;
+	db_recno_t dup_count;
+	int ret, which_dup;
+
+	COMPQUIET(countp, NULL);
+	assert(op != NULL && dbp != NULL && keyp != NULL && datap != NULL);
+
+	/* A CDB environment needs a write cursor for the later put. */
+	if ((ret = dbp->cursor(dbp, txn, &dbc,
+	    (config.env_flags&DB_INIT_CDB ? DB_WRITECURSOR : 0))) != 0)
+		return (ret);
+
+	/* Position on the first item with the specified key. */
+	if ((ret = dbc->get(dbc, keyp, datap, DB_SET|op->flags)) != 0)
+		goto done;
+
+	/* Pick one of the key's duplicates at random. */
+	dup_count = 0;
+	if ((ret = dbc->count(dbc, &dup_count, 0)) != 0)
+		goto done;
+	which_dup = random_int(THR_RS, 1, (int)dup_count);
+
+	/* Step forward until the chosen duplicate is current. */
+	while (--which_dup > 0) {
+		ret = dbc->get(dbc, keyp, datap, DB_NEXT_DUP|op->flags);
+		if (ret != 0)
+			goto done;
+	}
+
+	/* Only update_pct percent of the items read are modified. */
+	if (op->update_pct < 100 &&
+	    random_int(THR_RS, 1, 100) >
+	    op->update_pct)
+		goto done;
+
+	/*
+	 * Overwrite the current duplicate with random data.  Without a
+	 * known key group the existing data length is kept.
+	 */
+	if (config.thread[thr_id]->keygrp == NULL)
+		random_data(THR_RS, (char *)datap->data, datap->size);
+	else
+		config.thread[thr_id]->keygrp->fill_data(
+		    config.thread[thr_id]->keygrp, THR_RS, datap, which_dup);
+	ret = dbc->put(dbc, keyp, datap, DB_CURRENT);
+
+done:	/* The cursor is closed on every path that opened it. */
+	dbc->close(dbc);
+
+	return (ret);
+}
+
+/*
+ * __op_update_prim - update an item in a database that has
+ * secondaries.  This operation gives the user the ability to specify
+ * which (if any) secondary keys should be changed, whereas
+ * op_update_nodup will always change all of them.
+ */
+int
+__op_update_prim(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	int ret;
+
+	COMPQUIET(countp, NULL);
+
+	/* A single item is updated through the shared cursor helper. */
+	ret = update_using_cursor(op, thr_id, dbp, txn,
+	    keyp, datap, 1);
+	return (ret);
+}
+
+/*
+ * __op_add_nodup - add an item with a new key to a database.
+ */
+int
+__op_add_nodup(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	KEY_GROUP *kg;
+	int ret;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+
+	kg = config.thread[thr_id]->keygrp;
+	assert(kg != NULL);
+
+	/*
+	 * The key passed in is already in the database.
+	 * Unless we're using a record-based access method, modify
+	 * the key to obtain a new one (or, rarely, a previously
+	 * added key).
+	 */
+	if (!IS_RECORD_BASED(op->file))
+retry:		modify_key(THR_RS, keyp, op->file);
+
+	/* Generate and add the new data item. */
+	kg->fill_data(kg, THR_RS, datap, 0);
+	ret = dbp->put(dbp, txn, keyp, datap, op->flags);
+	if (ret != DB_KEYEXIST)
+		return (ret);
+
+	/* The modified key was already present: try another one. */
+	assert(!IS_RECORD_BASED(op->file));
+	goto retry;
+}
+
+/*
+ * __op_add_dup - add a duplicate item to a database.
+ */
+int
+__op_add_dup(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+	assert(!IS_RECORD_BASED(op->file) && (op->file->flags & DB_DUP));
+	assert(config.thread[thr_id]->keygrp != NULL);
+
+	/*
+	 * This should not be used with DBS-style databases, because
+	 * the DBS-specific ops depend on there being nthreads
+	 * duplicates for each key.
+	 */
+	assert(config.dbs == 0);
+
+	/* Generate the new duplicate data item, then add it. */
+	config.thread[thr_id]->keygrp->fill_data(
+	    config.thread[thr_id]->keygrp, THR_RS, datap, 0);
+	return (dbp->put(dbp, txn, keyp, datap, 0));
+}
+
+/*
+ * __op_del - delete an item from a database
+ */
+int
+__op_del(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	int ret;
+
+	COMPQUIET(op, NULL);
+	COMPQUIET(thr_id, 0);
+	COMPQUIET(countp, NULL);
+	COMPQUIET(datap, NULL);
+	assert(dbp != NULL && keyp != NULL);
+
+	ret = dbp->del(dbp, txn, keyp, 0);
+	return (ret);
+}
+
+/*
+ * __op_curs_read - read items sequentially using a cursor.
+ */
+int
+__op_curs_read(op, thr_id, countp, dbp, txn, keyp, datap)
+	OP *op;
+	int thr_id;
+	int *countp;
+	DB *dbp;
+	DB_TXN *txn;
+	DBT *keyp, *datap;
+{
+	DBC *dbc;
+	int remaining, ret;
+
+	COMPQUIET(countp, NULL);
+	assert(dbp != NULL && keyp != NULL && datap != NULL);
+
+	if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
+		return (ret);
+
+	/* Start at the first item with the specified key. */
+	ret = dbc->get(dbc, keyp, datap, DB_SET);
+
+	/* The item count is drawn even when the initial get failed. */
+	remaining =
+	    op->item_dists->select_from_dists(op->item_dists, THR_RS, 1);
+	if (ret == 0)
+		while (--remaining > 0 &&
+		    (ret = dbc->get(dbc, keyp, datap, op->flags)) == 0)
+			continue;
+
+	/*
+	 * Running off the end of the database before the requested
+	 * number of items has been read is not a failure.
+	 */
+	if (ret == DB_NOTFOUND)
+		ret = 0;
+
+	dbc->close(dbc);
+
+	return (ret);
+}
+
+/*
+ * __op_curs_update - update sequential items using a cursor.
+ */ +int +__op_curs_update(op, thr_id, countp, dbp, txn, keyp, datap) + OP *op; + int thr_id; + int *countp; + DB *dbp; + DB_TXN *txn; + DBT *keyp, *datap; +{ + int num_items; + + COMPQUIET(countp, NULL); + + num_items = + op->item_dists->select_from_dists(op->item_dists, THR_RS, 1); + return (update_using_cursor(op, thr_id, dbp, txn, + keyp, datap, num_items)); +} + +/* + * __op_curs_del - delete num_items sequential items using a cursor. + */ +int +__op_curs_del(op, thr_id, countp, dbp, txn, keyp, datap) + OP *op; + int thr_id; + int *countp; + DB *dbp; + DB_TXN *txn; + DBT *keyp, *datap; +{ + DBC *curs; + int num_items, ret; + + COMPQUIET(countp, NULL); + assert(dbp != NULL && keyp != NULL && datap != NULL); + + /* Open a cursor. */ + ret = dbp->cursor(dbp, txn, &curs, + (config.env_flags&DB_INIT_CDB ? DB_WRITECURSOR : 0)); + if (ret != 0) + return (ret); + + /* Move the cursor to the first item with the specified key. */ + ret = curs->get(curs, keyp, datap, DB_SET); + if (ret != 0) + goto err; + + /* Delete it and some number of additional items. */ + num_items = + op->item_dists->select_from_dists(op->item_dists, THR_RS, 1); + do { + ret = curs->del(curs, 0); + if (ret != 0) + goto err; + num_items--; + } + while (num_items > 0 && + (ret = curs->get(curs, keyp, datap, op->flags)) == 0); + + /* + * It's possible for us to reach the last item in the database + * before deleting the specified number of items. This doesn't + * constitute a failure. + */ + if (ret == DB_NOTFOUND) + ret = 0; + +err: /* Close the cursor. */ + curs->close(curs); + + return (ret); +} + +/* The __op_dbs_* functions are found in perf_dbs.c */ + +/* + * update_using_cursor - update num_items sequential items using a + * cursor. next_flag allows us to either update a series of items + * that includes duplicates (DB_NEXT) or to update a series of items + * with unique keys (DB_NEXT_NODUP). rmw_flag allows us to acquire + * write locks when retrieving items. 
+ * + * This function is used by __op_update_prim and __op_curs_update. + */ +static int +update_using_cursor(op, thr_id, dbp, txn, keyp, datap, num_items) + OP *op; + int thr_id; + DB *dbp; + DB_TXN *txn; + DBT *keyp, *datap; + int num_items; +{ + DBT secon_key; + DBC *curs; + FILE_INFO *secon; + KEY_GROUP *keygrp; + u_int *offsets; + int flags, i, ret, which_secon; + + assert(op != NULL && dbp != NULL && keyp != NULL && datap != NULL); + assert(num_items > 0); + keygrp = config.thread[thr_id]->keygrp; + assert(keygrp != NULL); + + /* Open a cursor. */ + ret = dbp->cursor(dbp, txn, &curs, + (config.env_flags&DB_INIT_CDB ? DB_WRITECURSOR : 0)); + if (ret != 0) + return (ret); + + /* Move the cursor to the first item with the specified key. */ + flags = (op->flags&DB_RMW ? DB_SET|DB_RMW : DB_SET); + if ((ret = curs->get(curs, keyp, datap, flags)) != 0) + goto err; + + /* Update it and the next (num_items - 1) items. */ + do { + /* Should we modify this item? */ + if (op->update_pct < 100 && + random_int(THR_RS, 1, 100) > op->update_pct) + continue; + + if (op->type == OP_UPDATE_PRIM || + op->type == OP_CURS_UPDATE_PRIM) { + assert(HAS_SECONDARIES(keygrp->file)); + /* + * Modify the keys for the specified secondary + * indices (if any). + */ + if (op->other != NULL) { + offsets = (u_int *)datap->data; + for (i = 0; i < op->other->item_count; i++) { + which_secon = + (uintptr_t)op->other->item[i]; + secon = op->file->secondaries-> + item[which_secon]; + + /* Should we modify it? 
*/ + if (random_int(THR_RS, 1, 100) > + op->other->freq[i]) + continue; + + secon_key.data = ((char *)datap->data) + + offsets[which_secon]; + secon->select_key( + secon, THR_RS, &secon_key); + } + } + } else + keygrp->fill_data(keygrp, THR_RS, datap, -1); + + if ((ret = curs->put(curs, keyp, datap, DB_CURRENT)) != 0) + goto err; + num_items--; + } while (num_items > 0 && + (ret = curs->get(curs, keyp, datap, op->flags)) == 0); + + /* + * It's possible for us to reach the last item in the database + * before updating the specified number of items. This doesn't + * constitute a failure. + */ + if (ret == DB_NOTFOUND) + ret = 0; + +err: /* Close the cursor. */ + curs->close(curs); + + return (ret); +} + +/* + * modify_key - modify an existing key from the specified file to + * obtain a new one (or, rarely, a previously added key), changing 2-5 + * randomly-chosen characters so that they contain characters not in + * the original character set. + */ +static void +modify_key(rs, keyp, file) + db_rand_state *rs; + DBT *keyp; + FILE_INFO *file; +{ + int i, num_changes, which_char; + + num_changes = random_int(rs, 2, 5); + for (i = 0; i < num_changes; i++) { + which_char = random_int(rs, 0, keyp->size - 1); + ((char *)keyp->data)[which_char] += + (file->num_key_chars + random_int(rs, 0, 10)); + } +} --- db-4.6.21/test_perf/perf_parse.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_parse.c 2007-11-05 15:51:13.695918000 +1100 @@ -0,0 +1,784 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996,2007 Oracle. All rights reserved. 
+ * + * $Id: perf_parse.c,v 12.10 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +static int line_counts __P((void)); +static void output_line __P((char *, char *)); +static int parse_case __P((char **)); +static int parse_set __P((char **)); +static int parse_switch __P((char **)); +static void process_closing_brace __P((int *)); +static int process_variable __P((char *)); +static int process_variables __P((char *)); +static int process_expression __P((char *)); +static int eval_expression __P((char **, int *)); +static int select_from_list __P((char **)); +static int select_from_range __P((char **)); + +#define SYNTAX(expected, ret) { \ + fprintf(g.outfp, "%s:%d: incorrect syntax: expected \"%s\"\n", \ + g.config, g.linenum, expected); \ + fflush(g.outfp); \ + return (ret); \ +} + +#define ERR_RET(msg, ret) { \ + fprintf(g.outfp, "%s:%d: %s\n", \ + g.config, g.linenum, msg); \ + fflush(g.outfp); \ + return (ret); \ +} + +/* Used to process SWITCH and CASE statements. */ +#define MAX_NEST 10 /* max levels of nested SWITCHes */ +char switch_val[MAX_NEST][DB_MAXPATHLEN];/* SWITCH-block values; */ +int which_switch; /* the index of the current SWITCH */ +int case_blocknum[MAX_NEST]; /* nesting levels of CASE blocks */ +int case_matches[MAX_NEST]; /* does CASE match SWITCH val? */ +int curr_blocknum; /* nesting level of innermost block */ +int switch_blocknum[MAX_NEST]; /* nesting levels of SWITCH blocks */ + +/* + * init_parser - initialize the globals used by the parser. + */ +void +init_parser() +{ + int i; + + g.linenum = g.line_preread = 0; + curr_blocknum = 0; + + which_switch = -1; + for (i = 0; i < MAX_NEST; i++) { + case_matches[i] = 0; + case_blocknum[i] = switch_blocknum[i] = -1; + } +} + +/* + * get_line - read the next line of the config file into the parameter + * "line". 
+ */ +int +get_line(line, fp) + char *line; + FILE *fp; +{ + int old_blocknum; + char *lp, *p; + + while (fgets(line, DB_MAXPATHLEN, fp) != NULL) { + g.linenum++; + + /* Replace newline with a null character. */ + if ((lp = strchr(line, '\n')) == NULL) + ERR_RET("line too long", EINVAL); + *lp = '\0'; + + /* Determine if this line should be skipped. */ + for (p = line; isspace(*p); ++p) + ; + if (*p == '\0' || *p == '#') + continue; + + /* Update the curr_blocknum value as needed. */ + old_blocknum = curr_blocknum; + for (p = line; *p != '\0'; p++) { + if (*p == '{') + curr_blocknum++; + else if (*p == '}') + curr_blocknum--; + } + if (curr_blocknum > old_blocknum + 1 || + curr_blocknum < old_blocknum - 1) + ERR_RET("invalid use of braces", EINVAL); + + return (0); + } + + return (EOF); +} + +/* + * line_counts - determines if the current line should be applied, + * based on whether the relevant CASE blocks match the values of their + * corresponding SWITCH blocks. + */ +static int +line_counts() +{ + int i; + + /* + * If there are nested SWITCH blocks, all of the + * CASE blocks of which the current line is a part + * must match their corresponding SWITCH values. + * Note that lines that are inside a SWITCH block + * but outside a CASE block are not counted. + */ + for (i = 0; i <= which_switch; i++) + if (case_blocknum[i] == -1 || !case_matches[i]) + return (0); + return (1); +} + +/* + * select_from_list - take a pointer to a value string of the form + * SELECT {choice_1 choice_2 ... choice_n} + * and make the pointer point to one of the choices in the list. + * name is the name string that corresponds to the value string. + */ +static int +select_from_list(value) + char **value; +{ + int num_items, which_item; + char *p; + + /* Skip to the opening brace of the list. */ + for (p = *value; *p != '\0' && *p != '{'; p++) + ; + if (*p != '{') + goto illegal; + + /* Skip any leading white space before the first item. 
*/ + for (p++; isspace(*p); p++) + ; + if (*p == '\0') + goto illegal; + *value = p; + + /* Count the number of items in the list. */ + num_items = 1; + for (p++; *p != '\0' && *p != '}'; p++) { + if (isspace(*(p - 1)) && !isspace(*p)) + num_items++; + } + if (*p != '}' || p == *value) + goto illegal; + + /* + * Randomly select one of the items and make the pointer + * point to it. + */ + which_item = random_int(&g.rand_state, 1, num_items); + for (p = *value + 1; which_item > 1 && *p != '}'; p++) { + if (isspace(*(p - 1)) && !isspace(*p)) + which_item--; + } + *value = --p; + + /* Add an end-of-string character after the selected item. */ + for (p++; !isspace(*p) && *p != '}'; p++) + ; + *p = '\0'; + + return (0); + +illegal: + SYNTAX("SELECT { ... }", EINVAL); +} + +/* + * select_from_range - take a pointer to a value string of the form + * RANGE {lower_bound upper_bound} + * and make the pointer point to a string containing one of the integers + * in the specified range. + */ +static int +select_from_range(value) + char **value; +{ + u_int lower, upper, which_int; + char *p; + + /* Skip to the opening brace of the list. */ + for (p = *value; *p != '\0' && *p != '{'; p++) + ; + if (*p != '{') + goto illegal; + + /* Skip any leading white space before the lower bound. */ + for (p++; isspace(*p); p++) + ; + if (*p == '\0' || *p == '}') + goto illegal; + *value = p; + + /* + * Get the lower and upper bounds, and choose an integer from + * the range. + */ + if (sscanf(*value, "%u %u }", &lower, &upper) != 2) + goto illegal; + which_int = random_int(&g.rand_state, lower, upper); + + /* + * Write the string for this integer into the character array + * to which value points. 
+ */ + sprintf(*value, "%u", which_int); + + return (0); + +illegal: + SYNTAX("RANGE { }", EINVAL); +} + +/* + * eval_expression - take a pointer to a string containing an + * arithmetic expression in prefix notation, e.g.: + * (/ (+ 4 (* 5 30))) + * and make *expr_str point to the last character in the expression + * and *expr_val contain the value of the expression. + * + * Returns 0 on success, or EINVAL if the expression is malformed, + * uses an unknown operator, or divides by zero. + */ +static int +eval_expression(expr_str, expr_val) + char **expr_str; + int *expr_val; +{ + int first, second; + char op, *p; + + /* Make sure that we're starting with a left paren. */ + p = *expr_str; + if (*p != '(') + return (EINVAL); + + /* Get the operator. */ + for (p++; isspace((int)*p); p++) + ; + if (*p == '\0' || *p == ')') + return (EINVAL); + op = *p; + + /* Get the first operand. */ + for (p++; isspace((int)*p); p++) + ; + if (*p == '(') { + if (eval_expression(&p, &first) != 0) + return (EINVAL); + } else if (sscanf(p, "%d", &first) != 1) + return (EINVAL); + + /* Get the second operand. */ + for (p++; *p != '\0' && !isspace((int)*p); p++) + ; + if (*p == '\0') + return (EINVAL); + for (p++; isspace((int)*p); p++) + ; + if (*p == '(') { + if (eval_expression(&p, &second) != 0) + return (EINVAL); + } else if (sscanf(p, "%d", &second) != 1) + return (EINVAL); + + /* Perform the operation. */ + switch (op) { + case '+': + *expr_val = first + second; + break; + case '-': + *expr_val = first - second; + break; + case '*': + *expr_val = first * second; + break; + case '/': + /* + * The operands come straight from the config file, so a + * zero divisor is a bad expression, not a reason to trap. + */ + if (second == 0) + return (EINVAL); + *expr_val = first / second; + break; + case '>': + if (first > second) + *expr_val = 1; + else + *expr_val = 0; + break; + case '<': + if (first < second) + *expr_val = 1; + else + *expr_val = 0; + break; + case '=': + if (first == second) + *expr_val = 1; + else + *expr_val = 0; + break; + default: + return (EINVAL); + } + + /* Make *expr_str point to the final parenthesis. 
*/ + for (p++; *p != ')' && *p != '\0' && !isspace((int)*p); p++) + ; + if (*p == '\0') + return (EINVAL); + for ( ; isspace((int)*p); p++) + ; + if (*p != ')') + return (EINVAL); + *expr_str = p; + + return (0); +} + +/* + * process_expression - take a string containing an arithmetic or boolean + * expression in prefix notation, e.g.: + * (/ (+ 4 (* 5 30))) + * and replace the expression with its value. + */ +static int +process_expression(expr) + char *expr; +{ + int val; + char *save; + + /* + * Save a pointer to the start of the expression, because + * eval_expression() changes the value of expr. + */ + save = expr; + + if (eval_expression(&expr, &val) != 0) + ERR_RET("mis-formatted expression", EINVAL); + + /* Replace the expression with its value. */ + sprintf(save, "%d", val); + + return (0); +} + +/* + * process_closing_brace - handle lines that consist of a single + * closing brace. If this is the end of a SWITCH or CASE block, + * we perform the corresponding bookkeeping. Otherwise, we set + * (*other_block_type) to 1. + */ +static void +process_closing_brace(other_block_type) + int *other_block_type; +{ + + *other_block_type = 0; + + /* + * Note that we compare with (curr_blocknum + 1) because + * curr_blocknum will already have been decremented in + * get_line(). + */ + if (which_switch != -1 && + case_blocknum[which_switch] == curr_blocknum + 1) + /* end of a case block */ + case_blocknum[which_switch] = -1; + else if (which_switch != -1 && + switch_blocknum[which_switch] == curr_blocknum + 1) { + /* end of a switch block */ + switch_blocknum[which_switch] = -1; + which_switch--; + } else if (line_counts()) + /* end of some other type of block */ + *other_block_type = 1; +} + +/* + * parse_switch - parse the beginning of a SWITCH block. + * (*value) points to the value of the SWITCH. 
+ */ +static int +parse_switch(value) + char **value; +{ + char *p; + + which_switch++; + if (which_switch >= MAX_NEST) + ERR_RET("too many levels of nested SWITCH statements", EINVAL); + switch_blocknum[which_switch] = curr_blocknum; + + /* Skip opening brace (if any) and subsequent spaces. */ + for (p = *value; *p == '{' || isspace(*p); p++) + ; + *value = p; + + /* Remove closing brace and intervening spaces. */ + for (; *p != '}' && *p != '\0' && !isspace(*p); p++) + ; + *p = '\0'; + + /* Store the switch value. */ + strcpy(switch_val[which_switch], *value); + + return (0); +} + +/* + * parse_case - parse the beginning of a CASE block. + * (*value) points to the CASE value or values. + */ +static int +parse_case(value) + char **value; +{ + char *list, *listval; + + if (which_switch == -1) + ERR_RET("CASE statement outside SWITCH block", EINVAL); + if (case_blocknum[which_switch] != -1) + ERR_RET("nested CASE statement", EINVAL); + case_blocknum[which_switch] = curr_blocknum; + + /* + * See if the current SWITCH value occurs among the list + * of values for this CASE statement. + */ + case_matches[which_switch] = 0; + list = *value; + while ((listval = strtok(list, "{} \t")) != NULL) { + if (!strcasecmp(listval, switch_val[which_switch])) { + case_matches[which_switch] = 1; + break; + } else + list = NULL; + } + + return (0); +} + +/* + * parse_set - parse a SET statement. + * (*var_and_val) points to the portion of the line that comes + * after the SET keyword. + */ +static int +parse_set(var_and_val) + char **var_and_val; +{ + int ret; + char *var_name, *var_val; + + /* Get the variable name and its value. */ + g.line_preread = 1; /* don't output the results */ + if (get_name_value(*var_and_val, &var_name, &var_val) != 0) + SYNTAX("SET ", EINVAL); + + ret = set_variable(var_name, var_val, 0); + + /* + * Put the var and val back into a single string for printing. Use + * the same value for the variable that we will in the run. 
Note the + * ordering here to protect var_name until after getting the variable. + */ + get_variable(var_name, + *var_and_val + strlen(var_name) + 1, DB_MAXPATHLEN); + *(*var_and_val + strlen(var_name)) = ' '; + + return (ret); +} + +/* + * process_variable - handle the use of a variable name in the value + * portion of a config-file line, replacing the variable with its + * corresponding value. + * + * var_name points at the leading $ of the variable reference inside + * the line; the reference is overwritten in place with the variable's + * value followed by the remainder of the line. Returns 0 on success + * or the error returned by get_variable(). + */ +static int +process_variable(var_name) + char *var_name; +{ + int ret; + char val_buf[DB_MAXPATHLEN], *p, save; + + /* + * Use the variable name as the key, removing the leading $ + * and including everything up to the first white-space or + * other special character. + */ + p = var_name + 1; + while (*p != '\0' && !isspace(*p) && *p != '}') + p++; + save = *p; + *p = '\0'; + + ret = get_variable(var_name + 1, val_buf, sizeof(val_buf)); + + if (ret == 0) { + /* Replace the variable with its value. */ + *p = save; + /* + * strncat's bound is the number of characters that may + * be appended, not the size of the destination, so pass + * only the space left after the value and its \0. + */ + strncat(val_buf, p, sizeof(val_buf) - strlen(val_buf) - 1); + /* + * NOTE(review): the size of the caller's line buffer is + * not known here, so this copy is still unbounded. + */ + strcpy(var_name, val_buf); + } + + return (ret); +} + +/* + * process_variables - substitute values for all variables in the + * given string + */ +static int +process_variables(value) + char *value; +{ + int ret; + + /* Handle values that are variables. */ + value = strchr(value, '$'); + while (value != NULL) { + if ((ret = process_variable(value)) != 0) + return (ret); + value = strchr(value + 1, '$'); + } + + return (0); +} + +/* + * output_line - output the line consisting of the specified name and + * value, unless it has already been output once before. + */ +static void +output_line(name, value) + char *name, *value; +{ + + if (!g.line_preread && (g.current_fp != NULL)) { + fprintf(g.current_fp, "%s %s\n", name, value); + fprintf(g.outfp, "%s %s\n", name, value); + } else + g.line_preread = 0; +} + +/* + * get_name_value - separate a line of the config file into its name + * and value components, and handle special keywords like SWITCH + * and SELECT. 
+ * + * Name/value pairs are parsed as two white-space-separated strings. + * The value may contain embedded white-space. Note: we use the + * isspace(3) macro because it's more portable, but that means that + * you can use characters like form-feed to separate the strings. + * + * If (*value) == NULL upon the return of this function, then the + * calling function can assume that we have reached the end of some + * block that it needs to handle (as opposed to SWITCH or CASE + * blocks, which are handled by get_name_value()). + */ +int +get_name_value(line, name, value) + char *line; + char **name, **value; +{ + int ret, return_to_caller; + char *p; + + if (0) { +next_line: if ((ret = get_line(line, g.conf_fp)) != 0) + return (ret); + } + + /* Skip leading white-space. */ + for (p = line; isspace(*p); ++p) + ; + if (*p == '\0') + goto illegal; + + /* Get the name. */ + *name = p; + if (!strncmp(*name, "}", 1)) { + return_to_caller = 0; + process_closing_brace(&return_to_caller); + if (return_to_caller) { + *value = NULL; + if (g.current_fp != NULL) { + fprintf(g.current_fp, "%s\n", line); + fprintf(g.outfp, "%s\n", line); + } + return (0); + } else + goto next_line; + } + for (++p; *p != '\0' && !isspace(*p); ++p) + ; + if (*p == '\0' || p == *name) + goto illegal; + *p = '\0'; + + /* Skip the separating white-space. */ + for (++p; isspace(*p); ++p) + ; + if (*p == '\0') + goto illegal; + + /* Get the value, minus any trailing white-space. */ + *value = p; + for (++p; *p != '\0'; ++p) + ; + for (--p; isspace(*p); --p) + ; + ++p; + if (p == *value) +illegal: ERR_RET("invalid entry: expected a keyword followed " + "by one or more arguments", EINVAL); + *p = '\0'; + + /* + * Handle special keywords that appear in the value string. + * If process_variables() returns a non-zero value from + * these invocations, we assume that we're in an inactive + * CASE block and thus don't do anything here. If that's + * not the case, the error will be dealt with later. 
+ */ + if (!strncasecmp(*value, "SELECT", 6) && + process_variables(*value) == 0) + ret = select_from_list(value); + else if (!strncasecmp(*value, "RANGE", 5) && + process_variables(*value) == 0) + ret = select_from_range(value); + else if (**value == '(' && + process_variables(*value) == 0) + ret = process_expression(*value); + else + ret = 0; + + if (ret != 0) + return (ret); + + /* Handle SWITCH, CASE, and SET statements. */ + if (!strcasecmp(*name, "SWITCH")) { + if (line_counts() && + (ret = process_variables(*value)) != 0) + return (ret); + if ((ret = parse_switch(value)) != 0) + return (ret); + goto next_line; + } else if (!strcasecmp(*name, "CASE")) { + if ((ret = parse_case(value)) != 0) + return (ret); + goto next_line; + } else if (!strcasecmp(*name, "SET") && line_counts()) { + if ((ret = process_variables(*value)) != 0 || + (ret = parse_set(value)) != 0) + return (ret); + /* + * Output SET statements so that we have a record of + * the values of variables. + */ + output_line(line, *value); + goto next_line; + } + + /* + * If we're in a CASE block that doesn't match the corresponding + * SWITCH value, keep advancing until we get beyond it. + */ + if (!line_counts()) + goto next_line; + + /* + * Output to the main output file and to the .current file the + * lines that are actually used to configure the run, including + * the results of any random selections, but with variable names + * intact. + */ + output_line(line, *value); + + /* Handle any remaining values that are variables. 
*/ + return (process_variables(*value)); +} + +static DB *var_dbp = NULL; + +/* + * open_var_db - create and open the in-memory hash database that + * holds the config-file variables. Returns 0 on success or the + * error from db_create() or DB->open(). + */ +int +open_var_db() +{ + int ret; + + /* Without a handle there is nothing to open or close. */ + if ((ret = db_create(&var_dbp, NULL, 0)) != 0) { + var_dbp = NULL; + return (ret); + } + if ((ret = var_dbp->open(var_dbp, NULL, NULL, NULL, + DB_HASH, DB_CREATE, 0)) != 0) + close_var_db(); + + return (ret); +} + +int +close_var_db() +{ + int ret; + + ret = 0; + if (var_dbp != NULL) { + ret = var_dbp->close(var_dbp, 0); + var_dbp = NULL; + } + + return (ret); +} + +int +get_variable(name, value, nvalue) + char *name, *value; + int nvalue; +{ + DBT key, data; + int force, ret; + char keybuf[DB_MAXPATHLEN]; + + for (force = 1; force >= 0; force--) { + /* Prepare the DBTs. */ + memset(&key, 0, sizeof(key)); + snprintf(keybuf, sizeof(keybuf), force ? "!%s" : "%s", name); + key.data = keybuf; + key.size = strlen(keybuf) + 1; /* include the \0 */ + memset(&data, 0, sizeof(data)); + data.data = value; + data.ulen = nvalue; + data.flags = DB_DBT_USERMEM; + + /* Look for the variable in the variable database. */ + if ((ret = var_dbp->get(var_dbp, NULL, &key, &data, 0)) == 0) + return (0); + } + + var_dbp->errx(var_dbp, "get failed for %s: %s", + name, db_strerror(ret)); + ERR_RET("invalid variable usage", ret); +} + +int +set_variable(name, value, force) + char *name, *value; + int force; +{ + DBT key, data; + int ret; + char keybuf[DB_MAXPATHLEN]; + + /* Prepare the DBTs. */ + memset(&key, 0, sizeof(key)); + snprintf(keybuf, sizeof(keybuf), force ? "!%s" : "%s", name); + key.data = keybuf; + key.size = strlen(keybuf) + 1; /* include the \0 */ + memset(&data, 0, sizeof(data)); + data.data = value; + data.size = strlen(value) + 1; + + /* Add the variable to the in-memory database. 
*/ + if ((ret = var_dbp->put(var_dbp, NULL, &key, &data, 0)) != 0) { + var_dbp->errx(var_dbp, "put failed for %s: %s", + name, db_strerror(ret)); + ERR_RET("invalid SET statement", EINVAL); + } + + return (0); +} --- db-4.6.21/test_perf/perf_rand.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_rand.c 2007-11-05 15:51:13.619920000 +1100 @@ -0,0 +1,186 @@ +/* + * Copyright (c) 1983, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: perf_rand.c,v 12.1 2006/05/05 14:54:05 bostic Exp $ + * + */ + +#include "perf_extern.h" + +/* + * perf_rand.c: + * + * An improved random number generation package. This is taken from the + * FreeBSD random(3) implementation, and simplified and renamed for use + * in Berkeley DB's db_perf test application. + * + * The random number generation technique is a linear feedback shift register + * approach, employing trinomials (since there are fewer terms to sum up that + * way). In this approach, the least significant bit of all the numbers in + * the state table will act as a linear feedback shift register, and will + * have period 2^deg - 1 (where deg is the degree of the polynomial being + * used, assuming that the polynomial is irreducible and primitive). The + * higher order bits will have longer periods, since their values are also + * influenced by pseudo-random carries out of the lower bits. The total + * period of the generator is approximately deg*(2**deg - 1); thus doubling + * the amount of state information has a vast influence on the period of the + * generator. Note: the deg*(2**deg - 1) is an approximation only good for + * large deg, when the period of the shift register is the dominant factor. + * With deg equal to seven, the period is actually much longer than the + * 7*(2**7 - 1) predicted by this formula. 
+ */ + +static void db_srandom __P((db_rand_state *rs, unsigned long x)); +static unsigned long good_rand __P((long)); + +/* + * Initially, everything is set up as if from: + * + * initstate(1, randtbl, 128); + * + * Note that this initialization takes advantage of the fact that srandom() + * advances the front and rear pointers 10*DEG_3 times, and hence the + * rear pointer which starts at 0 will also end up at zero; thus the zeroeth + * element of the state information, which contains info about the current + * position of the rear pointer is just + * + * MAX_TYPES (== 5) * (rptr - state) + TYPE_3 == TYPE_3. + */ +static unsigned long init_randtbl[DEG_3 + 1] = { + TYPE_3, + 0x991539b1, 0x16a5bce3, 0x6774a4cd, 0x3e01511e, 0x4e508aaa, 0x61048c05, + 0xf5500617, 0x846b7115, 0x6a19892c, 0x896a97af, 0xdb48f936, 0x14898454, + 0x37ffd106, 0xb58bff9c, 0x59e17104, 0xcf918a49, 0x09378c83, 0x52c7a471, + 0x8d293ea9, 0x1f4fc301, 0xc3db71be, 0x39b44e1c, 0xf8a44ef9, 0x4c8b80b1, + 0x19edc328, 0x87bf4bdd, 0xc9b240e5, 0xe9ee4b1b, 0x4382aee7, 0x535b6b41, + 0xf3bec5da +}; + +void db_init_random(rs, x) + db_rand_state *rs; + unsigned long x; +{ + memcpy(rs->randtbl, init_randtbl, sizeof(init_randtbl)); + + rs->state = &rs->randtbl[1]; + rs->end_ptr = &rs->randtbl[DEG_3 + 1]; + + db_srandom(rs, x); +} + +/* + * random: + * + * The basic operation is to add the number at the rear pointer into the one + * at the front pointer. Then both pointers are advanced to the next location + * cyclically in the table. The value returned is the sum generated, reduced + * to 31 bits by throwing away the "least random" low bit. + * + * Note: the code takes advantage of the fact that both the front and + * rear pointers can't wrap on the same call by not testing the rear + * pointer if the front one has wrapped. + * + * Returns a 31-bit random number. 
+ */
+long
+db_random(rs)
+	db_rand_state *rs;
+{
+	register long i;
+	register unsigned long *f, *r;
+
+	/*
+	 * Use local variables rather than static variables for speed.
+	 */
+	f = rs->fptr; r = rs->rptr;
+	*f += *r;
+	i = (long)((*f >> 1) & 0x7fffffff);	/* chucking least random bit */
+	if (++f >= rs->end_ptr) {
+		f = rs->state;
+		++r;
+	}
+	else if (++r >= rs->end_ptr) {
+		r = rs->state;
+	}
+
+	rs->fptr = f; rs->rptr = r;
+
+	return (i);
+}
+
+/*
+ * srandom:
+ *
+ * Initialize the random number generator based on the given seed. If the
+ * type is the trivial no-state-information type, just remember the seed.
+ * Otherwise, initializes state[] based on the given "seed" via a linear
+ * congruential generator. Then, the pointers are set to known locations
+ * that are exactly SEP_3 places apart. Lastly, it cycles the state
+ * information a given number of times to get rid of any initial dependencies
+ * introduced by the L.C.R.N.G. Note that the initialization of randtbl[]
+ * for default usage relies on values produced by this routine.
+ */
+static void db_srandom(rs, x)
+	db_rand_state *rs;
+	unsigned long x;
+{
+	register long i;
+
+	rs->state[0] = x;
+	for (i = 1; i < DEG_3; i++)
+		rs->state[i] = good_rand(rs->state[i - 1]);
+	rs->fptr = &rs->state[SEP_3];
+	rs->rptr = &rs->state[0];
+	for (i = 0; i < 10 * DEG_3; i++)
+		(void)db_random(rs);
+}
+
+static unsigned long good_rand(x)
+	register long x;
+{
+	/*
+	 * Compute x = (7^5 * x) mod (2^31 - 1)
+	 * without overflowing 31 bits:
+	 *	(2^31 - 1) = 127773 * (7^5) + 2836
+	 * From "Random number generators: good ones are hard to find",
+	 * Park and Miller, Communications of the ACM, vol. 31, no. 10,
+	 * October 1988, p. 1195.
+	 */
+	register long hi, lo;
+
+	hi = x / 127773;
+	lo = x % 127773;
+	x = 16807 * lo - 2836 * hi;
+	if (x <= 0)
+		x += 0x7fffffff;
+	return (x);
+}
--- db-4.6.21/test_perf/perf_rand.h	1970-01-01 10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_rand.h	2007-11-05 15:51:13.639919000 +1100
@@ -0,0 +1,71 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1998,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_rand.h,v 12.5 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#ifndef __PERF_RAND
+#define __PERF_RAND
+
+#define DB_RAND_MAX 0x7fffffff
+
+/*
+ * For each of the currently supported random number generators, we have a
+ * break value on the amount of state information (you need at least this
+ * many bytes of state info to support this random number generator), a degree
+ * for the polynomial (actually a trinomial) that the R.N.G. is based on, and
+ * the separation between the two lower order coefficients of the trinomial.
+ */
+#define	TYPE_3		3		/* x**31 + x**3 + 1 */
+#define	BREAK_3		128
+#define	DEG_3		31
+#define	SEP_3		3
+
+/*
+ * Random number generators are not intended for multi-threaded use.
+ * One of these structures is required per thread.
+ */
+typedef struct db_rand_state {
+	unsigned long randtbl[DEG_3 + 1];
+
+	/*
+	 * fptr and rptr are two pointers into the state info, a front and a
+	 * rear pointer. These two pointers are always SEP_3 places apart,
+	 * as they cycle cyclically through the state information. (Yes, this
+	 * does mean we could get away with just one pointer, but the code for
+	 * random() is more efficient this way). The pointers are left
+	 * positioned as they would be from the call
+	 *
+	 *	initstate(1, randtbl, 128);
+	 *
+	 * (The position of the rear pointer, rptr, is really 0 (as explained
+	 * above in the initialization of randtbl) because the state table
+	 * pointer is set to point to randtbl[1] (as explained below).
+	 */
+	unsigned long *fptr;
+	unsigned long *rptr;
+
+	/*
+	 * The following things are the pointer to the state information table,
+	 * the type of the current generator, the degree of the current
+	 * polynomial being used, and the separation between the two pointers.
+	 * Note that for efficiency of random(), we remember the first location
+	 * of the state information, not the zeroeth. Hence it is valid to
+	 * access state[-1], which is used to store the type of the R.N.G.
+	 * Also, we remember the last location, since this is more efficient
+	 * than indexing every time to find the address of the last element to
+	 * see if the front and rear pointers have wrapped.
+	 */
+	unsigned long *state;
+	unsigned long *end_ptr;
+} db_rand_state;
+
+void db_init_random __P((db_rand_state *, unsigned long));
+long db_random __P((db_rand_state *));
+
+/* Helper macro useful in lots of places */
+#define THR_RS (&config.thread[thr_id]->rand_state)
+
+#endif
--- db-4.6.21/test_perf/perf_spawn.c	1970-01-01 10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_spawn.c	2007-11-05 15:51:13.635920000 +1100
@@ -0,0 +1,233 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_spawn.c,v 12.7 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+/*
+ * spawn_kids --
+ *	Create N threads (exit on failure); return a NULL-terminated array.
+ */
+os_thread_t *
+spawn_kids(msg, nthreads, func)
+	const char *msg;
+	long nthreads;
+	void *(*func)__P((void *));
+{
+	char buf[100];
+
+	os_thread_t *kidsp;
+	long i;
+	int ret;
+
+	/* One extra slot for the NULL terminator; ret carries the error. */
+	if ((ret = __os_calloc(g.dbenv, nthreads + 1,
+	    sizeof(os_thread_t), &kidsp)) != 0) {
+		fprintf(g.outfp, "%s: %s\n", msg, strerror(ret));
+		exit(EXIT_FAILURE);
+	}
+	for (i = 0; i < nthreads; i++) {
+		snprintf(buf, sizeof(buf), "t%.*s%ld", 3, msg, i);
+		if (os_thread_create(buf, &kidsp[i], func, (void *)i) != 0) {
+			fprintf(g.outfp,
+			    "%s: failed spawning thread %ld %s\n",
+			    msg, i, strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+	}
+	kidsp[nthreads] = (os_thread_t)NULL;
+
+	return (kidsp);
+}
+
+/*
+ * wait_kids --
+ *	Wait for a NULL-terminated thread array, then free the array.
+ */
+int
+wait_kids(msg, kidsp)
+	const char *msg;
+	os_thread_t *kidsp;
+{
+	int i, status;
+#ifdef HAVE_VXWORKS
+	int done;
+
+	status = 0;
+	done = 0;
+	while (!done) {
+		for (i = 0; kidsp[i] != (int)NULL; i++) {
+			/*
+			 * The only way we can tell if our children have
+			 * exited is if we get an error back when we
+			 * check their Id. We get success if the task
+			 * still exists, and ERROR back if it does not.
+			 * It is not an error, just a completion indicator.
+			 */
+			if (taskIdVerify(kidsp[i]) == ERROR) {
+				done = 1;
+				continue;
+			} else {
+				done = 0;
+				break;
+			}
+		}
+		if (!done)
+			__os_sleep(NULL, 2, 0);
+	}
+#elif defined(DB_WIN32)
+	status = 0;
+	for (i = 0; kidsp[i] != (os_thread_t)NULL; i++) {
+		if (WaitForSingleObject(kidsp[i], INFINITE) == WAIT_FAILED) {
+			status = 1;
+			fprintf(g.outfp,
+			    "%s: child %ld exited with error %s\n",
+			    msg, (long)i, strerror(GetLastError()));
+		}
+	}
+#else
+	void *retp;
+
+	status = 0;
+	for (i = 0; kidsp[i] != (os_thread_t)NULL; i++) {
+		if (pthread_join(kidsp[i], &retp) != 0 ||
+		    retp != NULL) {
+			status = 1;
+			fprintf(g.outfp, "%s: child %ld exited with error\n",
+			    msg, (long)i);
+		}
+	}
+#endif
+
+	__os_free(g.dbenv, kidsp);
+	return (status);
+}
+
+/*
+ * wait_procs --
+ *	Wait for an array of N procs.
+ */
+int
+wait_procs(msg, procs)
+	const char *msg;
+	os_pid_t *procs;
+{
+	int i, status, threads;
+#if defined(DB_WIN32)
+	DWORD ret;
+#endif
+
+	status = 0;
+	threads = config.nthreads;
+
+#if defined(DB_WIN32)
+	do {
+		ret = WaitForMultipleObjects(threads, (HANDLE *)procs, FALSE,
+		    INFINITE);
+		i = ret - WAIT_OBJECT_0;
+		if (i < 0 || i >= threads) {
+			fprintf(g.outfp, "%s: wait failed: %s\n", msg,
+			    strerror(GetLastError()));
+			return (1);
+		}
+
+		if ((GetExitCodeProcess((HANDLE)procs[i], &ret) == 0) ||
+		    (ret != 0)) {
+			fprintf(g.outfp,
+			    "Child %d, exited with: %d\n", i, ret);
+			g.shutdown = 1;
+			return (1);
+		}
+
+		/* remove the process handle from the list */
+		while (++i < threads)
+			procs[i - 1] = procs[i];
+	} while (--threads);
+#elif !defined(HAVE_VXWORKS)
+	do {
+		if ((i = wait(&status)) == -1) {
+			fprintf(g.outfp,
+			    "%s: wait failed: %s\n", msg, strerror(errno));
+			return (1);
+		}
+		if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0) {
+			fprintf(g.outfp,
+			    "Child %d, exited with: %d\n", i, status);
+			g.shutdown = 1;
+			for (i = 0; i < config.nthreads; i++)	/* all kids */
+				kill(procs[i], SIGKILL);
+			return (1);
+		}
+	} while (--threads);
+#endif
+
+	return (0);
+}
+
+os_pid_t			/* pid, wait status, or OS_BAD_PID */
+my_system(command, should_wait)
+	const char *command;
+	int should_wait;
+{
+	os_pid_t pid;
+	int status;
+#ifdef DB_WIN32
+	char *p;
+#endif
+
+	COMPQUIET(pid, 0);
+	COMPQUIET(status, 0);
+
+#ifdef HAVE_VXWORKS
+	fprintf(g.outfp, "ERROR: my_system not supported for VxWorks.\n");
+	return OS_BAD_PID;
+#elif defined(HAVE_QNX)
+	/*
+	 * For QNX, we cannot fork if we've ever used threads. So
+	 * we'll use their spawn function. We use 'spawnl' which
+	 * is NOT a POSIX function.
+	 *
+	 * The return value of spawnl is just what we want depending
+	 * on the value of the 'wait' arg.
+	 */
+	if (should_wait)
+		return (spawnl(P_WAIT, "/bin/sh", "sh", "-c", command, NULL));
+	else
+		return (spawnl(P_NOWAIT, "/bin/sh", "sh", "-c", command,
+		    NULL));
+#elif defined(DB_WIN32)
+	/*
+	 * Spawn looks like the best choice for Win32, but (of course) it's
+	 * slightly different... gotta love that intra-shell quoting. This
+	 * also depends on having a "sh" in the path somewhere
+	 */
+	for (p = (char *)command; *p; p++)
+		if (*p == '\\')
+			*p = '/';
+	pid = (os_pid_t)(_spawnlp(should_wait ? P_WAIT : P_NOWAIT,
+	    "sh", "sh", "-c", "\"", command, "\"", NULL));
+	/* __os_sleep(g.dbenv, 2, 0); */
+	return (pid);
+#else
+	if ((pid = fork()) != 0) {
+		if (pid == -1)
+			return OS_BAD_PID;
+		if (should_wait == 0)
+			return (pid);
+		while (pid != wait(&status)) {
+			if (errno == EINTR)
+				continue;
+			fprintf(g.outfp, "wrong child\n");
+			return OS_BAD_PID;
+		}
+		return (status);
+	} else {
+		execl("/bin/sh", "sh", "-c", command, (char *)NULL);
+		exit(EXIT_FAILURE);
+	}
+#endif
+}
--- db-4.6.21/test_perf/perf_stat.c	1970-01-01 10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_stat.c	2007-11-07 22:34:51.890244000 +1100
@@ -0,0 +1,785 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 2000,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_stat.c,v 12.11 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+#define	TXNS		0	/* st[] slot: transaction count */
+#define	D_EVICT		2	/* st[] slot: st_rw_evict */
+#define	HIT		3	/* st[] slot: st_cache_hit */
+#define	MISS		4	/* st[] slot: st_cache_miss */
+#define	CONFLICT	7	/* st[] slot: st_lock_wait */
+#define	FAULT		12	/* st[] slot: ru_majflt */
+
+/*
+ * Stat threads--actually, there's only one of them, but we use
+ * an array so that we can use spawn_kids().
+ */
+static os_thread_t *stat_threads;
+
+/*
+ * stat_create - allocate and initialize a STAT; returns NULL on failure
+ */
+STAT *
+stat_create(base_addr_ptr, field_offset)
+	void *base_addr_ptr;
+	int field_offset;
+{
+	STAT *stat;
+	int i;
+
+	if (__os_malloc(g.dbenv, sizeof(STAT), &stat) != 0)
+		return (NULL);
+
+	/*
+	 * The following two variables allow us to access fields in DB
+	 * stat structs that will be allocated in the future (because
+	 * these structs are allocated when the stat functions are called).
+	 * base_addr_ptr is a pointer to the variable that will contain
+	 * the address of the stat structure. offset is the byte offset of
+	 * the field within that structure (see __stat_current_val).
+	 */
+	stat->base_addr_ptr = (u_int8_t **)base_addr_ptr;
+	stat->offset = field_offset;
+
+	stat->prev_val = 0;
+	stat->prev_sample_val = 0;
+
+	if (__os_malloc(g.dbenv,
+	    config.steady_state_num_samples * sizeof(double),
+	    &stat->sample) != 0) {
+		__os_free(g.dbenv, stat);
+		return (NULL);
+	}
+	for (i = 0; i < config.steady_state_num_samples; i++)
+		stat->sample[i] = 0.0;
+	stat->sum_samples = stat->sum_squares = 0.0;
+	stat->qhead = -1;		/* no baseline sample taken yet */
+	stat->using_per_time = 0;
+
+	stat->current_val = __stat_current_val;
+	stat->delta = __stat_delta;
+	stat->per_txn = __stat_per_txn;
+	stat->per_time = __stat_per_time;
+	stat->update_prev = __stat_update_prev;
+	stat->update_samples = __stat_update_samples;
+	stat->steady = __stat_steady;
+
+	return (stat);
+}
+
+/*
+ * stat_destroy - free a STAT and its sample queue; NULL is ignored
+ */
+void
+stat_destroy(stat)
+	STAT *stat;
+{
+
+	if (stat == NULL)
+		return;
+
+	if (stat->sample != NULL)
+		__os_free(g.dbenv, stat->sample);
+	__os_free(g.dbenv, stat);
+}
+
+/*
+ * __stat_current_val - implementation of STAT->current_val.
+ * Returns the current value of the stat associated with the
+ * specified STAT object.
+ */
+u_int32_t
+__stat_current_val(stat)
+	STAT *stat;
+{
+	/* Read the 32-bit field offset bytes into the current struct. */
+	return *(u_int32_t *)(*stat->base_addr_ptr + stat->offset);
+}
+
+/*
+ * __stat_delta - implementation of STAT->delta. Returns the change
+ * since the last measurement in the value of the stat associated with
+ * the specified STAT object.
+ */
+int
+__stat_delta(stat)
+	STAT *stat;
+{
+
+	return (stat->current_val(stat) - stat->prev_val);
+}
+
+/*
+ * __stat_per_txn - implementation of STAT->per_txn. Returns the per-txn
+ * rate of change of the stat associated with the specified STAT object.
+ */
+double
+__stat_per_txn(stat, txns)
+	STAT *stat;
+	int txns;
+{
+	/* Treat zero transactions as one to avoid dividing by zero. */
+	if (txns == 0)
+		txns = 1;
+	return ((double)stat->delta(stat)/txns);
+}
+
+/*
+ * __stat_per_time - implementation of STAT->per_time. Returns the per-time
+ * rate of change of the stat associated with the specified STAT object.
+ */
+double
+__stat_per_time(stat, time)
+	STAT *stat;
+	double time;
+{
+	/* The interval must be positive; only checked when asserts are on. */
+	assert(time > 1e-6);
+	return ((double)stat->delta(stat)/time);
+}
+
+/*
+ * __stat_update_prev - implementation of STAT->update_prev. Stores
+ * the current value of the stat associated with specified STAT object.
+ */
+void
+__stat_update_prev(stat)
+	STAT *stat;
+{
+
+	stat->prev_val = stat->current_val(stat);
+}
+
+/*
+ * __stat_update_samples - implementation of STAT->update_samples.
+ * Updates the queue of per-txn or per-time samples of the stat
+ * associated with specified STAT object. denom is used to compute a
+ * new per-time or per-txn sample, and this sample is stored in the
+ * queue. If the queue is full, the new sample replaces the oldest
+ * sample in the queue. In addition, the sums of the samples and their
+ * squares are also updated.
+ */
+void
+__stat_update_samples(stat, denom)
+	STAT *stat;
+	double denom;
+{
+	int head;
+
+	assert(stat->qhead == -1 || denom > 1e-6);
+	head = stat->qhead;
+
+	if (head != -1) {	/* -1: first call, only record a baseline */
+		stat->sum_samples -= stat->sample[head];
+		stat->sum_squares -=
+		    stat->sample[head] * stat->sample[head];
+
+		stat->sample[head] =
+		    (double)(stat->current_val(stat) - stat->prev_sample_val);
+		stat->sample[head] /= denom;
+
+		stat->sum_samples += stat->sample[head];
+		stat->sum_squares +=
+		    stat->sample[head] * stat->sample[head];
+	}
+
+	stat->prev_sample_val = stat->current_val(stat);
+	stat->qhead = (stat->qhead + 1) % config.steady_state_num_samples;
+}
+
+/*
+ * __stat_steady - implementation of STAT->steady. Determines if
+ * the stat associated with specified STAT object is steady by
+ * computing the mean and variance of the stored samples of the stat
+ * and checking if the variance is below the user-specified
+ * thresholds. Assumes that a full set of samples has been gathered
+ * for the stat.
+ */
+int
+__stat_steady(stat)
+	STAT *stat;
+{
+	double mean, term, var;
+	int i, ret;
+
+	mean = stat->sum_samples / config.steady_state_num_samples;
+	term = (stat->sum_samples * stat->sum_samples) /
+	    config.steady_state_num_samples;
+	if (stat->sum_squares > term)	/* take |difference| */
+		var = stat->sum_squares - term;
+	else
+		var = term - stat->sum_squares;
+	var /= (config.steady_state_num_samples - 1);	/* n - 1 divisor */
+
+	ret = (mean < 1e-6 ||
+	    var < config.steady_state_variance ||
+	    var < config.steady_state_variance_pct * mean * mean);
+
+	fprintf(g.outfp, "steady-state check: ");
+	for (i = 0; i < config.steady_state_num_samples; i++)
+		fprintf(g.outfp, "%.3f\t", stat->sample[i]);
+	fprintf(g.outfp, "[%.3f\t%.3f\t%.3f\t%d]\n",
+	    mean, var, var/mean/mean, ret);
+
+	return (ret);
+}
+
+/*
+ * init_stats - initializes the specified array of STAT objects.
+ * Assumes that the parameter points to an array that is large enough
+ * to accommodate all of the STAT objects.
+ */
+int
+init_stats(st)
+	STAT **st;
+{
+	int i;
+
+	g.mpstat = NULL;
+	g.lkstat = NULL;
+	g.lgstat = NULL;
+	g.txstat = NULL;
+	g.rusage = NULL;
+	for (i = 0; i < NUM_INDIV_STATS; i++)
+		st[i] = NULL;
+
+	st[0] = stat_create(&g.txns_ptr, 0);	/* TXNS */
+	/* Mempool counters; slots 2-4 are D_EVICT, HIT and MISS. */
+	st[1] = stat_create(&g.mpstat, SSZ(DB_MPOOL_STAT, st_ro_evict));
+	st[2] = stat_create(&g.mpstat, SSZ(DB_MPOOL_STAT, st_rw_evict));
+	st[3] = stat_create(&g.mpstat, SSZ(DB_MPOOL_STAT, st_cache_hit));
+	st[4] = stat_create(&g.mpstat, SSZ(DB_MPOOL_STAT, st_cache_miss));
+	st[5] = stat_create(&g.mpstat, SSZ(DB_MPOOL_STAT, st_region_wait));
+	st[6] = stat_create(&g.mpstat, SSZ(DB_MPOOL_STAT, st_page_out));
+	/* Lock counters; slot 7 is CONFLICT. */
+	st[7] = stat_create(&g.lkstat, SSZ(DB_LOCK_STAT, st_lock_wait));
+	st[8] = stat_create(&g.lkstat, SSZ(DB_LOCK_STAT, st_ndeadlocks));
+	st[9] = stat_create(&g.lkstat, SSZ(DB_LOCK_STAT, st_region_wait));
+	/* Log counters. */
+	st[10] = stat_create(&g.lgstat, SSZ(DB_LOG_STAT, st_wcount));
+	st[11] = stat_create(&g.lgstat, SSZ(DB_LOG_STAT, st_region_wait));
+	/* Rusage buffer; slot 12 (FAULT) is created only with getrusage(). */
+	if (__os_malloc(g.dbenv, sizeof(RUSAGE), &g.rusage) != 0)
+		return (1);
+#if defined(HAVE_GETRUSAGE)
+	st[12] = stat_create(&g.rusage, SSZ(RUSAGE, ru_majflt));
+#endif
+	for (i = 0; i < NUM_INDIV_STATS; i++) {
+		if (st[i] == NULL)
+			return (1);
+	}
+	/* NOTE(review): st[12] stays NULL without getrusage(); confirm. */
+	return (0);
+}
+
+/*
+ * get_stats - gets the current values of the monitored stats. time_ptr
+ * is a pointer to the variable in which the current time should be stored.
+ */
+int
+get_stats(time_ptr)
+	double *time_ptr;
+{
+	db_timespec v;
+	int i, ret;
+
+	/* time */
+	__os_gettime(g.dbenv, &v);
+	*time_ptr = MKTIME(v);
+
+	/* transactions: total of every worker thread's txn_count */
+	*(g.txns_ptr) = 0;
+	for (i = 0; i < config.nthreads; i++)
+		*(g.txns_ptr) += config.thread[i]->txn_count;
+
+	/* region stats; the first failure is reported and stops the chain */
+	if ((ret = g.dbenv->memp_stat(g.dbenv, &g.mpstat, NULL, 0)) != 0)
+		g.dbenv->err(g.dbenv, ret, "memp_stat");
+	if (ret == 0 &&
+	    (ret = g.dbenv->lock_stat(g.dbenv, &g.lkstat, 0)) != 0)
+		g.dbenv->err(g.dbenv, ret, "lock_stat");
+	if (ret == 0 &&
+	    (ret = g.dbenv->log_stat(g.dbenv, &g.lgstat, 0)) != 0)
+		g.dbenv->err(g.dbenv, ret, "log_stat");
+	if (ret == 0 &&
+	    (ret = g.dbenv->txn_stat(g.dbenv, &g.txstat, 0)) != 0)
+		g.dbenv->err(g.dbenv, ret, "txn_stat");
+
+	/* rusage: getrusage() returns -1, the real error code is in errno */
+#if defined(HAVE_GETRUSAGE)
+	if (ret == 0 && (ret = getrusage(RUSAGE_SELF, g.rusage)) != 0)
+		g.dbenv->err(g.dbenv, errno, "getrusage");
+#endif
+
+	return (ret);
+}
+
+/*
+ * output_stats_line - prints a line that summarizes the current
+ * per-time or per-txn averages of the stats being monitored.
+ */
+void
+output_stats_line(st, time, prev_time, start_time)
+	STAT **st;
+	double time, prev_time, start_time;
+{
+	double miss_rate, time_delta;
+	int i, h, m, txns_delta;
+
+	time_delta = time - prev_time;
+	txns_delta = st[TXNS]->delta(st[TXNS]);
+
+	/* elapsed time, tput, and miss rate (0 when the cache was idle) */
+	m = st[MISS]->delta(st[MISS]);
+	h = st[HIT]->delta(st[HIT]);
+	miss_rate = (m + h) == 0 ? 0.0 : (double)m / (double)(m + h) * 100;
+	printf("%.3f\t%.3f\t%.3f\t", time - start_time,
+	    st[TXNS]->per_time(st[TXNS], time_delta),
+	    miss_rate);
+
+	/* per-txn stats */
+	for (i = 1; i < NUM_INDIV_STATS; i++)
+		printf("%.3f\t", st[i]->per_txn(st[i], txns_delta));
+	printf("\n");
+}
+
+/*
+ * steady_state - returns a boolean value that indicates whether a
+ * steady state has been reached. The steady-state test is based on
+ * the values of the stats being monitored.
+ */
+int
+steady_state(st)
+	STAT **st;
+{
+	int fault_steady;
+
+#if defined(HAVE_GETRUSAGE)
+	fault_steady = st[FAULT]->steady(st[FAULT]);
+#else
+	fault_steady = 1;
+#endif
+
+	return (st[TXNS]->steady(st[TXNS]) &&
+	    st[MISS]->steady(st[MISS]) &&
+	    fault_steady &&
+	    st[D_EVICT]->steady(st[D_EVICT]));
+}
+
+/*
+ * output_stats - outputs a summary of the changes in the relevant
+ * statistics during the steady-state window. The parameter specifies
+ * the number of transactions completed during the steady-state
+ * window; this value is used to compute per-txn averages.
+ */
+void
+output_stats(txns)
+	int txns;
+{
+	DB_MPOOL_STAT *mp;
+	DB_LOCK_STAT *lk;
+	DB_LOG_STAT *lg;
+	DB_TXN_STAT *tx;
+#if defined(HAVE_GETRUSAGE)
+	RUSAGE *rusage;
+	double stime, utime;
+#endif
+	u_long bytes;
+
+	fprintf(g.outfp, " txns:\t\t%10lu\n\n", (u_long)txns);
+
+	/* Mempool stats */
+	mp = g.mpstat;
+	fprintf(g.outfp, " cache hits:\t\t%10lu (%.3f per txn, %.0f%%)\n",
+	    (u_long)mp->st_cache_hit, (double)mp->st_cache_hit/txns,
+	    ((double)mp->st_cache_hit /
+	    (mp->st_cache_hit + mp->st_cache_miss)) * 100);
+	fprintf(g.outfp, " cache misses:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_cache_miss, (double)mp->st_cache_miss/txns);
+	fprintf(g.outfp, " cache mapped pages:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_map, (double)mp->st_map/txns);
+	fprintf(g.outfp, " cache page creates:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_page_create, (double)mp->st_page_create/txns);
+	fprintf(g.outfp, " cache page reads:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_page_in, (double)mp->st_page_in/txns);
+	fprintf(g.outfp, " cache page writes:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_page_out, (double)mp->st_page_out/txns);
+	fprintf(g.outfp, " clean evicts:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_ro_evict, (double)mp->st_ro_evict/txns);
+	fprintf(g.outfp, " dirty evicts:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_rw_evict, (double)mp->st_rw_evict/txns);
+	fprintf(g.outfp, " trickle writes:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_page_trickle, (double)mp->st_page_trickle/txns);
+	fprintf(g.outfp, " clean buffers:\t%10lu\n",
+	    (u_long)mp->st_page_clean);
+	fprintf(g.outfp, " dirty buffers:\t%10lu\n",
+	    (u_long)mp->st_page_dirty);
+	fprintf(g.outfp, " hash buckets used:\t%10lu\n",
+	    (u_long)mp->st_hash_buckets);
+	fprintf(g.outfp, " hash searches:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_hash_searches, (double)mp->st_hash_searches/txns);
+	fprintf(g.outfp, " longest chain:\t%10lu\n",
+	    (u_long)mp->st_hash_longest);
+	fprintf(g.outfp, " buckets examined:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_hash_examined, (double)mp->st_hash_examined/txns);
+	fprintf(g.outfp, " cache reg nowaits:\t%10lu (%.3f per txn)\n",
+	    (u_long)mp->st_region_nowait, (double)mp->st_region_nowait/txns);
+	fprintf(g.outfp, " cache reg waits:\t%10lu (%.3f per txn)\n\n",
+	    (u_long)mp->st_region_wait, (double)mp->st_region_wait/txns);
+
+	/* Lock stats */
+	lk = g.lkstat;
+	fprintf(g.outfp, " max locks:\t\t%10lu\n",
+	    (u_long)lk->st_maxnlocks);
+	fprintf(g.outfp, " max lockers:\t%10lu\n",
+	    (u_long)lk->st_maxnlockers);
+	fprintf(g.outfp, " max lock objects:\t%10lu\n",
+	    (u_long)lk->st_maxnobjects);
+	fprintf(g.outfp, " lock requests:\t%10lu (%.3f per txn)\n",
+	    (u_long)lk->st_nrequests, (double)lk->st_nrequests/txns);
+	fprintf(g.outfp, " lock conflicts (wait):\t%10lu (%.3f per txn)\n",
+	    (u_long)lk->st_lock_wait, (double)lk->st_lock_wait/txns);
+	fprintf(g.outfp, " deadlocks:\t\t%10lu (%.3f per txn)\n",
+	    (u_long)lk->st_ndeadlocks, (double)lk->st_ndeadlocks/txns);
+	fprintf(g.outfp, " lock reg nowaits:\t%10lu (%.3f per txn)\n",
+	    (u_long)lk->st_region_nowait, (double)lk->st_region_nowait/txns);
+	fprintf(g.outfp, " lock reg waits:\t%10lu (%.3f per txn)\n\n",
+	    (u_long)lk->st_region_wait, (double)lk->st_region_wait/txns);
+
+	/* Log stats */
+	lg = g.lgstat;
+	bytes = 1024*1024*lg->st_w_mbytes + lg->st_w_bytes;
+	fprintf(g.outfp, " log bytes written:\t%10lu (%.3f per txn)\n",
+	    bytes, (double)bytes/txns);
+	fprintf(g.outfp, " log writes:\t\t%10lu (%.3f per txn)\n",
+	    (u_long)lg->st_wcount, (double)lg->st_wcount/txns);
+	fprintf(g.outfp, " log buf overflows:\t%10lu (%.3f per txn)\n",
+	    (u_long)lg->st_wcount_fill, (double)lg->st_wcount_fill/txns);
+	fprintf(g.outfp, " log buf flushes:\t%10lu (%.3f per txn)\n",
+	    (u_long)lg->st_scount, (double)lg->st_scount/txns);
+	fprintf(g.outfp, " max commits/flush:\t%10lu (%.3f per txn)\n",
+	    (u_long)lg->st_maxcommitperflush,
+	    (double)lg->st_maxcommitperflush/txns);
+	fprintf(g.outfp, " min commits/flush:\t%10lu (%.3f per txn)\n",
+	    (u_long)lg->st_mincommitperflush,
+	    (double)lg->st_mincommitperflush/txns);
+	fprintf(g.outfp, " log reg nowaits:\t%10lu (%.3f per txn)\n",
+	    (u_long)lg->st_region_nowait, (double)lg->st_region_nowait/txns);
+	fprintf(g.outfp, " log reg waits:\t%10lu (%.3f per txn)\n\n",
+	    (u_long)lg->st_region_wait, (double)lg->st_region_wait/txns);
+
+	/* Transaction stats */
+	tx = g.txstat;
+	fprintf(g.outfp, " aborted txns:\t%10lu (%.3f per txn)\n",
+	    (u_long)tx->st_naborts, (double)tx->st_naborts/txns);
+	fprintf(g.outfp, " txn reg nowaits:\t%10lu (%.3f per txn)\n",
+	    (u_long)tx->st_region_nowait, (double)tx->st_region_nowait/txns);
+	fprintf(g.outfp, " txn reg waits:\t%10lu (%.3f per txn)\n\n",
+	    (u_long)tx->st_region_wait, (double)tx->st_region_wait/txns);
+
+	/* Rusage; cast like every other "%lu" argument in this function */
+#if defined(HAVE_GETRUSAGE)
+	rusage = g.rusage;
+	fprintf(g.outfp, " maj page faults:\t%10lu (%.3f per txn)\n",
+	    (u_long)rusage->ru_majflt, (double)rusage->ru_majflt/txns);
+	utime = rusage->ru_utime.tv_sec + rusage->ru_utime.tv_usec/1e6;
+	stime = rusage->ru_stime.tv_sec + rusage->ru_stime.tv_usec/1e6;
+	fprintf(g.outfp, " time active:\t%10.3f (%.3f u, %.3f s)\n",
+	    utime + stime, utime, stime);
+#endif
+}
+
+/*
+ * store_stat_deltas - decrements the current stat values by the
+ * specified previous values, so that the global stat structures
+ * contain the changes in the stats since the last measurements
were
+ * made.
+ */
+void
+store_stat_deltas(mp_start, lk_start, lg_start, tx_start, rusg_start)
+	DB_MPOOL_STAT *mp_start;
+	DB_LOCK_STAT *lk_start;
+	DB_LOG_STAT *lg_start;
+	DB_TXN_STAT *tx_start;
+	RUSAGE *rusg_start;
+{
+
+	g.mpstat->st_cache_hit -= mp_start->st_cache_hit;
+	g.mpstat->st_cache_miss -= mp_start->st_cache_miss;
+	g.mpstat->st_map -= mp_start->st_map;
+	g.mpstat->st_page_create -= mp_start->st_page_create;
+	g.mpstat->st_page_in -= mp_start->st_page_in;
+	g.mpstat->st_page_out -= mp_start->st_page_out;
+	g.mpstat->st_ro_evict -= mp_start->st_ro_evict;
+	g.mpstat->st_rw_evict -= mp_start->st_rw_evict;
+	g.mpstat->st_page_trickle -= mp_start->st_page_trickle;
+	g.mpstat->st_hash_searches -= mp_start->st_hash_searches;
+	g.mpstat->st_hash_examined -= mp_start->st_hash_examined;
+	g.mpstat->st_region_nowait -= mp_start->st_region_nowait;
+	g.mpstat->st_region_wait -= mp_start->st_region_wait;
+
+	g.lkstat->st_nrequests -= lk_start->st_nrequests;
+	g.lkstat->st_lock_wait -= lk_start->st_lock_wait;
+	g.lkstat->st_ndeadlocks -= lk_start->st_ndeadlocks;
+	g.lkstat->st_region_nowait -= lk_start->st_region_nowait;
+	g.lkstat->st_region_wait -= lk_start->st_region_wait;
+
+	g.lgstat->st_w_mbytes -= lg_start->st_w_mbytes;
+	if (g.lgstat->st_w_bytes < lg_start->st_w_bytes) {
+		g.lgstat->st_w_mbytes -= 1;	/* borrow one megabyte */
+		g.lgstat->st_w_bytes +=
+		    (1024*1024 - lg_start->st_w_bytes);
+	} else
+		g.lgstat->st_w_bytes -= lg_start->st_w_bytes;
+	g.lgstat->st_wcount -= lg_start->st_wcount;
+	g.lgstat->st_wcount_fill -= lg_start->st_wcount_fill;
+	g.lgstat->st_scount -= lg_start->st_scount;
+	g.lgstat->st_region_nowait -= lg_start->st_region_nowait;
+	g.lgstat->st_region_wait -= lg_start->st_region_wait;
+
+	g.txstat->st_naborts -= tx_start->st_naborts;
+	g.txstat->st_region_nowait -= tx_start->st_region_nowait;
+	g.txstat->st_region_wait -= tx_start->st_region_wait;
+
+#if defined(HAVE_GETRUSAGE)
+	g.rusage->ru_utime.tv_sec -= rusg_start->ru_utime.tv_sec;
+	if (g.rusage->ru_utime.tv_usec < rusg_start->ru_utime.tv_usec) {
+		g.rusage->ru_utime.tv_sec -= 1;	/* borrow one second */
+		g.rusage->ru_utime.tv_usec +=
+		    (1e6 - rusg_start->ru_utime.tv_usec);
+	} else
+		g.rusage->ru_utime.tv_usec -= rusg_start->ru_utime.tv_usec;
+	g.rusage->ru_stime.tv_sec -= rusg_start->ru_stime.tv_sec;
+	if (g.rusage->ru_stime.tv_usec < rusg_start->ru_stime.tv_usec) {
+		g.rusage->ru_stime.tv_sec -= 1;	/* borrow one second */
+		g.rusage->ru_stime.tv_usec +=
+		    (1e6 - rusg_start->ru_stime.tv_usec);
+	} else
+		g.rusage->ru_stime.tv_usec -= rusg_start->ru_stime.tv_usec;
+	g.rusage->ru_majflt -= rusg_start->ru_majflt;
+#endif
+}
+
+/*
+ * stat_init - creates the stat thread
+ */
+int
+stat_init()
+{
+
+	return ((stat_threads = spawn_kids(
+	    "stat thread", 1, stat_thread)) == NULL ? 1 : 0);
+}
+
+/*
+ * stat_shutdown - waits for the stat thread to complete
+ */
+int
+stat_shutdown()
+{
+
+	return (wait_kids("stat thread", stat_threads));
+}
+
+/*
+ * stat_thread - the "main" function of the stat thread
+ */
+void *
+stat_thread(arg)
+	void *arg;
+{
+	STAT *st[NUM_INDIV_STATS];
+	DB_MPOOL_STAT **saved_mp;
+	DB_LOCK_STAT **saved_lk;
+	DB_LOG_STAT **saved_lg;
+	DB_TXN_STAT **saved_tx;
+	RUSAGE *saved_rusage;
+	double prev_time, *saved_time;
+	double start_time, time, time_delta;
+	int i, iter, iter_betw_samples, max_time_exceeded, n;
+	int qhead, *saved_txns, sec_betw_samples, txns, txns_delta;
+
+	COMPQUIET(arg, 0);
+
+	/* Misc. initializations; these must precede the first "goto exit". */
+	qhead = 0;
+	prev_time = start_time = 0.0;
+	saved_mp = NULL;
+	saved_lk = NULL;
+	saved_lg = NULL;
+	saved_tx = NULL;
+	saved_rusage = NULL;
+	saved_time = NULL;
+	saved_txns = NULL;
+	n = config.steady_state_num_samples;
+
+	/* Initialize the stat structs; pointer arrays are zeroed for exit. */
+	g.txns_ptr = &txns;
+	if (init_stats(st) != 0)
+		goto exit;
+	if (__os_calloc(g.dbenv, n, sizeof(DB_MPOOL_STAT *), &saved_mp) != 0)
+		goto exit;
+	if (__os_calloc(g.dbenv, n, sizeof(DB_LOCK_STAT *), &saved_lk) != 0)
+		goto exit;
+	if (__os_calloc(g.dbenv, n, sizeof(DB_LOG_STAT *), &saved_lg) != 0)
+		goto exit;
+	if (__os_calloc(g.dbenv, n, sizeof(DB_TXN_STAT *), &saved_tx) != 0)
+		goto exit;
+	if (__os_malloc(g.dbenv, n * sizeof(RUSAGE), &saved_rusage) != 0)
+		goto exit;
+	if (__os_malloc(g.dbenv, n * sizeof(double), &saved_time) != 0)
+		goto exit;
+	if (__os_malloc(g.dbenv, n * sizeof(int), &saved_txns) != 0)
+		goto exit;
+	for (i = 0; i < config.steady_state_num_samples; i++) {
+		saved_mp[i] = NULL;
+		saved_lk[i] = NULL;
+		saved_lg[i] = NULL;
+		saved_tx[i] = NULL;
+		memset(&(saved_rusage[i]), 0, sizeof(saved_rusage[i]));
+		saved_time[i] = 0.0;
+		saved_txns[i] = 0;
+	}
+
+	/* Print initial messages. */
+	if (g.stats_trace)
+		printf("time\ttput\tmiss_r\tc_evict\td_evict\thit\tmiss_t"
+		    "\tmp_wait\tmp_out\tlk_conf\taborts\tlk_wait"
+		    "\tlg_writ\tlg_wait\tfaults\n");
+	g.dbenv->errx(g.dbenv, "stat thread: %lu",(u_long)os_thread_id());
+
+	/*
+	 * Determine how frequently we should update our stored
+	 * stat samples and test to see if we've reached a steady-state.
+	 */
+	sec_betw_samples =
+	    config.steady_state_time / config.steady_state_num_samples;
+	if (g.stats_trace) {
+		iter_betw_samples =
+		    sec_betw_samples / config.stat_check_interval;
+		if (iter_betw_samples < 1)
+			iter_betw_samples = 1;
+	} else {
+		config.stat_check_interval = sec_betw_samples;
+		iter_betw_samples = 1;
+	}
+
+	/* Monitor the stats. */
+	iter = 0;
+	for (;;) {
+		if (g.shutdown == 1)
+			goto exit;
+
+		if (!g.threads_started) {
+			/* Wait until access-method threads have started. */
+			__os_sleep(NULL, 1, 0);
+			continue;
+		} else if (iter > 0)
+			__os_sleep(NULL, config.stat_check_interval, 0);
+
+		/* Get the current time and statistics. */
+		if (get_stats(&time) != 0)
+			goto exit;
+		if (start_time == 0.0)
+			start_time = time;
+		if (g.stats_trace && iter > 0)
+			output_stats_line(st, time, prev_time, start_time);
+
+		/* Store the current values. */
+		prev_time = time;
+		for (i = 0; i < NUM_INDIV_STATS; i++)
+			st[i]->update_prev(st[i]);
+
+		/*
+		 * Skip sampling unless it is time to check for a steady state.
+		 * NOTE(review): iter never advances if g.iterations > 0.
+		 */
+		if (g.iterations > 0 || iter++ % iter_betw_samples != 0)
+			continue;
+
+		/*
+		 * Compute and store the current per-txn and per-time
+		 * stat samples.
+		 */
+		time_delta = time - saved_time[qhead];
+		txns_delta = txns - saved_txns[qhead];
+		st[TXNS]->update_samples(st[TXNS], time_delta);
+		for (i = 1; i < NUM_INDIV_STATS; i++)
+			st[i]->update_samples(st[i], (double)txns_delta);
+
+		/* Check if it's time to stop. */
+		g.steady_state_reached =
+		    (iter > config.steady_state_num_samples*iter_betw_samples ?
+		    steady_state(st) : 0);
+		max_time_exceeded = ((time - start_time) >
+		    (double)config.steady_state_max_time);
+		if (g.steady_state_reached || max_time_exceeded) {
+			/* Oldest slot; NOTE(review): NULL before ring fills. */
+			qhead = (qhead + 1) % config.steady_state_num_samples;
+			g.steady_state_start_time = saved_time[qhead];
+			g.steady_state_end_time = time;
+			g.steady_state_start_txns = saved_txns[qhead];
+			g.steady_state_end_txns = txns;
+			store_stat_deltas(saved_mp[qhead],
+			    saved_lk[qhead], saved_lg[qhead],
+			    saved_tx[qhead], &saved_rusage[qhead]);
+			goto exit;
+		}
+
+		/* Update the queues of saved stats. */
+		if (iter > 1)
+			qhead = (qhead + 1) % config.steady_state_num_samples;
+		saved_time[qhead] = time;
+		saved_txns[qhead] = txns;
+		if (saved_mp[qhead] != NULL)
+			__os_ufree(g.dbenv, saved_mp[qhead]);
+		saved_mp[qhead] = g.mpstat;
+		if (saved_lk[qhead] != NULL)
+			__os_ufree(g.dbenv, saved_lk[qhead]);
+		saved_lk[qhead] = g.lkstat;
+		if (saved_lg[qhead] != NULL)
+			__os_ufree(g.dbenv, saved_lg[qhead]);
+		saved_lg[qhead] = g.lgstat;
+		if (saved_tx[qhead] != NULL)
+			__os_ufree(g.dbenv, saved_tx[qhead]);
+		saved_tx[qhead] = g.txstat;
+		saved_rusage[qhead] = *(g.rusage);
+	}
+	/* NOTREACHED */
+
+exit:	g.shutdown = 1;
+
+	for (i = 0; i < config.steady_state_num_samples; i++) {
+		if (saved_mp != NULL && saved_mp[i] != NULL)
+			__os_ufree(g.dbenv, saved_mp[i]);
+		if (saved_lk != NULL && saved_lk[i] != NULL)
+			__os_ufree(g.dbenv, saved_lk[i]);
+		if (saved_lg != NULL && saved_lg[i] != NULL)
+			__os_ufree(g.dbenv, saved_lg[i]);
+		if (saved_tx != NULL && saved_tx[i] != NULL)
+			__os_ufree(g.dbenv, saved_tx[i]);
+	}
+	if (saved_mp != NULL)
+		__os_free(g.dbenv, saved_mp);
+	if (saved_lk != NULL)
+		__os_free(g.dbenv, saved_lk);
+	if (saved_lg != NULL)
+		__os_free(g.dbenv, saved_lg);
+	if (saved_tx != NULL)
+		__os_free(g.dbenv, saved_tx);
+	if (saved_rusage != NULL)
+		__os_free(g.dbenv, saved_rusage);
+	if (saved_time != NULL)
+		__os_free(g.dbenv, saved_time);
+	if (saved_txns != NULL)
+		__os_free(g.dbenv, saved_txns);
+	for (i = 0; i < NUM_INDIV_STATS; i++)
+		stat_destroy(st[i]);
+
+	return (NULL);
+}
--- db-4.6.21/test_perf/perf_sync.c	1970-01-01 10:00:00.000000000 +1000
+++ db-4.6.21-safe-si2/test_perf/perf_sync.c	2007-11-05 15:51:13.723917000 +1100
@@ -0,0 +1,58 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1999,2007 Oracle. All rights reserved.
+ *
+ * $Id: perf_sync.c,v 12.7 2007/05/17 15:15:58 bostic Exp $
+ */
+
+#include "perf_extern.h"
+
+static os_thread_t *sync_threads;	/* Sync threads.
*/ +/* sync_init -- start the sync thread with spawn_kids(); returns 1 if spawn_kids() returns NULL, 0 otherwise. */ +int +sync_init() +{ + return ((sync_threads = + spawn_kids("sync threads", 1, sync_thread)) == NULL ? 1 : 0); +} +/* sync_shutdown -- return the result of wait_kids() on the threads stored by sync_init(). */ +int +sync_shutdown() +{ + return (wait_kids("sync threads", sync_threads)); +} +/* sync_thread -- thread body: until g.shutdown is set, call the sync method of dbp[0] for each of the config.file_count files, then snooze; on a sync error, report it, set g.shutdown and return. Always returns NULL. */ +void * +sync_thread(arg) + void *arg; +{ + int i, ret; + + arg = 0; /* UNUSED. */ + + g.dbenv->errx(g.dbenv, "Sync thread: %lu", (u_long)os_thread_id()); + + for (;;) { + if (g.shutdown) + return (NULL); + + for (i = 0; i < config.file_count; i++) { + if ((ret = config.file[i]->dbp[0]->sync( + config.file[i]->dbp[0], 0)) != 0) { + g.dbenv->err(g.dbenv, ret, + "sync thread: %s", db_strerror(ret)); + g.shutdown = 1; + return (NULL); + } + } + + if (g.verbose) + g.dbenv->errx(g.dbenv, + "sync thread: sync completed"); + + /* XXX: Don't convoy: wait a randomized interval before the next pass. */ + snooze(g.dbenv, 1000 * random_int(&g.rand_state, 1, 10)); + } + /* NOTREACHED */ +} --- db-4.6.21/test_perf/perf_thread.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_thread.c 2007-11-07 22:35:08.853860000 +1100 @@ -0,0 +1,229 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 2000,2007 Oracle. All rights reserved. + * + * $Id: perf_thread.c,v 12.9 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +/* + * thr_type_create - constructor for THR_TYPE objects; returns NULL if __os_malloc fails, otherwise an object with both set pointers NULL and special == 0 + */ +THR_TYPE * +thr_type_create() +{ + THR_TYPE *thr_type; + + if (__os_malloc(g.dbenv, sizeof(THR_TYPE), &thr_type) != 0) + return (NULL); + + thr_type->txn_types = NULL; + thr_type->txn_size_dists = NULL; + thr_type->special = 0; + + return (thr_type); +} + +/* + * thr_type_destroy - destructor for THR_TYPE objects; a NULL argument is a no-op + */ +void +thr_type_destroy(thr_type) + THR_TYPE *thr_type; +{ + + if (thr_type == NULL) + return; + + /* + * We assume that the actual TXN_TYPE objects are + * destroyed elsewhere, so we tell set_destroy() *not* + * to destroy the items in the set.
+ */ + set_destroy(thr_type->txn_types, 0); + set_destroy(thr_type->txn_size_dists, 1); /* NOTE(review): flag is 1 here but 0 for txn_types -- presumably the set items are destroyed too; confirm against set_destroy(). */ + __os_free(g.dbenv, thr_type); +} + +/* + * thr_info_create - constructor for THR_INFO objects; returns NULL if any allocation fails + */ +THR_INFO * +thr_info_create(id, type) + int id; + THR_TYPE *type; +{ + THR_INFO *thr_info; + int i, malloc_failed; + + if (__os_malloc(g.dbenv, sizeof(THR_INFO), &thr_info) != 0) + return (NULL); + /* Start with every pointer field NULL so thr_info_destroy() can be run on a partially built object; keygrp is not allocated in this function. */ + thr_info->id = id; + thr_info->type = type; + thr_info->keystr = NULL; + thr_info->prev_key = NULL; + thr_info->tmp_key = NULL; + thr_info->datastr = NULL; + thr_info->tmp_data = NULL; + thr_info->tmp_data2 = NULL; + thr_info->keygrp = NULL; + thr_info->txn_count = 0; + thr_info->op_count = NULL; + thr_info->rec_count = NULL; + thr_info->start_time = 0.0; + thr_info->end_time = 0.0; + + /* + * Perform the remaining mallocs, noting if any of them fail. + * If failure does occur, we use thr_info_destroy() to free + * up the space allocated by the prior mallocs. The || chain stops at the first failing call. + */ + malloc_failed = + (__os_malloc(g.dbenv, MAX_KEY_SIZE, &thr_info->keystr) != 0 || + __os_malloc(g.dbenv, MAX_KEY_SIZE, &thr_info->prev_key) != 0 || + __os_malloc(g.dbenv, MAX_KEY_SIZE, &thr_info->tmp_key) != 0 || + __os_malloc(g.dbenv, MAX_DATA_SIZE, &thr_info->datastr) != 0 || + __os_malloc(g.dbenv, MAX_DATA_SIZE, &thr_info->tmp_data) != 0 || + __os_malloc(g.dbenv, MAX_DATA_SIZE, &thr_info->tmp_data2) != 0 || + __os_malloc(g.dbenv, + NUM_OP_TYPES * sizeof(int), &thr_info->op_count) != 0 || + __os_malloc(g.dbenv, + NUM_OP_TYPES * sizeof(int), &thr_info->rec_count) != 0); + if (malloc_failed) { + thr_info_destroy(thr_info); + return (NULL); + } + /* Zero the per-operation counters. */ + for (i = 0; i < NUM_OP_TYPES; i++) { + thr_info->op_count[i] = 0; + thr_info->rec_count[i] = 0; + } + + return (thr_info); +} + +/* + * thr_info_destroy - destructor for THR_INFO objects; a NULL argument is a no-op, and keygrp is not freed here + */ +void +thr_info_destroy(thr_info) + THR_INFO *thr_info; +{ + + if (thr_info == NULL) + return; + /* Free each buffer that was allocated, then the object itself. */ + if (thr_info->keystr != NULL) + __os_free(g.dbenv, thr_info->keystr); + if
(thr_info->prev_key != NULL) + __os_free(g.dbenv, thr_info->prev_key); + if (thr_info->tmp_key != NULL) + __os_free(g.dbenv, thr_info->tmp_key); + if (thr_info->datastr != NULL) + __os_free(g.dbenv, thr_info->datastr); + if (thr_info->tmp_data != NULL) + __os_free(g.dbenv, thr_info->tmp_data); + if (thr_info->tmp_data2 != NULL) + __os_free(g.dbenv, thr_info->tmp_data2); + if (thr_info->op_count != NULL) + __os_free(g.dbenv, thr_info->op_count); + if (thr_info->rec_count != NULL) + __os_free(g.dbenv, thr_info->rec_count); + __os_free(g.dbenv, thr_info); +} + +/* + * thread_run - function executed by each of the access-method threads; arg carries the index of this thread in config.thread[], cast to void * + */ +void * +thread_run(arg) + void *arg; +{ + DBT key, data; + SET *txn_types, *txn_size_dists; + db_timespec v; + int i, ret, special, txn_size; + long thr_id; + /* The thread id is passed by value in arg. */ + thr_id = (long)arg; +#ifdef HAVE_VXWORKS + ioTaskStdSet(0, 1, g.stdfd); + ioTaskStdSet(0, 2, g.stdfd); +#endif + g.dbenv->errx(g.dbenv, "Access method thread: %lu: id %ld", + (u_long)os_thread_id(), thr_id); + + /* + * XXX + * Solaris gets mad if each thread doesn't get a chance to start, + * i.e., the threads proceed sequentially without this call. + */ + __os_sleep(NULL, 1, 0); + + /* + * Get the transaction types and transaction-group-size + * distributions for this thread (thr_id is assigned from arg a second time here; it already holds that value). + */ + thr_id = (long)arg; + assert(thr_id >= 0 && thr_id < config.nthreads); + db_init_random(THR_RS, g.seed + thr_id); + txn_types = config.thread[thr_id]->type->txn_types; + txn_size_dists = config.thread[thr_id]->type->txn_size_dists; + special = config.thread[thr_id]->type->special; + + /* Initialize the DBTs, handing init_dbts() the keystr and datastr buffers of this thread. */ + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + init_dbts(NULL, &key, &data, config.thread[thr_id]->keystr, + config.thread[thr_id]->datastr, NULL, MAX_KEY_SIZE, MAX_DATA_SIZE); + + /* Record our start time and set the global threads_started flag. */ + __os_gettime(g.dbenv, &v); + config.thread[thr_id]->start_time = MKTIME(v); + g.threads_started = 1; + + /* Perform the sequence of transactions.
*/ + for (i = config.thread[thr_id]->txn_count; + config.killtest_iter > 0 || g.iterations == 0 || i < g.iterations; + i = config.thread[thr_id]->txn_count) { + /* + * We only print this if we're not verbose. + * Otherwise, we'll detail the operations in + * txn_execute(). + */ + if (!g.verbose && i % 100 == 0) + g.dbenv->errx(g.dbenv, "[%ld: %d]", thr_id, i); +#ifdef HAVE_VXWORKS + if (i % 100 == 0) + printf("[%ld: %d]\n", thr_id, i); +#endif + + /* + * Select the number of transaction types to be + * grouped together into the next transaction (1 when the thread type has no size distributions). + */ + if (txn_size_dists == NULL) + txn_size = 1; + else + txn_size = txn_size_dists->select_from_dists( + txn_size_dists, THR_RS, 1); + + /* Execute a transaction; any non-zero return ends the loop. */ + ret = txn_execute(txn_types, + txn_size, &key, &data, thr_id, special); + if (ret != 0) + break; + /* Count only transactions for which txn_execute() returned 0. */ + config.thread[thr_id]->txn_count++; + } + + /* Record our end time. */ + __os_gettime(g.dbenv, &v); + config.thread[thr_id]->end_time = MKTIME(v); + /* NOTE(review): this message is also printed when the loop ended on a txn_execute() error. */ + g.dbenv->errx(g.dbenv, "[%ld]: exiting cleanly", thr_id); + return (NULL); +} --- db-4.6.21/test_perf/perf_trickle.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/test_perf/perf_trickle.c 2007-11-05 15:51:13.647919000 +1100 @@ -0,0 +1,66 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1999,2007 Oracle. All rights reserved. + * + * $Id: perf_trickle.c,v 12.6 2007/05/17 15:15:58 bostic Exp $ + */ + +#include "perf_extern.h" + +static long percent; /* Trickle percent. */ +static os_thread_t *trickle_threads; /* Trickle threads. */ +/* trickle_init -- store pct in percent; if it is 0, spawn nothing and return 0; otherwise start the trickle thread with spawn_kids() and return 1 if that returns NULL, 0 otherwise. */ +int +trickle_init(pct) + long pct; +{ + if ((percent = pct) == 0) + return (0); + + return ((trickle_threads = + spawn_kids("trickle threads", 1, trickle_thread)) == NULL ? 1 : 0); +} +/* trickle_shutdown -- return the result of wait_kids() on trickle_threads. NOTE(review): trickle_threads is never assigned when trickle_init() was given pct == 0, and the label passed here is spelled with an underscore while trickle_init() passes it with a space -- confirm both are intended. */ +int +trickle_shutdown() +{ + return (wait_kids("trickle_threads", trickle_threads)); +} +/* trickle_thread -- thread body: until g.shutdown is set, call memp_trickle with the stored percent, then snooze; on error, report it, set g.shutdown and return. Always returns NULL. */ +void * +trickle_thread(arg) + void *arg; +{ + int nwrote, ret; + + arg = 0; /* UNUSED.
*/ +#ifdef HAVE_VXWORKS + ioTaskStdSet(0, 1, g.stdfd); + ioTaskStdSet(0, 2, g.stdfd); +#endif + + g.dbenv->errx(g.dbenv, "Trickle output thread: %lu", + (u_long)os_thread_id()); + + for (;;) { + if (g.shutdown) + return (NULL); + + if ((ret = g.dbenv->memp_trickle(g.dbenv, + (int)percent, &nwrote)) != 0) { + g.dbenv->err(g.dbenv, ret, + "trickle thread: %s", db_strerror(ret)); + g.shutdown = 1; + return (NULL); + } + + if (g.verbose) + g.dbenv->errx(g.dbenv, + "trickle thread: wrote %d buffers", nwrote); + + /* XXX: Don't convoy. */ + snooze(g.dbenv, 1000 * random_int(&g.rand_state, 1, 10)); + } + /* NOTREACHED */ +} --- db-4.6.21/test_perf/perf_txn.c 1970-01-01 10:00:00.000000000 +1000 +++ db-4.6.21-safe-si2/t