/*	pbp.hpp

	This is the reference C++ library implementation of the
	new Parallel Bit Pattern computation model.

	Copyright (C) 2022 by Henry Dietz, All rights reserved.

	This library is free software available under CC BY 4.0,
	https://creativecommons.org/licenses/by/4.0/

	This code is distributed in the hope that it will be
	useful, but WITHOUT ANY WARRANTY; without even the
	implied warranty of MERCHANTABILITY or FITNESS FOR A
	PARTICULAR PURPOSE.

	WARNING: This code has many KNOWN bugs! In this version,
	pfloat support also has been removed and various tweaks
	have been made to support use in the EE599/699 Quantum
	Computing course taught at the University of Kentucky by
	H. Dietz in the Spring 2026 semester. This code should
	be viewed as being of preliminary pre-release quality,
	and later versions may not be fully compatible.
*/

#ifndef	_PBPH__
#define	_PBPH__

#include <iostream>
using namespace std;

#define	_PBP_VERSION	260414

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <strings.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>
#include <math.h>

// Regular Expression constants
#define	REREGS		(64*1024)
#ifndef	REWAYS
#define	REWAYS		(16)
#endif
#define	REBITS		(1<<REWAYS)
#define	REAOBS		(1<<(REWAYS-AOBWAYS))
#define	forREAOBS(V)	for (int V=0; V<REAOBS; ++V)

// Array of Bits constants
#ifndef	AOBREGS
#define	AOBREGS		(64*1024)
#endif
#ifndef	AOBWAYS
#define	AOBWAYS		(5)
#endif
#define	AOBBITS		(1<<AOBWAYS)
#ifndef	AOBPOW2
#define	AOBPOW2		(5) // power of 2 bits per word, 5 for 32-bit, 6 for 64-bit, AOBWAYS for SWAR
#endif
#define	AOBWORDS	(1<<(AOBWAYS-AOBPOW2))
#define	forAOBWORDS(V)	for (int V=0; V<AOBWORDS; ++V)

// Enable performance counting?
#ifndef	GATELOG
#define	GATELOG 0
#endif
#if	GATELOG>0
int	gates = 0;
#define	GATESWORD	(1<<GATELOG)
#define	GATES(N)	(gates += (N))	// count gates
#define	SHOWGATES()	fprintf(stderr, "%d gates for word-level code\n", gates);
#else
#define	GATESWORD	0
#define	GATES(N)	// nothing
#define	SHOWGATES()	// nothing
#endif

// Pattern Float normalization rule
// define NORMLSB as non-0 to normalize to 1 LSB rather than 1 MSB
#ifndef	NORMLSB
#define	NORMLSB		0		// default to usual normalization
#endif

// Pattern Bit constants
// deliberately skips REREGS, cause that can be answer for First(), Ones()
#define	pbit0		(0)
#define	pbit1		(1)
#define	pbitH(D)	((D)+2)		// entanglement dimension D
#define	pbitValid(P)	((P)<REREGS)	// a valid pbit value
#define	pbitNaN		(REREGS+1)

// Pattern Int constants
#define	PINTBITS	(REWAYS)	// restricted by entanglement support
#define	forprec(V)	for (int V=0; V<prec; ++V)

// Pattern Float constants
#define	EXPOF0		-128		// smallest non-zero value exponent

// Array of Bits structures
typedef	uint16_t AoBreg_t;
#if	(AOBPOW2 == 5)
typedef	uint32_t AoBword_t;
typedef	uint32_t AoBpart_t;
#define	AOBPARTS	1
#elif	(AOBPOW2 == 6)
typedef	uint64_t AoBword_t;
typedef	uint64_t AoBpart_t;
#define	AOBPARTS	1
#else
// use SWAR type; note that size is given in bytes
typedef	uint32_t AoBword_t __attribute__ ((vector_size (1<<(AOBPOW2-3))));
typedef	uint32_t AoBpart_t;
#define	AOBPARTS	((1<<AOBPOW2)/32)
#endif

// Regular Expression structures
#if	(REWAYS > 15)
typedef	uint32_t REreg_t;
#ifdef	BELAZY
#define	RELAZY	0x80000000
#endif
#else
typedef	uint16_t REreg_t;
#ifdef	BELAZY
#define	RELAZY	0x8000
#endif
#endif

#define	register	// C++ warns about registers, so null 'em

/*	Applicative Cache layer for PBP.
*/

typedef	struct {
	AoBreg_t op;		// opcode
	AoBreg_t a, b, c;	// c = a op b
}	AC_t;

#define	ACSIZE	(32 * 1024)	// size of applicative cache

class AC
{
	// Applicative Cache
	AC_t ac[ACSIZE];
	int spot;
	int hit, miss, rehash;

public:
	AC() {
		bzero(((void *) &ac[0]), sizeof(ac));
		hit = 0;
		miss = 0;
		rehash = 0;
	}

	int hash(AoBreg_t op, AoBreg_t a, AoBreg_t b) {
		// should really pick this by benchmarking...
		register int h = (op * (ACSIZE/256));
		h ^= a;
		h ^= b * (ACSIZE/AOBREGS);
		return(h & (ACSIZE-1));
	}

	int avail(AoBreg_t op, AoBreg_t a, AoBreg_t b) {
		register int h = hash(op, a, b);

		for (;;) {
			// empty entry
			if (!ac[h].op) {
				++miss;
				ac[h].op = op;
				ac[h].a = a;
				ac[h].b = b;
				spot = h;
				return(-1);
			}

			// found it
			if ((ac[h].op == op) &&
			    (ac[h].a == a) &&
			    (ac[h].b == b)) {
				++hit;
				return(ac[h].c);
			}

			++rehash;
			h = ((h + 13) & (ACSIZE-1));
		}
	}

	int assoc(AoBreg_t op, AoBreg_t a, AoBreg_t b) {
		// assoiciative operator, so normalize operands
		return((b < a) ? avail(op, a, b) : avail(op, b, a));
	}

	void Stats() {
		fprintf(stderr, "AC: %d hits, %d misses, %d rehashes\n", hit, miss, rehash);
	}

	AoBreg_t cache(AoBreg_t c) {
		return(ac[spot].c = c);
	}
};

/*	Array Of Bits layer for PBP.
*/

// initialization patterns for words
AoBword_t AoBinits[] = {
#if (AOBPOW2==5)
	0x00000000,
	0xffffffff,
	0xaaaaaaaa,
	0xcccccccc,
	0xf0f0f0f0,
	0xff00ff00,
	0xffff0000
#elif (AOBPOW2==6)
	0x0000000000000000LL,
	0xffffffffffffffffLL,
	0xaaaaaaaaaaaaaaaaLL,
	0xccccccccccccccccLL,
	0xf0f0f0f0f0f0f0f0LL,
	0xff00ff00ff00ff00LL,
	0xffff0000ffff0000LL,
	0xffffffff00000000LL
#elif (AOBPOW2==7)
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
	{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
	{ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa },
	{ 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc },
	{ 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0 },
	{ 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00 },
	{ 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000 },
	{ 0x00000000, 0xffffffff, 0x00000000, 0xffffffff }, // note word order
	{ 0x00000000, 0x00000000, 0xffffffff, 0xffffffff }
#elif (AOBPOW2==8)
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
	{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
	{ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa },
	{ 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc },
	{ 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0 },
	{ 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00 },
	{ 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000 },
	{ 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff }, // note word order
	{ 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
#elif (AOBPOW2==9)
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
	{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	  0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
	{ 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
	  0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa },
	{ 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc,
	  0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc, 0xcccccccc },
	{ 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0,
	  0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0, 0xf0f0f0f0 },
	{ 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00,
	  0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00, 0xff00ff00 },
	{ 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000,
	  0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000, 0xffff0000 },
	{ 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, // note word order
	  0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff, 0x00000000, 0xffffffff },
	{ 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff,
	  0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
#else
#error("AOBPOW2>9 not yet supported")
#endif
};

// mask patterns for trailing zeros
AoBword_t AoBtrail[] = {
#if (AOBPOW2==5)
	0x0000ffff,
	0x000000ff,
	0x0000000f,
	0x00000003,
	0x00000001
#elif (AOBPOW2==6)
	0x00000000ffffffffLL,
	0x000000000000ffffLL,
	0x00000000000000ffLL,
	0x000000000000000fLL,
	0x0000000000000003LL,
	0x0000000000000001LL
#elif (AOBPOW2==7)
	{ 0x00000000, 0x00000000, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x0000ffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x000000ff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x0000000f },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000003 },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000001 }
#elif (AOBPOW2==8)
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000ff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000000f },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000003 },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001 }
#elif (AOBPOW2==9)
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ffff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x000000ff },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000000f },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000003 },
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001 }
#else
#error("AOBPOW2>9 not yet supported")
#endif
};

class AoB
{
	AC		ac; // applicative cache for AoB values
	AoBword_t	reg[AOBREGS][AOBWORDS];
	AoBreg_t	index[AOBREGS]; // 0 means empty, >0 means reg[index-1]
	AoBreg_t	regs;
	int		hit, rehash;

public:
	// Helpers
	void copy(const AoBreg_t d, const AoBreg_t s) {
		memcpy(&(reg[d][0]), &(reg[s][0]), (AOBWORDS * sizeof(AoBword_t)));
	}

	int match(const AoBreg_t a) {
		forAOBWORDS(i) {
#if	(AOBPOW2 > 6)
			uint32_t *p = ((uint32_t *) &(reg[a][i]));
			uint32_t *q = ((uint32_t *) &(reg[regs][i]));
			for (int j=0; j<((1<<AOBPOW2)/32); ++j) {
				if (p[j] != q[j]) return(0);
			}
#else
			if (reg[a][i] != reg[regs][i]) return(0);
#endif
		}
		return(1);
	}

	void Stats() {
		ac.Stats();
		fprintf(stderr, "AoB: %d regs, %d hits, %d rehashes\n", regs, hit, rehash);
	}

	void Show(const AoBreg_t a) {
		forAOBWORDS(i) {
#if	(AOBPOW2 == 6)
			for (int b=0; b<64; ++b) {
				putc(((reg[a][i] & (1LL << b)) ? '1' : '0'), stderr);
			}
#elif	(AOBPOW2 == 5)
			for (int b=0; b<32; ++b) {
				putc(((reg[a][i] & (1 << b)) ? '1' : '0'), stderr);
			}
#else
			uint32_t *p = ((uint32_t *) &(reg[a][i]));
			for (int b=0; b<(1<<AOBPOW2); ++b) {
				putc(((p[b / 32] & (1 << (b % 32))) ? '1' : '0'), stderr);
			}
#endif
			putc('\n', stderr);
		}
	}

	// Constructor
	AoB() {
		// set-up Stats
		hit = 0;
		rehash = 0;

		// set-up 0, 1, H(0), H(1), H(2), ...
		bzero(&(index[0]), (AOBREGS * sizeof(index[0])));
		regs = 0;
		while (regs < (AOBWAYS+2)) {
			forAOBWORDS(i) {
				reg[regs][i] = AoBinits[(regs>(AOBPOW2+1)) ? ((i & (1<<(regs-(AOBPOW2+2)))) != 0) : regs];
			}

			uniq(); // making unique entry also increments regs
		}
	}

	// Uniqueness functions
	AoBreg_t hash() {
		// should really pick this by benchmarking...
		AoBreg_t h = 601; // a nice random starting bit pattern
		AoBreg_t *p = ((AoBreg_t *) &(reg[regs][0]));

		// the following seems better...?
		h = p[0];
		h += (p[(h / 1031) & (AOBWORDS-1)] / 521);
		h += (p[(h / 173) & (AOBWORDS-1)] / 7);
		h -= (97 * p[AOBWORDS-1]);
		return(h & (AOBREGS-1));
	}

	AoBreg_t uniq() {
		AoBreg_t h = hash();
		for (;;) {
			// empty entry
			if (!index[h]) { index[h] = ++regs; return(regs - 1); }

			// found it
			if (index[h] && match(index[h] - 1)) { ++hit; return(index[h] - 1); }

			++rehash;
			h = ((h + 241) & (AOBREGS-1));
		}
	}

	// Basic operations
	AoBreg_t And(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		register AoBreg_t ta, tb;
		if (a <= b) { ta = a; tb = b; } else { ta = b; tb = a; }

		// check special cases
		if (!ta) return(0);
		if ((ta == 1) || (ta == tb)) return(tb);

		// check associative cache
		int c = ac.avail('&', ta, tb);
		if (c >= 0) return(c);

		// do bitwise operation
		forAOBWORDS(i) reg[regs][i] = reg[ta][i] & reg[tb][i];
		return(ac.cache(uniq()));
	}

	AoBreg_t Or(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		AoBreg_t ta, tb;
		if (a <= b) { ta = a; tb = b; } else { ta = b; tb = a; }

		// check special cases
		if ((!ta) || (ta == tb)) return(tb);
		if (ta == 1) return(1);

		// check associative cache
		int c = ac.avail('|', ta, tb);
		if (c >= 0) return(c);

		// do bitwise operation
		forAOBWORDS(i) reg[regs][i] = reg[ta][i] | reg[tb][i];
		return(ac.cache(uniq()));
	}

	AoBreg_t Xor(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		AoBreg_t ta, tb;
		if (a <= b) { ta = a; tb = b; } else { ta = b; tb = a; }

		// check special cases
		if (!ta) return(tb);
		if (ta == tb) return(0);

		// check associative cache
		int c = ac.avail('^', ta, tb);
		if (c >= 0) return(c);

		// do bitwise operation
		forAOBWORDS(i) reg[regs][i] = reg[ta][i] ^ reg[tb][i];
		return(ac.cache(uniq()));
	}

	AoBreg_t Rot(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		AoBreg_t tb = (b & (AOBBITS-1));

		// check special case
		if (!tb) return(a);

		// check associative cache
		int c = ac.avail('r', a, tb); // doesn't matter that tb isn't a reg
		if (c >= 0) return(c);

#if	(AOBPOW2<7)
		// first, shift by words
		int ws = (tb >> AOBPOW2);
		forAOBWORDS(i) {
			reg[regs][i] = reg[a][(i - ws) & (AOBWORDS - 1)];
		}

		// then by bits if any left
		ws = tb & ((1 << AOBPOW2) - 1);
		if (ws) {
			int rs = ((8 * sizeof(AoBword_t)) - ws);
			AoBword_t lo = (reg[regs][AOBWORDS - 1] >> rs);
			forAOBWORDS(i) {
				register AoBword_t hi = (reg[regs][i] >> rs);
				reg[regs][i] = ((reg[regs][i] << ws) | lo);
				lo = hi;
			}
		}
#else
		// first, shift by 32-bit words
		uint32_t *p = ((uint32_t *) &reg[a][0]);
		uint32_t *q = ((uint32_t *) &reg[regs][0]);
		int ws = (tb >> 5);
		for (int i=0; i<(1<<(AOBPOW2-5)); ++i) {
			q[i] = p[(i - ws) & ((1<<(AOBWAYS-5))-1)];
		}

		// then by bits if any left
		ws = tb & 31;
		if (ws) {
			int rs = (32 - ws);
			uint32_t lo = (q[(1<<(AOBPOW2-5))-1] >> rs);
			for (int i=0; i<(1<<(AOBPOW2-5)); ++i) {
				register uint32_t hi = (q[i] >> rs);
				q[i] = ((q[i] << ws) | lo);
				lo = hi;
			}
		}
#endif

		// cache & return unique
		return(ac.cache(uniq()));
	}

	AoBreg_t Flip(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		register AoBreg_t tb = (b & (AOBBITS-1));

		// check special case
		if (!tb) return(a);

		// check associative cache
		int c = ac.avail('f', a, tb); // doesn't matter that tb isn't a reg
		if (c >= 0) return(c);

#if (AOBPOW2==6)
		AoBword_t	left[] = { 0x5555555555555555LL,
				   0x3333333333333333LL,
				   0x0f0f0f0f0f0f0f0fLL,
				   0x00ff00ff00ff00ffLL,
				   0x0000ffff0000ffffLL,
				   0x00000000ffffffffLL };
		AoBword_t	right[] = { 0xaaaaaaaaaaaaaaaaLL,
				    0xccccccccccccccccLL,
				    0xf0f0f0f0f0f0f0f0LL,
				    0xff00ff00ff00ff00LL,
				    0xffff0000ffff0000LL,
				    0xffffffff00000000LL };
#else
		AoBword_t	left[] = { 0x55555555,
				   0x33333333,
				   0x0f0f0f0f,
				   0x00ff00ff,
				   0x0000ffff };
		AoBword_t	right[] = { 0xaaaaaaa,
				    0xcccccccc,
				    0xf0f0f0f0,
				    0xff00ff00,
				    0xffff0000 };
#endif

		// first, copy into work area
		copy(regs, a);

		// do any within-a-word steps
		for (int k=0; k<AOBPOW2; ++k) {
			int bit = (1 << k);
			if (bit & tb) {
				forAOBWORDS(i) {
					reg[regs][i] = (((reg[regs][i] & left[k]) << bit) |
							((reg[regs][i] & right[k]) >> bit));
				}
			}
		}

		// now do across words
		for (int k=0; k<(AOBWAYS-AOBPOW2); ++k) {
			if (((1 << AOBPOW2) << k) & tb) {
				// copy into regs+1
				copy(regs+1, regs);

				// re-arrange into regs
				forAOBWORDS(i) {
					reg[regs][i] = reg[regs+1][i ^ (1 << k)];
				}
			}
		}

		// cache & return unique
		return(ac.cache(uniq()));
	}

	AoBreg_t Reset(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		register AoBreg_t tb = (b & (AOBBITS-1));

		// check associative cache
		int c = ac.avail('0', a, tb);
		if (c >= 0) return(c);

		// do bitwise operation
		copy(regs, a);
#if	(ABOBPOW2==5)
		reg[regs][tb >> AOBPOW2] &= ~(1 << (tb & ((1 << AOBPOW2) - 1)));
#elif	(AOBPOW2==6)
		reg[regs][tb >> AOBPOW2] &= ~(1LL << (tb & ((1 << AOBPOW2) - 1)));
#else
		uint32_t *p = ((uint32_t *) &reg[regs][0]);
		p[tb >> 5] &= ~(1 << (tb & 31));
#endif
		return(ac.cache(uniq()));
	}

	AoBreg_t Set(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		register AoBreg_t tb = (b & (AOBBITS-1));

		// check associative cache
		int c = ac.avail('1', a, tb);
		if (c >= 0) return(c);

		// do bitwise operation
		copy(regs, a);
#if	(AOBPOW2==5)
		reg[regs][tb >> AOBPOW2] |= (1 << (tb & ((1 << AOBPOW2) - 1)));
#elif	(AOBPOW2==6)
		reg[regs][tb >> AOBPOW2] |= (1LL << (tb & ((1 << AOBPOW2) - 1)));
#else
		uint32_t *p = ((uint32_t *) &reg[regs][0]);
		p[tb >> 5] |= (1 << (tb & 31));
#endif
		return(ac.cache(uniq()));
	}

	AoBreg_t Tog(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		register AoBreg_t tb = (b & (AOBBITS-1));

		// check associative cache
		int c = ac.avail('t', a, tb);
		if (c >= 0) return(c);

		// do bitwise operation
		copy(regs, a);
#if	(AOBPOW2==5)
		reg[regs][tb >> AOBPOW2] ^= (1 << (tb & ((1 << AOBPOW2) - 1)));
#elif	(AOBPOW2==6)
		reg[regs][tb >> AOBPOW2] ^= (1LL << (tb & ((1 << AOBPOW2) - 1)));
#else
		uint32_t *p = ((uint32_t *) &reg[regs][0]);
		p[tb >> 5] ^= (1 << (tb & 31));
#endif
		return(ac.cache(uniq()));
	}

	AoBreg_t Dom(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		register AoBreg_t tb = (b & (AOBBITS-1));

		// check associative cache
		int c = ac.avail('t', a, tb);
		if (c >= 0) return(c);

		// do bitwise operation
		copy(regs, a);
#if	(AOBPOW2==5)
		for (int i=0; i<(tb >> AOBPOW2); ++i) reg[regs][i] = ~reg[regs][i];
		reg[regs][tb >> AOBPOW2] ^= ((2 << (tb & ((1 << AOBPOW2) - 1))) - 1);
#elif	(AOBPOW2==6)
		for (int i=0; i<(tb >> AOBPOW2); ++i) reg[regs][i] = ~reg[regs][i];
		reg[regs][tb >> AOBPOW2] ^= ((2LL << (tb & ((1 << AOBPOW2) - 1))) - 1LL);
#else
		uint32_t *p = ((uint32_t *) &reg[regs][0]);
		// process like AOBPOW2==5 inside SWAR word
		for (int i=0; i<(tb >> 5); ++i) p[i] = ~p[i];
		p[tb >> 5] ^= ((2 << (tb & 31)) - 1);
#endif
		return(ac.cache(uniq()));
	}

	AoBreg_t Meas(const AoBreg_t a, const AoBreg_t b) {
		// normalize
		register AoBreg_t tb = (b & (AOBBITS-1));
		// don't bother caching, result is 0 or 1
#if	(AOBPOW2<7)
		return((reg[a][tb >> AOBPOW2] & (1LL << (tb & ((1 << AOBPOW2) - 1)))) != 0LL);
#else
		uint32_t *p = ((uint32_t *) &(reg[a][0]));
		return((p[tb >> 5] & (1 << (tb & 31))) != 0);
#endif
	}

	AoBreg_t Any(const AoBreg_t a) {
		// could check for Ones(a) == 0, but this is faster
#if	(AOBPOW2<7)
		forAOBWORDS(i) {
			if (reg[a][i]) return(1);
		}
#else
		uint32_t *p = ((uint32_t *) &(reg[a][0]));
		for (int i=0; i<(1<<(AOBPOW2-5)); ++i) {
			if (p[i]) return(1);
		}
#endif
		return(0);
	}

	AoBreg_t All(const AoBreg_t a) {
		// could check for Ones(a) == AOBBITS, but this is faster
#if	(AOBPOW2<7)
		forAOBWORDS(i) {
			if (reg[a][i] != AoBinits[1]) return(0);
		}
#else
		uint32_t *p = ((uint32_t *) &(reg[a][0]));
		for (int i=0; i<(1<<(AOBPOW2-5)); ++i) {
			if (p[i] != 0xffffffff) return(0);
		}
#endif
		return(1);
	}

	AoBreg_t First(const AoBreg_t a) {
		// check special cases... initialized constants
		if (a < (AOBWAYS+2)) {
			if (a == 0) return(1 << AOBWAYS);
			if (a == 1) return(0);
			return(1 << (a - 2));
		}

		// don't cache, result is an integer
#if	(AOBPOW2<7)
		forAOBWORDS(i) {
			register AoBword_t t = reg[a][i];
			if (t) {
				// count trailing zeros
				AoBreg_t pos = (i * (8 * sizeof(AoBword_t)));
				for (int i=0; i<(AOBPOW2-1); ++i) {
					if (!(t & AoBtrail[i])) {
						pos += (1 << (AOBPOW2-1-i));
						t >>= (1 << (AOBPOW2-1-i));
					}
				}
				return((t & AoBtrail[AOBPOW2-1]) ? pos : (pos + 1));
			}
		}
#else
		uint32_t *p = ((uint32_t *) &(reg[a][0]));
		for (int i=0; i<(1<<(AOBPOW2-5)); ++i) {
			register uint32_t t = p[i];
			if (t) {
				// count trailing zeros
				AoBreg_t pos = (i * 32);
				if ((t & 0x0000ffff) == 0) { pos += 16; t >>= 16; }
				if ((t & 0x000000ff) == 0) { pos += 8; t >>= 8; }
				if ((t & 0x0000000f) == 0) { pos += 4; t >>= 4; }
				if ((t & 0x00000003) == 0) { pos += 2; t >>= 2; }
				pos += ((t & 0x00000001) == 0);
				return(pos);
			}
		}
#endif
		return(1 << AOBWAYS);
	}

	AoBreg_t Ones(const AoBreg_t a) {
		// check special cases... initialized constants
		if (a < (AOBWAYS+2)) {
			if (a == 0) return(0);
			if (a == 1) return(1 << AOBWAYS);
			return(1 << (AOBWAYS - 1));
		}

		// don't cache, result is an integer
		AoBreg_t pop = 0;
#if	(AOBPOW2==5)
		forAOBWORDS(i) {
			register AoBword_t t = reg[a][i];
			if (t) {
				if (t != 0xffffffff) {
					// as per http://aggregate.org/MAGIC
					t -= ((t >> 1) & 0x55555555);
					t = (((t >> 2) & 0x33333333) + (t & 0x33333333));
					t = (((t >> 4) + t) & 0x0f0f0f0f);
					t += (t >> 8);
					t += (t >> 16);
					pop += (t & 0x3f);
				} else	pop += 32;
			}
		}
#elif	(AOBPOW2==6)
		forAOBWORDS(i) {
			register AoBword_t t = reg[a][i];
			if (t) {
				if (t != 0xffffffffffffffffLL) {
					// as per http://aggregate.org/MAGIC
					t -= ((t >> 1) & 0x5555555555555555LL);
					t = (((t >> 2) & 0x3333333333333333LL) + (t & 0x3333333333333333LL));
					t = (((t >> 4) & 0x0f0f0f0f0f0f0f0fLL) + (t & 0x0f0f0f0f0f0f0f0fLL));
					t = (((t >> 8) + t) & 0x00ff00ff00ff00ff);
					t += (t >> 16);
					t += (t >> 32);
					pop += (t & 0x7f);
				} else	pop += 64;
			}
		}
#else
		uint32_t *p = ((uint32_t *) &(reg[a][0]));
		for (int i=0; i<(1<<(AOBPOW2-5)); ++i) {
			register uint32_t t = p[i];
			if (t) {
				if (t != 0xffffffff) {
					// as per http://aggregate.org/MAGIC
					t -= ((t >> 1) & 0x55555555);
					t = (((t >> 2) & 0x33333333) + (t & 0x33333333));
					t = (((t >> 4) + t) & 0x0f0f0f0f);
					t += (t >> 8);
					t += (t >> 16);
					pop += (t & 0x3f);
				} else	pop += 32;
			}
		}
#endif
		return(pop);
	}
};

/*	Regular Expression layer for PBP.
*/

class RE
{
	AoB		aob; // AoB and applicative cache
	AoBreg_t	reg[REREGS][REAOBS];
	REreg_t		index[AOBREGS]; // 0 means empty, >0 means reg[index-1]
	REreg_t		regs;
	int		hit, rehash;

public:
	// Helpers
	void copy(const REreg_t d, const REreg_t s) {
		REreg_t td = Eval(d);
		REreg_t ts = Eval(s);
		memcpy(&(reg[td][0]), &(reg[ts][0]), (REAOBS * sizeof(AoBreg_t)));
	}

	int match(const REreg_t a) {
		forREAOBS(i) {
			if (reg[a][i] != reg[regs][i]) return(0);
		}
		return(1);
	}

	void Stats() {
		aob.Stats();
		fprintf(stderr, "RE: %d regs, %d hits, %d rehashes\n", regs, hit, rehash);
	}

	void Show(const REreg_t a) {
		forREAOBS(i) {
			aob.Show(reg[a][i]);
		}
	}

	REreg_t Eval(const REreg_t a) { return(a); }

	// Constructor
	RE() {
		// set-up Stats
		hit = 0;
		rehash = 0;

		// set-up 0, 1, H(0), H(1), H(2), ...
		bzero(&(index[0]), (REREGS * sizeof(index[0])));
		regs = 0;
		while (regs < (REWAYS+2)) {
			forREAOBS(i) {
				reg[regs][i] = ((regs < (AOBWAYS+2)) ? regs : ((i & (1<<(regs-(AOBWAYS+2)))) != 0));
			}

			uniq(); // making unique entry also increments regs
		}
	}

	// Uniqueness functions
	REreg_t hash() {
		// should really pick this by benchmarking...
		REreg_t h = 42; // a nice random starting bit pattern

		for (int i=0; i<REAOBS; ++i) {
			h = (h << 3) | (h & 7);
			h ^= reg[regs][i];
		}

		return((h ^ (h / REREGS)) & (REREGS-1));
	}

	REreg_t uniq() {
		REreg_t h = hash();
		for (;;) {
			// empty entry
			if (!index[h]) { index[h] = ++regs; return(regs - 1); }

			// found it
			if (index[h] && match(index[h] - 1)) { ++hit; return(index[h] - 1); }

			++rehash;
			h = ((h + 347) & (REREGS-1));
		}
	}

	// Basic operations using eager evaluation
	REreg_t And(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t ta, tb;
		if (a <= b) { ta = a; tb = b; } else { ta = b; tb = a; }

		// check special cases
		if (!ta) return(0);
		if ((ta == 1) || (ta == tb)) return(tb);

		// do AoB operation
		forREAOBS(i) reg[regs][i] = aob.And(reg[ta][i], reg[tb][i]);
		return(uniq());
	}

	REreg_t Or(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t ta, tb;
		if (a <= b) { ta = a; tb = b; } else { ta = b; tb = a; }

		// check special cases
		if ((!ta) || (ta == tb)) return(tb);
		if (ta == 1) return(1);

		// do AoB operation
		forREAOBS(i) reg[regs][i] = aob.Or(reg[ta][i],  reg[tb][i]);
		return(uniq());
	}

	REreg_t Xor(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t ta, tb;
		if (a <= b) { ta = a; tb = b; } else { ta = b; tb = a; }

		// check special cases
		if (!ta) return(tb);
		if (ta == tb) return(0);

		// do AoB operation
		forREAOBS(i) reg[regs][i] = aob.Xor(reg[ta][i], reg[tb][i]);
		return(uniq());
	}

	REreg_t Rot(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t tb = (b & (REBITS-1));

		// check special case
		if (!tb) return(a);

		// first, shift by AoBs
		int ws = (tb / AOBBITS);
		forREAOBS(i) reg[regs][i] = reg[a][(i - ws) & (REAOBS-1)];

		// then by bits if any left
		int br = (tb % AOBBITS);
		if (br) {
			// rotate 'em individually, which makes top bits correct
			forREAOBS(i) reg[regs+1][i] = aob.Rot(reg[regs][i], br);

			// copy bits from other AoBs
			AoBreg_t lomask = aob.Dom(0, br-1);
			AoBreg_t himask = aob.Xor(1, lomask);
			forREAOBS(i) {
				reg[regs][i] = aob.Or(aob.And(himask, reg[regs+1][i]),
						      aob.And(lomask, reg[regs+1][(i - 1) & (REAOBS - 1)]));
			}
		}

		// return unique
		return(uniq());
	}

	REreg_t Flip(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t tb = (b & (REBITS-1));

		// check special case
		if (!tb) return(a);

		// first, copy into work area
		copy(regs, a);

		// do any within-an-AoB steps
		forREAOBS(i) {
			reg[regs][i] = aob.Flip(reg[a][i], (tb & (AOBBITS-1)));
		}

		// now do across AoBs
		for (int k=0; k<(REWAYS-AOBWAYS); ++k) {
			if (((1 << AOBWAYS) << k) & tb) {
				// copy into regs+1
				copy(regs+1, regs);

				// re-arrange into regs
				forAOBWORDS(i) {
					reg[regs][i] = reg[regs+1][i ^ (1 << k)];
				}
			}
		}

		// return unique
		return(uniq());
	}

	REreg_t Reset(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t tb = (b & (REBITS-1));

		// do bitwise operation
		copy(regs, a);
		reg[regs][tb >> AOBWAYS] = aob.Reset(reg[regs][tb >> AOBWAYS], (tb & (AOBBITS-1)));
		return(uniq());
	}

	REreg_t Set(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t tb = (b & (REBITS-1));

		// do bitwise operation
		copy(regs, a);
		reg[regs][tb >> AOBWAYS] = aob.Set(reg[regs][tb >> AOBWAYS], (tb & (AOBBITS-1)));
		return(uniq());
	}

	REreg_t Tog(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t tb = (b & (REBITS-1));

		// do bitwise operation
		copy(regs, a);
		reg[regs][tb >> AOBWAYS] = aob.Tog(reg[regs][tb >> AOBWAYS], (tb & (AOBBITS-1)));
		return(uniq());
	}

	REreg_t Dom(const REreg_t a, const REreg_t b) {
		// normalize
		register REreg_t tb = (b & (REBITS-1));

		// do bitwise operation
		copy(regs, a);
		for (int i=0; i<(tb >> AOBWAYS); ++i) reg[regs][i] = aob.Xor(1, reg[regs][i]);
		reg[regs][tb >> AOBWAYS] = aob.Dom(reg[regs][tb >> AOBWAYS], (tb & (AOBBITS-1)));
		return(uniq());
	}

	REreg_t Meas(const REreg_t a, const REreg_t b) {
		// eager Measure
		register REreg_t ta, tb;
		ta = Eval(a);	// force eval of arg

		// special cases for measuring in 0 or 1
		if ((ta == 0) || (ta == 1)) return(ta);

		// normalize b (which is really a number)
		tb = (b & (REBITS-1));

		// result is 0 or 1; no need to do uniq()
		return(aob.Meas(reg[ta][tb >> AOBWAYS], (tb & (AOBBITS-1))));
	}

	REreg_t Any(const REreg_t a) {
		// eager Any
		register REreg_t ta;
		ta = Eval(a);	// force eval of arg

		// result is 0 or 1; no need to do uniq()
		return(ta != 0);
	}

	REreg_t All(const REreg_t a) {
		// eager All
		register REreg_t ta;
		ta = Eval(a);	// force eval of arg

		// result is 0 or 1; no need to do uniq()
		return(ta == 1);
	}

	REreg_t First(const REreg_t a) {
		// eager First
		register REreg_t ta;
		ta = Eval(a);	// force eval of arg

		// special cases for non-lazy stuff known constants
		if (ta < (REWAYS+2)) {
			if (ta == 0) return(1 << REWAYS);
			if (ta == 1) return(0);
			return(1 << (ta - 2));
		}

		// result is an integer
		register REreg_t r = 0;
		forREAOBS(i) {
			register REreg_t t = aob.First(reg[a][i]);
			r += t;
			if (t < AOBBITS) {
				return(r);
			}
		}
		return(r);
	}

	REreg_t Ones(const REreg_t a) {
		// eager Ones
		register REreg_t ta;
		ta = Eval(a);	// force eval of arg

		// check special cases... initialized constants
		if (ta < (REWAYS+2)) {
			if (ta == 0) return(0);
			if (ta == 1) return(1 << REWAYS);
			return(1 << (REWAYS - 1));
		}
		
		// result is an integer
		REreg_t pop = 0;
		forREAOBS(i) {
			pop += aob.Ones(reg[ta][i]);
		}
		return(pop);
	}
};

RE re;	// instantiate everything

/*	Pattern Bit layer for PBP; includes quantum-style operators.
*/

class pbit
{
	REreg_t v;  // value

public:
	// Constructor
	pbit(int value=pbitNaN) {
		v = (((value < 0) || (value > REREGS)) ? pbitNaN : value);
	}

	// Helpers
	void Show() const {
		if (Valid()) re.Show(v);
		else fprintf(stderr, "pbitNaN\n");
	}

	int Valid() const {
		return(v != pbitNaN);
	}

	void Eval() {
		v = re.Eval(v);
	}

	// Operators
	void operator=(const pbit& a) {
		v = (a.Valid() ? a.v : pbitNaN);
	}

	pbit And(const pbit& a) const {
		return((a.Valid() && Valid()) ? pbit(re.And(a.v, v)) : pbit(pbitNaN));
	}
	pbit operator&(const pbit& a) const { return(And(a)); }
	pbit operator*(const pbit& a) const { return(And(a)); }

	pbit Or(const pbit& a) const {
		return((a.Valid() && Valid()) ? pbit(re.Or(a.v, v)) : pbit(pbitNaN));
	}
	pbit operator|(const pbit& a) const { return(Or(a)); }

	pbit Xor(const pbit& a) const {
		return((a.Valid() && Valid()) ? pbit(re.Xor(a.v, v)) : pbit(pbitNaN));
	}
	pbit operator^(const pbit& a) const { return(Xor(a)); }
	pbit operator+(const pbit& a) const { return(Or(a)); }

	// 1-bit ~ is same as !
	pbit Not() const { return(Valid() ? pbit(re.Xor(1, v)) : pbit(pbitNaN)); }
	pbit operator~() const { return(Not()); }
	pbit operator!() const { return(Not()); }

	pbit Sub(const pbit& a) const {
		// this - a means this & ~a
		return((a.Valid() && Valid()) ? pbit(re.And(v, re.Xor(1, a.v))) : pbit(pbitNaN));
	}
	pbit operator-(const pbit& a) const { return(Sub(a)); }

	pbit LT(const pbit& a) const {
		// this < a means a & ~this
		return((a.Valid() && Valid()) ? pbit(re.And(a.v, re.Xor(1, v))) : pbit(pbitNaN));
	}
	pbit operator<(const pbit& a) const { return(LT(a)); }

	pbit GT(const pbit& a) const { return(a < pbit(v)); }
	pbit operator>(const pbit& a) const { return(GT(a)); }

	pbit LE(const pbit& a) const {
		// this <= a means ~(this > a)
		return((a.Valid() && Valid()) ? pbit(re.Xor(1, GT(a).v)) : pbit(pbitNaN));
	}
	pbit operator<=(const pbit& a) const { return(LE(a)); }

	pbit GE(const pbit& a) const { return(a <= pbit(v)); }
	pbit operator>=(const pbit& a) const { return(GE(a)); }

	pbit NE(const pbit& a) const { return(Xor(a)); }
	pbit operator!=(const pbit& a) const { return(NE(a)); }

	pbit EQ(const pbit& a) const {
		// this == a means ~(this != a)
		return((a.Valid() && Valid()) ? pbit(re.Xor(1, NE(a).v)) : pbit(pbitNaN));
	}
	pbit operator==(const pbit& a) const { return(EQ(a)); }

	pbit Rot(const int a) const {
		return(Valid() ? pbit(re.Rot(v, a)) : pbit(pbitNaN));
	}
	pbit operator<<(const int a) const { return(Rot(a)); }

	pbit Flip(const int a) const {
		return(Valid() ? pbit(re.Flip(v, a)) : pbit(pbitNaN));
	}

	pbit Reset(const int a) const { return(Valid() ? pbit(re.Reset(v, a)) : pbit(pbitNaN)); }

	pbit Set(const int a) const { return(Valid() ? pbit(re.Set(v, a)) : pbit(pbitNaN)); }

	pbit Tog(const int a) const { return(Valid() ? pbit(re.Tog(v, a)) : pbit(pbitNaN)); }

	pbit Dom(const int a) const { return(Valid() ? pbit(re.Dom(v, a)) : pbit(pbitNaN)); }

	pbit Meas(const int a) const { return(Valid() ? pbit(re.Meas(v, a)) : pbit(pbitNaN)); }

	pbit Any() const { return(Valid() ? pbit(re.Any(v)) : pbit(pbitNaN)); }

	pbit All() const { return(Valid() ? pbit(re.All(v)) : pbit(pbitNaN)); }

	REreg_t First() const { return(Valid() ? re.First(v) : pbitNaN); }

	REreg_t Ones() const { return(Valid() ? re.Ones(v) : pbitNaN); }

	pbit Select(const pbit& a, const pbit& b) const {
		// a few simplifications
		if (v == 0) return(b);
		if (v == 1) return(a);
		if (a.v == b.v) return(a);
		if ((!Valid()) || (!a.Valid()) || (!b.Valid())) return(pbitNaN);

		// do the bit ops
		return((a & *this) | (b & ~*this));
	}

	pbit operator&=(pbit b) { *this = And(b); return(*this); }
	pbit operator|=(pbit b) { *this = Or(b); return(*this); }
	pbit operator^=(pbit b) { *this = Xor(b); return(*this); }
	pbit operator*=(pbit b) { *this = And(b); return(*this); } // 1-bit And is Mul
	pbit operator+=(pbit b) { *this = Or(b); return(*this); }  // 1-bit Or is Add

	friend class pint;

	// quantum-style friends...
	friend void NOT(pbit& q);
	friend void CNOT(const pbit& c, pbit& t);
	friend void SWAP(pbit& i0, pbit& i1);
	friend void CCNOT(const pbit& a, const pbit& b, pbit& c);
	friend void CSWAP(const pbit& c, pbit& i0, pbit& i1);
	friend void H(pbit& q, const int c);
	friend void SETMEAS(const int m);
	friend void SETMEAS();
	friend int MEAS(pbit& q);
};

// quantum-style pbit operators
// all these operate on pbits in-place
void NOT(pbit& q) { q = pbit(1) ^ q; }
void CNOT(const pbit& c, pbit& t) { t = c ^ t; }
void SWAP(pbit& i0, pbit& i1) { pbit t = i0; i0 = i1; i1 = t; }
void CCNOT(const pbit& a, const pbit& b, pbit& c) { c = c ^ (a & b); }
void CSWAP(const pbit& c, pbit& i0, pbit& i1) { pbit t = c.Select(i1,i0); i1 = c.Select(i0,i1); i0 = t; }
void H(pbit& q, const int c) { q = q ^ pbit(c + 2); }
int MEASat = 0;
void SETMEAS(const int m) { MEASat = (m & (REBITS - 1)); }
void SETMEAS() { SETMEAS(rand()); }
int MEAS(pbit& q) { q = q.Meas(MEASat); return(q.v); }

/*	Pattern INTeger layer for PBP.
*/

class pint
{
	bool	has_sign;	// has a sign pbit?
	uint8_t	prec;		// precision in pbits
	pbit	bit[PINTBITS];	// actual pbits

public:
	// constructors
	pint() {
		// default to 1 unsigned pbit that's NaN
		has_sign = false;
		prec = 1;
		bit[0] = pbit(pbitNaN);
	}

	pint(const int konst) {
		// auto sizes to konst, also determines sign from konst
		has_sign = (konst < 0);
		prec = PINTBITS;

		// install the bits
		forprec(i) {
			bit[i] = pbit(((1 << i) & konst) ? 1 : 0);
		}

		// get rid of any unnecessary bits
		*this = Minimize();
	}

	pint(const int konst, const int bits) {
		// make an integer with constant value konst
		// signed if bits is negative
		if (bits >= 0) {
			has_sign = false; // unsigned
			prec = bits;
		} else {
			has_sign = true; // signed
			prec = -bits;
		}

		// install bits
		forprec(i) {
			bit[i] = pbit(((1 << i) & konst) ? 1 : 0);
		}
	}

	void Eval() {
		// force all bit values computed
		forprec(i) {
			bit[i].v = re.Eval(bit[i].v);
		}
	}

	// Display
	void Summary();
	void Show(const int vals);
	void ShowBits();

	// Logic
	pint LAnd(pint b) const;
	pint LOr(pint b) const;
	pint LXOr(pint b) const;

	// Bitwise
	pint And(pint b) const;
	pint Or(pint b) const;
	pint XOr(pint b) const;

	// Equality
	pint EQ(pint b) const;
	pint NE(pint b) const;

	// Relational
	pint GT(pint b) const;
	pint LT(pint b) const;
	pint GE(pint b) const;
	pint LE(pint b) const;

	// Shifts
	pint ShR(pint b) const;
	pint ShR(const int b) const;
	pint ShL(pint b) const;
	pint ShL(const int b) const;

	// Additive
	pint Add(pint b) const;
	pint Sub(pint b) const;

	// Multiplicative
	pint Mul(pint b) const;
	pint Mul(pint b, const int bits) const;
	pint Div(pint b) const;
	pint Rem(pint b) const; // C/C++ % is really remainder, not modulus

	// Not
	pint LNot() const;
	pint Not() const;
	pint Logic() const;	// like !!

	// Sign manipulation
	pint Neg() const;
	pint Abs() const;
	pint Signed() const;	// forces bits to be treated signed
	pint UnSigned() const;	// forces bits to be treated unsigned

	// PBP primitives
	pint Had(const int ways, const int dim) const;
	pint Rot(const int b) const;
	pint Flip(const int b) const;
	pint Reset(const int b) const;
	pint Set(const int e, const int v) const;
	pint Dom(const int b) const;
	int Meas(const int b);
	int Meas();
	REreg_t First();
	REreg_t Pop();

	// Helpers
	int Valid() const;
	pint Bit(const int b) const;
	pint Minimize() const;
	pint Extend(const int p) const;
	pint Promote(const pint& b) const;
	pint Select(pint vtrue, pint vfalse) const;

	// Extended functions
	pint Min(pint b) const;
	pint Max(pint b) const;

	// Reductions
	int Any() const;
	int All() const;
	int ReduceAnd() const;
	int ReduceOr() const;
	int ReduceXOr() const;
	int ReduceAdd() const;
	int ReduceMul(const int preclimit) const; // problematic...
	int ReduceMin() const;
	int ReduceMax() const;

	// Scans (parallel prefix)
	pint ScanAnd() const;
	pint ScanOr() const;
	pint ScanXOr() const;
	pint ScanAdd() const;
	pint ScanMul(const int preclimit) const; // problematic...
	pint ScanMin() const;
	pint ScanMax() const;

	// Gather
	int Gather(int *p, const int n);

	// Friends
	friend class pbit;
	friend pint Cover(const int min, const int max, const int dim);
	friend pint Range(const int min, const int max, const int dim);
	friend pint Scatter(const int *p, const int n, const int reways);

	// Expensive library stuff
	pint SortUp() const;
	pint SortDown() const;

	// Assignment operators
	pint operator=(const pint& a) {
		has_sign = a.has_sign; prec = a.prec;
		forprec (i) bit[i] = a.bit[i];
		return(*this);
	}
	pint operator=(const int a) { return(*this = pint(a)); }
	pint operator*=(pint b) { *this = Mul(b); return(*this); }
	pint operator/=(pint b) { *this = Div(b); return(*this); }
	pint operator%=(pint b) { *this = Rem(b); return(*this); }
	pint operator+=(pint b) { *this = Add(b); return(*this); }
	pint operator-=(pint b) { *this = Sub(b); return(*this); }
	pint operator>>=(pint b) { *this = ShR(b); return(*this); }
	pint operator>>=(int b) { *this = ShR(b); return(*this); }
	pint operator<<=(pint b) { *this = ShL(b); return(*this); }
	pint operator<<=(int b) { *this = ShL(b); return(*this); }
	pint operator&=(pint b) { *this = And(b); return(*this); }
	pint operator|=(pint b) { *this = Or(b); return(*this); }
	pint operator^=(pint b) { *this = XOr(b); return(*this); }

	// Logical operators
	pint operator&&(const pint& b) const { return(LAnd(b)); }
	pint operator||(const pint& b) const { return(LOr(b)); }
	// there is no operator^^, but != effectively does it

	// Bitwise operators
	pint operator|(const pint& b) const { return(Or(b)); }
	pint operator^(const pint& b) const { return(XOr(b)); }
	pint operator&(const pint& b) const { return(And(b)); }

	// Equality operators
	pint operator==(const pint& b) const { return(EQ(b)); }
	pint operator!=(const pint& b) const { return(NE(b)); }

	// Relational operators
	pint operator>(const pint& b) const { return(GT(b)); }
	pint operator<(const pint& b) const { return(LT(b)); }
	pint operator>=(const pint& b) const { return(GE(b)); }
	pint operator<=(const pint& b) const { return(LE(b)); }

	// C++ 20 three-way comparison operator
	// uncomment if your C++ supports it (GCC doesn't yet)
//	pint operator<=>(const pint& b) const { return(GT(b) - LT(b)); }

	// Bitwise shift operators
	pint operator>>(pint b) const { return(ShR(b)); }
	pint operator>>(int b) const { return(ShR(b)); }
	pint operator<<(pint b) const { return(ShL(b)); }
	pint operator<<(int b) const { return(ShL(b)); }

	// Additive operators
	pint operator+(pint b) const { return(Add(b)); }
	pint operator-(pint b) const { return(Sub(b)); }

	// Multiplicative operators
	pint operator*(pint b) const { return(Mul(b)); }
	pint operator/(pint b) const { return(Div(b)); }
	pint operator%(pint b) const { return(Rem(b)); }

	// Not operators
	pint operator!() const { return(LNot()); }
	pint operator~() const { return(Not()); }

	// Unary plus and minus
	pint operator+() const { return(*this); }
	pint operator-() const { return(Neg()); }

	// Prefix increment and decrement
	pint& operator++() { *this = Add(pint(1)); return(*this); }
	pint& operator--() { *this = Sub(pint(1)); return(*this); }

	// Postfix increment and decrement
	// note that int arg is a C++ notation thing; there isn't an int arg
	pint operator++(int) { pint r = *this; *this = Add(pint(1)); return(r); }
	pint operator--(int) { pint r = *this; *this = Sub(pint(1)); return(r); }
};

// operators with an int first operand

pint operator&&(int a, pint& b) { return(pint(a!=0) && b); }
pint operator||(int a, pint& b) { return(pint(a!=0) || b); }
pint operator|(int a, pint& b) { return(pint(a) | b); }
pint operator^(int a, pint& b) { return(pint(a) ^ b); }
pint operator&(int a, pint& b) { return(pint(a) & b); }
pint operator==(int a, pint& b) { return(pint(a) == b); }
pint operator!=(int a, pint& b) { return(pint(a) != b); }
pint operator>(int a, pint& b) { return(pint(a) > b); }
pint operator<(int a, pint& b) { return(pint(a) < b); }
pint operator>=(int a, pint& b) { return(pint(a) >= b); }
pint operator<=(int a, pint& b) { return(pint(a) <= b); }
pint operator>>(int a, pint& b) { return(pint(a) >> b); }
pint operator<<(int a, pint& b) { return(pint(a) << b); }
pint operator+(int a, pint& b) { return(pint(a) + b); }
pint operator-(int a, pint& b) { return(pint(a) - b); }
pint operator*(int a, pint& b) { return(pint(a) * b); }
pint operator/(int a, pint& b) { return(pint(a) / b); }
pint operator%(int a, pint& b) { return(pint(a) % b); }

// Display

void
pint::Summary()
{
	// summarize this pint
	Eval();
	fprintf(stderr, "%s%d_t {", (has_sign ? "int" : "uint"), prec);
	forprec (i) {
		fprintf(stderr, "%d%s", bit[i].v, ((i==(prec-1)) ? "}\n" : ","));
	}
}

void
pint::Show(const int vals = REBITS)
{
	// show bits in this pint
	Summary();
	fprintf(stderr, "{");
	for (int i=0; i<vals-1; ++i) {
		fprintf(stderr, "%d,%c", Meas(i), (((i%16)==15) ? '\n' : ' '));
	}
	fprintf(stderr, "%d}\n", Meas(vals-1));
}

void
pint::ShowBits()
{
	// show bits in this pint
	Summary();
	for (int i=0; i<prec; ++i) bit[i].Show();
}

// Logic

pint
pint::LAnd(pint b) const
{
	GATES(GATESWORD-1);
	pint r = Logic();
	b = b.Logic();
	r.bit[0] &= b.bit[0];
	return(r);
}

pint
pint::LOr(pint b) const
{
	GATES(GATESWORD-1);
	pint r = Logic();
	b = b.Logic();
	r.bit[0] = r.bit[0] | b.bit[0];
	return(r);
}

pint
pint::LXOr(pint b) const
{
	GATES(GATESWORD-1);
	pint r = Logic();
	b = b.Logic();
	r.bit[0] = r.bit[0] ^ b.bit[0];
	return(r);
}

// Bitwise

pint
pint::And(pint b) const
{
	GATES(GATESWORD);
	pint r = Promote(b);
	b = b.Promote(r);
	for (int i=0; i<r.prec; ++i) {
		r.bit[i] = r.bit[i] & b.bit[i];
	}
	r = r.Minimize();
	return(r);
}

pint
pint::Or(pint b) const
{
	GATES(GATESWORD);
	pint r = Promote(b);
	b = b.Promote(r);
	for (int i=0; i<r.prec; ++i) {
		r.bit[i] = r.bit[i] | b.bit[i];
	}
	r = r.Minimize();
	return(r);
}

pint
pint::XOr(pint b) const
{
	GATES(GATESWORD);
	pint r = Promote(b);
	b = b.Promote(r);
	for (int i=0; i<r.prec; ++i) {
		r.bit[i] = r.bit[i] ^ b.bit[i];
	}
	return(r.Minimize());
}

// Equality

pint
pint::EQ(pint b) const
{
	pint r = XOr(b);
	r = r.Logic();
	r.bit[0] = ~r.bit[0];
	return(r);
}

pint
pint::NE(pint b) const
{
	pint r = XOr(b);
	r = r.Logic();
	return(r);
}

// Relational

pint
pint::GT(pint b) const
{
	pbit gt(0);
	pbit ep(1);
	pint r;
	r = Promote(b);
	b = b.Promote(r);

	if (r.has_sign) {
		// signed > flips bits if they were negative
		r.bit[r.prec-1] = ~r.bit[r.prec-1];
		b.bit[r.prec-1] = ~b.bit[r.prec-1];
		r.has_sign = false;
		b.has_sign = false;
	}

	// unsigned > looks for msb that differs
	for (int i=r.prec-1; i>=0; --i) {
		gt = (gt | (ep & (r.bit[i] & ~b.bit[i])));
		ep = (ep & ~(r.bit[i] ^ b.bit[i]));
	}

	// always returns 1 bit unsigned
	r.bit[0] = gt;
	r.prec = 1;
	r.has_sign = false;
	return(r);
}

pint
pint::LT(pint b) const
{
	b = (b > *this);
	return(b);
}

pint
pint::GE(pint b) const
{
	b = ~(*this < b);
	return(b);
}

pint
pint::LE(pint b) const
{
	b = ~(*this > b);
	return(b);
}

// Shifts

pint
pint::ShR(pint b) const
{
	// shift right, treats shift amount as unsigned
	GATES(GATELOG*GATESWORD);
	pint r = *this;

	for (int i=(b.prec-1); i>=0; --i) {
		for (int j=0; j<prec; ++j) {
			pbit x;

			if ((j + (1 << i)) >= prec) {
				x = (has_sign ? bit[prec-1] : pbit(0));
			} else {
				x = r.bit[j + (1 << i)];
			}

			r.bit[j] = b.bit[i].Select(x, r.bit[j]);
		}
	}
	return(r.Minimize());
}

pint
pint::ShR(const int b) const
{
	// shift right, treats shift amount as unsigned
	GATES(GATELOG*GATESWORD);
	pint r = *this;
	for (int i=0; i<r.prec; ++i) {
		r.bit[i] = (((i + b) < r.prec) ?
			    bit[i + b] :
			    (has_sign ? bit[prec-1] : pbit(0)));
	}
	return(r.Minimize());
}

pint
pint::ShL(int b = 1) const
{
	// shift left, treats shift amount as unsigned
	GATES(GATESWORD);
	int newprec = b + prec;
	if (newprec > REWAYS) newprec = REWAYS;
	pint r = Extend(newprec);
	for (int i=newprec-1; i>=0; --i) {
		r.bit[i] = (((i - b) >= 0) ? r.bit[i - b] : pbit(0));
	}
	return(r.Minimize());
}

pint
pint::ShL(const pint b) const
{
	// shift left, treats shift amount as unsigned
	GATES(GATESWORD);
	pint r = *this;
	pint s(0);
	for (int i=0; i<b.prec; ++i) {
		s.bit[0] = b.bit[i];
		r = s.Select((r << (1<<i)), r);
	}
	return(r.Minimize());
}

// Additive

pint
pint::Add(pint b) const
{
	GATES(5*GATESWORD);
	pint r = *this;
	r = r.Promote(b);
	r = r.Extend(r.prec + 1); // k bits + k bits = k+1 bits
	b = b.Promote(r);

	// ripple carry adder
	pbit c = pbit(0);
	for (int i=0; i<b.prec; ++i) {
		pbit x = r.bit[i] ^ b.bit[i];
		pbit n = r.bit[i] & b.bit[i];
		r.bit[i] = x ^ c;
		c = n | (x & c);
	}
	return(r.Minimize());
}

pint
pint::Sub(pint b) const
{
	GATES(6*GATESWORD);
	pint r = *this;
	r = r.Promote(pint(-1)); // force promotion to signed
	r = r.Promote(b);
	r = r.Extend(r.prec + 1); // k bits + k bits = k+1 bits
	b = b.Promote(r);

	// ripple carry subtract
	pbit c = pbit(1);
	for (int i=0; i<b.prec; ++i) {
		pbit n = ~b.bit[i];
		pbit x = r.bit[i] ^ n;
		n = r.bit[i] & n;
		r.bit[i] = x ^ c;
		c = n | (x & c);
	}
	return(r.Minimize());
}

// Multiplicative

pint
pint::Mul(pint b) const
{
	// multiply
	GATES(5*GATESWORD*GATESWORD);
	pint v(0);
	pint a = *this;

	// deal with signed values
	if (a.has_sign || b.has_sign) {
		pint aneg = (a < pint(0));
		a = aneg.Select(-a, a);
		pint bneg = (b < pint(0));
		b = bneg.Select(-b, b);
		pint vneg = aneg ^ bneg;

		// faster if first arg has fewer bits
		for (int i=0; i<a.prec; ++i) {
			pint c(0,a.prec+b.prec);
			for (int j=0; ((j<b.prec)&&((i+j)<PINTBITS)); ++j) {
				c.bit[i+j] = b.bit[j] & a.bit[i];
			}
			c = c.Minimize();
			v = v + c;
		}

		v = vneg.Select(-v, v);
	} else {
		// faster if first arg has fewer bits
		for (int i=0; i<a.prec; ++i) {
			pint c(0,PINTBITS);
			for (int j=0; ((j<b.prec)&&((i+j)<PINTBITS)); ++j) {
				c.bit[i+j] = b.bit[j] & a.bit[i];
			}
			c = c.Minimize();
			v = v + c;
		}
	}
	return(v.Minimize());
}

pint
pint::Mul(pint b, const int bits) const
{
	// multiply, but only get least significant bits
	GATES(5*bits*bits);
	pint v(0);
	pint a = *this;

	// deal with signed values
	if (a.has_sign || b.has_sign) {
		// this is not really well defined...
		// signed values can change sign when limited
		pint aneg = (a < pint(0));
		a = aneg.Select(-a, a);
		pint bneg = (b < pint(0));
		b = bneg.Select(-b, b);
		pint vneg = aneg ^ bneg;
		if (b.prec > bits) b.prec = bits;
		int rounds = ((prec > bits) ? bits : prec);

		// faster if first arg has fewer bits
		for (int i=0; i<rounds; ++i) {
			pint c(0,a.prec+b.prec);
			for (int j=0; ((j<b.prec)&&((i+j)<PINTBITS)); ++j) {
				c.bit[i+j] = b.bit[j] & a.bit[i];
			}
			c = c.Minimize();
			v = v + c;
		}

		v = vneg.Select(-v, v);
	} else {
		if (b.prec > bits) b.prec = bits;
		int rounds = ((prec > bits) ? bits : prec);

		// faster if first arg has fewer bits
		for (int i=0; i<rounds; ++i) {
			pint c = b;

			for (int j=0; j<c.prec; ++j) {
				c.bit[j] = c.bit[j] & bit[i];
			}
			v = v + c;
			b = b << 1;

			// discard any extra bits
			if (v.prec > bits) v.prec = bits;
			if (b.prec > bits) b.prec = bits;
		}
	}

	return(v.Minimize());
}

pint
pint::Div(pint b) const
{
	// divide
	GATES(6*GATESWORD*GATESWORD);
	// divide by 0 yields 0 (maybe should be NaN?)
	pint s = (*this < pint(0));
	pint sb = (b < pint(0));
	pint rem = Abs();
	b = b.Abs();
	pint zero = (rem < b) | (b == pint(0));
	pint quo = 0;

	// shift & subtract loop (using adds)
	for (int i=rem.prec-1; i>=0; --i) {
		pint t = (b << i);
		pint le = (t <= rem);
		rem = rem - le.Select(t, pint(0));
		quo = quo | (le << i);
	}

	// force zeros, fix sign
	quo = zero.Select(pint(0), quo);
	quo = (s ^ sb).Select(-quo, quo);
	return(quo.Minimize());
}

pint
pint::Rem(pint b) const
{
	// C's % operator is called modulus,
	// but modulus should result in sign of divisor;
	// C's % is really remainder, with sign of dividend
	// modulus 0 yields 0 (maybe should be NaN?)
	GATES(6*GATESWORD*GATESWORD);
	pint s = (*this < pint(0));
	pint sb = (b < pint(0));
	pint rem = Abs();
	b = b.Abs();
	pint zero = (b == pint(0));

	// shift & subtract loop (using adds)
	for (int i=rem.prec-1; i>=0; --i) {
		pint t = (b << i);
		pint le = (t <= rem);
		rem = rem - le.Select(t, pint(0));
	}

	// force zeros, fix sign
	rem = zero.Select(pint(0), rem);
	rem = s.Select(-rem, rem);
	return(rem.Minimize());
}

// Not

pint
pint::LNot() const
{
	GATES(1);
	pint r = *this;
	r = r.Logic();
	r.bit[0] = ~r.bit[0];
	return(r);
}

pint
pint::Not() const
{
	GATES(GATESWORD);
	pint r = *this;
	forprec(i) r.bit[i] = ~bit[i];
	return(r.Minimize());
}

pint
pint::Logic() const
{
	// reduce to a logical value, i.e., a single pbit
	GATES(GATESWORD-1);
	pint r = *this;
	for (int i=1; i<prec; ++i) {
		r.bit[0] = r.bit[0] | r.bit[i];
	}
	r.has_sign = false; // always unsigned
	r.prec = 1; // single bit
	return(r);
}

// Sign manipulation

pint
pint::Neg() const
{
	pint r = pint(0).Sub(*this);
	return(r.Minimize());
}

pint
pint::Abs() const
{
	// absolute value (trivial if unsigned)
	if (!has_sign) {
		return(*this);
	}

	// negate the negative ones
	pint t = (*this < 0);
	t = t.Select(-*this, *this);
	return(t.Minimize());
}

pint
pint::Signed() const
{
	// force signed; preserves bits, not value
	pint r = *this;
	r.has_sign = true;
	return(r);
}

pint
pint::UnSigned() const
{
	// force unsigned
	pint r = *this;
	r.has_sign = false;
	return(r);
}

// PBP primitives

pint
pint::Had(const int ways, const int dim = 0) const
{
	// make a bits-pbit Hadamard value using entanglement channels
	// starting with dimension dim and XOr it with this
	// negative ways forces signed
	GATES(GATESWORD);
	int hbits = ((ways < 0) ? -ways : ways);
	int w = ((hbits > prec) ? hbits : prec); // extend this if needed
	pint r = (*this);
	r.Eval();
	r = r.Extend(w);
	int h = dim;

	// XOr the relevant (low) bits...
	for (int i=0; i<hbits; ++i) {
		r.bit[i] = r.bit[i] ^ pbit(pbitH(h));
		++h;
	}
	if (ways < 0) r = r.Signed();
	return(r.Minimize());
}

pint
pint::Rot(const int b) const
{
	GATES(GATESWORD);
	pint r = *this;
	forprec(i) r.bit[i] = bit[i].Rot(b);
	return(r);
}

pint
pint::Flip(const int b) const
{
	GATES(GATESWORD);
	pint r = *this;
	forprec(i) r.bit[i] = bit[i].Flip(b);
	return(r);
}

pint
pint::Reset(const int b) const
{
	GATES(1);
	pint r = *this;
	forprec(i) r.bit[i] = bit[i].Reset(b);
	r = r.Minimize();
	return(r);
}

pint
pint::Set(const int e, const int v = 1) const
{
	// make a pint of v so that we know has_sign and prec
	// to promote this to before set/reset bits
	GATES(1);
	pint t(v);
	pint r = Promote(t); // make r big enough to hold v
	for (int i=0; i<r.prec; ++i) {
		if (v & (1 << i)) {
			r.bit[i] = r.bit[i].Set(e);
		} else {
			r.bit[i] = r.bit[i].Reset(e);
		}
	}
	return(r.Minimize());
}

pint
pint::Dom(const int b) const
{
	GATES(GATESWORD);
	pint r = *this;
	forprec(i) r.bit[i] = bit[i].Dom(b);
	return(r);
}

int
pint::Meas(const int b)
{
	// note that Measurement always produces 0 or 1
	// bit values, never superpositions, which is why
	// this can return an ordinary int value
	Eval();
	GATES(1);
	register int v = 0;
	forprec(i) if ((bit[i].Meas(b)).v) v |= (1<<i);
	if (has_sign) {
		// perform sign extension?
		if (v & (1<<(prec-1))) {
			// yes, negative value
			v |= ~((1<<(prec-1))-1);
		}
	}
	return(v);
}

int
pint::Meas()
{
	// Quantum-like random measurement
	return(Meas(rand() & (REBITS - 1)));
}

REreg_t
pint::First()
{
	// first is first 1 in any pbit
	Eval();
	GATES(GATESWORD);
	register REreg_t first = REBITS;
	forprec(i) {
		register REreg_t t = bit[i].First();
		if (t < first) first = t;
	}
	return(first);
}

REreg_t
pint::Pop()
{
	// pop(ulation) is number of non-zero multi-bit values
	Eval();
	GATES(GATELOG*GATESWORD);
	pint r = *this;
	r = r.Logic();
	return(r.bit[0].Ones());
}

// Helpers

int
pint::Valid() const {
	// check to see if this pbit is valid
	if (has_sign > 1) return(0); // can't happen?
	if ((prec < 1) || (prec >= PINTBITS)) return(0);
	forprec (i) {
		if (!bit[i].Valid()) return(0);
	}
	return(1);
}

pint
pint::Bit(const int b) const
{
	// Extract a particular bit position
	pint r(1);
	r.bit[0] = ((b < prec) ? bit[b] : pbit(0));
	return(r);
}

pint
pint::Minimize() const
{
	// make value as small as possible
	pint r = *this;

	if (r.has_sign) {
		// signed?
		if ((r.prec < 2) || (r.bit[r.prec-1].v != 0)) {
			// still signed
			int i=r.prec-1;
			if (i > 0) {
				r.bit[i].Eval(); // NEEDED?
				for (; i>0; --i) {
					r.bit[i-1].Eval(); // NEEDED?
					if (r.bit[i].v != r.bit[i-1].v) return(r);
					--r.prec;
				}
			}
			return(r);
		}

		// make unsigned
		r.has_sign = false;
		--(r.prec);
	}

	// unsigned
	for (int i=r.prec-1; i>0; --i) {
		if (r.bit[i].v) return(r);
		--r.prec;
	}
	return(r);
}

pint
pint::Extend(const int p) const
{
	// Extend to precision p, truncate if already bigger
	int pnew = ((p > PINTBITS) ? PINTBITS : p);
	pint r = *this;

	// Signed/unsigned extend
	if (r.prec < pnew) {
		for (int i=r.prec; i<pnew; ++i) {
			r.bit[i].v = (r.has_sign ? r.bit[i-1].v : 0);
		}
	}
	r.prec = pnew;
	return(r);
}

pint
pint::Promote(const pint& b) const
{
	// promote this to covering type for this, b
	register int p = ((prec < b.prec) ? b.prec : prec);
	pint r = *this;

	// if the signs differ and the signed one isn't bigger
	// than the unsigned one, add a bit and make both signed
	if (((has_sign && (!b.has_sign)) && (prec <= b.prec)) ||
	    ((b.has_sign && (!has_sign)) && (b.prec <= prec))) {
		++p;
	}

	// Extend precision
	r = r.Extend(p);

	// Correct sign
	r.has_sign = (has_sign | b.has_sign);
	return(r);
}

pint
pint::Select(pint vtrue, pint vfalse) const
{
	// multiplexor
	GATES(5*GATESWORD);
	pint r = Logic(); // make 1-bit logical value
	pbit c = r.bit[0];
	vtrue = vtrue.Promote(vfalse);
	vfalse = vfalse.Promote(vtrue);
	r = vtrue;
	for (int i=0; i<vtrue.prec; ++i) {
		r.bit[i] = c.Select(vtrue.bit[i], vfalse.bit[i]);
	}
	return(r.Minimize());
}

// Extended functions

pint
pint::Min(pint b) const
{
	pint r = Promote(b);
	b = b.Promote(r);
	pint gt = (r > b);
	for (int i=0; i<b.prec; ++i) {
		r.bit[i] = gt.bit[0].Select(b.bit[i], r.bit[i]);
	}
	return(r.Minimize());
}

pint
pint::Max(pint b) const
{
	pint r = Promote(b);
	b = b.Promote(r);
	pint gt = (r > b);
	for (int i=0; i<b.prec; ++i) {
		r.bit[i] = gt.bit[0].Select(r.bit[i], b.bit[i]);
	}
	return(r.Minimize());
}

// Reductions

int
pint::Any() const
{
	// this is the classic SIMD test
	// true if any entanglement channel value is non-0
	pint r = *this;
	r = r.Logic();
	r.bit[0].Eval();
	return(r.bit[0].v ? 1 : 0);
}

int
pint::All() const
{
	// this is the classic SIMD test
	// true is all entanglement channels values are non-0
	pint r = *this;
	r = r.Logic();
	r.bit[0].Eval();
	return((r.bit[0].v == 1) ? 1 : 0);
}

int
pint::ReduceAnd() const
{
	// unary bitwise AND reduction
	int r = 0;
	forprec(i) r |= ((bit[i].v == pbit1) << i);
	if (has_sign && (r & (1 << (prec-1)))) {
		// sign extend to make it negative value
		r |= ~((1 << (prec-1)) - 1);
	}
	return(r);
}

int
pint::ReduceOr() const
{
	// unary bitwise OR reduction
	int r = 0;
	forprec(i) r |= ((bit[i].v != pbit0) << i);
	if (has_sign && (r & (1 << (prec-1)))) {
		// sign extend to make it negative value
		r |= ~((1 << (prec-1)) - 1);
	}
	return(r);
}

int
pint::ReduceXOr() const
{
	// unary bitwise OR reduction
	int r = 0;
	forprec(i) r |= ((bit[i].Ones() & 1) << i);
	if (has_sign && (r & (1 << (prec-1)))) {
		// sign extend to make it negative value
		r |= ~((1 << (prec-1)) - 1);
	}
	return(r);
}

int
pint::ReduceAdd() const
{
	// unary add reduction (sum)
	int r = 0;
	if (!has_sign) {
		// unsigned does weighted add
		for (int i=prec-1; i>=0; --i) {
			// sums weighted population counts
			r += r + bit[i].Ones();
		}
	} else {
		// signed does pos & neg sums separately
		pint uns = (*this > pint(0));
		pint pos = uns.Select(*this, pint(0));
		pint neg = -uns.Select(pint(0), *this);
		for (int i=prec-1; i>=0; --i) {
			// sums weighted population counts
			r += r + pos.bit[i].Ones() - neg.bit[i].Ones();
		}
	}
	return(r);
}

int
pint::ReduceMul(const int preclimit = REWAYS) const
{
	// Reduce Mul by classical tree reduction
	// does large steps first because that makes
	// more of the chunks 0 early
	// Be careful! It's easy to go out of pint range
	pint r = *this;
	for (int i=REWAYS-1; i>=0; --i) {
		pint tomul = r.Rot(1<<i);
		r = r.Mul(tomul, preclimit);
	}
	return(r.Meas(0));
}

int
pint::ReduceMin() const
{
	// unary minimum reduction
	pint t = *this;
	int r = ((1 << prec) - 1);

	if (has_sign) {
		// make it unsigned by adding max positive + 1
		t.bit[prec-1] = t.bit[prec-1] ^ pbit(1);
		t.has_sign = false;
	}

	// from MSB down
	for (int i=prec-1; i>=0; --i) {
		// anybody got a 0 in this position?
		if (t.bit[i].v != pbit1) {
			// set lower bits that don't match uppers
			for (int j=0; j<i; ++j) {
				t.bit[j] = t.bit[j] | t.bit[i];
			}
			r ^= (1 << i);
		}
	}

	if (has_sign) {
		// if it was signed, subtract max positive
		r -= (1 << (prec - 1));
	}
	return(r);
}

int
pint::ReduceMax() const
{
	// unary maximum reduction
	pint t = *this;
	int r = 0;

	if (has_sign) {
		// make it unsigned by adding max positive + 1
		t.bit[prec-1] = t.bit[prec-1] ^ pbit(1);
		t.has_sign = false;
	}

	// from MSB down
	for (int i=prec-1; i>=0; --i) {
		// anybody got a 1 in this position?
		if (t.bit[i].v != pbit0) {
			// wipe lower bits that don't match uppers
			for (int j=0; j<i; ++j) {
				t.bit[j] = t.bit[j] & t.bit[i];
			}
			r |= (1 << i);
		}
	}

	if (has_sign) {
		// if it was signed, subtract max positive
		r -= (1 << (prec - 1));
	}
	return(r);
}

// Scans (parallel prefix)

pint
pint::ScanAnd() const
{
	// unary bitwise AND scan (parallel prefix)
	pint r = *this;
	forprec(i) {
		int pos = (~bit[i]).First();
		switch (pos) {
		case 0: r.bit[i] = pbit(0); break;
		case REBITS: break; // already pbit(1)
		default:
			pbit t(0);
			t = t.Dom(pos-1);
			r.bit[i] = t;
		}
	}
	return(r);
}

pint
pint::ScanOr() const
{
	// unary bitwise OR scan (parallel prefix)
	pint r = *this;
	forprec(i) {
		int pos = bit[i].First();
		switch (pos) {
		case 0: r.bit[i] = pbit(1); break;
		case REBITS: break; // already pbit(0)
		default:
			pbit t(1);
			t = t.Dom(pos-1);
			r.bit[i] = t;
		}
	}
	return(r);
}

pint
pint::ScanXOr() const
{
	// unary scan XOr (parallel prefix)
	pint r = *this;
	for (int i=0; i<REWAYS; ++i) {
		// send value up to i-dim neighbor
		pint mask = (pint(1)).Dom((1 << i) - 1);
		r = mask.Select(r ^ r.Rot(1 << i), r);
	}
	return(r);
}

pint
pint::ScanAdd() const
{
	// unary scan Add (parallel prefix)
	pint r = *this;
	for (int i=0; i<REWAYS; ++i) {
		// send value up to i-dim neighbor
		pint mask = pint(1).Dom((1 << i) - 1);
		r = mask.Select(r + r.Rot(1 << i), r);
	}
	return(r);
}

pint
pint::ScanMul(const int preclimit = REWAYS) const
{
	// unary scan Mul (parallel prefix)
	// note that this can easily exceed the range of
	// representable integers... that's not a bug
	pint r = *this;
	for (int i=0; i<prec; ++i) {
		// send value up to i-dim neighbor
		pint mask = (pint(1)).Dom((1 << i) - 1);
		r = mask.Select(r.Mul(r.Rot(1 << i), preclimit), r);
	}
	return(r);
}

pint
pint::ScanMin() const
{
	// unary scan Max (parallel prefix)
	pint r = *this;
	for (int i=0; i<REWAYS; ++i) {
		// send value up to i-dim neighbor
		pint mask = (pint(1)).Dom((1 << i) - 1);
		r = r.Min(mask.Select(r.Rot(1 << i), r));
	}
	return(r);
}

pint
pint::ScanMax() const
{
	// unary scan Max (parallel prefix)
	pint r = *this;
	for (int i=0; i<REWAYS; ++i) {
		// send value up to i-dim neighbor
		pint mask = (pint(1)).Dom((1 << i) - 1);
		r = r.Max(mask.Select(r.Rot(1 << i), r));
	}
	return(r);
}

// Gather

int
pint::Gather(int *p, const int n)
{
	// Gather a pint's entangled superposition
	// into the host array pointed to by p,
	// which is big enough for 2^ways elements
	// actual length of the array is returned
	int r = ((n > REBITS) ? REBITS : n);
	Eval();
	for (int e=0; e<r; ++e) {
		p[e] = Meas(e);
	}
	return(r);
}

// Friends

pint
Cover(const int min, const int max, const int dim = 0)
{
	// create an entangled superposition that covers [min..max]
	int range = (max - min) + 1;
	int bits = 0;
	while ((1 << (bits-1)) <= range) ++bits;
	++bits;
	return(pint(0).Had(bits, dim) + pint(min));
}

pint
Range(const int min, const int max, const int dim = 0)
{
	// create an entangled superposition that TIGHTLY covers [min..max]
	// excess values at the end are made into all 0
	pint r = Cover(min, max, dim);
	pint mask(0);
	mask = mask.Dom(max - min);
	return(mask.Select(r, pint(0)));
}

pint
Scatter(const int *p, const int n, const int reways = REWAYS)
{
	// Scatter a host array into the
	// entanglement channels of a pint
	int sawneg = 0; // any negative values?
	for (int j=0; j<n; ++j) {
		if (p[j] < 0) {
			sawneg = 1;
			break;
		}
	}
	pint r(0, reways);
	int i = 0;
	do {
		// what should first bit be?
		int prev = (p[0] & (1 << i));
		int t = (prev != 0);
		int state = 0;

		// keep flipping all below when we
		// see a change (domino)
		for (int j=1; j<n; ++j) {
			int now = (p[j] & (1 << i));
			if (now != prev) {
				r.bit[i] = r.bit[i].Dom(j-1);
				state = !state;
				prev = now;;
			}
		}

		// did we end right?
		if (state != t) {
			// one more flip of everything
			r.bit[i] = r.bit[i].Dom(n-1);
		}
	} while (++i < reways);

	if (sawneg) r = r.Signed();
	return(r.Minimize());
}

// Expensive library stuff

pint
pint::SortUp() const
{
	// use Flip to implement Bitonic sort...
	// of values in entanglement channels
	// this does NOT change probabilities
	// sorts ascending unless non-0 arg given
	pint r = *this;
	pint i;
	i = pint(0).Had(prec, 0);

	for (int k=2; k<=REBITS; k+=k) {
		for (int j=k>>1; j>0; j=j>>1) {
			pint mate = r.Flip(j);
			pint dir = (((i ^ pint(j)) > i) ^
				    ((i & pint(k & (REBITS-1))).Logic()));
			pint swap = ((r < mate) != dir) && ((r > mate) == dir);
			r = swap.Select(mate, r);
		}
	}
	return(r);
}

pint
pint::SortDown() const
{
	// use Flip to implement Bitonic sort...
	// of values in entanglement channels
	// this does NOT change probabilities
	// sorts ascending unless non-0 arg given
	pint r = *this;
	pint i;
	i = pint(0).Had(prec, 0);

	for (int k=2; k<=REBITS; k+=k) {
		for (int j=k>>1; j>0; j=j>>1) {
			pint mate = r.Flip(j);
			pint dir = (((i ^ pint(j)) > i) ^
				    ((i & pint(k & (REBITS-1))).Logic()));
			pint swap = ((r < mate) == dir) && ((r > mate) != dir);
			r = swap.Select(mate, r);
		}
	}
	return(r);
}

#endif
