/*	be_optimizer.c

	Optimizations on fragment tuple operations
*/


#undef DEBUG
#undef NOTDEFD

#include "swartypes.h"
#include "tuple.h"
#include "scheduler.h"
#include "tuplegen.h"
#include "oputils.h"


int
peephole(
int op,
int arg0,
int arg1,
int arg2,
typ t)
{
	/* Don't do optimization on trinary ops at this time */
	if (arg2 != -1) {
		return -1;
	}

	/* If possible optimize the tree for "op" and return an index to the
	   tree.  Otherwise, return -1 to indicate that no optimization was
	   performed.
	*/
	switch (op) {
	case AND:
		/* fold constant x AND y */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg1].op == NUM)) {
				p128_t tmp;
				tmp.q[1] = tup[arg0].immed.q[1] &
					   tup[arg1].immed.q[1];
				tmp.q[0] = tup[arg0].immed.q[0] &
					   tup[arg1].immed.q[0];
				return(immed128(tmp));
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg1].op == NUM)) {
				return(immed64u((p64_t)
					     (tup[arg0].immed.q[0] &
					      tup[arg1].immed.q[0])));
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg1].op == NUM)) {
				return(immed32u((p32_t)
					     (tup[arg0].immed.d[0] &
					      tup[arg1].immed.d[0])));
			}
			break;
		}

		/* x AND x is really x */
		if (arg0 == arg1) return(arg0);

		/* x AND NOT x is really 0 */
		if ((tup[arg0].op == NOT) &&
		    (tup[arg0].arg[0] == arg1)) {
			return(immed128((p128_t) {{0ULL,0ULL}}));
		}
		if ((tup[arg1].op == NOT) &&
		    (tup[arg1].arg[0] == arg0)) {
			return(immed128((p128_t) {{0ULL,0ULL}}));
		}

		/* 0 AND anything is 0 */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg0].immed.q[1] == 0ULL) &&
		    (tup[arg0].immed.q[0] == 0ULL)) {
			return(arg0);
		}
		if ((tup[arg1].op == NUM) &&
		    (tup[arg1].immed.q[1] == 0ULL) &&
		    (tup[arg1].immed.q[0] == 0ULL)) {
			return(arg1);
		}

		/* -1 (all 1's) AND anything is anything */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == -1ULL) &&
			    (tup[arg0].immed.q[0] == -1ULL)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == -1ULL) &&
			    (tup[arg1].immed.q[0] == -1ULL)) {
				return(arg0);
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == -1ULL)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == -1ULL)) {
				return(arg0);
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == -1)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == -1)) {
				return(arg0);
			}
			break;
		}

		/* sequential ANDs by two constants can be one */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg1].op == AND)) {
			if (tup[ tup[arg1].arg[0] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(AND,
						    tup[arg1].arg[1],
						     immed128((p128_t)
							{{(tup[arg0].immed.q[0]&
							  tup[tup[arg1].arg[0]
							     ].immed.q[0]),
							 (tup[arg0].immed.q[1] &
							  tup[tup[arg1].arg[0]
							     ].immed.q[1])
							}}),
						     t));
					break;
				case 64:
					return(binop(AND,
						     tup[arg1].arg[1],
						     immed64u((p64_t)
							(tup[arg0].immed.q[0] &
							 tup[tup[arg1].arg[0]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(AND,
						     tup[arg1].arg[1],
						     immed32((p32_t)
							(tup[arg0].immed.d[0] &
							 tup[tup[arg1].arg[0]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
			if (tup[ tup[arg1].arg[1] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(AND,
						     tup[arg1].arg[0],
						     immed128((p128_t)
							{{(tup[arg0].immed.q[0]&
							  tup[ tup[arg1].arg[1]
							     ].immed.q[0]),
							 (tup[arg0].immed.q[1] &
							  tup[ tup[arg1].arg[1]
							     ].immed.q[1])
							}}),
						     t));
					break;
				case 64:
					return(binop(AND,
						     tup[arg1].arg[0],
						     immed64u((p64_t)
							(tup[arg0].immed.q[0] &
							 tup[ tup[arg1].arg[1]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(AND,
						     tup[arg1].arg[0],
						     immed32((p32_t)
							(tup[arg0].immed.d[0] &
							 tup[ tup[arg1].arg[1]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
		}
		if ((tup[arg1].op == NUM) &&
		    (tup[arg0].op == AND)) {
			if (tup[ tup[arg0].arg[0] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(AND,
						     tup[arg0].arg[1],
						     immed128((p128_t)
							{{(tup[arg1].immed.q[0]&
							  tup[ tup[arg0].arg[0]
							     ].immed.q[0]),
							 (tup[arg1].immed.q[1] &
							  tup[ tup[arg0].arg[0]
							     ].immed.q[1])
							}}),
						     t));
					break;
				case 64:
					return(binop(AND,
						     tup[arg0].arg[1],
						     immed64u((p64_t)
							(tup[arg1].immed.q[0] &
							 tup[ tup[arg0].arg[0]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(AND,
						     tup[arg0].arg[1],
						     immed32((p32_t)
							(tup[arg1].immed.d[0] &
							 tup[ tup[arg0].arg[0]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
			if (tup[ tup[arg0].arg[1] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(AND,
						     tup[arg0].arg[0],
						     immed128((p128_t)
							{{(tup[arg1].immed.q[0]&
							  tup[ tup[arg0].arg[1]
							     ].immed.q[0]),
							 (tup[arg1].immed.q[1] &
							  tup[ tup[arg0].arg[1]
							     ].immed.q[1])
							}}),
						     t));
					break;
				case 64:
					return(binop(AND,
						     tup[arg0].arg[0],
						     immed64u((p64_t)
							(tup[arg1].immed.q[0] &
							 tup[ tup[arg0].arg[1]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(AND,
						     tup[arg0].arg[0],
						     immed32((p32_t)
							(tup[arg1].immed.d[0] &
							 tup[ tup[arg0].arg[1]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
		}
		break;

	case ANDN:
		/* fold constant x ANDN y */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg1].op == NUM)) {
			switch (bitsperfrag()) {
			case 128:
				return(immed128((p128_t)
					{{ (~(tup[arg0].immed.q[0]) &
					  tup[arg1].immed.q[0]),
					  (~(tup[arg0].immed.q[1]) &
					  tup[arg1].immed.q[1])
					}}));
				break;
			case 64:
				return(immed64u((p64_t)
				     	(~(tup[arg0].immed.q[0]) &
				      	tup[arg1].immed.q[0])));
				break;
			case 32:
				return(immed32((p32_t)
				     	(~(tup[arg0].immed.d[0]) &
				      	tup[arg1].immed.d[0])));
				break;
			}
		}

		/* x ANDN x is really 0 */
		if (arg0 == arg1) return(immed128((p128_t) {{0ULL,0ULL}}));

		/* (NOT x) ANDN x is really x */
		if ((tup[arg0].op == NOT) &&
		    (tup[arg0].arg[0] == arg1)) {
			return(arg1);
		}

		/* -1 ANDN anything is 0 */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == -1ULL) &&
			    (tup[arg0].immed.q[0] == -1ULL)) {
				return(immed128((p128_t) {{0ULL,0ULL}}));
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == -1ULL)) {
				return(immed64u((p64_t) 0ULL));
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == -1)) {
				return(immed32((p32_t) 0));
			}
			break;
		}

		/* anything ANDN 0 is 0 */
		if ((tup[arg1].op == NUM) &&
		    (tup[arg1].immed.q[1] == 0ULL) &&
		    (tup[arg1].immed.q[0] == 0ULL)) {
			return(arg1);
		}

		/* anything ANDN -1 is ~anything */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == -1ULL) &&
			    (tup[arg1].immed.q[0] == -1ULL)) {
				return(unop(NOT,arg0,typnull));
			}
			break;
		case 64:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == -1ULL)) {
				return(unop(NOT,arg0,typnull));
			}
			break;
		case 32:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == -1)) {
				return(unop(NOT,arg0,typnull));
			}
			break;
		}
		break;

	case EQ:
		/* (x EQ/GT/GE y) EQ 0 is really NOT (x EQ/GT/GE y) */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == 0ULL) &&
			    (tup[arg0].immed.q[0] == 0ULL) &&
			    ((tup[arg1].op == EQ) ||
			     (tup[arg1].op == GT) ||
			     (tup[arg1].op == GE))) {
				return(unop(NOT, arg1, t));
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == 0ULL) &&
			    (tup[arg1].immed.q[0] == 0ULL) &&
			    ((tup[arg0].op == EQ) ||
			     (tup[arg0].op == GT) ||
			     (tup[arg0].op == GE))) {
				return(unop(NOT, arg0, t));
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == 0ULL) &&
			    ((tup[arg1].op == EQ) ||
			     (tup[arg1].op == GT) ||
			     (tup[arg1].op == GE))) {
				return(unop(NOT, arg1, t));
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == 0ULL) &&
			    ((tup[arg0].op == EQ) ||
			     (tup[arg0].op == GT) ||
			     (tup[arg0].op == GE))) {
				return(unop(NOT, arg0, t));
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == 0) &&
			    ((tup[arg1].op == EQ) ||
			     (tup[arg1].op == GT) ||
			     (tup[arg1].op == GE))) {
				return(unop(NOT, arg1, t));
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == 0) &&
			    ((tup[arg0].op == EQ) ||
			     (tup[arg0].op == GT) ||
			     (tup[arg0].op == GE))) {
				return(unop(NOT, arg0, t));
			}
			break;
		}
		break;

	case OR:
		/* fold constant x OR y */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg1].op == NUM)) {
			switch (bitsperfrag()) {
			case 128:
				return(immed128((p128_t)
					     {{(tup[arg0].immed.q[0] |
					       tup[arg1].immed.q[0]),
					      (tup[arg0].immed.q[0] |
					       tup[arg1].immed.q[0])}}));
				break;
			case 64:
				return(immed64u((p64_t)
					     (tup[arg0].immed.q[0] |
					      tup[arg1].immed.q[0])));
				break;
			case 32:
				return(immed32((p32_t)
					     (tup[arg0].immed.d[0] |
					      tup[arg1].immed.d[0])));
				break;
			}
		}

		/* x OR x is really x */
		if (arg0 == arg1) return(arg0);

		/* x OR NOT x is really all 1s */
		if ((tup[arg0].op == NOT) &&
		    (tup[arg0].arg[0] == arg1)) {
			switch (bitsperfrag()) {
			case 128:
				return(immed128((p128_t)
						{{0xffffffffffffffffULL,
						  0xffffffffffffffffULL}}));
			case 64:
				return(immed64u((p64_t) 0xffffffffffffffffULL));
			case 32:
				return(immed32((p32_t) 0xffffffff));
			}
		}
		if ((tup[arg1].op == NOT) &&
		    (tup[arg1].arg[0] == arg0)) {
			switch (bitsperfrag()) {
			case 128:
				return(immed128((p128_t)
						{{0xffffffffffffffffULL,
						  0xffffffffffffffffULL}}));
			case 64:
				return(immed64u((p64_t) 0xffffffffffffffffULL));
			case 32:
				return(immed32((p32_t) 0xffffffff));
			}
		}

		/* 0 OR anything is anything */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == 0ULL) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == 0ULL) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == 0)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == 0)) {
				return(arg0);
			}
			break;
		}

		/* all 1 OR anything is all 1 */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == 0xffffffffffffffffULL) &&
			    (tup[arg0].immed.q[0] == 0xffffffffffffffffULL)) {
				return(arg0);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == 0xffffffffffffffffULL) &&
			    (tup[arg1].immed.q[0] == 0xffffffffffffffffULL)) {
				return(arg1);
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == 0xffffffffffffffffULL)) {
				return(arg0);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == 0xffffffffffffffffULL)) {
				return(arg1);
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == 0xffffffff)) {
				return(arg0);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == 0xffffffff)) {
				return(arg1);
			}
			break;
		}

		/* sequential ORs by two constants can be one */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg1].op == OR)) {
			if (tup[ tup[arg1].arg[0] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg1].arg[1],
						     immed128((p128_t)
							{{(tup[arg0].immed.q[1]|
							  tup[ tup[arg1].arg[0]
							     ].immed.q[1]),
							 (tup[arg0].immed.q[0] |
							  tup[ tup[arg1].arg[0]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg1].arg[1],
						     immed64u((p64_t)
							(tup[arg0].immed.q[0] |
							 tup[ tup[arg1].arg[0]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg1].arg[1],
						     immed32((p32_t)
							(tup[arg0].immed.d[0] |
							 tup[ tup[arg1].arg[0]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
			if (tup[ tup[arg1].arg[1] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg1].arg[0],
						     immed128((p128_t)
							{{(tup[arg0].immed.q[1]|
							  tup[ tup[arg1].arg[1]
							     ].immed.q[1]),
							 (tup[arg0].immed.q[0] |
							  tup[ tup[arg1].arg[1]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg1].arg[0],
						     immed64u((p64_t)
							(tup[arg0].immed.q[0] |
							 tup[ tup[arg1].arg[1]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg1].arg[0],
						     immed32((p32_t)
							(tup[arg0].immed.d[0] |
							 tup[ tup[arg1].arg[1]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
		}
		if ((tup[arg1].op == NUM) &&
		    (tup[arg0].op == OR)) {
			if (tup[ tup[arg0].arg[0] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg0].arg[1],
						     immed128((p128_t)
							{{(tup[arg1].immed.q[1]|
							  tup[ tup[arg0].arg[0]
							     ].immed.q[1]),
							 (tup[arg1].immed.q[0] |
							  tup[ tup[arg0].arg[0]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg0].arg[1],
						     immed64u((p64_t)
							(tup[arg1].immed.q[0] |
							 tup[ tup[arg0].arg[0]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg0].arg[1],
						     immed32((p32_t)
							(tup[arg1].immed.d[0] |
							 tup[ tup[arg0].arg[0]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
			if (tup[ tup[arg0].arg[1] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg0].arg[0],
						     immed128((p128_t)
							{{(tup[arg1].immed.q[1]|
							  tup[ tup[arg0].arg[1]
							     ].immed.q[1]),
							 (tup[arg1].immed.q[0] |
							  tup[ tup[arg0].arg[1]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg0].arg[0],
						     immed64u((p64_t)
							(tup[arg1].immed.q[0] |
							 tup[ tup[arg0].arg[1]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg0].arg[0],
						     immed32((p32_t)
							(tup[arg1].immed.d[0] |
							 tup[ tup[arg0].arg[1]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
		}

		break;

	case SHL:
		/* shifting 0 does nothing */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == 0ULL) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == 0)) {
				return(arg0);
			}
			break;
		}

		/* shifting by 0 does nothing */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == 0ULL) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 64:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 32:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == 0)) {
				return(arg0);
			}
			break;
		}

		/* shifting a constant by a constant is a constant */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg1].op == NUM)) {
			p128_t tmp;

			switch (bitsperfrag()) {
			case 128:
				tmp.uq[1] = tmp.uq[0] = 0ULL;

				/* If anything in upper 64 bits, the shift
				   will clear the register */
				if (tup[arg1].immed.uq[1] != 0ULL) {
					return(immed128(tmp));
				}

				tmp.uq[1] = (tup[arg0].immed.uq[1] <<
					     tup[arg1].immed.uq[0]) |
					    (tup[arg0].immed.uq[0] >>
					     (64ULL-tup[arg1].immed.uq[0]));
				tmp.uq[0] = tup[arg0].immed.uq[0] <<
					    tup[arg1].immed.uq[0];
				return(immed128(tmp));
			case 64:
				tmp.uq[1] = 0ULL;
				tmp.uq[0] = tup[arg0].immed.uq[0] <<
					    tup[arg1].immed.uq[0];
				return(immed128(tmp));
			case 32:
				tmp.uq[1] = tmp.uq[0] = 0ULL;
				tmp.ud[0] = tup[arg0].immed.ud[0] <<
					    tup[arg1].immed.ud[0];
				return(immed128(tmp));
			}
		}
		break;

	case SHR:
		/* shifting 0 does nothing */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == 0ULL) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == 0)) {
				return(arg0);
			}
			break;
		}

		/* shifting by 0 does nothing */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == 0ULL) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 64:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 32:
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == 0)) {
				return(arg0);
			}
			break;
		}

		/* shifting a constant by a constant is a constant */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg1].op == NUM)) {
			p128_t tmp;

			switch (bitsperfrag()) {
			case 128:
			    tmp.uq[1] = tmp.uq[0] = 0ULL;

			    if (t.attr & TYP_UNSIGN) {
				/* If anything in upper 64 bits, the shift
				   will clear the register */
				if (tup[arg1].immed.uq[1] != 0ULL) {
					return(immed128(tmp));
				}

				tmp.uq[0] = (tup[arg0].immed.uq[0] >>
					     tup[arg1].immed.uq[0]) |
					    (tup[arg0].immed.uq[1] <<
					     (64ULL-tup[arg1].immed.uq[0]));
				tmp.uq[1] = tup[arg0].immed.uq[1] >>
					    tup[arg1].immed.uq[0];
				return(immed128(tmp));
			    } else {
				/* If shift count is greater than 63, then we
				   just need to use the upper half of arg0 */
				if ( (tup[arg1].immed.uq[1] != 0ULL) ||
				     (tup[arg1].immed.uq[0] > 63ULL) ) {
					tmp.q[0] = tup[arg0].immed.q[1] >>
						  (tup[arg1].immed.uq[0]-64ULL);
					tmp.q[1] = tup[arg0].immed.q[1]>>63ULL;
				} else {
					/* Need to paste things together */
					tmp.uq[0] =
						(tup[arg0].immed.uq[0] >>
						 tup[arg1].immed.uq[0]) |
						(tup[arg0].immed.uq[1] <<
						 (64ULL-tup[arg1].immed.uq[0]));
					tmp.q[1] = tup[arg0].immed.q[1] >>
						   tup[arg1].immed.uq[0];
				}
				return(immed128(tmp));
			    }

			case 64:
				tmp.uq[1] = 0ULL;

				if (t.attr & TYP_UNSIGN) {
					tmp.uq[0] = tup[arg0].immed.uq[0] >>
						    tup[arg1].immed.uq[0];
				} else {
					tmp.q[0] = tup[arg0].immed.q[0] >>
						   tup[arg1].immed.uq[0];
				}
				return(immed128(tmp));
			case 32:
				tmp.uq[1] = tmp.uq[0] = 0ULL;

				if (t.attr & TYP_UNSIGN) {
					tmp.ud[0] = tup[arg0].immed.ud[0] >>
						    tup[arg1].immed.ud[0];
				} else {
					tmp.d[0] = tup[arg0].immed.d[0] >>
						   tup[arg1].immed.ud[0];
				}
				return(immed128(tmp));
			}
		}
		break;

	case XOR:
		/* fold constant x XOR y */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg1].op == NUM)) {
				return(immed128((p128_t)
						{{(tup[arg0].immed.q[1] ^
						 tup[arg1].immed.q[1]),
						 (tup[arg0].immed.q[0] ^
						 tup[arg1].immed.q[0])
						}}));
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg1].op == NUM)) {
				return(immed64u((p64_t)
					     (tup[arg0].immed.q[0] ^
					      tup[arg1].immed.q[0])));
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg1].op == NUM)) {
				return(immed32((p32_t)
					     (tup[arg0].immed.d[0] ^
					      tup[arg1].immed.d[0])));
			}
			break;
		}

		/* x XOR x is really 0 */
		if (arg0 == arg1) {
			switch (bitsperfrag()) {
			case 128:
				return(immed128((p128_t) {{0ULL,0ULL}}));
			case 64:
				return(immed64u((p64_t) 0ULL));
			case 32:
				return(immed32((p32_t) 0));
			}
		}

		/* x XOR NOT x is really all 1s */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NOT) &&
			    (tup[arg0].arg[0] == arg1)) {
				return(immed128((p128_t)
						{{0xffffffffffffffffULL,
						  0xffffffffffffffffULL}}));
			}
			if ((tup[arg1].op == NOT) &&
			    (tup[arg1].arg[0] == arg0)) {
				return(immed128((p128_t)
						{{0xffffffffffffffffULL,
						  0xffffffffffffffffULL}}));
			}
			break;
		case 64:
			if ((tup[arg0].op == NOT) &&
			    (tup[arg0].arg[0] == arg1)) {
				return(immed64u((p64_t) 0xffffffffffffffffULL));
			}
			if ((tup[arg1].op == NOT) &&
			    (tup[arg1].arg[0] == arg0)) {
				return(immed64u((p64_t) 0xffffffffffffffffULL));
			}
			break;
		case 32:
			if ((tup[arg0].op == NOT) &&
			    (tup[arg0].arg[0] == arg1)) {
				return(immed32((p32_t) 0xffffffff));
			}
			if ((tup[arg1].op == NOT) &&
			    (tup[arg1].arg[0] == arg0)) {
				return(immed32((p32_t) 0xffffffff));
			}
			break;
		}

		/* 0 XOR anything is anything */
		switch (bitsperfrag()) {
		case 128:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[1] == 0ULL) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[1] == 0ULL) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 64:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.q[0] == 0ULL)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.q[0] == 0ULL)) {
				return(arg0);
			}
			break;
		case 32:
			if ((tup[arg0].op == NUM) &&
			    (tup[arg0].immed.d[0] == 0)) {
				return(arg1);
			}
			if ((tup[arg1].op == NUM) &&
			    (tup[arg1].immed.d[0] == 0)) {
				return(arg0);
			}
			break;
		}

		/* sequential XORs by two constants can be one */
		if ((tup[arg0].op == NUM) &&
		    (tup[arg1].op == XOR)) {
			if (tup[ tup[arg1].arg[0] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg1].arg[1],
						     immed128((p128_t)
							{{(tup[arg0].immed.q[1]^
							  tup[ tup[arg1].arg[0]
							     ].immed.q[1]),
							 (tup[arg0].immed.q[0] ^
							  tup[ tup[arg1].arg[0]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg1].arg[1],
						     immed64u((p64_t)
							(tup[arg0].immed.q[0] ^
							 tup[ tup[arg1].arg[0]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg1].arg[1],
						     immed32((p32_t)
							(tup[arg0].immed.d[0] ^
							 tup[ tup[arg1].arg[0]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
			if (tup[ tup[arg1].arg[1] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg1].arg[0],
						     immed128((p128_t)
							{{(tup[arg0].immed.q[1]^
							  tup[ tup[arg1].arg[1]
							     ].immed.q[1]),
							 (tup[arg0].immed.q[0] ^
							  tup[ tup[arg1].arg[1]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg1].arg[0],
						     immed64u((p64_t)
							(tup[arg0].immed.q[0] ^
							 tup[ tup[arg1].arg[1]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg1].arg[0],
						     immed32((p32_t)
							(tup[arg0].immed.d[0] ^
							 tup[ tup[arg1].arg[1]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
		}
		if ((tup[arg1].op == NUM) &&
		    (tup[arg0].op == OR)) {
			if (tup[ tup[arg0].arg[0] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg0].arg[1],
						     immed128((p128_t)
							{{(tup[arg1].immed.q[1]^
							  tup[ tup[arg0].arg[0]
							     ].immed.q[1]),
							 (tup[arg1].immed.q[0] ^
							  tup[ tup[arg0].arg[0]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg0].arg[1],
						     immed64u((p64_t)
							(tup[arg1].immed.q[0] ^
							 tup[ tup[arg0].arg[0]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg0].arg[1],
						     immed32((p32_t)
							(tup[arg1].immed.d[0] ^
							 tup[ tup[arg0].arg[0]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
			if (tup[ tup[arg0].arg[1] ].op == NUM) {
				switch (bitsperfrag()) {
				case 128:
					return(binop(OR,
						     tup[arg0].arg[0],
						     immed128((p128_t)
							{{(tup[arg1].immed.q[1]^
							  tup[ tup[arg0].arg[1]
							     ].immed.q[1]),
							 (tup[arg1].immed.q[0] ^
							  tup[ tup[arg0].arg[1]
							     ].immed.q[0])
							}}),
						     t));
					break;
				case 64:
					return(binop(OR,
						     tup[arg0].arg[0],
						     immed64u((p64_t)
							(tup[arg1].immed.q[0] ^
							 tup[ tup[arg0].arg[1]
							    ].immed.q[0])),
						     t));
					break;
				case 32:
					return(binop(OR,
						     tup[arg0].arg[0],
						     immed32((p32_t)
							(tup[arg1].immed.d[0] ^
							 tup[ tup[arg0].arg[1]
							    ].immed.d[0])),
						     t));
					break;
				}
			}
		}
		break;

	case NOT:
		/* NOT NOT x is really x */
		if (tup[arg0].op == NOT) {
			return(tup[arg0].arg[0]);
		}
		break;

	case LNOT:
	case NEG:
		break;
	}

	/* If no optimization performed, return -1 */
	return(-1);
} /* peephole() */


int
be_cofold(int op,
int arg0,
int arg1,
int arg2,
typ t)
{
/* HEREHERE - Some of the folding assumes 64 bits, and needs fixing */
	if (tup[arg0].op == VNUM) {
		switch (op) {
		case LNOT:
			/* Should return field masks (i.e. ALL, not ONES) */
			switch (bitsperfield(t.bits)) {
			case 1:
			{
				p128_t t;
				t.q[1] = ~tup[arg0].immed.q[1];
				t.q[0] = ~tup[arg0].immed.q[0];

				switch (bitsperfrag()) {
				case 128:
					return(immed128(t));
				case 64:
					return(immed64u((p64_t) t.uq[0]));
				case 32:
					return(immed32((p32_t) t.ud[0]));
				}
				break;
			}
			case 2:
			{
				p128_t t;
				t.uq[1] = tup[arg0].immed.q[1];
				t.uq[0] = tup[arg0].immed.q[0];

				t.uq[1] |= ((t.uq[1] >> 1) &
					    0x5555555555555555ULL);
				t.uq[1] |= (t.uq[1] << 1);
				t.uq[0] |= ((t.uq[0] >> 1) &
					    0x5555555555555555ULL);
				t.uq[0] |= (t.uq[0] << 1);
				t.uq[1] = ~t.uq[1];
				t.uq[0] = ~t.uq[0];

				switch (bitsperfrag()) {
				case 128:
					return(immed128(t));
				case 64:
					return(immed64u((p64_t) ~t.uq[0]));
				case 32:
					return(immed32((p32_t) ~t.ud[0]));
				}
				break;
			}
			case 4:
			{
				p128_t t;
				t.uq[1] = tup[arg0].immed.q[1];
				t.uq[0] = tup[arg0].immed.q[0];

				t.uq[1] |= ((t.uq[1] >> 2) &
					    0x3333333333333333ULL);
				t.uq[1] |= ((t.uq[1] >> 1) &
					    0x1111111111111111ULL);
				t.uq[1] |= (t.uq[1] << 1);
				t.uq[1] |= (t.uq[1] << 2);

				t.uq[0] |= ((t.uq[0] >> 2) &
					    0x3333333333333333ULL);
				t.uq[0] |= ((t.uq[0] >> 1) &
					    0x1111111111111111ULL);
				t.uq[0] |= (t.uq[0] << 1);
				t.uq[0] |= (t.uq[0] << 2);

				t.uq[1] = ~t.uq[1];
				t.uq[0] = ~t.uq[0];

				switch (bitsperfrag()) {
				case 128:
					return(immed128(t));
				case 64:
					return(immed64u((p64_t) t.uq[0]));
				case 32:
					return(immed32((p32_t) t.ud[0]));
				}
				break;
			}
			case 8:
			{
				p128_t t;
				t.uq[1] = tup[arg0].immed.q[1];
				t.uq[0] = tup[arg0].immed.q[0];

				t.uq[1] |= ((t.uq[1] >> 4) &
					    0x0f0f0f0f0f0f0f0fULL);
				t.uq[1] |= ((t.uq[1] >> 2) &
					    0x0303030303030303ULL);
				t.uq[1] |= ((t.uq[1] >> 1) &
					    0x0101010101010101ULL);
				t.uq[1] |= (t.uq[1] << 1);
				t.uq[1] |= (t.uq[1] << 2);
				t.uq[1] |= (t.uq[1] << 4);

				t.uq[0] |= ((t.uq[0] >> 4) &
					    0x0f0f0f0f0f0f0f0fULL);
				t.uq[0] |= ((t.uq[0] >> 2) &
					    0x0303030303030303ULL);
				t.uq[0] |= ((t.uq[0] >> 1) &
					    0x0101010101010101ULL);
				t.uq[0] |= (t.uq[0] << 1);
				t.uq[0] |= (t.uq[0] << 2);
				t.uq[0] |= (t.uq[0] << 4);

				t.uq[1] = ~t.uq[1];
				t.uq[0] = ~t.uq[0];

				switch (bitsperfrag()) {
				case 128:
					return(immed128(t));
				case 64:
					return(immed64u((p64_t) t.uq[0]));
				case 32:
					return(immed32((p32_t) t.ud[0]));
				}
				break;
			}
			case 16:
			{
				p128_t t;
				t.uq[1] = tup[arg0].immed.q[1];
				t.uq[0] = tup[arg0].immed.q[0];

				t.uq[1] |= ((t.uq[1] >> 8) &
					    0x00ff00ff00ff00ffULL);
				t.uq[1] |= ((t.uq[1] >> 4) &
					    0x000f000f000f000fULL);
				t.uq[1] |= ((t.uq[1] >> 2) &
					    0x0003000300030003ULL);
				t.uq[1] |= ((t.uq[1] >> 1) &
					    0x0001000100010001ULL);
				t.uq[1] |= (t.uq[1] << 1);
				t.uq[1] |= (t.uq[1] << 2);
				t.uq[1] |= (t.uq[1] << 4);
				t.uq[1] |= (t.uq[1] << 8);

				t.uq[0] |= ((t.uq[0] >> 8) &
					    0x00ff00ff00ff00ffULL);
				t.uq[0] |= ((t.uq[0] >> 4) &
					    0x000f000f000f000fULL);
				t.uq[0] |= ((t.uq[0] >> 2) &
					    0x0003000300030003ULL);
				t.uq[0] |= ((t.uq[0] >> 1) &
					    0x0001000100010001ULL);
				t.uq[0] |= (t.uq[0] << 1);
				t.uq[0] |= (t.uq[0] << 2);
				t.uq[0] |= (t.uq[0] << 4);
				t.uq[0] |= (t.uq[0] << 8);

				t.uq[1] = ~t.uq[1];
				t.uq[0] = ~t.uq[0];

				switch (bitsperfrag()) {
				case 128:
					return(immed128(t));
				case 64:
					return(immed64u((p64_t) ~t.uq[0]));
				case 32:
					return(immed32((p32_t) ~t.ud[0]));
				}
				break;
			}
			case 32:
			{
				p128_t t;
				t.uq[1] = tup[arg0].immed.q[1];
				t.uq[0] = tup[arg0].immed.q[0];

				t.uq[1] |= ((t.uq[1] >> 16) &
					    0x0000ffff0000ffffULL);
				t.uq[1] |= ((t.uq[1] >> 8) &
					    0x000000ff000000ffULL);
				t.uq[1] |= ((t.uq[1] >> 4) &
					    0x0000000f0000000fULL);
				t.uq[1] |= ((t.uq[1] >> 2) &
					    0x0000000300000003ULL);
				t.uq[1] |= ((t.uq[1] >> 1) &
					    0x0000000100000001ULL);
				t.uq[1] |= (t.uq[1] << 1);
				t.uq[1] |= (t.uq[1] << 2);
				t.uq[1] |= (t.uq[1] << 4);
				t.uq[1] |= (t.uq[1] << 8);
				t.uq[1] |= (t.uq[1] << 16);

				t.uq[0] |= ((t.uq[0] >> 16) &
					    0x0000ffff0000ffffULL);
				t.uq[0] |= ((t.uq[0] >> 8) &
					    0x000000ff000000ffULL);
				t.uq[0] |= ((t.uq[0] >> 4) &
					    0x0000000f0000000fULL);
				t.uq[0] |= ((t.uq[0] >> 2) &
					    0x0000000300000003ULL);
				t.uq[0] |= ((t.uq[0] >> 1) &
					    0x0000000100000001ULL);
				t.uq[0] |= (t.uq[0] << 1);
				t.uq[0] |= (t.uq[0] << 2);
				t.uq[0] |= (t.uq[0] << 4);
				t.uq[0] |= (t.uq[0] << 8);
				t.uq[0] |= (t.uq[0] << 16);

				t.uq[1] = ~t.uq[1];
				t.uq[0] = ~t.uq[0];

				switch (bitsperfrag()) {
				case 128:
					return(immed128(t));
				case 64:
					return(immed64u((p64_t) t.uq[0]));
				case 32:
					return(immed32((p32_t) t.ud[0]));
				}
				break;
			}
			}
			break;
		case NEG:
			break;
		case NOT:
			{
			p128_t t;
			t.uq[1] = tup[arg0].immed.q[1];
			t.uq[0] = tup[arg0].immed.q[0];

			t.uq[1] = ~t.uq[1];
			t.uq[0] = ~t.uq[0];

			switch (bitsperfrag()) {
			case 128:
				return(immed128(t));
			case 64:
				return(immed64u((p64_t) t.uq[0]));
			case 32:
				return(immed32((p32_t) t.ud[0]));
			}
			break;
			}
		}
	}

	/* Don't have a constant fold for this, so return -1 */
	return(-1);
} /* be_cofold */

