// -----------------------------------------------------------------------------
// 1-bit full adder built from 2-input primitives, one time unit per gate.
//   s    = a ^ b ^ cin
//   cout = (a & b) | (a & cin) | (b & cin)
// -----------------------------------------------------------------------------
module full_adder(s, cout, a, b, cin);
    input  a, b, cin;   // 1-bit operands and carry-in
    output s, cout;     // 1-bit sum and carry-out
    wire [4:0] w;       // internal nets

    // sum is 1 when exactly one, or all three, of the inputs are 1
    xor #1
        x1(w[0], a, b),
        x2(s, w[0], cin);

    // carry-out is 1 when any two inputs are 1
    and #1
        a1(w[1], a, b),
        a2(w[2], a, cin),
        a3(w[3], b, cin);

    // deliberately chained 2-input ORs so the gate delay accumulates
    or #1
        o1(w[4], w[1], w[2]),
        o2(cout, w[4], w[3]);
endmodule

// -----------------------------------------------------------------------------
// 2-to-1 multiplexer: y = a when s is 0, y = b when s is 1.
// -----------------------------------------------------------------------------
module mux_2to1(y, a, b, s);
    input  a, b, s;             // data inputs and selector
    output y;                   // selected value
    wire   not_s, a_out, b_out; // inverted selector and gated data

    not #1 not0(not_s, s);

    // gate each data input with its select condition
    and #1
        and0(a_out, a, not_s),
        and1(b_out, b, s);

    or #1 or0(y, a_out, b_out);
endmodule

// -----------------------------------------------------------------------------
// 1-bit adder/subtractor slice.
// When sub is 1 the b operand is inverted before it reaches the full adder.
// The "+1" needed for two's-complement subtraction is NOT added here; it is
// injected as the carry-in of bit 0 by the enclosing multi-bit adder.
// -----------------------------------------------------------------------------
module add_sub(s, cout, a, b, cin, sub);
    input  a, b, cin, sub;  // operands, carry-in, subtract flag
    output s, cout;         // sum and carry-out
    wire   not_b, b_out;    // inverted b, and the b value chosen by the mux

    not #1 not0(not_b, b);
    mux_2to1   mux0(b_out, b, not_b, sub);      // sub ? ~b : b
    full_adder add0(s, cout, a, b_out, cin);
endmodule

// -----------------------------------------------------------------------------
// 4-to-1 multiplexer made of three 2-to-1 multiplexers.
//   s = 00 -> a, 01 -> b, 10 -> c, 11 -> d
// -----------------------------------------------------------------------------
module mux_4to1(y, a, b, c, d, s);
    input  [1:0] s;         // 2-bit selector
    input  a, b, c, d;      // data inputs
    output y;               // selected value
    wire   out_ab, out_cd;  // first-stage results

    mux_2to1
        mux0(out_ab, a, b, s[0]),           // low bit picks within {a, b}
        mux1(out_cd, c, d, s[0]),           // low bit picks within {c, d}
        mux2(y, out_ab, out_cd, s[1]);      // high bit picks the pair
endmodule

// -----------------------------------------------------------------------------
// Single-bit ALU slice (only useful when chained, see alu_32bit).
//   op[1:0] = 00 -> a AND b
//   op[1:0] = 01 -> a OR  b
//   op[1:0] = 10 -> adder output (op[2] = 1 inverts b for subtraction)
//   op[1:0] = 11 -> the 'less' input is passed straight to out
// 'set' always exposes the adder sum, independent of the selected operation.
// -----------------------------------------------------------------------------
module alu(out, cout, set, a, b, cin, op, less);
    input  [2:0] op;                    // opcode
    input  a, b, cin, less;             // operands, carry-in, SLT input
    output out, cout, set;              // result, carry-out, adder sum
    wire   and_out, or_out, add_out;    // per-operation results

    // 'set' mirrors the adder sum through a separate internal net, because
    // the sum is also needed as a mux input
    assign set = add_out;

    and #1 and0(and_out, a, b);
    or  #1 or0(or_out, a, b);
    add_sub  add0(add_out, cout, a, b, cin, op[2]);
    mux_4to1 mux0(out, and_out, or_out, add_out, less, op[1:0]);
endmodule

// -----------------------------------------------------------------------------
// 32-bit ripple-carry ALU assembled from 32 'alu' slices.
//   out  : 32-bit result
//   cout : carry-out of bit 31
//   zf   : 1 when every bit of out is 0
//   of   : carry into bit 31 XOR carry out of bit 31 (signed adder overflow);
//          the adder always runs, so this is driven for every opcode
// op[2] is fed to bit 0 as carry-in, which supplies the "+1" for subtraction.
// For op[1:0] = 11 bit 0 outputs (of XOR adder sum bit 31) and bits 31..1
// output 0, i.e. set-on-less-than when op[2] is also 1.
// -----------------------------------------------------------------------------
module alu_32bit(out, cout, zf, of, a, b, op);
    input  [31:0] a, b;
    input  [2:0]  op;
    output [31:0] out;
    output cout, zf, of;

    wire [31:0] c;      // carry-out of every slice
    wire [31:0] set;    // adder sum of every slice; only set[31] is consumed
    wire [29:0] orw;    // nodes of the zero-flag OR tree
    wire less,          // SLT result, fed back into slice 0
         of_wire;       // internal copy of the overflow flag

    // Each slice drives its own bit of 'set', so no net has more than one
    // driver. The 'less' input of slices 1..31 is tied to a sized 1'b0.
    alu
        alu0 (out[0],  c[0],  set[0],  a[0],  b[0],  op[2],  op, less),
        alu1 (out[1],  c[1],  set[1],  a[1],  b[1],  c[0],   op, 1'b0),
        alu2 (out[2],  c[2],  set[2],  a[2],  b[2],  c[1],   op, 1'b0),
        alu3 (out[3],  c[3],  set[3],  a[3],  b[3],  c[2],   op, 1'b0),
        alu4 (out[4],  c[4],  set[4],  a[4],  b[4],  c[3],   op, 1'b0),
        alu5 (out[5],  c[5],  set[5],  a[5],  b[5],  c[4],   op, 1'b0),
        alu6 (out[6],  c[6],  set[6],  a[6],  b[6],  c[5],   op, 1'b0),
        alu7 (out[7],  c[7],  set[7],  a[7],  b[7],  c[6],   op, 1'b0),
        alu8 (out[8],  c[8],  set[8],  a[8],  b[8],  c[7],   op, 1'b0),
        alu9 (out[9],  c[9],  set[9],  a[9],  b[9],  c[8],   op, 1'b0),
        alu10(out[10], c[10], set[10], a[10], b[10], c[9],   op, 1'b0),
        alu11(out[11], c[11], set[11], a[11], b[11], c[10],  op, 1'b0),
        alu12(out[12], c[12], set[12], a[12], b[12], c[11],  op, 1'b0),
        alu13(out[13], c[13], set[13], a[13], b[13], c[12],  op, 1'b0),
        alu14(out[14], c[14], set[14], a[14], b[14], c[13],  op, 1'b0),
        alu15(out[15], c[15], set[15], a[15], b[15], c[14],  op, 1'b0),
        alu16(out[16], c[16], set[16], a[16], b[16], c[15],  op, 1'b0),
        alu17(out[17], c[17], set[17], a[17], b[17], c[16],  op, 1'b0),
        alu18(out[18], c[18], set[18], a[18], b[18], c[17],  op, 1'b0),
        alu19(out[19], c[19], set[19], a[19], b[19], c[18],  op, 1'b0),
        alu20(out[20], c[20], set[20], a[20], b[20], c[19],  op, 1'b0),
        alu21(out[21], c[21], set[21], a[21], b[21], c[20],  op, 1'b0),
        alu22(out[22], c[22], set[22], a[22], b[22], c[21],  op, 1'b0),
        alu23(out[23], c[23], set[23], a[23], b[23], c[22],  op, 1'b0),
        alu24(out[24], c[24], set[24], a[24], b[24], c[23],  op, 1'b0),
        alu25(out[25], c[25], set[25], a[25], b[25], c[24],  op, 1'b0),
        alu26(out[26], c[26], set[26], a[26], b[26], c[25],  op, 1'b0),
        alu27(out[27], c[27], set[27], a[27], b[27], c[26],  op, 1'b0),
        alu28(out[28], c[28], set[28], a[28], b[28], c[27],  op, 1'b0),
        alu29(out[29], c[29], set[29], a[29], b[29], c[28],  op, 1'b0),
        alu30(out[30], c[30], set[30], a[30], b[30], c[29],  op, 1'b0),
        alu31(out[31], c[31], set[31], a[31], b[31], c[30],  op, 1'b0);

    // Zero flag: OR-reduce all 32 result bits with a balanced tree of
    // 2-input gates (so the modelled delay reflects a realistic gate depth
    // rather than a single wide gate), then invert with the final NOR.
    or #1
        // level 1: pairs of result bits
        or0 (orw[0],  out[0],  out[1]),
        or1 (orw[1],  out[2],  out[3]),
        or2 (orw[2],  out[4],  out[5]),
        or3 (orw[3],  out[6],  out[7]),
        or4 (orw[4],  out[8],  out[9]),
        or5 (orw[5],  out[10], out[11]),
        or6 (orw[6],  out[12], out[13]),
        or7 (orw[7],  out[14], out[15]),
        or8 (orw[8],  out[16], out[17]),
        or9 (orw[9],  out[18], out[19]),
        or10(orw[10], out[20], out[21]),
        or11(orw[11], out[22], out[23]),
        or12(orw[12], out[24], out[25]),
        or13(orw[13], out[26], out[27]),
        or14(orw[14], out[28], out[29]),
        or15(orw[15], out[30], out[31]),
        // level 2
        or16(orw[16], orw[0],  orw[1]),
        or17(orw[17], orw[2],  orw[3]),
        or18(orw[18], orw[4],  orw[5]),
        or19(orw[19], orw[6],  orw[7]),
        or20(orw[20], orw[8],  orw[9]),
        or21(orw[21], orw[10], orw[11]),
        or22(orw[22], orw[12], orw[13]),
        or23(orw[23], orw[14], orw[15]),
        // level 3
        or24(orw[24], orw[16], orw[17]),
        or25(orw[25], orw[18], orw[19]),
        or26(orw[26], orw[20], orw[21]),
        or27(orw[27], orw[22], orw[23]),
        // level 4
        or28(orw[28], orw[24], orw[25]),
        or29(orw[29], orw[26], orw[27]);

    nor #1 nor0(zf, orw[28], orw[29]);

    // overflow = carry into MSB XOR carry out of MSB
    // less     = overflow XOR adder sum bit 31
    xor #1
        xor0(of_wire, c[30], c[31]),
        xor1(less, of_wire, set[31]);

    assign of   = of_wire;
    assign cout = c[31];
endmodule

// -----------------------------------------------------------------------------
// Test bench for alu_32bit.
// Opcodes exercised (as decoded by the alu slice):
//   000 (0) -- AND
//   001 (1) -- OR
//   010 (2) -- ADD
//   110 (6) -- SUB
//   111 (7) -- SLT
// A new vector is applied every 1000 time units.
// -----------------------------------------------------------------------------
module test_bench;
    reg  signed [31:0] a, b;
    reg  [2:0] op;
    wire signed [31:0] out;
    wire cout, zf, of;

    alu_32bit alu0(out, cout, zf, of, a, b, op);

    // Applies the same six operand pairs, 1000 time units apart, with the
    // given opcode. The all-0x3FFFFFFF pair is repeated between the other
    // patterns so each of them starts from the same previous state.
    task run_vectors;
        input [2:0] code;
        begin
            #1000 a = 32'h00000000; b = 32'h00000000; op = code;
            #1000 a = 32'h3FFFFFFF; b = 32'h3FFFFFFF; op = code;
            #1000 a = 32'h00000000; b = 32'hFFFFFFFF; op = code;
            #1000 a = 32'h3FFFFFFF; b = 32'h3FFFFFFF; op = code;
            #1000 a = 32'hAAAAAAAA; b = 32'h55555555; op = code;
            #1000 a = 32'h3FFFFFFF; b = 32'h3FFFFFFF; op = code;
        end
    endtask

    initial begin
        // parseable output (active): time out cout zf of a b op
        $monitor("%0d %0d %0d %0d %0d %0d %0d %0d", $time, out, cout, zf, of, a, b, op);

        // uncomment this one for human-readable output
        // $monitor("-- %0d -- op=%0d ------------------------------------\nA %b (%0d)\nB %b (%0d)\nOUT %b (%0d)\n cout=%b zf=%b of=%b", $time, op, a, a, b, b, out, out, cout, zf, of);

        // uncomment this for zero-flag test outputs
        // $monitor("%b %b", out, zf);

        run_vectors(3'd0);  // AND
        run_vectors(3'd1);  // OR
        run_vectors(3'd2);  // ADD
        run_vectors(3'd6);  // SUB
        run_vectors(3'd7);  // SLT

        #1000 a = 32'h00000000; b = 32'h00000000; op = 2;  // clear state
        #10000;
    end
endmodule