// Bit range of a cache tag (11 bits). In this file the tag is concatenated
// with the 8-bit index, the 2-bit line (word) select and 2 byte-offset bits
// to form the 23-bit memory address.
`define WIDTH_TAG	10:0
// Bit range of the per-word valid/dirty masks (4 bits, one per byte of a
// 32-bit cache word; bit 3 guards bits [31:24], bit 0 guards bits [7:0]).
`define WIDTH_VALID	3:0

// Cache controller with an instruction port and a data port in front of a
// 16-bit memory bus (the ava_* signals; presumably an Avalon-MM master --
// confirm against the system integration).
//
// Addresses are 23 bits and are split by cache_extract into tag / index /
// line; this module rebuilds bus addresses as {tag, index, line, 2'bxx}.
// Tags, valid/dirty bits and data live in the cache_tagstore,
// cache_validator and cache_datastore submodules; this module registers the
// CPU-side inputs, generates the stall outputs and runs the state machine
// that opens lines, fetches missing halfwords and writes dirty lines back.
module cache(
	input			clk,
	input			reset,			//Synchronous, active high
	
	//Instruction port
	input [22:0]	i_pc_,			//Unregistered fetch address (registered below as i_pc)
	output [31:0]	i_instr,

	output			i_stall,		//High while the instruction port misses or is not valid
	output			d_stall,		//High while the data port cannot complete its access

	//Data port (inputs are registered below, held while d_stall is high)
	input			d_doread_,d_dowrite_,
	input [22:0]	d_address_,
	input			d_mem16_,d_mem8_,	//Access size selects; neither set means a 32-bit access
	input [31:0]	d_writedata_,
	output [31:0]	d_readdata,
	
	//Memory bus master, 16 bits wide
	output reg 		ava_read,
	output reg 		ava_write,
	output reg[1:0]	ava_byteenable,
	output reg[22:0]ava_address,
	input [15:0]	ava_readdata,
	output [15:0]	ava_writedata,
	input			ava_waitrequest,

	//Diagnostic flags driven by cache_validator
	output				problem_a,
	output				problem_b
);

	//State encoding of the control state machine
	parameter S_IDLE=8'd0;
	parameter S_IOPENLINE=8'd1;		//Instruction tag miss: pick a set and open (or flush) it
	parameter S_IFETCH=8'd2;		//Instruction word not valid: start a bus read
	parameter S_IFETCH2=8'd3;		//Collect instruction halfwords from the bus
	parameter S_MEMPAUSE=8'd4;		//Wait for the bus, drop all strobes, return to idle
	parameter S_DOPENLINE=8'd5;		//Data tag miss: pick a set and open (or flush) it
	parameter S_DFETCH=8'd6;		//Data word not valid: start a bus read
	parameter S_DFETCH2=8'd7;		//Collect data halfwords from the bus
	parameter S_FLUSHLINE=8'd8;		//Write the dirty halfwords of a victim line back
	
	/////////////////////////////////
	//State and control registers
	//
	//These are declared ahead of the submodule instances because the
	//instances reference them in their port connections; an identifier that
	//is first seen in an instance terminal list would otherwise be taken as
	//an implicit 1-bit net.
	reg [7:0] state;
	reg [1:0] random;			//Free-running counter used as replacement tiebreaker
	reg i_clearing;
	reg d_clearing;
	
	reg flushstarted;			//Set after the first accepted cycle of S_FLUSHLINE
	reg flushduring;			//High while a victim line is being written back
	reg flushdone;
	reg [7:0] f_index;			//Index of the line being flushed
	reg [1:0] f_set;			//Set of the line being flushed
	reg [2:0] f_address;		//Halfword (0..7) currently selected within the flushed line
	reg [2:0] f_dataaddress;	//f_address delayed by one accepted cycle
	reg [1:0] f_dirty;			//Dirty bits of the selected halfword, used as byte enables
	
	reg [1:0] last_set;			//Set most recently opened by the instruction port

	reg			d_write;
	reg [31:0]d_writedata_cache;

	wire [1:0] i_set_replace;//=2'd0;
	wire [1:0] d_set_replace;//=2'd1;

	/////////////////////////////////
	//Clocking and synchronisation
	reg [22:0] i_pc;
	always @(posedge clk)
	begin
		i_pc<=i_pc_;
	end
	
	reg d_doread,d_dowrite,d_mem16,d_mem8;
	reg [22:0] d_address;
	reg [31:0] d_writedata;
	
	//Register the data-port request; the registered copy is held while the
	//data port is stalled.
	always @(posedge clk)
	if(reset)
	begin
		d_doread<=1'b0;
		d_dowrite<=1'b0;
		d_mem16<=1'b0;
		d_mem8<=1'b0;
		d_address<=23'b0;
		d_writedata<=32'b0;
	end
	else if(!d_stall)
	begin
		d_doread<=d_doread_;
		d_dowrite<=d_dowrite_;
		d_mem16<=d_mem16_;
		d_mem8<=d_mem8_;
		d_address<=d_address_;
		d_writedata<=d_writedata_;
	end

	/////////////////////////////////
	//Bit extract inputs
	//Each port is decoded twice: once from the registered address and once
	//from the unregistered input (names ending in '_').
	wire [`WIDTH_TAG] i_tag;
	wire [7:0] i_index;
	wire [1:0] i_line;
	cache_extract cix(
		.address(i_pc),
		.tag(i_tag),
		.index(i_index),
		.line(i_line)
	);
	wire [7:0] i_index_;
	wire [1:0] i_line_;
	cache_extract cix_(
		.address(i_pc_),
		.index(i_index_),
		.line(i_line_)
	);
	
	wire [`WIDTH_TAG] d_tag;
	wire [7:0] d_index;
	wire [1:0] d_line;
	cache_extract cdx(
		.address(d_address),
		.tag(d_tag),
		.index(d_index),
		.line(d_line)
	);
	wire [7:0] d_index_;
	wire [1:0] d_line_;
	cache_extract cdx_(
		.address(d_address_),
		.index(d_index_),
		.line(d_line_)
	);
	

	/////////////////////////////////
	//Cache tags
	reg [3:0] i_tagwrite;
	reg [3:0] d_tagwrite;

	wire [3:0] i_match;
	wire i_anymatch;
	wire [1:0] i_set;
	
	wire [3:0] d_match;
	wire d_anymatch;
	wire [1:0]d_set;
	
	wire [`WIDTH_TAG] f_tag;
	
	wire [`WIDTH_TAG] i_tagdata0,i_tagdata1,i_tagdata2,i_tagdata3;

	cache_tagstore ct(
		.clk(clk),

		.i_tag(i_tag),
		.i_index(i_index_),
		.i_tagwrite(i_tagwrite),
		.i_match(i_match),
		.i_anymatch(i_anymatch),
		.i_set(i_set),
		.i_tagdata0(i_tagdata0),
		.i_tagdata1(i_tagdata1),
		.i_tagdata2(i_tagdata2),
		.i_tagdata3(i_tagdata3),

		.d_tag(d_tag),
		//During a flush the data-side index addresses the victim line
		.d_index(flushduring ? f_index : d_stall ? d_index : d_index_),
		.d_tagwrite(d_tagwrite),
		.d_match(d_match),
		.d_anymatch(d_anymatch),
		.d_set(d_set),
		
		.f_set(f_set),
		.f_tag(f_tag)
	);

	/////////////////////////
	//Valid and dirty bits
	
	reg i_validwrite;
	reg [3:0] i_validwritedata;

	wire [15:0] i_allvaliddata;
	wire [15:0] i_alldirtydata;
	wire [`WIDTH_VALID] i_validdata;
	wire [`WIDTH_VALID] i_dirtydata;
	wire [3:0] i_allvalid;
	wire [3:0] i_somevalid;
	wire [3:0] i_alldirty;
	wire [3:0] i_somedirty;
	wire i_valid;

	reg d_validwrite;
	reg [3:0] d_validwritedata;
	
	wire [15:0] d_allvaliddata;
	wire [15:0] d_alldirtydata;
	wire [`WIDTH_VALID] d_validdata;
	wire [`WIDTH_VALID] d_dirtydata;
	wire [3:0] d_allvalid;
	wire [3:0] d_somevalid;
	wire [3:0] d_alldirty;
	wire [3:0] d_somedirty;
	wire d_valid;

	cache_validator cv(
		.clk(clk),

		.i_match(i_match),
		.i_set(i_set),
		.i_index(i_index_),
		.i_line(i_line),
		.i_validwrite(i_validwrite),
		.i_validwritedata(i_validwritedata),
		.i_clearing(i_clearing),
		.i_tagwrite(i_tagwrite),

		.i_allvaliddata(i_allvaliddata),
		.i_alldirtydata(i_alldirtydata),
		.i_validdata(i_validdata),
		.i_dirtydata(i_dirtydata),
		.i_allvalid(i_allvalid),
		.i_somevalid(i_somevalid),
		.i_alldirty(i_alldirty),
		.i_somedirty(i_somedirty),
		.i_valid(i_valid),

		.d_match(d_match),
		.d_set(d_set),
		.d_index((d_stall || (d_write && state==S_IDLE)) ? d_index : d_index_),
		.d_line(d_line),
		.d_validwrite(d_validwrite),
		.d_validwritedata(d_validwritedata),
		.d_clearing(d_clearing),
		.d_tagwrite(d_tagwrite),
		.idle(state==S_IDLE),
		.d_dowrite(d_dowrite),
		.d_write(d_write),
		.d_mem16(d_mem16),
		.d_mem8(d_mem8),
		.d_address(d_address[1:0]),

		.d_allvaliddata(d_allvaliddata),
		.d_alldirtydata(d_alldirtydata),
		.d_validdata(d_validdata),
		.d_dirtydata(d_dirtydata),
		.d_allvalid(d_allvalid),
		.d_somevalid(d_somevalid),
		.d_alldirty(d_alldirty),
		.d_somedirty(d_somedirty),
		.d_valid(d_valid),
		
		//When an instruction-side flush is about to start, the validator is
		//given the flush flag and index combinationally, one cycle before
		//the flushduring/f_index registers are updated.
		.flushduring(state==S_IOPENLINE && i_somedirty[i_set_replace] ? 1'b1 : flushduring),
		.flushdone(flushdone),
		.f_set(f_set),
		.f_index(state==S_IOPENLINE && i_somedirty[i_set_replace] ? i_index : f_index),
		
		.problem_a(problem_a),
		.problem_b(problem_b)
	);

	/////////////////////////////
	//Cache data
	reg 		i_write;
	reg [31:0]	i_writedata;
	wire [31:0]	i_data;

	wire [31:0]	d_readdata_word;

	cache_datastore cd(
		.clk(clk),
		
		.i_match(i_match),
		.i_set(i_set),
		.i_index(i_index_),
		.i_line(i_line_),
		.i_write(i_write),
		.i_writedata(i_writedata),
		.i_data(i_data),
		
		.d_match(d_match),
		//During a flush the data port is steered to the victim line so its
		//words can be read out for write-back.
		.d_set(flushduring ? f_set : d_set),
		.d_index(flushduring ? f_index : (d_stall || (d_write && state==S_IDLE)) ? d_index : d_index_),
		.d_line(flushduring ? (ava_waitrequest ? f_dataaddress[2:1] : f_address[2:1]) : (d_stall || (d_write && state==S_IDLE)) ? d_line : d_line_),
		.d_address(d_address[1:0]),
		.d_write(d_write),
		.d_dowrite(d_dowrite),
		.d_mem16(d_mem16),
		.d_mem8(d_mem8),
		.idle(state==S_IDLE),
		.d_writedata(d_writedata),
		.d_writedata_cache(d_writedata_cache),
		.d_readdata_word(d_readdata_word),
		.d_readdata(d_readdata)
	);

	////////////////////////////
	// Control logic

	//While a freshly fetched word is being written in the idle state, hand
	//it to the CPU directly instead of the data store output.
	assign i_instr=i_write && state==S_IDLE ? i_writedata : i_data;
		
	//assign stall=state!=S_IDLE || nextstate!=S_IDLE || (d_dowrite && !d_write);
	
	assign i_stall=!i_anymatch || !i_valid;
	assign d_stall=((d_doread || d_dowrite) && !d_anymatch) || (d_doread && !d_valid) || (d_dowrite && !d_write);
	
	//Priority of pending work: instruction tag miss, instruction data not
	//valid, data tag miss, data not valid for a read.
	wire [7:0] nextstate=reset									?	S_IDLE
						:!i_anymatch							?	S_IOPENLINE
						:!i_valid								?	S_IFETCH
						:(d_doread || d_dowrite) && !d_anymatch	?	S_DOPENLINE
						:d_doread && !d_valid					?	S_DFETCH
						/*:d_dowrite && !d_write					?	S_DWRITE*/
						:											S_IDLE;

	//High when both ports address the same index; used to keep the data
	//port from replacing the set the instruction port is using.
	wire lock_last_set=i_index==d_index;
	
	//Replacement choice for the instruction port, ranked on per-set
	//dirty/valid summary bits
	cache_sorter #(4) i_sorter(
		.clk(clk),
		.x0({i_alldirty[0],i_somedirty[0],i_allvalid[0],i_somevalid[0]}),
		.x1({i_alldirty[1],i_somedirty[1],i_allvalid[1],i_somevalid[1]}),
		.x2({i_alldirty[2],i_somedirty[2],i_allvalid[2],i_somevalid[2]}),
		.x3({i_alldirty[3],i_somedirty[3],i_allvalid[3],i_somevalid[3]}),
		.tiebreaker(random),
		.y(i_set_replace)
	);

	//Replacement choice for the data port; the extra top bit marks sets
	//that the instruction port last opened or whose tag matches i_tag
	cache_sorter #(5) d_sorter(
		.clk(clk),
		.x0({lock_last_set && (last_set==2'd0 || i_tagdata0==i_tag),d_alldirty[0],d_somedirty[0],d_allvalid[0],d_somevalid[0]}),
		.x1({lock_last_set && (last_set==2'd1 || i_tagdata1==i_tag),d_alldirty[1],d_somedirty[1],d_allvalid[1],d_somevalid[1]}),
		.x2({lock_last_set && (last_set==2'd2 || i_tagdata2==i_tag),d_alldirty[2],d_somedirty[2],d_allvalid[2],d_somevalid[2]}),
		.x3({lock_last_set && (last_set==2'd3 || i_tagdata3==i_tag),d_alldirty[3],d_somedirty[3],d_allvalid[3],d_somevalid[3]}),
		.tiebreaker(random),
		.y(d_set_replace)
	);
	
	//Tiebreaker counter. It is given a reset value so that it does not stay
	//unknown (X) for the whole of a simulation run.
	always @(posedge clk)
	if(reset)
		random<=2'b0;
	else
		random<=random+2'b1;


	//Main state machine
	always @(posedge clk)
	if(reset)
	begin
		state<=S_IDLE;
		//Drop every write strobe and bus request as well, so that a reset
		//arriving in the middle of a fetch or flush does not leave them
		//asserted for as long as reset is held. last_set is deliberately
		//not reset here.
		i_tagwrite<=4'b0000;
		d_tagwrite<=4'b0000;
		i_validwrite<=1'b0;
		d_validwrite<=1'b0;
		i_write<=1'b0;
		d_write<=1'b0;
		i_clearing<=1'b0;
		d_clearing<=1'b0;
		ava_read<=1'b0;
		ava_write<=1'b0;
		flushstarted<=1'b0;
		flushduring<=1'b0;
		flushdone<=1'b0;
		f_address<=3'b0;
	end
	else
	case(state)
	S_IDLE:
	begin
		i_tagwrite<=4'b0000;
		d_tagwrite<=4'b0000;
		i_validwrite<=1'b0;
		d_validwrite<=1'b0;
		i_write<=1'b0;
		d_write<=1'b0;
		i_clearing<=1'b0;
		d_clearing<=1'b0;
		ava_read<=1'b0;
		ava_write<=1'b0;
		flushstarted<=1'b0;
		flushduring<=1'b0;
		flushdone<=1'b0;
		f_address<=3'b0;
		state<=nextstate;
		//A data write that hits is performed while staying idle; this later
		//assignment overrides the d_write<=0 above.
		if(nextstate==S_IDLE && d_dowrite && !d_write)d_write<=1'b1;
	end
	S_IOPENLINE:
	begin
		if(i_somedirty[i_set_replace])
		begin
			//Flush the line
			f_set<=i_set_replace;
			f_index<=i_index;
			flushduring<=1'b1;
			state<=S_FLUSHLINE;
		end
		else
		begin
			//Open the cache line, clearing valid bits if necessary
			i_tagwrite[i_set_replace]<=1'b1;
			i_clearing<=i_somevalid[i_set_replace];

			//Record which i_set has just been opened
			last_set<=i_set_replace;

			state<=S_MEMPAUSE;
		end
	end
	S_IFETCH:
	begin
		i_write<=1'b0;
		//Start from the current word so that bytes which are not fetched
		//keep their contents
		i_writedata <= i_data;
		i_validwrite<=1'b0;
		ava_byteenable<=2'b11;
		if(!i_validdata[3] || !i_validdata[2])
		begin
			//Upper halfword (bits 31:16) is missing
			ava_read<=1'b1;
			ava_address<={i_tag,i_index,i_line,2'b00};
			state<=S_IFETCH2;
		end
		else if(!i_validdata[1] || !i_validdata[0])
		begin
			//Only the lower halfword (bits 15:0) is missing
			ava_read<=1'b1;
			ava_address<={i_tag,i_index,i_line,2'b10};
			state<=S_IFETCH2;
		end
		else
			state<=nextstate; //Error, this is already all valid?
	end
	S_IFETCH2:
	if(!ava_waitrequest)
	begin
		i_write<=1'b1;
		//Merge the returned halfword into the word, keeping dirty bytes
		i_writedata<=ava_address[1] ?
			{
				i_writedata[31:16],
				i_dirtydata[1] ? i_writedata[15:8] : ava_readdata[15:8],
				i_dirtydata[0] ? i_writedata[7:0] : ava_readdata[7:0]
			}
		:
			{
				i_dirtydata[3] ? i_writedata[31:24] : ava_readdata[15:8],
				i_dirtydata[2] ? i_writedata[23:16] : ava_readdata[7:0],
				i_writedata[15:0]
			}
		;
		
		//Accumulate valid bits across the (up to two) halfword reads
		i_validwrite<=1'b1;
		i_validwritedata<=	i_validdata
						|	({4{i_validwrite}} & i_validwritedata)
						|	(ava_address[1] ? 4'b0011 : 4'b1100);
		
		if(ava_address[1] || (i_validdata[1] && i_validdata[0]))
		begin
			ava_read<=1'b0;
			state<=S_MEMPAUSE;
		end
		else
		begin
			//Lower halfword still needed: keep reading
			ava_address<={i_tag,i_index,i_line,2'b10};
			state<=S_IFETCH2;
		end
	end
	S_MEMPAUSE:
	begin
		if(!ava_waitrequest)
		begin
			i_tagwrite<=4'b0000;
			d_tagwrite<=4'b0000;
			i_validwrite<=1'b0;
			d_validwrite<=1'b0;
			i_write<=1'b0;
			d_write<=1'b0;
			i_clearing<=1'b0;
			d_clearing<=1'b0;
			ava_read<=1'b0;
			ava_write<=1'b0;
			flushstarted<=1'b0;
			flushduring<=1'b0;
			flushdone<=1'b0;
			f_address<=3'b0;
			state<=S_IDLE;
		end
	end
	S_DOPENLINE:
	begin
		if(d_somedirty[d_set_replace])
		begin
			//Flush the line
			f_set<=d_set_replace;
			f_index<=d_index;
			flushduring<=1'b1;
			state<=S_FLUSHLINE;
		end
		else
		begin
			//Open the cache line, clearing valid bits if necessary
			d_tagwrite[d_set_replace]<=1'b1;

			d_clearing<=d_somevalid[d_set_replace];

			state<=S_MEMPAUSE;
		end
	end
	S_DFETCH:
	begin
		d_write<=1'b0;
		d_writedata_cache<=d_readdata_word;
		d_validwrite<=1'b0;
		ava_byteenable<=2'b11;
		
		//Does the access need the upper halfword (bytes 0 and 1)?
		//The byte-access term is grouped explicitly; '&&' binds tighter
		//than '||', so without the extra parentheses d_mem8 would only
		//gate the first of the two byte tests.
		if( 
			(d_mem8 && ((d_address[1:0]==2'd0 && !d_validdata[3]) || (d_address[1:0]==2'd1 && !d_validdata[2])))
		||	(d_mem16 && d_address[1]==1'b0 && (!d_validdata[3] || !d_validdata[2]))
		||	(!d_mem16 && !d_mem8 && (!d_validdata[3] || !d_validdata[2]))
		)
		begin
			ava_read<=1'b1;
			ava_address<={d_tag,d_index,d_line,2'b00};
			state<=S_DFETCH2;
		end
		//Otherwise, does it need the lower halfword (bytes 2 and 3)?
		else if(
			(d_mem8 && ((d_address[1:0]==2'd2 && !d_validdata[1]) || (d_address[1:0]==2'd3 && !d_validdata[0])))
		||	(d_mem16 && d_address[1]==1'b1 && (!d_validdata[1] || !d_validdata[0]))
		||	(!d_mem16 && !d_mem8 && (!d_validdata[1] || !d_validdata[0]))
		)
		begin
			ava_read<=1'b1;
			ava_address<={d_tag,d_index,d_line,2'b10};
			state<=S_DFETCH2;
		end
		else
			state<=nextstate; //Error, this is already all valid?
	end
	S_DFETCH2:
	if(!ava_waitrequest)
	begin
		d_write<=1'b1;

		//Merge the returned halfword into the word, keeping dirty bytes
		d_writedata_cache<=
			ava_address[1] ?
			{
				d_writedata_cache[31:16],
				d_dirtydata[1] ? d_writedata_cache[15:8] : ava_readdata[15:8],
				d_dirtydata[0] ? d_writedata_cache[7:0] : ava_readdata[7:0]
			}
			:
			{
				d_dirtydata[3] ? d_writedata_cache[31:24] : ava_readdata[15:8],
				d_dirtydata[2] ? d_writedata_cache[23:16] : ava_readdata[7:0],
				d_writedata_cache[15:0]
			}
		;

		d_validwrite<=1'b1;
		d_validwritedata<=	d_validdata 
						|	({4{d_validwrite}} & d_validwritedata)
						|	(ava_address[1] ? 4'b0011 : 4'b1100);
		
		//Byte and halfword accesses need only one bus read; a word access
		//continues with the lower halfword unless it is already valid.
		if(d_mem8 || d_mem16 || ava_address[1] || (d_validdata[1] && d_validdata[0]))
		begin
			ava_read<=1'b0;
			state<=S_MEMPAUSE;
		end
		else
		begin
			ava_address<={d_tag,d_index,d_line,2'b10};
			state<=S_DFETCH2;
		end
	end
	S_FLUSHLINE:
	begin
		//Make flushline disappear to the memory
		//Steps through the eight halfwords of the victim line in ascending
		//f_address order, skipping halfwords with no dirty byte. Halfword
		//2k uses dirty bits [4k+3:4k+2], halfword 2k+1 uses [4k+1:4k].
		if(!ava_waitrequest)
		begin
			flushstarted<=1'b1;
			if(!flushstarted && |d_alldirtydata[3:2])
			begin
				f_address<=3'd0;
				f_dirty<=d_alldirtydata[3:2];
			end
			else if(f_address<3'd1 && |d_alldirtydata[1:0])
			begin
				f_address<=3'd1;
				f_dirty<=d_alldirtydata[1:0];
			end
			else if(f_address<3'd2 && |d_alldirtydata[7:6])
			begin
				f_address<=3'd2;
				f_dirty<=d_alldirtydata[7:6];
			end
			else if(f_address<3'd3 && |d_alldirtydata[5:4])
			begin
				f_address<=3'd3;
				f_dirty<=d_alldirtydata[5:4];
			end
			else if(f_address<3'd4 && |d_alldirtydata[11:10])
			begin
				f_address<=3'd4;
				f_dirty<=d_alldirtydata[11:10];
			end
			else if(f_address<3'd5 && |d_alldirtydata[9:8])
			begin
				f_address<=3'd5;
				f_dirty<=d_alldirtydata[9:8];
			end
			else if(f_address<3'd6 && |d_alldirtydata[15:14])
			begin
				f_address<=3'd6;
				f_dirty<=d_alldirtydata[15:14];
			end
			else if(f_address<3'd7 && |d_alldirtydata[13:12])
			begin
				f_address<=3'd7;
				f_dirty<=d_alldirtydata[13:12];
			end
			else
			begin
				//No further dirty halfword: finish once the bus is free
				flushdone<=1'b1;
				state<=S_MEMPAUSE;
			end
			
			f_dataaddress <= f_address;

			//Issue the write for the halfword selected on the previous
			//accepted cycle; its dirty bits become the byte enables.
			if(flushstarted)
			begin
				ava_address<={f_tag,f_index,f_address,1'b0};
				ava_byteenable<=f_dirty;
				ava_write<=1'b1;
			end
		end
	end
	default:
	begin
		state<=S_IDLE;
	end
	endcase

	//Write-back data: the halfword of the data store word selected by
	//f_dataaddress[0] (0 selects bits 31:16, 1 selects bits 15:0).
	assign ava_writedata=f_dataaddress[0] ? d_readdata_word[15:0] : d_readdata_word[31:16];

endmodule