Suppose that a representative program takes 100 time units to
run without vector mode. If computations which comprise 70%
of that time can be made to use vector computation (which is
20 times faster), then the total time used will be reduced to
$30 + 70/20 = 33.5$ time units.
The hardware design group can double the speed of vector
mode (to 40 times faster than normal mode). This would reduce the total time to
$30 + 70/40 = 31.75$ time units for the same job.
To achieve this same improvement, suppose we can make $X\%$ of
the computation use vector mode. Then $(100 - X) + X/20$
must be equal to 31.75; X must be 71.84. This is only a
1.84 percentage-point increase (about 2.6% in relative terms) in the
amount of computation which uses vector
mode over our original enhancement.
I would recommend increasing the use of vector mode. Because vector mode runs 20 times faster, non-vector mode computations become the dominant factor in time consumption. Further improvements to the speed of the vector mode are hardly noticeable compared to the conversion of more computation to vector computation.
My scheme to reduce the number of loads and stores is
faster. Suppose there are 100 instructions to be performed. On
the original computer, this would take 100 fast clock
cycles. With my enhancement, only 70 regular instructions and
20 (of the original 30) load-store instructions are
necessary. This would take 90 slow clock cycles. The
fast clock cycles are 5% faster, which means the 100 fast cycles
take the same time as $100/1.05 = 95.24$ cycles on the slow machine. My
version will be done first.
| Microprocessor | die area | pins | wafer cost | die yield | good chips/wafer |
| MIPS 4600 | 77 | 208 | $3200 | 0.479 | 171.07 |
| PowerPC 603 | 85 | 240 | $3400 | 0.449 | 144.47 |
| HP 71x0 | 196 | 504 | $2800 | 0.210 | 27.02 |
| Digital 21064A | 166 | 431 | $4000 | 0.253 | 39.23 |
| SuperSPARC/60 | 256 | 293 | $4000 | 0.149 | 14.17 |
| Microprocessor | wafer cost | good chips/wafer | cost/good die |
| MIPS 4600 | $3200 | 171.07 | $18.71 |
| PowerPC 603 | $3400 | 144.47 | $23.54 |
| HP 71x0 | $2800 | 27.02 | $103.62 |
| Digital 21064A | $4000 | 39.23 | $101.96 |
| SuperSPARC/60 | $4000 | 14.17 | $282.29 |
| Microprocessor | cost/good die | final cost |
| MIPS 4600 | $18.71 | $31.54 |
| PowerPC 603 | $23.54 | $44.42 |
| HP 71x0 | $103.62 | $175.29 |
| Digital 21064A | $101.96 | $153.26 |
| SuperSPARC/60 | $282.29 | $313.18 |
| Microprocessor | defect density | final cost |
| SuperSPARC/60 | 0.006 | $184.17 |
| SuperSPARC/60 | 0.012 | $398.57 |
| Microprocessor | alpha | final cost |
| Digital 21064A | 3.00 | $132.99 |
| Digital 21064A | 4.50 | $138.40 |
Suppose some program takes 100 time units. If enhancements 1
and 2 are both used for 30% of the original program, then
the time in those modes totals
$30/30 + 30/20 = 2.5$ time
units. If we are to achieve an overall speedup of 10, then we
must take 10 time units for the whole program, which means we
have 7.5 time units to perform the remaining 40 time units of
computation. Using enhancement 3 (speedup 10) for a fraction $X$ of that time to
achieve a local speedup of 40/7.5 means that X can be found
by the following equation.
$$\frac{40}{7.5} = \frac{1}{(1 - X) + X/10}$$
X = .90
Thus enhancement 3 must be used for 90% of the remaining
computations, or about 36% of the original program time.
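If only one enhancement can be implemented, it should be
enhancement 3. Assuming the usage fractions are 15%, 15%, and 70% for
enhancements 1, 2, and 3, it takes $0.30 + 0.70/10 = 0.37$, i.e. 37%,
as long as the unenhanced version.
If two can be used, they should be 1 and 3. This system would
only take $0.15 + 0.15/30 + 0.70/10 = 0.225$, i.e. 22.5%,
of the original time.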
The following represents the calculations I used to do problem 1.8
#include <iostream.h>
#include <math.h>
#include <string>
// Package kinds distinguished by the cost model.
enum package { PQFP, PGA };

// Plain record describing one microprocessor for the cost model.
// All members are public and are read directly by the cost functions.
class microprocessor {
public:
  // Default constructor. Gives every member a defined value so that a
  // default-constructed object (for example an array element that has not
  // been assigned yet) can be read or printed safely.
  microprocessor()
    : Name(""), Die_Area(0), Pins(0), Wafer_Cost(0), Package(PQFP) {}

  // n  - display name. Only the pointer is stored (no copy is made), so the
  //      string must outlive this object. Taken as const char* so that
  //      string literals can be passed without a deprecated/ill-formed
  //      literal-to-char* conversion.
  // d  - die area (multiplied by the per-mm^2 defect density in die_yield)
  // p  - pin count
  // w  - wafer cost (printed with a leading '$')
  // pk - package kind
  microprocessor(const char* n,
                 int d,
                 int p,
                 int w,
                 package pk)
    : Name(n), Die_Area(d), Pins(p), Wafer_Cost(w), Package(pk) {}

  const char* Name;
  int Die_Area;
  int Pins;
  int Wafer_Cost;
  package Package;
};
// Model parameters. These are deliberately mutable globals: main()
// reassigns defect_density and alpha between reports.
double wafer_size     = 200.0;  // wafer diameter in mm (20 cm)
double defect_density = 0.01;   // defects per mm^2 (1 per cm^2)
double wafer_yield    = 0.95;   // scales the die yield in die_yield()
double alpha          = 3.0;    // exponent parameter of the yield formula
// Die yield for processor m:
//   wafer_yield * (1 + defect_density * Die_Area / alpha) ^ (-alpha)
// Reads the globals defect_density, alpha and wafer_yield at call time.
// The power is evaluated as exp(alpha * log(base)).
double die_yield (microprocessor m){
  const double defects_per_die = defect_density * m.Die_Area;
  const double base = 1 + defects_per_die / alpha;
  const double denominator = exp(alpha * log(base));
  return wafer_yield / denominator;
}
// Expected number of good dies per wafer for processor m:
//   (pi * (wafer_size/2)^2 / Die_Area  -  pi * wafer_size / sqrt(2 * Die_Area))
//   * die_yield(m)
// The first term is wafer area over die area; the second is subtracted from
// it before scaling by the yield. Reads the global wafer_size.
double good_chips_per_wafer (microprocessor m){
  const double quarter_diameter_sq = wafer_size * wafer_size / 4;
  const double area_term = M_PI * quarter_diameter_sq / m.Die_Area;
  const double circumference = M_PI * wafer_size;
  const double edge_term = circumference / sqrt(m.Die_Area * 2);
  const double dies_per_wafer = area_term - edge_term;
  return dies_per_wafer * die_yield(m);
}
// Cost of one good die: the wafer cost divided by the expected number of
// good dies obtained from that wafer.
double cost_per_good_die (microprocessor m){
  const double good_dies = good_chips_per_wafer(m);
  return m.Wafer_Cost / good_dies;
}
// Final cost of processor m: a fixed amount chosen by package kind and pin
// count, plus a second term of the form (a * b / 3600), plus the cost of
// one good die.
// NOTE(review): the (a * b / 3600) term presumably is test time in seconds
// times tester cost per hour; confirm against the exercise's cost table.
//
// Every path returns a value. The original tested `Pins > 500` for the last
// PGA tier, so a PGA part with exactly 500 pins (or an unexpected package
// value) flowed off the end of this non-void function, which is undefined
// behaviour. The last PGA tier now covers every count of 500 pins or more.
double final_cost (microprocessor m){
  const double die_cost = cost_per_good_die(m);

  if (m.Package == PQFP) {
    if (m.Pins < 220) { return 12 + (10*300.0/3600.0) + die_cost; }
    return 20 + (10*320.0/3600.0) + die_cost;
  }

  if (m.Package == PGA) {
    if (m.Pins < 300) { return 30 + (10*320.0/3600.0) + die_cost; }
    if (m.Pins < 400) { return 40 + (12*340.0/3600.0) + die_cost; }
    if (m.Pins < 450) { return 50 + (13*360.0/3600.0) + die_cost; }
    if (m.Pins < 500) { return 60 + (14*380.0/3600.0) + die_cost; }
    return 70 + (15*400.0/3600.0) + die_cost;   // 500 pins or more
  }

  // Package value outside the enum: no packaging figures are known for it,
  // so only the die cost is reported instead of returning garbage.
  return die_cost;
}
int main() {
microprocessor list[5];
list[0] = microprocessor("MIPS 4600",77,208,3200,PQFP);
list[1] = microprocessor("PowerPC 603",85,240,3400,PQFP);
list[2] = microprocessor("HP 71x0",196,504,2800,PGA);
list[3] = microprocessor("Digital 21064A",166,431,4000,PGA);
list[4] = microprocessor("SuperSPARC/60",256,293,4000,PGA);
printf("%-14s\t%4s\t%4s\t%6s\t%6s\t%16s\n","Microprocessor",
"Area","Pins","W Cost","dYield","Good chips/wafer");
for (int i=0; i<5; i++){
microprocessor* a = & list[i];
printf("%-14s\t%4d\t%4d\t$%-6d\t%6.3f\t%16.2f\n",a->Name,
a->Die_Area,
a->Pins,
a->Wafer_Cost,
die_yield(*a),
good_chips_per_wafer(*a)
);
}
cout << endl << endl;
printf("%-14s\t%6s\t%16s\t%13s\n","Microprocessor",
"W Cost","Good chips/wafer","cost/good die");
for (int i=0; i<5; i++){
microprocessor* a = & list[i];
printf("%-14s\t$%-6d\t%16.2f\t$%-13.2f\n",a->Name,
a->Wafer_Cost,
good_chips_per_wafer(*a),
cost_per_good_die(*a)
);
}
cout << endl << endl;
printf("%-14s\t%13s\t%10s\n","Microprocessor",
"cost/good die","final cost");
for (int i=0; i<5; i++){
microprocessor* a = & list[i];
printf("%-14s\t$%-13.2f\t$%-10.2f\n",a->Name,
cost_per_good_die(*a),
final_cost(*a)
);
}
cout << endl << endl;
microprocessor* a = & list[4];
printf("%-14s\t%14s\t%10s\n","Microprocessor","defect density","total cost");
defect_density = .006;
printf("%-14s\t%14.3f\t$%-10.2f\n",a->Name,defect_density,final_cost(*a));
defect_density = .012;
printf("%-14s\t%14.3f\t$%-10.2f\n",a->Name,defect_density,final_cost(*a));
cout << endl << endl;
defect_density = .008;
microprocessor D = microprocessor("Digital 21064A",166,431,4000,PGA);
printf("%-14s\t%5s\t%10s\n","Microprocessor","alpha","total cost");
printf("%-14s\t%5.2f\t$%-10.2f\n",D.Name,alpha,final_cost(D));
alpha = 4.5;
printf("%-14s\t%5.2f\t$%-10.2f\n",D.Name,alpha,final_cost(D));
cout << endl << endl;
return 0;
}