Со сложением справился, а как сделать умножение — не знаю...
Осталось умножить матрицу A на матрицу B, где B = B + C + D.
program xmm;
{$APPTYPE CONSOLE}
{ Demonstrates 4x4 single-precision matrix arithmetic with SSE in Delphi
  inline assembly:
    1. B := B + C + D   (element-wise, one row of four floats per step)
    2. R := A * B       (row-by-row product using broadcast + multiply + add)
  Both results are printed to the console. }
uses
  SysUtils;

const
  ROW_BYTES    = 16;  { 4 elements * SizeOf(single) = one matrix row }
  MATRIX_BYTES = 64;  { 4 rows * ROW_BYTES = one whole matrix }

type
  { Static two-dimensional arrays are stored row by row, so each row is a
    contiguous group of four singles that fits exactly in one XMM register. }
  TMatrix4 = array[0..3, 0..3] of single;

var
  a, b, c, d: TMatrix4;
  r: TMatrix4;  { receives the product A * B }

{ Sets every element of m to value. }
procedure FillMatrix(var m: TMatrix4; value: single);
var
  row, col: integer;
begin
  for row := 0 to 3 do
    for col := 0 to 3 do
      m[row, col] := value;
end;

{ Writes m to the console, one matrix row per output line.
  Elements are indexed as m[row, col] so the output is not transposed. }
procedure PrintMatrix(const m: TMatrix4);
var
  row, col: integer;
begin
  for row := 0 to 3 do
  begin
    for col := 0 to 3 do
      write(m[row, col]:0:0, ' ');
    writeln;
  end;
end;

begin
  FillMatrix(a, 4);
  FillMatrix(b, 3);
  FillMatrix(c, 2);
  FillMatrix(d, 1);

  asm
    // Register roles:
    //   eax, edx, ecx - matrix base pointers (free scratch registers)
    //   esi           - byte offset of the current row (0, 16, 32, 48)
    // esi must be preserved by inline assembly, so it is saved and restored.
    // movups is used throughout because the globals are not known to be
    // 16-byte aligned.
    push    esi

    // ---- Phase 1: B := B + C + D, one row per iteration -----------------
    lea     eax, b
    lea     edx, c
    lea     ecx, d
    xor     esi, esi
  @sum_row:
    movups  xmm0, [eax + esi]           // xmm0 = B[row]
    movups  xmm1, [edx + esi]           // xmm1 = C[row]
    movups  xmm2, [ecx + esi]           // xmm2 = D[row]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    movups  [eax + esi], xmm0           // B[row] = B[row] + C[row] + D[row]
    add     esi, ROW_BYTES
    cmp     esi, MATRIX_BYTES
    jne     @sum_row

    // ---- Phase 2: R := A * B ---------------------------------------------
    // R[i] = A[i,0]*B[0] + A[i,1]*B[1] + A[i,2]*B[2] + A[i,3]*B[3]
    // The four rows of B stay in xmm0..xmm3 for the whole loop.
    movups  xmm0, [eax]                 // B[0]
    movups  xmm1, [eax + ROW_BYTES]     // B[1]
    movups  xmm2, [eax + 2*ROW_BYTES]   // B[2]
    movups  xmm3, [eax + 3*ROW_BYTES]   // B[3]

    lea     edx, a
    lea     ecx, r
    xor     esi, esi
  @mul_row:
    movups  xmm4, [edx + esi]           // xmm4 = A[i,0..3]

    movaps  xmm5, xmm4
    shufps  xmm5, xmm5, $00             // broadcast A[i,0] to all lanes
    mulps   xmm5, xmm0                  // acc = A[i,0] * B[0]

    movaps  xmm6, xmm4
    shufps  xmm6, xmm6, $55             // broadcast A[i,1]
    mulps   xmm6, xmm1
    addps   xmm5, xmm6                  // acc += A[i,1] * B[1]

    movaps  xmm6, xmm4
    shufps  xmm6, xmm6, $AA             // broadcast A[i,2]
    mulps   xmm6, xmm2
    addps   xmm5, xmm6                  // acc += A[i,2] * B[2]

    shufps  xmm4, xmm4, $FF             // broadcast A[i,3] (xmm4 no longer needed)
    mulps   xmm4, xmm3
    addps   xmm5, xmm4                  // acc += A[i,3] * B[3]

    movups  [ecx + esi], xmm5           // R[i] = acc
    add     esi, ROW_BYTES
    cmp     esi, MATRIX_BYTES
    jne     @mul_row

    pop     esi
  end;

  writeln('B = B + C + D:');
  PrintMatrix(b);
  writeln('A * B:');
  PrintMatrix(r);
  readln;
end.