Ok,
This version 2 uses 85 instructions. Still 2 more instructions than the
simulated-int64 version ;)
// Optimizations applied:
// + KeepLowBits function inlined.
// + Variables reduced by re-using stack space via the absolute directive;
//   instructions were re-ordered to make that possible.
// + DestBitIndex shr 3 done once for address calculation.
Now it remains to be seen during benchmarks which version will be the
fastest.
In real code I will probably not need to call this routine many times. I
might get away with if statements on the number of bits that select the
8 bit, 16 bit or this 32 bit version ;) but I would still like to have a
fast 32 bit version, just in case ;)
// *** Begin of Code ***
// Returns Value with only its lowest Bits bits kept; every higher bit is cleared.
//   Value : the 32-bit input.
//   Bits  : number of low-order bits to keep. For Bits >= 32 the value is
//           returned unchanged.
function KeepLowBits( Value : longword; Bits : longword ) : longword; inline;
begin
  Result := Value; // 32 bits (or more) case: nothing to clear.
  if Bits <= 31 then
  begin
    // The shl instruction is limited to a shift count of 31, hence the
    // guard above. (all-ones shl Bits) marks the bits to drop; "and not"
    // clears them.
    Result := Result and not (4294967295 shl Bits);
  end;
end;
// correct
// 85 instructions
// Writes the lowest Bits bits of Value into memory, starting DestBitIndex bits
// past DestAddress, leaving all other bits of the destination untouched.
//   Value        : source bits; only the lowest Bits bits are used.
//   Bits         : number of bits to write (KeepLowBits treats >= 32 as 32).
//   DestAddress  : base address of the destination buffer.
//   DestBitIndex : bit offset from DestAddress. The byte offset is
//                  DestBitIndex shr 3, the bit within that byte is
//                  DestBitIndex and 7 (counted from the low-order bit).
//
// The write is done as two read-modify-write operations on consecutive
// longwords, at byte offset (DestBitIndex shr 3) and that offset + 4. Both
// longwords are ALWAYS read and written back, even when the second mask is
// zero, so 8 bytes starting at the byte offset must be accessible.
//
// ShiftLeft is defined elsewhere; in the accompanying disassembly it is a
// single "shld eax, edx, cl", i.e. it shifts its first argument left by the
// third argument while shifting in the top bits of the second argument.
//
// NOTE(review): longword(DestAddress) assumes 32-bit pointers.
procedure WriteLongwordBitsV2( Value : longword; Bits : longword;
  DestAddress : pointer; DestBitIndex : longword );
var
  vContent : longword;
  vMask : longword;
  vShift : longword;

  vFirstContent : longword;
  vFirstMask : longword;
  vFirstAddress : longword;

  // recycle the variables above, little bit dangerous because
  // compiler might be buggy, but so far it seems to be working.
  // The first write must be finished before any of these is assigned.
  vSecondContent : longword absolute vFirstContent;
  vSecondMask : longword absolute vFirstMask;
  vSecondAddress : longword absolute vFirstAddress;
begin
  // Bits to store, and a mask with a 1 for every destination bit they replace.
  vContent := KeepLowBits( Value, Bits );
  vMask := KeepLowBits( 4294967295, Bits );

  // Bit position inside the first destination byte (0..7).
  vShift := DestBitIndex and 7;

  // Convert the bit index into a byte offset (div 8); done once, used for
  // both addresses below.
  DestBitIndex := DestBitIndex shr 3;

  // First longword: content and mask moved up by vShift bits.
  vFirstContent := ShiftLeft( vContent, 0, vShift );
  vFirstMask := ShiftLeft( vMask, 0, vShift );
  vFirstAddress := longword(DestAddress) + DestBitIndex;
  Plongword(vFirstAddress)^ :=
    (Plongword(vFirstAddress)^ and not vFirstMask) or vFirstContent;

  // Second longword: the bits that were shifted out of the top of the first.
  vSecondContent := ShiftLeft( 0, vContent, vShift );
  vSecondMask := ShiftLeft( 0, vMask, vShift );
  vSecondAddress := longword(DestAddress) + DestBitIndex + 4;
  Plongword(vSecondAddress)^ :=
    (Plongword(vSecondAddress)^ and not vSecondMask) or vSecondContent;
end;
// Generated Assembler:
{
77 instructions in this routine + 4 calls to ShiftLeft * 2 instructions each
(shld + ret, = 8) = 85 instructions
Project1.dpr.1648: begin
0040906C 55 push ebp
0040906D 8BEC mov ebp,esp
0040906F 83C4D4 add esp,-$2c
00409072 894DE8 mov [ebp-$18],ecx
00409075 8955EC mov [ebp-$14],edx
00409078 8945F0 mov [ebp-$10],eax
Project1.dpr.1649: vContent := KeepLowBits( Value, Bits );
0040907B 8B45F0 mov eax,[ebp-$10]
0040907E 8945D8 mov [ebp-$28],eax
00409081 837DEC1F cmp dword ptr [ebp-$14],$1f
00409085 770D jnbe $00409094
00409087 8B4DEC mov ecx,[ebp-$14]
0040908A 83C8FF or eax,-$01
0040908D D3E0 shl eax,cl
0040908F F7D0 not eax
00409091 2145D8 and [ebp-$28],eax
00409094 8B45D8 mov eax,[ebp-$28]
00409097 8945E4 mov [ebp-$1c],eax
Project1.dpr.1650: vMask := KeepLowBits( 4294967295, Bits );
0040909A C745D4FFFFFFFF mov [ebp-$2c],$ffffffff
004090A1 837DEC1F cmp dword ptr [ebp-$14],$1f
004090A5 770D jnbe $004090b4
004090A7 8B4DEC mov ecx,[ebp-$14]
004090AA 83C8FF or eax,-$01
004090AD D3E0 shl eax,cl
004090AF F7D0 not eax
004090B1 2145D4 and [ebp-$2c],eax
004090B4 8B45D4 mov eax,[ebp-$2c]
004090B7 8945E0 mov [ebp-$20],eax
Project1.dpr.1652: v****ft := DestBitIndex and 7;
004090BA 8B4508 mov eax,[ebp+$08]
004090BD 83E007 and eax,$07
004090C0 8945DC mov [ebp-$24],eax
Project1.dpr.1654: DestBitIndex := DestBitIndex shr 3; // div 32
004090C3 C16D0803 shr dword ptr [ebp+$08],$03
Project1.dpr.1656: vFirstContent := ****ftLeft( vContent, 0, v****ft );
004090C7 8B4DDC mov ecx,[ebp-$24]
004090CA 33D2 xor edx,edx
004090CC 8B45E4 mov eax,[ebp-$1c]
004090CF E8BCFEFFFF call ****ftLeft
004090D4 8945FC mov [ebp-$04],eax
Project1.dpr.1657: vFirstMask := ****ftLeft( vMask, 0, v****ft );
004090D7 8B4DDC mov ecx,[ebp-$24]
004090DA 33D2 xor edx,edx
004090DC 8B45E0 mov eax,[ebp-$20]
004090DF E8ACFEFFFF call ****ftLeft
004090E4 8945F8 mov [ebp-$08],eax
Project1.dpr.1658: vFirstAddress := longword(DestAddress) + DestBitIndex;
004090E7 8B45E8 mov eax,[ebp-$18]
004090EA 034508 add eax,[ebp+$08]
004090ED 8945F4 mov [ebp-$0c],eax
Project1.dpr.1659: Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^
and not vFirstMask) or vFirstContent;
004090F0 8B45F4 mov eax,[ebp-$0c]
004090F3 8B00 mov eax,[eax]
004090F5 8B55F8 mov edx,[ebp-$08]
004090F8 F7D2 not edx
004090FA 23C2 and eax,edx
004090FC 0B45FC or eax,[ebp-$04]
004090FF 8B55F4 mov edx,[ebp-$0c]
00409102 8902 mov [edx],eax
Project1.dpr.1661: vSecondContent := ****ftLeft( 0, vContent, v****ft );
00409104 8B4DDC mov ecx,[ebp-$24]
00409107 8B55E4 mov edx,[ebp-$1c]
0040910A 33C0 xor eax,eax
0040910C E87FFEFFFF call ****ftLeft
00409111 8945FC mov [ebp-$04],eax
Project1.dpr.1662: vSecondMask := ****ftLeft( 0, vMask, v****ft );
00409114 8B4DDC mov ecx,[ebp-$24]
00409117 8B55E0 mov edx,[ebp-$20]
0040911A 33C0 xor eax,eax
0040911C E86FFEFFFF call ****ftLeft
00409121 8945F8 mov [ebp-$08],eax
Project1.dpr.1663: vSecondAddress := longword(DestAddress) + DestBitIndex
+
4;
00409124 8B45E8 mov eax,[ebp-$18]
00409127 034508 add eax,[ebp+$08]
0040912A 83C004 add eax,$04
0040912D 8945F4 mov [ebp-$0c],eax
Project1.dpr.1664: Plongword(vSecondAddress)^ :=
(Plongword(vSecondAddress)^
and not vSecondMask) or vSecondContent;
00409130 8B45F4 mov eax,[ebp-$0c]
00409133 8B00 mov eax,[eax]
00409135 8B55F8 mov edx,[ebp-$08]
00409138 F7D2 not edx
0040913A 23C2 and eax,edx
0040913C 0B45FC or eax,[ebp-$04]
0040913F 8B55F4 mov edx,[ebp-$0c]
00409142 8902 mov [edx],eax
Project1.dpr.1665: end;
00409144 8BE5 mov esp,ebp
00409146 5D pop ebp
00409147 C20400 ret $0004
Extra Routine (ShiftLeft):
Unit_BitManipulation_Shift_version_001.pas.12: shld eax, edx, cl
00408F90 0FA5D0 shld eax,edx,cl
Unit_BitManipulation_Shift_version_001.pas.13: end;
00408F93 C3 ret
}
// *** End of Code ***
Bye,
Skybuck.


|