"Skybuck Flying" <BloodyShame@[EMAIL PROTECTED]
> wrote in message
news:15897$481f4a34$541983fa$14271@[EMAIL PROTECTED]
> Ok,
>
> This version 2 uses 85 instructions. Still 2 more instructions than the
> simulated-int64 version ;)
>
> // Optimizations applied:
> // + KeepLowBits function inlined.
> // + Variables reduced by re-using stack space via absolute directive
> // Instructions re-ordered to make that possible.
> // + DestBitIndex shr 3 done once for address calculation.
>
> Now it remains to be seen during benchmarks which version will be the
> fastest.
>
> In real code I'll probably not need to call this routine many times, I
> might get away with if statements for the number of bits and then
> selecting the 8 bit, 16 bit or this 32 bit version ;) but still I like to
> have a fast 32 bit version just in case ;)
>
> // *** Begin of Code ***
>
> function KeepLowBits( Value : longword; Bits : longword ) : longword;
> inline;
> begin
> Result := Value; // 32 bits case.
> if Bits <= 31 then
> begin
> Result := Result and not (4294967295 shl Bits); // shl instruction limited to 31.
> end;
> end;
>
> // correct
> // 85 instructions
> procedure WriteLongwordBitsV2( Value : longword; Bits : longword;
> DestAddress : pointer; DestBitIndex : longword );
> var
> vContent : longword;
> vMask : longword;
> vShift : longword;
>
> vFirstContent : longword;
> vFirstMask : longword;
> vFirstAddress : longword;
>
> // recycle the variables above, little bit dangerous because
> // compiler might be buggy, but so far it seems to be working.
> vSecondContent : longword absolute vFirstContent;
> vSecondMask : longword absolute vFirstMask;
> vSecondAddress : longword absolute vFirstAddress;
> begin
> vContent := KeepLowBits( Value, Bits );
> vMask := KeepLowBits( 4294967295, Bits );
>
> vShift := DestBitIndex and 7;
>
> DestBitIndex := DestBitIndex shr 3; // div 32
Oh little comment typo:
DestBitIndex := DestBitIndex shr 3; // div 8
Ah that's better :) LOL
Bye,
Skybuck ;)
>
> vFirstContent := ShiftLeft( vContent, 0, vShift );
> vFirstMask := ShiftLeft( vMask, 0, vShift );
> vFirstAddress := longword(DestAddress) + DestBitIndex;
> Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not
> vFirstMask) or vFirstContent;
>
> vSecondContent := ShiftLeft( 0, vContent, vShift );
> vSecondMask := ShiftLeft( 0, vMask, vShift );
> vSecondAddress := longword(DestAddress) + DestBitIndex + 4;
> Plongword(vSecondAddress)^ := (Plongword(vSecondAddress)^ and not
> vSecondMask) or vSecondContent;
> end;
>
> // Generated Assembler:
>
> {
>
> 77 instructions + 4 * 2 (= 8) = 85 instructions
> Project1.dpr.1648: begin
> 0040906C 55 push ebp
> 0040906D 8BEC mov ebp,esp
> 0040906F 83C4D4 add esp,-$2c
> 00409072 894DE8 mov [ebp-$18],ecx
> 00409075 8955EC mov [ebp-$14],edx
> 00409078 8945F0 mov [ebp-$10],eax
> Project1.dpr.1649: vContent := KeepLowBits( Value, Bits );
> 0040907B 8B45F0 mov eax,[ebp-$10]
> 0040907E 8945D8 mov [ebp-$28],eax
> 00409081 837DEC1F cmp dword ptr [ebp-$14],$1f
> 00409085 770D jnbe $00409094
> 00409087 8B4DEC mov ecx,[ebp-$14]
> 0040908A 83C8FF or eax,-$01
> 0040908D D3E0 shl eax,cl
> 0040908F F7D0 not eax
> 00409091 2145D8 and [ebp-$28],eax
> 00409094 8B45D8 mov eax,[ebp-$28]
> 00409097 8945E4 mov [ebp-$1c],eax
> Project1.dpr.1650: vMask := KeepLowBits( 4294967295, Bits );
> 0040909A C745D4FFFFFFFF mov [ebp-$2c],$ffffffff
> 004090A1 837DEC1F cmp dword ptr [ebp-$14],$1f
> 004090A5 770D jnbe $004090b4
> 004090A7 8B4DEC mov ecx,[ebp-$14]
> 004090AA 83C8FF or eax,-$01
> 004090AD D3E0 shl eax,cl
> 004090AF F7D0 not eax
> 004090B1 2145D4 and [ebp-$2c],eax
> 004090B4 8B45D4 mov eax,[ebp-$2c]
> 004090B7 8945E0 mov [ebp-$20],eax
> Project1.dpr.1652: vShift := DestBitIndex and 7;
> 004090BA 8B4508 mov eax,[ebp+$08]
> 004090BD 83E007 and eax,$07
> 004090C0 8945DC mov [ebp-$24],eax
> Project1.dpr.1654: DestBitIndex := DestBitIndex shr 3; // div 32
> 004090C3 C16D0803 shr dword ptr [ebp+$08],$03
> Project1.dpr.1656: vFirstContent := ShiftLeft( vContent, 0, vShift );
> 004090C7 8B4DDC mov ecx,[ebp-$24]
> 004090CA 33D2 xor edx,edx
> 004090CC 8B45E4 mov eax,[ebp-$1c]
> 004090CF E8BCFEFFFF call ShiftLeft
> 004090D4 8945FC mov [ebp-$04],eax
> Project1.dpr.1657: vFirstMask := ShiftLeft( vMask, 0, vShift );
> 004090D7 8B4DDC mov ecx,[ebp-$24]
> 004090DA 33D2 xor edx,edx
> 004090DC 8B45E0 mov eax,[ebp-$20]
> 004090DF E8ACFEFFFF call ShiftLeft
> 004090E4 8945F8 mov [ebp-$08],eax
> Project1.dpr.1658: vFirstAddress := longword(DestAddress) + DestBitIndex;
> 004090E7 8B45E8 mov eax,[ebp-$18]
> 004090EA 034508 add eax,[ebp+$08]
> 004090ED 8945F4 mov [ebp-$0c],eax
> Project1.dpr.1659: Plongword(vFirstAddress)^ := (Plongword(vFirstAddress)^ and not vFirstMask) or vFirstContent;
> 004090F0 8B45F4 mov eax,[ebp-$0c]
> 004090F3 8B00 mov eax,[eax]
> 004090F5 8B55F8 mov edx,[ebp-$08]
> 004090F8 F7D2 not edx
> 004090FA 23C2 and eax,edx
> 004090FC 0B45FC or eax,[ebp-$04]
> 004090FF 8B55F4 mov edx,[ebp-$0c]
> 00409102 8902 mov [edx],eax
> Project1.dpr.1661: vSecondContent := ShiftLeft( 0, vContent, vShift );
> 00409104 8B4DDC mov ecx,[ebp-$24]
> 00409107 8B55E4 mov edx,[ebp-$1c]
> 0040910A 33C0 xor eax,eax
> 0040910C E87FFEFFFF call ShiftLeft
> 00409111 8945FC mov [ebp-$04],eax
> Project1.dpr.1662: vSecondMask := ShiftLeft( 0, vMask, vShift );
> 00409114 8B4DDC mov ecx,[ebp-$24]
> 00409117 8B55E0 mov edx,[ebp-$20]
> 0040911A 33C0 xor eax,eax
> 0040911C E86FFEFFFF call ShiftLeft
> 00409121 8945F8 mov [ebp-$08],eax
> Project1.dpr.1663: vSecondAddress := longword(DestAddress) + DestBitIndex + 4;
> 00409124 8B45E8 mov eax,[ebp-$18]
> 00409127 034508 add eax,[ebp+$08]
> 0040912A 83C004 add eax,$04
> 0040912D 8945F4 mov [ebp-$0c],eax
> Project1.dpr.1664: Plongword(vSecondAddress)^ :=
> (Plongword(vSecondAddress)^ and not vSecondMask) or vSecondContent;
> 00409130 8B45F4 mov eax,[ebp-$0c]
> 00409133 8B00 mov eax,[eax]
> 00409135 8B55F8 mov edx,[ebp-$08]
> 00409138 F7D2 not edx
> 0040913A 23C2 and eax,edx
> 0040913C 0B45FC or eax,[ebp-$04]
> 0040913F 8B55F4 mov edx,[ebp-$0c]
> 00409142 8902 mov [edx],eax
> Project1.dpr.1665: end;
> 00409144 8BE5 mov esp,ebp
> 00409146 5D pop ebp
> 00409147 C20400 ret $0004
>
> Extra Routine:
>
> Unit_BitManipulation_Shift_version_001.pas.12: shld eax, edx, cl
> 00408F90 0FA5D0 shld eax,edx,cl
> Unit_BitManipulation_Shift_version_001.pas.13: end;
> 00408F93 C3 ret
>
> }
>
> // *** End of Code ***
>
> Bye,
> Skybuck.
>


|