Talk About Network

Google


Register and Login
Nick
Password
Register create new account Sign up is FREE and you can post replies, new topics, bookmark posts and more!
Recover lost password


Programming > Assembly Language > Crypt++ 5.5.2 T...
Latest [ Topics | Posts ] Archive Post A New Topic Post a Reply
<< Topic < Post Post 1 of 15 Topic 4916 of 5083
Post > Topic >>

Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compress, AV: Compiler Optimization Bug with SSE2 ? Or just programming bug ? HELP NEEDED :) ?!

by "Skybuck Flying" <spam@[EMAIL PROTECTED] > Mar 18, 2008 at 09:48 AM

Hello,

This is the tiger transform routine/procedure/method converted from the 
Crypt++ 5.5.2 C/C++/Asm code to DCPCrypt2 Delphi/Basm code.

It works with optimizations turned off.

As soon as optimizations are turned on, it crashes with an access
violation 
as soon as Index := 0; is executed.

I am not sure what the problem is because:

1. I didn't write the asm code.

2. I am not an asm expert.

I did my best converting it.

I think some possible causes might be:

1. Misaligned memory accesses? (Maybe the C/C++/asm code had to set up special

memory alignment?)

2. Stack issues? (Not enough pushes or pops? A stack-frame issue?)

3. Compiler optimization bug ?

Who can tell for sure ?

// Runs the compression step over eight 64-bit message words and folds the
// result back into the three 64-bit state words at Digest.
//
// Digest : pointer to three consecutive 64-bit words; read at offsets 0, 8
//          and 16 on entry and overwritten at the same offsets on exit.
// X      : pointer to eight consecutive 64-bit message words (offsets 0..56).
//
// The asm block uses MMX registers with paddq/psubq/pextrw, so the CPU must
// support those instructions.
//
// Register roles inside the asm block:
//   eax        = Digest
//   edx        = address of TigerTable; the code indexes four 2048-byte
//                sub-tables (byte value * 8) and reads three 64-bit
//                constants at offset 4*2048
//   esi        = X
//   ebx        = byte offset of the current group of three words (0, 24, 48)
//   ecx, edi   = scratch for extracting single bytes of a state word
//   mm0..mm2   = working state, mm5 = copy of the second state word on entry
//   mm6, mm7   = constants from TigerTable+4*2048+8 and TigerTable+4*2048+0
//   mm3, mm4   = temporaries
//
// Stack use: ESP is rounded down to a 16-byte boundary, 64 bytes of scratch
// are reserved for the reworked message words, and the old ESP is pushed.
// The scratch words therefore live at [esp+4+k*8].
//
// FIX: Delphi requires an asm statement to preserve EBX, ESI, EDI, EBP and
// ESP; only EAX, ECX and EDX may be changed freely. The original block
// overwrote EBX, ESI and EDI without saving them, so with optimizations on
// the statements after the asm block could run with a clobbered register
// (presumably the one holding Self, which would explain the access violation
// at "Index := 0" - confirm in the CPU view). They are now saved and restored.
//
// NOTE(review): the six jump labels in this listing had been destroyed by the
// archive's e-mail obfuscation. They are rebuilt here from the jump structure
// (backward jmp to the loop head, forward je to the label after the loop) and
// named after the multiplier each pass applies (5, 7, 9).
procedure TDCPOptimizedTiger.Compress( Digest : Pint64; const X : Pint64 );
// procedure TDCPOptimizedTiger.Compress;
//var
//  digest : pointer;
//  LocalX: array[0..7] of int64;
//  x : pointer;
begin
//  Move(HashBuffer,LocalX,Sizeof(LocalX));

//  Digest := @[EMAIL PROTECTED]
  // NOTE(review): the right-hand side of the next statement was destroyed by
  // the archive's e-mail obfuscation and cannot be recovered from this
  // listing; restore the original address expression before compiling.
  X := @[EMAIL PROTECTED]
  asm
   // Fetch both arguments into registers an asm statement may change freely
   // *before* touching the stack, so the operand addressing the compiler
   // generates for Digest and X is not affected by the pushes below.
   mov eax, digest
   mov ecx, X

   // Preserve the registers Delphi expects an asm statement to leave intact.
   push ebx
   push esi
   push edi

   mov esi, ecx
   lea edx, [TigerTable]

   // Load the three state words; keep a copy of the second one for the
   // final subtraction.
   movq mm0, [eax]
   movq mm1, [eax+1*8]

   movq mm5, mm1
   movq mm2, [eax+2*8]
   movq mm7, [edx+4*2048+0*8]
   movq mm6, [edx+4*2048+1*8]

   // Build the scratch area: remember ESP in ecx, align, reserve 8 qwords,
   // then push the remembered ESP so "pop esp" can undo everything.
   mov ecx, esp
   and esp, $fffffff0
   sub esp, 8*8
   push ecx

   // ---- Pass 1: message words from [esi], multiplier 5 (x*4 + x) ----
   xor ebx, ebx

   @Pass5Loop:
   // round on word 0 of the group: mix into mm2, update mm0/mm1
   pxor mm2, [esi+0*8+ebx]
   movd ecx, mm2
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm2, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm2, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm0, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm1, mm4
   movq mm3, mm1
   psllq mm1, 2
   paddq mm1, mm3
   // round on word 1 of the group: mix into mm0, update mm1/mm2
   pxor mm0, [esi+1*8+ebx]
   movd ecx, mm0
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm0, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm0, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm1, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm2, mm4
   movq mm3, mm2
   psllq mm2, 2
   paddq mm2, mm3

   // The third group (ebx = 48) only has two words (6 and 7): leave here.
   cmp ebx, 6*8
   je @Pass5End
   // round on word 2 of the group: mix into mm1, update mm2/mm0
   pxor mm1, [esi+2*8+ebx]
   movd ecx, mm1
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm1, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm1, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm2, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm0, mm4
   movq mm3, mm0
   psllq mm0, 2
   paddq mm0, mm3
   add ebx, 3*8

   jmp @Pass5Loop
   @Pass5End:

   // ---- Rework the message words: read from [esi], write to the scratch
   //      area at [esp+4+k*8] (the caller's buffer is not modified) ----
   movq mm3, [esi+7*8]
   pxor mm3, mm6
   movq mm4, [esi+0*8]
   psubq mm4, mm3
   movq [esp+4+0*8], mm4
   pxor mm4, [esi+1*8]
   movq mm3, mm4
   movq [esp+4+1*8], mm4
   paddq mm4, [esi+2*8]
   pxor mm3, mm7
   psllq mm3, 19
   movq [esp+4+2*8], mm4
   pxor mm3, mm4
   movq mm4, [esi+3*8]
   psubq mm4, mm3
   movq [esp+4+3*8], mm4
   pxor mm4, [esi+4*8]
   movq mm3, mm4
   movq [esp+4+4*8], mm4
   paddq mm4, [esi+5*8]
   pxor mm3, mm7
   psrlq mm3, 23
   movq [esp+4+5*8], mm4
   pxor mm3, mm4
   movq mm4, [esi+6*8]
   psubq mm4, mm3
   movq [esp+4+6*8], mm4
   pxor mm4, [esi+7*8]
   movq mm3, mm4
   movq [esp+4+7*8], mm4
   paddq mm4, [esp+4+0*8]
   pxor mm3, mm7
   psllq mm3, 19
   movq [esp+4+0*8], mm4
   pxor mm3, mm4
   movq mm4, [esp+4+1*8]
   psubq mm4, mm3
   movq [esp+4+1*8], mm4
   pxor mm4, [esp+4+2*8]
   movq mm3, mm4
   movq [esp+4+2*8], mm4
   paddq mm4, [esp+4+3*8]
   pxor mm3, mm7
   psrlq mm3, 23
   movq [esp+4+3*8], mm4
   pxor mm3, mm4
   movq mm4, [esp+4+4*8]
   psubq mm4, mm3
   movq [esp+4+4*8], mm4
   pxor mm4, [esp+4+5*8]
   movq [esp+4+5*8], mm4
   paddq mm4, [esp+4+6*8]
   movq [esp+4+6*8], mm4
   pxor mm4, [edx+4*2048+2*8]
   movq mm3, [esp+4+7*8]
   psubq mm3, mm4
   movq [esp+4+7*8], mm3

   // ---- Pass 2: message words from the scratch area, multiplier 7
   //      (x*8 - x); the state registers take rotated roles ----
   xor ebx, ebx

   @Pass7Loop:
   pxor mm1, [esp+4+0*8+ebx]
   movd ecx, mm1
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm1, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm1, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm2, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm0, mm4
   movq mm3, mm0
   psllq mm0, 3
   psubq mm0, mm3
   pxor mm2, [esp+4+1*8+ebx]
   movd ecx, mm2
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm2, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm2, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm0, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm1, mm4
   movq mm3, mm1
   psllq mm1, 3
   psubq mm1, mm3

   // Last group has only two words: leave after the second round.
   cmp ebx, 6*8
   je @Pass7End
   pxor mm0, [esp+4+2*8+ebx]
   movd ecx, mm0
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm0, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm0, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm1, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm2, mm4
   movq mm3, mm2
   psllq mm2, 3
   psubq mm2, mm3
   add ebx, 3*8

   jmp @Pass7Loop
   @Pass7End:

   // ---- Rework the message words again, this time in place in the
   //      scratch area ----
   movq mm3, [esp+4+7*8]
   pxor mm3, mm6
   movq mm4, [esp+4+0*8]
   psubq mm4, mm3
   movq [esp+4+0*8], mm4
   pxor mm4, [esp+4+1*8]
   movq mm3, mm4
   movq [esp+4+1*8], mm4
   paddq mm4, [esp+4+2*8]
   pxor mm3, mm7
   psllq mm3, 19
   movq [esp+4+2*8], mm4
   pxor mm3, mm4
   movq mm4, [esp+4+3*8]
   psubq mm4, mm3
   movq [esp+4+3*8], mm4
   pxor mm4, [esp+4+4*8]
   movq mm3, mm4
   movq [esp+4+4*8], mm4
   paddq mm4, [esp+4+5*8]
   pxor mm3, mm7
   psrlq mm3, 23
   movq [esp+4+5*8], mm4
   pxor mm3, mm4
   movq mm4, [esp+4+6*8]
   psubq mm4, mm3
   movq [esp+4+6*8], mm4
   pxor mm4, [esp+4+7*8]
   movq mm3, mm4
   movq [esp+4+7*8], mm4
   paddq mm4, [esp+4+0*8]
   pxor mm3, mm7
   psllq mm3, 19
   movq [esp+4+0*8], mm4
   pxor mm3, mm4
   movq mm4, [esp+4+1*8]
   psubq mm4, mm3
   movq [esp+4+1*8], mm4
   pxor mm4, [esp+4+2*8]
   movq mm3, mm4
   movq [esp+4+2*8], mm4
   paddq mm4, [esp+4+3*8]
   pxor mm3, mm7
   psrlq mm3, 23
   movq [esp+4+3*8], mm4
   pxor mm3, mm4
   movq mm4, [esp+4+4*8]
   psubq mm4, mm3
   movq [esp+4+4*8], mm4
   pxor mm4, [esp+4+5*8]
   movq [esp+4+5*8], mm4
   paddq mm4, [esp+4+6*8]
   movq [esp+4+6*8], mm4
   pxor mm4, [edx+4*2048+2*8]
   movq mm3, [esp+4+7*8]
   psubq mm3, mm4
   movq [esp+4+7*8], mm3

   // ---- Pass 3: message words from the scratch area, multiplier 9
   //      (x*8 + x); the state registers rotate once more ----
   xor ebx, ebx

   @Pass9Loop:
   pxor mm0, [esp+4+0*8+ebx]
   movd ecx, mm0
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm0, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm0, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm1, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm2, mm4
   movq mm3, mm2
   psllq mm2, 3
   paddq mm2, mm3
   pxor mm1, [esp+4+1*8+ebx]
   movd ecx, mm1
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm1, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm1, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm2, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm0, mm4
   movq mm3, mm0
   psllq mm0, 3
   paddq mm0, mm3

   // Last group has only two words: leave after the second round.
   cmp ebx, 6*8
   je @Pass9End
   pxor mm2, [esp+4+2*8+ebx]
   movd ecx, mm2
   movzx edi, cl
   movq mm3, [edx+0*2048+edi*8]
   movzx edi, ch
   movq mm4, [edx+3*2048+edi*8]
   shr ecx, 16
   movzx edi, cl
   pxor mm3, [edx+1*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+2*2048+edi*8]
   pextrw ecx, mm2, 2
   movzx edi, cl
   pxor mm3, [edx+2*2048+edi*8]
   movzx edi, ch
   pxor mm4, [edx+1*2048+edi*8]
   pextrw ecx, mm2, 3
   movzx edi, cl
   pxor mm3, [edx+3*2048+edi*8]
   psubq mm0, mm3
   movzx edi, ch
   pxor mm4, [edx+0*2048+edi*8]
   paddq mm1, mm4
   movq mm3, mm1
   psllq mm1, 3
   paddq mm1, mm3
   add ebx, 3*8
   jmp @Pass9Loop
   @Pass9End:

   // ---- Feed-forward into the digest: word0 = a xor aa, word1 = b - bb,
   //      word2 = c + cc (mm5 holds the second word as it was on entry) ----
   pxor mm0, [eax+0*8]
   movq [eax+0*8], mm0
   psubq mm1, mm5
   movq [eax+1*8], mm1
   paddq mm2, [eax+2*8]
   movq [eax+2*8], mm2

   // Drop the aligned scratch area by reloading the ESP saved above ...
   pop esp

   // ... which exposes the three registers pushed at the top of the block.
   pop edi
   pop esi
   pop ebx

   // Leave MMX state so later floating-point code sees a clean FPU.
   emms
  end;

//  CurrentHash[0]:= a xor aa;
//  CurrentHash[1]:= b - bb;
//  CurrentHash[2]:= c + cc;
  Index:= 0;
  FillChar(HashBuffer,Sizeof(HashBuffer),0);
end;

Please helpppppppp ?

Bye,
  Skybuck.
 




 15 Posts in Topic:
Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compress,
"Skybuck Flying"  2008-03-18 09:48:13 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 09:56:58 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 10:07:57 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 10:16:12 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-20 12:19:52 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Rod Pemberton"  2008-03-18 06:26:49 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 11:51:39 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 11:52:52 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 11:55:04 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 11:58:24 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-18 20:58:46 
Re: Crypt++ 5.5.2 Tiger::Transform converted to DCPCrypt2.Compre
"Skybuck Flying"  2008-03-19 01:37:00 
Delphi re-ordering memory as an "optimization" ?
"Skybuck Flying"  2008-03-19 01:39:53 
Probably register issue, like esi, edi, whatever. pushad, popad
"Skybuck Flying"  2008-03-19 02:29:56 
Still 105 MB/sec not bad :)
"Skybuck Flying"  2008-03-19 02:32:46 

Post A Reply:
  Go here to Signup

AddThis Feed Button


About - Advertising - Contact - Frequently Asked Questions - Privacy Policy - Terms of Use - Signup

Contact
tan12V112 Sat Jul 26 1:36:28 CDT 2008.