
From: Andi Kleen <ak@suse.de>

The following patch speeds up the restoring of swsusp images on x86-64 by
copying each page with a single "rep movsq" instead of one byte at a time,
and makes the assembly code more readable (tested and works on AMD64).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

Changed by AK to take the size and field offsets of struct pbe from offset.h
instead of hardcoding them, and to stop reloading CR3 needlessly (according
to Pavel it was just an old debugging measure that is no longer needed).

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 25-akpm/arch/x86_64/kernel/asm-offsets.c |    5 ++
 25-akpm/arch/x86_64/kernel/suspend_asm.S |   54 +++++++++++--------------------
 2 files changed, 24 insertions(+), 35 deletions(-)

diff -puN arch/x86_64/kernel/asm-offsets.c~x86_64-speed-up-suspend arch/x86_64/kernel/asm-offsets.c
--- 25/arch/x86_64/kernel/asm-offsets.c~x86_64-speed-up-suspend	2005-01-23 14:40:19.390099128 -0800
+++ 25-akpm/arch/x86_64/kernel/asm-offsets.c	2005-01-23 14:40:19.397098064 -0800
@@ -8,6 +8,7 @@
 #include <linux/stddef.h>
 #include <linux/errno.h> 
 #include <linux/hardirq.h>
+#include <linux/suspend.h>
 #include <asm/pda.h>
 #include <asm/processor.h>
 #include <asm/segment.h>
@@ -61,6 +62,8 @@ int main(void)
 	       offsetof (struct rt_sigframe32, uc.uc_mcontext));
 	BLANK();
 #endif
-
+	DEFINE(SIZEOF_PBE, sizeof(struct pbe));
+	DEFINE(pbe_address, offsetof(struct pbe, address));
+	DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
 	return 0;
 }
diff -puN arch/x86_64/kernel/suspend_asm.S~x86_64-speed-up-suspend arch/x86_64/kernel/suspend_asm.S
--- 25/arch/x86_64/kernel/suspend_asm.S~x86_64-speed-up-suspend	2005-01-23 14:40:19.392098824 -0800
+++ 25-akpm/arch/x86_64/kernel/suspend_asm.S	2005-01-23 14:40:19.396098216 -0800
@@ -11,6 +11,7 @@
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/page.h>
+#include <asm/offset.h>
 
 ENTRY(swsusp_arch_suspend)
 
@@ -49,43 +50,28 @@ ENTRY(swsusp_arch_resume)
 	movq	%rcx, %cr3;
 	movq	%rax, %cr4;  # turn PGE back on
 
+	movq	pagedir_nosave(%rip), %rdx
+	/* compute the limit */
 	movl	nr_copy_pages(%rip), %eax
-	xorl	%ecx, %ecx
-	movq	$0, %r10
 	testl	%eax, %eax
 	jz	done
-.L105:
-	xorl	%esi, %esi
-	movq	$0, %r11
-	jmp	.L104
-	.p2align 4,,7
-copy_one_page:
-	movq	%r10, %rcx
-.L104:
-	movq	pagedir_nosave(%rip), %rdx
-	movq	%rcx, %rax
-	salq	$5, %rax
-	movq	8(%rdx,%rax), %rcx
-	movq	(%rdx,%rax), %rax
-	movzbl	(%rsi,%rax), %eax
-	movb	%al, (%rsi,%rcx)
-
-	movq	%cr3, %rax;  # flush TLB
-	movq	%rax, %cr3;
-
-	movq	%r11, %rax
-	incq	%rax
-	cmpq	$4095, %rax
-	movq	%rax, %rsi
-	movq	%rax, %r11
-	jbe	copy_one_page
-	movq	%r10, %rax
-	incq	%rax
-	movq	%rax, %rcx
-	movq	%rax, %r10
-	mov	nr_copy_pages(%rip), %eax
-	cmpq	%rax, %rcx
-	jb	.L105
+	movq	%rdx,%r8
+	movl	$SIZEOF_PBE,%r9d
+	mul		%r9  # with rax, clobbers rdx
+	movq 	%r8, %rdx
+	addq	%r8, %rax
+loop:
+	/* get addresses from the pbe and copy the page */
+	movq	pbe_address(%rdx), %rsi
+	movq	pbe_orig_address(%rdx), %rdi
+	movq	$512, %rcx
+	rep
+	movsq
+
+	/* progress to the next pbe */
+	addq	$SIZEOF_PBE, %rdx
+	cmpq	%rax, %rdx
+	jb	loop
 done:
 	movl	$24, %eax
 	movl	%eax, %ds
_
