Planet
navi homePPSaboutscreenshotsdownloaddevelopmentforum

source: downloads/openal-0.0.8/src/arch/i386/memcpy_mmx_prk.nasm @ 17

Last change on this file since 17 was 17, checked in by landauf, 16 years ago

added openal

File size: 2.7 KB
Line 
1;***************************************************************************
2;*   Copyright (C) 2005 by Prakash Punnoor                                 *
3;*   prakash@punnoor.de                                                    *
4;*                                                                         *
5;*   This program is free software; you can redistribute it and/or modify  *
6;*   it under the terms of the GNU Library General Public License as       *
7;*   published by the Free Software Foundation; either version 2 of the    *
8;*   License, or (at your option) any later version.                       *
9;*                                                                         *
10;*   This program is distributed in the hope that it will be useful,       *
11;*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12;*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13;*   GNU General Public License for more details.                          *
14;*                                                                         *
15;*   You should have received a copy of the GNU Library General Public     *
16;*   License along with this program; if not, write to the                 *
17;*   Free Software Foundation, Inc.,                                       *
18;*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19;***************************************************************************
20
;-----------------------------------------------------------------------
; void _alMMXmemcpy(void* dst, void* src, unsigned int n);
;
; Copies n bytes from src to dst, front to back:
;   1. byte-wise head copy until dst is 8-byte aligned,
;   2. MMX copy in blocks of 64 bytes,
;   3. byte-wise tail copy of the remaining 0..63 bytes.
;
; x86 32 bit only! Arguments are read from the stack.
;
; In:    [esp + 4]  = dst
;        [esp + 8]  = src
;        [esp + 12] = n   (any length is accepted, including 0 and n < 8;
;                          the head copy is clamped to n)
; Out:   nothing
; Saved: eflags, edi, esi (pushed on entry, restored before ret)
; Clobb: ecx, edx, mm0-mm7 (only touched when at least one 64-byte
;        block is copied; emms is executed afterwards)
; Note:  the copy direction is always forward (cld), so buffers that
;        overlap with dst above src are not handled.
;-----------------------------------------------------------------------
global __alMMXmemcpy
global _alMMXmemcpy

ALMMX_SAVED_BYTES   equ 12                      ; pushf + push edi + push esi
ALMMX_ARG_DST       equ ALMMX_SAVED_BYTES + 4   ; +4 skips the return address
ALMMX_ARG_SRC       equ ALMMX_SAVED_BYTES + 8
ALMMX_ARG_N         equ ALMMX_SAVED_BYTES + 12
ALMMX_ALIGN_MASK    equ 7                       ; MMX align (8) - 1
ALMMX_BLOCK_SHIFT   equ 6                       ; log2(bytes per MMX iteration)
ALMMX_BLOCK_MASK    equ 63                      ; bytes per MMX iteration - 1

__alMMXmemcpy:
_alMMXmemcpy:

        ; Save the registers affected
        pushf
        push    edi
        push    esi

        cld                                     ; string ops walk upwards

        mov     edi, [esp + ALMMX_ARG_DST]      ; char* dst
        mov     esi, [esp + ALMMX_ARG_SRC]      ; char* src
        mov     edx, [esp + ALMMX_ARG_N]        ; unsigned int n

        ; align dest: ecx = (-dst) & 7 = bytes needed to reach the next
        ; 8-byte boundary (0 when dst is already aligned)
        mov     ecx, edi
        neg     ecx
        and     ecx, ALMMX_ALIGN_MASK

        ; never copy more than n bytes in the head; without this clamp
        ; the subtraction below would wrap around for short copies
        cmp     ecx, edx
        jbe     .head_fits
        mov     ecx, edx
.head_fits:
        sub     edx, ecx                        ; edx = bytes left after the head

        ; pre copy
        ; copying first dwords and then
        ; remaining bytes wasn't faster
        rep movsb

        ; calc MMX copy length
        mov     ecx, edx
        and     ecx, ALMMX_BLOCK_MASK           ; ecx = post copy bytes
        shr     edx, ALMMX_BLOCK_SHIFT          ; edx = MMX copy iterations
        test    edx, edx
        jz      .tail                           ; no full block: MMX state untouched

        ; MMX copy: 64 bytes per iteration, dst is 8-byte aligned here
.mmx_loop:
        movq    mm0, [esi]
        movq    mm1, [esi + 8]
        movq    mm2, [esi + 16]
        movq    mm3, [esi + 24]
        movq    mm4, [esi + 32]
        movq    mm5, [esi + 40]
        movq    mm6, [esi + 48]
        movq    mm7, [esi + 56]
        movq    [edi], mm0
        movq    [edi + 8], mm1
        movq    [edi + 16], mm2
        movq    [edi + 24], mm3
        movq    [edi + 32], mm4
        movq    [edi + 40], mm5
        movq    [edi + 48], mm6
        movq    [edi + 56], mm7
        add     esi, 64
        add     edi, 64
        dec     edx
        jnz     .mmx_loop
        emms                                    ; leave MMX state before returning

.tail:
        ; post copy (ecx = 0..63 remaining bytes)
        rep movsb

        ; Restore registers
        pop     esi
        pop     edi
        popf
        ret
94
; prevent executable stack
; The 32-bit ELF output format can be named either "elf" or "elf32"
; (for example when assembling with -f elf32), so both spellings are
; checked; otherwise the note section would be silently left out.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
Note: See TracBrowser for help on using the repository browser.