[17] | 1 | ;*************************************************************************** |
---|
| 2 | ;* Copyright (C) 2005 by Prakash Punnoor * |
---|
| 3 | ;* prakash@punnoor.de * |
---|
| 4 | ;* * |
---|
| 5 | ;* This program is free software; you can redistribute it and/or modify * |
---|
| 6 | ;* it under the terms of the GNU Library General Public License as * |
---|
| 7 | ;* published by the Free Software Foundation; either version 2 of the * |
---|
| 8 | ;* License, or (at your option) any later version. * |
---|
| 9 | ;* * |
---|
| 10 | ;* This program is distributed in the hope that it will be useful, * |
---|
| 11 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of * |
---|
| 12 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
---|
| 13 | ;* GNU General Public License for more details. * |
---|
| 14 | ;* * |
---|
| 15 | ;* You should have received a copy of the GNU Library General Public * |
---|
| 16 | ;* License along with this program; if not, write to the * |
---|
| 17 | ;* Free Software Foundation, Inc., * |
---|
| 18 | ;* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * |
---|
| 19 | ;*************************************************************************** |
---|
| 20 | |
---|
;-----------------------------------------------------------------------
; void _alMMXmemcpy(void* dst, void* src, unsigned int n);
;
; Forward byte copy of n bytes from src to dst using 64-byte MMX blocks.
; x86 32 bit only! Arguments are read from the stack.
;
; Layout of the copy:
;   1. pre copy  - 0..7 bytes with rep movsb until dst is 8-byte aligned
;                  (clamped to n, so any n including 0 is handled)
;   2. MMX copy  - as many whole 64-byte blocks as remain
;   3. post copy - the remaining 0..63 bytes with rep movsb
;
; In:    [esp + 4]  = dst
;        [esp + 8]  = src
;        [esp + 12] = n
; Out:   nothing
; Saved: edi, esi, flags (pushed on entry, popped on exit)
; Clobb: ecx, edx, mm0-mm7 (emms is executed whenever the MMX loop ran)
;-----------------------------------------------------------------------
global __alMMXmemcpy
global _alMMXmemcpy

MMX_ALIGN_MASK  equ 7                   ; MMX alignment (8) - 1
MMX_BLOCK_BYTES equ 64                  ; bytes moved per MMX loop iteration
MMX_BLOCK_SHIFT equ 6                   ; log2(MMX_BLOCK_BYTES)

__alMMXmemcpy:
_alMMXmemcpy:

        ; Save the registers affected
        pushf
        push    edi
        push    esi

        cld                             ; string instructions count upwards

        ; 3 pushes (12 bytes) + return address (4 bytes) precede the args
        mov     edi, [esp + 16]         ; char* dst
        mov     esi, [esp + 20]         ; char* src
        mov     edx, [esp + 24]         ; unsigned int n

        ; align dest: ecx = (-dst) & 7 = bytes up to the next 8-byte boundary
        mov     ecx, edi
        neg     ecx
        and     ecx, MMX_ALIGN_MASK     ; 0 when dst is already aligned

        ; never pre copy more than n bytes, otherwise the subtraction
        ; below would wrap around and the copy would run away
        cmp     ecx, edx
        jbe     .precopy                ; unsigned: gap <= n
        mov     ecx, edx
.precopy:
        sub     edx, ecx                ; less to copy after this

        ; pre copy
        ; copying first dwords and then
        ; remaining bytes wasn't faster
        rep movsb

        ; calc MMX copy length
        mov     ecx, edx
        and     ecx, MMX_BLOCK_BYTES - 1 ; post copy bytes
        shr     edx, MMX_BLOCK_SHIFT    ; MMX copy iterations
        test    edx, edx
        jz      .loopend                ; fewer than 64 bytes left: skip MMX

        ; MMX copy: load a full 64-byte block, then store it
.loopstart:
        movq    mm0, [esi]
        movq    mm1, [esi + 8]
        movq    mm2, [esi + 16]
        movq    mm3, [esi + 24]
        movq    mm4, [esi + 32]
        movq    mm5, [esi + 40]
        movq    mm6, [esi + 48]
        movq    mm7, [esi + 56]
        movq    [edi], mm0
        movq    [edi + 8], mm1
        movq    [edi + 16], mm2
        movq    [edi + 24], mm3
        movq    [edi + 32], mm4
        movq    [edi + 40], mm5
        movq    [edi + 48], mm6
        movq    [edi + 56], mm7
        add     esi, MMX_BLOCK_BYTES
        add     edi, MMX_BLOCK_BYTES
        dec     edx
        jnz     .loopstart
        emms                            ; leave MMX state so x87 code works again
.loopend:

        ; post copy (ecx = remaining 0..63 bytes)
        rep movsb

        ; Restore registers
        pop     esi
        pop     edi
        popf
        ret
---|
| 94 | |
---|
; prevent executable stack
; The 32-bit ELF output format may be reported as either "elf" or "elf32"
; by __OUTPUT_FORMAT__, so both spellings are accepted. Only the 32-bit
; names are checked because this file is x86 32 bit only.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
---|