//+build !noasm !appengine
// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// NOTE(review): in the constraint line above the two terms are separated by a
// space, which the Go build-constraint syntax treats as OR, i.e. the file is
// selected when (!noasm OR !appengine). Confirm that this, rather than the
// AND form "!noasm,!appengine", is what the Go-side declarations expect.
//
// The instructions below are emitted as raw opcode bytes (WORD/BYTE/LONG);
// the trailing comment on each such line is its Intel-syntax disassembly.

// _bitmap_aligned_and_sse4 writes out[i] = left[i] & right[i] for every byte
// index i in [0, length).
//
// Arguments (32 bytes of arguments, 0 bytes of local frame):
//   left+0(FP)    -> DI   address of the first input buffer
//   right+8(FP)   -> SI   address of the second input buffer
//   out+16(FP)    -> DX   address of the output buffer
//   length+24(FP) -> CX   number of bytes to process
//
// Behaviour visible in the code:
//   * length <= 0 (signed compare) returns without touching memory.
//   * length <= 31 is handled entirely by the scalar byte loops.
//   * length >= 32 uses 16-byte MOVUPS/ANDPS vector operations, 32 bytes per
//     chunk, unless the out range intersects the left or right range, in
//     which case everything is done by the scalar loops instead. Any bytes
//     left over after the last full 32-byte chunk go through the scalar loops.
//   * Only unaligned vector loads/stores (movups) are used, so this code
//     itself places no 16-byte alignment requirement on the pointers.
//
// Registers written: AX, BX (as BL), R8, R9, R10, R11, X0, X1, X2, flags.
// NOTE(review): BL is overwritten and never restored; presumably acceptable
// under Go's assembly calling convention -- confirm.
TEXT ·_bitmap_aligned_and_sse4(SB), $0-32

	MOVQ left+0(FP), DI
	MOVQ right+8(FP), SI
	MOVQ out+16(FP), DX
	MOVQ length+24(FP), CX

	// Nothing to do for length <= 0; lengths above 31 try the vector path.
	WORD $0x8548; BYTE $0xc9 // test rcx, rcx
	JLE LBB0_16
	LONG $0x1ff98348 // cmp rcx, 31
	JA LBB0_7
	// Short input: scalar loops start at byte index r11 = 0.
	WORD $0x3145; BYTE $0xdb // xor r11d, r11d

	// Scalar path. On entry r11 is the index of the first unprocessed byte
	// (0, or length rounded down to a multiple of 32).
	// r8 = length - r11 - 1 (remaining bytes minus one); r9 = length & 3 is
	// the number of bytes peeled one at a time so the rest is a multiple of 4.
LBB0_3:
	WORD $0x894d; BYTE $0xd8 // mov r8, r11
	WORD $0xf749; BYTE $0xd0 // not r8
	WORD $0x0149; BYTE $0xc8 // add r8, rcx
	WORD $0x8949; BYTE $0xc9 // mov r9, rcx
	LONG $0x03e18349 // and r9, 3
	JE LBB0_5

	// Peel loop: one byte per iteration, r9 iterations.
LBB0_4:
	LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11]
	LONG $0x1f042242 // and al, byte [rdi + r11]
	LONG $0x1a048842 // mov byte [rdx + r11], al
	LONG $0x01c38349 // add r11, 1
	LONG $0xffc18349 // add r9, -1
	JNE LBB0_4

	// If fewer than 4 bytes were remaining at LBB0_3, the peel loop has
	// already handled all of them.
LBB0_5:
	LONG $0x03f88349 // cmp r8, 3
	JB LBB0_16

	// Unrolled scalar loop: four bytes per iteration until r11 == length.
LBB0_6:
	LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11]
	LONG $0x1f042242 // and al, byte [rdi + r11]
	LONG $0x1a048842 // mov byte [rdx + r11], al
	LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1]
	LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1]
	LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al
	LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2]
	LONG $0x1f442242; BYTE $0x02 // and al, byte [rdi + r11 + 2]
	LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al
	LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3]
	LONG $0x1f442242; BYTE $0x03 // and al, byte [rdi + r11 + 3]
	LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al
	LONG $0x04c38349 // add r11, 4
	WORD $0x394c; BYTE $0xd9 // cmp rcx, r11
	JNE LBB0_6
	JMP LBB0_16

	// length >= 32. Range-intersection checks (unsigned compares):
	//   r10b = left+length  > out      bl  = out+length > left
	//   r8b  = right+length > out      r9b = out+length > right
	// If out intersects left (r10b && bl) or right (r8b && r9b), fall back to
	// the scalar path with r11 = 0.
LBB0_7:
	LONG $0x0a0c8d4c // lea r9, [rdx + rcx]
	LONG $0x0f048d48 // lea rax, [rdi + rcx]
	WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
	LONG $0xd2970f41 // seta r10b
	LONG $0x0e048d48 // lea rax, [rsi + rcx]
	WORD $0x3949; BYTE $0xf9 // cmp r9, rdi
	WORD $0x970f; BYTE $0xd3 // seta bl
	WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
	LONG $0xd0970f41 // seta r8b
	WORD $0x3949; BYTE $0xf1 // cmp r9, rsi
	LONG $0xd1970f41 // seta r9b
	WORD $0x3145; BYTE $0xdb // xor r11d, r11d
	WORD $0x8441; BYTE $0xda // test r10b, bl
	JNE LBB0_3
	WORD $0x2045; BYTE $0xc8 // and r8b, r9b
	JNE LBB0_3
	// Vector setup: r11 = length rounded down to a multiple of 32 (bytes the
	// vector path will cover); r9 = r11 / 32 = number of 32-byte chunks.
	// With exactly one chunk (r11 - 32 == 0) the paired loop is skipped.
	WORD $0x8949; BYTE $0xcb // mov r11, rcx
	LONG $0xe0e38349 // and r11, -32
	LONG $0xe0438d49 // lea rax, [r11 - 32]
	WORD $0x8949; BYTE $0xc1 // mov r9, rax
	LONG $0x05e9c149 // shr r9, 5
	LONG $0x01c18349 // add r9, 1
	WORD $0x8548; BYTE $0xc0 // test rax, rax
	JE LBB0_10
	// r10 = -(chunk count rounded down to even); it counts up by 2 to zero.
	// r8 is the byte offset of the current chunk.
	WORD $0x894d; BYTE $0xca // mov r10, r9
	LONG $0xfee28349 // and r10, -2
	WORD $0xf749; BYTE $0xda // neg r10
	WORD $0x3145; BYTE $0xc0 // xor r8d, r8d

	// Paired vector loop: two 32-byte chunks (64 bytes) per iteration.
LBB0_12:
	LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
	LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
	LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
	WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0
	LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
	WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1
	LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
	LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0
	LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32]
	LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48]
	LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32]
	WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0
	LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48]
	WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1
	LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2
	LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0
	LONG $0x40c08349 // add r8, 64
	LONG $0x02c28349 // add r10, 2
	JNE LBB0_12
	// An odd chunk count leaves one more 32-byte chunk at offset r8.
	LONG $0x01c1f641 // test r9b, 1
	JE LBB0_15

	// Single 32-byte chunk at offset r8.
LBB0_14:
	LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
	LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
	LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
	WORD $0x540f; BYTE $0xd0 // andps xmm2, xmm0
	LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
	WORD $0x540f; BYTE $0xc1 // andps xmm0, xmm1
	LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
	LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0

	// If length is not a multiple of 32, finish the tail in the scalar path
	// starting at byte index r11.
LBB0_15:
	WORD $0x3949; BYTE $0xcb // cmp r11, rcx
	JNE LBB0_3

LBB0_16:
	RET

	// Exactly one 32-byte chunk: start at offset 0 and skip the paired loop.
LBB0_10:
	WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
	LONG $0x01c1f641 // test r9b, 1
	JNE LBB0_14
	JMP LBB0_15

// _bitmap_aligned_or_sse4 writes out[i] = left[i] | right[i] for every byte
// index i in [0, length).
//
// Arguments (32 bytes of arguments, 0 bytes of local frame):
//   left+0(FP)    -> DI   address of the first input buffer
//   right+8(FP)   -> SI   address of the second input buffer
//   out+16(FP)    -> DX   address of the output buffer
//   length+24(FP) -> CX   number of bytes to process
//
// Behaviour visible in the code:
//   * length <= 0 (signed compare) returns without touching memory.
//   * length <= 31 is handled entirely by the scalar byte loops.
//   * length >= 32 uses 16-byte MOVUPS/ORPS vector operations, 32 bytes per
//     chunk, unless the out range intersects the left or right range, in
//     which case everything is done by the scalar loops instead. Any bytes
//     left over after the last full 32-byte chunk go through the scalar loops.
//   * Only unaligned vector loads/stores (movups) are used, so this code
//     itself places no 16-byte alignment requirement on the pointers.
//
// Registers written: AX, BX (as BL), R8, R9, R10, R11, X0, X1, X2, flags.
// NOTE(review): BL is overwritten and never restored; presumably acceptable
// under Go's assembly calling convention -- confirm.
TEXT ·_bitmap_aligned_or_sse4(SB), $0-32

	MOVQ left+0(FP), DI
	MOVQ right+8(FP), SI
	MOVQ out+16(FP), DX
	MOVQ length+24(FP), CX

	// Nothing to do for length <= 0; lengths above 31 try the vector path.
	WORD $0x8548; BYTE $0xc9 // test rcx, rcx
	JLE LBB1_16
	LONG $0x1ff98348 // cmp rcx, 31
	JA LBB1_7
	// Short input: scalar loops start at byte index r11 = 0.
	WORD $0x3145; BYTE $0xdb // xor r11d, r11d

	// Scalar path. On entry r11 is the index of the first unprocessed byte
	// (0, or length rounded down to a multiple of 32).
	// r8 = length - r11 - 1 (remaining bytes minus one); r9 = length & 3 is
	// the number of bytes peeled one at a time so the rest is a multiple of 4.
LBB1_3:
	WORD $0x894d; BYTE $0xd8 // mov r8, r11
	WORD $0xf749; BYTE $0xd0 // not r8
	WORD $0x0149; BYTE $0xc8 // add r8, rcx
	WORD $0x8949; BYTE $0xc9 // mov r9, rcx
	LONG $0x03e18349 // and r9, 3
	JE LBB1_5

	// Peel loop: one byte per iteration, r9 iterations.
LBB1_4:
	LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11]
	LONG $0x1f040a42 // or al, byte [rdi + r11]
	LONG $0x1a048842 // mov byte [rdx + r11], al
	LONG $0x01c38349 // add r11, 1
	LONG $0xffc18349 // add r9, -1
	JNE LBB1_4

	// If fewer than 4 bytes were remaining at LBB1_3, the peel loop has
	// already handled all of them.
LBB1_5:
	LONG $0x03f88349 // cmp r8, 3
	JB LBB1_16

	// Unrolled scalar loop: four bytes per iteration until r11 == length.
LBB1_6:
	LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11]
	LONG $0x1f040a42 // or al, byte [rdi + r11]
	LONG $0x1a048842 // mov byte [rdx + r11], al
	LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1]
	LONG $0x1f440a42; BYTE $0x01 // or al, byte [rdi + r11 + 1]
	LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al
	LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2]
	LONG $0x1f440a42; BYTE $0x02 // or al, byte [rdi + r11 + 2]
	LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al
	LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3]
	LONG $0x1f440a42; BYTE $0x03 // or al, byte [rdi + r11 + 3]
	LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al
	LONG $0x04c38349 // add r11, 4
	WORD $0x394c; BYTE $0xd9 // cmp rcx, r11
	JNE LBB1_6
	JMP LBB1_16

	// length >= 32. Range-intersection checks (unsigned compares):
	//   r10b = left+length  > out      bl  = out+length > left
	//   r8b  = right+length > out      r9b = out+length > right
	// If out intersects left (r10b && bl) or right (r8b && r9b), fall back to
	// the scalar path with r11 = 0.
LBB1_7:
	LONG $0x0a0c8d4c // lea r9, [rdx + rcx]
	LONG $0x0f048d48 // lea rax, [rdi + rcx]
	WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
	LONG $0xd2970f41 // seta r10b
	LONG $0x0e048d48 // lea rax, [rsi + rcx]
	WORD $0x3949; BYTE $0xf9 // cmp r9, rdi
	WORD $0x970f; BYTE $0xd3 // seta bl
	WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
	LONG $0xd0970f41 // seta r8b
	WORD $0x3949; BYTE $0xf1 // cmp r9, rsi
	LONG $0xd1970f41 // seta r9b
	WORD $0x3145; BYTE $0xdb // xor r11d, r11d
	WORD $0x8441; BYTE $0xda // test r10b, bl
	JNE LBB1_3
	WORD $0x2045; BYTE $0xc8 // and r8b, r9b
	JNE LBB1_3
	// Vector setup: r11 = length rounded down to a multiple of 32 (bytes the
	// vector path will cover); r9 = r11 / 32 = number of 32-byte chunks.
	// With exactly one chunk (r11 - 32 == 0) the paired loop is skipped.
	WORD $0x8949; BYTE $0xcb // mov r11, rcx
	LONG $0xe0e38349 // and r11, -32
	LONG $0xe0438d49 // lea rax, [r11 - 32]
	WORD $0x8949; BYTE $0xc1 // mov r9, rax
	LONG $0x05e9c149 // shr r9, 5
	LONG $0x01c18349 // add r9, 1
	WORD $0x8548; BYTE $0xc0 // test rax, rax
	JE LBB1_10
	// r10 = -(chunk count rounded down to even); it counts up by 2 to zero.
	// r8 is the byte offset of the current chunk.
	WORD $0x894d; BYTE $0xca // mov r10, r9
	LONG $0xfee28349 // and r10, -2
	WORD $0xf749; BYTE $0xda // neg r10
	WORD $0x3145; BYTE $0xc0 // xor r8d, r8d

	// Paired vector loop: two 32-byte chunks (64 bytes) per iteration.
LBB1_12:
	LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
	LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
	LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
	WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0
	LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
	WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1
	LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
	LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0
	LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32]
	LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48]
	LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32]
	WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0
	LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48]
	WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1
	LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2
	LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0
	LONG $0x40c08349 // add r8, 64
	LONG $0x02c28349 // add r10, 2
	JNE LBB1_12
	// An odd chunk count leaves one more 32-byte chunk at offset r8.
	LONG $0x01c1f641 // test r9b, 1
	JE LBB1_15

	// Single 32-byte chunk at offset r8.
LBB1_14:
	LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
	LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
	LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
	WORD $0x560f; BYTE $0xd0 // orps xmm2, xmm0
	LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
	WORD $0x560f; BYTE $0xc1 // orps xmm0, xmm1
	LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
	LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0

	// If length is not a multiple of 32, finish the tail in the scalar path
	// starting at byte index r11.
LBB1_15:
	WORD $0x3949; BYTE $0xcb // cmp r11, rcx
	JNE LBB1_3

LBB1_16:
	RET

	// Exactly one 32-byte chunk: start at offset 0 and skip the paired loop.
LBB1_10:
	WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
	LONG $0x01c1f641 // test r9b, 1
	JNE LBB1_14
	JMP LBB1_15