;
; Date: 2000-04-17			Mikael Kalms (Scout/C-Lous & more)
;					Email: mikael@kalms.org
;
; About:
;   1x1 6bpl cpu5 C2P for contiguous bitplanes and no horizontal modulo
;
;   This routine is intended for use on all 68040 and 68060 based systems.
;   It is not designed to perform well on 68020-030.
;
;   This routine is released into the public domain. It may be freely used
;   for non-commercial as well as commercial purposes. A short notice via
;   email is always appreciated, though.
;
; Timings:
;   Estimated to run at copyspeed on 040-40 and 060
;
; Features:
;   Handles bitplanes of virtually any size (4GB)
;
; Restrictions:
;   Chunky-buffer must be an even multiple of 32 pixels wide
;   If incorrect/invalid parameters are specified, the routine will
;   most probably crash.
;
; c2p1x1_6_c5_040_init			sets chunkybuffer size/pos & bplsize
; c2p1x1_6_c5_040			performs the actual c2p conversion
;


	XDEF	_c2p1x1_6_c5_040_init
	XDEF	_c2p1x1_6_c5_040


	section	code,code

; d0.w	chunkyx [chunky-pixels]
; d1.w	chunkyy [chunky-pixels]
; d2.w	(scroffsx) [screen-pixels]
; d3.w	scroffsy [screen-pixels]
; d4.l	(rowlen) [bytes] -- offset between one row and the next in a bpl
; d5.l	bplsize [bytes] -- offset between one row in one bpl and the next bpl
; d6.l	(chunkylen) [bytes] -- offset between one row and the next in chunkybuf

_c2p1x1_6_c5_040_init
c2p1x1_6_c5_040_init
	move.l	d3,-(sp)
	mulu.w	d0,d3
	lsr.l	#3,d3
	move.l	d3,c2p1x1_6_c5_040_scroffs
	mulu.w	d0,d1
	move.l	d1,c2p1x1_6_c5_040_pixels
	move.l	d5,d0
	lsl.l	#2,d0
	add.l	d5,d0
	move.l	d0,c2p1x1_6_c5_040_delta0
	addq.l	#4,d0
	move.l	d0,c2p1x1_6_c5_040_delta3
	neg.l	d5
	move.l	d5,c2p1x1_6_c5_040_delta1
	move.l	d5,c2p1x1_6_c5_040_delta2
	move.l	d5,c2p1x1_6_c5_040_delta4
	move.l	d5,c2p1x1_6_c5_040_delta5
	move.l	d5,c2p1x1_6_c5_040_delta6
	move.l	(sp)+,d3
	rts


; a0	chunkybuffer
; a1	bitplanes

_c2p1x1_6_c5_040
c2p1x1_6_c5_040

	movem.l	d2-d7/a2-a6,-(sp)

	add.l	c2p1x1_6_c5_040_delta0(pc),a1
	add.l	c2p1x1_6_c5_040_scroffs(pc),a1

	move.l	c2p1x1_6_c5_040_pixels(pc),a2
	tst.l	a2
	beq	.none
	add.l	a0,a2

	move.l	(a0)+,d0
	move.l	(a0)+,d1
	move.l	(a0)+,d2
	move.l	(a0)+,d3
	move.l	(a0)+,d4
	move.l	(a0)+,d5
	move.l	(a0)+,a5
	move.l	(a0)+,a6

	move.l	d1,d6			; Swap 4x1, part 1
	move.l	d3,d7
	lsr.l	#4,d6
	lsr.l	#4,d7
	eor.l	d0,d6
	eor.l	d2,d7
	and.l	#$0f0f0f0f,d6
	and.l	#$0f0f0f0f,d7
	eor.l	d6,d0
	eor.l	d7,d2
	lsl.l	#4,d6
	lsl.l	#4,d7
	eor.l	d6,d1
	eor.l	d7,d3

	exg	d2,a5
	exg	d3,a6

	move.l	d5,d6			; Swap 4x1, part 2
	move.l	d3,d7
	lsr.l	#4,d6
	lsr.l	#4,d7
	eor.l	d4,d6
	eor.l	d2,d7
	and.l	#$0f0f0f0f,d6
	and.l	#$0f0f0f0f,d7
	eor.l	d6,d4
	eor.l	d7,d2
	lsl.l	#4,d6
	lsl.l	#4,d7
	eor.l	d6,d5
	eor.l	d7,d3

	exg	a5,d1

	move.w	d4,d6			; Swap 16x4, part 1
	move.w	d2,d7
	move.w	d0,d4
	move.w	d1,d2
	swap	d4
	swap	d2
	move.w	d4,d0
	move.w	d2,d1
	move.w	d6,d4
	move.w	d7,d2

	lsl.l	#2,d0			; Swap/Merge 2x4, part 1
	lsl.l	#2,d1
	or.l	d4,d0
	or.l	d2,d1

	move.l	d1,d6			; Swap 8x2, part 1
	move.l	a5,d4			; Swap 16x4, part 2, interleaved
	lsr.l	#8,d6
	move.l	a6,d2

	swap	d5
	swap	d3
	eor.l	d0,d6
	eor.w	d4,d5
	and.l	#$00ff00ff,d6
	eor.w	d2,d3
	eor.l	d6,d0
	eor.w	d5,d4
	lsl.l	#8,d6
	eor.w	d3,d2
	eor.l	d6,d1
	eor.w	d4,d5

	move.l	d1,d6			; Swap 1x2, part 1
	eor.w	d2,d3			; Swap 16x4, part 2, interleaved
	swap	d5
	swap	d3
	lsr.l	#1,d6

	bra	.start
	cnop	0,16
.x
	tst.b	32(a0)
	move.l	(a0)+,d0
	move.l	(a0)+,d1
	move.l	(a0)+,d2
	move.l	(a0)+,d3
	tst.b	32(a0)
	move.l	(a0)+,d4
	move.l	(a0)+,d5
	move.l	(a0)+,a5
	move.l	(a0)+,a6

	move.l	d6,(a1)

	move.l	d1,d6			; Swap 4x1, part 1
	move.l	d3,d7
	lsr.l	#4,d6
	lsr.l	#4,d7
	eor.l	d0,d6
	eor.l	d2,d7
	and.l	#$0f0f0f0f,d6
	and.l	#$0f0f0f0f,d7
	eor.l	d6,d0
	eor.l	d7,d2
	lsl.l	#4,d6
	lsl.l	#4,d7
	eor.l	d6,d1
	eor.l	d7,d3

	exg	d2,a5
	exg	d3,a6

	move.l	d5,d6			; Swap 4x1, part 2
	move.l	d3,d7
	lsr.l	#4,d6
	lsr.l	#4,d7
	eor.l	d4,d6
	add.l	c2p1x1_6_c5_040_delta1(pc),a1
	eor.l	d2,d7
	and.l	#$0f0f0f0f,d6
	and.l	#$0f0f0f0f,d7
	eor.l	d6,d4
	eor.l	d7,d2
	move.l	a3,(a1)
	lsl.l	#4,d6
	lsl.l	#4,d7
	eor.l	d6,d5
	eor.l	d7,d3

	exg	a5,d1

	move.w	d4,d6			; Swap 16x4, part 1
	move.w	d2,d7
	move.w	d0,d4
	move.w	d1,d2
	swap	d4
	swap	d2
	move.w	d4,d0
	move.w	d2,d1
	move.w	d6,d4
	move.w	d7,d2

	lsl.l	#2,d0			; Swap/Merge 2x4, part 1
	lsl.l	#2,d1
	add.l	c2p1x1_6_c5_040_delta2(pc),a1
	or.l	d4,d0
	or.l	d2,d1

	move.l	d1,d6			; Swap 8x2, part 1
	move.l	a5,d4			; Swap 16x4, part 2, interleaved
	lsr.l	#8,d6
	move.l	a6,d2
	move.l	a4,(a1)

	swap	d5
	swap	d3
	eor.l	d0,d6
	eor.w	d4,d5
	and.l	#$00ff00ff,d6
	eor.w	d2,d3
	eor.l	d6,d0
	eor.w	d5,d4
	lsl.l	#8,d6
	eor.w	d3,d2
	eor.l	d6,d1
	eor.w	d4,d5

	move.l	d1,d6			; Swap 1x2, part 1
	eor.w	d2,d3			; Swap 16x4, part 2, interleaved
	swap	d5
	swap	d3
	add.l	c2p1x1_6_c5_040_delta3(pc),a1
	lsr.l	#1,d6
.start
	eor.l	d0,d6
	and.l	#$55555555,d6
	eor.l	d6,d0
	add.l	d6,d6
	eor.l	d6,d1

	move.l	d0,(a1)

	move.l	d5,d6			; Swap/Merge 2x4, part 2
	move.l	d3,d7
	lsr.l	#2,d6
	lsr.l	#2,d7
	eor.l	d4,d6
	eor.l	d2,d7
	and.l	#$33333333,d6
	and.l	#$33333333,d7
	eor.l	d6,d4
	eor.l	d7,d2
	lsl.l	#2,d6
	lsl.l	#2,d7
	eor.l	d6,d5
	eor.l	d7,d3

	add.l	c2p1x1_6_c5_040_delta4(pc),a1
	move.l	d2,d6			; Swap 8x2, part 2
	move.l	d3,d7
	lsr.l	#8,d6
	lsr.l	#8,d7
	eor.l	d4,d6
	eor.l	d5,d7
	move.l	d1,(a1)
	and.l	#$00ff00ff,d6
	and.l	#$00ff00ff,d7
	eor.l	d6,d4
	eor.l	d7,d5
	lsl.l	#8,d6
	lsl.l	#8,d7
	eor.l	d6,d2
	eor.l	d7,d3

	move.l	d2,d6			; Swap 1x2, part 2
	move.l	d3,d7
	lsr.l	#1,d6
	lsr.l	#1,d7
	add.l	c2p1x1_6_c5_040_delta5(pc),a1
	eor.l	d4,d6
	eor.l	d5,d7
	and.l	#$55555555,d6
	and.l	#$55555555,d7
	eor.l	d6,d4
	eor.l	d7,d5
	move.l	d4,(a1)
	add.l	d6,d6
	add.l	d7,d7
	eor.l	d2,d6
	eor.l	d7,d3

	move.l	d5,a3
	move.l	d3,a4
	add.l	c2p1x1_6_c5_040_delta6(pc),a1

	cmp.l	a0,a2
	bne	.x

	move.l	d6,(a1)
	add.l	c2p1x1_6_c5_040_delta1(pc),a1
	move.l	a3,(a1)
	add.l	c2p1x1_6_c5_040_delta2(pc),a1
	move.l	a4,(a1)


.none	movem.l	(sp)+,d2-d7/a2-a6
	rts

			cnop	0,4
c2p1x1_8_c5_040_data
c2p1x1_6_c5_040_scroffs	ds.l	1
c2p1x1_6_c5_040_pixels	ds.l	1
c2p1x1_6_c5_040_delta0	ds.l	1
c2p1x1_6_c5_040_delta1	ds.l	1
c2p1x1_6_c5_040_delta2	ds.l	1
c2p1x1_6_c5_040_delta3	ds.l	1
c2p1x1_6_c5_040_delta4	ds.l	1
c2p1x1_6_c5_040_delta5	ds.l	1
c2p1x1_6_c5_040_delta6	ds.l	1
