;
;		Flick FLI-format Animation Viewer v1.2		  19 Feb 1994
;		--------------------------------------
;
;
;This program plays FLI/FLC-format bitmapped animation files on any ECS
;or AGA Amiga running OS2.04 or higher.  FLI/FLC-format files are
;produced by Autodesk Animator and Autodesk 3D Studio on a PC, as well
;as by other programs.
;
;The files in this archive may be distributed anywhere provided they are
;unmodified and are not sold for profit.
;
;Ownership and copyright of all files remains with the author:
;
;	Peter McGavin, 86 Totara Crescent, Lower Hutt, New Zealand.
;	e-mail: peterm@maths.grace.cri.nz
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;		xdef	_chunky2planar

; Basically the same as peterm/chunky4.s, except compare chunky memory
; with a compare page and update only where changes occur.
; Also update the compare page.

;-----------------------------------------------------------------------------
; Set Macro68 defaults
;
; NOTE(review): the "default" directive is specific to the Macro68 assembler.
; Presumably these lines choose what the assembler emits when an instruction
; does not spell out a size or mode itself (word-sized branches and
; displacements, PC-relative in place of absolute addressing) -- confirm
; against the Macro68 manual before porting this file to another assembler.

		default	_branch,_word
		default	_adrbasedisp,_word
		default	_pcbasedisp,_word
		default	_outerdisp,_word
		default	_absolute,_pcrel

;-----------------------------------------------------------------------------
; chunky2planar:	(new Motorola syntax)
;  a0 -> chunky pixels
;  a1 -> plane0 (assume other 7 planes are allocated contiguously)
;  a2 -> compare page of chunky pixels
;  d1 = width*height/8, i.e. the size of one bitplane in bytes (if "generic" is defined)


	ifnd generic
; Fixed-size build only: plsiz is the number of bytes in one bitplane (one bit
; per pixel).  It is used below as the distance between consecutive planes
; and, divided by 4, as the number of 32-pixel groups to process.
; width and height are not defined in this listing; they have to be supplied
; by whatever assembles this file.
plsiz		equ	width*height/8
	endc


; Offset used with the base pointer loaded from address 4 when the caches are
; flushed after the one-time code relocation.
_LVOCacheClearU equ	-636

	ifd generic
; Entry label selection.  One global entry point is defined for the routine
; that follows, chosen at assembly time from "generic", "depth" and "width":
;
;   generic build:     _c2p_8_cmp, _c2p_6_cmp or _c2p_4_cmp according to
;                      depth.  Any other depth defines no label here and is
;                      rejected further down, where (depth-1)*plsiz is
;                      computed ("Unsupported depth").
;   fixed-size build:  _c2p320x200x8_cmp or _c2p320x200x6_cmp.  Any other
;                      depth OR any width other than 320 stops the assembly;
;                      without that check the routine would be assembled with
;                      no entry label at all and could never be called.
;
; NOTE(review): height is not checked although the fixed-size labels say
; "x200" -- confirm whether other heights are meant to be allowed.
	ifeq depth-8
_c2p_8_cmp::
	else
	ifeq depth-6
_c2p_6_cmp::
	else
	ifeq depth-4
_c2p_4_cmp::
	endc
	endc
	endc
	else
	ifeq depth-8
		ifeq	width-320
_c2p320x200x8_cmp::
		else
		die	"Unrecognised resolution"
		endc
	else
	ifeq depth-6
		ifeq	width-320
_c2p320x200x6_cmp::
		else
		die	"Unrecognised resolution"
		endc
	else
		die	"Unrecognised resolution"
	endc
	endc
	endc

		movem.l	d2-d7/a2-a6,-(sp)
; d2-d7/a2-a6 are saved above and restored by the matching movem just before
; the final rts.  d0, d1, a0 and a1 are not preserved.

; bset sets bit 0 of firsttimeflag and reports its previous state in the Z
; flag, so the relocation below runs on the first call only.

		bset	#0,(firsttimeflag)
		bne.b	skip_relocate	; branch if not being called 1st time

; relocate the mainloop to a quad-longword boundary (for 030/040 cache line)
;
; a4 is rounded up to the first 16-byte boundary at or after begincode.  The
; 8 nops at begincode occupy 16 bytes, so that boundary always lies below
; mainloop and the code between mainloop and endcode is moved down by 2 to 16
; bytes.  The copy runs upwards with the destination below the source, so the
; overlap is harmless.  Every branch inside the copied range is relative and
; has its target inside the range, so the copy runs unchanged at its new
; address.  After this point the labels from mainloop to endcode no longer
; mark the instructions that actually execute.

		lea	(begincode,pc),a4
		adda.w	#15,a4
		move.l	a4,d0
		and.w	#~15,d0	; clear the low 4 address bits
		movea.l	d0,a4		; a4 = aligned destination
		lea	(mainloop,pc),a3	; a3 = source
		move.w	#(endcode-mainloop)/2-1,d0	; word count, minus 1 for dbra
1$:		move.w	(a3)+,(a4)+
		dbra	d0,1$

; flush the caches
;
; The code was just rewritten in memory, so stale cached copies must be
; discarded before it is executed.  a0, a1 and d1 hold this routine's
; arguments and are saved around the call.  a6 is loaded from the longword at
; absolute address 4 (exec.library's base and its CacheClearU function, going
; by the symbol name).

		movem.l	a0/a1/d1,-(sp)
		movea.l	(4).w,a6
		jsr	(_LVOCacheClearU,a6)
		movem.l	(sp)+,a0/a1/d1

; Build the stack frame and load the register constants used by the main loop.
;
; On exit from this section:
;   a3 -> 32-byte scratch buffer on the stack, aligned to 16 bytes; the main
;         loop collects 4 bytes per bitplane in it (8 bytes per register:
;         a3 = planes 0/1, a4 = planes 2/3, a5 = planes 4/5 and, at +8/+12,
;         planes 6/7)
;   a6  = a3+4, compared against a3 to end the inner loop after 4 passes
;   just below the buffer: word holding the number of bytes allocated for it
;   generic build only: plsiz and (depth-1)*plsiz as longwords
;   (sp) = outer loop counter (word), one count per group of 32 pixels
;
; The outer loop decrements its counter before testing it, so a count of zero
; would run 65536 times.  In the generic build the size comes from the caller
; in d1, so a request for fewer than 32 pixels (d1 < 4) returns immediately
; instead of converting memory that was never asked for.  Only the low 16 bits
; of d1/4 are used as the counter; larger sizes are not supported.

skip_relocate:
	ifd generic
		move.l	d1,d0
		lsr.l	#2,d0		; d0 = number of 32-pixel groups
		bne.b	1$		; at least one group: carry on
		movem.l	(sp)+,d2-d7/a2-a6	; nothing to convert: undo the
		rts				; entry movem and return
1$:
	endc
		move.w	sp,d0
		and.w	#15,d0		; bytes by which sp exceeds a 16-byte boundary
		add.w	#32,d0		; make room on stack for
		suba.w	d0,sp		; 32-byte quad-longword aligned buffer
		movea.l	sp,a3		; pointed to by a3
		move.w	d0,-(sp)	; and save the allocated size
	ifd generic
		move.l	d1,-(sp)	; plsiz on stack at (6,sp)
		move.l	d1,d0
	ifeq depth-8
		lsl.l	#3,d0
		sub.l	d1,d0		; d0 = 7*plsiz
	else
	ifeq depth-6
		lsl.l	#2,d0
		add.l	d1,d0		; d0 = 5*plsiz
	else
	ifeq depth-4
		add.l	d0,d0
		add.l	d1,d0		; d0 = 3*plsiz
	else
		die	"Unsupported depth"
	endc
	endc
	endc
		move.l	d0,-(sp)	; (depth-1)*plsiz on stack at (2,sp)
		lsr.l	#2,d1		; plsiz/4 = number of 32-pixel groups
		move.w	d1,-(sp)	; outer loop counter on stack at (sp)
	else
		move.w	#plsiz/4,-(sp)	; outer loop counter on stack at (sp)
	endc

; set up register constants

		move.l	#$0f0f0f0f,d5	; d5 = constant $0f0f0f0f
		move.l	#$55555555,d6	; d6 = constant $55555555
		move.l	#$3333cccc,d7	; d7 = constant $3333cccc

; load up address registers with buffer ptrs

		lea	(2*4,a3),a4	; a4 -> plane2buf
	ifgt depth-4
		lea	(2*4,a4),a5	; a5 -> plane4buf
	endc
		lea	(4,a3),a6	; a6 used for inner loop end test

; 16 bytes of padding.  On the first call the code from mainloop to endcode is
; copied down into this area so that it starts on a 16-byte boundary; any nops
; left in front of the copy are simply executed on the way in.

begincode:	rept	8		; space for mainloop code relocation
		nop
		endr

; main loop (starts here) processes 32 chunky pixels at a time
; compare next 32 pixels with compare page, looking for differences
;
; a0 -> chunky pixels, a2 -> compare page, a1 -> current longword of plane 0,
; (sp) = number of 32-pixel groups still to do.
; Each cmpm.l compares one longword (4 pixels) and advances both pointers, so
; when the n-th compare (n = 1..8) finds a difference both pointers are 4*n
; bytes past the start of the group; the branch goes to label n$, which
; rewinds them by exactly that amount before the group is converted.

mainloop:	cmpm.l	(a0)+,(a2)+
		bne.b	1$
		cmpm.l	(a0)+,(a2)+
		bne.b	2$
		cmpm.l	(a0)+,(a2)+
		bne.b	3$
		cmpm.l	(a0)+,(a2)+
		bne.b	4$
		cmpm.l	(a0)+,(a2)+
		bne.b	5$
		cmpm.l	(a0)+,(a2)+
		bne.b	6$
		cmpm.l	(a0)+,(a2)+
		bne.b	7$
		cmpm.l	(a0)+,(a2)+
		bne.b	8$

; all 32 pixels are unchanged: nothing is written for this group

		addq.l	#4,a1		; skip 4 bytes in output
		sub.w	#1,(sp)		; one group fewer to do
		bne.b	mainloop
		bra.w	done

; difference found, restore a0 and a2
; (the labels are chained in pairs: 8$ = 4+28, 7$ = 28, 6$ = 4+20, 5$ = 20,
;  4$ = 4+12, 3$ = 12, 2$ = 4+4, 1$ = 4 bytes back)

8$:		subq.l	#4,a0
		subq.l	#4,a2
7$:		sub.w	#28,a0
		sub.w	#28,a2
		bra.b	go_c2p

6$:		subq.l	#4,a0
		subq.l	#4,a2
5$:		sub.w	#20,a0
		sub.w	#20,a2
		bra.b	go_c2p

4$:		subq.l	#4,a0
		subq.l	#4,a2
3$:		sub.w	#12,a0
		sub.w	#12,a2
		bra.b	go_c2p

2$:		subq.l	#4,a0
		subq.l	#4,a2
1$:		subq.l	#4,a0		; falls through into go_c2p
		subq.l	#4,a2

; convert 32 pixels to stack buffers
;
; Each pass through go_c2p reads 8 chunky pixels (two longwords), copies them
; to the compare page, and stores one byte per bitplane into the stack
; buffers.  a3 advances by one byte per pass and the loop ends when it reaches
; a6 (buffer start + 4), i.e. after 4 passes = 32 pixels.
; The numbers at the start of the existing comments appear to be per-
; instruction cycle counts.

go_c2p:		move.l	(a0)+,d2	; 12 get next 4 chunky pixels in d2
		move.l	d2,(a2)+	;    update compare page
		move.l	(a0)+,d3	; 12 get next 4 chunky pixels in d3
		move.l	d3,(a2)+	;    update compare page
; More than 4 planes: split off the upper nibbles.  Afterwards each byte of d0
; holds the upper nibble of a pixel from d2 (high half) and of the matching
; pixel from d3 (low half); d2 and d3 keep only the lower nibbles.
; NOTE(review): with depth 4 this masking is not assembled, so pixel values
; are presumably expected to fit in 4 bits -- confirm with the caller.
	ifgt depth-4
		move.l	d2,d0		;  4
		and.l	d5,d2		;  8 d5=$0f0f0f0f
		move.l	d3,d1		;  4
		and.l	d5,d3		;  8 d5=$0f0f0f0f
		eor.l	d2,d0		;  8
		eor.l	d3,d1		;  8
		lsr.l	#4,d1		; 16
		or.l	d1,d0		;  8
	endc
; Pack the lower nibbles of all 8 pixels into d2 the same way, then exchange
; 2-bit groups between the upper and lower word (mask $3333cccc, shifts by 2
; around a swap).  The result for planes 0-3 is left in d3.
		lsl.l	#4,d2		; 16
		or.l	d3,d2		;  8
		move.l	d2,d3		;  4
		and.l	d7,d3		;  8 d7=$3333cccc
		eor.l	d3,d2		;  8
		lsr.w	#2,d3		; 10
		swap	d3		;  4
		lsl.w	#2,d3		; 10
		or.l	d2,d3		;  8
; Same 2-bit exchange for the upper nibbles (d0 -> d1), followed by the final
; step for planes 4-7: the value is combined with a copy of itself shifted
; right by 7, taking alternate bits from each (mask $55555555), and the low
; byte of each word is stored as one plane's byte.
	ifgt depth-4
		move.l	d0,d1		;  4
		and.l	d7,d1		;  8 d7=$3333cccc
		eor.l	d1,d0		;  8
		lsr.w	#2,d1		; 10
		swap	d1		;  4
		lsl.w	#2,d1		; 10
		or.l	d0,d1		;  8
		move.l	d1,d2		;  4
		lsr.l	#7,d2		; 22
		move.l	d1,d0		;  4
		and.l	d6,d0		;  8 d6=$55555555
		eor.l	d0,d1		;  8
		move.l	d2,d4		;  4
		and.l	d6,d4		;  8 d6=$55555555
		eor.l	d4,d2		;  8
		or.l	d4,d1		;  8
		lsr.l	#1,d1		; 10
		move.b	d1,(4,a5)	; 12 plane 5
	ifgt depth-6
		swap	d1		;  4
		move.b	d1,(12,a5)	; 12 plane 7
	endc
		or.l	d0,d2		;  8
		move.b	d2,(a5)+	;  8 plane 4
; a5 has already been incremented here, hence the displacement of 8-1
	ifgt depth-6
		swap	d2		;  4
		move.b	d2,(8-1,a5)	;  8 plane 6
	endc
	endc
; Final step for planes 0-3, identical in structure to the one above but
; working on d3 and storing through a3 (planes 0/1) and a4 (planes 2/3).
		move.l	d3,d2		;  4
		lsr.l	#7,d2		; 22
		move.l	d3,d0		;  4
		and.l	d6,d0		;  8 d6=$55555555
		eor.l	d0,d3		;  8
		move.l	d2,d4		;  4
		and.l	d6,d4		;  8 d6=$55555555
		eor.l	d4,d2		;  8
		or.l	d4,d3		;  8
		lsr.l	#1,d3		; 10
		move.b	d3,(4,a3)	; 12 plane 1
		swap	d3		;  4
		move.b	d3,(4,a4)	; 12 plane 3
		or.l	d0,d2		;  8
		move.b	d2,(a3)+	;  8 plane 0
		swap	d2		;  4
		move.b	d2,(a4)+	;  8 plane 2

		cmpa.l	a3,a6		; 4 bytes per plane collected yet?
		bne.w	go_c2p

; move the result of the previous 32 pixels from stack buffers to
; chip ram planes and restore stack buffer pointers
;
; a1 points into plane 0.  It is first moved forward by (depth-1)*plsiz to the
; same position in the highest plane, then stepped back one plane (plsiz
; bytes, held in d0) after each longword, so the planes are written from the
; highest down to plane 0.  At this point a3, a4 and a5 are each 4 bytes past
; the start of their buffers; the -(an) accesses read the even-numbered plane
; and at the same time put the pointers back for the next group.  The final
; (a1)+ leaves a1 on the next longword of plane 0.

	ifd generic
		adda.l	(2,sp),a1		; add (depth-1)*plsiz
		move.l	(6,sp),d0		; plsiz
	else
		adda.l	#(plsiz*(depth-1)),a1
		move.l	#plsiz,d0
	endc
	ifgt depth-6
		move.l	(8,a5),(a1)		; plane 7
		sub.l	d0,a1
		move.l	(4,a5),(a1)		; plane 6
		sub.l	d0,a1
	endc
	ifgt depth-4
		move.l	(a5),(a1)		; plane 5
		sub.l	d0,a1
		move.l	-(a5),(a1)		; plane 4
		sub.l	d0,a1
	endc
		move.l	(a4),(a1)		; plane 3
		sub.l	d0,a1
		move.l	-(a4),(a1)		; plane 2
		sub.l	d0,a1
		move.l	(a3),(a1)		; plane 1
		sub.l	d0,a1
		move.l	-(a3),(a1)+		; plane 0

; check if finished
;
; The counter at (sp) is decremented before it is tested, so it must be
; non-zero when the main loop is first entered.

check_done:	sub.w	#1,(sp)
		bne.w	mainloop

; all done!  restore stack and return
;
; The stack is unwound in the reverse order of its construction: first the
; loop variables, then the saved size word, which is popped and added to sp
; to release the aligned buffer together with its alignment padding, and
; finally the registers saved on entry.

done:	ifd generic
		adda.w	#4+4+2,sp		; remove stack variables
	else
		addq.w	#2,sp			; remove outer loop counter
	endc
		adda.w	(sp)+,sp		; remove aligned 32-byte buffer
		movem.l	(sp)+,d2-d7/a2-a6

		rts
; endcode marks the end of the range that is copied on the first call.
endcode:

; Bit 0 is set by the first call and makes later calls skip the relocation.
; The flag sits after endcode and the relocated copy ends below endcode, so
; the copy never overwrites it.
firsttimeflag:	dc.b	0
		even

;-----------------------------------------------------------------------------

		end
