;
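; Register usage while the scanner is running (the two lists trade
; roles after every input character):
;	al = input character
;	di = nlist tail
;	si = clist tail
;	dx = clist head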
;
RE_TEXT	SEGMENT  WORD PUBLIC 'CODE'
	assume	cs:RE_TEXT
	assume	ds:RE_TEXT
	assume	es:RE_TEXT
	extrn	_reFast1:BYTE
	extrn	_reFast2:BYTE
;
;	XTAG:re_scan
;	re_scan( char * beginBuffer, char * endBuffer, char *segBuffer,
;			char ** matchBegin, char ** matchEnd,
;			int * numberOfNewlines );
;
;	returns:
;		0 = NOT_FOUND -- RE was not found between beginBuffer
;							and endBuffer
;		1 = FOUND -- RE was found between matchBegin and matchEnd
;		2 = PARTIAL_MATCH -- partial match at the end of the buffer
;				starting at matchBegin
;
	public	_re_scan
;
; _re_scan -- far entry point of the scanner.
;
; Stack frame after the prologue (far call, so the return address takes
; two words):
;	[bp+6]	beginBuffer	offset of the first byte to scan
;	[bp+8]	endBuffer	offset of the last byte to scan
;	[bp+10]	segBuffer	segment of the buffer
;	[bp+12]	matchBegin	where the match start offset is stored
;	[bp+14]	matchEnd	where the match end offset is stored
;	[bp+16]	numberOfNewlines where the newline count is stored
; The three result pointers are dereferenced through the ds value the
; caller had on entry (saved in dataSegment).
;
; Returns the status in ax.  ds, es, si, di and bp are preserved.
; Control leaves this procedure at "jmp myxchg" and comes back at the
; "finish" label with the return value already in ax.
;
_re_scan PROC FAR
	push	bp		; save bp
	mov	bp,sp		; set up bp to access the arguments
;
	mov	cs:dataSegment,ds	; remember the caller's ds; the results
					; are stored through it later
;
	push	ds		; save ds and es
	push	es
	push	si		; save si and di
	push	di
;
	push	cs		; have all the segment registers point to
	pop	ds		; the beginning of the code segment
	push	cs
	pop	es
;
	mov	ax,[bp+6]	; beginBuffer: scanning starts here
	mov	cs:nextByteOffset,ax
	mov	cs:firstByteOffset,ax	; kept for the newline count in finish
	mov	ax,[bp+8]	; endBuffer
	mov	cs:lastByteOffset,ax
; move the segment into a position so GetNextChar can pick it up with a LDS
	mov	ax,[bp+10]	; segBuffer
	mov	cs:nextByteSegment,ax
;
init:
	cld			; set search direction to forward
	lea	di,list1	; initially list1=nlist and list2=clist
	mov	nlistTop,di	; but they are swapped after each character
	lea	si,list2
	mov	clistTop,si
	mov	dx,si		; dx = clist head
	mov	al,0AH		; "last char read" at beginning is a NL
	jmp	myxchg
;
;
finish:
	; count the number of lines in the scanned bytes, i.e. the bytes
	; from firstByteOffset up to (not including) nextByteOffset
	push	ax		; save ax since we need to change al
	mov	di,cs:firstByteOffset ; start the scan here
	mov	cx,cs:nextByteOffset  ; compute the number of bytes to scan
	mov	ds,cs:nextByteSegment
	mov	es,cs:nextByteSegment	; scasb compares against es:[di]
	sub	cx,di		; cx = the number of bytes
	mov	al,0AH		; 0AH = newline, ASCII line feed
	xor	dx,dx		; newline counter, start at 0 (of course)
NLLoop:
	; Test cx BEFORE scanning: repne scasb with cx = 0 executes no
	; iterations and leaves the flags untouched, so a stale ZF (for
	; example from the xor above) would otherwise be counted as a
	; newline when no bytes at all were scanned.
	jcxz	noMoreNLs	; nothing (left) to scan
	repne scasb		; scan to the next newline
	jne	noMoreNLs	; got to end of buffer w/o finding a newline
	inc	dx		; found another one, bump the counter
	jmp	NLLoop		; look for more
noMoreNLs:
	mov	ds,cs:dataSegment	; back to the caller's data segment
	mov	bx,[bp+16]	; get address of int to put newline count in
	mov	[bx],dx		; store the computed newline count
	; end of newline counting
;
	pop	ax		; restore ax -- the return value
	pop	di
	pop	si
	pop	es
	pop	ds
	pop	bp		; restore bp
	ret
_re_scan ENDP
;
	public	_re_cnode
;
; _re_cnode -- append an entry to the clist and continue 3 bytes past
; the caller's return address.
;
; The entry is two words: the return address popped from the stack,
; followed by the current value of thisMatchBegin.
;
;	in:	si = clist tail, return address on top of the stack
;	out:	si advanced by 4, bx = return address + 3,
;		cx = thisMatchBegin
;
_re_cnode:
	mov	cx,thisMatchBegin	; match start recorded with the entry
	pop	bx			; bx = return address (entry's 1st word)
	mov	[si+2],cx		; 2nd word: match start
	mov	[si],bx			; 1st word: return address
	add	si,4			; clist tail now points past the entry
	add	bx,3			; resume 3 bytes after the return address
	jmp	bx			; register-indirect jump: goes to the
					; address held IN bx, not to a pointer
					; stored at [bx]
;
	public	_re_nnode
	extrn	_addrJmpFound:WORD
;
; _re_nnode -- append an entry to the nlist, unless the caller is the
; one whose return address equals _addrJmpFound, in which case the
; match is complete and control goes to _re_found.
;
;	in:	di = nlist tail, return address on top of the stack
;	out:	di advanced by 4, bx = return address,
;		cx = thisMatchBegin
;
; Does NOT return to the caller: it falls through into _re_clist to
; run the next clist entry.
;
_re_nnode:
	pop	bx			; bx = return address of the caller
	cmp	bx,_addrJmpFound	; called from the "found" site?
	je	_re_found		; yes: report the match
	mov	cx,thisMatchBegin	; match start recorded with the entry
	add	di,4			; claim the slot: move the nlist tail
	mov	[di-4],bx		; 1st word of the entry: return address
	mov	[di-2],cx		; 2nd word of the entry: match start
					; fall through to the next clist item
;
	public	_re_clist
;
; _re_clist -- take the entry at the clist head and jump to the address
; stored in it.  When the clist is empty, go on to myxchg.
;
;	in:	dx = clist head, si = clist tail
;	out:	dx advanced by 4, bx = address of the entry taken,
;		thisMatchBegin = cx = second word of that entry
;
_re_clist:
	cmp	dx,si
	je	myxchg		; head=tail => nothing left on the clist
	mov	bx,dx		; bx addresses the entry being taken
	add	dx,4		; head now points at the following entry
	mov	cx,[bx+2]	; restore the match start that was saved
	mov	thisMatchBegin,cx ; with this entry
	jmp	WORD PTR [bx]	; continue at the saved address
;
	public	_re_found
;
; _re_found -- a complete match was recognized.  Store its bounds
; through the matchBegin ([bp+12]) and matchEnd ([bp+14]) pointers,
; using the caller's data segment, and leave through finish with
; ax = 1 (FOUND).
;
_re_found:
	mov	cx,thisMatchBegin
	dec	cx		; it was taken from nextByteOffset and so
				; is one too big
	mov	ax,cs:nextByteOffset
	dec	ax		; the RE ends here
	mov	ds,cs:dataSegment ; result pointers live in the caller's ds
	mov	bx,[bp+12]
	mov	[bx],cx		; store begin address of found RE
	mov	bx,[bp+14]
	mov	[bx],ax		; store end address of found RE
	mov	ax,1		; FOUND flag is returned in ax
	jmp	finish
;
;
;
; myxchg -- reached when the clist is exhausted.  If nothing is pending
; on the nlist either, optionally skip ahead in the buffer with a fast
; string scan, then fall through into skipFastScan, which swaps the
; lists and fetches the next character.
;
;	in:	al = last char read, di = nlist tail, ds = es = cs
;	out:	al, di preserved; es = cs; cx, dx clobbered;
;		nextByteOffset possibly advanced
;
myxchg:
	cmp	di,nlistTop	; tail=top => no pending partial matches
	jne	skipFastScan
;
; if this char is 0 then do not try fast searches, if it is not 0 then the
; first character of the RE is a single fixed character
;
	cmp	_reFast1,0
	je	skipFastScan
;
;  If there is no pending list of partial RE matches, then try to speed up
;  the search by scanning for the first character of the RE.  The 8086 string
;  search instructions are very fast for looking for a single character.
;
;  get the parameters for the string scan
	push	ax	; save last char read (in al)
	mov	al,_reFast1
	les	di,DWORD PTR nextByteOffset ; es:di = next byte to read
	mov	cx,lastByteOffset
	cmp	cx,di
	jbe	noScan		; at most one byte left: not worth scanning
	sub	cx,di
	inc	cx	; since lastByteOffset points to a valid byte
	mov	dx,cx		; save cx for the second scan
 repne	scasb
 	jne	notFound1	; distinguish: not found - found at last char
	dec	di		; adjust since repne scasb goes one too far
notFound1:
	mov	cx,dx		; restore the count we saved
	mov	dx,di		; save the results of the first scasb in dx
	mov	al,_reFast2	; get the second fast scan character
	or	al,al		; MOV does not set the flags, so test al
				; explicitly; without this the jnz below
				; acted on flags left by scasb / dec di
	jnz	doSecondScan	; only look for chars not equal to '\0'
	mov	di,lastByteOffset ; make sure this is not the lowest
	jmp	notFound2
doSecondScan:
	mov	di,WORD PTR nextByteOffset ; rescan the same span
 repne	scasb
 	jne	notFound2
 	dec	di		; same one-too-far adjustment as above
notFound2:
	; use the one that came first (the lower one)
	cmp	di,dx
	jbe	useSecond
	mov	di,dx
useSecond:
	mov	nextByteOffset,di ; resume reading at the earliest candidate
noScan:
	push	cs
	pop	es		; restore es (NECESSARY since we use DI)
	mov	di,nlistTop	; restore di
	pop	ax		; restore last char read (back into al)
;
;
;
; skipFastScan -- swap the roles of the two lists, fetch the next input
; character and enter the code at _re_code.
;
;	in:	al = last char read, di = nlist tail
;	out:	al = current char, si = new clist tail, dx = new clist
;		head, di = new nlist tail,
;		_re_sidechars   = previous char,
;		_re_sidechars+1 = following char (NL when the current
;				  char is the last one in the buffer),
;		nextByteOffset = thisMatchBegin = offset after the
;				  current char
; When the buffer is exhausted control goes to endOfSpan instead.
;
skipFastScan:
	mov	si,di		; set new clist tail
	mov	di,clistTop	; set new nlist tail
	mov	bx,nlistTop	; set up to exchange clist and nlist
	mov	nlistTop,di	; now do the reverse
	mov	clistTop,bx	; reversing is faster than moving the lists
	mov	dx,bx		; start adding at the top of the clist
;
; get the next character
;
	lds	bx,DWORD PTR nextByteOffset ; ds:bx = next byte; ds is no
				; longer cs until it is restored below
	cmp	bx,cs:lastByteOffset
	ja	endOfSpan	; use unsigned comparison
	mov	BYTE PTR cs:_re_sidechars,al ; save last char read
	mov	al,[bx+1]	; get char after the next one
	mov	BYTE PTR cs:_re_sidechars+1,al ; save next char to read
	mov	al,[bx]		; get the next character (finally)
	push	cs		; restore DS to equal CS
	pop	ds
	inc	bx		; bx = offset of the following character
	cmp	bx,lastByteOffset ; is the following char still in the buffer?
	jbe	notAtEnd	; lastByteOffset itself is a valid byte, so
				; only offsets ABOVE it are past the end
	mov	ah,0AH		; if not, simulate a NL as the next char
	mov	BYTE PTR _re_sidechars+1,ah
notAtEnd:
	mov	nextByteOffset,bx
	mov	thisMatchBegin,bx
	jmp	_re_code
;
;
; endOfSpan -- the buffer ran out.  With an empty clist return 0;
; otherwise store (lowest match start on the clist) - 1 through the
; matchBegin pointer ([bp+12]) and return 2.  Both cases leave through
; finish with the result in ax.
;
;	in:	dx = clist head, si = clist tail; ds is the buffer
;		segment here, hence the cs: overrides on list accesses
;
endOfSpan:
;
	mov	es,cs:nextByteSegment
	cmp	dx,si
	jne	scanClist	; head<>tail => partial matches are pending
noClist:
	xor	ax,ax		; nothing pending: return 0
	jmp	finish
scanClist:
	mov	bx,dx
	mov	dx,cs:[bx+2]	; dx = lowest match start seen so far
loop1:
	add	bx,4		; step to the next clist entry
	cmp	bx,si
	je	endLoop		; reached the tail: dx holds the minimum
	mov	cx,cs:[bx+2]	; cx = this entry's match start
	cmp	dx,cx
	jbe	loop1		; current minimum still stands (unsigned)
	mov	dx,cx		; found a lower one
	jmp	loop1
endLoop:
	mov	ds,cs:dataSegment	; restore ds
	dec	dx		; taken from nextByteOffset and so 1 too high
	mov	bx,[bp+12]
	mov	[bx],dx		; report where the partial match begins
	mov	ax,2		; return 2
over1:
	jmp	finish
;
	even
	public	_re_code
; _re_code -- 1000 bytes reserved inside the code segment, assembled as
; zeros.  skipFastScan jumps here after fetching each input character,
; so executable code has to be in place before _re_scan is called.
; NOTE(review): presumably filled in by the RE compiler through this
; public symbol -- nothing in this file writes to it; confirm.
_re_code:
	db	1000 DUP (0)
;
; Scanner state.  It lives in the code segment and is addressed either
; through ds (when ds = cs) or with an explicit cs: override.
		even
nlistTop	dw	0	; start of the list currently used as nlist
clistTop	dw	0	; start of the list currently used as clist
firstByteOffset	dw	0	; beginBuffer as passed in; start of the
				; newline count done in finish
; These next two words must stay in this order since they are picked up with
; an LDS/LES instruction that wants them in this order in two consecutive words
nextByteOffset	dw	0	; offset of the next byte to read
nextByteSegment	dw	0	; segment of the buffer (segBuffer)
dataSegment	dw	0	; caller's ds, saved on entry to _re_scan
;
; Here we will keep the last character read and the next character to be read
; (low byte = last character read, high byte = next character to be read)
;
		public	_re_sidechars
_re_sidechars	dw	0
;
lastByteOffset	dw	0	; endBuffer as passed in; offset of the last
				; byte that is read
thisMatchBegin	dw	0	; match start belonging to the entry being
				; run (one higher than the real offset)
;
; _re_wordtable -- 32-byte table exported for use outside this file;
; nothing here reads it.
; NOTE(review): the name suggests a 256-bit "word character" bitmap
; indexed by character code -- confirm against the code that uses it.
		public	_re_wordtable
_re_wordtable	db	0, 0, 0, 0, 0, 0, -1, 3
		db	-2, -1, -1, 7, -2, -1, -1, 7
		db	0, 0, 0, 0, 0, 0, 0, 0
		db	0, 0, 0, 0, 0, 0, 0, 0
;
; list1 / list2 -- the two work lists, which serve alternately as clist
; and nlist (see nlistTop / clistTop).  Each entry is 4 bytes: a code
; address followed by a match start, so 400 bytes hold 100 entries.
; The code in this file that appends entries does not check for
; overflow.
	even
list1:
	db	400 DUP (0)
;
	even
list2:
	db	400 DUP (0)
;
RE_TEXT	ENDS
	end
