[futurebasic] Re: [FB] text parsing

Message: < previous - next > : Reply : Subscribe : Cleanse
Home   : February 2002 : Group Archive : Group : All Groups

From: Alain Pastor <apastor@...>
Date: Thu, 21 Feb 2002 16:07:30 +0100
David Cottrell wrote:
> 
> Hi all
> 
> This is really not my bag but can one of you text parsing demons show me
> a better way to parse a text file. This is what I have at the moment but
> it is way too slow.
> 
> I know I should read the file into a handle and parse that, I'm just not
> sure how to do it in this case as I only want part of the data in the
> file (in this case the first 4 values on each line of a tab separated
> data file).
> 
> Many thanks for enlightening me.
> 
David,

Here is a possible path to follow. It reads the whole file into a
handle, and for that reason, it should be way faster than the line
input# technique.
I may have misunderstood what you were trying to do in your code,
but you can possibly adapt the following statements to your needs
(beware, it is not guaranteed bug-free: I have cut and pasted some
functions of mine that I frequently use. I have also left some
comments in, so watch out for lines that the mailer has wrapped).

'~'1
// character code compared against each byte of a line to detect a value separator
_tab = 9
// highest index of the per-line value array (indices 0 to 3, i.e. four values)
_maxIndexValue = 3

begin globals
// filled in by AnalyseFile, printed by scoreResponses
dim gSubjectName as str255
end globals

/*
     Reads a whole file into a freshly allocated handle.
     fName : name of the file to open
     ref   : reference number handed to OPEN together with fName
             (presumably a volume / working directory reference - confirm)
     Result: the handle holding the bytes that were read, or _nil when the
             allocation failed or an error was flagged. In the error case
             the handle is disposed here; otherwise the caller disposes it.
*/
LOCAL MODE
'~'8
LOCAL FN ReadFileToHandle( fName AS STR63, ref AS INT )'Alain Date {19/02/00}
'~'9
DIM size  AS LONG
DIM fileH AS HNDL
DIM id    AS INT

// NOTE(review): presumably switches off the runtime's own error reporting so
// that ERROR can be tested by hand further down - confirm in the FB manual
ON ERROR END
id = 1
WHILE USR _Fileaddr( id )'find an available file id
id++
WEND
// NOTE(review): mode "ID" is presumably input-only on the data fork - confirm
OPEN "ID", id, fName, , ref
// length of the file expressed in records of length 1
size = LOF( id, 1 )
// one handle big enough for the whole file
fileH = FN NewHandleClear( size )
LONG IF fileH
// the handle is locked while its dereferenced address is the read target
HLock( fileH )
READ FILE id,[fileH],size
HUnlock( fileH )
END IF
CLOSE id
// any error flagged above: throw the (possibly partial) data away and clear the error
LONG IF (ERROR AND 255)<>_noerr
IF fileH THEN DisposeHandle(fileH) : fileH = _nil
ERROR = _noerr
END IF
// NOTE(review): presumably restores the default error handling - confirm
ON ERROR RETURN

END FN = fileH
'~'1
/*
     This function parses a handle according to a parsing char.
     It updates the marker (an offset) to keep track of the current
     location in the handle and fills the resulting string with the
     characters found between the marker and the next parsing char.
*/
'~'6
LOCAL MODE
'~'8
LOCAL FN GetParsedLineFromHandle( theH AS HNDL, asciiCode AS INT, @markP AS ^LONG, @strP AS ^STR255 )'Alain Date {19/02/00}
'~'9
/*
     theH      : handle holding the text to parse
     asciiCode : code of the delimiter ("parsing") character
     markP     : address of a long holding the current offset in the handle;
                 on return it points just past what has been consumed
     strP      : address of the pascal string receiving the extracted chars
                 (delimiter excluded, 255 chars at most per call)
     A chunk longer than 255 chars is returned in several successive calls;
     no character is lost in between.
     The string is returned empty when the handle is _nil or empty, or
     when nothing is left after the marker.
*/
DIM AS LONG   position, strSize, handleSize
DIM AS CHAR @ cutChar

strSize = 0
strP.nil` = 0'the caller always gets a defined string, even on early exit
IF theH = _nil THEN EXIT FN'test the handle before asking for its size
handleSize = FN GetHandleSize( theH )
IF handleSize = 0 THEN EXIT FN
cutChar = asciiCode
'look for the parsing char in the handle, starting at the marker
position  = FN Munger( theH, markP.nil&, @cutChar, 1, 0, 0 )
LONG IF position > -1'we've found at least one
  strSize = 1 + position-markP.nil&'size to extract, parsing char included
  LONG IF strSize > 255
    'too big for a pascal string: take 255 data bytes only. The parsing
    'char is not among them, so nothing must be trimmed from the result
    strSize = 255
    BlockMove( [theH] + markP.nil&, strP + 1, strSize )
    markP.nil& = markP.nil& + strSize'update our marker
  XELSE
    'move the bytes to extract from the handle to our string
    BlockMove( [theH] + markP.nil&, strP + 1, strSize )
    markP.nil& = markP.nil& + strSize'update our marker
    strSize--'remove the parsing char from the resulting string
  END IF
XELSE'we didn't find our parsing char
  'we are probably on the last chunk
  strSize = handleSize - markP.nil&
  LONG IF strSize > 0'are we sure there are characters left?
    IF strSize > 255 THEN strSize = 255'calculate size to move
    'move the chars from the handle to the pascal string
    BlockMove( [theH] + markP.nil&, strP + 1, strSize )
    markP.nil& = handleSize'set the marker at the end of the handle
  XELSE
    strSize = 0'marker already at (or past) the end: nothing to return
  END IF
END IF
strP.nil` = strSize

END FN

/*
 Splits a tab separated pascal string into its first values.
 theStr : the line to split
 split  : array receiving the values, indices 0 to _maxIndexValue.
          Characters are appended to the entries, so the caller must
          empty the array beforehand (AnalyseFile does it with blockfill).
 Values beyond index _maxIndexValue are ignored.
*/
local fn GetValues(theStr as str255,split(_maxIndexValue) as str255)
dim as int whichVal,index

whichVal = 0
index = 1'theStr[0] is the length byte, the characters start at [1]
'scan up to and including the last character; an empty string is skipped
while index <= theStr[0] and whichVal <= _maxIndexValue
  long if theStr[index] != _tab
    split(whichVal) = split(whichVal) + chr$(theStr[index])
  xelse
    whichVal++'a tab closes the current value
  end if
  index++
wend
end fn

/*
 Prints one record: the global gSubjectName, then condition, response
 and rt (presumably the reaction time - confirm), each on its own line,
 followed by a separator line.
*/
local fn scoreResponses (condition as str255,response as str255,rt as long)
print "SubjectName: ";gSubjectName
print condition
print response
print rt
print "-------------"

end fn
'~'1


'~'1
local
dim as handle fileH
dim as str255 rawLine, dummy, response, condition
dim as int trialNo, rt
dim as long @ marker
dim splitStr(_maxIndexValue) as str255

local fn AnalyseFile (fileName as str255,WDrefNo as int)
/*
 Reads the file fileName (located through WDrefNo) into a handle,
 stores the text following the "SubjectName: " tag in gSubjectName,
 skips to the "Trial" tag, then splits every following line into
 four tab separated values and passes them to scoreResponses.
 Nothing is reported when the file cannot be read or a tag is missing.
 NOTE(review): _newLineFlag is not defined in this listing; it must be
 the code of the line ending character - confirm.
*/
/*
 read the whole file into a handle.
 A spinach-powered Popeye boost compared
 to line input.
*/
fileH = fn ReadFileToHandle( fileName, WDrefNo )
long if fileH
  /*
   searching the subject name, we can now use
   the Munger function which is also fast
  */
  marker = 0 // locals are not cleared on entry: the search must start at offset 0
  dummy = "SubjectName: "
  marker = fn Munger(fileH,marker,@dummy[1],dummy[0],_nil,_nil)
  long if marker > -1
    /*
     we've found it, we add the length of our tag to our marker
     to skip it and then get the line (up to the line ending char)
     from our handle starting at the marker position.
     On return gSubjectName should be, hey... the subject name
    */
    marker += dummy[0]
    fn GetParsedLineFromHandle( fileH, _newLineFlag, marker,gSubjectName)
    /*
     go find the Trial tag with the same technique
    */
    dummy = "Trial"
    marker = fn Munger(fileH,marker,@dummy[1],dummy[0],_nil,_nil)
    long if marker > -1
      marker += dummy[0]
      /*
       we've found the Trial tag we can enter the loop
      */
      do
        // getting the raw line (from the marker's position to the line end)
        fn GetParsedLineFromHandle( fileH, _newLineFlag, marker,rawLine)
        long if rawLine[0] > 1
          /*
           our line is more than 1 char in length, so, we can try to retrieve
           the values, but first, we empty our four-items array
          */
          def blockfill(@splitStr(0),(_maxIndexValue +1)*sizeof(str255),0)
          /*
           we pass the rawLine along with the array
           (actually its pointer) to GetValues
          */
          fn GetValues(rawLine,splitStr(0))
          long if len(splitStr(0))
            // our array has been filled, so convert the values
            trialNo      = val(splitStr(0))
            condition    = splitStr(1)
            response     = splitStr(2)
            rt           = val(splitStr(3))
            fn scoreResponses (condition,response,rt)
          end if
        end if
      until marker >= fn GetHandleSize(fileH)
    end if
  end if
  // the handle belongs to us: release it whatever the outcome
  DisposeHandle(fileH)
end if
end fn

// ask for a file, analyse it, then idle until the mouse button is pressed
dim fName as str63
dim @vRef as int
window 1
fName = Files$(_fOpen,"","",vRef)
if fName[0] then fn AnalyseFile(fName,vRef)'a zero length name: nothing to analyse

do
handleevents
until fn button
-- 

Alain

-----------------------------------------------------
FB^3 in Europe:  http://euro.futurebasic.com/
FB II Pouch:     http://www.pixmix.com/FB/outils.html
-----------------------------------------------------