[futurebasic] RE: [FB] text parsing

Message: < previous - next > : Reply : Subscribe : Cleanse
Home   : February 2002 : Group Archive : Group : All Groups

From: Griggs Douglas B CRBE <GriggsDB@...>
Date: Thu, 21 Feb 2002 08:54:38 -0500
David, here is a bit of code that will read large data files into a text handle, then parse for the delimiters and some line labels, then put the numbers into a series of one-dimensional float arrays.  (You could use text arrays if you want.)  The code is a bit long, so email me privately and I will send you a sample data file.

/*This program will quickly read and parse large text files into floating-point arrays.
The record VectRec holds information about each column of data (ultimately a one-dimensional
floating-point array).  STAFFstuff holds data about the file, according to our local file format,
which we call STAFF for "standard ascii file format": a tab-delimited text file that
reserves the first column for line labels.  We collect data, so a time column is required in all
our files.  Other than that, STAFF rules require that programs handle any other mix of line and
column labels, and not crash on unrecognized labels.
*/
//VectRec describes one column ("channel") of parsed data.
//One element of the global gVect array exists per column.
begin RECORD VectRec
DIM Addr&'4, the address of the first element of this column's float array
DIM 32 VName$'32, column name (filled from the HEADER line)
dim 32 Vsource$'column source text (filled from the SOURCE line)
DIM SampNo&'4, total number of samples
DIM bytes&'4 -- NOTE(review): not referenced in the visible code; meaning unconfirmed
DIM slope#'gain
DIM zero#'zero
DIM execute%'location of execute
DIM start%'beginning of area of interest
DIM finish%'end of area of interest
DIM rsquare#'presumably a regression r-squared -- TODO confirm, not set in the visible code
DIM SEE#'presumably standard error of estimate -- TODO confirm, not set in the visible code
dim max#'per-column statistics; not computed in the visible code
dim min#
dim ave#
dim stdDev#
END RECORD


//STAFFstuff holds per-file information gathered while reading and parsing one file.
//The ...Line& fields are zero-based line indexes (r-1 in parseToArray); 0 also means "not seen yet".
begin record STAFFstuff
dim filename$'name returned by the file dialog in ReadTextFile&
dim dataLine&'index of the most recent line whose first field contains "DATA"
dim headerLine&'index of the HEADER line; recorded only while this field is still 0
dim sourceLine&'index of the SOURCE line; recorded only while this field is still 0
dim timeCol&'not assigned in the visible code; only printed by readSTAFF
dim executeLine&'index of the EXECUTE line; recorded only while this field is still 0
dim comexLine&'index of the COMEX line; recorded only while this field is still 0
dim finexLine&'index of the FINEX line; recorded only while this field is still 0
dim numRows&'not assigned in the visible code; copied into gVect.SampNo& by readSTAFF
end record
//Global constants and storage shared by the reader and the parser.
//I am lazy.  This is a HUGE memory allocation.  With these settings, it needs nearly 20 meg of ram
//(gChn# dominates: roughly _maxNumChan x _ArraySize double-precision values).
_ArraySize=10000'max number of rows
_maxNumChan=256'max number of columns
DIM gVect(_maxNumChan) as VectRec'one descriptor per column
dim staff as STAFFstuff'information about the file being read

//one-character strings for the delimiter and line-ending characters
dim 2 tb$,com$,cr$,lf$
tb$=chr$(9)'tab
com$=chr$(44)'comma
cr$=chr$(13)'carriage return
lf$=chr$(10)'linefeed (ASCII 10; the previous value 12 is form feed)
_tab=9
_comma=44

dim bigText$(_maxNumChan)'the fields of the line currently being parsed, one string per column
DIM gLineCount%
dim gMaxCol&,gMaxRows&'highest column index / row count seen by parseToArray
dim gChn#(_maxNumChan,_ArraySize)'the parsed numbers, indexed (column,row)
DIM gEnd

dim gPi#,gRadToDeg#,gDegToRad#
gPi#=atn(1)*4'atn(1) is pi/4
gRadToDeg#=180/gPi#
gDegToRad#=gPi#/180

dim textOut$$
dim goutChanList%(_maxNumChan)
dim gNumOutChan%
gNumOutChan%=0

end globals

clear
LOCAL FN parseToArray(txtHndl&,delim%)
'Parses the text in txtHndl& one byte at a time.  Each line is split into the
'global bigText$() array (one Pascal string per column), the line label in
'column 0 is checked for the STAFF keywords, and once a DATA line has been seen
'every column is converted with val() and stored through gVect.Addr&(column).
'
'txtHndl& : handle holding the text.  The first 2 bytes are skipped (ReadTextFile&
'           stores a length word there); the text starts at offset 2.
'           The handle is unlocked AND disposed before this function returns.
'delim%   : currently not referenced -- tab (9) and comma (44) are always delimiters.
'
'Updates gMaxCol&, gMaxRows&, the staff.xxxLine& fields and gVect names/sources.
'Parsing stops at the end of the text, after _ArraySize rows, or once a line
'reaches _maxNumChan columns.
'Known limitation: a row with fewer columns than an earlier row keeps the earlier
'row's text in the columns it does not supply.
DIM r,c,osErr
DIM strLen&,i&,slen%,a%,eolFlg%
dim sdl&
dim cc,rr
xref daDat#(_ArraySize)
FN HLOCK(txtHndl&)'lock in memory
osErr=syserror
LONG IF osErr=0
  strLen&=FN GETHANDLESIZE(txtHndl&)
  i&=2:r=0:c=0:slen%=0:eolFlg%=0
  cursor=_watchCursor
  long if strLen&>2'nothing to parse when only the 2-byte length word is present
    DO
      //read a line of data into bigText$ by parsing a byte at a time
      do
        a%=(PEEK([txtHndl&]+i&))
        INC(slen%)
        select a%
          case 9,44'delim%
            'is it tb (or comma)? then close this field and move to the next column
            POKE @bigText$(c),slen%-1'poke in the length byte
            IF c<_maxNumChan THEN INC(c)'never index past the end of bigText$
            slen%=0
          case 13'is it cr?
            'is next lf? then skip it; close the field, inc row
            long if i&+1<strLen&'do not look past the end of the handle
              IF (PEEK([txtHndl&]+i&+1))=10 THEN INC(i&)'lf$
            end if
            POKE @bigText$(c),slen%-1'poke in the length byte
            INC(r):c=0:slen%=0:eolFlg%=1
          '================
          'is it lf?
          case 10'is next cr? then skip it; close the field, inc row
            long if i&+1<strLen&'do not look past the end of the handle
              IF (PEEK([txtHndl&]+i&+1))=13 THEN INC(i&)'cr$
            end if
            POKE @bigText$(c),slen%-1'poke in the length byte
            INC(r):c=0:slen%=0:eolFlg%=1
          '================
          case else
            'Only ordinary characters are stored.  Storing the delimiter or the
            'line terminator as well would land on offset 0 of the next field,
            'i.e. overwrite the length byte of bigText$(0) at every end of line.
            long if slen%<256
              POKE @bigText$(c)+slen%,a%'poke the text byte in
            xelse
              slen%=255'field is full (255 chars max); drop the extra characters
            end if
        end select
        '================
        INC(i&)
        IF c>gMaxCol& THEN gMaxCol&=c
        IF r>gMaxRows& THEN gMaxRows&=r
      until eolFlg%=1 or i&=>strLen&'beat on bits until end of line (or end of text)
      long if eolFlg%=0
        'the text ended without a line terminator: close the last field, count the row
        POKE @bigText$(c),slen%
        INC(r):c=0:slen%=0
        IF r>gMaxRows& THEN gMaxRows&=r
      end if
      //now find specific (STAFF) stuff in each line
      //edit this portion to meet your needs
      rr=r-1
      select
        case instr(1,bigText$(0),"DATA")>0
          staff.dataLine&=r-1
          sdl&=staff.dataLine&
        case instr(1,bigText$(0),"HEADER")>0
          long if staff.headerLine&=0
            staff.headerLine&=r-1
            for cc = 0 to gMaxCol&
              gVect.VName$(cc)=bigText$(cc)
            next cc
          end if
        case instr(1,bigText$(0),"SOURCE")>0
          long if staff.sourceLine&=0
            staff.sourceLine&=r-1
            for cc = 0 to gMaxCol&
              gVect.Vsource$(cc)=bigText$(cc)
            next cc
          end if
        case instr(1,bigText$(0),"EXECUTE")>0
          if staff.executeLine&=0 then staff.executeLine&=r-1
        case instr(1,bigText$(0),"FINEX")>0
          if staff.finexLine&=0 then staff.finexLine&=r-1
        case instr(1,bigText$(0),"COMEX")>0
          if staff.comexLine&=0 then staff.comexLine&=r-1
        case else
      end select
      //This fills the big float arrays with data, from the local data line
      long if staff.dataLine&<>0
        rr=r-staff.dataLine&-1'row index relative to the DATA line
        for cc=0 to gMaxCol&
          daDat&=gVect.Addr&(cc)'point the xref array at this column's storage
          daDat#(rr)=val(bigText$(cc))
        next cc
      end if
      eolFlg%=0'set beginning of line
    UNTIL i&=>strLen& OR r=>_ArraySize OR gMaxCol&=>_maxNumChan
  end if
XELSE
  PRINT@(0,0) "error locking down handle "+str$(syserror)
END IF
FN HUNLOCK(txtHndl&)
fn disposehandle(txtHndl&)
cursor=0
END FN

clear
LOCAL FN ReadTextFile&
'Asks the user for a TEXT file and loads it into a newly allocated handle.
'Handle layout: a 2-byte word poked from the file size, then the file's bytes.
'Returns the handle, or 0 when no file was chosen or no handle was allocated
'(locals start cleared).  The chosen name is stored in staff.filename$.
'If the file cannot be opened a message is printed and the program stops.
dim rect.8,byteCount&,hText&,osErr,err$,pickedName$,volRef%
pickedName$ = files$ (_fOpen, "TEXT", , volRef%)
long if volRef% <> 0
  open "I", #1, pickedName$, , volRef%
  long if syserror = _noErr
    byteCount& = lof (1,1)'file size in bytes (record length 1)
    hText& = fn NEWHANDLE (byteCount& + 2)'room for the length word plus the text
    long if hText& <> _false
      fn HLOCK(hText&)'keep the block from moving while it is filled
      poke word [hText&], byteCount&'length word at the front of the block
      read file #1, [hText&] + 2, byteCount&'text goes right after the length word
      fn HUNLOCK (hText&)
    end if
    close #1
  xelse
    err$="Sorry Dude, this file has a problem!"+str$(syserror)
    print @(0,0) err$
    stop
  end if
  staff.filename$=pickedName$
end if
END FN=hText&

clear
local fn readSTAFF
'Resets the per-column descriptors and the STAFF line markers, lets the user
'pick a file via ReadTextFile&, parses it with parseToArray, then prints the
'line locations that were found.  No parameters, no return value.
dim textHndl&,i
'gVect and gChn# are dimensioned 0.._maxNumChan and the parser can reach
'column _maxNumChan, so every element is initialised (not just 0.._maxNumChan-1).
for i=0 to _maxNumChan
gVect.Addr&(i)=@gChn#(i,0)'each column descriptor points at row 0 of its float array
gVect.SampNo&(i)=staff.numRows&
gVect.VName$(i)=""'index added: without it the same element was cleared on every pass
gVect.Vsource$(i)=""
next i
gMaxCol&=1:gMaxRows&=1
staff.dataLine& = 0
staff.headerLine&=0
staff.sourceLine&=0
staff.executeLine&=0
staff.comexLine&=0
staff.finexLine&=0
gNumOutChan%=0
textHndl&=fn ReadTextFile&
'parseToArray disposes the handle, so it must not be used after this call
if textHndl& != _false then fn parseToArray(textHndl&,_tab)
print  "header loc ";staff.headerLine&
print "source loc ";staff.sourceLine&
print "data loc ";staff.dataLine&
print "time col ";staff.timeCol&
print "execute line ";staff.executeLine&
stop
cls
end fn

'main -- program entry point
'Clears gEnd, then runs one read/parse/report cycle and quits.
gEnd = 0
FN readSTAFF
END