//
//  Dataset.m
//  DaMaT
//
//  Created by Simon Urbanek on Fri Aug 30 2002.
//  Copyright (c) 2001 __MyCompanyName__. All rights reserved.
//

#import "Dataset.h"

#define dataIncrement 512
#define msgIncrement 64

/* read chunk must be equal or less initBufSize and bufsizeIncrement */
#define readChunk 32768
#define initBufSize 65536
#define bufSizeIncrement 32768

/*
 * NOTE(review): this file reached review with three spans of source text
 * missing (inside -dealloc, -processLine:asLine: and -loadFromFile:).  Each
 * gap appears to start at a '<' comparison and to end at a later '>', which
 * looks like the work of a markup stripper -- TODO confirm and restore the
 * spans from version control.  The tokens that survive around every gap are
 * kept verbatim and marked FIXME(review); nothing was invented to fill them,
 * so those three places do not compile until the text is restored.
 */

@implementation Dataset

/*
 * Creates an empty dataset: no rows, room for dataIncrement row slots and
 * msgIncrement messages.
 *
 * Returns self, or nil when the superclass initializer fails.
 */
- (id) init
{
    /* [super init] may return nil or a different object, so keep its result */
    self=[super init];
    if (!self) return nil;

    rs=cols=0;
    ars=dataIncrement;
    data=(char***)malloc(sizeof(char**)*ars);
    /* calloc hands back zeroed row lengths and, unlike malloc+memset,
       cannot write through a NULL pointer when the allocation fails */
    rlen=(int*)calloc(ars,sizeof(int));
    if (!data || !rlen) {
        /* never advertise capacity that is not backed by memory */
        free(data);
        free(rlen);
        data=0;
        rlen=0;
        ars=0;
    }

    amsgs=msgIncrement;
    msg=(struct msgInfo*)malloc(sizeof(struct msgInfo)*amsgs);
    /* with zero capacity -addMessage: allocates the list on first use */
    if (!msg) amsgs=0;
    msgs=0;

    printf("Dataset.init\n");
    return self;
}

/*
 * FIXME(review): DAMAGED TEXT.  Everything between "while (i" and
 * "=rs)?0:rlen[rnr];" is missing: the rest of -dealloc, whatever followed it,
 * and the signature of the method that owns the surviving tail.  That tail
 * has the same bounds-check shape as the accessors below, so it presumably
 * belongs to a row-length accessor -- its name cannot be recovered from here.
 * The surviving tokens are reproduced verbatim on the next line.
 */
- (void) dealloc { printf("Dataset.dealloc\n"); if (data) { int i=0; while (i=rs)?0:rlen[rnr]; }

/*
 * Returns the stored C string of cell (rnr, cnr).
 *
 * Returns 0 when the row index is outside [0, rs) or the column index is
 * outside [0, rlen[rnr]).  The pointer is the dataset's own storage, not a
 * copy.
 */
- (char*) entryAtRow: (int) rnr andColumn: (int) cnr
{
    if (rnr<0 || cnr<0 || rnr>=rs || cnr>=rlen[rnr]) return 0;
    return data[rnr][cnr];
}

/*
 * Replaces cell (rnr, cnr) with a private copy of net, or clears the cell
 * when net is 0.
 *
 * Returns the new cell content (0 after clearing).  Also returns 0 when the
 * indices are out of range or the copy cannot be allocated; in both of those
 * cases the existing cell is left untouched.
 *
 * The copy is made before the old entry is released, so it is safe to pass
 * the cell's own current pointer (e.g. the result of -entryAtRow:andColumn:)
 * as net.
 */
- (char*) replaceEntryAtRow: (int) rnr andColumn: (int) cnr withEntry: (const char*) net
{
    char *copy=0;

    if (rnr<0 || cnr<0 || rnr>=rs || cnr>=rlen[rnr]) return 0;

    if (net) {
        size_t bytes=strlen(net)+1;
        copy=(char*)malloc(bytes);
        if (!copy) return 0;
        memcpy(copy,net,bytes);
    }

    if (data[rnr][cnr]) free(data[rnr][cnr]);
    data[rnr][cnr]=copy;
    return copy;
}

/*
 * Returns the array of cell pointers of row rnr, or 0 when rnr is outside
 * [0, rs).  The array is the dataset's own storage.
 */
- (char**) getRow: (int) rnr
{
    if (rnr<0 || rnr>=rs) return 0;
    return data[rnr];
}

/*
 * Returns a pointer to field descriptor number fix, or 0 when fix is outside
 * [0, fs).
 */
- (struct fieldInfo*) getFieldAt: (int) fix
{
    if (fix<0 || fix>=fs) return 0;
    return fields+fix;
}

/*
 * Replaces the field table with fis zero-filled descriptors.
 *
 * Any previous table is released first (the array only, in the same way
 * -loadFromFile: releases it), so pointers obtained earlier from
 * -getFieldAt: become invalid.
 *
 * Returns the new table, or 0 when fis is negative or the allocation fails;
 * the field counts are then 0.
 */
- (struct fieldInfo*) allocFields: (int) fis
{
    if (fields) free(fields);
    fields=0;
    afs=fs=0;

    if (fis<0) return 0;

    fields=(struct fieldInfo*)calloc((size_t)fis,sizeof(struct fieldInfo));
    if (fields) afs=fs=fis;
    return fields;
}

/* BEWARE! message text uses SHALLOW copy, since the set of message texts is
   usually fixed, hence it's much more useful to use just one copy no matter
   for how many cells */
/*
 * Appends a message record (text m, identifier mid, position rnr/cnr) to the
 * message list, growing the list in steps of msgIncrement when it is full.
 *
 * Only the pointer m is stored, so the text must outlive the dataset.  When
 * the list cannot be grown the existing messages are kept and the new one is
 * dropped.
 */
- (void) addMessage: (const char*) m withId: (int) mid forRow: (int) rnr andColumn: (int) cnr
{
    if (msgs>=amsgs) {
        int wanted=amsgs;
        struct msgInfo *grown;

        while (msgs>=wanted) wanted+=msgIncrement;
        /* realloc leaves the old block valid on failure, so assign its result
           to msg only once it is known to be non-NULL */
        grown=(struct msgInfo*)realloc(msg,sizeof(struct msgInfo)*wanted);
        if (!grown) return;
        msg=grown;
        amsgs=wanted;
    }

    msg[msgs].msg=m;
    msg[msgs].mid=mid;
    msg[msgs].row=rnr;
    msg[msgs].col=cnr;
    msgs++;
    //printf("Line %d, column %d: [%d] %s\n",rnr+1,cnr+1,mid,m);
}

/* Returns the internal message array; valid entries are [0, getMessagesCount). */
- (struct msgInfo*) getMessages
{
    return msg;
}

/* Returns the number of messages recorded so far. */
- (int) getMessagesCount
{
    return msgs;
}

/* Returns the stored source encoding value (compared against SE_mac, SE_unix
   and SE_pc in -loadFromFile:). */
- (int) srcEncoding
{
    return srcEncoding;
}

/*
 * Processes one line of tab-separated text.
 *
 * lc is modified in place: every tab is overwritten with a 0 byte so the
 * line becomes a sequence of C strings.  An empty line is skipped; when it is
 * line 0 a message (id 2) is recorded.  While no field table exists the line
 * is treated as the header: a message (id 3) is recorded if it is not line 0
 * and a field table with one entry per column is allocated.
 *
 * The remainder of the method is damaged -- see the FIXME below.
 */
- (void) processLine: (char*) lc asLine: (int) ln
{
    int i,j,len=0,n;
    char *c,*d;

    if (!*lc) {
        if (ln==0) [self addMessage:"First line is empty. Assuming headers are to be found at first non-empty line" withId:2 forRow:ln andColumn:-1];
        return;
    };

    /* count the columns and split the line at the tabs */
    c=lc; n=1;
    while(*c) { if (*c=='\t') { n++; *c=0; }; c++; len++; };
    //printf("Line %d: has %d columns\n",ln,n);

    if (!fields && ln>0) [self addMessage:"First non-empty line, treating as header" withId:3 forRow:ln andColumn:-1];
    if (!fields) {
        printf("alloc fields\n");
        [self allocFields:n];
        c=d=lc; i=0;
        /*
         * FIXME(review): DAMAGED TEXT.  Two spans are missing below: one
         * between "while(i" and "fs)", and one between "while (k" and
         * "='0'".  The lost text covers the header handling, the start of the
         * data-row handling and the declaration of k.  The surviving tokens
         * are reproduced verbatim up to the end of the method; restore the
         * gaps from version control.
         */
        while(ifs) [self addMessage:"The row has more columns that the header" withId:5 forRow:ln andColumn:-1]; if (j<=fs) { while (k='0' && *c<='9')) { /* could be a number - parse */ strtod(c,&d); if (*d) fields[k].strs++; else fields[k].nums++; } else { fields[k].strs++; }; }; k++; } }; } }

/*
 * Loads a tab-separated file with a header line from fname.
 *
 * Any previous field table is discarded first.  The file is read in
 * readChunk-sized pieces into a growable buffer; afterwards a
 * human-readable summary is written to formInfo.
 *
 * Returns self, or nil when the file cannot be opened or the read buffer
 * cannot be allocated.
 *
 * The code that scans the buffer for line ends is damaged -- see the FIXME
 * below.
 */
- (id) loadFromFile: (const char*) fname
{
    FILE *f;
    char *buf, *c,*d;
    int blen=initBufSize;
    int tlen, readLen,pls,i,lpp;
    int nrs, nns;
    int lnrs=-1, lnns;
    int ln=0;

    /* Clear the pointer as well as freeing it: -processLine:asLine: builds a
       new field table only while fields is 0, so a stale pointer would stop
       the header of this file from being parsed and would be left dangling. */
    if (fields) free(fields);
    fields=0;
    afs=fs=0;

    f=fopen(fname,"rb");
    if (!f) return nil;

    pls=tlen=0;
    buf=(char*)malloc(blen);
    if (!buf) {
        fclose(f);
        return nil;
    }

    /* NOTE(review): buf is never freed in the visible code.  Whether anything
       keeps pointers into it cannot be told while the text below is missing
       -- confirm before adding free(buf). */

    /* stop on a read error too: feof() alone never becomes true in that case
       and fread() would keep returning 0 forever */
    while(!feof(f) && !ferror(f)) {
        readLen=fread(buf+tlen,1,readChunk,f);
        //printf("read %d bytes\n",readLen);
        if (readLen>0) {
            tlen+=readLen;
            c=buf; pls=lpp=0; i=0;
            /*
             * FIXME(review): DAMAGED TEXT.  Everything between "while(i" and
             * "blen) {" is missing, i.e. the whole scan of the buffer.  The
             * surviving tokens are reproduced verbatim below, down to the
             * three closing braces.  Note that the result of realloc() is
             * not checked there; fix that together with the restoration.
             */
            while(iblen) { /* allocate enough space for next read */ while (tlen+readChunk>blen) blen+=bufSizeIncrement;
            //printf("reallocating buffer to size %d.\n",blen);
            buf=(char*)realloc(buf,blen); } } }

    printf("Lines: %d\n",ln);
    formInfo[1023]=0;
    snprintf(formInfo,1023,"Tab-separated values with header\nTotal %d variables and %d cases.\nEOL encoding: %s\n",
             fs,rs,
             (srcEncoding==SE_mac)?"Mac":((srcEncoding==SE_unix)?"unix":((srcEncoding==SE_pc)?"PC":"other")));
    fclose(f);
    return self;
}

/*
 * Returns the summary text written by -loadFromFile: as an NSString.
 *
 * The summary is built from ASCII literals and decimal numbers only, so it
 * is valid UTF-8; +stringWithUTF8String: replaces the deprecated
 * +stringWithCString:.
 */
- (NSString*) getFormatInfoString
{
    return [NSString stringWithUTF8String:formInfo];
}

@end