// ----------------------------------------------------------------------------
// CVS $Id: SIO_functions.cc,v 1.9 2008/05/28 14:02:09 engels Exp $
// ----------------------------------------------------------------------------
// => Function package for SIO
// ----------------------------------------------------------------------------
//
// General Description:
//
// SIO_functions implements the basic I/O functions of SIO along with a number
// of general purpose utilities.
//
// ----------------------------------------------------------------------------

#ifdef _MSC_VER
#   pragma warning(disable:4786)       // >255 characters in debug information
#endif

#include <climits>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include "SIO_stream.h"
#include "SIO_functions.h"

// ----------------------------------------------------------------------------
// Deal with 'endian-ness'.  Try to base this on the processor type (because
// the operating system doesn't guarantee endian-ness ... Linux runs happily
// on both x86 CPUs (little endian) and PPC CPUs (big endian)).
//
// Branch on flag provided by compiler:
//
// OS            CPU           Macro         Provided by   Endian-ness
// ------------  ------------  ------------  ------------  -----------
// AIX           PPC(?)        _AIX          GNU compiler  Big
// OSF1          Alpha         __alpha__     GNU compiler  Little
// Linux         x86           __i386__      GNU compiler  Little
// Linux         Opteron       _LP64         GNU compiler  Little
// Linux         itanium       _LP64         GNU compiler  Little
// SunOS         Sparc         __sparc__     GNU compiler  Big
// Windows/NT    Alpha         _M_ALPHA      VC compiler   Little
// Windows/NT    x86           _M_IX86       VC compiler   Little
// Windows/NT    MIPS          _M_MRX000     VC compiler   ?
00043 // Windows/NT PPC _M_PPC VC compiler Big 00044 // ---------------------------------------------------------------------------- 00045 #if defined(__alpha__) || defined(__i386__) || defined(_M_ALPHA) || defined(_M_IX86) || defined(_LP64) || defined(__LITTLE_ENDIAN__) 00046 #define SIO_LITTLE_ENDIAN 00047 #endif 00048 00049 #if defined(_AIX) || defined(__sparc__) || defined(_M_PPC) || ( defined(__APPLE_CC__) && !defined(__LITTLE_ENDIAN__) ) 00050 #define SIO_BIG_ENDIAN 00051 #endif 00052 00053 // **************************************************************************** 00054 // Primitive copying functions. 00055 // 00056 // This is where the endian-ness problem is resolved. Note that there are two 00057 // implementations of SIO_functions::copy. Only (at most) one version gets 00058 // passed to the compiler by the preprocesor. In the case of a new target CPU 00059 // whose endian-ness cannot determined by the preprocessor directives above, 00060 // -no- version is passed through to the compiler. The result is a compilation 00061 // failure (i.e. you can't compile the SIO package on a particular CPU if the 00062 // CPU's endian-ness cannot be determined by the SIO preprocessor directives). 00063 // 00064 // In SIO all datasets have the same endian-ness. The choice between a little- 00065 // or a big-endian dataset representation should be driven by the type of CPU 00066 // which will do most of the work (swapping the endian-ness on read or 00067 // write is somewhat inefficient). In a ditributed multi-platform environment 00068 // that's a tough call, so the chice is really somewhat arbitrary. SIO uses 00069 // big-endian xdr-like data representation. 00070 // **************************************************************************** 00071 00072 #ifdef SIO_BIG_ENDIAN 00073 // ---------------------------------------------------------------------------- 00074 // => Keep the byte ordering during the copy. 
00075 // ---------------------------------------------------------------------------- 00076 void SIO_functions::copy 00077 ( 00078 unsigned char* from, 00079 unsigned char* dest, 00080 const int size, 00081 const int count 00082 ) 00083 { 00084 00085 // 00086 // Local variables. 00087 // 00088 int 00089 totlen; 00090 00091 // 00092 // Just do it! 00093 // 00094 totlen = size * count; 00095 memcpy( dest, from, totlen ); 00096 00097 // 00098 // That's all folks! 00099 // 00100 return; 00101 } 00102 #endif 00103 00104 #ifdef SIO_LITTLE_ENDIAN 00105 // ---------------------------------------------------------------------------- 00106 // => Reverse the byte ordering during the copy. 00107 // ---------------------------------------------------------------------------- 00108 void SIO_functions::copy 00109 ( 00110 unsigned char* from, 00111 unsigned char* dest, 00112 const int size, 00113 const int count 00114 ) 00115 { 00116 00117 // 00118 // Local variables. 00119 // 00120 int 00121 icnt, 00122 ibyt, 00123 jump; 00124 00125 // 00126 // Just do it! 00127 // 00128 dest += size; 00129 jump = size << 1; 00130 for( icnt = 0; icnt < count; icnt++ ) 00131 { 00132 for( ibyt = 0; ibyt < size; ibyt++ ) 00133 { 00134 *--dest = *from++; 00135 } 00136 dest += jump; 00137 } 00138 00139 // 00140 // That's all folks! 00141 // 00142 return; 00143 } 00144 #endif 00145 00146 // ---------------------------------------------------------------------------- 00147 // => Transfer to/from the buffer. 00148 // ---------------------------------------------------------------------------- 00149 unsigned int SIO_functions::xfer 00150 ( 00151 SIO_stream* stream, 00152 const int size, 00153 const int count, 00154 unsigned char* fromto 00155 ) 00156 { 00157 00158 // 00159 // Local variables. 00160 // 00161 int 00162 bytcnt, 00163 bytlen, 00164 padlen; 00165 00166 // 00167 // Is the stream healthy. 
00168 // 00169 if( stream->state == SIO_STATE_ERROR ) 00170 return( SIO_STREAM_BADSTATE ); 00171 00172 // 00173 // Byte count length and padded byte count length. 00174 // 00175 bytlen = count * size; 00176 padlen = (bytlen + 3) & 0xfffffffc; 00177 00178 // 00179 // Ensure the data will fit into the output buffer (write). 00180 // 00181 if( stream->mode != SIO_MODE_READ ) 00182 { 00183 if( (stream->buffer + padlen) > stream->bufmax ) 00184 { 00185 int 00186 newlen, 00187 oldlen; 00188 00189 unsigned char 00190 *newbuf; 00191 00192 //fg: --- this is not correct if the new request is larger than double the size... 00193 // newlen = (stream->bufmax - stream->bufloc) << 1; 00194 00195 int minNeeded = (stream->buffer + padlen) - stream->bufmax ; 00196 int bufSize = stream->bufmax - stream->bufloc ; 00197 int resizeFactor = 2 ; 00198 while( bufSize * (resizeFactor-1) < minNeeded ) 00199 ++resizeFactor ; 00200 00201 newlen = bufSize * resizeFactor ; 00202 00203 // std::cout << " resizing buffer - needed : " << minNeeded 00204 // << " bufSize : " << bufSize 00205 // << " resize factor : " << resizeFactor 00206 // << " new size: " << std::endl ; 00207 //fg: --- 00208 00209 newbuf = (unsigned char *)malloc( newlen ); 00210 if( newbuf == NULL ) 00211 { 00212 if( stream->verbosity >= SIO_ERRORS ) 00213 { 00214 std::cout << "SIO: [" 00215 << stream->name << "/" 00216 << stream->rec_name << "/" 00217 << stream->blk_name << "] " 00218 << "Buffer allocation failed" 00219 << std::endl; 00220 } 00221 00222 stream->state = SIO_STATE_ERROR; 00223 return( SIO_STREAM_NOALLOC ); 00224 } 00225 00226 00227 oldlen = stream->buffer - stream->bufloc; 00228 00229 memcpy( newbuf, stream->bufloc, oldlen ); 00230 free( stream->bufloc ); 00231 stream->blkmax = newbuf + (stream->blkmax - stream->bufloc); 00232 stream->bufmax = newbuf + newlen; 00233 stream->buffer = newbuf + oldlen; 00234 stream->bufloc = newbuf; 00235 00236 if( stream->verbosity >= SIO_ALL ) 00237 std::cout << "SIO: [" 00238 << 
stream->name << "/" 00239 << stream->rec_name << "/" 00240 << stream->blk_name << "] " 00241 << "Allocated a " 00242 << newlen 00243 << "(0x" << std::hex << newlen << std::dec << ")" 00244 << " byte buffer" 00245 << std::endl; 00246 } 00247 00248 // 00249 // Copy and write padding as necessary (xdr format). 00250 // 00251 SIO_functions::copy( fromto, stream->buffer, size, count ); 00252 stream->buffer += bytlen; 00253 for( bytcnt = bytlen; bytcnt < padlen; bytcnt++ ) 00254 *stream->buffer++ = 0; 00255 } 00256 00257 // 00258 // Ensure the request stays in bounds (read) 00259 // 00260 else 00261 { 00262 if( (stream->buffer + padlen) > stream->blkmax ) 00263 { 00264 if( stream->verbosity >= SIO_ERRORS ) 00265 { 00266 std::cout << "SIO: [" 00267 << stream->name << "/" 00268 << stream->rec_name << "/" 00269 << stream->blk_name << "] " 00270 << "Read request exceeds size of block! " 00271 << std::endl; 00272 } 00273 00274 stream->state = SIO_STATE_ERROR; 00275 return( SIO_STREAM_OFFEND ); 00276 } 00277 00278 // 00279 // Copy and skip over padding as necessary (xdr format). 00280 // 00281 SIO_functions::copy( stream->buffer, fromto, size, count ); 00282 stream->buffer += padlen; 00283 } 00284 00285 // 00286 // That's all folks! 00287 // 00288 return( SIO_STREAM_SUCCESS); 00289 } 00290 00291 // **************************************************************************** 00292 // Transfer functions for (arrays of) primitive data types. 
00293 // **************************************************************************** 00294 // ---------------------------------------------------------------------------- 00295 // => Transfer (array of) 'char' 00296 // ---------------------------------------------------------------------------- 00297 unsigned int SIO_functions::data 00298 ( 00299 SIO_stream* stream, 00300 char* xfer, 00301 const int cnt 00302 ) 00303 { return( SIO_functions::xfer( stream, SIO_LEN_SB, cnt, UCHR_CAST( xfer ))); } 00304 00305 // ---------------------------------------------------------------------------- 00306 // => Transfer (array of) 'unsigned char' 00307 // ---------------------------------------------------------------------------- 00308 unsigned int SIO_functions::data 00309 ( 00310 SIO_stream* stream, 00311 unsigned char* xfer, 00312 const int cnt 00313 ) 00314 { return( SIO_functions::xfer( stream, SIO_LEN_SB, cnt, UCHR_CAST( xfer ))); } 00315 00316 // ---------------------------------------------------------------------------- 00317 // => Transfer (array of) 'short' 00318 // ---------------------------------------------------------------------------- 00319 unsigned int SIO_functions::data 00320 ( 00321 SIO_stream* stream, 00322 short* xfer, 00323 const int cnt 00324 ) 00325 { return( SIO_functions::xfer( stream, SIO_LEN_DB, cnt, UCHR_CAST( xfer ))); } 00326 00327 // ---------------------------------------------------------------------------- 00328 // => Transfer (array of) 'unsigned short' 00329 // ---------------------------------------------------------------------------- 00330 unsigned int SIO_functions::data 00331 ( 00332 SIO_stream* stream, 00333 unsigned short* xfer, 00334 const int cnt 00335 ) 00336 { return( SIO_functions::xfer( stream, SIO_LEN_DB, cnt, UCHR_CAST( xfer ))); } 00337 00338 // ---------------------------------------------------------------------------- 00339 // => Transfer (array of) 'int' 00340 // 
---------------------------------------------------------------------------- 00341 // An 'int' is four bytes on -all- machines! On DEC/OSF1, avoid using 'long' 00342 // which it treats as 8 bytes (as opposed to 4 on AIX(PPC), Linux(I86) and 00343 // SunOS(Sparc)). 00344 // ---------------------------------------------------------------------------- 00345 unsigned int SIO_functions::data 00346 ( 00347 SIO_stream* stream, 00348 int* xfer, 00349 const int cnt 00350 ) 00351 { return( SIO_functions::xfer( stream, SIO_LEN_QB, cnt, UCHR_CAST( xfer ))); } 00352 00353 // ---------------------------------------------------------------------------- 00354 // => Transfer (array of) 'unsigned int' 00355 // ---------------------------------------------------------------------------- 00356 // An 'unsigned int' is four bytes on -all- machines! On DEC/OSF1, avoid using 00357 // 'unsigned long' which it treats as 8 bytes (as opposed to 4 on AIX(PPC), 00358 // Linux(I86) and SunOS(Sparc)). 00359 // ---------------------------------------------------------------------------- 00360 unsigned int SIO_functions::data 00361 ( 00362 SIO_stream* stream, 00363 unsigned int* xfer, 00364 const int cnt 00365 ) 00366 { return( SIO_functions::xfer( stream, SIO_LEN_QB, cnt, UCHR_CAST( xfer ))); } 00367 00368 // ---------------------------------------------------------------------------- 00369 // => Transfer (array of) 'long long' (8 bytes on all machines). 
00370 // ---------------------------------------------------------------------------- 00371 unsigned int SIO_functions::data 00372 ( 00373 SIO_stream* stream, 00374 SIO_64BITINT* xfer, 00375 const int cnt 00376 ) 00377 { return( SIO_functions::xfer( stream, SIO_LEN_OB, cnt, UCHR_CAST( xfer ))); } 00378 00379 // ---------------------------------------------------------------------------- 00380 // => Transfer (array of) 'unsigned long long' (8 bytes on all machines) 00381 // ---------------------------------------------------------------------------- 00382 unsigned int SIO_functions::data 00383 ( 00384 SIO_stream* stream, 00385 unsigned SIO_64BITINT* xfer, 00386 const int cnt 00387 ) 00388 { return( SIO_functions::xfer( stream, SIO_LEN_OB, cnt, UCHR_CAST( xfer ))); } 00389 00390 // ---------------------------------------------------------------------------- 00391 // => Transfer (array of) 'float' 00392 // ---------------------------------------------------------------------------- 00393 unsigned int SIO_functions::data 00394 ( 00395 SIO_stream* stream, 00396 float* xfer, 00397 const int cnt 00398 ) 00399 { return( SIO_functions::xfer( stream, SIO_LEN_QB, cnt, UCHR_CAST( xfer ))); } 00400 00401 // ---------------------------------------------------------------------------- 00402 // => Transfer (array of) 'double' 00403 // ---------------------------------------------------------------------------- 00404 unsigned int SIO_functions::data 00405 ( 00406 SIO_stream* stream, 00407 double* xfer, 00408 const int cnt 00409 ) 00410 { return( SIO_functions::xfer( stream, SIO_LEN_OB, cnt, UCHR_CAST( xfer ))); } 00411 00412 // ---------------------------------------------------------------------------- 00413 // => Transfer a 'pointed at by something' 00414 // ---------------------------------------------------------------------------- 00415 unsigned int SIO_functions::pointed_at 00416 ( 00417 SIO_stream* stream, 00418 SIO_POINTER_DECL* xfer 00419 ) 00420 { 00421 00422 static 
unsigned int 00423 SIO_ptag = 0xffffffff; 00424 00425 // 00426 // Whether reading or writing the basic principle of SIO_pointed_at is to 00427 // build a map of pairs of values called the 'pointed at' table. The routine 00428 // SIO_pointer_to builds a similar table called the 'pointer to' table. The 00429 // first value in each table is the 'match' value. When writing, both tables 00430 // have the same layout: 00431 // 00432 // First value: Pointer to a memory location 00433 // Second value: Offset in the output buffer 00434 // 00435 // When reading, that changes to: 00436 // 00437 // First value: Simple 32-bit integer value read from the buffer 00438 // Second value: Pointer to a memory location 00439 // 00440 // That means that these tables have different 'shapes' depending on whether 00441 // the program is reading or writing. Given that a stream must be either 00442 // read or write, I was loath to allocate four tables when at most two 00443 // would ever be used. I have therefore reused the same tables for both 00444 // purposes. Hmmm. Maybe not the world's best decision. I have been 00445 // forced to use a lot of 'reinterpret_cast' statements to make it all 00446 // work (a process complicated by the fact that pointers are not the same 00447 // size on all architectures). 00448 // 00449 00450 // 00451 // Write. Save the memory location of this object along with the offset 00452 // in the output buffer where the generated match quantity must go. Put 00453 // a placeholder in the output buffer (it will be overwritten at the "output 00454 // relocation" stage). 00455 // 00456 if( stream->mode != SIO_MODE_READ ) 00457 { 00458 std::pair< void* const, void* > 00459 entry( xfer, 00460 reinterpret_cast<void *>( stream->buffer - stream->bufloc ) ); 00461 00462 stream->pointedAt->insert( entry ); 00463 00464 return( SIO_functions::xfer( stream, SIO_LEN_QB, 1, UCHR_CAST(&SIO_ptag))); 00465 } 00466 00467 // 00468 // Read. 
Keep a record of the "match" quantity read from the buffer and 00469 // the location in memory which will need relocating. 00470 // 00471 else 00472 { 00473 unsigned int 00474 match, 00475 status; 00476 00477 status = SIO_functions::xfer( stream, SIO_LEN_QB, 1, UCHR_CAST( &match ) ); 00478 if( !( status & 1 ) ) 00479 return( status ); 00480 00481 // 00482 // Ignore match = SIO_ptag . This is basically a pointer target which was 00483 // never relocated when the record was written. i.e. nothing points to it! 00484 // Don't clutter the maps with information that can never be used. 00485 // 00486 if( match != SIO_ptag ) 00487 { 00488 std::pair< void* const, void* > 00489 entry( reinterpret_cast<void *>(match), xfer ); 00490 00491 stream->pointedAt->insert( entry ); 00492 } 00493 } 00494 00495 // 00496 // That's all folks! 00497 // 00498 return( SIO_STREAM_SUCCESS ); 00499 } 00500 00501 // ---------------------------------------------------------------------------- 00502 // => Transfer a 'pointer to something' 00503 // ---------------------------------------------------------------------------- 00504 unsigned int SIO_functions::pointer_to 00505 ( 00506 SIO_stream* stream, 00507 SIO_POINTER_DECL* xfer 00508 ) 00509 { 00510 static unsigned int 00511 SIO_pntr = 0x00000000; // Placeholder value for 'pointer to' 00512 00513 void 00514 *ifer; // Indirect xfer (actually **xfer) 00515 00516 // 00517 // xfer is really a pointer-to-a-pointer. This routine is most interested 00518 // in the value of *xfer when treated as a pointer. C++ tends to object 00519 // to this as being 'not type safe'. To keep the compiler happy (and purists 00520 // miserable), do one 'reinterpret_cast' immediately to make later code 00521 // easier to read. 00522 // 00523 ifer = reinterpret_cast<void *>(*xfer); 00524 00525 // 00526 // Whether reading or writing the basic principle of SIO_pointer_to is to 00527 // build a map of pairs of values called the 'pointer to' table. 
The routine 00528 // SIO_pointed_at builds a similar table called the 'pointed at' table. The 00529 // first value in each table is the 'match' value. When writing, both tables 00530 // have the same layout: 00531 // 00532 // First value: Pointer to a memory location 00533 // Second value: Offset in the output buffer 00534 // 00535 // When reading, that changes to: 00536 // 00537 // First value: Simple 32-bit integer value read from the buffer 00538 // Second value: Pointer to a memory location 00539 // 00540 // That means that these tables have different 'shapes' depending on whether 00541 // the program is reading or writing. Given that a stream must be either 00542 // read or write, I was loath to allocate four tables when at most two 00543 // would ever be used. I have therefore reused the same tables for both 00544 // purposes. Hmmm. Maybe not the world's best decision. I have been 00545 // forced to use a lot of 'reinterpret_cast' statements to make it all 00546 // work (a process complicated by the fact that pointers are not the same 00547 // size on all architectures). 00548 // 00549 00550 // 00551 // Write. Keep a record of the "match" quantity (i.e. the value of the 00552 // pointer (which may be different lengths on different machines!)) and 00553 // the current offset in the output buffer. Put a placeholder in the 00554 // output buffer (it will be overwritten at the "output relocation" stage). 00555 // 00556 if( stream->mode != SIO_MODE_READ ) 00557 { 00558 // 00559 // Ignore NULL pointers. These are always recorded in the buffer with a 00560 // zero match word (and are treated specially when read back). There's no 00561 // point in putting useless information in the maps. 
00562 // 00563 if( ifer != NULL ) 00564 { 00565 std::pair< void* const, void* > 00566 entry( ifer, 00567 reinterpret_cast<void *>(stream->buffer - stream->bufloc) ); 00568 00569 stream->pointerTo->insert( entry ); 00570 } 00571 return( SIO_functions::xfer( stream, SIO_LEN_QB, 1, UCHR_CAST(&SIO_pntr))); 00572 } 00573 00574 // 00575 // Read. Keep a record of the "match" quantity read from the buffer and 00576 // the location in memory which will need relocating. 00577 // 00578 else 00579 { 00580 unsigned int 00581 match, 00582 status; 00583 00584 status = SIO_functions::xfer( stream, SIO_LEN_QB, 1, UCHR_CAST( &match ) ); 00585 if( !( status & 1 ) ) 00586 return( status ); 00587 00588 // 00589 // Ignore match = SIO_pntr. This is basically a null pointer which can 00590 // never be relocated, so don't fill the multimap with a lot of useless 00591 // information. 00592 // 00593 // 00594 // C cast replaces static_cast for: 00595 // 00596 // std::pair 00597 // entry( static_cast<void *>(match), 00598 // static_cast<void *>(xfer) ); 00599 // 00600 if( match != SIO_pntr ) 00601 { 00602 std::pair< void* const, void* > 00603 entry( reinterpret_cast<void *>(match), xfer ); 00604 00605 stream->pointerTo->insert( entry ); 00606 } 00607 00608 // 00609 // Hand -something- back to the caller. The number passed back is -not- 00610 // a pointer, and pointer relocation will not occur until the whole record 00611 // has been read. The only circumstance where the next line is important 00612 // is the case of a NULL pointer which the caller may be relying on to 00613 // find the end of (for instance) a singly linked list. 00614 // 00615 *xfer = static_cast<SIO_POINTER_DECL>(match); 00616 } 00617 00618 // 00619 // That's all folks! 
00620 // 00621 return( SIO_STREAM_SUCCESS ); 00622 } 00623 00624 // **************************************************************************** 00625 // Miscellaneous functions 00626 // **************************************************************************** 00627 00628 // ---------------------------------------------------------------------------- 00629 // => Test for a legal name 00630 // ---------------------------------------------------------------------------- 00631 // 00632 // General Description: 00633 // 00634 // validateName tests the validity (according to SIO) of a name. SIO only 00635 // accepts names starting with (regular expression) [A-Za-z_] and continuing 00636 // with [A-Za-z0-9_] (which most people will recognize as the definition of 00637 // a C/C++ variable name). 00638 // 00639 // ---------------------------------------------------------------------------- 00640 bool SIO_functions::validateName 00641 ( 00642 const char* name 00643 ) 00644 { 00645 00646 // 00647 // Just do it! 00648 // 00649 if( *name < 0 ) return( false ); 00650 if( !isalpha( (int)*name ) && *name != '_' ) return( false ); 00651 00652 for( name += 1; *name != '\0'; name++ ) 00653 { 00654 if( *name < 0 ) return( false ); 00655 if( !isalnum( (int)*name ) && *name != '_' ) return( false ); 00656 } 00657 00658 // 00659 // That's all folks! 00660 // 00661 return( true ); 00662 } 00663