// -*- Mode: C++; -*-
//                              File      : RDILog.cc
//                              Package   : omniNotify-Library
//                              Created on: 1-Jan-1998
//                              Authors   : gruber&panagos
//
//    Copyright (C) 1998-2000 AT&T Laboratories -- Research
//
//    This file is part of the omniNotify library
//    and is distributed with the omniNotify release.
//
//    The omniNotify library is free software; you can redistribute it and/or
//    modify it under the terms of the GNU Library General Public
//    License as published by the Free Software Foundation; either
//    version 2 of the License, or (at your option) any later version.
//
//    This library is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//    Library General Public License for more details.
//
//    You should have received a copy of the GNU Library General Public
//    License along with this library; if not, write to the Free
//    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
//    02111-1307, USA
//
//
// Description:
//    Implementation of RDI_Log [Logging support for persistency]
//
 
/*
$Log: RDILog.cc,v $
Revision 1.8  2000/11/15 21:17:30  alcfp
large number of changes to switch to use of RDIOplocks for safe object disposal support.  also reduced code duplication a little, and tried hard to make all the proxy code consistent

Revision 1.7  2000/11/05 04:48:11  alcfp
changed in defaults, env variable overrride, try_pull variants

Revision 1.6  2000/10/04 15:15:06  alcfp
more small updates to get rid of compiler warnings

Revision 1.5  2000/10/04 02:40:05  alcfp
small fixes to avoid some compiler warnings

Revision 1.4  2000/08/22 18:23:56  alcfp
added description to each file

Revision 1.3  2000/08/16 20:19:52  alcfp
Added licensing notice to each .h and .cc file where library files get GLPL notice and daemon file gets GPL notice -- examples do not claim any license but point out that the library and daemon code does have a license notice

*/
 
#include <new.h>
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <iostream.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/param.h>
#include "RDIDebug.h"
#include "RDIFileIO.h"
#include "RDILog.h"

// SunOS version 5.5 ::realpath is different from other flavors of unix
// REALPATH_ARG1_T is the type the path argument is cast to at every
// ::realpath() call in this file: 'char*' when __SOSV_MINOR_VER__ is 55
// and 'const char*' in every other configuration.
#if defined(__SOSV_MINOR_VER__) &&  (__SOSV_MINOR_VER__ == 55)
#  define REALPATH_ARG1_T char*
#else
#  define REALPATH_ARG1_T const char*
#endif

// Unit (in bytes) to which the anchor offset and the on-disk log size
// are aligned; the two limits below are expressed as multiples of it.
static const size_t dev_block_size = 512;
// Smallest log size accepted by RDI_Log::create()
static const size_t min_log_size   = 128 * dev_block_size;
// Extra room added to every request made to RDI_Log::allocate_space()
static const size_t max_rsv_size   =  64 * dev_block_size;

// Definition of the static 'null' LSN that this file uses as "no LSN"
const RDI_LSN RDI_LSN::null;

// Number of bytes a log record with a 'size'-byte payload occupies:
// header plus payload, passed through RDI_Align with sizeof(RDI_LRecHdr)
// as the alignment argument.
inline static size_t rdi_rec_size(size_t size) 
{
  const size_t hdr_bytes = sizeof(RDI_LRecHdr);
  return RDI_Align(hdr_bytes + size, hdr_bytes);
}


// --------------------- Log Record Header --------------------- //
//                                                               //
// ------------------------------------------------------------- //

// Build a record header from its kind, checksum, previous LSN and
// payload size; the creation time is taken from the system clock.
RDI_LRecHdr::RDI_LRecHdr(RDI_LREC_Kind kind, unsigned long csum, 
			 const RDI_LSN& plsn, size_t size) :
	_kind(kind),
	_csum(csum),
	_plsn(plsn),
	_size(size)
{
  _time = ::time(0);
}

// Print a log record header as
//   "<time stamp> <kind> CSUM <csum> PLSN <plsn> SIZE <size>"
// where <kind> is one of USER, LSYS, CP_S or CP_F.
ostream& operator << (ostream& out, const RDI_LRecHdr& v)
{
  char tnice[26];

  // Convert the stored time by value.  Reinterpreting the address of
  // '_time' as a 'time_t*' is only correct when the member has exactly
  // the type time_t.
  time_t stamp = (time_t) v._time;
  const char* ctxt = ::ctime(&stamp);

  // ctime() produces "Www Mmm dd hh:mm:ss yyyy\n".  We keep the first 24
  // characters and put a blank where the newline was.  The text is given
  // to sprintf as an argument, never as the format string, and "%.24s"
  // bounds the copy so that 'tnice' cannot be overrun.  ctime() is allowed
  // to return NULL, hence the fallback text.
  sprintf(tnice, "%.24s ", ctxt ? ctxt : "(time unavailable)");

  return out << tnice << ((v._kind==RDI_LREC_USRAPP) ? "USER" :
			 (v._kind==RDI_LREC_LOGSYS) ? "LSYS" :
			 (v._kind==RDI_LREC_CHCK_S) ? "CP_S" : "CP_F") 
	<< " CSUM " << v._csum << " PLSN " << v._plsn << " SIZE " << v._size; 
}


// ---------------------- Log File Anchor ---------------------- //
//                                                               //
// ------------------------------------------------------------- //

// Fresh anchor: current log version, wrap counter 1, the given size and
// reuse flag, default-constructed LSNs, and the ready string as marker.
RDI_LogAnchor::RDI_LogAnchor(CORBA::Boolean reuse, size_t size) :
		_lversion(RDI_CLogVersion),
		_circular(reuse),
		_wrap_cnt(1),
		_log_size(size),
		_frst_lsn(),
		_need_lsn(),
		_stbl_lsn(),
		_ckpt_lsn()
{
  const char* marker = (const char *)RDI_ReadyString;
  strcpy(_readystr, marker);
}

// Copy constructor: member-wise copy plus a copy of the marker string.
RDI_LogAnchor::RDI_LogAnchor(const RDI_LogAnchor& a) :
		_lversion(a._lversion),
		_circular(a._circular),
		_wrap_cnt(a._wrap_cnt),
		_log_size(a._log_size),
		_frst_lsn(a._frst_lsn),
		_need_lsn(a._need_lsn),
		_stbl_lsn(a._stbl_lsn),
		_ckpt_lsn(a._ckpt_lsn)
{
  strcpy(_readystr, a._readystr);
}

// Assignment: copies the marker string and every field of 'a'.
RDI_LogAnchor& RDI_LogAnchor::operator = (const RDI_LogAnchor& a) 
{
  strcpy(_readystr, a._readystr);

  _lversion = a._lversion;
  _circular = a._circular;
  _wrap_cnt = a._wrap_cnt;
  _log_size = a._log_size;

  _frst_lsn = a._frst_lsn;
  _need_lsn = a._need_lsn;
  _stbl_lsn = a._stbl_lsn;
  _ckpt_lsn = a._ckpt_lsn;

  return *this;
}

// Print the anchor on three lines: the marker string, then the version
// (as four dot-separated bytes), mode, wrap counter and size, and
// finally the four LSNs kept in the anchor.
ostream& operator << (ostream& out, const RDI_LogAnchor& a)
{
  // Split the version word into its four bytes, most significant first
  const unsigned long byte4 = (a._lversion & 0xFF000000) >> 24;
  const unsigned long byte3 = (a._lversion & 0x00FF0000) >> 16;
  const unsigned long byte2 = (a._lversion & 0x0000FF00) >>  8;
  const unsigned long byte1 = (a._lversion & 0x000000FF);

  out << a._readystr << endl;
  out << "  Vers " << byte4 << "." << byte3 << "." << byte2 << "." << byte1;
  out << (a._circular ? " CIRCULAR ":" ");
  out << "Wrap " << a._wrap_cnt << " Size " << a._log_size << endl;
  out << "  FrstLsn " << a._frst_lsn << " NeedLsn " << a._need_lsn;
  out << " StblLsn " << a._stbl_lsn << " ChckLsn " << a._ckpt_lsn;
  return out;
}


// ------------------- Memory Buffer Manager ------------------- //
//                                                               //
// ------------------------------------------------------------- //

// Allocate a 'size'-byte buffer and expose the part of it that starts
// at the first address accepted by RDI_Align for an RDI_LRecHdr.
RDI_MemoryLog::RDI_MemoryLog(size_t size) : 
			_size(size), _offs(0), _buff(0), _data(0)
{
  _buff = new char[ _size ];

  // Log records begin with an 'RDI_LRecHdr', so the usable area must
  // start at a suitably aligned address; the bytes skipped to reach
  // that address are subtracted from the usable size.
  const size_t raw_addr     = (size_t) _buff;
  const size_t aligned_addr = RDI_Align(raw_addr, sizeof(RDI_LRecHdr));
  const size_t skipped      = aligned_addr - raw_addr;

  _data = _buff + skipped;
  _size = size - skipped;
}

// Release the buffer and reset all bookkeeping fields.
RDI_MemoryLog::~RDI_MemoryLog()
{
  delete [] _buff;	// deleting a null pointer is a no-op
  _buff = 0;
  _data = 0;
  _size = 0;
  _offs = 0;
}

// Append one log record (header + payload + padding) to the buffer.
//   log_data  - payload bytes; may be NULL (e.g. begin checkpoint record)
//   data_size - number of payload bytes recorded in the header
//   kind      - record kind stored in the header
//   plsn      - LSN of the previous record, stored in the header
// The record is silently dropped (after a diagnostic) when it does not
// fit; callers are expected to check the free space beforehand.
void RDI_MemoryLog::append(const void* log_data, size_t data_size, 
			   RDI_LREC_Kind kind, const RDI_LSN& plsn)
{
  RDI_LRecHdr* lhdr = 0;
  size_t roffs = 0;
  // A record occupies the header plus the aligned payload, not just
  // 'data_size' bytes, so this is the amount that must be available.
  const size_t need = rdi_rec_size(data_size);

  // log_data may be NULL in the case of a begin checkpoint record
  // The first test also protects the unsigned subtraction below
  if ( (_offs > _size) || (need > (_size - _offs)) ) {
     RDI_DUMP("log record cannot fit in the available memory space");
     return;
  }
  lhdr = (RDI_LRecHdr *) (_data + _offs);
  new ( lhdr ) RDI_LRecHdr(kind, RDI_LREC_MAGIC, plsn, data_size);

  roffs  = _offs + sizeof(RDI_LRecHdr);	// Offset of the data portion
  _offs += need;			// Offset of next log record

  if ( log_data && data_size ) {
     memcpy((_data + roffs), log_data, data_size);
     roffs += data_size;
     if ( roffs < _offs )	// Some padding is needed here
	memset((_data + roffs), '\0', (_offs - roffs));
  }
}

// Print the address of the usable area, its size and the current offset.
ostream& operator << (ostream& out, const RDI_MemoryLog& mlog)
{
  const void* start = (void *) mlog._data;
  out << "@ " << start;
  out << " size " << mlog._size;
  out << " offs " << mlog._offs;
  return out;
}

// ------------------------ Log Manager ------------------------ //
//                                                               //
// When a log directory is used for storing log files,  the name //
// of each log file has the format: <log_name_base>.<number>. In //
// addition, the file <log_anchor_name> contains the log anchor. //
// ------------------------------------------------------------- //

// Base name of the numbered log files kept inside a log directory
static const char* log_name_base   = "RDILfile";
// Name of the file, inside a log directory, that holds the log anchor
static const char* log_anchor_name = "RDIAnchor";

// Default state: no file open (_filedes == -1), no in-memory tail,
// all flags and counters cleared, and an empty log path.
RDI_Log::RDI_Log() :
	_rawdisk(0),
	_dirpath(0),
	_fformat(0),
	_cactive(0),
	_invalid(0),
	_filedes(-1),
	_lanchor(),
	_logtail(0),
	_numrecs(0),
	_numchck(0),
	_memflsn(),
	_lastlsn(),
	_nextlsn(),
	_scheckp(),
	_lock()
{
  _logpath[0] = '\0';
}

// Copying a log object is not supported; the argument is ignored and
// only a diagnostic is produced.
RDI_Log::RDI_Log(const RDI_Log& /* l */)
{
  RDI_DUMP("Copy constructor is disabled for RDI_Log");
}

// Close the log, free the in-memory tail and reset the object state;
// the object is marked invalid afterwards.
RDI_Log::~RDI_Log()
{
  close();

  delete _logtail;	// deleting a null pointer is a no-op
  _logtail = 0;

  _numrecs = 0;
  _lanchor.clear();
  _invalid = 1;

  _scheckp = RDI_LSN::null;
  _nextlsn = RDI_LSN::null;
  _lastlsn = RDI_LSN::null;
  _memflsn = RDI_LSN::null;
}

// Check whether 'lpath' refers to an existing log that carries a valid
// anchor.  'lpath' may name a directory (the anchor is then read from
// the file <lpath>/<log_anchor_name>) or a file/device (the anchor is
// read from offset 0).
// Returns 1 when the anchor could be read, has the current log version
// and carries the expected ready string; returns 0 in every other case.
int RDI_Log::valid_log(const char* lpath)
{
  char fname[MAXPATHLEN];
  char rpath[MAXPATHLEN];
  int  fldes=-1;
  RDI_LogAnchor anchor;
  struct stat   stbuf;

  if ( ! lpath || (strlen(lpath) == 0) )
	return 0;
  if ( (::access(lpath, F_OK) != 0) || 
       (::stat(lpath, &stbuf) != 0) || ! ::realpath((REALPATH_ARG1_T)lpath, rpath) )
	return 0;
  int is_dir = S_ISDIR(stbuf.st_mode) ? 1: 0;
  int is_raw = is_dir ? 0 : (S_ISREG(stbuf.st_mode) ? 0 : 1);
  if ( is_dir ) {
	// "<rpath>/<log_anchor_name>" plus the terminator must fit in fname
	if ( (strlen(rpath) + strlen(log_anchor_name) + 2) > sizeof(fname) )
	   return 0;
	sprintf(fname, "%s/%s", rpath, log_anchor_name);
	if ( (::access(fname, F_OK) != 0) || (::stat(fname, &stbuf) != 0) )
	   return 0;
  } else {
	strcpy(fname, rpath);
  }
  if ( (fldes = ::open(fname, O_RDONLY, 0600)) == -1 )
  	return 0;

  // The descriptor is only needed for reading the anchor
  int rd_failed = RDI_Read(fldes, &anchor, sizeof(RDI_LogAnchor), 0, is_raw) ? 1 : 0;
  (void) ::close(fldes);
  if ( rd_failed )
	return 0;

  // The log is valid only when the version matches AND the ready string
  // matches; strcmp() returns 0 on a match, so a non-zero result means
  // the anchor was not generated by this library.
  if ( (anchor.lversion() != RDI_CLogVersion) || 
       (strcmp(anchor._readystr, RDI_ReadyString) != 0) )
	return 0;
  return 1;
}

// Create a new log and return a heap-allocated manager for it.
//   lpath - existing directory, existing file/device, or the name of a
//           file that does not exist yet (it is then created)
//   lsize - requested log size; must be at least 'min_log_size'
//   reuse - stored as the 'circular' flag of the anchor
//   formt - when set, new log files are pre-formatted by new_file()
//   msize - size of the in-memory tail; raised to 16KBytes if smaller
// For a directory the log records go to "<dir>/<log_name_base>.1" and
// the anchor to "<dir>/<log_anchor_name>"; otherwise the anchor is
// stored at offset 0 of the log file itself.
// Returns the new object, or 0 on any failure (a diagnostic is dumped).
RDI_Log* RDI_Log::create(const char*   lpath,
			 size_t        lsize,
		         CORBA::Boolean reuse,
			 CORBA::Boolean formt,
			 size_t        msize)
{
  RDI_Log* lgptr=0;
  size_t   dsksz=0, lansz=sizeof(RDI_LogAnchor);
  off_t    aoffs=RDI_Align(sizeof(RDI_LogAnchor), dev_block_size);
  char     fname[MAXPATHLEN];
  struct stat stbuf;

  if ( ! lpath || (strlen(lpath) == 0) ) {
	RDI_DUMP("NULL path was given for the log");
	return 0;
  }
  if ( lsize < min_log_size ) {
	// A size equal to the minimum is accepted by the test above
	RDI_DUMP("log size [" << lsize << "] must be >= " << min_log_size);	
	return 0;
  }
  if ( msize < 16*RDI_OneKiloByte ) {
	RDI_DUMP("memory buffer too small; setting it to 16KBytes");
	msize = 16*RDI_OneKiloByte;
  }

  if ( !(lgptr=new RDI_Log()) || !(lgptr->_logtail=new RDI_MemoryLog(msize)) ) {
	RDI_DUMP("Failed to allocate memory for the log object");
	if ( lgptr ) delete lgptr;
	return 0;
  }
  if ( ::access(lpath, F_OK) == 0 ) {
	if ( ::stat(lpath, &stbuf) == -1 ) {
		RDI_DUMP("failed to stat() provided path: " << lpath);
		delete lgptr; return 0;
	}
	lgptr->_dirpath= S_ISDIR(stbuf.st_mode) ? 1: 0;
	lgptr->_rawdisk= lgptr->_dirpath ? 0 : (S_ISREG(stbuf.st_mode) ? 0 : 1);

	if ( ::realpath((REALPATH_ARG1_T)lpath, lgptr->_logpath) == (char *) 0 ) {
		RDI_DUMP("failed to resolve log path: " << lpath);
		delete lgptr; return 0;
	}
	if ( lgptr->_dirpath ) {
		// Both "<dir>/<log_name_base>.1" and "<dir>/<log_anchor_name>"
		// (plus terminator) are formatted into fname below
		size_t dlen = strlen(lgptr->_logpath);
		if ( ((dlen + strlen(log_name_base)   + 4) > sizeof(fname)) ||
		     ((dlen + strlen(log_anchor_name) + 2) > sizeof(fname)) ) {
			RDI_DUMP("log path is too long: " << lpath);
			delete lgptr; return 0;
		}
		sprintf(fname, "%s/%s.1", lgptr->_logpath, log_name_base);
	} else {
		strcpy(fname, lgptr->_logpath);
	}
  } else if ( errno == ENOENT ) {
	// The path does not exist: a regular file will be created
	if ( strlen(lpath) >= sizeof(fname) ) {
		RDI_DUMP("log path is too long: " << lpath);
		delete lgptr; return 0;
	}
	lgptr->_rawdisk = 0;
	lgptr->_dirpath = 0;
	strcpy(lgptr->_logpath, lpath);
	strcpy(fname, lpath);
  } else {
	RDI_DUMP("Invalid log name was given: " << lpath);
	delete lgptr; return 0;
  }
  
  // Compute needed log file size based on the provided values
  // (a single-file log also has to hold the anchor)

  dsksz = lgptr->_dirpath ? RDI_Align(lsize, dev_block_size) :
			    RDI_Align(lsize + lansz, dev_block_size);

  lgptr->_lanchor._circular = reuse;
  lgptr->_lanchor._wrap_cnt = 1;
  lgptr->_lanchor._log_size = dsksz;
  lgptr->_lanchor._frst_lsn = RDI_LSN::null;
  lgptr->_lanchor._stbl_lsn = RDI_LSN::null;
  lgptr->_lanchor._ckpt_lsn = RDI_LSN::null;

  // The first record goes to offset 0 of a directory log file, or right
  // after the block-aligned anchor of a single-file log
  lgptr->_fformat = formt;
  lgptr->_nextlsn = lgptr->_dirpath ? RDI_LSN(0, lgptr->_lanchor._wrap_cnt) :
				      RDI_LSN(aoffs, lgptr->_lanchor._wrap_cnt);
  lgptr->_filedes = new_file(fname, dsksz, formt, lgptr->_rawdisk);

  if ( lgptr->_filedes == -1 ) {
	RDI_DUMP("failed to create log file: " << fname);
	delete lgptr; return 0;
  }
  if ( lgptr->_dirpath ) {
	int afd = 0;
	sprintf(fname, "%s/%s", lgptr->_logpath, log_anchor_name);

	if ( (afd = new_file(fname)) == -1 ) {
	   RDI_DUMP("failed to create log anchor: " << fname);
	   delete lgptr; return 0;
	}
	if ( RDI_Write(afd, &lgptr->_lanchor, lansz) ) {
	   RDI_DUMP("I/O error " << errno << " while writing log anchor");
	   ::close(afd); ::unlink(fname);
	   delete lgptr; return 0;
	}
	::fsync(afd);
	::close(afd);
  } else {
	int israw = lgptr->_rawdisk ? 1 : 0;
	if ( RDI_Write(lgptr->_filedes, &lgptr->_lanchor, lansz, 0, israw) ) {
	   RDI_DUMP("I/O error " << errno << " while writing to " << fname);
	   delete lgptr; return 0;
	}
	// Force the anchor to disk, as is done for the anchor file of a
	// directory log above
	(void) ::fsync(lgptr->_filedes);
  }

  return lgptr;
}

// Open an existing log and return a heap-allocated manager for it.
//   lpath - directory, file or device that holds the log
//   formt - remembered and used when additional log files are created
//   msize - size of the in-memory tail; raised to 16KBytes if smaller
// The anchor is read and validated (ready string and version), the
// log is scanned to establish its current state, and the anchor is
// written back.  Returns 0 on any failure (a diagnostic is dumped).
RDI_Log* RDI_Log::open(const char* lpath, CORBA::Boolean formt, size_t msize)
{
  RDI_Log* lgptr=0;
  unsigned long lfnum=1;
  char     fname[MAXPATHLEN];
  struct stat stbuf;

  if ( ! lpath || (strlen(lpath) == 0) ) {
	RDI_DUMP("NULL log path name was provided");
	return 0;
  }
  if ( (::access(lpath, F_OK) != 0) || (::stat(lpath, &stbuf) != 0) ) {
	RDI_DUMP("access()/stat() error " << errno << " for " << lpath);
	return 0;
  }
  if ( msize < 16*RDI_OneKiloByte ) {
        RDI_DUMP("memory buffer too small; setting it to 16KBytes");
        msize = 16*RDI_OneKiloByte;
  }
  if ( !(lgptr=new RDI_Log()) || !(lgptr->_logtail=new RDI_MemoryLog(msize)) ) {
        RDI_DUMP("Failed to allocate memory for the log object");
	if ( lgptr ) delete lgptr;
        return 0;
  }

  lgptr->_fformat = formt;
  lgptr->_dirpath = S_ISDIR(stbuf.st_mode) ? 1: 0;
  lgptr->_rawdisk = lgptr->_dirpath ? 0 : (S_ISREG(stbuf.st_mode) ? 0 : 1);

  if ( ::realpath((REALPATH_ARG1_T)lpath, lgptr->_logpath) == (char *) 0 ) {
	RDI_DUMP("failed to resolve log path: " << lpath);
	delete lgptr; return 0;
  }

  // Read the log anchor to locate the last log file known to the 
  // log manager as well as the last completed checkpoint record.

  if ( lgptr->_dirpath ) {
	// "<dir>/<log_name_base>.<number>" is the longest name formatted
	// into fname; 3*sizeof(lfnum) bounds the number of decimal digits
	size_t dlen = strlen(lgptr->_logpath);
	if ( ((dlen + strlen(log_name_base) + 3*sizeof(lfnum) + 3) > sizeof(fname)) ||
	     ((dlen + strlen(log_anchor_name) + 2) > sizeof(fname)) ) {
		RDI_DUMP("log path is too long: " << lpath);
		delete lgptr; return 0;
	}
	sprintf(fname, "%s/%s", lgptr->_logpath, log_anchor_name);	
	if ( (::access(fname, F_OK) != 0) || (::stat(fname, &stbuf) != 0) ) {
		RDI_DUMP("[" << lpath << "] does not contain a valid log");
		RDI_DUMP("\t- log files either deleted or never created");
		RDI_DUMP("\t- use RDI_Log::create() to create a new log");
		delete lgptr; return 0;
	}
	if ( lgptr->read_anchor() ) {
		RDI_DUMP("failed to read log anchor from: " << fname);
		delete lgptr; return 0;
	}
	// A circular log always lives in file number 1; otherwise the
	// current file is the one numbered by the wrap counter
	lfnum = lgptr->_lanchor._circular ? 1 : lgptr->_lanchor._wrap_cnt;
	sprintf(fname, "%s/%s.%lu", lgptr->_logpath, log_name_base, lfnum);	
  } else { 
	strcpy(fname, lgptr->_logpath);
  }

  if ( (lgptr->_filedes = ::open(fname, O_RDWR, 0600)) == -1 ) {
	RDI_DUMP("Failed to open log file " << fname << " - errno " << errno);
	delete lgptr; return 0;
  }
  // For a single-file log the anchor is read from the file just opened
  if ( ! lgptr->_dirpath && lgptr->read_anchor() ) {
	RDI_DUMP("failed to read log anchor from: " << fname);
	delete lgptr; return 0;
  }

  if ( strcmp(lgptr->_lanchor._readystr, RDI_ReadyString) != 0 ) {
	RDI_DUMP("Invalid log anchor -- not generated by READY");
	delete lgptr; return 0;
  }

  if ( lgptr->_lanchor.lversion() != RDI_CLogVersion ) {
	RDI_DUMP("invalid log version was found on disk");
	delete lgptr; return 0;
  }

  if ( lgptr->establish_state() || lgptr->flush_tail(1) ) {
	RDI_DUMP("failed to establish a consistent state of the log");
	delete lgptr; return 0;
  }

  return lgptr;
}

// Flush the in-memory tail (passing 'flush_log' on to flush_tail) and
// close the log file.  Nothing is done when no file is open.
void RDI_Log::close(CORBA::Boolean flush_log)
{
  omni_mutex_lock lock(_lock);

  if ( _filedes == -1 )
	return;

  (void) flush_tail(flush_log);
  ::close(_filedes);
  _filedes = -1;
}

// Write the in-memory tail and the anchor to disk while holding the
// log lock; returns the result of flush_tail().
int  RDI_Log::flush()
{
  omni_mutex_lock lock(_lock);
  const int status = this->flush_tail(1);
  return status;
}

// Append a user record of 'size' bytes while holding the log lock;
// returns the LSN produced by the three-argument append().
RDI_LSN RDI_Log::append(const void* data, size_t size)
{
  omni_mutex_lock lock(_lock);
  RDI_LSN result = append(data, size, RDI_LREC_USRAPP);
  return result;
}

// Append a payload-free "checkpoint start" record while holding the
// log lock; returns the LSN produced by the three-argument append().
RDI_LSN RDI_Log::begin_checkpoint()
{
  omni_mutex_lock lock(_lock);
  RDI_LSN result = append(0, 0, RDI_LREC_CHCK_S);
  return result;
}

// Append a "checkpoint finish" record whose payload is 'olsn', while
// holding the log lock; returns the LSN produced by append().
RDI_LSN RDI_Log::finish_checkpoint(const RDI_LSN& olsn)
{
  omni_mutex_lock lock(_lock);
  const void*  payload = &olsn;
  const size_t nbytes  = sizeof(RDI_LSN);
  return append(payload, nbytes, RDI_LREC_CHCK_F);
}

////////////////////
// Flush the tail of the log to disk.  We do not have to check for
// available space at this point since this is done at log append.
////////////////////

// Write the in-memory tail at the offset of its first record, fsync
// the log file and, when 'force_anchor' is set, write the anchor too.
// Returns 0 on success.  On any I/O failure the log is marked invalid
// and -1 is returned; -1 is also returned when it was already invalid.
int RDI_Log::flush_tail(CORBA::Boolean force_anchor)
{
  const off_t  tail_offs = _memflsn.get_offset();
  const size_t tail_size = _logtail->used_space();
  const int    raw_flag  = _rawdisk ? 1 : 0;

  if ( _invalid ) {
	RDI_DUMP("log state is invalid -- close and re-open the log");
	return -1;
  } 

  const int have_file = (_filedes != -1) ? 1 : 0;

  if ( have_file && tail_size ) {
	if ( RDI_Write(_filedes, _logtail->data_ptr(), tail_size, tail_offs, raw_flag) ) {
	   RDI_DUMP("I/O error " << errno << " while writing log tail");
	   _invalid = 1;
	   return -1;
	}
	if ( ::fsync(_filedes) ) {
	   RDI_DUMP("I/O error " << errno << " in fsync() system call");
	   _invalid = 1;
	   return -1;
	}
	// Everything appended so far is now stable; remember the first
	// LSN ever written if the anchor does not know it yet
	_lanchor._stbl_lsn = _lastlsn;
	if ( _lanchor._frst_lsn == RDI_LSN::null )
	   _lanchor._frst_lsn = _memflsn;
	_memflsn = RDI_LSN::null;
	_logtail->clear();
  }

  if ( ! force_anchor )
	return 0;
  if ( ! have_file && ! _dirpath )
	return 0;
  if ( write_anchor() ) {
	RDI_DUMP("failed to write log anchor to stable storage");
	_invalid = 1;
	return -1;
  }
  return 0;
}

////////////////////

// Append one record of the given kind to the in-memory tail and return
// its LSN; RDI_LSN::null is returned on any failure.
// A "checkpoint start" record opens a checkpoint (nesting is refused),
// and a "checkpoint finish" record closes it: its payload is read as
// an RDI_LSN which, unless null, becomes the anchor's needed LSN, and
// the tail and anchor are forced to disk.
RDI_LSN RDI_Log::append(const void* data, size_t size, RDI_LREC_Kind kind)
{
  const size_t rec_bytes = rdi_rec_size(size);
  const int    is_begin  = (kind == RDI_LREC_CHCK_S) ? 1 : 0;
  const int    is_finish = (kind == RDI_LREC_CHCK_F) ? 1 : 0;

  if ( _invalid ) {
 	RDI_DUMP("log state is invalid -- close and re-open the log");
	return RDI_LSN::null;
  }
  if ( is_begin && _cactive ) {
	RDI_DUMP("nested checkpoints are not supported!!!!");
	return RDI_LSN::null;
  }
  if ( is_finish && ! _cactive ) {
	RDI_DUMP("cannot terminate a checkpoint that never started!!!!");
	return RDI_LSN::null;
  }
  if ( is_begin ) {
	_cactive = 1;
	_scheckp = _nextlsn;
  }

  if ( rec_bytes > _logtail->total_space() ) {
	RDI_DUMP("log record cannot fit in the in-memory log tail");
	return RDI_LSN::null;
  }

  // Log space is checked only while no checkpoint is in progress

  if ( ! _cactive ) {
	if ( allocate_space(rec_bytes) ) {
	   RDI_DUMP("insufficient log space for " << size << "-byte record");
	   return RDI_LSN::null;
	}
  }

  // Make room in the in-memory tail when the record does not fit

  if ( _logtail->free_space() < rec_bytes ) {
	if ( this->flush_tail(0) ) {
	   RDI_DUMP("failed to flush tail of the log to disk");
	   return RDI_LSN::null;
	}
  }

  _logtail->append(data, size, kind, _lastlsn);
  _lastlsn  = _nextlsn;
  if ( _memflsn == RDI_LSN::null )
	_memflsn = _nextlsn;
  _nextlsn += rec_bytes;
  _numrecs += 1;

  if ( ! is_finish )
	return _lastlsn;

  // Completed checkpoint: update the anchor and force it to disk

  RDI_LSN* oldest = (RDI_LSN *) data;
  if ( *oldest != RDI_LSN::null )
	_lanchor._need_lsn = *oldest;
  _lanchor._ckpt_lsn = _scheckp;
  if ( this->flush_tail(1) ) {
	RDI_DUMP("failed to flush tail of the log to disk");
	return RDI_LSN::null;
  }
  _cactive  = 0;
  _numchck += 1;

  return _lastlsn;
}

////////////////////

// Return 1 when 'lsn' is non-null, has a wrap number in the range
// [1, anchor wrap counter], an offset not beyond the log size, and lies
// between the first and the last stable LSN of the anchor; 0 otherwise.
int RDI_Log::valid_lsn(const RDI_LSN& lsn) const
{
  if ( lsn == RDI_LSN::null )
	return 0;
  if ( lsn.get_wrapno() == 0 )
	return 0;
  if ( lsn.get_wrapno() > _lanchor._wrap_cnt )
	return 0;
  if ( (size_t)lsn.get_offset() > _lanchor._log_size )
	return 0;
  if ( lsn < _lanchor._frst_lsn )
	return 0;
  if ( lsn > _lanchor._stbl_lsn )
	return 0;
  return 1;
}

////////////////////

// Print the log path and mode flags, followed by the LSN state, the
// record/checkpoint counters, the in-memory tail and the anchor.
ostream& operator << (ostream& out, const RDI_Log& log)
{
  out << log._logpath << " : ";
  out << (log._rawdisk? "RAW ":" ");
  out << (log._dirpath ? "DIR ":" ");
  out << (log._cactive ? "Active Checkpoint":"") << endl;

  out << "-" << " LastLSN " << log._lastlsn;
  out << " NextLSN " << log._nextlsn;
  out << " ChckLSN " << log._scheckp << endl;

  out << "- NumRecs " << log._numrecs;
  out << " NumChck " << log._numchck;
  out << " LogTail [" << *(log._logtail) << "]" << endl;

  out << "- " << log._lanchor;
  return out;
}

////////////////////
// The following performs a forward scan of the log file and tries
// to establish the most consistent state of the log.
////////////////////

// Scan the open log file forward and set '_lastlsn', '_nextlsn' and the
// anchor fields '_frst_lsn', '_stbl_lsn' (and '_need_lsn', when a
// finished checkpoint record is met) to match the records found.
// Returns 0 when the scan ends normally and -1 on a read error.
int RDI_Log::establish_state()
{
  RDI_LRecHdr lrhdr;
  off_t aoffs = 0, loffs = 0;
  int   israw = _rawdisk ? 1 : 0;

  // Offset of the first record in a log file: 0 for directory logs,
  // right after the block-aligned anchor for single-file logs
  aoffs = _dirpath ? 0 : RDI_Align(sizeof(RDI_LogAnchor), dev_block_size);

  // Since some log records may have been written to disk after the
  // last known stable log record,  i.e., the record with LSN equal 
  // to '_lanchor._stbl_lsn', we should establish the current state
  // of the log to reflect this by scanning any log records written
  // after '_lanchor._stbl_lsn'.  If this LSN is NULL, we start the
  // scan at '_lanchor._frst_lsn'. If this is also NULL, we need to
  // take into account the case  where some log records were forced
  // to disk but the anchor was not.  Another fine point we have to
  // take into consideration is the following. When the end of some
  // log file is reached,  we increment '_lanchor._wrap_cnt' and we
  // force the anchor to disk.  Therefore, '_lanchor._stbl_lsn' may
  // be stored in the previous log file.  Here, we scan the new log
  // file from the beginning

  _lastlsn = (_lanchor._stbl_lsn != RDI_LSN::null) ? _lanchor._stbl_lsn
						   : _lanchor._frst_lsn;
  if ( _lastlsn == RDI_LSN::null ) {
	_nextlsn = RDI_LSN(aoffs, _lanchor._wrap_cnt);
  } else if ( ! _lanchor._circular && 
	      (_lastlsn.get_wrapno() != _lanchor._wrap_cnt) ) {
	_nextlsn = RDI_LSN(aoffs, _lanchor._wrap_cnt);
  } else {
  	_nextlsn = _lastlsn;
  }

  while ( 1 ) {
  	loffs = _nextlsn.get_offset();
	// No room left for another header in this file: stop, or, for a
	// circular log, continue at the start of the file with the next
	// wrap number
	if ( (loffs + sizeof(RDI_LRecHdr)) > _lanchor._log_size ) {
	   if ( ! _lanchor._circular )
	      break;
	   _lanchor._wrap_cnt += 1;
	   _nextlsn = RDI_LSN(aoffs, _lanchor._wrap_cnt);
	   loffs    = _nextlsn.get_offset();
	}

	if ( RDI_Read(_filedes, &lrhdr, sizeof(RDI_LRecHdr), loffs, israw) ) {
	   RDI_DUMP("failed to read log record header @ " << _nextlsn);
	   return -1;
	}
	// RDI_DUMP(_nextlsn << ": " << lrhdr);

	// If either the checksum of the log record is not correct or 
	// previous LSN value is not the same as '_lastlsn', the scan
	// is terminated; we have to skip the second test in the case
 	// of the very first log record we read during the scan

	if ( (lrhdr._csum != RDI_LREC_MAGIC) || 
	     ((_lastlsn != _nextlsn) && (lrhdr._plsn != _lastlsn)) )
	   break;

	if ( lrhdr._kind == RDI_LREC_CHCK_F ) {
	   // Read payload of log record, which contains the LSN of the
	   // oldest log record that should be preserved.
	   // The payload is read directly into '_lanchor._need_lsn'.

	   off_t offs = loffs + sizeof(RDI_LRecHdr);
	   void* data = (void *)&_lanchor._need_lsn;
   	   if ( RDI_Read(_filedes, data, sizeof(RDI_LSN), offs, israw) ) {
	      RDI_DUMP("failed to read checkpoint record @ " << _nextlsn);
	      return -1;
	   }
	}

	// The record is accepted: it becomes the last (and stable) record
	// and the scan moves on by the full size of the record
	_lastlsn = _nextlsn;
	if ( _lanchor._frst_lsn == RDI_LSN::null ) 
		_lanchor._frst_lsn = _nextlsn;
	_lanchor._stbl_lsn = _lastlsn;
	_nextlsn += rdi_rec_size(lrhdr._size);
  }

  return 0;
}

////////////////////
// The log anchor may be stored in its own file or it can be at the
// very beginning of the current log file.  In the former case,  we 
// open the anchor file and read it.  In the latter case, we assume
// that the log file is already open and we just read the anchor...
////////////////////

int RDI_Log::read_anchor()
{
  char fname[MAXPATHLEN];
  int  afdes = _filedes, israw = _rawdisk ? 1 : 0;

  if ( _dirpath ) {
	sprintf(fname, "%s/%s", _logpath, log_anchor_name);
	if ( (afdes = ::open(fname, O_RDONLY, 0600)) == -1 ) {
	   RDI_DUMP("Failed to open anchor " << fname << " - errno " << errno);
	   return -1;
	}
  }
  if ( RDI_Read(afdes, &_lanchor, sizeof(RDI_LogAnchor), 0, israw) ) {
	RDI_DUMP("Failed to read anchor " << fname << " - errno " << errno);
	if ( _dirpath ) (void) ::close(afdes);
	return -1;
  }
  if ( _dirpath && ::close(afdes) ) {
	RDI_DUMP("Failed to close anchor " << fname << " - errno " << errno);
	return -1;
  }
  return 0;
}

////////////////////
// The log anchor may be stored in its own file or it can be at the
// very beginning of the current log file.  In the former case,  we
// open the anchor file, write '_lanchor',  and close the file.  In
// the latter case,  we just write the  value of '_lanchor'........
////////////////////

int RDI_Log::write_anchor()
{
  char fname[MAXPATHLEN];
  int  afdes = _filedes, israw = _rawdisk ? 1 : 0;

  if ( _dirpath ) { 
	sprintf(fname, "%s/%s", _logpath, log_anchor_name);
	if ( (afdes = ::open(fname, O_RDWR | O_SYNC, 0600)) == -1 ) {
	   RDI_DUMP("failed to open anchor " << fname << " - errno " << errno);
	   return -1;
	}
  }
  if ( RDI_Write(afdes, &_lanchor, sizeof(RDI_LogAnchor), 0, israw) ) {
	RDI_DUMP("failed to write anchor " << fname << " - errno " << errno);
	if ( _dirpath ) (void) close(afdes);
	return -1;
  }
  if ( ::fsync(afdes) ) {
	RDI_DUMP("failed to fsync() anchor " << fname << " - errno " << errno);
	if ( _dirpath ) (void) ::close(afdes);
	return -1;
  }
  if ( _dirpath && ::close(afdes) ) {
	RDI_DUMP("failed to close anchor " << fname << " - errno " << errno);
	return -1;
  }
  return 0;
}

////////////////////

// Remove the log files of a directory log that precede the file holding
// the anchor's first LSN, i.e. the files numbered 1 .. wrapno-1.
// Nothing is done for non-directory logs or when the first LSN is null.
void RDI_Log::cleanup()
{
  char fname[MAXPATHLEN];

  if ( ! _dirpath || (_lanchor._frst_lsn == RDI_LSN::null) )
 	return;

  // The counter has the same width as the wrap number, and is printed
  // with the matching unsigned conversion
  const unsigned long first_kept = _lanchor._frst_lsn.get_wrapno();
  for (unsigned long i=1; i < first_kept; i++) {
	sprintf(fname, "%s/%s.%lu", _logpath, log_name_base, i);
	if ( ::access(fname, F_OK) == 0 ) {
	   RDI_DUMP("Cleanup: unlinking log file " << fname);
	   (void) ::unlink(fname);
	}
  }
}

////////////////////

// Open (and, unless 'israw' is set, create/truncate and size) the file
// 'fname' and return its descriptor, or -1 on failure.
//   size   - length given to ftruncate() for non-raw files, and the
//            number of bytes covered when formatting
//   format - when set, the file is filled with 8192-byte blocks of a
//            recognizable pattern
//   israw  - the path is opened without O_CREAT/O_TRUNC and is never
//            truncated or unlinked
int RDI_Log::new_file(const char* fname, size_t size, 
		      CORBA::Boolean format, CORBA::Boolean israw)
{
  off_t foffs = 0;
  char  dummy[8192];
  int   fldes = -1;
  int   fmode = israw ? O_RDWR : (O_RDWR | O_CREAT | O_TRUNC);

  if ( (fldes = ::open(fname, fmode, 0600)) == -1 ) {
	RDI_DUMP("failed to create: " << fname << " error: " << errno);
	return -1;
  }
  if ( ! israw && ::ftruncate(fldes, size) == -1 ) {
	RDI_DUMP("ftruncate() failed on " << fname << " error: " << errno);
	// Do not leak the descriptor or leave the useless file behind
	(void) ::close(fldes); (void) ::unlink(fname);
	return -1;
  }
  if ( ! format )
	return fldes;

  RDI_DUMP("** formating log file: " << fname);
  memset(dummy, '*', 8192);
  dummy[0] = dummy[2040] = dummy[4088] = dummy[6136] = 'T';
  dummy[1] = dummy[2041] = dummy[4089] = dummy[6137] = 'H';
  dummy[2] = dummy[2042] = dummy[4090] = dummy[6138] = 'I';
  dummy[3] = dummy[2043] = dummy[4091] = dummy[6139] = 'M';
  dummy[4] = dummy[2044] = dummy[4092] = dummy[6140] = 'I';
  dummy[5] = dummy[2045] = dummy[4093] = dummy[6141] = 'O';
  dummy[6] = dummy[2046] = dummy[4094] = dummy[6142] = 'S';

  // Progress marks are printed each time the written offset passes
  // 'curr'; off_t keeps the arithmetic valid for large log sizes
  off_t curr = (off_t) (size/4);

  while ( (size_t)foffs < size ) {
	if ( RDI_Write(fldes, dummy, 8192) ) {
	   RDI_DUMP("failed to intitialize log file " << fname);
	   (void) ::close(fldes);
	   // Only files created here are removed, never a raw device
	   if ( ! israw ) (void) ::unlink(fname);
	   return -1;
	}
	foffs += 8192;
	if ( foffs > curr ) {
	   RDI_DUMP("** ...............................");
	   curr *= 2;
	}
  }
  RDI_DUMP("** formating of log file completed");
  return fldes;
}

////////////////////
// Check if enough space is available in the current log file to
// store a 'size' record.  If not enough space is available, the
// following cases have to be handled:
// 1. Circular log file 
//	- flush memory buffer
//	- eliminate old log records, if possible, to make space
// 2. New log file can be created
//	- flush memory buffer
//	- create a new log file and initialize it
//	- cleanup any log files that are not needed anymore
////////////////////

// Make sure 'nbytes' plus a reserve of 'max_rsv_size' bytes can be
// appended at '_nextlsn'.  Returns 0 when the space is available
// (possibly after switching to a new log file or wrapping around a
// circular log) and -1 otherwise.
int RDI_Log::allocate_space(size_t nbytes)
{
  RDI_LRecHdr lrhdr;
  size_t      rsize = nbytes + max_rsv_size;
  size_t      lrhsz = sizeof(RDI_LRecHdr);
  off_t       loffs = 0, foffs = 0, aoffs = 0;
  char        fname[MAXPATHLEN];
  int         israw = _rawdisk ? 1 : 0;

  if ( ! _lanchor._circular ) {
	// --- Non-circular log ---
	if ( (_lanchor._log_size - _nextlsn.get_offset()) >= rsize )
	   return 0;
	if ( this->flush_tail(0) ) {
	   RDI_DUMP("failed to write log tail to stable storage");
	   return -1;
	}
	// Cover the unused rest of the file with one RDI_LREC_LOGSYS
	// record; only its header is written
	loffs = _nextlsn.get_offset();
	if ( (_lanchor._log_size - loffs) > lrhsz ) {	// Dummy record
	   lrhdr._kind = RDI_LREC_LOGSYS;
	   lrhdr._csum = RDI_LREC_MAGIC;
	   lrhdr._plsn = _lastlsn;
	   lrhdr._size = _lanchor._log_size - loffs - lrhsz;

	   if ( RDI_Write(_filedes, &lrhdr, lrhsz, loffs, israw) ) {
	      RDI_DUMP("I/O error " << errno << " while writing log record");
	      _invalid = 1; return -1;
	   }
	   _lastlsn = _nextlsn;
	}
	_lanchor._stbl_lsn = _lastlsn;
	// Only when we can create new log files we increment the wrap
	// counter stored with the log anchor
 	if ( _dirpath )
	   _lanchor._wrap_cnt += 1;

	// Update the anchor on disk to reflect the current log state.
	// To avoid creating many log files, we check if we can delete
	// some of them.  This happens when '_need_lsn' is stored in a
	// different log file than '_frst_lsn'

	if ( (_lanchor._need_lsn != RDI_LSN::null) && 
	     (_lanchor._frst_lsn != RDI_LSN::null) ) {
	   unsigned long nfiles = _lanchor._need_lsn.get_wrapno() -
	      		          _lanchor._frst_lsn.get_wrapno();
	   // '_frst_lsn' is advanced only once more than 5 files separate it
	   // from '_need_lsn'
	   if ( nfiles > 5 )
	      _lanchor._frst_lsn = RDI_LSN(0, _lanchor._need_lsn.get_wrapno());
	}

	if ( write_anchor() ) {
	   RDI_DUMP("failed to write log anchor to stable storage");
	   _invalid = 1; return -1;
	}

 	// In case we use only one log file, we should close the file
	// after the anchor is written since the anchor is written in
	// the same file as the log records .....

	(void) ::close(_filedes);
	_filedes = -1;
	// NOTE(review): on this path the log file has been closed and -1 is
	// returned without setting '_invalid' -- confirm this is intended
	if ( ! _dirpath ) {	// New log file cannot be created
	   RDI_DUMP("cannot create a new log file based on log configuration");
	   return -1;
	}
	sprintf(fname, "%s/%s.%ld",_logpath,log_name_base,_lanchor._wrap_cnt);
	if ( (_filedes = new_file(fname,_lanchor._log_size,_fformat)) == -1 ) {
	   RDI_DUMP("failed to create new log file: " << fname);
	   _invalid = 1; return -1;
	}
	_nextlsn = RDI_LSN(0, _lanchor._wrap_cnt);
	// Remove any files that may not be needed any longer
	this->cleanup();
	return 0;
  } else {
	// --- Circular log ---
	// aoffs: offset of the first record slot in the file
	// foffs: offset of the oldest record that is currently kept
	// loffs: offset at which the next record would be written
	aoffs = _dirpath ? 0 : RDI_Align(sizeof(RDI_LogAnchor),dev_block_size);
	foffs = _lanchor._frst_lsn.get_offset();
	loffs = _nextlsn.get_offset();

	// Initially, we try to avoid overwriting existing log records, if
	// possible.  Next, we check if by overwriting all records that we
 	// can overwrite we have enough space. If both fail, the operation
	// fails as well

	if ( loffs >= foffs ) {
	   if ( (_lanchor._log_size - loffs) > rsize ) {
	      return 0;
	   } else if ( (size_t)(foffs - aoffs) > rsize ) {
	      // Not enough room up to the end of the file, but enough in
	      // front of the oldest record: fill the rest of the file with
	      // a RDI_LREC_LOGSYS record and wrap around
	      if ( this->flush_tail(0) ) {
		 RDI_DUMP("failed to write log tail to stable storage");
		 return -1;
	      }
	      if ( (_lanchor._log_size - loffs) > lrhsz ) {
	         lrhdr._kind = RDI_LREC_LOGSYS;
		 lrhdr._csum = RDI_LREC_MAGIC;
		 lrhdr._plsn = _lastlsn;
		 lrhdr._size = _lanchor._log_size - loffs - lrhsz;

		 if ( RDI_Write(_filedes, &lrhdr,lrhsz, loffs,israw) ) {
		    RDI_DUMP("I/O error "<<errno<<" while writing log record");
		    _invalid = 1; return -1;
		 }
		 _lastlsn = _nextlsn;
	      } 
	      _lanchor._stbl_lsn  = _lastlsn;
	      _lanchor._wrap_cnt += 1;
	      if ( write_anchor() ) {
		 RDI_DUMP("failed to write log anchor to stable storage");
		 _invalid = 1; return -1;
	      }
	      _nextlsn = RDI_LSN(aoffs, _lanchor._wrap_cnt);
	      return 0;
	   }
	} else if ( (size_t)(foffs - loffs) > rsize ) {
	   return 0;
	}

	// Second attempt: same tests as above, but measured against
	// '_need_lsn'; on success '_frst_lsn' is moved up to '_need_lsn'
	foffs = _lanchor._need_lsn.get_offset();
	if ( loffs >= foffs ) {
	   if ( (_lanchor._log_size - loffs) > rsize ) {
	      _lanchor._frst_lsn = _lanchor._need_lsn;
              return 0;
	   } else if ( (size_t)(foffs - aoffs) > rsize ) {
	      if ( this->flush_tail(0) ) {
		 RDI_DUMP("failed to write log tail to stable storage");
	         return -1;
	      }
              if ( (_lanchor._log_size - loffs) > lrhsz ) {
                 lrhdr._kind = RDI_LREC_LOGSYS;
                 lrhdr._csum = RDI_LREC_MAGIC;
                 lrhdr._plsn = _lastlsn;
                 lrhdr._size = _lanchor._log_size - loffs - lrhsz;

                 if ( RDI_Write(_filedes, &lrhdr,lrhsz, loffs,israw) ) {
		    RDI_DUMP("I/O error "<<errno<<" while writing log record");
                    _invalid = 1; return -1;
                 }
                 _lastlsn = _nextlsn;
              } 
              _lanchor._stbl_lsn  = _lastlsn;
	      _lanchor._frst_lsn  = _lanchor._need_lsn;
              _lanchor._wrap_cnt += 1;
	      if ( write_anchor() ) {
		 RDI_DUMP("failed to write log anchor to stable storage");
		 _invalid = 1; return -1;
	      }
              _nextlsn = RDI_LSN(aoffs, _lanchor._wrap_cnt);
              return 0;
           }
        } else if ( (size_t)(foffs - loffs) > rsize ) {
	   _lanchor._frst_lsn = _lanchor._need_lsn;
           return 0;
        }
  }

  // Circular log with no usable space even after giving up old records
  return -1;
}

// ------------------------ Log Iterator ----------------------- //

// The two constructors below are compiled out by the '#if (0)' guard;
// each one would only emit a diagnostic.
#if (0)
RDI_LogIter::RDI_LogIter()
{ RDI_DUMP("Default constructor should NOT be used for log iterator"); }

RDI_LogIter::RDI_LogIter(const RDI_LogIter& /* li */)
{ RDI_DUMP("Copy constructor should NOT be used for log iterator"); }
#endif

// Iterator over the records of 'log'; 'forward' selects the direction
// in which next() moves.  No file is opened until a record is fetched.
RDI_LogIter::RDI_LogIter(const RDI_Log& log, CORBA::Boolean forward) :
	_logmngr(log),
	_filedes(-1),
	_filenum(0),
	_nextlsn(),
	_forward(forward)
{
}

// Close the log file opened by the iterator, if any.
RDI_LogIter::~RDI_LogIter()
{
  // -1 is the "no file" value used by the constructor; descriptor 0 is
  // a valid descriptor and must be closed as well
  if ( _filedes != -1 )
	(void) ::close(_filedes);
  _filedes = -1;
}

////////////////////

// Read the record stored at 'lsn'.
//   hdr  - receives the record header
//   data - when not NULL, receives up to 'size' payload bytes (fewer if
//          the record payload is smaller)
// Returns 0 on success; -1 when the file cannot be opened, on a read
// error, or when the header at 'lsn' does not carry the record magic.
int RDI_LogIter::fetch(const RDI_LSN& lsn, 
		       RDI_LRecHdr&   hdr, void* data, size_t size)
{
  const unsigned long file_no   = lsn.get_wrapno();
  const size_t        hdr_bytes = sizeof(RDI_LRecHdr);
  const int           raw_flag  = _logmngr._rawdisk ? 1 : 0;

  if ( this->open_file(file_no) ) {
	RDI_DUMP("failed to open log file containing record with lsn " << lsn);
	return -1;
  }
  if ( RDI_Read(_filedes, &hdr, hdr_bytes, lsn.get_offset(), raw_flag) ) {
	RDI_DUMP("I/O error " << errno << " while reading log record header");
	return -1;
  }
  if ( hdr._csum != RDI_LREC_MAGIC ) {
	RDI_DUMP("lsn " << lsn << " does not correspond to a log record");
	return -1;
  }

  // The payload follows the header; never read more than the caller's
  // buffer can hold
  size_t want = (hdr._size > size) ? size : hdr._size;
  off_t  from = lsn.get_offset() + hdr_bytes;
  if ( data ) {
	if ( RDI_Read(_filedes, data, want, from, raw_flag) ) {
	   RDI_DUMP("I/O error " << errno << " while reading log record data");
	   return -1;
	}
  }
  return 0;
}

////////////////////

// Position the iterator at 'lsn'.  Returns 0 on success and -1 when the
// log manager does not accept 'lsn' as valid.
int RDI_LogIter::seek(const RDI_LSN& lsn)
{
  if ( _logmngr.valid_lsn(lsn) ) {
	_nextlsn = lsn;
	return 0;
  }
  RDI_DUMP("invalid lsn " << lsn << " -- no such log record");
  return -1;
}

////////////////////

// Fetch the record at the current position into 'hdr'/'data' and move
// the iterator to the following (forward) or preceding (backward)
// record.  Returns the LSN of the fetched record, or RDI_LSN::null when
// the current position is not valid or the record cannot be fetched.
RDI_LSN RDI_LogIter::next(RDI_LRecHdr& hdr, void* data, size_t size)
{
  RDI_LSN current = _nextlsn;

  if ( ! _logmngr.valid_lsn(_nextlsn) ) {
	// Nothing left to scan
	return RDI_LSN::null;
  }
  if ( this->fetch(_nextlsn, hdr, data, size) != 0 ) {
	RDI_DUMP("failed to fetch log record @ " << _nextlsn);
	return RDI_LSN::null;
  }

  if ( ! _forward ) {
	// Backward scan: follow the link stored in the record header
	_nextlsn = hdr._plsn;
	return current;
  }

  // Forward scan: step over the record; once the end of the log file
  // is reached, continue at the first record slot of the next wrap.
  _nextlsn += rdi_rec_size(hdr._size);
  if ( (size_t)_nextlsn.get_offset() >= _logmngr._lanchor.log_size() ) {
	off_t after_anchor = RDI_Align(sizeof(RDI_LogAnchor), dev_block_size);
	off_t first_slot   = _logmngr._dirpath ? 0 : after_anchor;
	unsigned long following = _nextlsn.get_wrapno() + 1;
	_nextlsn = RDI_LSN(first_slot, following);
  }
  return current;
}

////////////////////

// Make sure the log file that holds records with wrap number 'fno' is
// open for reading.  Single-file and circular logs use one file for
// every wrap number; other directory logs use one file per number.
// Returns 0 on success and -1 when the file cannot be opened.
int RDI_LogIter::open_file(unsigned long fno)
{
  char fname[MAXPATHLEN];

  // Check if the log file with the given number is already opened
  // (-1, not 0, is the "no file" value: descriptor 0 is valid)

  if ( (_filedes != -1) && ( !_logmngr._dirpath || 
			     _logmngr._lanchor.circular() || (fno == _filenum)) )
	return 0;

  if ( _filedes != -1 ) {		// Close already open log file
	(void) ::close(_filedes);
	_filedes = -1;
  }

  if ( ! _logmngr._dirpath ) {
	strcpy(fname, _logmngr._logpath);
  } else if ( _logmngr._lanchor.circular() ) {
	sprintf(fname, "%s/%s.1", _logmngr._logpath, log_name_base);
  } else {
	// 'fno' is unsigned, so it is printed with the unsigned conversion
	sprintf(fname, "%s/%s.%lu", _logmngr._logpath, log_name_base, fno);
  }

  if ( (_filedes = ::open(fname, O_RDONLY, 0600)) == -1 ) {
	RDI_DUMP("I/O error " << errno << " while opening log file " << fname);
	return -1;
  }
  _filenum = fno;
  return 0;
}
