Google

Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

gnDNXSource.cpp

Go to the documentation of this file.
00001 
00002 // File:            gnDNXSource.cpp
00003 // Purpose:         Implements gnBaseSource for .DNX files
00004 // Description:     
00005 // Changes:        
00006 // Version:         libGenome 0.1.0 
00007 // Author:          Aaron Darling 
00008 // Last Edited:     April 15, 2001, 11:13:00pm 
00009 // Modified by:     
00010 // Copyright:       (c) Aaron Darling 
00011 // Licenses:        Proprietary 
00013 #include "gn/gnDNXSource.h"
00014 #include "gn/gnSourceSpec.h"
00015 #include "gn/gnStringSpec.h"
00016 #include "gn/gnSourceFactory.h"
00017 #include "gn/gnFASSource.h"
00018 #include "gn/gnGBKSource.h"
00019 #include "gn/gnBaseHeader.h"
00020 #include "gn/gnFilter.h"
00021 #include "gn/gnDebug.h"
00022 #include "gn/gnStringTools.h"
00023 #include <string>
00024 
00025 gnDNXSource::gnDNXSource()
00026 {
00027         m_DNXSpec = new gnGenomeSpec();
00028         m_pFilter = gnFilter::fullDNASeqFilter();
00029         if(m_pFilter == NULL){
00030                 DebugMsg("Error using static sequence filters.");
00031         }
00032 }
00033 
00034 gnDNXSource::gnDNXSource( const gnDNXSource& s ) : gnFileSource(s)
00035 {
00036         if(s.m_DNXSpec != NULL)
00037                 m_DNXSpec = s.m_DNXSpec->Clone();
00038 }
00039 
00040 gnDNXSource::~gnDNXSource()
00041 {
00042         m_ifstream.close();
00043         delete m_DNXSpec;
00044 }
00045 boolean gnDNXSource::HasContig( const string& name ) const
00046 {
00047         for(uint32 contigI = 0; contigI < m_DNXSpec->GetSpecListLength(); contigI++){
00048                 if(m_DNXSpec->GetSpec(contigI)->GetName() == name)
00049                         return true;
00050         }
00051         return false;
00052 }
00053 uint32 gnDNXSource::GetContigID( const string& name ) const
00054 {
00055         for(uint32 contigI = 0; contigI < m_DNXSpec->GetSpecListLength(); contigI++){
00056                 if(m_DNXSpec->GetSpec(contigI)->GetName() == name)
00057                         return contigI;
00058         }
00059         return ALL_CONTIGS;
00060 }
00061 string gnDNXSource::GetContigName( const uint32 i ) const
00062 {
00063         if(i < m_DNXSpec->GetSpecListLength()){
00064                 gnBaseSpec *gnbs = m_DNXSpec->GetSpec(i);
00065                 return gnbs->GetName();
00066         }
00067         return "";
00068 }
00069 gnSeqI gnDNXSource::GetContigSeqLength( const uint32 i ) const
00070 {
00071         if( i == ALL_CONTIGS){
00072                 return m_DNXSpec->GetLength();
00073         }else if(i < m_DNXSpec->GetSpecListLength()){
00074                 gnBaseSpec *gnbs = m_DNXSpec->GetSpec(i);
00075                 return gnbs->GetLength();
00076         }
00077         return 0;
00078 }
00079 //read raw data from the file
00080 
00081 void gnDNXSource::ValidateName(string& name){
00082         if(name == ""){ //make a random one.
00083                 name.resize(4);
00084                 srand(time(NULL));
00085                 for(int i=0; i < 4; i++)        
00086                         name[i] = (rand() % 26) + 64;
00087         }
00088 }
00089 
00090 boolean gnDNXSource::Write(gnGenomeSpec* spec, const string& filename){
00091         ofstream m_ofstream(filename.c_str(), ios::out | ios::binary);
00092         gnSourceFactory* m_sSourceFactory = gnSourceFactory::GetSourceFactory();
00093         if(!m_ofstream.is_open())
00094                 return false;
00095         for(uint32 i=0; i < spec->GetSpecListLength(); i++){    //each of these will be dnx statements
00096                 gnFragmentSpec* curStatementSpec = spec->GetSpec(i);
00097                 string sourceName = spec->GetSourceName();
00098                 string statementName = spec->GetName();
00099                 if(!m_sSourceFactory->HasSource(sourceName)){
00100                         ValidateName(statementName);
00101                         statementName += ".seq";
00102                         m_ofstream << statementName << "=";
00103                 }else
00104                         m_ofstream << sourceName << "=";
00105                 for(uint32 j=0; j < curStatementSpec->GetSpecListLength(); j++){        //each of these will be the files
00106                                                                                                                         //referred to by the dnx statement
00107                         gnContigSpec* curSubSpec = curStatementSpec->GetSpec(i);
00108                         sourceName = curStatementSpec->GetSourceName();
00109                         string contigName = curStatementSpec->GetName();
00110                         if(!m_sSourceFactory->HasSource(sourceName)){
00111                                 ValidateName(contigName);
00112                                 string writename = contigName+".seq";
00113                                 gnSequence gns = *curSubSpec;
00114                                 gnGBKSource::Write(gns, writename);
00115                                 m_ofstream << writename;
00116                         }else
00117                                 m_ofstream << sourceName;
00118                         if( j + 1 < curStatementSpec->GetSpecListLength())
00119                                 m_ofstream << "+";
00120                 }
00121                 m_ofstream << ";";
00122                 gnBaseHeader *gpbh = spec->GetHeader(0);
00123                 string header = "";
00124                 if(gpbh != NULL){
00125                         header = gpbh->GetHeader();
00126                         //delete everything after the first newline.
00127                         uint32 newlinepos = header.find_first_of('\n', 0);
00128                         if(newlinepos != string::npos)
00129                                 header = header.substr(0, newlinepos - 1);
00130                 }
00131                 m_ofstream << header << "\r\n";
00132         }
00133         m_ofstream.close();
00134         return true;
00135 }
00136 
00137 gnFileContig* gnDNXSource::GetFileContig( const uint32 contigI ) const{
00138         return NULL;    //returning NULL
00139 }
00140 
00141 //reads an inputstream and creates fills the spec vector appropriately
00142 boolean gnDNXSource::ParseStream( istream& fin )
00143 {
00144         // INIT temp varables
00145         uint32 readState = 0;  //10 - currently inside a comment
00146         uint32 sectionStart = 0;
00147         gnFragmentSpec* currentFragSpec = 0;
00148         gnBaseSource *currentSource;
00149         string currentSourceName;
00150         uint32 currentContig = ALL_CONTIGS;
00151         uint32 currentSeqStart = 0;
00152         boolean currentRevComp = false;
00153         // INIT buffer
00154         uint64 bufReadLen = 0;
00155         uint64 remainingBuffer = 0;
00156         char* buf = new char[BUFFER_SIZE];
00157         string curliteral;
00158         
00159         //Get the source factory and add the current dnx path to it.
00160         gnSourceFactory *sourceFactory = gnSourceFactory::GetSourceFactory();
00161         sourceFactory->AddPath(getPathString(m_openString));
00162 
00163         while( !fin.eof() )
00164         {
00165                 if(sectionStart > 0){
00166                         remainingBuffer = bufReadLen - sectionStart;
00167                         if(readState == 5){     //add literal
00168                                 curliteral += string(buf, sectionStart, remainingBuffer);
00169                                 remainingBuffer = 0;
00170                                 sectionStart = bufReadLen;
00171                         }else
00172                                 memmove(buf, buf+sectionStart, remainingBuffer);
00173                 }
00174                   // read chars
00175                 fin.read( buf + remainingBuffer, BUFFER_SIZE - (bufReadLen - sectionStart));
00176                 sectionStart = 0;
00177                 bufReadLen = fin.gcount() + remainingBuffer;
00178                 
00179                 for( uint32 i=0 ; i < bufReadLen ; i++ )
00180                 {
00181                         char ch = buf[i];
00182                         switch( readState )
00183                         {
00184                                 case 0: // Get name of genome
00185                                         if(ch == '='){
00186                                                 //genome name is from sectionStart to i
00187                                                 string contigName(buf+sectionStart, i - sectionStart);
00188                                                 currentFragSpec = new gnFragmentSpec();
00189                                                 currentFragSpec->SetName(contigName);
00190                                                 currentFragSpec->SetSourceName(m_openString);
00191                                                 m_DNXSpec->AddSpec(currentFragSpec);
00192                                                 sectionStart = i+1;
00193                                                 readState = 1;
00194                                         }
00195                                         break;
00196                                 case 1: // Ignore whitespace before filename or literal
00197                                         if((ch == ' ')||(ch == '        '))
00198                                                 break;
00199                                 case 2: // Are we getting a new source file name or a literal?
00200                                         if(ch == '"'){ //getting a literal
00201                                                 readState = 5;
00202                                                 sectionStart = i+1;
00203                                                 break;
00204                                         }
00205                                         readState = 3;
00206                                         sectionStart = i;
00207                                 case 3: // Get a new source file name
00208                                         //stop on >, (, +, and \n
00209                                         if(ch == '\n' && sectionStart == i -1){
00210                                                 if(buf[sectionStart]=='\r'){
00211                                                         sectionStart = i + 1;
00212                                                         break;
00213                                                 }
00214                                         }
00215                                         if((ch == '+')||(ch == '>')||(ch == '(')||(ch == '\n')||(ch == ';')){
00216                                                 //use the entire source file
00217                                                 string seqfile(buf, sectionStart, i - sectionStart);
00218                                                 currentSourceName = seqfile;
00219                                                 currentSource = sourceFactory->AddSource(seqfile, true);
00220                                                 if (currentSource==NULL)
00221                                                 {
00222                                                         delete[] buf;
00223                                                         return false;
00224                                                 }
00225                                                 if((ch == '+')||(ch == '\n')||(ch == ';')){
00226                                                         gnSourceSpec* tmp_spec = new gnSourceSpec(currentSource);
00227                                                         tmp_spec->SetSourceName(seqfile);
00228                                                         currentFragSpec->AddSpec(tmp_spec);
00229                                                         readState = 1;
00230                                                         if(ch == '\n'){ //reached the end of the statement.  parse another.
00231                                                                 readState = 0;
00232                                                         }else if(ch == ';'){    //hit a comment.
00233                                                                 readState = 9;
00234                                                         }
00235                                                 }else if(ch == '>'){  //select a contig to use
00236                                                         readState = 4;
00237                                                 }else if(ch == '('){  // use a specified section of the entire file
00238                                                         readState = 6;
00239                                                 }
00240                                                 sectionStart = i + 1;
00241                                         }
00242                                         break;
00243                                 case 4: // Get a specific contig to use
00244                                         //stop on (, +, ;, and \n
00245                                         if((ch == '+')||(ch == '\n')||(ch == ';')||(ch == '(')){
00246                                                 //use the entire contig
00247                                                 string contigname(buf, sectionStart, i - sectionStart);
00248                                                 currentContig = currentSource->GetContigID(contigname);
00249                                                 if((ch == '+')||(ch == '\n')||(ch == ';')){
00250                                                         gnSourceSpec* tmp_spec = new gnSourceSpec(currentSource, currentContig);
00251                                                         tmp_spec->SetSourceName(currentSourceName);
00252                                                         currentFragSpec->AddSpec(tmp_spec);
00253                                                         readState = 1;
00254                                                         if(ch == '\n'){ //reached the end of the statement.  parse another.
00255                                                                 readState = 0;
00256                                                         }else if(ch == ';'){    //hit a comment.
00257                                                                 readState = 9;
00258                                                         }
00259                                                 }else if(ch == '('){  //use the specified section
00260                                                         readState = 6;
00261                                                 }
00262                                                 sectionStart = i + 1;
00263                                         }
00264                                         break;
00265                                 case 5: // read in a literal
00266                                         // stop on "
00267                                         if(ch == '"'){
00268                                                 //now create a string spec from sectionStart to i-1
00269                                                 string literal(buf, sectionStart, i - sectionStart);
00270                                                 if(curliteral.length() > 0){
00271                                                         literal += curliteral;
00272                                                         curliteral = "";
00273                                                 }
00274                                                 gnStringSpec *gpss = new gnStringSpec(literal, currentFragSpec->GetSpecListLength());
00275                                                 currentFragSpec->AddSpec(gpss);
00276                                         }
00277                                 case 6: // read in a specified section
00278                                         //stop on , or < or >
00279                                         if((ch == ',') || (ch == '<') || (ch == '>')){
00280                                                 string seqstartstring(buf, sectionStart, i - sectionStart);
00281                                                 if(seqstartstring == "lend"){
00282                                                         currentSeqStart = 0;
00283                                                 }else if (seqstartstring == "rend"){
00284                                                         currentSeqStart = GNSEQI_END;
00285                                                 }else
00286                                                         currentSeqStart = atoi(seqstartstring.c_str()) - 1;
00287                                                 if(ch == '<')
00288                                                         currentRevComp = true;
00289                                                 sectionStart = i + 1;
00290                                                 readState = 7;
00291                                         }
00292                                         break;
00293                                 case 7: // read in the second half of a specified section
00294                                         //stop on )
00295                                         if(ch == ')'){
00296                                                 string seqendstring(buf, sectionStart, i - sectionStart);
00297                                                 uint32 currentSeqEnd = GNSEQI_END;
00298                                                 if(seqendstring == "lend"){
00299                                                         currentSeqEnd = 0;
00300                                                 }else if (seqendstring == "rend"){
00301                                                         currentSeqEnd = GNSEQI_END;
00302                                                 }else
00303                                                         currentSeqEnd = atoi(seqendstring.c_str()) - 1;
00304                                                 gnSourceSpec* tmp_spec = new gnSourceSpec(currentSource, currentContig, currentSeqStart, currentSeqEnd, currentRevComp);
00305                                                 tmp_spec->SetSourceName(currentSourceName);
00306                                                 currentFragSpec->AddSpec(tmp_spec);
00307                                                 currentRevComp = false; //set it back to its default value.
00308                                                 sectionStart = i + 1;
00309                                                 readState = 8; //look for connective operator
00310                                         }
00311                                         break;
00312                                 case 8: //skip whitespace until a connective or terminating operator is reached.
00313                                         if(ch == '+'){
00314                                                 sectionStart = i + 1;
00315                                                 readState = 1; //start over
00316                                         }
00317                                         if(ch == '\n'){
00318                                                 sectionStart = i + 1;
00319                                                 readState = 0;
00320                                         }
00321                                         if(ch == ';'){
00322                                                 sectionStart = i + 1;
00323                                                 readState = 9;
00324                                         }
00325                                         break;
00326                                 case 9: //skip comment until newline.
00327                                         if(ch == '\n'){
00328                                                 sectionStart = i + 1;
00329                                                 readState = 0;
00330                                         }
00331                                         break;
00332                                 default:
00333                                         DebugMsg("ERROR in file\n");
00334                                         return false;
00335                                         break;
00336                         }
00337                 }// for all buf
00338         }// while !eof
00339         // CLEAN UP
00340         delete[] buf;
00341         return true;
00342 }

Generated at Fri Nov 30 15:36:51 2001 for libGenome by doxygen1.2.8.1 written by Dimitri van Heesch, © 1997-2001