Skip to content

Commit c40a2ed

Browse files
committed
create plain peptide .idx file on-the-fly if specified but does not exist
1 parent 0671fa9 commit c40a2ed

File tree

3 files changed

+78
-12
lines changed

3 files changed

+78
-12
lines changed

CometSearch/CometFragmentIndex.cpp

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ void CometFragmentIndex::AddFragments(vector<PlainPeptideIndex>& g_vRawPeptides,
512512
if (dCalcPepMass > g_massRange.dMaxMass || dCalcPepMass < g_staticParams.options.dPeptideMassLow)
513513
return;
514514

515-
if (!g_bIndexPrecursors[BIN(dCalcPepMass)])
515+
if (!g_staticParams.options.iFragIndexSkipReadPrecursors && !g_bIndexPrecursors[BIN(dCalcPepMass)])
516516
return;
517517

518518
unsigned int uiCurrentFragmentPeptide = -1;
@@ -632,11 +632,19 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
632632
{
633633
FILE *fp;
634634
bool bSucceeded;
635+
bool bSwapIdxExtension = false;
635636
string strOut;
636637

637638
string strIndexFile;
638639

639-
strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx");
640+
if (strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
641+
{
642+
strIndexFile = g_staticParams.databaseInfo.szDatabase; // .idx specified but not present, so create it
643+
g_staticParams.databaseInfo.szDatabase[strlen(g_staticParams.databaseInfo.szDatabase) - 4] = '\0';
644+
bSwapIdxExtension = true; // need to make database regular fasta, then RunSearch to get plain peptides, then swap back
645+
}
646+
else
647+
strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx"); // fasta specified so add .idx extension
640648

641649
if ((fp = fopen(strIndexFile.c_str(), "wb")) == NULL)
642650
{
@@ -656,19 +664,27 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
656664
if (!bSucceeded)
657665
return bSucceeded;
658666

659-
// tp->fillPool( g_staticParams.options.iNumThreads < 0 ? 0 : g_staticParams.options.iNumThreads-1);
660667
if (g_massRange.dMaxMass - g_massRange.dMinMass > g_massRange.dMinMass)
661668
g_massRange.bNarrowMassRange = true;
662669
else
663670
g_massRange.bNarrowMassRange = false;
664671

665672
if (bSucceeded)
666673
{
674+
g_staticParams.options.bCreateIndex = true;
675+
g_staticParams.bIndexDb = false;
676+
667677
// this step calls RunSearch just to pull out all peptides
668678
// to write into the .idx peptides/proteins file
669679
bSucceeded = CometSearch::RunSearch(0, 0, tp);
680+
681+
g_staticParams.options.bCreateIndex = false;
682+
g_staticParams.bIndexDb = true;
670683
}
671684

685+
if (bSwapIdxExtension)
686+
strcat(g_staticParams.databaseInfo.szDatabase, ".idx");
687+
672688
if (!bSucceeded)
673689
{
674690
char szErrorMsg[SIZE_ERROR];
@@ -877,7 +893,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
877893
if (g_bPlainPeptideIndexRead)
878894
return 1;
879895

880-
if (g_staticParams.options.bCreateIndex)
896+
if (g_staticParams.options.bCreateIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
881897
strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx");
882898
else // database already is .idx
883899
strIndexFile = g_staticParams.databaseInfo.szDatabase;
@@ -1069,6 +1085,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
10691085
int iLen;
10701086
char szPeptide[MAX_PEPTIDE_LEN];
10711087

1088+
g_vRawPeptides.clear();
10721089
for (size_t it = 0; it < tNumPeptides; ++it)
10731090
{
10741091
tTmp = fread(&iLen, sizeof(int), 1, fp);
@@ -1091,7 +1108,6 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
10911108

10921109
g_pvProteinsList.clear();
10931110
g_pvProteinsList.reserve(tSize);
1094-
10951111
for (size_t it = 0; it < tSize; ++it)
10961112
{
10971113
size_t tNumProteinOffsets;
@@ -1126,13 +1142,15 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
11261142

11271143
int iTmp;
11281144
char szTmp[MAX_PEPTIDE_LEN];
1145+
MOD_SEQS.clear();
11291146
for (unsigned long i = 0; i < ulSizeModSeqs; ++i)
11301147
{
11311148
tTmp = fread(&iTmp, sizeof(int), 1, fp); // read length
11321149
tTmp = fread(szTmp, 1, iTmp, fp);
11331150
szTmp[iTmp]='\0';
11341151
MOD_SEQS.push_back(szTmp);
11351152
}
1153+
MOD_NUMBERS.clear();
11361154
for (unsigned long i = 0; i < ulModNumSize; ++i)
11371155
{
11381156
ModificationNumber sTmp;

CometSearch/CometPreprocess.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,7 @@ bool CometPreprocess::ReadPrecursors(MSReader &mstReader)
321321
iEnd = iMaxBin;
322322

323323
for (int x = iStart ; x <= iEnd; ++x)
324-
{
325324
g_bIndexPrecursors[x] = true;
326-
}
327325

328326
// now go through each isotope offset
329327
if (g_staticParams.tolerances.iIsotopeError > 0)
@@ -375,11 +373,11 @@ bool CometPreprocess::ReadPrecursors(MSReader &mstReader)
375373
iStart = BIN(dMassLow + C13_DIFF * PROTON_MASS); // do -1 offset
376374
iEnd = BIN(dMassHigh + C13_DIFF * PROTON_MASS);
377375
if (iStart < 0)
378-
iStart = 0;
376+
iStart = 0;
379377
if (iEnd > iMaxBin)
380-
iEnd = iMaxBin;
378+
iEnd = iMaxBin;
381379
for (int x = iStart ; x <= iEnd; ++x)
382-
g_bIndexPrecursors[x] = true;
380+
g_bIndexPrecursors[x] = true;
383381

384382
if (g_staticParams.tolerances.iIsotopeError == 6) // do -2 and -3 offsets
385383
{

CometSearch/CometSearchManager.cpp

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,55 @@ static bool ValidateSequenceDatabaseFile()
474474
FILE *fpcheck;
475475
char szErrorMsg[SIZE_ERROR];
476476

477+
// if .idx database specified but does not exist, first see if corresponding
478+
// fasta exists and if it does, create the .idx file
479+
if (strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
480+
{
481+
if ((fpcheck=fopen(g_staticParams.databaseInfo.szDatabase, "r")) == NULL)
482+
{
483+
string strFasta = g_staticParams.databaseInfo.szDatabase;
484+
strFasta.erase(strFasta.length() - 4); // remove .idx extension
485+
486+
if ((fpcheck=fopen(strFasta.c_str(), "r")) == NULL)
487+
{
488+
sprintf(szErrorMsg, " Error - peptide index file \"%s\" and corresponding FASTA file\n are both missing.\n",
489+
g_staticParams.databaseInfo.szDatabase);
490+
string strErrorMsg(szErrorMsg);
491+
g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg);
492+
logerr(szErrorMsg);
493+
return false;
494+
}
495+
else
496+
{
497+
fclose(fpcheck);
498+
g_staticParams.options.bCreateIndex = true; // set to true to make the index
499+
return true;
500+
}
501+
}
502+
else
503+
{
504+
string strFasta = g_staticParams.databaseInfo.szDatabase;
505+
strFasta.erase(strFasta.length() - 4); // remove .idx extension
506+
507+
if ((fpcheck=fopen(strFasta.c_str(), "r")) == NULL)
508+
{
509+
sprintf(szErrorMsg, " Error - peptide index file \"%s\" specified is present\n but corresponding FASTA file \"%s\" file is missing.\n",
510+
g_staticParams.databaseInfo.szDatabase,
511+
strFasta.c_str());
512+
string strErrorMsg(szErrorMsg);
513+
g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg);
514+
logerr(szErrorMsg);
515+
return false;
516+
}
517+
else
518+
{
519+
fclose(fpcheck);
520+
g_staticParams.options.bCreateIndex = false;
521+
return true;
522+
}
523+
}
524+
}
525+
477526
#ifndef WIN32
478527
// do a quick test if specified file is a directory
479528
struct stat st;
@@ -1938,7 +1987,7 @@ void CometSearchManager::ResetSearchStatus()
19381987
bool CometSearchManager::CreateIndex()
19391988
{
19401989
// Override the Create Index flag to force it to create
1941-
g_staticParams.options.bCreateIndex = 1;
1990+
g_staticParams.options.bCreateIndex = true;
19421991

19431992
// The DoSearch will create the index and exit
19441993
return DoSearch();
@@ -2033,7 +2082,8 @@ bool CometSearchManager::DoSearch()
20332082

20342083
CometSearch::DeallocateMemory(g_staticParams.options.iNumThreads);
20352084

2036-
return bSucceeded;
2085+
if (g_pvInputFiles.size() == 0)
2086+
return bSucceeded;
20372087
}
20382088

20392089
if (g_staticParams.options.bOutputOutFiles)

0 commit comments

Comments
 (0)