Skip to content

Commit

Permalink
create plain peptide .idx file on-the-fly if specified but does not exist
Browse files Browse the repository at this point in the history
  • Loading branch information
jke000 committed Oct 16, 2024
1 parent 0671fa9 commit c40a2ed
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 12 deletions.
28 changes: 23 additions & 5 deletions CometSearch/CometFragmentIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ void CometFragmentIndex::AddFragments(vector<PlainPeptideIndex>& g_vRawPeptides,
if (dCalcPepMass > g_massRange.dMaxMass || dCalcPepMass < g_staticParams.options.dPeptideMassLow)
return;

if (!g_bIndexPrecursors[BIN(dCalcPepMass)])
if (!g_staticParams.options.iFragIndexSkipReadPrecursors && !g_bIndexPrecursors[BIN(dCalcPepMass)])
return;

unsigned int uiCurrentFragmentPeptide = -1;
Expand Down Expand Up @@ -632,11 +632,19 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
{
FILE *fp;
bool bSucceeded;
bool bSwapIdxExtension = false;
string strOut;

string strIndexFile;

strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx");
if (strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
{
strIndexFile = g_staticParams.databaseInfo.szDatabase; // .idx specified but not present to create it
g_staticParams.databaseInfo.szDatabase[strlen(g_staticParams.databaseInfo.szDatabase) - 4] = '\0';
bSwapIdxExtension = true; // need to make database regular fasta, then RunSearch to get plain peptides, then swap back
}
else
strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx"); // fasta specified so add .idx extension

if ((fp = fopen(strIndexFile.c_str(), "wb")) == NULL)
{
Expand All @@ -656,19 +664,27 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
if (!bSucceeded)
return bSucceeded;

// tp->fillPool( g_staticParams.options.iNumThreads < 0 ? 0 : g_staticParams.options.iNumThreads-1);
if (g_massRange.dMaxMass - g_massRange.dMinMass > g_massRange.dMinMass)
g_massRange.bNarrowMassRange = true;
else
g_massRange.bNarrowMassRange = false;

if (bSucceeded)
{
g_staticParams.options.bCreateIndex = true;
g_staticParams.bIndexDb = false;

// this step calls RunSearch just to pull out all peptides
// to write into the .idx peptides/proteins file
bSucceeded = CometSearch::RunSearch(0, 0, tp);

g_staticParams.options.bCreateIndex = false;
g_staticParams.bIndexDb = true;
}

if (bSwapIdxExtension)
strcat(g_staticParams.databaseInfo.szDatabase, ".idx");

if (!bSucceeded)
{
char szErrorMsg[SIZE_ERROR];
Expand Down Expand Up @@ -877,7 +893,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
if (g_bPlainPeptideIndexRead)
return 1;

if (g_staticParams.options.bCreateIndex)
if (g_staticParams.options.bCreateIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx");
else // database already is .idx
strIndexFile = g_staticParams.databaseInfo.szDatabase;
Expand Down Expand Up @@ -1069,6 +1085,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
int iLen;
char szPeptide[MAX_PEPTIDE_LEN];

g_vRawPeptides.clear();
for (size_t it = 0; it < tNumPeptides; ++it)
{
tTmp = fread(&iLen, sizeof(int), 1, fp);
Expand All @@ -1091,7 +1108,6 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)

g_pvProteinsList.clear();
g_pvProteinsList.reserve(tSize);

for (size_t it = 0; it < tSize; ++it)
{
size_t tNumProteinOffsets;
Expand Down Expand Up @@ -1126,13 +1142,15 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)

int iTmp;
char szTmp[MAX_PEPTIDE_LEN];
MOD_SEQS.clear();
for (unsigned long i = 0; i < ulSizeModSeqs; ++i)
{
tTmp = fread(&iTmp, sizeof(int), 1, fp); // read length
tTmp = fread(szTmp, 1, iTmp, fp);
szTmp[iTmp]='\0';
MOD_SEQS.push_back(szTmp);
}
MOD_NUMBERS.clear();
for (unsigned long i = 0; i < ulModNumSize; ++i)
{
ModificationNumber sTmp;
Expand Down
8 changes: 3 additions & 5 deletions CometSearch/CometPreprocess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,7 @@ bool CometPreprocess::ReadPrecursors(MSReader &mstReader)
iEnd = iMaxBin;

for (int x = iStart ; x <= iEnd; ++x)
{
g_bIndexPrecursors[x] = true;
}

// now go through each isotope offset
if (g_staticParams.tolerances.iIsotopeError > 0)
Expand Down Expand Up @@ -375,11 +373,11 @@ bool CometPreprocess::ReadPrecursors(MSReader &mstReader)
iStart = BIN(dMassLow + C13_DIFF * PROTON_MASS); // do -1 offset
iEnd = BIN(dMassHigh + C13_DIFF * PROTON_MASS);
if (iStart < 0)
iStart = 0;
iStart = 0;
if (iEnd > iMaxBin)
iEnd = iMaxBin;
iEnd = iMaxBin;
for (int x = iStart ; x <= iEnd; ++x)
g_bIndexPrecursors[x] = true;
g_bIndexPrecursors[x] = true;

if (g_staticParams.tolerances.iIsotopeError == 6) // do -2 and -3 offsets
{
Expand Down
54 changes: 52 additions & 2 deletions CometSearch/CometSearchManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,55 @@ static bool ValidateSequenceDatabaseFile()
FILE *fpcheck;
char szErrorMsg[SIZE_ERROR];

// if .idx database specified but does not exist, first see if corresponding
// fasta exists and if it does, create the .idx file
if (strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
{
if ((fpcheck=fopen(g_staticParams.databaseInfo.szDatabase, "r")) == NULL)
{
string strFasta = g_staticParams.databaseInfo.szDatabase;
strFasta.erase(strFasta.length() - 4); // remove .idx extension

if ((fpcheck=fopen(strFasta.c_str(), "r")) == NULL)
{
sprintf(szErrorMsg, " Error - peptide index file \"%s\" and corresponding FASTA file\n are both missing.\n",
g_staticParams.databaseInfo.szDatabase);
string strErrorMsg(szErrorMsg);
g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg);
logerr(szErrorMsg);
return false;
}
else
{
fclose(fpcheck);
g_staticParams.options.bCreateIndex = true; // set to true to make the index
return true;
}
}
else
{
string strFasta = g_staticParams.databaseInfo.szDatabase;
strFasta.erase(strFasta.length() - 4); // remove .idx extension

if ((fpcheck=fopen(strFasta.c_str(), "r")) == NULL)
{
sprintf(szErrorMsg, " Error - peptide index file \"%s\" specified is present\n but corresponding FASTA file \"%s\" file is missing.\n",
g_staticParams.databaseInfo.szDatabase,
strFasta.c_str());
string strErrorMsg(szErrorMsg);
g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg);
logerr(szErrorMsg);
return false;
}
else
{
fclose(fpcheck);
g_staticParams.options.bCreateIndex = false;
return true;
}
}
}

#ifndef WIN32
// do a quick test if specified file is a directory
struct stat st;
Expand Down Expand Up @@ -1938,7 +1987,7 @@ void CometSearchManager::ResetSearchStatus()
bool CometSearchManager::CreateIndex()
{
// Override the Create Index flag to force it to create
g_staticParams.options.bCreateIndex = 1;
g_staticParams.options.bCreateIndex = true;

// The DoSearch will create the index and exit
return DoSearch();
Expand Down Expand Up @@ -2033,7 +2082,8 @@ bool CometSearchManager::DoSearch()

CometSearch::DeallocateMemory(g_staticParams.options.iNumThreads);

return bSucceeded;
if (g_pvInputFiles.size() == 0)
return bSucceeded;
}

if (g_staticParams.options.bOutputOutFiles)
Expand Down

0 comments on commit c40a2ed

Please sign in to comment.