High Resolution Isotope Cluster Calculator

English

Motivation

There are several open source project for mass spectrometry support "isotope cluster" calculation routine, but most of them are low resolution.  The proteomics project that I was contributed running LC/FT-ICR so that we definitry need high resolution.

Implementation 

Most of the case, input chemical formula is the amino acid sequence in proteomics otherwise chemical formula.  QtPlatz already has a simple chemical formula parser that convert to elemental composition list from simple chemical formula that user enter from keyboard.  Whichever input is amino acid sequence or chemical formula, it will be converted to elemental composition.  Two classes explained below holds related information;

 

namespace adcontrols {
    namespace mol {
       class isotope { double mass, double abundance; }
       class molecule { std::vecto< isotope > cluster; std::vector< element > elements; }
    }
}

For compute an isotope cluster, set all elemet information into molecule::elements that is automatically carried out by adcontrols::ChemicalFormula::getComposition() method.  And then, throw molecule class into an isotopeCluster functor object.  The isotopeCluster object will fills molecule::cluster member that is the output.  The isotopeCluster has a property called threshold_daltons that user can specify mass resolution.  Isotope cluster calculation is quite simple loop as below;


bool
isotopeCluster::operator()( mol::molecule& mol ) const
{
    mol.cluster.clear();
    // add one isotope into cluster data, for for-loop shown in red going forward. 
    mol << mol::isotope( 0.0, 1.0 );
    
    for ( auto& element : mol.elements ) { // walk-through all elements in the molecule

        for ( int k = 0; k < element.count(); ++k ) {   // loop number of atoms in the molecule

            std::vector< mol::isotope > cluster;
            
            for ( auto& p: mol.cluster ) {  // loop all computed isotopes

                for ( auto& i: element.isotopes() ) {   // loop all isotopes

                    mol::isotope mi( p.mass + i.mass, p.abundance * i.abundabnce );

                    // save new isotope cluster data in mass orderd array
                    auto it = std::lower_bound( cluster.begin(), cluster.end(), mi.mass
                                     , []( const mol::isotope& mi, const double& mass ){
                                          return mi.mass < mass; });

                    if ( it == cluster.end() || !marge( *it, mi ) )
                        cluster.insert( it, mi );
                }
            }
            
            mol.cluster = cluster;
        }
    }
    return true;
}

 

 


 

TEST

 

#include <adcontrols/chemicalformula.hpp>
#include <adcontrols/isotopecluster.hpp>
#include <adcontrols/molecule.hpp>
#include <boost/format.hpp>
#include <iostream>
#include <chrono>
#include <cstring>
#include <numeric>

bool
scanner( std::string& line, adcontrols::mol::molecule& mol )
{
    return adcontrols::ChemicalFormula::getComposition( mol.elements, line );
}

int
main(int argc, char * argv[])
{
    adcontrols::isotopeCluster cluster;
    
    double threshold_daltons = cluster.threshold_daltons();
    
    if ( --argc ) {
        ++argv;
        while( argc-- ) {
            if ( std::strcmp( *argv, "-d" ) == 0 && argc ) {
                --argc;
                ++argv;
                threshold_daltons = atof( *argv ) / 1000;
                cluster.threshold_daltons( threshold_daltons );
            }
        }
    }
    std::cout <<
        boost::format( "Set daltons threshold to: %gmDa\n" )
        % ( cluster.threshold_daltons() * 1000);

    std::string line;
    std::cerr << "Enter formula: ";
    while (std::getline(std::cin, line))  {
            
        if (line.empty() || line[0] == 'q' || line[0] == 'Q')
            break;
            
        adcontrols::mol::molecule mol;
        if ( scanner( line, mol ) ) {
            std::cout << "formula: ";
            std::for_each( mol.elements.begin(), mol.elements.end()
                           , [&]( const adcontrols::mol::element& e ){
                               std::cout << e.symbol() << e.count() << " ";
                           });
            std::cout << std::endl;
            
            std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
            
            cluster( mol );
            
            std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
            
            auto it = std::max_element( mol.cluster.begin(), mol.cluster.end()
                                        , [](const adcontrols::mol::isotope& a
                                             , const adcontrols::mol::isotope& b){
                                            return a.abundance < b.abundance;});
            const double pmax = it->abundance;

            const double sum = std::accumulate(
                mol.cluster.begin()
                , mol.cluster.end(), 0.0
                , []( double x
                      , const adcontrols::mol::isotope& a ){
                    return x + a.abundance;
                } );
            
            int idx = 0;
            for ( auto& i: mol.cluster ) {
                double ratio = i.abundance / pmax * 100.0;
                if ( ratio >= 0.05 )
                    std::cout << boost::format( "[%2d] %.8f\t%.8e\t%.6f\n" )
                        % idx++
                        % i.mass
                        % i.abundance
                        % ratio;
            }
            namespace chrono = std::chrono;
            auto elapsed = chrono::duration_cast< chrono::microseconds >( end - start );
            std::cout << "processed "
                      << mol.cluster.size() << " peaks in: "
                      << double( elapsed.count() ) / 1000 << "ms."
                      << std::endl;
            std::cout << boost::format( "sum =%.14f" ) % sum << std::endl;
        } else {
            std::cout << "Parse failed: " << line << std::endl;
        }
        std::cerr << "Enter formula: ";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}


Enter formula: CH3(C3H4(NH2)2)18CH3
formula: H150 N36 C56
[ 0] 1327.28441898      4.71232287e-001 100.000000
[ 1] 1328.28145388      6.26594402e-002 13.296933
[ 2] 1328.28777382      2.85416341e-001 60.568078
[ 3] 1328.29069573      8.12969187e-003 1.725198
[ 4] 1329.27848877      1.96079784e-003 0.416100
[ 5] 1329.27848877      2.08937474e-003 0.443385
[ 6] 1329.28480871      3.79516189e-002 8.053697
[ 7] 1329.28773062      3.00277683e-004 0.063722
[ 8] 1329.28773062      7.80721975e-004 0.165677
[ 9] 1329.29112866      8.48921040e-002 18.014917
[10] 1329.29405057      4.92399815e-003 1.044920
[11] 1330.28184361      1.88815897e-003 0.400685
[12] 1330.28184361      5.64952739e-004 0.119888
[13] 1330.28816355      1.12880459e-002 2.395431
[14] 1330.29108546      4.43638802e-004 0.094144
[15] 1330.29448350      1.65270588e-002 3.507200
[16] 1330.29740540      1.46455722e-003 0.310793
[17] 1331.28519845      6.56310713e-004 0.139275
[18] 1331.29151839      2.19759188e-003 0.466350
[19] 1331.29783833      2.36846635e-003 0.502611
[20] 1331.30076024      2.85124553e-004 0.060506
[21] 1332.29487323      3.14933376e-004 0.066832
[22] 1332.30119317      2.66413560e-004 0.056536
processed 5033 peaks in: 125.129ms.
sum =0.99999999999840

User login