IsoSpec 2.2.1
Loading...
Searching...
No Matches
marginalTrek++.cpp
1/*
2 * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3 *
4 * This file is part of IsoSpec.
5 *
6 * IsoSpec is free software: you can redistribute it and/or modify
7 * it under the terms of the Simplified ("2-clause") BSD licence.
8 *
9 * IsoSpec is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 *
13 * You should have received a copy of the Simplified BSD Licence
14 * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15 */
16
17
18#include <cmath>
19#include <algorithm>
20#include <vector>
21#include <cstdlib>
22#include <queue>
23#include <utility>
24#include <cstring>
25#include <string>
26#include <limits>
27#include <memory>
28#include "platform.h"
29#include "marginalTrek++.h"
30#include "conf.h"
31#include "allocator.h"
32#include "operators.h"
33#include "summator.h"
34#include "element_tables.h"
35#include "misc.h"
36
37
38namespace IsoSpec
39{
40
42
50void writeInitialConfiguration(const int atomCnt, const int isotopeNo, const double* lprobs, int* res)
51{
57 // This approximates the mode (heuristics: the mean is close to the mode).
58 for(int i = 0; i < isotopeNo; ++i)
59 res[i] = static_cast<int>( atomCnt * exp(lprobs[i]) ) + 1;
60
61 // The number of assigned atoms above.
62 int s = 0;
63
64 for(int i = 0; i < isotopeNo; ++i) s += res[i];
65
66 int diff = atomCnt - s;
67
68 // Too little: enlarging fist index.
69 if( diff > 0 ){
70 res[0] += diff;
71 }
72 // Too much: trying to redistribute the difference: hopefully the first element is the largest.
73 if( diff < 0 ){
74 diff = abs(diff);
75 int i = 0;
76
77 while( diff > 0){
78 int coordDiff = res[i] - diff;
79
80 if( coordDiff >= 0 ){
81 res[i] -= diff;
82 diff = 0;
83 } else {
84 res[i] = 0;
85 i++;
86 diff = abs(coordDiff);
87 }
88 }
89 }
90
91 // What we computed so far will be very close to the mode: hillclimb the rest of the way
92
93 bool modified = true;
94 double LP = unnormalized_logProb(res, lprobs, isotopeNo);
95 double NLP;
96
97 while(modified)
98 {
99 modified = false;
100 for(int ii = 0; ii < isotopeNo; ii++)
101 for(int jj = 0; jj < isotopeNo; jj++)
102 if(ii != jj && res[ii] > 0)
103 {
104 res[ii]--;
105 res[jj]++;
106 NLP = unnormalized_logProb(res, lprobs, isotopeNo);
107 if(NLP > LP || (NLP == LP && ii > jj))
108 {
109 modified = true;
110 LP = NLP;
111 }
112 else
113 {
114 res[ii]++;
115 res[jj]--;
116 }
117 }
118 }
119}
120
121
122double* getMLogProbs(const double* probs, int isoNo)
123{
129 for(int ii = 0; ii < isoNo; ii++)
130 if(probs[ii] <= 0.0 || probs[ii] > 1.0)
131 throw std::invalid_argument("All isotope probabilities p must fulfill: 0.0 < p <= 1.0");
132
133 double* ret = new double[isoNo];
134
135 // here we change the table of probabilities and log it.
136 for(int i = 0; i < isoNo; i++)
137 {
138 ret[i] = log(probs[i]);
139 for(int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
140 if(elem_table_probability[j] == probs[i])
141 {
142 ret[i] = elem_table_log_probability[j];
143 break;
144 }
145 }
146 return ret;
147}
148
149double get_loggamma_nominator(int x)
150{
151 // calculate log gamma of the nominator calculated in the binomial exression.
152 double ret = lgamma(x+1);
153 return ret;
154}
155
156int verify_atom_cnt(int atomCnt)
157{
158 #if !ISOSPEC_BUILDING_OPENMS
159 if(ISOSPEC_G_FACT_TABLE_SIZE-1 <= atomCnt)
160 throw std::length_error("Subisotopologue too large, size limit (that is, the maximum number of atoms of a single element in a molecule) is: " + std::to_string(ISOSPEC_G_FACT_TABLE_SIZE-1));
161 #endif
162 return atomCnt;
163}
164
166 const double* _masses,
167 const double* _probs,
168 int _isotopeNo,
169 int _atomCnt
170) :
171disowned(false),
172isotopeNo(_isotopeNo),
173atomCnt(verify_atom_cnt(_atomCnt)),
174atom_lProbs(getMLogProbs(_probs, isotopeNo)),
175atom_masses(array_copy<double>(_masses, _isotopeNo)),
176loggamma_nominator(get_loggamma_nominator(_atomCnt)),
177mode_conf(nullptr)
178// Deliberately not initializing mode_lprob
179{}
180
182disowned(false),
183isotopeNo(other.isotopeNo),
184atomCnt(other.atomCnt),
185atom_lProbs(array_copy<double>(other.atom_lProbs, isotopeNo)),
186atom_masses(array_copy<double>(other.atom_masses, isotopeNo)),
187loggamma_nominator(other.loggamma_nominator)
188{
189 if(other.mode_conf == nullptr)
190 {
191 mode_conf = nullptr;
192 // Deliberately not initializing mode_lprob. In this state other.mode_lprob is uninitialized too.
193 }
194 else
195 {
196 mode_conf = array_copy<int>(other.mode_conf, isotopeNo);
197 mode_lprob = other.mode_lprob;
198 }
199}
200
201
202// the move-constructor: used in the specialization of the marginal.
204disowned(other.disowned),
205isotopeNo(other.isotopeNo),
206atomCnt(other.atomCnt),
207atom_lProbs(other.atom_lProbs),
208atom_masses(other.atom_masses),
209loggamma_nominator(other.loggamma_nominator)
210{
211 other.disowned = true;
212 if(other.mode_conf == nullptr)
213 {
214 mode_conf = nullptr;
215 // Deliberately not initializing mode_lprob. In this state other.mode_lprob is uninitialized too.
216 }
217 else
218 {
219 mode_conf = other.mode_conf;
220 mode_lprob = other.mode_lprob;
221 }
222}
223
225{
226 if(!disowned)
227 {
228 delete[] atom_masses;
229 delete[] atom_lProbs;
230 delete[] mode_conf;
231 }
232}
233
234
236{
237 Conf res = new int[isotopeNo];
239 return res;
240}
241
242void Marginal::setupMode()
243{
244 ISOSPEC_IMPOSSIBLE(mode_conf != nullptr);
246 mode_lprob = logProb(mode_conf);
247}
248
249
251{
252 double ret_mass = std::numeric_limits<double>::infinity();
253 for(unsigned int ii = 0; ii < isotopeNo; ii++)
254 if( ret_mass > atom_masses[ii] )
255 ret_mass = atom_masses[ii];
256 return ret_mass*atomCnt;
257}
258
260{
261 double ret_mass = 0.0;
262 for(unsigned int ii = 0; ii < isotopeNo; ii++)
263 if( ret_mass < atom_masses[ii] )
264 ret_mass = atom_masses[ii];
265 return ret_mass*atomCnt;
266}
267
269{
270 double found_prob = -std::numeric_limits<double>::infinity();
271 double found_mass = 0.0; // to avoid uninitialized var warning
272 for(unsigned int ii = 0; ii < isotopeNo; ii++)
273 if( found_prob < atom_lProbs[ii] )
274 {
275 found_prob = atom_lProbs[ii];
276 found_mass = atom_masses[ii];
277 }
278 return found_mass*atomCnt;
279}
280
282{
283 double ret = 0.0;
284 for(unsigned int ii = 0; ii < isotopeNo; ii++)
285 ret += exp(atom_lProbs[ii]) * atom_masses[ii];
286 return ret;
287}
288
289double Marginal::variance() const
290{
291 double ret = 0.0;
292 double mean = getAtomAverageMass();
293 for(size_t ii = 0; ii < isotopeNo; ii++)
294 {
295 double msq = atom_masses[ii] - mean;
296 ret += exp(atom_lProbs[ii]) * msq * msq;
297 }
298 return ret * atomCnt;
299}
300
301double Marginal::getLogSizeEstimate(double logEllipsoidRadius) const
302{
303 if(isotopeNo <= 1)
304 return -std::numeric_limits<double>::infinity();
305
306 const double i = static_cast<double>(isotopeNo);
307 const double k = i - 1.0;
308 const double n = static_cast<double>(atomCnt);
309
310 double sum_lprobs = 0.0;
311 for(int jj = 0; jj < i; jj++)
312 sum_lprobs += atom_lProbs[jj];
313
314 double log_V_simplex = k * log(n) - lgamma(i);
315 double log_N_simplex = lgamma(n+i) - lgamma(n+1.0) - lgamma(i);
316 double log_V_ellipsoid = (k * (log(n) + logpi + logEllipsoidRadius) + sum_lprobs) * 0.5 - lgamma((i+1)*0.5);
317
318 return log_N_simplex + log_V_ellipsoid - log_V_simplex;
319}
320
321
322// this is roughly an equivalent of IsoSpec-Threshold-Generator
324 Marginal&& m,
325 int tabSize,
326 int
327) :
328Marginal(std::move(m)),
329current_count(0),
330orderMarginal(atom_lProbs, isotopeNo),
331pq(),
332allocator(isotopeNo, tabSize),
333min_lprob(*std::min_element(atom_lProbs, atom_lProbs+isotopeNo))
334{
335 int* initialConf = allocator.makeCopy(mode_conf);
336
337 pq.push({mode_lprob, initialConf});
338
339 current_count = 0;
340
341 fringe.resize_and_wipe(1);
342
343 current_bucket = 0;
344 initialized_until = 1;
345
346 add_next_conf();
347}
348
349
350bool MarginalTrek::add_next_conf()
351{
356 if(pq.empty())
357 {
358 current_bucket++;
359 while(current_bucket < initialized_until && fringe[current_bucket].empty())
360 {
361// std::cout << "EMPTY bucket, id: " << current_bucket << std::endl;
362 current_bucket++;
363 }
364
365// std::cout << "Entering bucket, size: " << fringe[current_bucket].size() << std::endl;
366
367 if(current_bucket >= initialized_until)
368 return false;
369
370 // std::cout << "Fringe size at pop: " << fringe[current_bucket].size() << std::endl;
371 pq = std::priority_queue<ProbAndConfPtr, pod_vector<ProbAndConfPtr> >(std::less<ProbAndConfPtr>(), pod_vector<ProbAndConfPtr>(std::move(fringe[current_bucket])));
372 };
373
374 double logprob = pq.top().first;
375 Conf topConf = pq.top().second;
376
377 pq.pop();
378 ++current_count;
379
380 _confs.push_back(topConf);
381
382 _conf_masses.push_back(calc_mass(topConf, atom_masses, isotopeNo));
383 _conf_lprobs.push_back(logprob);
384
385 for( unsigned int j = 0; j < isotopeNo; ++j )
386 {
387 if( topConf[j] > mode_conf[j])
388 continue;
389
390 if( topConf[j] > 0 )
391 {
392 for( unsigned int i = 0; i < isotopeNo; ++i )
393 {
394 if( topConf[i] < mode_conf[i] )
395 continue;
396 // Growing index different than decreasing one AND
397 // Remain on simplex condition.
398 if( i != j ){
399 Conf acceptedCandidate = allocator.makeCopy(topConf);
400
401 ++acceptedCandidate[i];
402 --acceptedCandidate[j];
403
404 double new_prob = logProb(acceptedCandidate);
405 size_t bucket_nr = bucket_no(new_prob);
406
407 if(bucket_nr >= initialized_until)
408 {
409 // std::cout << "Extending to: " << bucket_nr << std::endl;
410 initialized_until = bucket_nr+1;
411 fringe.resize_and_wipe(initialized_until);
412 }
413
414 ISOSPEC_IMPOSSIBLE(bucket_nr < current_bucket);
415 if(bucket_nr == current_bucket)
416 pq.push({new_prob, acceptedCandidate});
417 else
418 fringe[bucket_nr].push_back({new_prob, acceptedCandidate});
419
420 }
421
422 if( topConf[i] > mode_conf[i] )
423 break;
424 }
425 }
426 if( topConf[j] < mode_conf[j] )
427 break;
428 }
429
430 return true;
431}
432
433
434MarginalTrek::~MarginalTrek()
435{
436 const size_t fringe_size = fringe.size();
437 for(size_t ii = 0; ii < fringe_size; ii++)
438 fringe[ii].clear();
439}
440
441
442
444 double lCutOff,
445 bool sort,
446 int tabSize,
447 int
448) : Marginal(std::move(m)),
449allocator(isotopeNo, tabSize)
450{
451 Conf currentConf = allocator.makeCopy(mode_conf);
452 if(logProb(currentConf) >= lCutOff)
453 {
454 configurations.push_back(currentConf);
455 lProbs.push_back(mode_lprob);
456 }
457
458 unsigned int idx = 0;
459
460 std::unique_ptr<double[]> prob_partials(new double[isotopeNo]);
461 std::unique_ptr<double[]> prob_part_acc(new double[isotopeNo+1]);
462 prob_part_acc[0] = loggamma_nominator;
463
464 while(idx < configurations.size())
465 {
466 currentConf = configurations[idx];
467 idx++;
468
469 for(size_t ii = 0; ii < isotopeNo; ii++)
470 prob_partials[ii] = minuslogFactorial(currentConf[ii]) + currentConf[ii] * atom_lProbs[ii];
471
472 for(unsigned int ii = 0; ii < isotopeNo; ii++ )
473 {
474 if(currentConf[ii] > mode_conf[ii])
475 continue;
476
477 if(currentConf[ii] != 0)
478 {
479 double prev_partial_ii = prob_partials[ii];
480 currentConf[ii]--;
481 prob_partials[ii] = minuslogFactorial(currentConf[ii]) + currentConf[ii] * atom_lProbs[ii];
482
483 for(unsigned int jj = 0; jj < isotopeNo; jj++ )
484 {
485 prob_part_acc[jj+1] = prob_part_acc[jj] + prob_partials[jj];
486
487 if(currentConf[jj] < mode_conf[jj])
488 continue;
489
490 if( ii != jj )
491 {
492 double logp = prob_part_acc[jj] + minuslogFactorial(1+currentConf[jj]) + (1+currentConf[jj]) * atom_lProbs[jj];
493 for(size_t kk = jj+1; kk < isotopeNo; kk++)
494 logp += prob_partials[kk];
495
496 if (logp >= lCutOff)
497 {
498 auto tmp = allocator.makeCopy(currentConf);
499 tmp[jj]++;
500 configurations.push_back(tmp);
501 lProbs.push_back(logp);
502 }
503 }
504 else
505 prob_part_acc[jj+1] = prob_part_acc[jj] + prob_partials[jj];
506
507 if (currentConf[jj] > mode_conf[jj])
508 break;
509 }
510 currentConf[ii]++;
511 prob_partials[ii] = prev_partial_ii;
512 }
513
514 if(currentConf[ii] < mode_conf[ii])
515 break;
516 }
517 }
518
519 no_confs = configurations.size();
520 confs = configurations.data();
521
522 if(sort && no_confs > 0)
523 {
524 std::unique_ptr<size_t[]> order_arr(get_inverse_order(lProbs.data(), no_confs));
525 impose_order(order_arr.get(), no_confs, lProbs.data(), confs);
526 }
527
528 probs = new double[no_confs];
529 masses = new double[no_confs];
530
531
532 for(unsigned int ii = 0; ii < no_confs; ii++)
533 {
534 probs[ii] = exp(lProbs[ii]);
535 masses[ii] = calc_mass(confs[ii], atom_masses, isotopeNo);
536 }
537
538 lProbs.push_back(-std::numeric_limits<double>::infinity());
539}
540
541
543{
544 if(masses != nullptr)
545 delete[] masses;
546 if(probs != nullptr)
547 delete[] probs;
548}
549
550
551
552
553
554
555
557: Marginal(std::move(m)), current_threshold(1.0), allocator(isotopeNo, tabSize),
558equalizer(isotopeNo), keyHasher(isotopeNo)
559{
560 fringe.push_back(mode_conf);
561 lProbs.push_back(std::numeric_limits<double>::infinity());
562 fringe_unn_lprobs.push_back(unnormalized_logProb(mode_conf));
563 lProbs.push_back(-std::numeric_limits<double>::infinity());
564 guarded_lProbs = lProbs.data()+1;
565}
566
567bool LayeredMarginal::extend(double new_threshold, bool do_sort)
568{
569 new_threshold -= loggamma_nominator;
570 if(fringe.empty())
571 return false;
572
573 lProbs.pop_back(); // Remove the +inf guardian
574
575 pod_vector<Conf> new_fringe;
576 pod_vector<double> new_fringe_unn_lprobs;
577
578 while(!fringe.empty())
579 {
580 Conf currentConf = fringe.back();
581 fringe.pop_back();
582
583 double opc = fringe_unn_lprobs.back();
584
585 fringe_unn_lprobs.pop_back();
586 if(opc < new_threshold)
587 {
588 new_fringe.push_back(currentConf);
589 new_fringe_unn_lprobs.push_back(opc);
590 }
591
592 else
593 {
594 configurations.push_back(currentConf);
595 lProbs.push_back(opc+loggamma_nominator);
596 for(unsigned int ii = 0; ii < isotopeNo; ii++ )
597 {
598 if(currentConf[ii] > mode_conf[ii])
599 continue;
600
601 if(currentConf[ii] > 0)
602 {
603 currentConf[ii]--;
604 for(unsigned int jj = 0; jj < isotopeNo; jj++ )
605 {
606 if(currentConf[jj] < mode_conf[jj])
607 continue;
608
609 if( ii != jj )
610 {
611 Conf nc = allocator.makeCopy(currentConf);
612 nc[jj]++;
613
614 double lpc = unnormalized_logProb(nc);
615 if(lpc >= new_threshold)
616 {
617 fringe.push_back(nc);
618 fringe_unn_lprobs.push_back(lpc);
619 }
620 else
621 {
622 new_fringe.push_back(nc);
623 new_fringe_unn_lprobs.push_back(lpc);
624 }
625 }
626
627 if(currentConf[jj] > mode_conf[jj])
628 break;
629 }
630 currentConf[ii]++;
631 }
632
633 if(currentConf[ii] < mode_conf[ii])
634 break;
635 }
636 }
637 }
638
639 current_threshold = new_threshold;
640 fringe.swap(new_fringe);
641 fringe_unn_lprobs.swap(new_fringe_unn_lprobs);
642
643 if(do_sort)
644 {
645 size_t to_sort_size = configurations.size() - probs.size();
646 if(to_sort_size > 0)
647 {
648 std::unique_ptr<size_t[]> order_arr(get_inverse_order(lProbs.data()+1+probs.size(), to_sort_size));
649 double* P = lProbs.data()+1+probs.size();
650 Conf* C = configurations.data()+probs.size();
651 size_t* O = order_arr.get();
652 impose_order(O, to_sort_size, P, C);
653 }
654 }
655
656 if(probs.capacity() * 2 < configurations.size() + 2)
657 {
658 // Reserve space for new values
659 probs.reserve(configurations.size());
660 masses.reserve(configurations.size());
661 } // Otherwise we're growing slowly enough that standard reallocations on push_back work better - we waste some extra memory
662 // but don't reallocate on every call
663
664// printVector(lProbs);
665 for(unsigned int ii = probs.size(); ii < configurations.size(); ii++)
666 {
667 probs.push_back(exp(lProbs[ii+1]));
668 masses.push_back(calc_mass(configurations[ii], atom_masses, isotopeNo));
669 }
670
671 lProbs.push_back(-std::numeric_limits<double>::infinity()); // Restore guardian
672
673 guarded_lProbs = lProbs.data()+1; // Vector might have reallocated its backing storage
674
675 return true;
676}
677
678
680{
681 double ret = std::numeric_limits<double>::infinity();
682 for(pod_vector<double>::const_iterator it = masses.cbegin(); it != masses.cend(); ++it)
683 if(*it < ret)
684 ret = *it;
685 return ret;
686}
687
688
690{
691 double ret = -std::numeric_limits<double>::infinity();
692 for(pod_vector<double>::const_iterator it = masses.cbegin(); it != masses.cend(); ++it)
693 if(*it > ret)
694 ret = *it;
695 return ret;
696}
697
698} // namespace IsoSpec
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
double get_max_mass() const
Get the maximal mass in current layer.
double get_min_mass() const
Get the minimal mass in current layer.
LayeredMarginal(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
The marginal distribution class (a subisotopologue).
Conf computeModeConf() const
The the probability of the mode subisotopologue.
Marginal(const double *_masses, const double *_probs, int _isotopeNo, int _atomCnt)
Class constructor.
const unsigned int atomCnt
double getLightestConfMass() const
Get the mass of the lightest subisotopologue.
const unsigned int isotopeNo
const double *const atom_masses
double variance() const
Calculate the variance of the theoretical distribution describing the subisotopologue.
ISOSPEC_FORCE_INLINE double unnormalized_logProb(Conf conf) const
Calculate the log-probability of a given subisotopologue.
const double loggamma_nominator
double getHeaviestConfMass() const
Get the mass of the heaviest subisotopologue.
double getAtomAverageMass() const
The average mass of a single atom.
double getMonoisotopicConfMass() const
Get the mass of the monoisotopic subisotopologue.
virtual ~Marginal()
Destructor.
double getLogSizeEstimate(double logEllipsoidRadius) const
Return estimated logarithm of size of the marginal at a given ellipsoid radius.
const double *const atom_lProbs
MarginalTrek(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
virtual ~PrecalculatedMarginal()
Destructor.
PrecalculatedMarginal(Marginal &&m, double lCutOff, bool sort=true, int tabSize=1000, int hashSize=1000)
The move constructor (disowns the Marginal).
double * getMLogProbs(const double *probs, int isoNo)
void writeInitialConfiguration(const int atomCnt, const int isotopeNo, const double *lprobs, int *res)
Find one of the most probable subisotopologues.