libpappsomspp
Library for mass spectrometry
grpexperiment.cpp
Go to the documentation of this file.
1
2/*******************************************************************************
3 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
4 *
5 * This file is part of the PAPPSOms++ library.
6 *
7 * PAPPSOms++ is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * PAPPSOms++ is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
19 *
20 * Contributors:
21 * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
22 *implementation
23 ******************************************************************************/
24
25#include "grpexperiment.h"
26#include "grpprotein.h"
27#include "grppeptide.h"
28
29#include "grpgroup.h"
30#include "grpsubgroup.h"
31#include "../pappsoexception.h"
32
33#include <QObject>
34
35using namespace pappso;
36
37GrpExperiment::GrpExperiment(GrpGroupingMonitorInterface *p_monitor)
38{
39 mp_monitor = p_monitor;
40}
41
// NOTE(review): the declarator that belongs above this empty body is absent
// from this listing (the listing numbers jump from 41 to 43). Presumably this
// is the destructor of GrpExperiment -- confirm against the original file.
43{
44}
// NOTE(review): listing lines 46 (declarator) and 48 (the only statement of
// the body) are absent here. Presumably this is
// setRemoveNonInformativeSubgroups(bool ok), which appears in the member index
// at the end of this listing -- confirm against the original file.
45void
47{
49}
50
// Builds a GrpPeptideSet from the raw pointer held by sp_protein.
// NOTE(review): listing lines 52 (declarator) and 55 (the statement that
// consumes peptide_set) are absent here. Presumably this is one of the two
// add{Pre,Post}GroupingGrpProteinSpRemoval(GrpProteinSp sp_protein) members
// named in the member index at the end of this listing -- which one cannot be
// told from this listing; confirm against the original file.
51void
53{
54 GrpPeptideSet peptide_set(sp_protein.get());
56}
57
58
// Builds a GrpPeptideSet from the raw pointer held by sp_protein.
// NOTE(review): listing lines 60 (declarator) and 63 (the statement that
// consumes peptide_set) are absent here. Presumably this is the other of the
// two add{Pre,Post}GroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
// members named in the member index at the end of this listing -- confirm
// against the original file.
59void
61{
62 GrpPeptideSet peptide_set(sp_protein.get());
64}
65
66std::vector<GrpGroupSpConst>
68{
69 std::vector<GrpGroupSpConst> grp_list;
70 for(GrpGroupSp group : m_grpGroupSpList)
71 {
72 grp_list.push_back(group);
73 }
74 return grp_list;
75}
76
78GrpExperiment::getGrpProteinSp(const QString &accession,
79 const QString &description)
80{
81 GrpProtein grpProtein(accession, description);
82 auto insertedPair = m_mapProteins.insert(std::pair<QString, GrpProteinSp>(
83 accession, std::make_shared<GrpProtein>(grpProtein)));
84 if(insertedPair.second)
85 {
86 m_grpProteinList.push_back(insertedPair.first->second);
87 m_remainingGrpProteinList.push_back(insertedPair.first->second.get());
88 }
89 return (insertedPair.first->second);
90}
91
94 const QString &sequence,
95 pappso_double mass)
96{
97 proteinSp.get()->countPlus();
98 GrpPeptideSp sp_grppeptide =
99 std::make_shared<GrpPeptide>(GrpPeptide(sequence, mass));
100
101 auto insertedPair = m_mapPeptides.insert(
102 std::pair<QString, std::map<unsigned long, GrpPeptideSp>>(
103 sp_grppeptide.get()->m_sequence,
104 std::map<unsigned long, GrpPeptideSp>()));
105 auto secondInsertedPair =
106 insertedPair.first->second.insert(std::pair<unsigned long, GrpPeptideSp>(
107 (unsigned long)(mass * 100), sp_grppeptide));
108 if(secondInsertedPair.second)
109 {
110 m_grpPeptideList.push_back(secondInsertedPair.first->second);
111 }
112 proteinSp.get()->push_back(secondInsertedPair.first->second.get());
113 return (secondInsertedPair.first->second);
114}
115
// Runs the grouping (named startGrouping in the debug messages below):
//  - marks grouping as started and clears m_mapPeptides / m_mapProteins;
//  - for every protein of m_remainingGrpProteinList: strip() it, skip it when
//    its m_count is 0, otherwise wrap it in a sub group and hand it to
//    addSubGroupSp() together with a local GrpMapPeptideToGroup;
//  - calls clear(m_grpGroupSpList) on that local map (presumably this is
//    where the groups reach m_grpGroupSpList -- confirm in
//    GrpMapPeptideToGroup);
//  - removes every group for which
//    containsAny(m_grpPostGroupingProteinListRemoval) is true;
//  - calls numbering().
// NOTE(review): this listing lost several lines (listing numbers 117, 121,
// 128, 132, 153, 163, 165 and 178): the declarator, the statements guarded by
// the "mp_monitor != nullptr" tests, the end of one debug statement and the
// conditions of two braced blocks. The text below is therefore not the
// complete function; restore it from the original file before compiling.
116void
118{
119 qDebug() << "GrpExperiment::startGrouping begin";
120 if(mp_monitor != nullptr)
// NOTE(review): listing line 121 (start of the guarded monitor call) missing.
122 m_grpPeptideList.size());
123 m_isGroupingStarted = true;
124 m_mapPeptides.clear();
125 m_mapProteins.clear();
126 qDebug() << "GrpExperiment::startGrouping sort protein list "
127 "m_remainingGrpProteinList.size() "
// NOTE(review): listing line 128 (end of the debug statement) missing.
129 // m_remainingGrpProteinList.sort();
130 // m_remainingGrpProteinList.unique();
131
// NOTE(review): listing line 132 (condition of the block below) missing.
133 {
134 // TODO clean protein list to remove contaminant peptides before grouping
135 }
136
137
138 GrpMapPeptideToGroup grp_map_peptide_to_group;
139 qDebug() << "GrpExperiment::startGrouping grouping begin";
140 for(auto p_grpProtein : m_remainingGrpProteinList)
141 {
142 p_grpProtein->strip();
143 if(p_grpProtein->m_count == 0)
144 {
145 // no peptides : do not group this protein
146 }
147 else
148 {
149 GrpSubGroupSp grpSubGroupSp =
150 GrpSubGroup(p_grpProtein).makeGrpSubGroupSp();
151
152 if(mp_monitor != nullptr)
// NOTE(review): listing line 153 (the guarded monitor call) missing; as
// printed here the test would wrongly appear to guard addSubGroupSp().
154 this->addSubGroupSp(grp_map_peptide_to_group, grpSubGroupSp);
155 }
156 }
157 grp_map_peptide_to_group.clear(m_grpGroupSpList);
158 qDebug() << "GrpExperiment::startGrouping grouping end";
159
160 qDebug() << "GrpExperiment::startGrouping grouping m_grpGroupSpList.size() "
161 << m_grpGroupSpList.size();
162
// NOTE(review): listing lines 163 (condition) and 165 (body) missing.
164 {
166 }
167
168 // post grouping protein group removal
169 // remove any group containing contaminants
170 m_grpGroupSpList.remove_if([this](GrpGroupSp &groupSp) {
171 return (
172 groupSp.get()->containsAny(this->m_grpPostGroupingProteinListRemoval));
173 });
174
175
176 numbering();
177 if(mp_monitor != nullptr)
// NOTE(review): listing line 178 (the guarded monitor call) missing.
179 // GrpGroup(this, *m_remainingGrpProteinList.begin());
180 qDebug() << "GrpExperiment::startGrouping end";
181}
182
183
185{
186 ContainsAny(const GrpPeptideSet &peptide_set) : _peptide_set(peptide_set)
187 {
188 }
189
190 typedef bool result_type;
191
192 bool
193 operator()(const GrpGroupSp &testGroupSp)
194 {
195 return testGroupSp.get()->containsAny(_peptide_set);
196 }
197
199};
200
201
202void
204 GrpSubGroupSp &grpSubGroupSp) const
205{
206 qDebug() << "GrpExperiment::addSubGroupSp begin "
207 << grpSubGroupSp.get()->getFirstAccession();
208
209 std::list<GrpGroupSp> new_group_list;
210 grp_map_peptide_to_group.getGroupList(grpSubGroupSp.get()->getPeptideSet(),
211 new_group_list);
212
213 if(new_group_list.size() == 0)
214 {
215 qDebug() << "GrpExperiment::addSubGroupSp create a new group";
216 // create a new group
217 GrpGroupSp sp_group = GrpGroup(grpSubGroupSp).makeGrpGroupSp();
218 // m_grpGroupSpList.push_back(sp_group);
219
220 grp_map_peptide_to_group.set(grpSubGroupSp.get()->getPeptideSet(),
221 sp_group);
222 }
223 else
224 {
225 qDebug() << "GrpExperiment::addSubGroupSp fusion groupList.size() "
226 << new_group_list.size();
227 // fusion group and add the subgroup
228 auto itGroup = new_group_list.begin();
229 GrpGroupSp p_keepGroup = *itGroup;
230 qDebug() << "GrpExperiment::addSubGroupSp "
231 "p_keepGroup->addSubGroupSp(grpSubGroupSp) "
232 << p_keepGroup.get();
233 p_keepGroup->addSubGroupSp(grpSubGroupSp);
234 grp_map_peptide_to_group.set(grpSubGroupSp.get()->getPeptideSet(),
235 p_keepGroup);
236
237 itGroup++;
238 while(itGroup != new_group_list.end())
239 {
240 qDebug()
241 << "GrpExperiment::addSubGroupSp p_keepGroup->addGroup(*itGroup) "
242 << itGroup->get();
243 p_keepGroup->addGroup(itGroup->get());
244 grp_map_peptide_to_group.set((*itGroup)->getGrpPeptideSet(),
245 p_keepGroup);
246
247 // m_grpGroupSpList.remove_if([itGroup](GrpGroupSp & groupSp) {
248 // return (itGroup->get() == groupSp.get()) ;
249 //});
250 itGroup++;
251 }
252 }
253
254 qDebug() << "GrpExperiment::addSubGroupSp end";
255}
256
// Numbers the groups (named numbering in the debug messages below):
//  1. calls numbering() on every group of m_grpGroupSpList;
//  2. sorts m_grpGroupSpList with operator< of the pointed-to groups;
//  3. assigns group numbers 1, 2, 3, ... in the sorted order through
//     setGroupNumber().
// NOTE(review): listing lines 258 (declarator) and 262 (the monitor call
// guarded by the nullptr test) are absent here; as printed, the test would
// wrongly appear to guard the first loop. Restore both lines from the
// original file before compiling.
257void
259{
260 qDebug() << "GrpExperiment::numbering begin";
261 if(mp_monitor != nullptr)
263 for(auto &&group_sp : m_grpGroupSpList)
264 {
265 group_sp.get()->numbering();
266 }
267 m_grpGroupSpList.sort([](GrpGroupSp &first, GrpGroupSp &second) {
268 return ((*first.get()) < (*second.get()));
269 });
// group numbers start at 1
270 unsigned int i = 1;
271 for(auto &&group_sp : m_grpGroupSpList)
272 {
273 group_sp.get()->setGroupNumber(i);
274 i++;
275 }
276
277 qDebug() << "GrpExperiment::numbering end";
278}
279
// Returns the proteins of m_grpProteinList whose getGroupNumber() is greater
// than 0, in list order.
// Throws PappsoException ("unable to get grouped protein list before
// grouping") when the guard below fires.
// NOTE(review): listing lines 281 (declarator; presumably
// getGrpProteinSpList() const, as named in the member index at the end of
// this listing) and 284 (the condition of the throwing block; presumably a
// test on m_isGroupingStarted) are absent here -- confirm both against the
// original file.
280std::vector<GrpProteinSpConst>
282{
283 std::vector<GrpProteinSpConst> grouped_protein_list;
285 {
286 throw PappsoException(
287 QObject::tr("unable to get grouped protein list before grouping"));
288 }
289 for(auto &&protein_sp : m_grpProteinList)
290 {
291 if(protein_sp.get()->getGroupNumber() > 0)
292 {
293 grouped_protein_list.push_back(protein_sp);
294 }
295 }
296 return grouped_protein_list;
297}
298
// Rebuilds m_grpGroupSpList after asking every group to drop its
// non-informative sub groups (named removeNonInformativeSubGroups in the
// debug messages below):
//  - the current list is copied aside and m_grpGroupSpList is emptied;
//  - a group whose removeNonInformativeSubGroups() returns true is regrouped:
//    its sub groups are fed one by one to addSubGroupSp() with a fresh
//    GrpMapPeptideToGroup, whose clear(m_grpGroupSpList) is then called
//    (presumably appending the resulting groups -- confirm in
//    GrpMapPeptideToGroup);
//  - a group returning false is pushed back unchanged.
// NOTE(review): listing lines 300 (declarator), 304, 313 and 337 (the monitor
// calls guarded by the nullptr tests) are absent here; as printed, the test
// inside the loop would wrongly appear to guard the regrouping "if".
// Restore these lines from the original file before compiling.
299void
301{
302 qDebug() << "GrpExperiment::removeNonInformativeSubGroups begin";
303 if(mp_monitor != nullptr)
305 m_grpGroupSpList.size());
306
307 std::list<GrpGroupSp> old_grp_group_sp_list(m_grpGroupSpList);
308 m_grpGroupSpList.clear();
309 auto it_group = old_grp_group_sp_list.begin();
310 while(it_group != old_grp_group_sp_list.end())
311 {
312 if(mp_monitor != nullptr)
314 if(it_group->get()->removeNonInformativeSubGroups())
315 {
316 // need to regroup it
317 GrpGroupSp old_group_sp = *it_group;
318 GrpMapPeptideToGroup grp_map_peptide_to_group;
319
// work on a copy of the sub group list of the old group
320 std::list<GrpSubGroupSp> dispatch_sub_group_set =
321 old_group_sp.get()->getSubGroupSpList();
322 for(GrpSubGroupSp &grp_subgroup : dispatch_sub_group_set)
323 {
324 addSubGroupSp(grp_map_peptide_to_group, grp_subgroup);
325 }
326 grp_map_peptide_to_group.clear(m_grpGroupSpList);
327 }
328 else
329 {
330 qDebug() << "GrpExperiment::removeNonInformativeSubGroups no "
331 "removeNonInformativeSubGroups";
332 m_grpGroupSpList.push_back(*it_group);
333 }
334 it_group++;
335 }
336 if(mp_monitor != nullptr)
338 m_grpGroupSpList.size());
339
340 qDebug() << "GrpExperiment::removeNonInformativeSubGroups end";
341}
void addSubGroupSp(GrpMapPeptideToGroup &grp_map_peptide_to_group, GrpSubGroupSp &grpSubGroupSp) const
std::list< GrpGroupSp > m_grpGroupSpList
Definition: grpexperiment.h:58
GrpPeptideSp & setGrpPeptide(const GrpProteinSp &proteinSp, const QString &sequence, pappso_double mass)
GrpGroupingMonitorInterface * mp_monitor
Definition: grpexperiment.h:44
GrpProteinSp & getGrpProteinSp(const QString &acc, const QString &description)
void addPostGroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
protein to remove with its entire group after grouping is completed typically : to use with protein c...
bool m_isRemoveNonInformativeSubgroups
Definition: grpexperiment.h:45
std::list< GrpPeptideSp > m_grpPeptideList
Definition: grpexperiment.h:49
std::vector< GrpGroupSpConst > getGrpGroupSpList() const
void setRemoveNonInformativeSubgroups(bool ok)
GrpPeptideSet m_grpPreGroupingProteinListRemoval
Definition: grpexperiment.h:56
void addPreGroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
protein peptides to remove before grouping typically : remove protein contaminants in special metapro...
std::map< QString, std::map< unsigned long, GrpPeptideSp > > m_mapPeptides
Definition: grpexperiment.h:47
std::list< GrpProteinSp > m_grpProteinList
Definition: grpexperiment.h:50
GrpPeptideSet m_grpPostGroupingProteinListRemoval
Definition: grpexperiment.h:54
std::list< GrpProtein * > m_remainingGrpProteinList
Definition: grpexperiment.h:52
std::vector< GrpProteinSpConst > getGrpProteinSpList() const
std::map< QString, GrpProteinSp > m_mapProteins
Definition: grpexperiment.h:46
GrpGroupSp makeGrpGroupSp()
Definition: grpgroup.cpp:98
virtual void removingNonInformativeSubGroupsInGroup()=0
virtual void startNumberingAllGroups(std::size_t total_number_group)=0
virtual void startRemovingNonInformativeSubGroupsInAllGroups(std::size_t total_number_group)=0
virtual void startGrouping(std::size_t total_number_protein, std::size_t total_number_peptide)=0
virtual void stopRemovingNonInformativeSubGroupsInAllGroups(std::size_t total_number_group)=0
void clear(std::list< GrpGroupSp > &grp_group_list)
void getGroupList(const GrpPeptideSet &peptide_set_in, std::list< GrpGroupSp > &impacted_group_list) const
get all groups concerned by a list of peptides
void set(const GrpPeptideSet &peptide_set_in, GrpGroupSp grp_group)
set peptide keys pointing on the group
unsigned int size() const
Definition: grppeptideset.h:54
void addAll(const GrpPeptideSet &peptideSet)
GrpSubGroupSp makeGrpSubGroupSp()
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< GrpProtein > GrpProteinSp
Definition: grpprotein.h:37
std::shared_ptr< GrpSubGroup > GrpSubGroupSp
Definition: grpsubgroup.h:39
std::shared_ptr< GrpPeptide > GrpPeptideSp
Definition: grppeptide.h:40
double pappso_double
A type definition for doubles.
Definition: types.h:50
std::shared_ptr< GrpGroup > GrpGroupSp
Definition: grpgroup.h:39
bool operator()(const GrpGroupSp &testGroupSp)
GrpPeptideSet _peptide_set
ContainsAny(const GrpPeptideSet &peptide_set)