libpappsomspp
Library for mass spectrometry
enzyme.h
Go to the documentation of this file.
1/*******************************************************************************
2 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
3 *
4 * This file is part of the PAPPSOms++ library.
5 *
6 * PAPPSOms++ is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * PAPPSOms++ is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Contributors:
20 * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
21 *implementation
22 ******************************************************************************/
23
24#pragma once
25
27#include <QRegularExpression>
28
29namespace pappso
30{
32{
33 public:
34 /** \brief build the default enzyme (trypsin) with recognition_site =
35 * "([KR])([^P])"
36 * */
37 Enzyme();
38
39 /** \brief build any enzyme given a recognition_site
40 * \param recognition_site is a regular expression that must identify 2 motifs
41 * : one on the Nter side and one on the Cter side
42 * */
43 Enzyme(const QString &recognition_site);
44 ~Enzyme();
45
46 /** \brief digest a protein into enzyme products
47 * \param sequence_database_id integer that references the sequence database
48 * (file, stream, url...) \param protein_sp is the original protein to be
49 * digested \param is_decoy tell if the current protein is a decoy (true) or
50 * normal (false) protein \param enzyme_product is the object that will
51 * receive the digestion products
52 * */
53 void eat(std::int8_t sequence_database_id,
54 const ProteinSp &protein_sp,
55 bool is_decoy,
56 EnzymeProductInterface &enzyme_product) const;
57
58 /** \brief sets the maximum number of missed cleavages allowed in the digestion
59 * \param miscleavage maximum number of missed cleavages to allow (default is
60 * 0)
61 * */
62 void setMiscleavage(unsigned int miscleavage);
63
64 /** \brief get the maximum number of missed cleavages allowed in the digestion
65 * @return maximum number of missed cleavages to allow (default is
66 * 0)
67 * */
68 unsigned int getMiscleavage() const;
69
70
71 /** \brief choose whether to take only the first possibility for wildcards
72 * \param take_only_first_wildcard true : switch to take only the first possibility if there are
73 * X, B or Z wildcards in sequence
74 */
75 void setTakeOnlyFirstWildcard(bool take_only_first_wildcard);
76
77 /** \brief if there are wildcards in the protein sequence : restrict the
78 * number of possible peptide sequences \param max_peptide_variant_list_size
79 * maximum number of peptide variants (default is 100)
80 */
81 void setMaxPeptideVariantListSize(std::size_t max_peptide_variant_list_size);
82
83
84 const QRegularExpression &getQRegExpRecognitionSite() const;
85
86
87 private:
88 /** \brief example with trypsin == [K,R] */
89 QRegularExpression m_recognitionSite;
90 unsigned int m_miscleavage = 0;
91 bool m_takeOnlyFirstWildcard = false;
92
93 std::size_t m_maxPeptideVariantListSize = 100;
94
95
96 std::vector<char> m_wildCardX;
97 std::vector<char> m_wildCardB;
98 std::vector<char> m_wildCardZ;
99
100 void sanityCheck(EnzymeProductInterface &enzyme_product,
101 std::int8_t sequence_database_id,
102 const ProteinSp &protein_sp,
103 bool is_decoy,
104 const PeptideStr &peptide,
105 unsigned int start,
106 bool is_nter,
107 unsigned int missed_cleavage_number,
108 bool semi_enzyme) const;
109 void replaceWildcards(std::vector<std::string> *p_peptide_variant_list) const;
110};
111
112} // namespace pappso
QRegularExpression m_recognitionSite
example with trypsin == [K,R]
Definition: enzyme.h:89
std::vector< char > m_wildCardB
Definition: enzyme.h:97
std::vector< char > m_wildCardZ
Definition: enzyme.h:98
std::vector< char > m_wildCardX
Definition: enzyme.h:96
#define PMSPP_LIB_DECL
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
QString PeptideStr
A type definition for PeptideStr.
Definition: types.h:45
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition: protein.h:47