Pteros  2.0
Molecular modeling library for human beings!
align-2d.h
1 // Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
2 // Distributed under the Boost Software License, Version 1.0.
3 // (See accompanying file LICENSE_1_0.txt or copy at
4 // http://www.boost.org/LICENSE_1_0.txt)
5 
6 #pragma once
7 
8 #include <string>
9 #include <vector>
10 #include <algorithm>
11 
12 typedef std::basic_string<uint8> sec_structure;
13 
14 // --------------------------------------------------------------------
15 // entry is a multiple sequence alignment 'entry', a sequence with an ID and more.
16 
17 struct entry
18 {
19  entry(const entry& e)
20  : m_nr(e.m_nr)
21  , m_id(e.m_id)
22  , m_seq(e.m_seq)
23  , m_weight(e.m_weight)
24  , m_positions(e.m_positions)
25  {
26  }
27 
28  entry(uint32 nr, const std::string& id, const sequence& seq = sequence(), float weight = 1.0f)
29  : m_nr(nr)
30  , m_id(id)
31  , m_seq(seq)
32  , m_weight(weight) {}
33 
34  uint32 nr() const { return m_nr; }
35  float weight() const { return m_weight; }
36  uint32 length() const { return static_cast<uint32>(m_seq.length()); }
37 
38  //bool has_gaps() const { return std::find(m_seq.begin(), m_seq.end(), kSignalGapCode) != m_seq.end(); }
39 
40  void insert_gap(uint32 pos);
41  void append_gap();
42 
43  void remove_gap(uint32 pos);
44 
45  void remove_gaps();
46  void dump_positions() { m_positions.clear(); }
47 
48  uint32 m_nr;
49  std::string m_id;
50  sequence m_seq;
51  sec_structure m_ss;
52  float m_weight;
53  std::vector<int16>
54  m_positions;
55 };
56 
57 // --------------------------------------------------------------------
58 // nodes used in calculating a guide tree
59 
60 struct base_node
61 {
62  virtual ~base_node() {}
63 
64  virtual void print(std::ostream& s) = 0;
65 
66  virtual base_node* left() const { return 0; }
67  virtual base_node* right() const { return 0; }
68 
69  virtual void add_weight(float w) = 0;
70  virtual uint32 leaf_count() const { return 1; }
71 
72  virtual uint32 length() const = 0;
73  virtual uint32 cost() const { return 0; }
74  virtual uint32 cumulative_cost() const
75  { return 0; }
76 };
77 
78 std::ostream& operator<<(std::ostream& lhs, base_node& rhs);
79 
80 struct joined_node : public base_node
81 {
82  joined_node();
83 
84  joined_node(base_node* left, base_node* right,
85  float d_left, float d_right);
86 
87  virtual ~joined_node();
88 
89  virtual void print(std::ostream& s);
90 
91  virtual base_node* left() const { return m_left; }
92  virtual base_node* right() const { return m_right; }
93 
94  virtual void add_weight(float w)
95  {
96  m_left->add_weight(w);
97  m_right->add_weight(w);
98  }
99 
100  virtual uint32 leaf_count() const { return m_leaf_count; }
101  virtual uint32 length() const { return m_length; }
102 
103  virtual uint32 cost() const { return m_length * m_leaf_count; }
104  virtual uint32 cumulative_cost() const
105  { return cost() + m_left->cumulative_cost() + m_right->cumulative_cost(); }
106 
107  base_node* m_left;
108  base_node* m_right;
109  float m_d_left;
110  float m_d_right;
111  uint32 m_leaf_count;
112  uint32 m_length;
113 };
114 
115 struct leaf_node : public base_node
116 {
117  leaf_node(entry& e)
118  : m_entry(e)
119  {
120  m_entry.m_weight = 0;
121  }
122 
123  virtual void print(std::ostream& s);
124 
125  virtual void add_weight(float w)
126  {
127  m_entry.m_weight += w;
128  }
129 
130  virtual uint32 length() const { return static_cast<uint32>(m_entry.m_seq.length()); }
131 
132  entry& m_entry;
133 };
134 
135 class substitution_matrix_family;
136 
137 template<typename T>
138 class matrix;
139 
140 // prototype
141 void align(
142  const joined_node* node,
143  std::vector<entry*>& a, std::vector<entry*>& b, std::vector<entry*>& c,
144  const substitution_matrix_family& mat_fam,
145  float gop, float gep, float magic,
146  bool ignorePositions);
147 
148 void print_matrix(std::ostream& os,
149  const matrix<int8>& tb, const sequence& sx, const sequence& sy);