Alexandria  2.16
Please provide a description of the project.
PdfModeExtraction.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2020 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
26 #include <cstddef>
27 #include <utility>
28 #include <iterator>
29 #include <vector>
30 #include <tuple>
32 
33 namespace Euclid {
34 namespace MathUtils {
35 
36 
37 
38 std::pair<std::vector<double>, std::vector<double>> getXYs(const XYDataset::XYDataset& pdf) {
39  std::vector<double> xs{};
40  std::vector<double> ys{};
41 
42  auto iter = pdf.begin();
43  while (iter != pdf.end()) {
44  xs.push_back((*iter).first);
45  ys.push_back((*iter).second);
46  ++iter;
47  }
48 
49  return std::make_pair(xs, ys);
50 }
51 
52 size_t findMaximumIndex(const std::vector<double>& pdf) {
53  double max = -1;
54  size_t index = 0;
55  size_t max_index = 0;
56  for (auto iter = pdf.begin(); iter != pdf.end(); ++iter) {
57  if (*iter > max) {
58  max = *iter;
59  max_index = index;
60  }
61  ++index;
62  }
63 
64  return max_index;
65 }
66 
67 
68 
69 std::pair<size_t, size_t> catchPeak(const std::vector<double>& pdf, size_t center_index, double merge_ratio) {
70 
71  double peak_value = pdf[center_index];
72  double threshold = (1.0-merge_ratio)*peak_value;
73  size_t min_x = 0;
74  size_t max_x = pdf.size()-1;
75 
76  for (size_t index = 0; index <= center_index; ++index) {
77  size_t position = center_index-index;
78  if (position == 0 || pdf[position] == 0) {
79  break;
80  }
81  if ((pdf[position] < threshold && pdf[position-1] >= pdf[position])) {
82  min_x = position;
83  break;
84  }
85  min_x = position;
86  }
87 
88 
89  for (size_t position = center_index; position <= pdf.size(); ++position) {
90  if (position == pdf.size()-1 || pdf[position] == 0) {
91  break;
92  }
93  if ((pdf[position] < threshold && pdf[position+1] >= pdf[position] )) {
94  max_x = position;
95  break;
96  }
97  max_x = position;
98  }
99 
100  return std::make_pair(min_x, max_x);
101 }
102 
103 // basic integration may be refined
104 std::pair<double, double> avgArea(std::pair<std::vector<double>, std::vector<double>>& pdf, size_t min_x, size_t max_x) {
105 
106  double num = 0.;
107  double area = 0.;
108  for (size_t index = min_x; index <= max_x; ++index) {
109  double dx = 0.;
110  if (index == 0) {
111  dx = pdf.first[index+1] - pdf.first[index];
112  } else if (index == pdf.first.size()-1) {
113  dx = pdf.first[index] - pdf.first[index-1];
114  } else {
115  dx = (pdf.first[index+1] - pdf.first[index-1])/2.0;
116  }
117 
118  num += pdf.first[index]*pdf.second[index]*dx;
119  area += pdf.second[index]*dx;
120  }
121 
122  return std::make_pair(num/area, area);
123 }
124 
125 double getInterpolationAround(const std::pair<std::vector<double>, std::vector<double>>& pdf, size_t x_index) {
126  /*
127  without assuming equi-distant sampling
128 
129  x_max= ((x1^2-x0^2)*y2 - (x2^2-x0^2)*y1+(x2^2-x1^2)*y0)/(2*((x1-x0)*y2-(x2-x0)*y1+(x2-x1)*y0))
130 
131  */
132  if (x_index>= pdf.first.size()) {
133  throw Elements::Exception("getInterpolationAround: x_index out of range.");
134  }
135 
136  // ensure we are not on the border
137  if (x_index == 0) {
138  x_index+=1;
139  } else if (x_index +1 == pdf.first.size()) {
140  x_index -= 1;
141  }
142 
143  double x0 = pdf.first[x_index-1];
144  double y0 = pdf.second[x_index-1];
145 
146  double x1 = pdf.first[x_index];
147  double y1 = pdf.second[x_index];
148 
149  double x2 = pdf.first[x_index+1];
150  double y2 = pdf.second[x_index+1];
151 
152  double denom = 2*((x1-x0)*y2 - (x2-x0)*y1 + (x2-x1)*y0);
153  double num = (x1*x1-x0*x0)*y2 - (x2*x2-x0*x0)*y1 + (x2*x2-x1*x1)*y0;
154 
155  double x_max = x1;
156  // protect against division by 0 (flat interval)
157  if (denom != 0) {
158  x_max = num/denom;
159  }
160 
161  // check in interval
162  if (x_max < pdf.first.front()) {
163  x_max = pdf.first.front();
164  } else if (x_max > pdf.first.back()) {
165  x_max = pdf.first.back();
166  }
167 
168  return x_max;
169 }
170 
171 
172 std::pair<std::vector<double>, std::vector<double>> flatternPeak(const std::pair<std::vector<double>,
173  std::vector<double>>& pdf,
174  size_t min_x,
175  size_t max_x,
176  double value) {
177  std::vector<double> flatterned{pdf.second};
178  for (size_t index = min_x; index <= max_x; ++index) {
179  flatterned[index] = value;
180  }
181  return std::make_pair(pdf.first, flatterned);
182 }
183 
184 
185 std::vector<ModeInfo> extractNHighestModes(std::vector<double>& x_sampling,
186  std::vector<double>& pdf_sampling,
187  double merge_ratio,
188  size_t n) {
189  std::vector<ModeInfo> result{};
190  auto pdf_xy = std::make_pair(x_sampling, pdf_sampling);
191 
192  for (size_t idx = 0; idx < n; ++idx) {
193  auto peak_index = findMaximumIndex(pdf_xy.second);
194  auto min_max = catchPeak(pdf_xy.second, peak_index, merge_ratio);
195  auto mean_area = avgArea(pdf_xy, min_max.first, min_max.second);
196  auto max_inter = getInterpolationAround(pdf_xy, peak_index);
197 
198  result.push_back(ModeInfo(pdf_xy.first[peak_index], mean_area.first, max_inter, mean_area.second));
199  pdf_xy = flatternPeak(pdf_xy, min_max.first, min_max.second, 0.0);
200  }
201  return result;
202 }
203 
204 
205 std::vector<ModeInfo> extractNHighestModes(const XYDataset::XYDataset& pdf, double merge_ratio, size_t n) {
206  auto pdf_xy = getXYs(pdf);
207  return extractNHighestModes(pdf_xy.first, pdf_xy.second, merge_ratio, n);
208 }
209 
210 
211 std::vector<ModeInfo> extractNBigestModes(std::vector<double>& x_sampling,
212  std::vector<double>& pdf_sampling,
213  double merge_ratio,
214  size_t n) {
215  auto pdf_xy = std::make_pair(x_sampling, pdf_sampling);
216  double total_area = avgArea(pdf_xy, 0, x_sampling.size() -1).second;
217 
218  std::vector<ModeInfo> result{};
219 
220  bool out = false;
221  size_t loop = 0;
222  while (!out) {
223 
224  auto peak_index = findMaximumIndex(pdf_xy.second);
225  auto min_max = catchPeak(pdf_xy.second, peak_index, merge_ratio);
226  auto mean_area = avgArea(pdf_xy, min_max.first, min_max.second);
227  auto max_sample = pdf_xy.first[peak_index];
228  double interp_max_value = getInterpolationAround(pdf_xy, peak_index);
229  auto new_mode = ModeInfo(max_sample, mean_area.first, interp_max_value, mean_area.second);
230 
231  auto iter_mode = result.begin();
232  while (iter_mode != result.end()) {
233  if ((*iter_mode).getModeArea() < mean_area.second) {
234  break;
235  }
236  ++iter_mode;
237  }
238 
239  if (iter_mode != result.end()) {
240  result.insert(iter_mode, new_mode);
241  } else if (result.size() < n) {
242  result.push_back(new_mode);
243  }
244  total_area -= mean_area.second;
245  out = result.size() >= n && (result.back().getModeArea() > total_area || loop > 3*n);
246 
247  pdf_xy = flatternPeak(pdf_xy, min_max.first, min_max.second, 0.0);
248  ++loop;
249  }
250 
251  return result;
252 }
253 
254 
255 std::vector<ModeInfo> extractNBigestModes(const XYDataset::XYDataset& pdf, double merge_ratio, size_t n) {
256  auto pdf_xy = getXYs(pdf);
257  return extractNBigestModes(pdf_xy.first, pdf_xy.second, merge_ratio, n);
258 }
259 
260 
261 } // MathUtils namespace
262 }
263 
264 
Class for storing the information of a PDF mode.
constexpr double second
std::pair< std::vector< double >, std::vector< double > > getXYs(const XYDataset::XYDataset &pdf)
const_iterator end() const
Returns a const iterator to the one after last pair dataset.
Definition: XYDataset.cpp:42
std::pair< std::vector< double >, std::vector< double > > flatternPeak(const std::pair< std::vector< double >, std::vector< double >> &pdf, size_t min_x, size_t max_x, double value)
size_t findMaximumIndex(const std::vector< double > &pdf)
std::vector< ModeInfo > extractNHighestModes(const XYDataset::XYDataset &pdf, double merge_ratio, size_t n)
std::pair< size_t, size_t > catchPeak(const std::vector< double > &pdf, size_t center_index, double merge_ratio)
std::pair< double, double > avgArea(std::pair< std::vector< double >, std::vector< double >> &pdf, size_t min_x, size_t max_x)
double getInterpolationAround(const std::pair< std::vector< double >, std::vector< double >> &pdf, size_t x_index)
This module provides an interface for accessing two dimensional datasets (pairs of (X...
Definition: XYDataset.h:59
std::vector< ModeInfo > extractNBigestModes(const XYDataset::XYDataset &pdf, double merge_ratio, size_t n)
const_iterator begin() const
Returns a const iterator to the first pair of the dataset.
Definition: XYDataset.cpp:38