Alexandria  2.27.0
SDC-CH common library for the Euclid project
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PdfModeExtraction.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2012-2021 Euclid Science Ground Segment
3  *
4  * This library is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU Lesser General Public License as published by the Free
6  * Software Foundation; either version 3.0 of the License, or (at your option)
7  * any later version.
8  *
9  * This library is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
27 #include <cstddef>
28 #include <iterator>
29 #include <tuple>
30 #include <utility>
31 #include <vector>
32 
33 namespace Euclid {
34 namespace MathUtils {
35 
39 
40  auto iter = pdf.begin();
41  while (iter != pdf.end()) {
42  xs.push_back((*iter).first);
43  ys.push_back((*iter).second);
44  ++iter;
45  }
46 
47  return std::make_pair(xs, ys);
48 }
49 
51  double max = -1;
52  size_t index = 0;
53  size_t max_index = 0;
54  for (auto iter = pdf.begin(); iter != pdf.end(); ++iter) {
55  if (*iter > max) {
56  max = *iter;
57  max_index = index;
58  }
59  ++index;
60  }
61 
62  return max_index;
63 }
64 
65 std::pair<size_t, size_t> catchPeak(const std::vector<double>& pdf, size_t center_index, double merge_ratio) {
66 
67  double peak_value = pdf[center_index];
68  double threshold = (1.0 - merge_ratio) * peak_value;
69  size_t min_x = 0;
70  size_t max_x = pdf.size() - 1;
71 
72  for (size_t index = 0; index <= center_index; ++index) {
73  size_t position = center_index - index;
74  if (position == 0 || pdf[position] == 0) {
75  break;
76  }
77  if ((pdf[position] < threshold && pdf[position - 1] >= pdf[position])) {
78  min_x = position;
79  break;
80  }
81  min_x = position;
82  }
83 
84  for (size_t position = center_index; position <= pdf.size(); ++position) {
85  if (position == pdf.size() - 1 || pdf[position] == 0) {
86  break;
87  }
88  if ((pdf[position] < threshold && pdf[position + 1] >= pdf[position])) {
89  max_x = position;
90  break;
91  }
92  max_x = position;
93  }
94 
95  return std::make_pair(min_x, max_x);
96 }
97 
98 // basic integration may be refined
100  size_t max_x) {
101 
102  double num = 0.;
103  double area = 0.;
104  for (size_t index = min_x; index <= max_x; ++index) {
105  double dx = 0.;
106  if (index == 0) {
107  dx = pdf.first[index + 1] - pdf.first[index];
108  } else if (index == pdf.first.size() - 1) {
109  dx = pdf.first[index] - pdf.first[index - 1];
110  } else {
111  dx = (pdf.first[index + 1] - pdf.first[index - 1]) / 2.0;
112  }
113 
114  num += pdf.first[index] * pdf.second[index] * dx;
115  area += pdf.second[index] * dx;
116  }
117 
118  return std::make_pair(num / area, area);
119 }
120 
122  /*
123  without assuming equi-distant sampling
124 
125  x_max= ((x1^2-x0^2)*y2 - (x2^2-x0^2)*y1+(x2^2-x1^2)*y0)/(2*((x1-x0)*y2-(x2-x0)*y1+(x2-x1)*y0))
126 
127  */
128  if (x_index >= pdf.first.size()) {
129  throw Elements::Exception("getInterpolationAround: x_index out of range.");
130  }
131 
132  // ensure we are not on the border
133  if (x_index == 0) {
134  x_index += 1;
135  } else if (x_index + 1 == pdf.first.size()) {
136  x_index -= 1;
137  }
138 
139  double x0 = pdf.first[x_index - 1];
140  double y0 = pdf.second[x_index - 1];
141 
142  double x1 = pdf.first[x_index];
143  double y1 = pdf.second[x_index];
144 
145  double x2 = pdf.first[x_index + 1];
146  double y2 = pdf.second[x_index + 1];
147 
148  double denom = 2 * ((x1 - x0) * y2 - (x2 - x0) * y1 + (x2 - x1) * y0);
149  double num = (x1 * x1 - x0 * x0) * y2 - (x2 * x2 - x0 * x0) * y1 + (x2 * x2 - x1 * x1) * y0;
150 
151  double x_max = x1;
152  // protect against division by 0 (flat interval)
153  if (denom != 0) {
154  x_max = num / denom;
155  }
156 
157  // check in interval
158  if (x_max < pdf.first.front()) {
159  x_max = pdf.first.front();
160  } else if (x_max > pdf.first.back()) {
161  x_max = pdf.first.back();
162  }
163 
164  return x_max;
165 }
166 
168 flatternPeak(const std::pair<std::vector<double>, std::vector<double>>& pdf, size_t min_x, size_t max_x, double value) {
169  std::vector<double> flatterned{pdf.second};
170  for (size_t index = min_x; index <= max_x; ++index) {
171  flatterned[index] = value;
172  }
173  return std::make_pair(pdf.first, flatterned);
174 }
175 
177  double merge_ratio, size_t n) {
178  std::vector<ModeInfo> result{};
179  auto pdf_xy = std::make_pair(x_sampling, pdf_sampling);
180 
181  for (size_t idx = 0; idx < n; ++idx) {
182  auto peak_index = findMaximumIndex(pdf_xy.second);
183  auto min_max = catchPeak(pdf_xy.second, peak_index, merge_ratio);
184  auto mean_area = avgArea(pdf_xy, min_max.first, min_max.second);
185  auto max_inter = getInterpolationAround(pdf_xy, peak_index);
186 
187  result.push_back(ModeInfo(pdf_xy.first[peak_index], mean_area.first, max_inter, mean_area.second));
188  pdf_xy = flatternPeak(pdf_xy, min_max.first, min_max.second, 0.0);
189  }
190  return result;
191 }
192 
193 std::vector<ModeInfo> extractNHighestModes(const XYDataset::XYDataset& pdf, double merge_ratio, size_t n) {
194  auto pdf_xy = getXYs(pdf);
195  return extractNHighestModes(pdf_xy.first, pdf_xy.second, merge_ratio, n);
196 }
197 
199  double merge_ratio, size_t n) {
200  auto pdf_xy = std::make_pair(x_sampling, pdf_sampling);
201  double total_area = avgArea(pdf_xy, 0, x_sampling.size() - 1).second;
202 
203  std::vector<ModeInfo> result{};
204 
205  bool out = false;
206  size_t loop = 0;
207  while (!out) {
208 
209  auto peak_index = findMaximumIndex(pdf_xy.second);
210  auto min_max = catchPeak(pdf_xy.second, peak_index, merge_ratio);
211  auto mean_area = avgArea(pdf_xy, min_max.first, min_max.second);
212  auto max_sample = pdf_xy.first[peak_index];
213  double interp_max_value = getInterpolationAround(pdf_xy, peak_index);
214  auto new_mode = ModeInfo(max_sample, mean_area.first, interp_max_value, mean_area.second);
215 
216  auto iter_mode = result.begin();
217  while (iter_mode != result.end()) {
218  if ((*iter_mode).getModeArea() < mean_area.second) {
219  break;
220  }
221  ++iter_mode;
222  }
223 
224  if (iter_mode != result.end()) {
225  result.insert(iter_mode, new_mode);
226  } else if (result.size() < n) {
227  result.push_back(new_mode);
228  }
229  total_area -= mean_area.second;
230  out = result.size() >= n && (result.back().getModeArea() > total_area || loop > 3 * n);
231 
232  pdf_xy = flatternPeak(pdf_xy, min_max.first, min_max.second, 0.0);
233  ++loop;
234  }
235 
236  return result;
237 }
238 
239 std::vector<ModeInfo> extractNBigestModes(const XYDataset::XYDataset& pdf, double merge_ratio, size_t n) {
240  auto pdf_xy = getXYs(pdf);
241  return extractNBigestModes(pdf_xy.first, pdf_xy.second, merge_ratio, n);
242 }
243 
244 } // namespace MathUtils
245 } // namespace Euclid
Class for storing the information of a PDF mode.
std::pair< std::vector< double >, std::vector< double > > getXYs(const XYDataset::XYDataset &pdf)
std::pair< std::vector< double >, std::vector< double > > flatternPeak(const std::pair< std::vector< double >, std::vector< double >> &pdf, size_t min_x, size_t max_x, double value)
T end(T...args)
size_t findMaximumIndex(const std::vector< double > &pdf)
std::vector< ModeInfo > extractNHighestModes(const XYDataset::XYDataset &pdf, double merge_ratio, size_t n)
const_iterator begin() const
Returns a const iterator to the first pair of the dataset.
Definition: XYDataset.cpp:36
std::pair< size_t, size_t > catchPeak(const std::vector< double > &pdf, size_t center_index, double merge_ratio)
T make_pair(T...args)
T size(T...args)
std::pair< double, double > avgArea(std::pair< std::vector< double >, std::vector< double >> &pdf, size_t min_x, size_t max_x)
T begin(T...args)
double getInterpolationAround(const std::pair< std::vector< double >, std::vector< double >> &pdf, size_t x_index)
This module provides an interface for accessing two dimensional datasets (pairs of (X...
Definition: XYDataset.h:59
std::vector< ModeInfo > extractNBigestModes(const XYDataset::XYDataset &pdf, double merge_ratio, size_t n)
const_iterator end() const
Returns a const iterator to the one after last pair dataset.
Definition: XYDataset.cpp:40
constexpr double second