satyr  0.26
distance.h
Go to the documentation of this file.
1 /*
2  distance.h
3 
4  Copyright (C) 2010 Red Hat, Inc.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation; either version 2 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License along
17  with this program; if not, write to the Free Software Foundation, Inc.,
18  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20 #ifndef SATYR_DISTANCE_H
21 #define SATYR_DISTANCE_H
22 
28 #ifdef __cplusplus
29 extern "C" {
30 #endif
31 
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdlib.h>
35 
36 struct sr_thread;
37 
38 enum sr_distance_type
39 {
40  /* Jaro-Winkler distance:
41  *
42  * Gets number of matching function names(match_count) from both
43  * threads and number of transpositions in place(trans_count) if
44  * the transpositioned function names are not farther away than
45  * frame_count/2 - 1. Then computes the Jaro-Winkler distance
46  * according to the formula. NOTE: The Jaro-Winkler distance is
47  * not a metric distance as it does not satisfy the triangle
48  * inequality. Returns a number between 0 and 1: 0 = no
49  * similarity, 1 = similar threads.
50  */
51  SR_DISTANCE_JARO_WINKLER,
52 
53  /* Jaccard distance:
54  *
55  * Gives a number representing the difference of the size of the
56  * intersection and union divided by the size of the union of two
57  * threads. Function names positions in the thread are not taken
58  * into account. Returns a number between 0 and 1: 0 = similar
59  * threads, 1 = no similarity.
60  */
61  SR_DISTANCE_JACCARD,
62 
63  /* Levenshtein distance:
64  *
65  * Computes the distance of two threads creating a matrix of
66  * distances between all the frames in each thread. Computes a
67  * number representing how many function names need to be
68  * different in one thread to match all the function names in
69  * second thread at each respective positions or how many function
70  * names need to be different to have a match. The number is
71  * always between 0 and n, where n is the frame count of longer
72  * thread 0 = similar threads, n = no similar function names.
73  * This is normalized to a result between 0 and 1 that is
74  * returned.
75  */
76  SR_DISTANCE_LEVENSHTEIN,
77 
78  /* Damerau-Levenshtein distance:
79  *
80  * Like the Levenshtein distance, but with transpositions. NOTE:
81  * The triangle inequality does not hold.
82  */
83  SR_DISTANCE_DAMERAU_LEVENSHTEIN,
84 
85  /* Sentinel, keep it the last entry. */
86  SR_DISTANCE_NUM
87 };
88 
89 float
90 sr_distance(enum sr_distance_type distance_type,
91  struct sr_thread *thread1,
92  struct sr_thread *thread2);
93 
101 {
102  int m;
103  int n;
104  float *distances;
105 };
106 
117 struct sr_distances *
118 sr_distances_new(int m, int n);
119 
128 struct sr_distances *
129 sr_distances_dup(struct sr_distances *distances);
130 
136 void
137 sr_distances_free(struct sr_distances *distances);
138 
151 float
152 sr_distances_get_distance(struct sr_distances *distances, int i, int j);
153 
165 void
166 sr_distances_set_distance(struct sr_distances *distances,
167  int i,
168  int j,
169  float d);
170 
184 struct sr_distances *
185 sr_threads_compare(struct sr_thread **threads, int m, int n,
186  enum sr_distance_type dist_type);
187 
195 {
196  /* Same as for struct sr_distances. */
197  int m;
198  int n;
199  /* First coordinates of this part. */
200  int m_begin;
201  int n_begin;
202  /* Length of the part. */
203  size_t len;
204  /* Distance type. */
205  enum sr_distance_type dist_type;
206  /* Checksum of the threads used to compute this part to catch situations
207  * when sr_distances_part_compute is called with wrong array of threads. */
208  uint32_t checksum;
209  /* The actual result. */
210  float *distances;
211  /* Make it possible to create lists of sr_distances_part. */
212  struct sr_distances_part *next;
213 };
214 
219 struct sr_distances_part *
220 sr_distances_part_new(int m, int n, enum sr_distance_type dist_type,
221  int m_begin, int n_begin, size_t len);
222 
236 struct sr_distances_part *
237 sr_distances_part_create(int m, int n, enum sr_distance_type dist_type,
238  unsigned nparts);
239 
247 void
249  struct sr_thread **threads);
250 
259 struct sr_distances *
261 
269 void
270 sr_distances_part_free(struct sr_distances_part *part, bool follow_links);
271 
272 #ifdef __cplusplus
273 }
274 #endif
275 
276 #endif
struct sr_distances * sr_distances_dup(struct sr_distances *distances)
struct sr_distances * sr_threads_compare(struct sr_thread **threads, int m, int n, enum sr_distance_type dist_type)
void sr_distances_set_distance(struct sr_distances *distances, int i, int j, float d)
struct sr_distances_part * sr_distances_part_new(int m, int n, enum sr_distance_type dist_type, int m_begin, int n_begin, size_t len)
void sr_distances_free(struct sr_distances *distances)
A part of a distance matrix to be computed (possibly in different threads/processes and even differen...
Definition: distance.h:194
A distance matrix of stack trace threads.
Definition: distance.h:100
void sr_distances_part_free(struct sr_distances_part *part, bool follow_links)
float sr_distances_get_distance(struct sr_distances *distances, int i, int j)
void sr_distances_part_compute(struct sr_distances_part *part, struct sr_thread **threads)
struct sr_distances * sr_distances_part_merge(struct sr_distances_part *parts)
struct sr_distances * sr_distances_new(int m, int n)
struct sr_distances_part * sr_distances_part_create(int m, int n, enum sr_distance_type dist_type, unsigned nparts)