liberasurecode  1.4.0
Erasure Code API library
erasurecode_preprocessing.c
Go to the documentation of this file.
1 /*
2  * Copyright 2014 Tushar Gohad, Kevin M Greenan, Eric Lambert, Mark Storer
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * Redistributions of source code must retain the above copyright notice, this
8  * list of conditions and the following disclaimer.
9  *
10  * Redistributions in binary form must reproduce the above copyright notice, this
11  * list of conditions and the following disclaimer in the documentation and/or
12  * other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY
13  * THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
14  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
15  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
16  * EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
17  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
18  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
20  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
21  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
22  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  * liberasurecode proprocssing helpers implementation
25  *
26  * vi: set noai tw=79 ts=4 sw=4:
27  */
28 
29 #include "erasurecode_backend.h"
30 #include "erasurecode_helpers.h"
31 #include "erasurecode_helpers_ext.h"
32 #include "erasurecode_log.h"
33 #include "erasurecode_preprocessing.h"
34 #include "erasurecode_stdinc.h"
35 
36 int prepare_fragments_for_encode(ec_backend_t instance,
37  int k, int m,
38  const char *orig_data, uint64_t orig_data_size, /* input */
39  char **encoded_data, char **encoded_parity, /* output */
40  int *blocksize)
41 {
42  int i, ret = 0;
43  int data_len; /* data len to write to fragment headers */
44  int aligned_data_len; /* EC algorithm compatible data length */
45  int buffer_size, payload_size = 0;
46 
47  /* Calculate data sizes, aligned_data_len guaranteed to be divisible by k*/
48  data_len = orig_data_size;
49  aligned_data_len = get_aligned_data_size(instance, orig_data_size);
50  *blocksize = payload_size = (aligned_data_len / k);
51  buffer_size = payload_size + instance->common.backend_metadata_size;
52 
53  for (i = 0; i < k; i++) {
54  int copy_size = data_len > payload_size ? payload_size : data_len;
55  char *fragment = (char *) alloc_fragment_buffer(buffer_size);
56  if (NULL == fragment) {
57  ret = -ENOMEM;
58  goto out_error;
59  }
60 
61  /* Copy existing data into clean, zero'd out buffer */
62  encoded_data[i] = get_data_ptr_from_fragment(fragment);
63 
64  if (data_len > 0) {
65  memcpy(encoded_data[i], orig_data, copy_size);
66  }
67 
68  orig_data += copy_size;
69  data_len -= copy_size;
70  }
71 
72  for (i = 0; i < m; i++) {
73  char *fragment = (char *) alloc_fragment_buffer(buffer_size);
74  if (NULL == fragment) {
75  ret = -ENOMEM;
76  goto out_error;
77  }
78 
79  encoded_parity[i] = get_data_ptr_from_fragment(fragment);
80  }
81 
82 out:
83  return ret;
84 
85 out_error:
86  printf ("ERROR in encode\n");
87  if (encoded_data) {
88  for (i = 0; i < k; i++) {
89  if (encoded_data[i])
90  free_fragment_buffer(encoded_data[i]);
91  }
92  check_and_free_buffer(encoded_data);
93  }
94 
95  if (encoded_parity) {
96  for (i = 0; i < m; i++) {
97  if (encoded_parity[i])
98  free_fragment_buffer(encoded_parity[i]);
99  }
100  check_and_free_buffer(encoded_parity);
101  }
102 
103  goto out;
104 }
105 
106 /*
107  * Note that the caller should always check realloc_bm during success or
108  * failure to free buffers allocated here. We could free up in this function,
109  * but it is internal to this library and only used in a few places. In any
110  * case, the caller has to free up in the success case, so it may as well do
111  * so in the failure case.
112  */
114  int k, int m,
115  char **data, char **parity,
116  int *missing_idxs,
117  int *orig_size, int *fragment_payload_size, int fragment_size,
118  uint64_t *realloc_bm)
119 {
120  int i; /* a counter */
121  unsigned long long missing_bm; /* bitmap form of missing indexes list */
122  int orig_data_size = -1;
123  int payload_size = -1;
124 
125  missing_bm = convert_list_to_bitmap(missing_idxs);
126 
127  /*
128  * Determine if each data fragment is:
129  * 1.) Alloc'd: if not, alloc new buffer (for missing fragments)
130  * 2.) Aligned to 16-byte boundaries: if not, alloc a new buffer
131  * memcpy the contents and free the old buffer
132  */
133  for (i = 0; i < k; i++) {
134  /*
135  * Allocate or replace with aligned buffer if the buffer was not
136  * aligned.
137  * DO NOT FREE: the python GC should free the original when cleaning up
138  * 'data_list'
139  */
140  if (NULL == data[i]) {
141  data[i] = alloc_fragment_buffer(fragment_size - sizeof(fragment_header_t));
142  if (NULL == data[i]) {
143  log_error("Could not allocate data buffer!");
144  return -ENOMEM;
145  }
146  *realloc_bm = *realloc_bm | (1 << i);
147  } else if (!is_addr_aligned((unsigned long)data[i], 16)) {
148  char *tmp_buf = alloc_fragment_buffer(fragment_size - sizeof(fragment_header_t));
149  if (NULL == tmp_buf) {
150  log_error("Could not allocate temp buffer!");
151  return -ENOMEM;
152  }
153  memcpy(tmp_buf, data[i], fragment_size);
154  data[i] = tmp_buf;
155  *realloc_bm = *realloc_bm | (1 << i);
156  }
157 
158  /* Need to determine the size of the original data */
159  if (((missing_bm & (1 << i)) == 0) && orig_data_size < 0) {
160  orig_data_size = get_orig_data_size(data[i]);
161  if (orig_data_size < 0) {
162  log_error("Invalid orig_data_size in fragment header!");
163  return -EBADHEADER;
164  }
165  payload_size = get_fragment_payload_size(data[i]);
166  if (orig_data_size < 0) {
167  log_error("Invalid fragment_size in fragment header!");
168  return -EBADHEADER;
169  }
170  }
171  }
172 
173  /* Perform the same allocation, alignment checks on the parity fragments */
174  for (i = 0; i < m; i++) {
175  /*
176  * Allocate or replace with aligned buffer, if the buffer was not aligned.
177  * DO NOT FREE: the python GC should free the original when cleaning up 'data_list'
178  */
179  if (NULL == parity[i]) {
180  parity[i] = alloc_fragment_buffer(fragment_size-sizeof(fragment_header_t));
181  if (NULL == parity[i]) {
182  log_error("Could not allocate parity buffer!");
183  return -ENOMEM;
184  }
185  *realloc_bm = *realloc_bm | (1 << (k + i));
186  } else if (!is_addr_aligned((unsigned long)parity[i], 16)) {
187  char *tmp_buf = alloc_fragment_buffer(fragment_size-sizeof(fragment_header_t));
188  if (NULL == tmp_buf) {
189  log_error("Could not allocate temp buffer!");
190  return -ENOMEM;
191  }
192  memcpy(tmp_buf, parity[i], fragment_size);
193  parity[i] = tmp_buf;
194  *realloc_bm = *realloc_bm | (1 << (k + i));
195  }
196 
197  /* Need to determine the size of the original data */
198  if (((missing_bm & (1 << (k + i))) == 0) && orig_data_size < 0) {
199  orig_data_size = get_orig_data_size(parity[i]);
200  if (orig_data_size < 0) {
201  log_error("Invalid orig_data_size in fragment header!");
202  return -EBADHEADER;
203  }
204  payload_size = get_fragment_payload_size(parity[i]);
205  if (orig_data_size < 0) {
206  log_error("Invalid fragment_size in fragment header!");
207  return -EBADHEADER;
208  }
209  }
210 
211  }
212 
213  *orig_size = orig_data_size;
214  *fragment_payload_size = payload_size;
215 
216  return 0;
217 }
218 
220  int k, int m,
221  char **fragments, int num_fragments,
222  char **data, char **parity, int *missing)
223 {
224  int i = 0;
225  int num_missing = 0;
226  int index;
227 
228  /*
229  * Set all data and parity entries to NULL
230  */
231  for (i = 0; i < k; i++) {
232  data[i] = NULL;
233  }
234  for (i = 0; i < m; i++) {
235  parity[i] = NULL;
236  }
237 
238  /*
239  * Fill in data and parity with available fragments
240  */
241  for (i = 0; i < num_fragments; i++) {
242  index = get_fragment_idx(fragments[i]);
243  if (index < 0 || index > (k + m)) {
244  return -EBADHEADER;
245  }
246  if (index < k) {
247  data[index] = fragments[i];
248  } else {
249  parity[index - k] = fragments[i];
250  }
251  }
252 
253  /*
254  * Fill in missing array with missing indexes
255  */
256  for (i = 0; i < k; i++) {
257  if (NULL == data[i]) {
258  missing[num_missing] = i;
259  num_missing++;
260  }
261  }
262  for (i = 0; i < m; i++) {
263  if (NULL == parity[i]) {
264  missing[num_missing] = i + k;
265  num_missing++;
266  }
267  }
268  // TODO: In general, it is possible to reconstruct one or more fragments
269  // when more than m fragments are missing (e.g. flat XOR codes)
270  return (num_missing > m) ? -EINSUFFFRAGS : 0;
271 }
272 
273 int fragments_to_string(int k, int m,
274  char **fragments, int num_fragments,
275  char **orig_payload, uint64_t *payload_len)
276 {
277  char *internal_payload = NULL;
278  char **data = NULL;
279  int orig_data_size = -1;
280  int i;
281  int index;
282  int data_size;
283  int num_data = 0;
284  int string_off = 0;
285  int ret = -1;
286 
287  if (num_fragments < k) {
288  /*
289  * This is not necessarily an error condition, so *do not log here*
290  * We can maybe debug log, if necessary.
291  */
292  goto out;
293  }
294 
295  data = (char **) get_aligned_buffer16(sizeof(char *) * k);
296 
297  if (NULL == data) {
298  log_error("Could not allocate buffer for data!!");
299  ret = -ENOMEM;
300  goto out;
301  }
302 
303  for (i = 0; i < num_fragments; i++) {
304  index = get_fragment_idx(fragments[i]);
305  data_size = get_fragment_payload_size(fragments[i]);
306  if ((index < 0) || (data_size < 0)) {
307  log_error("Invalid fragment header information!");
308  ret = -EBADHEADER;
309  goto out;
310  }
311 
312  /* Validate the original data size */
313  if (orig_data_size < 0) {
314  orig_data_size = get_orig_data_size(fragments[i]);
315  } else {
316  if (get_orig_data_size(fragments[i]) != orig_data_size) {
317  log_error("Inconsistent orig_data_size in fragment header!");
318  ret = -EBADHEADER;
319  goto out;
320  }
321  }
322 
323  /* Skip parity fragments, put data fragments in index order */
324  if (index >= k) {
325  continue;
326  } else {
327  /* Make sure we account for duplicates */
328  if (NULL == data[index]) {
329  data[index] = fragments[i];
330  num_data++;
331  }
332  }
333  }
334 
335  /* We do not have enough data fragments to do this! */
336  if (num_data != k) {
337  /*
338  * This is not necessarily an error condition, so *do not log here*
339  * We can maybe debug log, if necessary.
340  */
341  goto out;
342  }
343 
344  /* Create the string to return */
345  internal_payload = (char *) get_aligned_buffer16(orig_data_size);
346  if (NULL == internal_payload) {
347  log_error("Could not allocate buffer for decoded string!");
348  ret = -ENOMEM;
349  goto out;
350  }
351 
352  /* Pass the original data length back */
353  *payload_len = orig_data_size;
354 
355  /* Copy fragment data into cstring (fragments should be in index order) */
356  for (i = 0; i < num_data && orig_data_size > 0; i++) {
357  char* fragment_data = get_data_ptr_from_fragment(data[i]);
358  int fragment_size = get_fragment_payload_size(data[i]);
359  int payload_size = orig_data_size > fragment_size ? fragment_size : orig_data_size;
360 
361  memcpy(internal_payload + string_off, fragment_data, payload_size);
362  orig_data_size -= payload_size;
363  string_off += payload_size;
364  }
365 
366  /* Everything worked just fine */
367  ret = 0;
368 
369 out:
370  if (NULL != data) {
371  free(data);
372  }
373 
374  *orig_payload = internal_payload;
375  return ret;
376 }
377 
378 
int get_fragment_partition(int k, int m, char **fragments, int num_fragments, char **data, char **parity, int *missing)
int get_fragment_idx(char *buf)
int get_aligned_data_size(ec_backend_t instance, int data_len)
Compute a size aligned to the number of data and the underlying wordsize of the EC algorithm...
char * get_data_ptr_from_fragment(char *buf)
int free_fragment_buffer(char *buf)
int prepare_fragments_for_decode(int k, int m, char **data, char **parity, int *missing_idxs, int *orig_size, int *fragment_payload_size, int fragment_size, uint64_t *realloc_bm)
int get_orig_data_size(char *buf)
char * alloc_fragment_buffer(int size)
void * check_and_free_buffer(void *buf)
Deallocate memory buffer if it&#39;s not NULL.
void * get_aligned_buffer16(int size)
Memory Management Methods.
int get_fragment_payload_size(char *buf)
int fragments_to_string(int k, int m, char **fragments, int num_fragments, char **orig_payload, uint64_t *payload_len)
int prepare_fragments_for_encode(ec_backend_t instance, int k, int m, const char *orig_data, uint64_t orig_data_size, char **encoded_data, char **encoded_parity, int *blocksize)